Skip to main content

polyglot_sql/
parser.rs

1//! SQL Parser -- recursive-descent parser that converts a token stream into an AST.
2//!
3//! The central type is [`Parser`], which consumes tokens produced by the
4//! [`Tokenizer`](crate::tokens::Tokenizer) and builds a tree of [`Expression`]
5//! nodes covering the full SQL grammar: queries, DML, DDL, set operations,
6//! window functions, CTEs, and dialect-specific extensions for 30+ databases.
7//!
8//! The simplest entry point is [`Parser::parse_sql`], which tokenizes and
9//! parses a SQL string in one call.
10//!
11//! # Static configuration maps
12//!
13//! This module also exports several `LazyLock<HashSet<TokenType>>` constants
14//! (ported from Python sqlglot's `parser.py`) that classify token types:
15//!
16//! - [`TYPE_TOKENS`] -- all tokens that represent SQL data types
17//! - [`NESTED_TYPE_TOKENS`] -- parametric types like `ARRAY`, `MAP`, `STRUCT`
18//! - [`RESERVED_TOKENS`] -- tokens that cannot be used as unquoted identifiers
19//! - [`NO_PAREN_FUNCTIONS`] / [`NO_PAREN_FUNCTION_NAMES`] -- zero-argument
20//!   functions that may be written without parentheses (e.g. `CURRENT_DATE`)
21//! - [`DB_CREATABLES`] -- object kinds valid after `CREATE` (TABLE, VIEW, etc.)
//! - [`SUBQUERY_PREDICATES`] -- tokens introducing subquery predicates (ANY, ALL, EXISTS, SOME)
23
24use crate::error::{Error, Result};
25use crate::expressions::*;
26use crate::tokens::{Span, Token, TokenType, Tokenizer, TokenizerConfig};
27use std::collections::HashSet;
28use std::sync::LazyLock;
29
30// =============================================================================
31// Parser Configuration Maps (ported from Python SQLGlot parser.py)
32// =============================================================================
33
34/// NO_PAREN_FUNCTIONS: Functions that can be called without parentheses
35/// Maps TokenType to the function name for generation
36/// Python: NO_PAREN_FUNCTIONS = {TokenType.CURRENT_DATE: exp.CurrentDate, ...}
37pub static NO_PAREN_FUNCTIONS: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
38    let mut set = HashSet::new();
39    set.insert(TokenType::CurrentDate);
40    set.insert(TokenType::CurrentDateTime);
41    set.insert(TokenType::CurrentTime);
42    set.insert(TokenType::CurrentTimestamp);
43    set.insert(TokenType::CurrentUser);
44    set.insert(TokenType::CurrentRole);
45    set.insert(TokenType::CurrentSchema);
46    set.insert(TokenType::CurrentCatalog);
47    // Additional no-paren functions (from tokens.rs)
48    set.insert(TokenType::LocalTime);
49    set.insert(TokenType::LocalTimestamp);
50    set.insert(TokenType::SysTimestamp);
51    set.insert(TokenType::UtcDate);
52    set.insert(TokenType::UtcTime);
53    set.insert(TokenType::UtcTimestamp);
54    set.insert(TokenType::SessionUser);
55    set
56});
57
58/// NO_PAREN_FUNCTION_NAMES: String names that can be no-paren functions
59/// These are often tokenized as Var/Identifier instead of specific TokenTypes
60pub static NO_PAREN_FUNCTION_NAMES: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
61    crate::function_registry::NO_PAREN_FUNCTION_NAME_LIST
62        .iter()
63        .copied()
64        .collect()
65});
66
67/// STRUCT_TYPE_TOKENS: Tokens that represent struct-like types
68/// Python: STRUCT_TYPE_TOKENS = {TokenType.FILE, TokenType.NESTED, TokenType.OBJECT, ...}
69pub static STRUCT_TYPE_TOKENS: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
70    let mut set = HashSet::new();
71    set.insert(TokenType::File);
72    set.insert(TokenType::Nested);
73    set.insert(TokenType::Object);
74    set.insert(TokenType::Struct);
75    // Note: UNION is part of STRUCT_TYPE_TOKENS in Python but we handle it as a set operation
76    set
77});
78
79/// NESTED_TYPE_TOKENS: Tokens that can have nested type parameters
80/// Python: NESTED_TYPE_TOKENS = {TokenType.ARRAY, TokenType.LIST, ...}
81pub static NESTED_TYPE_TOKENS: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
82    let mut set = HashSet::new();
83    set.insert(TokenType::Array);
84    set.insert(TokenType::List);
85    set.insert(TokenType::LowCardinality);
86    set.insert(TokenType::Map);
87    set.insert(TokenType::Nullable);
88    set.insert(TokenType::Range);
89    // Include STRUCT_TYPE_TOKENS
90    set.insert(TokenType::File);
91    set.insert(TokenType::Nested);
92    set.insert(TokenType::Object);
93    set.insert(TokenType::Struct);
94    set
95});
96
/// Report whether `name` is one of the known SQL type names that should stay
/// uppercased.
///
/// The comparison is exact and case-sensitive, so callers are expected to pass
/// an already-uppercased name. Known types such as `DATETIME2` or `SYSNAME`
/// yield `true`; anything else (for instance a user-defined type like
/// `UserDefinedTableType`, whose original case should be preserved) yields
/// `false`.
fn convert_name_is_known_custom(name: &str) -> bool {
    // Known SQL types that reach the `_` (default) branch of parse_data_type.
    const KNOWN_CUSTOM_TYPE_NAMES: &[&str] = &[
        "DATETIME2",
        "DATETIMEOFFSET",
        "SMALLDATETIME",
        "DATETIME",
        "NVARCHAR2",
        "VARCHAR2",
        "NCHAR",
        "MONEY",
        "SMALLMONEY",
        "TINYINT",
        "MEDIUMINT",
        "BYTEINT",
        "SUPER",
        "HLLSKETCH",
        "TIMETZ",
        "TIMESTAMPTZ",
        "SYSNAME",
        "XML",
        "SQL_VARIANT",
        "HIERARCHYID",
        "ROWVERSION",
        "IMAGE",
        "CURSOR",
        "TABLE",
        "UNIQUEIDENTIFIER",
        "VARIANT",
        "OBJECT",
        "NUMBER",
        "BINARY_FLOAT",
        "BINARY_DOUBLE",
        "CLOB",
        "NCLOB",
        "RAW",
        "LONG",
        "MEDIUMTEXT",
        "LONGTEXT",
        "MEDIUMBLOB",
        "LONGBLOB",
        "TINYTEXT",
        "TINYBLOB",
        "INT2",
        "INT4",
        "INT8",
        "FLOAT4",
        "FLOAT8",
        "SERIAL",
        "BIGSERIAL",
        "SMALLSERIAL",
        "YEAR",
        "FIXED",
        "SIGNED",
        "UNSIGNED",
        "ROW",
        "BIT",
        "BOOLEAN",
        "BOOL",
        "TEXT",
        "STRING",
        "NTEXT",
        "INT128",
        "INT256",
        "UINT8",
        "UINT16",
        "UINT32",
        "UINT64",
        "UINT128",
        "UINT256",
        "FLOAT32",
        "FLOAT64",
        "LOWCARDINALITY",
        "NULLABLE",
        "IPADDRESS",
        "IPV4",
        "IPV6",
        "AGGREGATEFUNCTION",
        "SIMPLEAGGREGATEFUNCTION",
        "FIXEDSTRING",
        "RING",
        "NESTED",
    ];

    KNOWN_CUSTOM_TYPE_NAMES.contains(&name)
}
186
187/// ENUM_TYPE_TOKENS: Tokens that represent enum types
188/// Python: ENUM_TYPE_TOKENS = {TokenType.DYNAMIC, TokenType.ENUM, ...}
189pub static ENUM_TYPE_TOKENS: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
190    let mut set = HashSet::new();
191    set.insert(TokenType::Dynamic);
192    set.insert(TokenType::Enum);
193    set.insert(TokenType::Enum8);
194    set.insert(TokenType::Enum16);
195    set
196});
197
198/// AGGREGATE_TYPE_TOKENS: Tokens for aggregate function types (ClickHouse)
199/// Python: AGGREGATE_TYPE_TOKENS = {TokenType.AGGREGATEFUNCTION, ...}
200pub static AGGREGATE_TYPE_TOKENS: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
201    let mut set = HashSet::new();
202    set.insert(TokenType::AggregateFunction);
203    set.insert(TokenType::SimpleAggregateFunction);
204    set
205});
206
207/// TYPE_TOKENS: All tokens that represent data types
208/// Python: TYPE_TOKENS = {TokenType.BIT, TokenType.BOOLEAN, ...}
209pub static TYPE_TOKENS: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
210    let mut set = HashSet::new();
211    // Basic types
212    set.insert(TokenType::Bit);
213    set.insert(TokenType::Boolean);
214    // Integer types
215    set.insert(TokenType::TinyInt);
216    set.insert(TokenType::UTinyInt);
217    set.insert(TokenType::SmallInt);
218    set.insert(TokenType::USmallInt);
219    set.insert(TokenType::MediumInt);
220    set.insert(TokenType::UMediumInt);
221    set.insert(TokenType::Int);
222    set.insert(TokenType::UInt);
223    set.insert(TokenType::BigInt);
224    set.insert(TokenType::UBigInt);
225    set.insert(TokenType::BigNum);
226    set.insert(TokenType::Int128);
227    set.insert(TokenType::UInt128);
228    set.insert(TokenType::Int256);
229    set.insert(TokenType::UInt256);
230    // Floating point types
231    set.insert(TokenType::Float);
232    set.insert(TokenType::Double);
233    set.insert(TokenType::UDouble);
234    // Decimal types
235    set.insert(TokenType::Decimal);
236    set.insert(TokenType::Decimal32);
237    set.insert(TokenType::Decimal64);
238    set.insert(TokenType::Decimal128);
239    set.insert(TokenType::Decimal256);
240    set.insert(TokenType::DecFloat);
241    set.insert(TokenType::UDecimal);
242    set.insert(TokenType::BigDecimal);
243    // String types
244    set.insert(TokenType::Char);
245    set.insert(TokenType::NChar);
246    set.insert(TokenType::VarChar);
247    set.insert(TokenType::NVarChar);
248    set.insert(TokenType::BpChar);
249    set.insert(TokenType::Text);
250    set.insert(TokenType::MediumText);
251    set.insert(TokenType::LongText);
252    set.insert(TokenType::TinyText);
253    set.insert(TokenType::Name);
254    set.insert(TokenType::FixedString);
255    // Binary types
256    set.insert(TokenType::Binary);
257    set.insert(TokenType::VarBinary);
258    set.insert(TokenType::Blob);
259    set.insert(TokenType::MediumBlob);
260    set.insert(TokenType::LongBlob);
261    set.insert(TokenType::TinyBlob);
262    // Date/time types
263    set.insert(TokenType::Date);
264    set.insert(TokenType::Date32);
265    set.insert(TokenType::Time);
266    set.insert(TokenType::TimeTz);
267    set.insert(TokenType::TimeNs);
268    set.insert(TokenType::Timestamp);
269    set.insert(TokenType::TimestampTz);
270    set.insert(TokenType::TimestampLtz);
271    set.insert(TokenType::TimestampNtz);
272    set.insert(TokenType::TimestampS);
273    set.insert(TokenType::TimestampMs);
274    set.insert(TokenType::TimestampNs);
275    set.insert(TokenType::DateTime);
276    set.insert(TokenType::DateTime2);
277    set.insert(TokenType::DateTime64);
278    set.insert(TokenType::SmallDateTime);
279    set.insert(TokenType::Year);
280    set.insert(TokenType::Interval);
281    // JSON types
282    set.insert(TokenType::Json);
283    set.insert(TokenType::JsonB);
284    // UUID
285    set.insert(TokenType::Uuid);
286    // Spatial types
287    set.insert(TokenType::Geography);
288    set.insert(TokenType::GeographyPoint);
289    set.insert(TokenType::Geometry);
290    set.insert(TokenType::Point);
291    set.insert(TokenType::Ring);
292    set.insert(TokenType::LineString);
293    set.insert(TokenType::MultiLineString);
294    set.insert(TokenType::Polygon);
295    set.insert(TokenType::MultiPolygon);
296    // Range types (PostgreSQL)
297    set.insert(TokenType::Int4Range);
298    set.insert(TokenType::Int4MultiRange);
299    set.insert(TokenType::Int8Range);
300    set.insert(TokenType::Int8MultiRange);
301    set.insert(TokenType::NumRange);
302    set.insert(TokenType::NumMultiRange);
303    set.insert(TokenType::TsRange);
304    set.insert(TokenType::TsMultiRange);
305    set.insert(TokenType::TsTzRange);
306    set.insert(TokenType::TsTzMultiRange);
307    set.insert(TokenType::DateRange);
308    set.insert(TokenType::DateMultiRange);
309    // PostgreSQL special types
310    set.insert(TokenType::HllSketch);
311    set.insert(TokenType::HStore);
312    set.insert(TokenType::Serial);
313    set.insert(TokenType::SmallSerial);
314    set.insert(TokenType::BigSerial);
315    // XML
316    set.insert(TokenType::Xml);
317    // Other special types
318    set.insert(TokenType::Super);
319    set.insert(TokenType::PseudoType);
320    set.insert(TokenType::UserDefined);
321    set.insert(TokenType::Money);
322    set.insert(TokenType::SmallMoney);
323    set.insert(TokenType::RowVersion);
324    set.insert(TokenType::Image);
325    set.insert(TokenType::Variant);
326    set.insert(TokenType::Object);
327    set.insert(TokenType::ObjectIdentifier);
328    set.insert(TokenType::Inet);
329    set.insert(TokenType::IpAddress);
330    set.insert(TokenType::IpPrefix);
331    set.insert(TokenType::Ipv4);
332    set.insert(TokenType::Ipv6);
333    set.insert(TokenType::Unknown);
334    set.insert(TokenType::Null);
335    set.insert(TokenType::TDigest);
336    set.insert(TokenType::Vector);
337    set.insert(TokenType::Void);
338    // Include ENUM_TYPE_TOKENS
339    set.insert(TokenType::Dynamic);
340    set.insert(TokenType::Enum);
341    set.insert(TokenType::Enum8);
342    set.insert(TokenType::Enum16);
343    // Include NESTED_TYPE_TOKENS
344    set.insert(TokenType::Array);
345    set.insert(TokenType::List);
346    set.insert(TokenType::LowCardinality);
347    set.insert(TokenType::Map);
348    set.insert(TokenType::Nullable);
349    set.insert(TokenType::Range);
350    set.insert(TokenType::File);
351    set.insert(TokenType::Nested);
352    set.insert(TokenType::Struct);
353    // Include AGGREGATE_TYPE_TOKENS
354    set.insert(TokenType::AggregateFunction);
355    set.insert(TokenType::SimpleAggregateFunction);
356    set
357});
358
359/// SIGNED_TO_UNSIGNED_TYPE_TOKEN: Maps signed types to unsigned types
360/// Python: SIGNED_TO_UNSIGNED_TYPE_TOKEN = {TokenType.BIGINT: TokenType.UBIGINT, ...}
361pub static SIGNED_TO_UNSIGNED_TYPE_TOKEN: LazyLock<
362    std::collections::HashMap<TokenType, TokenType>,
363> = LazyLock::new(|| {
364    let mut map = std::collections::HashMap::new();
365    map.insert(TokenType::BigInt, TokenType::UBigInt);
366    map.insert(TokenType::Int, TokenType::UInt);
367    map.insert(TokenType::MediumInt, TokenType::UMediumInt);
368    map.insert(TokenType::SmallInt, TokenType::USmallInt);
369    map.insert(TokenType::TinyInt, TokenType::UTinyInt);
370    map.insert(TokenType::Decimal, TokenType::UDecimal);
371    map.insert(TokenType::Double, TokenType::UDouble);
372    map
373});
374
375/// SUBQUERY_PREDICATES: Tokens that introduce subquery predicates
376/// Python: SUBQUERY_PREDICATES = {TokenType.ANY: exp.Any, ...}
377pub static SUBQUERY_PREDICATES: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
378    let mut set = HashSet::new();
379    set.insert(TokenType::Any);
380    set.insert(TokenType::All);
381    set.insert(TokenType::Exists);
382    set.insert(TokenType::Some);
383    set
384});
385
386/// DB_CREATABLES: Object types that can be created with CREATE
387/// Python: DB_CREATABLES = {TokenType.DATABASE, TokenType.SCHEMA, ...}
388pub static DB_CREATABLES: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
389    let mut set = HashSet::new();
390    set.insert(TokenType::Database);
391    set.insert(TokenType::Dictionary);
392    set.insert(TokenType::FileFormat);
393    set.insert(TokenType::Model);
394    set.insert(TokenType::Namespace);
395    set.insert(TokenType::Schema);
396    set.insert(TokenType::SemanticView);
397    set.insert(TokenType::Sequence);
398    set.insert(TokenType::Sink);
399    set.insert(TokenType::Source);
400    set.insert(TokenType::Stage);
401    set.insert(TokenType::StorageIntegration);
402    set.insert(TokenType::Streamlit);
403    set.insert(TokenType::Table);
404    set.insert(TokenType::Tag);
405    set.insert(TokenType::View);
406    set.insert(TokenType::Warehouse);
407    set
408});
409
410/// RESERVED_TOKENS: Tokens that cannot be used as identifiers without quoting
411/// These are typically structural keywords that affect query parsing
412pub static RESERVED_TOKENS: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
413    let mut set = HashSet::new();
414    // Query structure keywords
415    set.insert(TokenType::Select);
416    set.insert(TokenType::From);
417    set.insert(TokenType::Where);
418    set.insert(TokenType::GroupBy);
419    set.insert(TokenType::OrderBy);
420    set.insert(TokenType::Having);
421    set.insert(TokenType::Limit);
422    set.insert(TokenType::Offset);
423    set.insert(TokenType::Union);
424    set.insert(TokenType::Intersect);
425    set.insert(TokenType::Except);
426    set.insert(TokenType::Join);
427    set.insert(TokenType::On);
428    set.insert(TokenType::With);
429    set.insert(TokenType::Into);
430    set.insert(TokenType::Values);
431    set.insert(TokenType::Set);
432    // DDL keywords
433    set.insert(TokenType::Create);
434    set.insert(TokenType::Drop);
435    set.insert(TokenType::Alter);
436    set.insert(TokenType::Truncate);
437    // DML keywords
438    set.insert(TokenType::Insert);
439    set.insert(TokenType::Update);
440    set.insert(TokenType::Delete);
441    set.insert(TokenType::Merge);
442    // Control flow
443    set.insert(TokenType::Case);
444    set.insert(TokenType::When);
445    set.insert(TokenType::Then);
446    set.insert(TokenType::Else);
447    set.insert(TokenType::End);
448    // Boolean operators
449    set.insert(TokenType::And);
450    set.insert(TokenType::Or);
451    set.insert(TokenType::Not);
452    // Comparison
453    set.insert(TokenType::In);
454    set.insert(TokenType::Is);
455    set.insert(TokenType::Between);
456    set.insert(TokenType::Like);
457    set.insert(TokenType::ILike);
458    set.insert(TokenType::Exists);
459    // Literals
460    set.insert(TokenType::Null);
461    set.insert(TokenType::True);
462    set.insert(TokenType::False);
463    // Punctuation tokens (these are always reserved)
464    set.insert(TokenType::LParen);
465    set.insert(TokenType::RParen);
466    set.insert(TokenType::LBracket);
467    set.insert(TokenType::RBracket);
468    set.insert(TokenType::LBrace);
469    set.insert(TokenType::RBrace);
470    set.insert(TokenType::Comma);
471    set.insert(TokenType::Semicolon);
472    set.insert(TokenType::Star);
473    set.insert(TokenType::Eq);
474    set.insert(TokenType::Neq);
475    set.insert(TokenType::Lt);
476    set.insert(TokenType::Lte);
477    set.insert(TokenType::Gt);
478    set.insert(TokenType::Gte);
479    set
480});
481
482// Note: Function name normalization is handled directly in parse_typed_function
483// by matching all aliases to the same typed expression, following Python SQLGlot's pattern.
484// The generator then outputs dialect-specific names via TRANSFORMS.
485
/// Recursive-descent SQL parser that converts a token stream into an AST.
///
/// The parser consumes a `Vec<Token>` produced by the [`Tokenizer`](crate::tokens::Tokenizer)
/// and builds a tree of [`Expression`] nodes. It supports the full SQL grammar
/// including SELECT, DML (INSERT/UPDATE/DELETE/MERGE), DDL (CREATE/ALTER/DROP),
/// window functions, CTEs, set operations, and 30+ dialect-specific extensions.
///
/// # Quick start
///
/// For most use cases the static helper [`Parser::parse_sql`] is the simplest entry point:
///
/// ```rust,ignore
/// use polyglot_sql::parser::Parser;
///
/// let statements = Parser::parse_sql("SELECT 1; SELECT 2")?;
/// assert_eq!(statements.len(), 2);
/// ```
///
/// For dialect-aware parsing, construct the parser with [`Parser::with_config`]
/// or [`Parser::with_source`] and a [`ParserConfig`] whose `dialect` is set.
/// Note that [`Parser::parse_sql_with_config`] customises only the tokenizer;
/// it still parses with `ParserConfig::default()`.
pub struct Parser {
    /// Token stream being parsed, as supplied to the constructor.
    tokens: Vec<Token>,
    /// Cursor into `tokens`; every constructor initialises it to 0.
    /// NOTE(review): presumably the index of the next unconsumed token — confirm.
    current: usize,
    /// Parser settings (trailing-comma handling, dialect).
    config: ParserConfig,
    /// Original source SQL (used for preserving exact text in Command expressions).
    /// `None` unless the parser was built with [`Parser::with_source`].
    source: Option<String>,
    /// Comments captured by parse_comparison when no comparison operator follows.
    /// These are leading comments from the first token of an expression that need
    /// to be placed by the caller (e.g., after an alias, or after an AND operand).
    pending_leading_comments: Vec<String>,
}
517
/// Configuration for the SQL [`Parser`].
///
/// Controls dialect-specific parsing behavior. Most users can rely on the
/// `Default` implementation; set `dialect` when you need to handle syntax
/// that is unique to a particular database engine (e.g. BigQuery backtick
/// quoting, TSQL square-bracket identifiers, Snowflake QUALIFY clause).
///
/// The derived `Default` yields `allow_trailing_commas: false` and
/// `dialect: None`.
#[derive(Debug, Clone, Default)]
pub struct ParserConfig {
    /// Allow trailing commas in SELECT lists (e.g. BigQuery permits `SELECT a, b, FROM t`).
    pub allow_trailing_commas: bool,
    /// Dialect type for dialect-specific parsing behavior.
    /// `None` means no dialect-specific handling is selected.
    pub dialect: Option<crate::dialects::DialectType>,
}
531
532impl Parser {
533    /// Create a new parser from a pre-tokenized token stream with default configuration.
534    ///
535    /// Prefer [`Parser::parse_sql`] if you are starting from a raw SQL string.
536    pub fn new(tokens: Vec<Token>) -> Self {
537        Self {
538            tokens,
539            current: 0,
540            config: ParserConfig::default(),
541            source: None,
542            pending_leading_comments: Vec::new(),
543        }
544    }
545
546    /// Create a parser from a pre-tokenized token stream with a custom [`ParserConfig`].
547    pub fn with_config(tokens: Vec<Token>, config: ParserConfig) -> Self {
548        Self {
549            tokens,
550            current: 0,
551            config,
552            source: None,
553            pending_leading_comments: Vec::new(),
554        }
555    }
556
557    /// Create a parser with source SQL attached.
558    ///
559    /// The original SQL text is stored so that `Command` expressions (unparsed
560    /// dialect-specific statements) can preserve the exact source verbatim.
561    pub fn with_source(tokens: Vec<Token>, config: ParserConfig, source: String) -> Self {
562        Self {
563            tokens,
564            current: 0,
565            config,
566            source: Some(source),
567            pending_leading_comments: Vec::new(),
568        }
569    }
570
571    /// Parse one or more SQL statements from a raw string.
572    ///
573    /// This is the main entry point for most callers. It tokenizes the input with
574    /// the default [`TokenizerConfig`], then parses all semicolon-separated
575    /// statements and returns them as a `Vec<Expression>`.
576    ///
577    /// # Errors
578    ///
579    /// Returns an error if the input contains invalid tokens or syntax that the
580    /// parser cannot recognize.
581    ///
582    /// # Example
583    ///
584    /// ```rust,ignore
585    /// let stmts = Parser::parse_sql("SELECT a FROM t WHERE x = 1")?;
586    /// ```
587    pub fn parse_sql(sql: &str) -> Result<Vec<Expression>> {
588        let tokenizer = Tokenizer::default();
589        let tokens = tokenizer.tokenize(sql)?;
590        let mut parser = Parser::with_source(tokens, ParserConfig::default(), sql.to_string());
591        parser.parse()
592    }
593
594    /// Parse SQL from a string using a custom [`TokenizerConfig`].
595    ///
596    /// Use this variant when the source dialect requires non-default tokenizer
597    /// settings (e.g. different string quoting or comment syntax).
598    pub fn parse_sql_with_config(
599        sql: &str,
600        tokenizer_config: TokenizerConfig,
601    ) -> Result<Vec<Expression>> {
602        let tokenizer = Tokenizer::new(tokenizer_config);
603        let tokens = tokenizer.tokenize(sql)?;
604        let mut parser = Parser::with_source(tokens, ParserConfig::default(), sql.to_string());
605        parser.parse()
606    }
607
608    /// Parse all remaining statements from the token stream.
609    ///
610    /// Consumes tokens until the end of input, splitting on semicolons.
611    /// Returns one `Expression` per statement.
612    pub fn parse(&mut self) -> Result<Vec<Expression>> {
613        let mut statements = Vec::new();
614
615        while !self.is_at_end() {
616            let mut stmt = self.parse_statement()?;
617
618            // Before consuming the semicolon, capture its leading comments
619            // and attach them to the statement (e.g., SELECT foo\n/* comment */\n;)
620            if self.check(TokenType::Semicolon) {
621                let semi_comments = self.current_leading_comments().to_vec();
622                if !semi_comments.is_empty() {
623                    stmt = Expression::Annotated(Box::new(Annotated {
624                        this: stmt,
625                        trailing_comments: semi_comments,
626                    }));
627                }
628            }
629
630            // ClickHouse: consume trailing SETTINGS key=val, ... after any statement
631            if matches!(
632                self.config.dialect,
633                Some(crate::dialects::DialectType::ClickHouse)
634            ) && self.check(TokenType::Settings)
635            {
636                self.skip(); // consume SETTINGS
637                let _ = self.parse_settings_property()?;
638            }
639
640            // ClickHouse: consume trailing FORMAT <name> after any statement
641            if matches!(
642                self.config.dialect,
643                Some(crate::dialects::DialectType::ClickHouse)
644            ) && self.check(TokenType::Format)
645            {
646                self.skip(); // consume FORMAT
647                             // Accept any identifier/keyword/Null as format name
648                if self.check(TokenType::Null) {
649                    self.skip();
650                } else if self.is_identifier_token() || self.check_keyword() {
651                    self.skip();
652                }
653            }
654
655            // ClickHouse: PARALLEL WITH between statements (multi-statement execution)
656            if matches!(
657                self.config.dialect,
658                Some(crate::dialects::DialectType::ClickHouse)
659            ) && self.check_identifier("PARALLEL")
660                && self.check_next(TokenType::With)
661            {
662                self.skip(); // consume PARALLEL
663                self.skip(); // consume WITH
664                statements.push(stmt);
665                continue;
666            }
667
668            // After parsing a statement, the next token must be a semicolon or EOF.
669            // If not, there are unconsumed tokens which indicates a parse error.
670            // This matches Python sqlglot's behavior (parser.py line 1826-1827).
671            if !self.is_at_end() && !self.check(TokenType::Semicolon) {
672                if matches!(
673                    self.config.dialect,
674                    Some(crate::dialects::DialectType::ClickHouse)
675                ) {
676                    // ClickHouse fallback: consume unconsumed tokens until semicolon/EOF.
677                    // This matches Python sqlglot's _parse_as_command behavior for
678                    // ClickHouse-specific syntax that we don't fully parse yet.
679                    while !self.is_at_end() && !self.check(TokenType::Semicolon) {
680                        self.skip();
681                    }
682                } else {
683                    return Err(self.parse_error("Invalid expression / Unexpected token"));
684                }
685            }
686
687            // Consume optional semicolons (ClickHouse allows multiple like `;;`)
688            while self.match_token(TokenType::Semicolon) {}
689
690            statements.push(stmt);
691        }
692
693        Ok(statements)
694    }
695
696    /// Parse a single SQL statement from the current position in the token stream.
697    ///
698    /// Dispatches to the appropriate sub-parser based on the leading keyword
699    /// (SELECT, INSERT, CREATE, etc.). Unknown or dialect-specific statements
700    /// fall through to a `Command` expression that preserves the raw SQL text.
701    pub fn parse_statement(&mut self) -> Result<Expression> {
702        // Skip any leading semicolons
703        while self.match_token(TokenType::Semicolon) {}
704
705        if self.is_at_end() {
706            return Err(self.parse_error("Unexpected end of input"));
707        }
708
709        match self.peek().token_type {
710            // Handle hint comment /*+ ... */ before a statement - convert to regular comment
711            TokenType::Hint => {
712                let hint_token = self.advance();
713                let hint_text = hint_token.text.clone();
714                // Convert hint to regular comment (preserve the + as part of the content)
715                let comment = format!("/* + {} */", hint_text.trim());
716
717                // Parse the following statement
718                let mut stmt = self.parse_statement()?;
719
720                // Attach the comment to the statement's leading_comments
721                match &mut stmt {
722                    Expression::Select(select) => {
723                        select.leading_comments.insert(0, comment);
724                    }
725                    Expression::Insert(insert) => {
726                        insert.leading_comments.insert(0, comment);
727                    }
728                    Expression::Update(update) => {
729                        update.leading_comments.insert(0, comment);
730                    }
731                    Expression::Delete(delete) => {
732                        delete.leading_comments.insert(0, comment);
733                    }
734                    Expression::CreateTable(ct) => {
735                        ct.leading_comments.insert(0, comment);
736                    }
737                    _ => {
738                        // For other statement types, we can't attach comments
739                        // but at least the statement parses successfully
740                    }
741                }
742                Ok(stmt)
743            }
744            TokenType::Select => self.parse_select(),
745            TokenType::With => self.parse_with(),
746            TokenType::Insert => self.parse_insert(),
747            TokenType::Replace => self.parse_replace(),
748            TokenType::Update => self.parse_update(),
749            TokenType::Delete => self.parse_delete(),
750            TokenType::Create => self.parse_create(),
751            TokenType::Drop => self.parse_drop(),
752            TokenType::Alter => self.parse_alter(),
753            TokenType::Truncate => {
754                // TRUNCATE could be TRUNCATE TABLE (statement) or TRUNCATE(a, b) (function)
755                // Check if followed by ( to determine which
756                if self.check_next(TokenType::LParen) {
757                    // TRUNCATE(a, b) - function call
758                    self.parse_expression()
759                } else {
760                    self.parse_truncate()
761                }
762            }
763            TokenType::Values => {
764                // VALUES could be VALUES(...) statement or VALUES 1, 2, 3 (bare values)
765                if self.check_next(TokenType::LParen)
766                    || self.check_next(TokenType::Number)
767                    || self.check_next(TokenType::String)
768                {
769                    self.parse_values()
770                } else {
771                    // "values" by itself is an identifier/expression
772                    self.parse_expression()
773                }
774            }
775            TokenType::Use => self.parse_use(),
776            TokenType::Cache => self.parse_cache(),
777            TokenType::Uncache => self.parse_uncache(),
778            TokenType::Refresh => {
779                self.skip(); // consume REFRESH
780                self.parse_refresh()?
781                    .ok_or_else(|| self.parse_error("Failed to parse REFRESH statement"))
782            }
783            TokenType::Load => self.parse_load_data(),
784            TokenType::Grant => self.parse_grant(),
785            TokenType::Revoke => self.parse_revoke(),
786            TokenType::Comment => self.parse_comment(),
787            TokenType::Merge => {
788                self.skip(); // consume MERGE
789                self.parse_merge()?
790                    .ok_or_else(|| self.parse_error("Failed to parse MERGE statement"))
791            }
792            TokenType::Set => self.parse_set(),
793            TokenType::Database
794                if matches!(
795                    self.config.dialect,
796                    Some(crate::dialects::DialectType::Teradata)
797                ) =>
798            {
799                // Teradata: DATABASE tduser -> USE tduser
800                self.skip(); // consume DATABASE
801                let name = self.expect_identifier_or_keyword()?;
802                Ok(Expression::Use(Box::new(Use {
803                    kind: None,
804                    this: Identifier::new(name),
805                })))
806            }
807            TokenType::Lock
808                if matches!(
809                    self.config.dialect,
810                    Some(crate::dialects::DialectType::Teradata)
811                ) =>
812            {
813                self.parse_locking_statement()
814            }
815            TokenType::Command => {
816                self.skip(); // consume command keyword
817                self.parse_command()?
818                    .ok_or_else(|| self.parse_error("Failed to parse COMMAND statement"))
819            }
820            TokenType::Rename
821                if matches!(
822                    self.config.dialect,
823                    Some(crate::dialects::DialectType::Teradata)
824                        | Some(crate::dialects::DialectType::ClickHouse)
825                ) =>
826            {
827                self.skip(); // consume RENAME
828                self.parse_command()?
829                    .ok_or_else(|| self.parse_error("Failed to parse RENAME statement"))
830            }
831            TokenType::Pragma => self.parse_pragma(),
832            TokenType::Rollback => self.parse_rollback(),
833            TokenType::Commit => self.parse_commit(),
834            TokenType::Begin => self.parse_transaction(),
835            TokenType::End => {
836                // In PostgreSQL, END is an alias for COMMIT (END [WORK|TRANSACTION])
837                // In TSQL and other dialects, END is a block delimiter (BEGIN...END)
838                if matches!(
839                    self.config.dialect,
840                    Some(crate::dialects::DialectType::PostgreSQL)
841                ) {
842                    self.parse_end_transaction()
843                } else {
844                    self.skip(); // consume END
845                    Ok(Expression::Command(Box::new(Command {
846                        this: "END".to_string(),
847                    })))
848                }
849            }
850            TokenType::Start => self.parse_start_transaction(),
851            TokenType::Describe | TokenType::Desc => self.parse_describe(),
852            TokenType::Show => self.parse_show(),
853            TokenType::Copy => self.parse_copy(),
854            TokenType::Put => self.parse_put(),
855            TokenType::Kill
856                if matches!(
857                    self.config.dialect,
858                    Some(crate::dialects::DialectType::ClickHouse)
859                ) =>
860            {
861                self.skip(); // consume KILL
862                self.parse_command()?
863                    .ok_or_else(|| self.parse_error("Failed to parse KILL statement"))
864            }
865            TokenType::Kill => self.parse_kill(),
866            TokenType::Execute => {
867                // ClickHouse: EXECUTE AS username statement → parse as command
868                if matches!(
869                    self.config.dialect,
870                    Some(crate::dialects::DialectType::ClickHouse)
871                ) {
872                    self.skip(); // consume EXECUTE
873                    self.parse_command()?
874                        .ok_or_else(|| self.parse_error("Failed to parse EXECUTE statement"))
875                } else if self.peek_nth(1).map(|t| t.text.eq_ignore_ascii_case("IMMEDIATE")) == Some(true) {
876                    // EXECUTE IMMEDIATE — Snowflake/BigQuery dynamic SQL, treat as raw command
877                    self.skip(); // consume EXECUTE
878                    self.parse_command()?
879                        .ok_or_else(|| self.parse_error("Failed to parse EXECUTE IMMEDIATE statement"))
880                } else {
881                    self.parse_execute()
882                }
883            }
884            TokenType::Declare => {
885                self.skip(); // consume DECLARE
886                self.parse_declare()?
887                    .ok_or_else(|| self.parse_error("Failed to parse DECLARE statement"))
888            }
889            // GET is a command only when followed by @ (stage reference), otherwise it's a function
890            // If followed by ( it should be parsed as GET() function, so fall through to expression parsing
891            TokenType::Get
892                if self.check_next(TokenType::DAt) || !self.check_next(TokenType::LParen) =>
893            {
894                self.parse_get_command()
895            }
896            TokenType::Var
897                if self.peek().text.eq_ignore_ascii_case("RM")
898                    || self.peek().text.eq_ignore_ascii_case("REMOVE") =>
899            {
900                self.parse_rm_command()
901            }
902            TokenType::Var if self.peek().text.eq_ignore_ascii_case("CALL") => self.parse_call(),
903            TokenType::Var
904                if self.peek().text.eq_ignore_ascii_case("EXCHANGE")
905                    && matches!(
906                        self.config.dialect,
907                        Some(crate::dialects::DialectType::ClickHouse)
908                    ) =>
909            {
910                self.skip(); // consume EXCHANGE
911                self.parse_command()?
912                    .ok_or_else(|| self.parse_error("Failed to parse EXCHANGE statement"))
913            }
914            // EXPLAIN is treated as DESCRIBE (MySQL maps EXPLAIN -> DESCRIBE)
915            TokenType::Var if self.peek().text.eq_ignore_ascii_case("EXPLAIN") => {
916                self.parse_describe()
917            }
918            // LOCK TABLES / UNLOCK TABLES (MySQL)
919            TokenType::Var
920                if self.peek().text.eq_ignore_ascii_case("LOCK")
921                    || self.peek().text.eq_ignore_ascii_case("UNLOCK") =>
922            {
923                self.skip(); // consume LOCK/UNLOCK
924                self.parse_command()?
925                    .ok_or_else(|| self.parse_error("Failed to parse LOCK/UNLOCK statement"))
926            }
927            TokenType::Var if self.peek().text.eq_ignore_ascii_case("ANALYZE") => {
928                self.skip(); // consume ANALYZE
929                self.parse_analyze()?
930                    .ok_or_else(|| self.parse_error("Failed to parse ANALYZE statement"))
931            }
932            // TSQL: PRINT expression
933            TokenType::Var if self.peek().text.eq_ignore_ascii_case("PRINT") => {
934                self.skip(); // consume PRINT
935                self.parse_command()?
936                    .ok_or_else(|| self.parse_error("Failed to parse PRINT statement"))
937            }
938            // TSQL: WAITFOR DELAY '00:00:05' / WAITFOR TIME '23:00:00'
939            TokenType::Var if self.peek().text.eq_ignore_ascii_case("WAITFOR") => {
940                self.skip(); // consume WAITFOR
941                self.parse_command()?
942                    .ok_or_else(|| self.parse_error("Failed to parse WAITFOR statement"))
943            }
944            // TSQL: BULK INSERT table FROM 'file' WITH (options)
945            TokenType::Var if self.peek().text.eq_ignore_ascii_case("BULK") => {
946                self.skip(); // consume BULK
947                self.parse_command()?
948                    .ok_or_else(|| self.parse_error("Failed to parse BULK INSERT statement"))
949            }
950            // ClickHouse: CHECK TABLE t [PARTITION p] [SETTINGS ...]
951            TokenType::Check
952                if matches!(
953                    self.config.dialect,
954                    Some(crate::dialects::DialectType::ClickHouse)
955                ) =>
956            {
957                self.skip(); // consume CHECK
958                self.parse_command()?
959                    .ok_or_else(|| self.parse_error("Failed to parse CHECK statement"))
960            }
961            // ClickHouse: SETTINGS key=value, ... (standalone statement or after another statement)
962            TokenType::Settings
963                if matches!(
964                    self.config.dialect,
965                    Some(crate::dialects::DialectType::ClickHouse)
966                ) =>
967            {
968                self.skip(); // consume SETTINGS
969                self.parse_command()?
970                    .ok_or_else(|| self.parse_error("Failed to parse SETTINGS statement"))
971            }
972            // ClickHouse: SYSTEM STOP/START MERGES, etc.
973            TokenType::System
974                if matches!(
975                    self.config.dialect,
976                    Some(crate::dialects::DialectType::ClickHouse)
977                ) =>
978            {
979                self.skip(); // consume SYSTEM
980                self.parse_command()?
981                    .ok_or_else(|| self.parse_error("Failed to parse SYSTEM statement"))
982            }
983            // ClickHouse: RENAME TABLE db.t1 TO db.t2 [, db.t3 TO db.t4 ...]
984            TokenType::Var
985                if self.peek().text.eq_ignore_ascii_case("RENAME")
986                    && matches!(
987                        self.config.dialect,
988                        Some(crate::dialects::DialectType::ClickHouse)
989                    ) =>
990            {
991                self.skip(); // consume RENAME
992                self.parse_command()?
993                    .ok_or_else(|| self.parse_error("Failed to parse RENAME statement"))
994            }
995            // ClickHouse: OPTIMIZE TABLE t [FINAL] [DEDUPLICATE [BY ...]]
996            // MySQL: OPTIMIZE [LOCAL|NO_WRITE_TO_BINLOG] TABLE t1 [, t2, ...]
997            TokenType::Var
998                if self.peek().text.eq_ignore_ascii_case("OPTIMIZE")
999                    && matches!(
1000                        self.config.dialect,
1001                        Some(crate::dialects::DialectType::ClickHouse)
1002                            | Some(crate::dialects::DialectType::MySQL)
1003                            | Some(crate::dialects::DialectType::SingleStore)
1004                            | Some(crate::dialects::DialectType::Doris)
1005                            | Some(crate::dialects::DialectType::StarRocks)
1006                    ) =>
1007            {
1008                self.skip(); // consume OPTIMIZE
1009                self.parse_command()?
1010                    .ok_or_else(|| self.parse_error("Failed to parse OPTIMIZE statement"))
1011            }
1012            // ClickHouse: EXISTS [TEMPORARY] TABLE/DATABASE/DICTIONARY ...
1013            TokenType::Exists
1014                if matches!(
1015                    self.config.dialect,
1016                    Some(crate::dialects::DialectType::ClickHouse)
1017                ) && !self.check_next(TokenType::LParen) =>
1018            {
1019                self.skip(); // consume EXISTS
1020                self.parse_command()?
1021                    .ok_or_else(|| self.parse_error("Failed to parse EXISTS statement"))
1022            }
1023            // ClickHouse: SHOW ... (various SHOW commands beyond what's already handled)
1024            TokenType::Var
1025                if self.peek().text.eq_ignore_ascii_case("EXISTS")
1026                    && matches!(
1027                        self.config.dialect,
1028                        Some(crate::dialects::DialectType::ClickHouse)
1029                    ) =>
1030            {
1031                self.skip(); // consume EXISTS
1032                self.parse_command()?
1033                    .ok_or_else(|| self.parse_error("Failed to parse EXISTS statement"))
1034            }
1035            // DuckDB: ATTACH [DATABASE] [IF NOT EXISTS] 'path' [AS alias] [(options)]
1036            TokenType::Var if self.peek().text.eq_ignore_ascii_case("ATTACH") => {
1037                self.skip(); // consume ATTACH
1038                if matches!(
1039                    self.config.dialect,
1040                    Some(crate::dialects::DialectType::ClickHouse)
1041                ) {
1042                    self.parse_command()?
1043                        .ok_or_else(|| self.parse_error("Failed to parse ATTACH statement"))
1044                } else {
1045                    self.parse_attach_detach(true)
1046                }
1047            }
1048            // UNDROP TABLE/SCHEMA/DATABASE (ClickHouse, Snowflake)
1049            TokenType::Var
1050                if self.peek().text.eq_ignore_ascii_case("UNDROP")
1051                    && matches!(
1052                        self.config.dialect,
1053                        Some(crate::dialects::DialectType::ClickHouse)
1054                            | Some(crate::dialects::DialectType::Snowflake)
1055                    ) =>
1056            {
1057                self.skip(); // consume UNDROP
1058                let kind = if self.match_token(TokenType::Table) {
1059                    "TABLE"
1060                } else if self.match_token(TokenType::Schema) {
1061                    "SCHEMA"
1062                } else if self.match_token(TokenType::Database) {
1063                    "DATABASE"
1064                } else {
1065                    return Err(self.parse_error("Expected TABLE, SCHEMA, or DATABASE after UNDROP"));
1066                };
1067                let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
1068                let name = self.parse_table_ref()?;
1069                Ok(Expression::Undrop(Box::new(crate::expressions::Undrop {
1070                    kind: kind.to_string(),
1071                    name,
1072                    if_exists,
1073                })))
1074            }
1075            // ClickHouse: DETACH TABLE [IF EXISTS] ... [ON CLUSTER ...]
1076            TokenType::Var
1077                if self.peek().text.eq_ignore_ascii_case("DETACH")
1078                    && matches!(
1079                        self.config.dialect,
1080                        Some(crate::dialects::DialectType::ClickHouse)
1081                    ) =>
1082            {
1083                self.skip(); // consume DETACH
1084                self.parse_command()?
1085                    .ok_or_else(|| self.parse_error("Failed to parse DETACH statement"))
1086            }
1087            // DuckDB: DETACH [DATABASE] [IF EXISTS] name
1088            TokenType::Var if self.peek().text.eq_ignore_ascii_case("DETACH") => {
1089                self.skip(); // consume DETACH
1090                self.parse_attach_detach(false)
1091            }
1092            // DuckDB: INSTALL extension [FROM source]
1093            TokenType::Var if self.peek().text.eq_ignore_ascii_case("INSTALL") => {
1094                self.skip(); // consume INSTALL
1095                self.parse_install(false)
1096            }
1097            // DuckDB: FORCE INSTALL extension | FORCE CHECKPOINT db
1098            TokenType::Var if self.peek().text.eq_ignore_ascii_case("FORCE") => {
1099                self.skip(); // consume FORCE
1100                self.parse_force_statement()
1101            }
1102            // DuckDB: SUMMARIZE [TABLE] expression
1103            TokenType::Var if self.peek().text.eq_ignore_ascii_case("SUMMARIZE") => {
1104                self.skip(); // consume SUMMARIZE
1105                self.parse_summarize_statement()
1106            }
1107            // DuckDB: RESET [SESSION|GLOBAL|LOCAL] variable
1108            TokenType::Var if self.peek().text.eq_ignore_ascii_case("RESET") => {
1109                self.skip(); // consume RESET
1110                self.parse_as_command()?
1111                    .ok_or_else(|| self.parse_error("Failed to parse RESET statement"))
1112            }
1113            // DuckDB statement-level PIVOT/UNPIVOT/PIVOT_WIDER syntax
1114            TokenType::Pivot => {
1115                self.skip(); // consume PIVOT
1116                self.parse_simplified_pivot(false)?
1117                    .ok_or_else(|| self.parse_error("Failed to parse PIVOT statement"))
1118            }
1119            TokenType::Unpivot => {
1120                self.skip(); // consume UNPIVOT
1121                self.parse_simplified_pivot(true)?
1122                    .ok_or_else(|| self.parse_error("Failed to parse UNPIVOT statement"))
1123            }
1124            // DuckDB: PIVOT_WIDER is an alias for PIVOT
1125            TokenType::Var if self.peek().text.eq_ignore_ascii_case("PIVOT_WIDER") => {
1126                self.skip(); // consume PIVOT_WIDER
1127                self.parse_simplified_pivot(false)?
1128                    .ok_or_else(|| self.parse_error("Failed to parse PIVOT_WIDER statement"))
1129            }
1130            // BigQuery procedural FOR...IN...DO loop
1131            TokenType::For => {
1132                self.skip(); // consume FOR
1133                self.parse_for_in()
1134            }
1135            // BigQuery/procedural LOOP, REPEAT, WHILE control flow statements
1136            TokenType::Var if self.peek().text.eq_ignore_ascii_case("LOOP") => {
1137                self.skip(); // consume LOOP
1138                self.parse_command()?
1139                    .ok_or_else(|| self.parse_error("Failed to parse LOOP statement"))
1140            }
1141            TokenType::Var if self.peek().text.eq_ignore_ascii_case("REPEAT") => {
1142                self.skip(); // consume REPEAT
1143                self.parse_command()?
1144                    .ok_or_else(|| self.parse_error("Failed to parse REPEAT statement"))
1145            }
1146            TokenType::Var if self.peek().text.eq_ignore_ascii_case("WHILE") => {
1147                self.skip(); // consume WHILE
1148                self.parse_command()?
1149                    .ok_or_else(|| self.parse_error("Failed to parse WHILE statement"))
1150            }
1151            // Athena/Presto: UNLOAD (SELECT ...) TO 'location' WITH (options)
1152            TokenType::Var if self.peek().text.eq_ignore_ascii_case("UNLOAD") => {
1153                self.parse_unload()
1154            }
1155            // Athena: USING EXTERNAL FUNCTION ... SELECT ...
1156            TokenType::Using => self.parse_using_external_function(),
1157            // BigQuery: EXPORT DATA [WITH CONNECTION conn] OPTIONS (...) AS SELECT ...
1158            TokenType::Var if self.peek().text.eq_ignore_ascii_case("EXPORT") => {
1159                self.parse_export_data()
1160            }
1161            // Presto/Trino: DEALLOCATE PREPARE <name>
1162            TokenType::Var if self.peek().text.eq_ignore_ascii_case("DEALLOCATE") => {
1163                self.parse_deallocate_prepare()
1164            }
1165            // DuckDB FROM-first syntax: FROM tbl = SELECT * FROM tbl
1166            TokenType::From => self.parse_from_first_query(),
1167            TokenType::LParen => {
1168                // Check if this is a parenthesized query (SELECT, WITH, PIVOT, UNPIVOT, FROM, or EXPLAIN inside)
1169                // by looking ahead after the opening paren
1170                let next_is_explain = self.current + 1 < self.tokens.len()
1171                    && self.tokens[self.current + 1].token_type == TokenType::Var
1172                    && self.tokens[self.current + 1]
1173                        .text
1174                        .eq_ignore_ascii_case("EXPLAIN");
1175                if self.check_next(TokenType::Select)
1176                    || self.check_next(TokenType::With)
1177                    || self.check_next(TokenType::Pivot)
1178                    || self.check_next(TokenType::Unpivot)
1179                    || self.check_next(TokenType::From)
1180                    || next_is_explain
1181                {
1182                    // Parse parenthesized query: (SELECT ...) ORDER BY x LIMIT y OFFSET z
1183                    self.skip(); // consume (
1184                    let inner = self.parse_statement()?;
1185                    self.expect(TokenType::RParen)?;
1186                    // Wrap in Subquery to preserve parentheses when used in set operations
1187                    let subquery = Expression::Subquery(Box::new(Subquery {
1188                        this: inner,
1189                        alias: None,
1190                        column_aliases: Vec::new(),
1191                        order_by: None,
1192                        limit: None,
1193                        offset: None,
1194                        distribute_by: None,
1195                        sort_by: None,
1196                        cluster_by: None,
1197                        lateral: false,
1198                        modifiers_inside: false,
1199                        trailing_comments: Vec::new(),
1200                        inferred_type: None,
1201                    }));
1202                    // Check for set operations after the parenthesized query
1203                    let result = self.parse_set_operation(subquery)?;
1204                    // Check for ORDER BY, LIMIT, OFFSET after parenthesized subquery
1205                    self.parse_query_modifiers(result)
1206                } else if self.check_next(TokenType::LParen) {
1207                    // Nested parentheses - could be ((SELECT...)) or ((a, b))
1208                    // For deeply nested queries like (((SELECT 1) UNION SELECT 1) UNION SELECT 1),
1209                    // recurse into parse_statement to handle the inner parenthesized query with set ops
1210                    self.skip(); // consume (
1211                    let inner = self.parse_statement()?;
1212                    // Check for set operations inside the outer parens
1213                    let result = self.parse_set_operation(inner)?;
1214                    self.expect(TokenType::RParen)?;
1215                    let subquery = Expression::Subquery(Box::new(Subquery {
1216                        this: result,
1217                        alias: None,
1218                        column_aliases: Vec::new(),
1219                        order_by: None,
1220                        limit: None,
1221                        offset: None,
1222                        distribute_by: None,
1223                        sort_by: None,
1224                        cluster_by: None,
1225                        lateral: false,
1226                        modifiers_inside: false,
1227                        trailing_comments: Vec::new(),
1228                        inferred_type: None,
1229                    }));
1230                    // Check for set operations after the outer parenthesized query
1231                    let result = self.parse_set_operation(subquery)?;
1232                    let pre_alias_comments = self.previous_trailing_comments().to_vec();
1233                    if self.match_token(TokenType::As) {
1234                        let alias = self.expect_identifier_or_keyword_with_quoted()?;
1235                        let trailing_comments = self.previous_trailing_comments().to_vec();
1236                        Ok(Expression::Alias(Box::new(Alias {
1237                            this: result,
1238                            alias,
1239                            column_aliases: Vec::new(),
1240                            pre_alias_comments,
1241                            trailing_comments,
1242                            inferred_type: None,
1243                        })))
1244                    } else {
1245                        // Check for LIMIT/OFFSET after parenthesized expression
1246                        // e.g., ((SELECT 1)) LIMIT 1
1247                        self.parse_query_modifiers(result)
1248                    }
1249                } else {
1250                    // Regular parenthesized expression like (a, b) or (x)
1251                    // Let parse_expression handle it
1252                    let expr = self.parse_expression()?;
1253                    let pre_alias_comments = self.previous_trailing_comments().to_vec();
1254                    if self.match_token(TokenType::As) {
1255                        // Check for tuple alias: AS ("a", "b", ...)
1256                        if self.match_token(TokenType::LParen) {
1257                            let mut column_aliases = Vec::new();
1258                            loop {
1259                                let col_alias = self.expect_identifier_or_keyword_with_quoted()?;
1260                                column_aliases.push(col_alias);
1261                                if !self.match_token(TokenType::Comma) {
1262                                    break;
1263                                }
1264                            }
1265                            self.expect(TokenType::RParen)?;
1266                            let trailing_comments = self.previous_trailing_comments().to_vec();
1267                            Ok(Expression::Alias(Box::new(Alias {
1268                                this: expr,
1269                                alias: Identifier::empty(),
1270                                column_aliases,
1271                                pre_alias_comments,
1272                                trailing_comments,
1273                                inferred_type: None,
1274                            })))
1275                        } else {
1276                            let alias = self.expect_identifier_or_keyword_with_quoted()?;
1277                            let trailing_comments = self.previous_trailing_comments().to_vec();
1278                            Ok(Expression::Alias(Box::new(Alias {
1279                                this: expr,
1280                                alias,
1281                                column_aliases: Vec::new(),
1282                                pre_alias_comments,
1283                                trailing_comments,
1284                                inferred_type: None,
1285                            })))
1286                        }
1287                    } else {
1288                        Ok(expr)
1289                    }
1290                }
1291            }
1292            _ => {
1293                // Capture leading comments from the first token before parsing
1294                let leading_comments = self.current_leading_comments().to_vec();
1295                // Parse expression and check for optional alias
1296                let expr = self.parse_expression()?;
1297                // Capture any comments between expression and AS keyword
1298                let pre_alias_comments = self.previous_trailing_comments().to_vec();
1299                if self.match_token(TokenType::As) {
1300                    // Capture comments from AS token (e.g., AS /* foo */ (a, b, c))
1301                    // These go into trailing_comments (after the alias), not pre_alias_comments
1302                    let as_comments = self.previous_trailing_comments().to_vec();
1303                    // Check for tuple alias: AS ("a", "b", ...)
1304                    if self.match_token(TokenType::LParen) {
1305                        let mut column_aliases = Vec::new();
1306                        loop {
1307                            let col_alias = self.expect_identifier_or_keyword_with_quoted()?;
1308                            column_aliases.push(col_alias);
1309                            if !self.match_token(TokenType::Comma) {
1310                                break;
1311                            }
1312                        }
1313                        self.expect(TokenType::RParen)?;
1314                        let mut trailing_comments = as_comments;
1315                        trailing_comments.extend_from_slice(self.previous_trailing_comments());
1316                        Ok(Expression::Alias(Box::new(Alias {
1317                            this: expr,
1318                            alias: Identifier::empty(),
1319                            column_aliases,
1320                            pre_alias_comments,
1321                            trailing_comments,
1322                            inferred_type: None,
1323                        })))
1324                    } else {
1325                        let alias = self.expect_identifier_or_keyword_with_quoted()?;
1326                        let mut trailing_comments = self.previous_trailing_comments().to_vec();
1327                        // If there were leading comments on the expression (from a separate line),
1328                        // add them as trailing comments after the alias
1329                        trailing_comments.extend(leading_comments.iter().cloned());
1330                        Ok(Expression::Alias(Box::new(Alias {
1331                            this: expr,
1332                            alias,
1333                            column_aliases: Vec::new(),
1334                            pre_alias_comments,
1335                            trailing_comments,
1336                            inferred_type: None,
1337                        })))
1338                    }
1339                } else if (self.check(TokenType::Var) && !self.check_keyword())
1340                    || self.is_command_keyword_as_alias()
1341                {
1342                    // Implicit alias (without AS) - e.g., "1. x" or "1.x" -> "1. AS x"
1343                    // This handles cases like PostgreSQL's "1.x" which parses as float 1. with alias x
1344                    let alias_text = self.advance().text.clone();
1345                    let trailing_comments = self.previous_trailing_comments().to_vec();
1346                    Ok(Expression::Alias(Box::new(Alias {
1347                        this: expr,
1348                        alias: Identifier::new(alias_text),
1349                        column_aliases: Vec::new(),
1350                        pre_alias_comments,
1351                        trailing_comments,
1352                        inferred_type: None,
1353                    })))
1354                } else if !pre_alias_comments.is_empty() {
1355                    // Wrap in Annotated to preserve trailing comments for expressions without aliases
1356                    match &expr {
1357                        Expression::Literal(_) | Expression::Boolean(_) | Expression::Null(_) => {
1358                            Ok(Expression::Annotated(Box::new(
1359                                crate::expressions::Annotated {
1360                                    this: expr,
1361                                    trailing_comments: pre_alias_comments,
1362                                },
1363                            )))
1364                        }
1365                        // For expressions that already have trailing_comments fields, don't double-wrap
1366                        _ => Ok(expr),
1367                    }
1368                } else if !leading_comments.is_empty() {
1369                    // Wrap in Annotated to preserve leading comments as trailing comments
1370                    // This matches Python sqlglot which converts leading line comments to trailing block comments
1371                    Ok(Expression::Annotated(Box::new(
1372                        crate::expressions::Annotated {
1373                            this: expr,
1374                            trailing_comments: leading_comments,
1375                        },
1376                    )))
1377                } else {
1378                    Ok(expr)
1379                }
1380            }
1381        }
1382    }
1383
1384    /// Parse a SELECT statement
1385    fn parse_select(&mut self) -> Result<Expression> {
1386        let result = self.parse_select_body()?;
1387        // Check for set operations (UNION, INTERSECT, EXCEPT)
1388        self.parse_set_operation(result)
1389    }
1390
1391    /// Parse a SELECT statement body without consuming trailing set operations.
1392    /// Used by `parse_select_or_paren_select` to avoid mutual recursion with
1393    /// `parse_set_operation`, which handles set-op chaining iteratively.
1394    fn parse_select_body(&mut self) -> Result<Expression> {
1395        // Capture the SELECT token to get its comments
1396        let select_token = self.expect(TokenType::Select)?;
1397        let leading_comments = select_token.comments;
1398        let post_select_comments = select_token.trailing_comments;
1399
1400        // Parse query hint /*+ ... */ if present (comes immediately after SELECT)
1401        let hint = if self.check(TokenType::Hint) {
1402            Some(self.parse_hint()?)
1403        } else {
1404            None
1405        };
1406
1407        // Parse TOP clause (SQL Server style - comes before DISTINCT)
1408        // But not if TOP is followed by DOT (e.g., SELECT top.x - top is a table alias)
1409        let top = if self.check(TokenType::Top)
1410            && !self.check_next(TokenType::Dot)
1411            && self.match_token(TokenType::Top)
1412        {
1413            // TOP can have parentheses: TOP (10) or without: TOP 10
1414            let (amount, parenthesized) = if self.match_token(TokenType::LParen) {
1415                let expr = if self.check(TokenType::Select) || self.check(TokenType::With) {
1416                    let stmt = self.parse_statement()?;
1417                    Expression::Subquery(Box::new(Subquery {
1418                        this: stmt,
1419                        alias: None,
1420                        column_aliases: Vec::new(),
1421                        order_by: None,
1422                        limit: None,
1423                        offset: None,
1424                        distribute_by: None,
1425                        sort_by: None,
1426                        cluster_by: None,
1427                        lateral: false,
1428                        modifiers_inside: false,
1429                        trailing_comments: Vec::new(),
1430                        inferred_type: None,
1431                    }))
1432                } else {
1433                    self.parse_expression()?
1434                };
1435                self.expect(TokenType::RParen)?;
1436                (expr, true)
1437            } else {
1438                (self.parse_primary()?, false)
1439            };
1440            let percent = self.match_token(TokenType::Percent);
1441            let with_ties = self.match_keywords(&[TokenType::With, TokenType::Ties]);
1442            Some(Top {
1443                this: amount,
1444                percent,
1445                with_ties,
1446                parenthesized,
1447            })
1448        } else {
1449            None
1450        };
1451
1452        // Parse DISTINCT / DISTINCT ON / DISTINCTROW / ALL
1453        // Oracle: UNIQUE is equivalent to DISTINCT (SELECT UNIQUE ... is old-style Oracle syntax)
1454        let is_distinct_token = self.match_token(TokenType::Distinct)
1455            || (matches!(
1456                self.config.dialect,
1457                Some(crate::dialects::DialectType::Oracle)
1458            ) && self.match_token(TokenType::Unique));
1459        let (distinct, distinct_on) = if is_distinct_token {
1460            if self.match_token(TokenType::On) {
1461                // DISTINCT ON (expr, ...)
1462                self.expect(TokenType::LParen)?;
1463                let exprs = self.parse_expression_list()?;
1464                self.expect(TokenType::RParen)?;
1465                (true, Some(exprs))
1466            } else {
1467                (true, None)
1468            }
1469        } else if self.check_identifier("DISTINCTROW") {
1470            // MySQL DISTINCTROW - equivalent to DISTINCT
1471            self.skip();
1472            (true, None)
1473        } else {
1474            // Only consume ALL if it's the SELECT ALL modifier, not if it's a column reference like "all.count"
1475            if self.check(TokenType::All) && !self.check_next(TokenType::Dot) {
1476                self.skip();
1477            }
1478            (false, None)
1479        };
1480
1481        // TSQL: SELECT DISTINCT TOP n - TOP can come after DISTINCT
1482        // If no TOP was parsed before DISTINCT, check for TOP after DISTINCT
1483        let top = if top.is_none()
1484            && self.check(TokenType::Top)
1485            && !self.check_next(TokenType::Dot)
1486            && self.match_token(TokenType::Top)
1487        {
1488            let (amount, parenthesized) = if self.match_token(TokenType::LParen) {
1489                let expr = if self.check(TokenType::Select) || self.check(TokenType::With) {
1490                    let stmt = self.parse_statement()?;
1491                    Expression::Subquery(Box::new(Subquery {
1492                        this: stmt,
1493                        alias: None,
1494                        column_aliases: Vec::new(),
1495                        order_by: None,
1496                        limit: None,
1497                        offset: None,
1498                        distribute_by: None,
1499                        sort_by: None,
1500                        cluster_by: None,
1501                        lateral: false,
1502                        modifiers_inside: false,
1503                        trailing_comments: Vec::new(),
1504                        inferred_type: None,
1505                    }))
1506                } else {
1507                    self.parse_expression()?
1508                };
1509                self.expect(TokenType::RParen)?;
1510                (expr, true)
1511            } else {
1512                (self.parse_primary()?, false)
1513            };
1514            let percent = self.match_token(TokenType::Percent);
1515            let with_ties = self.match_keywords(&[TokenType::With, TokenType::Ties]);
1516            Some(Top {
1517                this: amount,
1518                percent,
1519                with_ties,
1520                parenthesized,
1521            })
1522        } else {
1523            top
1524        };
1525
1526        // Parse MySQL operation modifiers (HIGH_PRIORITY, STRAIGHT_JOIN, SQL_CALC_FOUND_ROWS, etc.)
1527        // These appear after DISTINCT/ALL and before the projections
1528        // Only apply for MySQL-family dialects - other dialects treat these as identifiers
1529        let mut operation_modifiers = Vec::new();
1530        let is_mysql_dialect = matches!(
1531            self.config.dialect,
1532            Some(crate::dialects::DialectType::MySQL)
1533                | Some(crate::dialects::DialectType::SingleStore)
1534                | Some(crate::dialects::DialectType::StarRocks)
1535                | Some(crate::dialects::DialectType::TiDB)
1536                | Some(crate::dialects::DialectType::Doris)
1537        );
1538        if is_mysql_dialect {
1539            const MYSQL_MODIFIERS: &[&str] = &[
1540                "HIGH_PRIORITY",
1541                "STRAIGHT_JOIN",
1542                "SQL_SMALL_RESULT",
1543                "SQL_BIG_RESULT",
1544                "SQL_BUFFER_RESULT",
1545                "SQL_NO_CACHE",
1546                "SQL_CALC_FOUND_ROWS",
1547            ];
1548            loop {
1549                if self.check(TokenType::StraightJoin) {
1550                    self.skip();
1551                    operation_modifiers.push("STRAIGHT_JOIN".to_string());
1552                } else if self.check(TokenType::Var) {
1553                    let upper = self.peek().text.to_ascii_uppercase();
1554                    if MYSQL_MODIFIERS.contains(&upper.as_str()) {
1555                        self.skip();
1556                        operation_modifiers.push(upper);
1557                    } else {
1558                        break;
1559                    }
1560                } else {
1561                    break;
1562                }
1563            }
1564        }
1565
1566        // Parse BigQuery SELECT AS STRUCT / SELECT AS VALUE
1567        let kind = if self.match_token(TokenType::As) {
1568            if self.match_identifier("STRUCT") {
1569                Some("STRUCT".to_string())
1570            } else if self.match_identifier("VALUE") {
1571                Some("VALUE".to_string())
1572            } else {
1573                // Not AS STRUCT/VALUE, backtrack the AS token
1574                self.current -= 1;
1575                None
1576            }
1577        } else {
1578            None
1579        };
1580
1581        // Parse select expressions
1582        let mut expressions = self.parse_select_expressions()?;
1583
1584        // Redshift: EXCLUDE clause at the end of the projection list
1585        // e.g., SELECT *, 4 AS col4 EXCLUDE (col2, col3) FROM ...
1586        // e.g., SELECT col1, *, col2 EXCLUDE(col3) FROM ...
1587        // e.g., SELECT *, 4 AS col4 EXCLUDE col2, col3 FROM ...
1588        // In Python sqlglot, this is handled by overriding _parse_projections in the Redshift parser.
1589        // The EXCLUDE clause is separate from * EXCLUDE — it applies to the entire projection list.
1590        let exclude = if matches!(
1591            self.config.dialect,
1592            Some(crate::dialects::DialectType::Redshift)
1593        ) {
1594            // Check if previous token was EXCLUDE (parsed as implicit alias).
1595            // e.g., SELECT *, 4 AS col4 EXCLUDE col2, col3 FROM ...
1596            //   → "col4 EXCLUDE" was parsed as (col4 aliased-as EXCLUDE), then "col2" as next projection
1597            //   → We need to strip the EXCLUDE alias from the last projection and retreat
1598            // Also handle: EXCLUDE was consumed as a bare column name if no AS was present
1599            let mut retreat_for_exclude = false;
1600            if let Some(last_expr) = expressions.last() {
1601                // Case: "4 AS col4 EXCLUDE" without parens — parsed as separate column "EXCLUDE"
1602                // Actually with the comma break, this won't happen. But "col2 EXCLUDE(col3)" might.
1603                match last_expr {
1604                    Expression::Alias(alias)
1605                        if alias.alias.name.eq_ignore_ascii_case("EXCLUDE") =>
1606                    {
1607                        // The last expression is "something AS EXCLUDE" or implicit alias EXCLUDE
1608                        // Strip the alias and check if EXCLUDE is followed by paren or identifier
1609                        if self.check(TokenType::LParen)
1610                            || self.is_identifier_token()
1611                            || self.is_safe_keyword_as_identifier()
1612                        {
1613                            // Strip the EXCLUDE alias from the last expression
1614                            let stripped = alias.this.clone();
1615                            if let Some(last) = expressions.last_mut() {
1616                                *last = stripped;
1617                            }
1618                            retreat_for_exclude = true;
1619                        }
1620                    }
1621                    _ => {}
1622                }
1623            }
1624
1625            if retreat_for_exclude || self.check(TokenType::Exclude) {
1626                if !retreat_for_exclude {
1627                    self.skip(); // consume EXCLUDE
1628                }
1629                // Parse EXCLUDE columns - with or without parens
1630                let mut exclude_cols = Vec::new();
1631                if self.match_token(TokenType::LParen) {
1632                    // Parenthesized list: EXCLUDE (col1, col2, ...)
1633                    loop {
1634                        let col_expr = self.parse_expression()?;
1635                        exclude_cols.push(col_expr);
1636                        if !self.match_token(TokenType::Comma) {
1637                            break;
1638                        }
1639                    }
1640                    self.match_token(TokenType::RParen);
1641                } else {
1642                    // Non-parenthesized: EXCLUDE col1, col2, ...
1643                    // Parse comma-separated identifiers until FROM or other clause boundary
1644                    loop {
1645                        if self.is_at_end()
1646                            || self.check(TokenType::From)
1647                            || self.check(TokenType::Where)
1648                            || self.check(TokenType::Semicolon)
1649                            || self.check(TokenType::RParen)
1650                        {
1651                            break;
1652                        }
1653                        let col_expr = self.parse_expression()?;
1654                        exclude_cols.push(col_expr);
1655                        if !self.match_token(TokenType::Comma) {
1656                            break;
1657                        }
1658                    }
1659                }
1660                if exclude_cols.is_empty() {
1661                    None
1662                } else {
1663                    Some(exclude_cols)
1664                }
1665            } else {
1666                None
1667            }
1668        } else {
1669            None
1670        };
1671
1672        // Parse INTO clause (SELECT ... INTO [TEMPORARY|UNLOGGED] table_name)
1673        // Also handles Oracle PL/SQL: BULK COLLECT INTO v1, v2, ...
1674        let into = if self.match_text_seq(&["BULK", "COLLECT", "INTO"]) {
1675            // Oracle PL/SQL: BULK COLLECT INTO var1, var2, ...
1676            // Parse target variables as a comma-separated list
1677            let mut target_expressions = vec![self.parse_expression()?];
1678            while self.match_token(TokenType::Comma) {
1679                target_expressions.push(self.parse_expression()?);
1680            }
1681            if target_expressions.len() == 1 {
1682                Some(SelectInto {
1683                    this: target_expressions.remove(0),
1684                    temporary: false,
1685                    unlogged: false,
1686                    bulk_collect: true,
1687                    expressions: Vec::new(),
1688                })
1689            } else {
1690                // Multiple targets - use first as `this` and rest as `expressions`
1691                // Actually, to match Python sqlglot behavior, store all in expressions
1692                Some(SelectInto {
1693                    this: Expression::Null(Null),
1694                    temporary: false,
1695                    unlogged: false,
1696                    bulk_collect: true,
1697                    expressions: target_expressions,
1698                })
1699            }
1700        } else if self.match_token(TokenType::Into) {
1701            // Check for TEMPORARY/TEMP/UNLOGGED keyword (PostgreSQL)
1702            let temporary = self.match_token(TokenType::Temporary) || self.match_identifier("TEMP");
1703            let unlogged = !temporary && self.match_identifier("UNLOGGED");
1704            // Parse first target (table name or PL/SQL variable)
1705            let table_name = self.parse_table_ref()?;
1706            // Oracle PL/SQL: SELECT ... INTO var1, var2, ... FROM ...
1707            // If followed by comma, parse additional target variables
1708            if self.match_token(TokenType::Comma) {
1709                let mut target_expressions = vec![Expression::Table(Box::new(table_name))];
1710                target_expressions.push(self.parse_expression()?);
1711                while self.match_token(TokenType::Comma) {
1712                    target_expressions.push(self.parse_expression()?);
1713                }
1714                Some(SelectInto {
1715                    this: Expression::Null(Null),
1716                    temporary,
1717                    unlogged,
1718                    bulk_collect: false,
1719                    expressions: target_expressions,
1720                })
1721            } else {
1722                Some(SelectInto {
1723                    this: Expression::Table(Box::new(table_name)),
1724                    temporary,
1725                    unlogged,
1726                    bulk_collect: false,
1727                    expressions: Vec::new(),
1728                })
1729            }
1730        } else {
1731            None
1732        };
1733
1734        // Parse FROM clause
1735        let from = if self.match_token(TokenType::From) {
1736            Some(self.parse_from()?)
1737        } else {
1738            None
1739        };
1740
1741        // Parse JOINs
1742        let mut joins = self.parse_joins()?;
1743
1744        // Handle PIVOT/UNPIVOT that comes after JOINs (e.g., SELECT * FROM a JOIN b ON ... PIVOT(...))
1745        // Store PIVOT/UNPIVOT in the last join's pivots field (this matches SQLGlot's semantics)
1746        while self.check(TokenType::Pivot) || self.check(TokenType::Unpivot) {
1747            if !joins.is_empty() {
1748                let last_idx = joins.len() - 1;
1749                // Parse the pivot/unpivot and store in the join's pivots vector
1750                // We pass a Null expression as the `this` since the pivot applies to the entire join result
1751                if self.match_token(TokenType::Pivot) {
1752                    let pivot = self.parse_pivot(Expression::Null(crate::expressions::Null))?;
1753                    joins[last_idx].pivots.push(pivot);
1754                } else if self.match_token(TokenType::Unpivot) {
1755                    let unpivot = self.parse_unpivot(Expression::Null(crate::expressions::Null))?;
1756                    joins[last_idx].pivots.push(unpivot);
1757                }
1758            } else {
1759                // No joins - break to avoid infinite loop
1760                break;
1761            }
1762        }
1763
1764        // Parse LATERAL VIEW clauses (Hive/Spark)
1765        let lateral_views = self.parse_lateral_views()?;
1766
1767        // Parse PREWHERE clause (ClickHouse specific)
1768        let prewhere = if self.match_token(TokenType::Prewhere) {
1769            Some(self.parse_expression()?)
1770        } else {
1771            None
1772        };
1773
1774        // Parse WHERE clause
1775        let mut where_clause = if self.match_token(TokenType::Where) {
1776            Some(Where {
1777                this: self.parse_expression()?,
1778            })
1779        } else {
1780            None
1781        };
1782
1783        // Parse CONNECT BY clause (Oracle hierarchical queries)
1784        let connect = self.parse_connect()?;
1785
1786        // Parse GROUP BY
1787        let group_by = if self.check(TokenType::Group) {
1788            let group_comments = self.current_leading_comments().to_vec();
1789            if self.match_keywords(&[TokenType::Group, TokenType::By]) {
1790                let mut gb = self.parse_group_by()?;
1791                gb.comments = group_comments;
1792                Some(gb)
1793            } else {
1794                None
1795            }
1796        } else if matches!(
1797            self.config.dialect,
1798            Some(crate::dialects::DialectType::ClickHouse)
1799        ) && self.check(TokenType::With)
1800            && (self.check_next_identifier("TOTALS")
1801                || self.check_next(TokenType::Rollup)
1802                || self.check_next(TokenType::Cube))
1803        {
1804            // ClickHouse: WITH TOTALS/ROLLUP/CUBE without GROUP BY
1805            self.skip(); // consume WITH
1806            let totals = self.match_identifier("TOTALS");
1807            let mut expressions = Vec::new();
1808            if self.match_token(TokenType::Rollup) {
1809                expressions.push(Expression::Rollup(Box::new(Rollup {
1810                    expressions: Vec::new(),
1811                })));
1812            } else if self.match_token(TokenType::Cube) {
1813                expressions.push(Expression::Cube(Box::new(Cube {
1814                    expressions: Vec::new(),
1815                })));
1816            }
1817            // Check for chained WITH TOTALS after WITH ROLLUP/CUBE
1818            if !totals && self.check(TokenType::With) && self.check_next_identifier("TOTALS") {
1819                self.skip();
1820                self.skip();
1821            }
1822            Some(GroupBy {
1823                expressions,
1824                all: None,
1825                totals,
1826                comments: Vec::new(),
1827            })
1828        } else {
1829            None
1830        };
1831
1832        // Parse HAVING
1833        let having = if self.check(TokenType::Having) {
1834            let having_comments = self.current_leading_comments().to_vec();
1835            self.skip(); // consume HAVING
1836            Some(Having {
1837                this: self.parse_expression()?,
1838                comments: having_comments,
1839            })
1840        } else {
1841            None
1842        };
1843
1844        // Parse QUALIFY clause (Snowflake, BigQuery, DuckDB)
1845        // QUALIFY can appear before or after WINDOW clause
1846        let mut qualify = if self.match_token(TokenType::Qualify) {
1847            Some(Qualify {
1848                this: self.parse_expression()?,
1849            })
1850        } else {
1851            None
1852        };
1853
1854        // Parse WINDOW clause (named windows)
1855        // Only match WINDOW if followed by identifier AS ( (a real window definition)
1856        // Otherwise "window" may be a table alias (e.g., SELECT * FROM foo window)
1857        let windows = if self.check(TokenType::Window) && {
1858            let next_pos = self.current + 1;
1859            next_pos < self.tokens.len()
1860                && (self.tokens[next_pos].token_type == TokenType::Var
1861                    || self.tokens[next_pos].token_type == TokenType::Identifier)
1862        } {
1863            self.skip(); // consume WINDOW
1864            Some(self.parse_named_windows()?)
1865        } else {
1866            None
1867        };
1868
1869        // QUALIFY can also appear after WINDOW clause (DuckDB)
1870        let qualify_after_window = if qualify.is_none() && self.match_token(TokenType::Qualify) {
1871            qualify = Some(Qualify {
1872                this: self.parse_expression()?,
1873            });
1874            true
1875        } else {
1876            false
1877        };
1878
1879        // Parse DISTRIBUTE BY (Hive/Spark) - comes before SORT BY
1880        let distribute_by = if self.match_keywords(&[TokenType::Distribute, TokenType::By]) {
1881            Some(self.parse_distribute_by()?)
1882        } else {
1883            None
1884        };
1885
1886        // Parse CLUSTER BY (Hive/Spark)
1887        let cluster_by = if self.match_keywords(&[TokenType::Cluster, TokenType::By]) {
1888            Some(self.parse_cluster_by()?)
1889        } else {
1890            None
1891        };
1892
1893        // Parse SORT BY (Hive/Spark) - can come before ORDER BY
1894        let sort_by = if self.match_keywords(&[TokenType::Sort, TokenType::By]) {
1895            Some(self.parse_sort_by()?)
1896        } else {
1897            None
1898        };
1899
1900        // Parse ORDER BY or ORDER SIBLINGS BY (Oracle) - comes after SORT BY
1901        let order_by = if self.check(TokenType::Order) {
1902            let order_comments = self.current_leading_comments().to_vec();
1903            if self.match_keywords(&[TokenType::Order, TokenType::Siblings, TokenType::By]) {
1904                // ORDER SIBLINGS BY (Oracle hierarchical queries)
1905                let mut ob = self.parse_order_by_with_siblings(true)?;
1906                ob.comments = order_comments;
1907                Some(ob)
1908            } else if self.match_keywords(&[TokenType::Order, TokenType::By]) {
1909                let mut ob = self.parse_order_by()?;
1910                ob.comments = order_comments;
1911                Some(ob)
1912            } else {
1913                None
1914            }
1915        } else {
1916            None
1917        };
1918
1919        // Parse LIMIT (supports MySQL syntax: LIMIT offset, count)
1920        // DuckDB supports: LIMIT 10 PERCENT or LIMIT 10%
1921        // Capture trailing comments from the token before LIMIT (e.g., WHERE condition's last token)
1922        // These comments should be emitted after the LIMIT value, not before LIMIT.
1923        let pre_limit_comments = if self.check(TokenType::Limit) {
1924            let mut comments = self.previous_trailing_comments().to_vec();
1925            // Also capture leading comments on the LIMIT token (comments on a separate line before LIMIT)
1926            comments.extend_from_slice(self.current_leading_comments());
1927            comments
1928        } else {
1929            Vec::new()
1930        };
1931        let (limit, offset) = if self.match_token(TokenType::Limit) {
1932            // Clear the pre-LIMIT comments from the WHERE condition expression to avoid duplication
1933            if !pre_limit_comments.is_empty() {
1934                if let Some(ref mut w) = where_clause {
1935                    Self::clear_rightmost_trailing_comments(&mut w.this);
1936                }
1937            }
1938            // First try parse_unary to check for PERCENT/% modifier.
1939            // This avoids parse_expression consuming % as the modulo operator.
1940            // Both "PERCENT" and "%" tokens have TokenType::Percent, but we need to
1941            // distinguish PERCENT-as-modifier from %-as-modulo. "%" is PERCENT when
1942            // followed by a clause boundary (OFFSET, end, semicolon, etc.).
1943            let saved_pos = self.current;
1944            let (first_expr, has_percent) = {
1945                let unary_result = self.parse_unary();
1946                match unary_result {
1947                    Ok(expr) => {
1948                        if self.check(TokenType::Percent) && self.is_percent_modifier() {
1949                            // Found PERCENT keyword or % symbol used as PERCENT modifier
1950                            self.skip();
1951                            (expr, true)
1952                        } else {
1953                            // No PERCENT - backtrack and use full parse_expression
1954                            self.current = saved_pos;
1955                            let full_expr = self.parse_expression()?;
1956                            // Check again for PERCENT keyword (e.g., after complex expression)
1957                            let has_pct =
1958                                if self.check(TokenType::Percent) && self.is_percent_modifier() {
1959                                    self.skip();
1960                                    true
1961                                } else {
1962                                    false
1963                                };
1964                            (full_expr, has_pct)
1965                        }
1966                    }
1967                    Err(_) => {
1968                        // Unary parsing failed - backtrack and use parse_expression
1969                        self.current = saved_pos;
1970                        let full_expr = self.parse_expression()?;
1971                        let has_pct =
1972                            if self.check(TokenType::Percent) && self.is_percent_modifier() {
1973                                self.skip();
1974                                true
1975                            } else {
1976                                false
1977                            };
1978                        (full_expr, has_pct)
1979                    }
1980                }
1981            };
1982            // MySQL syntax: LIMIT offset, count
1983            if self.match_token(TokenType::Comma) {
1984                let second_expr = self.parse_expression()?;
1985                // First expression is offset, second is count
1986                (
1987                    Some(Limit {
1988                        this: second_expr,
1989                        percent: false,
1990                        comments: pre_limit_comments.clone(),
1991                    }),
1992                    Some(Offset {
1993                        this: first_expr,
1994                        rows: None,
1995                    }),
1996                )
1997            } else {
1998                // Standard: LIMIT count [PERCENT]
1999                (
2000                    Some(Limit {
2001                        this: first_expr,
2002                        percent: has_percent,
2003                        comments: pre_limit_comments,
2004                    }),
2005                    None,
2006                )
2007            }
2008        } else {
2009            (None, None)
2010        };
2011
2012        // WITH TIES after LIMIT (ClickHouse, DuckDB)
2013        if limit.is_some() {
2014            let _ = self.match_keywords(&[TokenType::With, TokenType::Ties]);
2015        }
2016
2017        // Parse OFFSET (if not already parsed from MySQL LIMIT syntax)
2018        // Standard SQL syntax: OFFSET n [ROW|ROWS]
2019        // Some dialects (Presto/Trino) support: OFFSET n LIMIT m
2020        let (limit, offset) = if offset.is_none() && self.match_token(TokenType::Offset) {
2021            let expr = self.parse_expression()?;
2022            // Consume optional ROW or ROWS keyword and track it
2023            let rows = if self.match_token(TokenType::Row) || self.match_token(TokenType::Rows) {
2024                Some(true)
2025            } else {
2026                None
2027            };
2028            let offset = Some(Offset { this: expr, rows });
2029
2030            // Check for LIMIT after OFFSET (Presto/Trino syntax: OFFSET n LIMIT m)
2031            let limit = if limit.is_none() && self.match_token(TokenType::Limit) {
2032                let limit_expr = self.parse_expression()?;
2033                Some(Limit {
2034                    this: limit_expr,
2035                    percent: false,
2036                    comments: Vec::new(),
2037                })
2038            } else {
2039                limit
2040            };
2041
2042            (limit, offset)
2043        } else {
2044            (limit, offset)
2045        };
2046
2047        // ClickHouse: LIMIT ... BY expressions
2048        let limit_by = if matches!(
2049            self.config.dialect,
2050            Some(crate::dialects::DialectType::ClickHouse)
2051        ) && limit.is_some()
2052            && self.match_token(TokenType::By)
2053        {
2054            let expressions = self.parse_expression_list()?;
2055            if expressions.is_empty() {
2056                return Err(self.parse_error("Expected expression after LIMIT BY"));
2057            }
2058            Some(expressions)
2059        } else {
2060            None
2061        };
2062
2063        // ClickHouse: second LIMIT after LIMIT BY (LIMIT n BY expr LIMIT m)
2064        // Also supports LIMIT offset, count syntax
2065        let (limit, offset) = if limit_by.is_some() && self.match_token(TokenType::Limit) {
2066            let first_expr = self.parse_expression()?;
2067            if self.match_token(TokenType::Comma) {
2068                // LIMIT offset, count
2069                let count_expr = self.parse_expression()?;
2070                (
2071                    Some(Limit {
2072                        this: count_expr,
2073                        percent: false,
2074                        comments: Vec::new(),
2075                    }),
2076                    Some(Offset {
2077                        this: first_expr,
2078                        rows: None,
2079                    }),
2080                )
2081            } else {
2082                (
2083                    Some(Limit {
2084                        this: first_expr,
2085                        percent: false,
2086                        comments: Vec::new(),
2087                    }),
2088                    offset,
2089                )
2090            }
2091        } else {
2092            (limit, offset)
2093        };
2094
2095        // Parse FETCH FIRST/NEXT clause
2096        let fetch = if self.match_token(TokenType::Fetch) {
2097            Some(self.parse_fetch()?)
2098        } else {
2099            None
2100        };
2101
2102        // Parse SAMPLE / TABLESAMPLE clause
2103        let sample = self.parse_sample_clause()?;
2104
2105        // Parse FOR UPDATE/SHARE locks or FOR XML/JSON (T-SQL)
2106        let (locks, for_xml, for_json) = self.parse_locks_and_for_xml()?;
2107
2108        // TSQL: OPTION clause (e.g., OPTION(LABEL = 'foo', HASH JOIN))
2109        let option = if self.check_identifier("OPTION") && self.check_next(TokenType::LParen) {
2110            self.skip(); // consume OPTION
2111            self.skip(); // consume (
2112            let mut content = String::from("OPTION(");
2113            let mut depth = 1;
2114            while !self.is_at_end() && depth > 0 {
2115                let tok = self.advance();
2116                if tok.token_type == TokenType::LParen {
2117                    depth += 1;
2118                } else if tok.token_type == TokenType::RParen {
2119                    depth -= 1;
2120                }
2121                if depth > 0 {
2122                    if tok.token_type == TokenType::String {
2123                        if content.len() > 7 && !content.ends_with('(') && !content.ends_with(' ') {
2124                            content.push(' ');
2125                        }
2126                        content.push('\'');
2127                        content.push_str(&tok.text.replace('\'', "''"));
2128                        content.push('\'');
2129                    } else if tok.token_type == TokenType::Eq {
2130                        content.push_str(" = ");
2131                    } else if tok.token_type == TokenType::Comma {
2132                        content.push_str(", ");
2133                    } else {
2134                        if content.len() > 7 && !content.ends_with('(') && !content.ends_with(' ') {
2135                            content.push(' ');
2136                        }
2137                        content.push_str(&tok.text);
2138                    }
2139                }
2140            }
2141            content.push(')');
2142            Some(content)
2143        } else {
2144            None
2145        };
2146
2147        // ClickHouse: SETTINGS and FORMAT clauses after LIMIT/OFFSET/FETCH
2148        let (settings, format) = if matches!(
2149            self.config.dialect,
2150            Some(crate::dialects::DialectType::ClickHouse)
2151        ) {
2152            let mut settings: Option<Vec<Expression>> = None;
2153            let mut format: Option<Expression> = None;
2154
2155            loop {
2156                if settings.is_none() && self.match_token(TokenType::Settings) {
2157                    let mut settings_exprs = Vec::new();
2158                    loop {
2159                        settings_exprs.push(self.parse_expression()?);
2160                        if !self.match_token(TokenType::Comma) {
2161                            break;
2162                        }
2163                    }
2164                    settings = Some(settings_exprs);
2165                    continue;
2166                }
2167
2168                if format.is_none() && self.match_token(TokenType::Format) {
2169                    // ClickHouse: FORMAT Null is valid (Null is a keyword token, not an identifier)
2170                    let ident = if self.check(TokenType::Null) {
2171                        let text = self.advance().text;
2172                        Identifier::new(text)
2173                    } else {
2174                        self.expect_identifier_or_keyword_with_quoted()?
2175                    };
2176                    format = Some(Expression::Identifier(ident));
2177                    // ClickHouse: FORMAT <name> may be followed by inline data
2178                    // (CSV rows, JSON objects, etc.) — consume to semicolon
2179                    if matches!(
2180                        self.config.dialect,
2181                        Some(crate::dialects::DialectType::ClickHouse)
2182                    ) && !self.is_at_end()
2183                        && !self.check(TokenType::Semicolon)
2184                        && !self.check(TokenType::Settings)
2185                    {
2186                        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
2187                            self.skip();
2188                        }
2189                    }
2190                    continue;
2191                }
2192
2193                break;
2194            }
2195
2196            (settings, format)
2197        } else {
2198            (None, None)
2199        };
2200
2201        let select = Select {
2202            expressions,
2203            from,
2204            joins,
2205            lateral_views,
2206            prewhere,
2207            where_clause,
2208            group_by,
2209            having,
2210            qualify,
2211            order_by,
2212            distribute_by,
2213            cluster_by,
2214            sort_by,
2215            limit,
2216            offset,
2217            limit_by,
2218            fetch,
2219            distinct,
2220            distinct_on,
2221            top,
2222            with: None,
2223            sample,
2224            settings,
2225            format,
2226            windows,
2227            hint,
2228            connect,
2229            into,
2230            locks,
2231            for_xml,
2232            for_json,
2233            leading_comments,
2234            post_select_comments,
2235            kind,
2236            operation_modifiers,
2237            qualify_after_window,
2238            option,
2239            exclude,
2240        };
2241
2242        Ok(Expression::Select(Box::new(select)))
2243    }
2244
2245    /// Parse a WITH clause (CTEs)
2246    fn parse_with(&mut self) -> Result<Expression> {
2247        use crate::dialects::DialectType;
2248
2249        let with_token = self.expect(TokenType::With)?;
2250        let leading_comments = with_token.comments;
2251
2252        let recursive = self.match_token(TokenType::Recursive);
2253        let mut ctes = Vec::new();
2254
2255        loop {
2256            // ClickHouse supports expression-first WITH items:
2257            // WITH <expr> AS <alias> SELECT ...
2258            if matches!(self.config.dialect, Some(DialectType::ClickHouse)) {
2259                let saved_pos = self.current;
2260                if let Ok(expr) = self.parse_expression() {
2261                    // Check if parse_expression already consumed the AS alias
2262                    // (e.g., `(1, 2) AS a` gets parsed as Alias(Tuple, "a") by the tuple alias handler)
2263                    let (inner_expr, alias_opt) = if let Expression::Alias(ref alias_box) = expr {
2264                        (alias_box.this.clone(), Some(alias_box.alias.clone()))
2265                    } else {
2266                        (expr, None)
2267                    };
2268
2269                    if let Some(alias) = alias_opt {
2270                        // Expression already had AS alias consumed
2271                        ctes.push(Cte {
2272                            alias,
2273                            this: inner_expr,
2274                            columns: Vec::new(),
2275                            materialized: None,
2276                            key_expressions: Vec::new(),
2277                            alias_first: false,
2278                            comments: Vec::new(),
2279                        });
2280
2281                        if self.match_token(TokenType::Comma) {
2282                            continue;
2283                        }
2284                        break;
2285                    } else if self.match_token(TokenType::As)
2286                        && self.is_identifier_or_keyword_token()
2287                    {
2288                        // Require AS <alias> to disambiguate from standard CTE syntax
2289                        let alias = self.expect_identifier_or_keyword_with_quoted()?;
2290                        ctes.push(Cte {
2291                            alias,
2292                            this: inner_expr,
2293                            columns: Vec::new(),
2294                            materialized: None,
2295                            key_expressions: Vec::new(),
2296                            alias_first: false,
2297                            comments: Vec::new(),
2298                        });
2299
2300                        if self.match_token(TokenType::Comma) {
2301                            continue;
2302                        }
2303                        break;
2304                    } else if self.check(TokenType::Select) || self.check(TokenType::Comma) {
2305                        // ClickHouse: WITH expr SELECT ... (unaliased expression in CTE)
2306                        ctes.push(Cte {
2307                            alias: Identifier::new(format!("{}", inner_expr)),
2308                            this: inner_expr,
2309                            columns: Vec::new(),
2310                            materialized: None,
2311                            key_expressions: Vec::new(),
2312                            alias_first: false,
2313                            comments: Vec::new(),
2314                        });
2315
2316                        if self.match_token(TokenType::Comma) {
2317                            continue;
2318                        }
2319                        break;
2320                    }
2321                }
2322                // Fall back to standard CTE parsing
2323                self.current = saved_pos;
2324            }
2325
2326            // CTE names can be keywords like 'view', 'use', 'all', etc.
2327            let name = self.expect_identifier_or_alias_keyword_with_quoted()?;
2328
2329            // Optional column list
2330            // But first check for Snowflake-style CTE: WITH t (SELECT ...) - no AS keyword
2331            // In that case, LParen is followed by SELECT, not column names
2332            let columns = if self.check(TokenType::LParen) && !self.check_next(TokenType::Select) {
2333                self.skip(); // consume LParen
2334                let cols = self.parse_identifier_list()?;
2335                self.expect(TokenType::RParen)?;
2336                cols
2337            } else {
2338                Vec::new()
2339            };
2340
2341            // Optional USING KEY (columns) for DuckDB recursive CTEs
2342            let key_expressions = if self.match_keywords(&[TokenType::Using, TokenType::Key]) {
2343                self.expect(TokenType::LParen)?;
2344                let keys = self.parse_identifier_list()?;
2345                self.expect(TokenType::RParen)?;
2346                keys
2347            } else {
2348                Vec::new()
2349            };
2350
2351            // ClickHouse: keyword -> body AS alias (single-param lambda where param is a keyword)
2352            // e.g., WITH time -> sin(time * 2 * pi()) AS sine_wave
2353            if matches!(self.config.dialect, Some(DialectType::ClickHouse))
2354                && self.check(TokenType::Arrow)
2355            {
2356                self.skip(); // consume ->
2357                let body = self.parse_expression()?;
2358                let lambda = Expression::Lambda(Box::new(LambdaExpr {
2359                    parameters: vec![name.clone()],
2360                    body,
2361                    colon: false,
2362                    parameter_types: Vec::new(),
2363                }));
2364                // Expect AS alias
2365                if self.match_token(TokenType::As) && self.is_identifier_or_keyword_token() {
2366                    let alias = self.expect_identifier_or_keyword_with_quoted()?;
2367                    ctes.push(Cte {
2368                        alias,
2369                        this: lambda,
2370                        columns: Vec::new(),
2371                        materialized: None,
2372                        key_expressions: Vec::new(),
2373                        alias_first: false,
2374                        comments: Vec::new(),
2375                    });
2376                } else {
2377                    // Unaliased lambda CTE
2378                    ctes.push(Cte {
2379                        alias: name,
2380                        this: lambda,
2381                        columns: Vec::new(),
2382                        materialized: None,
2383                        key_expressions: Vec::new(),
2384                        alias_first: false,
2385                        comments: Vec::new(),
2386                    });
2387                }
2388                if self.match_token(TokenType::Comma) {
2389                    continue;
2390                }
2391                break;
2392            }
2393
2394            // AS is optional (Snowflake allows WITH t (SELECT ...) without AS)
2395            let cte_comments = if self.match_token(TokenType::As) {
2396                // Capture trailing comments from the AS token
2397                // e.g., "WITH a AS /* comment */ (...)" -> comment goes after alias
2398                self.previous_trailing_comments().to_vec()
2399            } else {
2400                Vec::new()
2401            };
2402
2403            // Check for MATERIALIZED or NOT MATERIALIZED
2404            let materialized = if self.match_token(TokenType::Materialized) {
2405                Some(true)
2406            } else if self.match_token(TokenType::Not) {
2407                self.expect(TokenType::Materialized)?;
2408                Some(false)
2409            } else {
2410                None
2411            };
2412
2413            self.expect(TokenType::LParen)?;
2414            let query = self.parse_statement()?;
2415            self.expect(TokenType::RParen)?;
2416
2417            ctes.push(Cte {
2418                alias: name,
2419                this: query,
2420                columns,
2421                materialized,
2422                key_expressions,
2423                alias_first: true,
2424                comments: cte_comments,
2425            });
2426
2427            if !self.match_token(TokenType::Comma) {
2428                // Check for WITH merging: WITH a AS (...) WITH b AS (...) -> merged
2429                // If the next token is WITH (not followed by nothing), continue parsing CTEs
2430                if self.check(TokenType::With) {
2431                    self.skip(); // consume the redundant WITH keyword
2432                                 // Check if this WITH is also RECURSIVE
2433                    if self.match_token(TokenType::Recursive) && !recursive {
2434                        // If second WITH is RECURSIVE but first wasn't, ignore (keep non-recursive)
2435                    }
2436                    continue; // continue the loop to parse more CTEs
2437                }
2438                break;
2439            }
2440            // WI-14f: Skip redundant WITH keyword after comma in CTE list
2441            // e.g., WITH a AS (SELECT 1), WITH b AS (SELECT 2) SELECT *
2442            self.match_token(TokenType::With);
2443        }
2444
2445        // Parse optional SEARCH/CYCLE clause for recursive CTEs (PostgreSQL)
2446        // Syntax: SEARCH BREADTH|DEPTH FIRST BY column SET column [USING column]
2447        //     or: CYCLE column SET column USING column
2448        let search = self.parse_recursive_with_search()?;
2449
2450        // Parse the main query
2451        let mut main_query = self.parse_statement()?;
2452
2453        // Unwrap parenthesized wrappers to find the inner SELECT
2454        // (matching Python sqlglot: while isinstance(this, Subquery) and this.is_wrapper)
2455        loop {
2456            match main_query {
2457                Expression::Paren(paren) => {
2458                    main_query = paren.this;
2459                }
2460                Expression::Subquery(ref sub)
2461                    if sub.alias.is_none()
2462                        && sub.order_by.is_none()
2463                        && sub.limit.is_none()
2464                        && sub.offset.is_none() =>
2465                {
2466                    // Unwrap Subquery wrapper (parenthesized query without modifiers)
2467                    if let Expression::Subquery(sub) = main_query {
2468                        main_query = sub.this;
2469                    } else {
2470                        break;
2471                    }
2472                }
2473                _ => break,
2474            }
2475        }
2476
2477        // Attach WITH to the main query
2478        let with_clause = With {
2479            ctes,
2480            recursive,
2481            leading_comments,
2482            search,
2483        };
2484        match &mut main_query {
2485            Expression::Select(ref mut select) => {
2486                select.with = Some(with_clause);
2487            }
2488            Expression::Union(ref mut union) => {
2489                union.with = Some(with_clause);
2490            }
2491            Expression::Intersect(ref mut intersect) => {
2492                intersect.with = Some(with_clause);
2493            }
2494            Expression::Except(ref mut except) => {
2495                except.with = Some(with_clause);
2496            }
2497            Expression::Update(ref mut update) => {
2498                update.with = Some(with_clause);
2499            }
2500            Expression::Insert(ref mut insert) => {
2501                insert.with = Some(with_clause);
2502            }
2503            Expression::Delete(ref mut delete) => {
2504                delete.with = Some(with_clause);
2505            }
2506            Expression::CreateTable(ref mut ct) => {
2507                ct.with_cte = Some(with_clause);
2508            }
2509            Expression::Pivot(ref mut pivot) => {
2510                pivot.with = Some(with_clause);
2511            }
2512            Expression::Merge(ref mut merge) => {
2513                merge.with_ = Some(Box::new(Expression::With(Box::new(with_clause))));
2514            }
2515            _ => {}
2516        }
2517
2518        Ok(main_query)
2519    }
2520
2521    /// Parse SELECT expressions
2522    fn parse_select_expressions(&mut self) -> Result<Vec<Expression>> {
2523        let mut expressions = Vec::new();
2524
2525        loop {
2526            // Check if we're at end of select list (empty list case for TSQL TOP)
2527            // This allows queries like "SELECT TOP 10 PERCENT" with no columns
2528            // Also check for Oracle BULK COLLECT INTO sequence
2529            // ClickHouse: minus() is tokenized as Except but should be treated as function
2530            let is_ch_keyword_func = matches!(
2531                self.config.dialect,
2532                Some(crate::dialects::DialectType::ClickHouse)
2533            ) && (self.check(TokenType::Except)
2534                || self.check(TokenType::Intersect))
2535                && self.check_next(TokenType::LParen);
2536            // ClickHouse: `from`/`except` can be column names when followed by an operator
2537            // (e.g., `from + from`, `from in [0]`, `from, ...`)
2538            // Also: `from FROM t` — two consecutive FROM tokens means first is column name
2539            let is_ch_keyword_as_column = matches!(
2540                self.config.dialect,
2541                Some(crate::dialects::DialectType::ClickHouse)
2542            ) && (self.check(TokenType::From)
2543                || self.check(TokenType::Except))
2544                && {
2545                    let next_tt = self
2546                        .peek_nth(1)
2547                        .map(|t| t.token_type)
2548                        .unwrap_or(TokenType::Semicolon);
2549                    matches!(
2550                        next_tt,
2551                        TokenType::Plus | TokenType::Dash | TokenType::Star | TokenType::Slash
2552                        | TokenType::Percent | TokenType::Eq | TokenType::Neq | TokenType::Lt
2553                        | TokenType::Gt | TokenType::Lte | TokenType::Gte
2554                        | TokenType::And | TokenType::Or | TokenType::Comma | TokenType::Dot
2555                        | TokenType::In | TokenType::Is | TokenType::Not | TokenType::Like
2556                        | TokenType::Between | TokenType::Semicolon | TokenType::RParen
2557                        | TokenType::As | TokenType::DPipe | TokenType::Amp | TokenType::Pipe
2558                        | TokenType::LBracket
2559                        // Two consecutive FROM tokens: first is column name (e.g., SELECT from FROM t)
2560                        | TokenType::From
2561                    )
2562                };
2563            if !is_ch_keyword_func
2564                && !is_ch_keyword_as_column
2565                && (self.is_at_end()
2566                    || self.check(TokenType::From)
2567                    || self.check(TokenType::Where)
2568                    || self.check(TokenType::Into)
2569                    || self.check(TokenType::Union)
2570                    || self.check(TokenType::Intersect)
2571                    || self.check(TokenType::Except)
2572                    || self.check(TokenType::Order)
2573                    || self.check(TokenType::Limit)
2574                    || self.check(TokenType::Semicolon)
2575                    || self.check_text_seq(&["BULK", "COLLECT", "INTO"]))
2576            {
2577                break;
2578            }
2579
2580            // Handle star
2581            if self.check(TokenType::Star) {
2582                self.skip();
2583                let star_trailing_comments = self.previous_trailing_comments().to_vec();
2584                let star = self.parse_star_modifiers_with_comments(None, star_trailing_comments)?;
2585                let mut star_expr = Expression::Star(star);
2586                // ClickHouse: * APPLY(func) or * APPLY func or * APPLY(x -> expr) column transformer
2587                if matches!(
2588                    self.config.dialect,
2589                    Some(crate::dialects::DialectType::ClickHouse)
2590                ) {
2591                    while self.check(TokenType::Apply) {
2592                        self.skip(); // consume APPLY
2593                        let apply_expr = if self.match_token(TokenType::LParen) {
2594                            // Could be APPLY(func_name) or APPLY(x -> expr)
2595                            let expr = self.parse_expression()?;
2596                            self.expect(TokenType::RParen)?;
2597                            expr
2598                        } else {
2599                            // APPLY func or APPLY x -> expr (no parens)
2600                            // Parse as expression to handle lambdas
2601                            self.parse_expression()?
2602                        };
2603                        star_expr = Expression::Apply(Box::new(crate::expressions::Apply {
2604                            this: Box::new(star_expr),
2605                            expression: Box::new(apply_expr),
2606                        }));
2607                    }
2608                }
2609                // ClickHouse: Also handle EXCEPT/REPLACE between APPLYs:
2610                // * APPLY(toDate) EXCEPT(i, j) APPLY(any)
2611                if matches!(
2612                    self.config.dialect,
2613                    Some(crate::dialects::DialectType::ClickHouse)
2614                ) && (self.check(TokenType::Except)
2615                    || self.check(TokenType::Exclude)
2616                    || self.check(TokenType::Replace))
2617                {
2618                    // Consume EXCEPT/REPLACE modifiers after APPLY
2619                    self.parse_star_modifiers(None)?;
2620                    // Continue with more APPLYs
2621                    while self.check(TokenType::Apply) {
2622                        self.skip();
2623                        let apply_expr = if self.match_token(TokenType::LParen) {
2624                            let expr = self.parse_expression()?;
2625                            self.expect(TokenType::RParen)?;
2626                            expr
2627                        } else {
2628                            self.parse_expression()?
2629                        };
2630                        star_expr = Expression::Apply(Box::new(crate::expressions::Apply {
2631                            this: Box::new(star_expr),
2632                            expression: Box::new(apply_expr),
2633                        }));
2634                    }
2635                }
2636                // ClickHouse: * followed by operators (e.g., * IS NOT NULL, * AND expr)
2637                // Treat * as a regular expression and continue parsing operators
2638                if matches!(
2639                    self.config.dialect,
2640                    Some(crate::dialects::DialectType::ClickHouse)
2641                ) && matches!(
2642                    self.peek().token_type,
2643                    TokenType::Is
2644                        | TokenType::And
2645                        | TokenType::Or
2646                        | TokenType::Eq
2647                        | TokenType::Neq
2648                        | TokenType::Lt
2649                        | TokenType::Gt
2650                        | TokenType::Lte
2651                        | TokenType::Gte
2652                        | TokenType::Not
2653                        | TokenType::Plus
2654                        | TokenType::Dash
2655                        | TokenType::Slash
2656                        | TokenType::Percent
2657                        | TokenType::Like
2658                        | TokenType::Between
2659                        | TokenType::In
2660                ) {
2661                    // Re-parse from the operator with star_expr as the left side
2662                    let left = star_expr;
2663                    // Use parse_comparison / parse_is chain
2664                    if self.check(TokenType::Is) {
2665                        self.skip(); // consume IS
2666                        let not = self.match_token(TokenType::Not);
2667                        if self.match_token(TokenType::Null) {
2668                            star_expr = if not {
2669                                Expression::Not(Box::new(UnaryOp {
2670                                    this: Expression::Is(Box::new(BinaryOp::new(
2671                                        left,
2672                                        Expression::Null(Null),
2673                                    ))),
2674                                    inferred_type: None,
2675                                }))
2676                            } else {
2677                                Expression::Is(Box::new(BinaryOp::new(
2678                                    left,
2679                                    Expression::Null(Null),
2680                                )))
2681                            };
2682                        } else {
2683                            let right = self.parse_or()?;
2684                            star_expr = if not {
2685                                Expression::Not(Box::new(UnaryOp {
2686                                    this: Expression::Is(Box::new(BinaryOp::new(left, right))),
2687                                    inferred_type: None,
2688                                }))
2689                            } else {
2690                                Expression::Is(Box::new(BinaryOp::new(left, right)))
2691                            };
2692                        }
2693                    } else if self.match_token(TokenType::And) {
2694                        let right = self.parse_or()?;
2695                        star_expr = Expression::And(Box::new(BinaryOp::new(left, right)));
2696                    } else if self.match_token(TokenType::Or) {
2697                        let right = self.parse_or()?;
2698                        star_expr = Expression::Or(Box::new(BinaryOp::new(left, right)));
2699                    } else {
2700                        let op_token = self.advance();
2701                        let right = self.parse_or()?;
2702                        star_expr = match op_token.token_type {
2703                            TokenType::Eq => Expression::Eq(Box::new(BinaryOp::new(left, right))),
2704                            TokenType::Neq => Expression::Neq(Box::new(BinaryOp::new(left, right))),
2705                            TokenType::Lt => Expression::Lt(Box::new(BinaryOp::new(left, right))),
2706                            TokenType::Gt => Expression::Gt(Box::new(BinaryOp::new(left, right))),
2707                            TokenType::Lte => Expression::Lte(Box::new(BinaryOp::new(left, right))),
2708                            TokenType::Gte => Expression::Gte(Box::new(BinaryOp::new(left, right))),
2709                            TokenType::Plus => {
2710                                Expression::Add(Box::new(BinaryOp::new(left, right)))
2711                            }
2712                            TokenType::Dash => {
2713                                Expression::Sub(Box::new(BinaryOp::new(left, right)))
2714                            }
2715                            _ => left, // fallback
2716                        };
2717                    }
2718                }
2719                expressions.push(star_expr);
2720            } else {
2721                // Capture leading comments from the first token before parsing
2722                // These are comments on a separate line before the expression
2723                let leading_comments = self.current_leading_comments().to_vec();
2724                let expr = self.parse_expression()?;
2725
2726                // ClickHouse: COLUMNS(id, value) EXCEPT (id) REPLACE (5 AS id) APPLY func
2727                // Also: a.* APPLY(toDate) EXCEPT(i, j) APPLY(any) - qualified star with APPLY
2728                let expr = if matches!(
2729                    self.config.dialect,
2730                    Some(crate::dialects::DialectType::ClickHouse)
2731                ) {
2732                    let is_columns_func = match &expr {
2733                        Expression::Function(f) => f.name.eq_ignore_ascii_case("COLUMNS"),
2734                        Expression::MethodCall(m) => m.method.name.eq_ignore_ascii_case("COLUMNS"),
2735                        Expression::Columns(_) => true,
2736                        _ => false,
2737                    };
2738                    let is_qualified_star = matches!(&expr, Expression::Star(_));
2739                    if (is_columns_func || is_qualified_star)
2740                        && (self.check(TokenType::Except)
2741                            || self.check(TokenType::Exclude)
2742                            || self.check(TokenType::Replace)
2743                            || self.check(TokenType::Apply))
2744                    {
2745                        let mut result = expr;
2746                        // Parse any mix of EXCEPT/REPLACE/APPLY in any order
2747                        // e.g., * APPLY(toDate) EXCEPT(i, j) APPLY(any)
2748                        loop {
2749                            if self.check(TokenType::Except) || self.check(TokenType::Exclude) {
2750                                // Parse EXCEPT/EXCLUDE modifier
2751                                self.skip();
2752                                self.match_identifier("STRICT");
2753                                if self.match_token(TokenType::LParen) {
2754                                    loop {
2755                                        if self.check(TokenType::RParen) {
2756                                            break;
2757                                        }
2758                                        let _ = self.parse_expression()?;
2759                                        if !self.match_token(TokenType::Comma) {
2760                                            break;
2761                                        }
2762                                    }
2763                                    self.expect(TokenType::RParen)?;
2764                                } else if self.is_identifier_token()
2765                                    || self.is_safe_keyword_as_identifier()
2766                                {
2767                                    let _ = self.parse_expression()?;
2768                                }
2769                            } else if self.check(TokenType::Replace) {
2770                                // Parse REPLACE modifier: REPLACE (expr AS alias, ...)
2771                                self.skip();
2772                                self.match_identifier("STRICT");
2773                                if self.match_token(TokenType::LParen) {
2774                                    loop {
2775                                        if self.check(TokenType::RParen) {
2776                                            break;
2777                                        }
2778                                        let _ = self.parse_expression()?;
2779                                        if self.match_token(TokenType::As) {
2780                                            if self.is_identifier_token()
2781                                                || self.is_safe_keyword_as_identifier()
2782                                            {
2783                                                self.skip();
2784                                            }
2785                                        }
2786                                        if !self.match_token(TokenType::Comma) {
2787                                            break;
2788                                        }
2789                                    }
2790                                    self.expect(TokenType::RParen)?;
2791                                } else {
2792                                    let _ = self.parse_expression()?;
2793                                    if self.match_token(TokenType::As) {
2794                                        if self.is_identifier_token()
2795                                            || self.is_safe_keyword_as_identifier()
2796                                        {
2797                                            self.skip();
2798                                        }
2799                                    }
2800                                }
2801                            } else if self.check(TokenType::Apply) {
2802                                // Parse APPLY transformer
2803                                self.skip();
2804                                let apply_expr = if self.match_token(TokenType::LParen) {
2805                                    let e = self.parse_expression()?;
2806                                    self.expect(TokenType::RParen)?;
2807                                    e
2808                                } else {
2809                                    self.parse_expression()?
2810                                };
2811                                result = Expression::Apply(Box::new(crate::expressions::Apply {
2812                                    this: Box::new(result),
2813                                    expression: Box::new(apply_expr),
2814                                }));
2815                            } else {
2816                                break;
2817                            }
2818                        }
2819                        result
2820                    } else {
2821                        expr
2822                    }
2823                } else {
2824                    expr
2825                };
2826
2827                // Capture comments between expression and potential AS
2828                let pre_alias_comments = self.previous_trailing_comments().to_vec();
2829
2830                // DuckDB prefix alias syntax: identifier: expression (e.g., "foo: 1" means "1 AS foo")
2831                // Check if the expression is a simple identifier followed by a colon
2832                let expr = if self.check(TokenType::Colon) && !self.check_next(TokenType::Colon) {
2833                    // Extract the alias name from the identifier expression
2834                    let alias_ident = match &expr {
2835                        Expression::Identifier(id) => Some(id.clone()),
2836                        Expression::Column(col) if col.table.is_none() => Some(col.name.clone()),
2837                        _ => None,
2838                    };
2839                    if let Some(alias) = alias_ident {
2840                        // Consume the colon
2841                        self.skip();
2842                        let colon_comments = self.previous_trailing_comments().to_vec();
2843                        // Parse the actual value expression
2844                        let value = self.parse_expression()?;
2845                        let value_trailing = self.previous_trailing_comments().to_vec();
2846                        // For colon-alias (foo: expr), comments between alias and colon should
2847                        // become trailing comments (placed after the alias in output).
2848                        // Comments after the value expression are also trailing.
2849                        let mut all_trailing = pre_alias_comments.clone();
2850                        all_trailing.extend(colon_comments);
2851                        all_trailing.extend(value_trailing);
2852                        Expression::Alias(Box::new(Alias {
2853                            this: value,
2854                            alias,
2855                            column_aliases: Vec::new(),
2856                            pre_alias_comments: Vec::new(),
2857                            trailing_comments: all_trailing,
2858                            inferred_type: None,
2859                        }))
2860                    } else {
2861                        // Not a simple identifier, fall through to normal alias handling
2862                        // (this handles cases where the expression is complex before the colon)
2863                        expr
2864                    }
2865                } else if self.match_token(TokenType::As) {
2866                    // Capture comments from AS token (e.g., AS /* foo */ (a, b, c))
2867                    // These go into trailing_comments (after the alias), not pre_alias_comments
2868                    let as_comments = self.previous_trailing_comments().to_vec();
2869                    // Check for column aliases: AS (col1, col2) - used by POSEXPLODE etc.
2870                    if self.match_token(TokenType::LParen) {
2871                        let mut column_aliases = Vec::new();
2872                        loop {
2873                            if let Some(col_expr) = self.parse_id_var()? {
2874                                if let Expression::Identifier(id) = col_expr {
2875                                    column_aliases.push(id);
2876                                }
2877                            } else {
2878                                break;
2879                            }
2880                            if !self.match_token(TokenType::Comma) {
2881                                break;
2882                            }
2883                        }
2884                        self.match_token(TokenType::RParen);
2885                        let mut trailing_comments = as_comments;
2886                        trailing_comments.extend_from_slice(self.previous_trailing_comments());
2887                        Expression::Alias(Box::new(Alias {
2888                            this: expr,
2889                            alias: Identifier::new(String::new()),
2890                            column_aliases,
2891                            pre_alias_comments,
2892                            trailing_comments,
2893                            inferred_type: None,
2894                        }))
2895                    } else {
2896                        // Allow keywords as aliases (e.g., SELECT 1 AS filter)
2897                        // Use _with_quoted to preserve quoted alias
2898                        let alias = self.expect_identifier_or_keyword_with_quoted()?;
2899                        let mut trailing_comments = self.previous_trailing_comments().to_vec();
2900                        // If parse_comparison stored pending leading comments (no comparison
2901                        // followed), use those. Otherwise use the leading_comments we captured
2902                        // before parse_expression(). Both come from the same token, so we
2903                        // only add one set to avoid duplication.
2904                        if !self.pending_leading_comments.is_empty() {
2905                            trailing_comments.extend(self.pending_leading_comments.drain(..));
2906                        } else {
2907                            trailing_comments.extend(leading_comments.iter().cloned());
2908                        }
2909                        Expression::Alias(Box::new(Alias {
2910                            this: expr,
2911                            alias,
2912                            column_aliases: Vec::new(),
2913                            pre_alias_comments,
2914                            trailing_comments,
2915                            inferred_type: None,
2916                        }))
2917                    }
2918                } else if ((self.check(TokenType::Var) && !self.check_keyword()) || self.check(TokenType::QuotedIdentifier) || self.can_be_alias_keyword() || self.is_command_keyword_as_alias() || self.check(TokenType::Overlaps)
2919                    // ClickHouse: APPLY without ( is an implicit alias (e.g., SELECT col apply)
2920                    || (self.check(TokenType::Apply) && !self.check_next(TokenType::LParen)
2921                        && matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse))))
2922                    && !self.check_text_seq(&["BULK", "COLLECT", "INTO"])
2923                    // ClickHouse clauses must not be consumed as implicit aliases.
2924                    && !(matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse))
2925                        && (self.check(TokenType::Format) || self.check(TokenType::Settings)))
2926                    // LIMIT/OFFSET/FETCH are clause starters in most dialects and must not
2927                    // be consumed as implicit aliases in SELECT lists.
2928                    && !(
2929                        self.check(TokenType::Fetch)
2930                        || ((self.check(TokenType::Limit) || self.check(TokenType::Offset))
2931                            && !matches!(
2932                                self.config.dialect,
2933                                Some(crate::dialects::DialectType::Spark)
2934                                    | Some(crate::dialects::DialectType::Hive)
2935                            ))
2936                    )
2937                    // GROUP BY / ORDER BY are clause boundaries, not aliases.
2938                    && !self.check_text_seq(&["GROUP", "BY"])
2939                    && !self.check_text_seq(&["ORDER", "BY"])
2940                    // WINDOW is a clause boundary (named window definitions), not an alias.
2941                    && !self.check(TokenType::Window)
2942                    // ClickHouse: PARALLEL WITH is a statement separator, not an alias.
2943                    && !(self.check_identifier("PARALLEL") && self.check_next(TokenType::With)
2944                        && matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse)))
2945                {
2946                    // Implicit alias (without AS) - allow Var tokens, QuotedIdentifiers, command keywords (like GET, PUT, etc.), and OVERLAPS
2947                    // But NOT when it's the Oracle BULK COLLECT INTO sequence
2948                    let alias_token = self.advance();
2949                    let alias_text = alias_token.text.clone();
2950                    let is_quoted = alias_token.token_type == TokenType::QuotedIdentifier;
2951                    let trailing_comments = self.previous_trailing_comments().to_vec();
2952                    Expression::Alias(Box::new(Alias {
2953                        this: expr,
2954                        alias: Identifier {
2955                            name: alias_text,
2956                            quoted: is_quoted,
2957                            trailing_comments: Vec::new(),
2958                            span: None,
2959                        },
2960                        column_aliases: Vec::new(),
2961                        pre_alias_comments,
2962                        trailing_comments,
2963                        inferred_type: None,
2964                    }))
2965                } else if !pre_alias_comments.is_empty() {
2966                    // Only wrap in Annotated if the expression doesn't already handle trailing comments.
2967                    // BinaryOp, Column, Cast, Function, etc. have their own trailing_comments field that the generator uses.
2968                    let already_has_trailing = matches!(
2969                        &expr,
2970                        Expression::Add(_)
2971                            | Expression::Sub(_)
2972                            | Expression::Mul(_)
2973                            | Expression::Div(_)
2974                            | Expression::Mod(_)
2975                            | Expression::Concat(_)
2976                            | Expression::BitwiseAnd(_)
2977                            | Expression::BitwiseOr(_)
2978                            | Expression::BitwiseXor(_)
2979                            | Expression::Column(_)
2980                            | Expression::Paren(_)
2981                            | Expression::Annotated(_)
2982                            | Expression::Cast(_)
2983                            | Expression::Function(_)
2984                            | Expression::Subquery(_)
2985                    );
2986                    if already_has_trailing {
2987                        expr
2988                    } else {
2989                        // Wrap in Annotated to preserve trailing comments
2990                        Expression::Annotated(Box::new(Annotated {
2991                            this: expr,
2992                            trailing_comments: pre_alias_comments,
2993                        }))
2994                    }
2995                } else if !leading_comments.is_empty() {
2996                    // Wrap in Annotated to preserve leading comments as trailing comments
2997                    Expression::Annotated(Box::new(Annotated {
2998                        this: expr,
2999                        trailing_comments: leading_comments,
3000                    }))
3001                } else {
3002                    expr
3003                };
3004
3005                expressions.push(expr);
3006            }
3007
3008            if !self.match_token(TokenType::Comma) {
3009                break;
3010            }
3011
3012            // Handle trailing comma (ClickHouse supports trailing commas in SELECT)
3013            // ClickHouse: `from` after comma is a column name if followed by an operator
3014            // (e.g., `from + from` or `from in [0]`), comma, or line-end
3015            let from_is_column = matches!(
3016                self.config.dialect,
3017                Some(crate::dialects::DialectType::ClickHouse)
3018            ) && self.check(TokenType::From)
3019                && {
3020                    let next_tt = self
3021                        .peek_nth(1)
3022                        .map(|t| t.token_type)
3023                        .unwrap_or(TokenType::Semicolon);
3024                    matches!(
3025                        next_tt,
3026                        TokenType::Plus
3027                            | TokenType::Dash
3028                            | TokenType::Star
3029                            | TokenType::Slash
3030                            | TokenType::Percent
3031                            | TokenType::Eq
3032                            | TokenType::Neq
3033                            | TokenType::Lt
3034                            | TokenType::Gt
3035                            | TokenType::Lte
3036                            | TokenType::Gte
3037                            | TokenType::And
3038                            | TokenType::Or
3039                            | TokenType::Comma
3040                            | TokenType::Dot
3041                            | TokenType::In
3042                            | TokenType::Is
3043                            | TokenType::Not
3044                            | TokenType::Like
3045                            | TokenType::Between
3046                            | TokenType::Semicolon
3047                            | TokenType::RParen
3048                            | TokenType::As
3049                            | TokenType::DPipe
3050                            | TokenType::Amp
3051                            | TokenType::Pipe
3052                            | TokenType::LBracket
3053                    )
3054                };
3055            if (self.config.allow_trailing_commas
3056                || matches!(
3057                    self.config.dialect,
3058                    Some(crate::dialects::DialectType::ClickHouse)
3059                ))
3060                && (!from_is_column && self.check_from_keyword()
3061                    || self.check(TokenType::Where)
3062                    || self.check(TokenType::GroupBy)
3063                    || self.check(TokenType::Having)
3064                    || self.check(TokenType::Order)
3065                    || self.check(TokenType::Limit)
3066                    || self.check(TokenType::Union)
3067                    || self.check(TokenType::Intersect)
3068                    || (self.check(TokenType::Except) && !self.check_next(TokenType::LParen) && !self.check_next(TokenType::Comma))
3069                    || self.check(TokenType::Semicolon)
3070                    || self.check(TokenType::RParen)
3071                    // SETTINGS/FORMAT only as boundaries when NOT followed by ( or [ (function/column ref)
3072                    || (self.check(TokenType::Settings) && !self.check_next(TokenType::LParen) && !self.check_next(TokenType::LBracket))
3073                    || (self.check(TokenType::Format) && !self.check_next(TokenType::LParen))
3074                    || self.is_at_end())
3075            {
3076                break;
3077            }
3078        }
3079
3080        Ok(expressions)
3081    }
3082
3083    /// Parse DuckDB FROM-first query syntax
3084    /// FROM tbl = SELECT * FROM tbl
3085    /// FROM tbl SELECT col1, col2 = SELECT col1, col2 FROM tbl
3086    fn parse_from_first_query(&mut self) -> Result<Expression> {
3087        self.expect(TokenType::From)?;
3088
3089        // Parse the FROM clause (table references)
3090        let from = self.parse_from()?;
3091
3092        // Check if there's an explicit SELECT clause after FROM
3093        let expressions = if self.check(TokenType::Select) {
3094            self.skip(); // consume SELECT
3095            self.parse_select_expressions()?
3096        } else {
3097            // No explicit SELECT means SELECT *
3098            vec![Expression::Star(crate::expressions::Star {
3099                table: None,
3100                except: None,
3101                replace: None,
3102                rename: None,
3103                trailing_comments: Vec::new(),
3104                span: None,
3105            })]
3106        };
3107
3108        // Parse PREWHERE clause (ClickHouse specific)
3109        let prewhere = if self.match_token(TokenType::Prewhere) {
3110            Some(self.parse_expression()?)
3111        } else {
3112            None
3113        };
3114
3115        // Parse WHERE clause
3116        let where_clause = if self.match_token(TokenType::Where) {
3117            Some(Where {
3118                this: self.parse_expression()?,
3119            })
3120        } else {
3121            None
3122        };
3123
3124        // Parse GROUP BY
3125        let group_by = if self.match_token(TokenType::Group) {
3126            self.expect(TokenType::By)?;
3127            let mut groups = Vec::new();
3128            loop {
3129                groups.push(self.parse_expression()?);
3130                if !self.match_token(TokenType::Comma) {
3131                    break;
3132                }
3133            }
3134            Some(GroupBy {
3135                expressions: groups,
3136                all: None,
3137                totals: false,
3138                comments: Vec::new(),
3139            })
3140        } else {
3141            None
3142        };
3143
3144        // Parse HAVING
3145        let having = if self.match_token(TokenType::Having) {
3146            Some(Having {
3147                this: self.parse_expression()?,
3148                comments: Vec::new(),
3149            })
3150        } else {
3151            None
3152        };
3153
3154        // Parse ORDER BY
3155        let order_by = if self.match_token(TokenType::Order) {
3156            self.expect(TokenType::By)?;
3157            Some(self.parse_order_by()?)
3158        } else {
3159            None
3160        };
3161
3162        // Parse LIMIT
3163        let limit = if self.match_token(TokenType::Limit) {
3164            let first_expr = self.parse_expression()?;
3165            Some(Limit {
3166                this: first_expr,
3167                percent: false,
3168                comments: Vec::new(),
3169            })
3170        } else {
3171            None
3172        };
3173
3174        // Parse OFFSET
3175        let offset = if self.match_token(TokenType::Offset) {
3176            let expr = self.parse_expression()?;
3177            let rows = if self.match_token(TokenType::Row) || self.match_token(TokenType::Rows) {
3178                Some(true)
3179            } else {
3180                None
3181            };
3182            Some(Offset { this: expr, rows })
3183        } else {
3184            None
3185        };
3186
3187        // Build SELECT expression
3188        let select = Select {
3189            expressions,
3190            from: Some(from),
3191            joins: Vec::new(),
3192            lateral_views: Vec::new(),
3193            prewhere,
3194            where_clause,
3195            group_by,
3196            having,
3197            qualify: None,
3198            order_by,
3199            distribute_by: None,
3200            cluster_by: None,
3201            sort_by: None,
3202            limit,
3203            offset,
3204            limit_by: None,
3205            fetch: None,
3206            distinct: false,
3207            distinct_on: None,
3208            top: None,
3209            with: None,
3210            sample: None,
3211            settings: None,
3212            format: None,
3213            windows: None,
3214            hint: None,
3215            connect: None,
3216            into: None,
3217            locks: Vec::new(),
3218            for_xml: Vec::new(),
3219            for_json: Vec::new(),
3220            leading_comments: Vec::new(),
3221            post_select_comments: Vec::new(),
3222            kind: None,
3223            operation_modifiers: Vec::new(),
3224            qualify_after_window: false,
3225            option: None,
3226            exclude: None,
3227        };
3228
3229        // Check for set operations (UNION, INTERSECT, EXCEPT)
3230        let result = Expression::Select(Box::new(select));
3231        self.parse_set_operation(result)
3232    }
3233
3234    /// Parse FROM clause
3235    fn parse_from(&mut self) -> Result<From> {
3236        let mut expressions = Vec::new();
3237
3238        loop {
3239            // Capture leading comments before each table expression
3240            // (e.g., FROM \n/* comment */\n table_name)
3241            let pre_table_comments = if !self.is_at_end() {
3242                self.tokens[self.current].comments.clone()
3243            } else {
3244                Vec::new()
3245            };
3246            // Clear them from the token to avoid double output
3247            if !pre_table_comments.is_empty() && !self.is_at_end() {
3248                self.tokens[self.current].comments.clear();
3249            }
3250
3251            let mut table = self.parse_table_expression()?;
3252
3253            // Attach captured comments as trailing on the outermost expression
3254            if !pre_table_comments.is_empty() {
3255                match &mut table {
3256                    Expression::Pivot(p) => {
3257                        // For PIVOT, find the inner table and add to its leading_comments
3258                        // The generator will output these after the PIVOT clause
3259                        if let Expression::Table(ref mut t) = p.this {
3260                            t.leading_comments = pre_table_comments;
3261                        }
3262                    }
3263                    Expression::Table(ref mut t) => {
3264                        t.trailing_comments.extend(pre_table_comments);
3265                    }
3266                    _ => {}
3267                }
3268            }
3269            expressions.push(table);
3270
3271            if !self.match_token(TokenType::Comma) {
3272                break;
3273            }
3274
3275            // Handle trailing comma in FROM clause (Snowflake allows this)
3276            // If next token is a clause boundary keyword or end of input, break
3277            // Note: For Redshift, UNPIVOT after comma is a table expression (SUPER object traversal),
3278            // so we don't treat it as a boundary in that case
3279            let is_redshift = matches!(
3280                self.config.dialect,
3281                Some(crate::dialects::DialectType::Redshift)
3282            );
3283            let is_unpivot_boundary = !is_redshift && self.check(TokenType::Unpivot);
3284            if self.is_at_end()
3285                || is_unpivot_boundary
3286                || matches!(
3287                    self.peek().token_type,
3288                    TokenType::Where
3289                        | TokenType::GroupBy
3290                        | TokenType::Having
3291                        | TokenType::Order
3292                        | TokenType::Limit
3293                        | TokenType::Offset
3294                        | TokenType::Union
3295                        | TokenType::Intersect
3296                        | TokenType::Except
3297                        | TokenType::Semicolon
3298                        | TokenType::RParen
3299                        | TokenType::Window
3300                        | TokenType::Qualify
3301                        | TokenType::Distribute
3302                        | TokenType::Cluster
3303                        | TokenType::Pivot
3304                )
3305            {
3306                break;
3307            }
3308        }
3309
3310        Ok(From { expressions })
3311    }
3312
3313    /// Parse a table expression (table name, subquery, etc.)
3314    fn parse_table_expression(&mut self) -> Result<Expression> {
3315        // Handle PostgreSQL ONLY modifier: FROM ONLY t1
3316        // ONLY prevents scanning child tables in inheritance hierarchy
3317        let has_only = self.match_token(TokenType::Only);
3318
3319        // Handle PostgreSQL ROWS FROM syntax:
3320        // ROWS FROM (func1(args) AS alias1(col1 type1), func2(args) AS alias2(col2 type2)) [WITH ORDINALITY] [AS alias(cols)]
3321        if self.match_text_seq(&["ROWS", "FROM"]) {
3322            return self.parse_rows_from();
3323        }
3324
3325        // Redshift UNPIVOT in FROM clause for SUPER object traversal:
3326        // UNPIVOT expr [AS val_alias AT attr_alias]
3327        // Examples:
3328        //   UNPIVOT c.c_orders[0]
3329        //   UNPIVOT c.c_orders AS val AT attr
3330        if self.match_token(TokenType::Unpivot) {
3331            return self.parse_redshift_unpivot_table();
3332        }
3333
3334        let mut expr = if self.check(TokenType::Values) && self.check_next(TokenType::LParen) {
3335            // VALUES as table expression: FROM (VALUES ...)
3336            // In ClickHouse, bare `values` without ( is a table name
3337            self.parse_values()?
3338        } else if self.check(TokenType::Values)
3339            && matches!(
3340                self.config.dialect,
3341                Some(crate::dialects::DialectType::ClickHouse)
3342            )
3343        {
3344            // ClickHouse: `values` as a table name (not followed by LParen)
3345            let token = self.advance();
3346            let ident = Identifier::new(token.text);
3347            let trailing_comments = self.previous_trailing_comments().to_vec();
3348            Expression::boxed_table(TableRef {
3349                name: ident,
3350                schema: None,
3351                catalog: None,
3352                alias: None,
3353                alias_explicit_as: false,
3354                column_aliases: Vec::new(),
3355                leading_comments: Vec::new(),
3356                trailing_comments,
3357                when: None,
3358                only: false,
3359                final_: false,
3360                table_sample: None,
3361                hints: Vec::new(),
3362                system_time: None,
3363                partitions: Vec::new(),
3364                identifier_func: None,
3365                changes: None,
3366                version: None,
3367                span: None,
3368            })
3369        } else if self.check(TokenType::DAt) {
3370            // Snowflake stage reference: @stage_name or @"stage_name" or @namespace.stage/path
3371            self.parse_stage_reference()?
3372        } else if self.check(TokenType::Var) && self.peek().text.starts_with('@') {
3373            // Snowflake stage reference tokenized as Var: @mystage/path
3374            // When @ is followed by alphanumeric, tokenizer creates a Var token instead of DAt
3375            self.parse_stage_reference_from_var()?
3376        } else if self.check(TokenType::String) && self.peek().text.starts_with('@') {
3377            // Snowflake stage reference in string: '@mystage' or '@external/location'
3378            self.parse_stage_reference_from_string()?
3379        } else if self.match_token(TokenType::Lateral) {
3380            if self.check(TokenType::LParen) {
3381                // LATERAL (SELECT ...) or LATERAL (table_expression) or LATERAL (FROM ...) for DuckDB
3382                self.expect(TokenType::LParen)?;
3383                if self.check(TokenType::Select)
3384                    || self.check(TokenType::With)
3385                    || self.check(TokenType::From)
3386                {
3387                    let query = self.parse_statement()?;
3388                    self.expect(TokenType::RParen)?;
3389                    Expression::Subquery(Box::new(Subquery {
3390                        this: query,
3391                        alias: None,
3392                        column_aliases: Vec::new(),
3393                        order_by: None,
3394                        limit: None,
3395                        offset: None,
3396                        lateral: true,
3397                        modifiers_inside: false,
3398                        trailing_comments: Vec::new(),
3399                        distribute_by: None,
3400                        sort_by: None,
3401                        cluster_by: None,
3402                        inferred_type: None,
3403                    }))
3404                } else {
3405                    // LATERAL (table_function()) - parenthesized non-subquery
3406                    let table_expr = self.parse_table_expression()?;
3407                    self.expect(TokenType::RParen)?;
3408                    Expression::Subquery(Box::new(Subquery {
3409                        this: table_expr,
3410                        alias: None,
3411                        column_aliases: Vec::new(),
3412                        order_by: None,
3413                        limit: None,
3414                        offset: None,
3415                        lateral: true,
3416                        modifiers_inside: false,
3417                        trailing_comments: Vec::new(),
3418                        distribute_by: None,
3419                        sort_by: None,
3420                        cluster_by: None,
3421                        inferred_type: None,
3422                    }))
3423                }
3424            } else {
3425                // LATERAL function_name(args) [WITH ORDINALITY] [AS alias(columns)]
3426                // Parse function name
3427                let first_ident = self.expect_identifier_or_keyword_with_quoted()?;
3428                let first_name = first_ident.name.clone();
3429
3430                // Parse function arguments
3431                self.expect(TokenType::LParen)?;
3432                let args = if self.check(TokenType::RParen) {
3433                    Vec::new()
3434                } else {
3435                    self.parse_function_arguments()?
3436                };
3437                self.expect(TokenType::RParen)?;
3438
3439                // Handle UNNEST specially to create UnnestFunc expression
3440                let mut func_expr = if first_name.eq_ignore_ascii_case("UNNEST") {
3441                    let mut args_iter = args.into_iter();
3442                    let this = args_iter
3443                        .next()
3444                        .ok_or_else(|| self.parse_error("Expected expression in UNNEST"))?;
3445                    let expressions: Vec<Expression> = args_iter.collect();
3446                    Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
3447                        this,
3448                        expressions,
3449                        with_ordinality: false,
3450                        alias: None,
3451                        offset_alias: None,
3452                    }))
3453                } else {
3454                    Expression::Function(Box::new(Function {
3455                        name: first_name,
3456                        args,
3457                        distinct: false,
3458                        trailing_comments: Vec::new(),
3459                        use_bracket_syntax: false,
3460                        no_parens: false,
3461                        quoted: false,
3462                        span: None,
3463                        inferred_type: None,
3464                    }))
3465                };
3466
3467                // Check for WITH ORDINALITY (Presto) or WITH OFFSET (BigQuery)
3468                let mut with_offset_alias: Option<crate::expressions::Identifier> = None;
3469                let ordinality = if self.match_token(TokenType::With) {
3470                    if self.match_token(TokenType::Ordinality) {
3471                        Some(Box::new(Expression::Boolean(BooleanLiteral {
3472                            value: true,
3473                        })))
3474                    } else if self.check(TokenType::Offset) || self.check_identifier("OFFSET") {
3475                        // BigQuery: WITH OFFSET [AS alias]
3476                        self.skip(); // consume OFFSET
3477                                     // Check for optional offset alias: WITH OFFSET AS y or WITH OFFSET y
3478                        if matches!(
3479                            self.config.dialect,
3480                            Some(crate::dialects::DialectType::BigQuery)
3481                        ) {
3482                            let has_as = self.match_token(TokenType::As);
3483                            if has_as
3484                                || self.check(TokenType::Identifier)
3485                                || self.check(TokenType::Var)
3486                            {
3487                                let alias_name = self.advance().text;
3488                                with_offset_alias = Some(crate::expressions::Identifier {
3489                                    name: alias_name,
3490                                    quoted: false,
3491                                    trailing_comments: Vec::new(),
3492                                    span: None,
3493                                });
3494                            }
3495                        }
3496                        Some(Box::new(Expression::Boolean(BooleanLiteral {
3497                            value: true,
3498                        })))
3499                    } else {
3500                        // Not ORDINALITY or OFFSET, put back WITH
3501                        self.current -= 1;
3502                        None
3503                    }
3504                } else {
3505                    None
3506                };
3507
3508                // Update the inner UnnestFunc with WITH ORDINALITY/OFFSET info
3509                if ordinality.is_some() {
3510                    if let Expression::Unnest(ref mut u) = func_expr {
3511                        u.with_ordinality = true;
3512                        u.offset_alias = with_offset_alias;
3513                    }
3514                }
3515
3516                // Parse optional alias: AS alias or just alias
3517                let alias_ident = if self.match_token(TokenType::As) {
3518                    Some(self.expect_identifier_or_keyword_with_quoted()?)
3519                } else if !self.is_at_end()
3520                    && !self.check(TokenType::Comma)
3521                    && !self.check(TokenType::RParen)
3522                    && !self.check(TokenType::On)
3523                    && !self.check(TokenType::Cross)
3524                    && !self.check(TokenType::Inner)
3525                    && !self.check(TokenType::Left)
3526                    && !self.check(TokenType::Right)
3527                    && !self.check(TokenType::Full)
3528                    && !self.check(TokenType::Join)
3529                    && !self.check(TokenType::Where)
3530                    && !self.check(TokenType::Order)
3531                    && !self.check(TokenType::Limit)
3532                    && !self.check(TokenType::Semicolon)
3533                    && (self.check(TokenType::Identifier) || self.check(TokenType::Var))
3534                {
3535                    Some(self.expect_identifier_or_keyword_with_quoted()?)
3536                } else {
3537                    None
3538                };
3539                let alias_quoted = alias_ident.as_ref().map_or(false, |id| id.quoted);
3540                let alias = alias_ident.map(|id| id.name);
3541
3542                // Parse column aliases: (col1, col2, ...)
3543                let column_aliases = if alias.is_some() && self.match_token(TokenType::LParen) {
3544                    let mut cols = Vec::new();
3545                    loop {
3546                        cols.push(self.expect_identifier_or_keyword()?);
3547                        if !self.match_token(TokenType::Comma) {
3548                            break;
3549                        }
3550                    }
3551                    self.expect(TokenType::RParen)?;
3552                    cols
3553                } else {
3554                    Vec::new()
3555                };
3556
3557                Expression::Lateral(Box::new(Lateral {
3558                    this: Box::new(func_expr),
3559                    view: None,
3560                    outer: None,
3561                    alias,
3562                    alias_quoted,
3563                    cross_apply: None,
3564                    ordinality,
3565                    column_aliases,
3566                }))
3567            }
3568        } else if self.match_token(TokenType::LParen) {
3569            // Subquery or parenthesized set operation or (VALUES ...)
3570            if self.check(TokenType::Values) {
3571                // (VALUES (...), (...)) AS t(c1, c2) or (VALUES (0) foo(bar))
3572                let mut values = self.parse_values()?;
3573                self.expect(TokenType::RParen)?;
3574                // Extract alias from Values if present and move to Subquery
3575                let (alias, column_aliases) = if let Expression::Values(ref mut v) = values {
3576                    (v.alias.take(), std::mem::take(&mut v.column_aliases))
3577                } else {
3578                    (None, Vec::new())
3579                };
3580                Expression::Subquery(Box::new(Subquery {
3581                    this: values,
3582                    alias,
3583                    column_aliases,
3584                    order_by: None,
3585                    limit: None,
3586                    offset: None,
3587                    distribute_by: None,
3588                    sort_by: None,
3589                    cluster_by: None,
3590                    lateral: false,
3591                    modifiers_inside: false,
3592                    trailing_comments: self.previous_trailing_comments().to_vec(),
3593                    inferred_type: None,
3594                }))
3595            } else if self.check(TokenType::Select)
3596                || self.check(TokenType::With)
3597                || self.check(TokenType::Pivot)
3598                || self.check(TokenType::Unpivot)
3599                || self.check(TokenType::From)
3600                || self.check(TokenType::Merge)
3601                || self.check(TokenType::Describe)
3602                || (self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("EXPLAIN"))
3603                || (self.check(TokenType::Var)
3604                    && self.peek().text.eq_ignore_ascii_case("SUMMARIZE"))
3605            {
3606                let query = self.parse_statement()?;
3607                self.expect(TokenType::RParen)?;
3608                let trailing = self.previous_trailing_comments().to_vec();
3609                // Check for set operations after parenthesized query
3610                // If there's a set operation, wrap query in Subquery first to preserve parens
3611                // e.g., (SELECT 1) UNION (SELECT 2) - the left operand needs Subquery wrapper
3612                let result = if self.check(TokenType::Union)
3613                    || self.check(TokenType::Intersect)
3614                    || self.check(TokenType::Except)
3615                {
3616                    let left = Expression::Subquery(Box::new(Subquery {
3617                        this: query,
3618                        alias: None,
3619                        column_aliases: Vec::new(),
3620                        order_by: None,
3621                        limit: None,
3622                        offset: None,
3623                        lateral: false,
3624                        modifiers_inside: false,
3625                        trailing_comments: Vec::new(),
3626                        distribute_by: None,
3627                        sort_by: None,
3628                        cluster_by: None,
3629                        inferred_type: None,
3630                    }));
3631                    self.parse_set_operation(left)?
3632                } else {
3633                    query
3634                };
3635                Expression::Subquery(Box::new(Subquery {
3636                    this: result,
3637                    alias: None,
3638                    column_aliases: Vec::new(),
3639                    order_by: None,
3640                    limit: None,
3641                    offset: None,
3642                    distribute_by: None,
3643                    sort_by: None,
3644                    cluster_by: None,
3645                    lateral: false,
3646                    modifiers_inside: false,
3647                    trailing_comments: trailing,
3648                    inferred_type: None,
3649                }))
3650            } else if self.check(TokenType::LParen) {
3651                // Nested parens like ((SELECT ...)) or ((x))
3652                // Also handles ((SELECT 1) UNION (SELECT 2)) - set operations inside parens
3653                let inner = self.parse_table_expression()?;
3654
3655                // Handle alias on subquery before set operation: ((SELECT 1) AS a UNION ALL (SELECT 2) AS b)
3656                let inner = if self.match_token(TokenType::As) {
3657                    let alias = self.expect_identifier()?;
3658                    if let Expression::Subquery(mut subq) = inner {
3659                        subq.alias = Some(Identifier::new(alias));
3660                        Expression::Subquery(subq)
3661                    } else {
3662                        Expression::Alias(Box::new(Alias::new(inner, Identifier::new(alias))))
3663                    }
3664                } else if self.is_identifier_token()
3665                    && !self.check(TokenType::Union)
3666                    && !self.check(TokenType::Intersect)
3667                    && !self.check(TokenType::Except)
3668                    && !self.check(TokenType::Cross)
3669                    && !self.check(TokenType::Inner)
3670                    && !self.check(TokenType::Left)
3671                    && !self.check(TokenType::Right)
3672                    && !self.check(TokenType::Full)
3673                    && !self.check(TokenType::Join)
3674                    && !self.check(TokenType::Order)
3675                    && !self.check(TokenType::Limit)
3676                    && !self.check(TokenType::Offset)
3677                    && !self.check(TokenType::Xor)
3678                {
3679                    // Implicit alias (no AS keyword)
3680                    let alias = self.expect_identifier()?;
3681                    if let Expression::Subquery(mut subq) = inner {
3682                        subq.alias = Some(Identifier::new(alias));
3683                        Expression::Subquery(subq)
3684                    } else {
3685                        Expression::Alias(Box::new(Alias::new(inner, Identifier::new(alias))))
3686                    }
3687                } else {
3688                    inner
3689                };
3690
3691                // ClickHouse: ((SELECT 1) AS x, (SELECT 2) AS y) — tuple of aliased subqueries
3692                if matches!(
3693                    self.config.dialect,
3694                    Some(crate::dialects::DialectType::ClickHouse)
3695                ) && self.check(TokenType::Comma)
3696                {
3697                    let mut exprs = vec![inner];
3698                    while self.match_token(TokenType::Comma) {
3699                        if self.check(TokenType::RParen) {
3700                            break;
3701                        }
3702                        let e = self.parse_expression()?;
3703                        exprs.push(e);
3704                    }
3705                    self.expect(TokenType::RParen)?;
3706                    return Ok(Expression::Tuple(Box::new(Tuple { expressions: exprs })));
3707                }
3708
3709                // Check for set operations after the first table expression
3710                let had_set_operation = self.check(TokenType::Union)
3711                    || self.check(TokenType::Intersect)
3712                    || self.check(TokenType::Except);
3713                let result = if had_set_operation {
3714                    // This is a set operation like ((SELECT 1) UNION (SELECT 2))
3715                    // Wrap inner in a subquery-like expression and parse set operation
3716                    let set_result = self.parse_set_operation(inner)?;
3717                    set_result
3718                } else if self.check(TokenType::Cross)
3719                    || self.check(TokenType::Inner)
3720                    || self.check(TokenType::Left)
3721                    || self.check(TokenType::Right)
3722                    || self.check(TokenType::Full)
3723                    || self.check(TokenType::Join)
3724                {
3725                    // This is a join: ((SELECT 1) CROSS JOIN (SELECT 2))
3726                    let joins = self.parse_joins()?;
3727                    let lateral_views = self.parse_lateral_views()?;
3728                    Expression::JoinedTable(Box::new(JoinedTable {
3729                        left: inner,
3730                        joins,
3731                        lateral_views,
3732                        alias: None,
3733                    }))
3734                } else {
3735                    inner
3736                };
3737
3738                // Handle ORDER BY, LIMIT, OFFSET after set operations inside parens
3739                let result = if self.check(TokenType::Order) {
3740                    // Wrap in a subquery with order/limit
3741                    self.expect(TokenType::Order)?;
3742                    self.expect(TokenType::By)?;
3743                    let order_by = self.parse_order_by()?;
3744                    let limit = if self.match_token(TokenType::Limit) {
3745                        Some(Limit {
3746                            this: self.parse_expression()?,
3747                            percent: false,
3748                            comments: Vec::new(),
3749                        })
3750                    } else {
3751                        None
3752                    };
3753                    let offset = if self.match_token(TokenType::Offset) {
3754                        Some(Offset {
3755                            this: self.parse_expression()?,
3756                            rows: None,
3757                        })
3758                    } else {
3759                        None
3760                    };
3761                    Expression::Subquery(Box::new(Subquery {
3762                        this: result,
3763                        alias: None,
3764                        column_aliases: Vec::new(),
3765                        order_by: Some(order_by),
3766                        limit,
3767                        offset,
3768                        distribute_by: None,
3769                        sort_by: None,
3770                        cluster_by: None,
3771                        lateral: false,
3772                        modifiers_inside: true, // ORDER BY was inside the parens
3773                        trailing_comments: Vec::new(),
3774                        inferred_type: None,
3775                    }))
3776                } else if self.check(TokenType::Limit) || self.check(TokenType::Offset) {
3777                    // LIMIT/OFFSET without ORDER BY
3778                    let limit = if self.match_token(TokenType::Limit) {
3779                        Some(Limit {
3780                            this: self.parse_expression()?,
3781                            percent: false,
3782                            comments: Vec::new(),
3783                        })
3784                    } else {
3785                        None
3786                    };
3787                    let offset = if self.match_token(TokenType::Offset) {
3788                        Some(Offset {
3789                            this: self.parse_expression()?,
3790                            rows: None,
3791                        })
3792                    } else {
3793                        None
3794                    };
3795                    Expression::Subquery(Box::new(Subquery {
3796                        this: result,
3797                        alias: None,
3798                        column_aliases: Vec::new(),
3799                        order_by: None,
3800                        limit,
3801                        offset,
3802                        distribute_by: None,
3803                        sort_by: None,
3804                        cluster_by: None,
3805                        lateral: false,
3806                        modifiers_inside: true, // LIMIT/OFFSET was inside the parens
3807                        trailing_comments: Vec::new(),
3808                        inferred_type: None,
3809                    }))
3810                } else {
3811                    result
3812                };
3813
3814                self.expect(TokenType::RParen)?;
3815                // Wrap result in Paren to preserve the outer parentheses when needed
3816                // Cases:
3817                // - ((SELECT 1)) -> Paren(Subquery(Select)) - inner was subquery of SELECT, wrap in Paren
3818                // - ((SELECT 1) UNION (SELECT 2)) -> Subquery(Union) - recursive call handled set op, don't add Paren
3819                // - ((SELECT 1) AS a UNION ALL ...) -> Union - we handled set op, need to add Paren
3820                // - (((SELECT 1) UNION SELECT 2) ORDER BY x) -> Subquery with modifiers_inside=true
3821                let had_modifiers = matches!(&result, Expression::Subquery(s) if s.order_by.is_some() || s.limit.is_some() || s.offset.is_some());
3822                let result_is_subquery_of_set_op = matches!(&result, Expression::Subquery(s) if matches!(&s.this, Expression::Union(_) | Expression::Intersect(_) | Expression::Except(_)));
3823                if had_modifiers || result_is_subquery_of_set_op {
3824                    // Subquery with modifiers or Subquery(Union) - already has proper structure
3825                    result
3826                } else {
3827                    // All other cases need Paren wrapper to preserve outer parentheses
3828                    Expression::Paren(Box::new(Paren {
3829                        this: result,
3830                        trailing_comments: Vec::new(),
3831                    }))
3832                }
3833            } else if self.is_identifier_token()
3834                || self.is_safe_keyword_as_identifier()
3835                || self.can_be_alias_keyword()
3836            {
3837                // Parenthesized join expression: (tbl1 CROSS JOIN tbl2) or just (x)
3838                // Also allow safe keywords and alias keywords (all, left, etc.) as table names
3839                let (left, joins) = self.parse_table_expression_with_joins()?;
3840                // Parse LATERAL VIEW after joins: (x CROSS JOIN foo LATERAL VIEW EXPLODE(y))
3841                let lateral_views = self.parse_lateral_views()?;
3842                self.expect(TokenType::RParen)?;
3843                if joins.is_empty() && lateral_views.is_empty() {
3844                    // Just a parenthesized table expression, wrap in Paren to preserve parens
3845                    Expression::Paren(Box::new(Paren {
3846                        this: left,
3847                        trailing_comments: Vec::new(),
3848                    }))
3849                } else {
3850                    // Create a JoinedTable
3851                    Expression::JoinedTable(Box::new(JoinedTable {
3852                        left,
3853                        joins,
3854                        lateral_views,
3855                        alias: None, // Alias is parsed separately after this
3856                    }))
3857                }
3858            } else {
3859                let query = self.parse_statement()?;
3860                self.expect(TokenType::RParen)?;
3861                Expression::Subquery(Box::new(Subquery {
3862                    this: query,
3863                    alias: None,
3864                    column_aliases: Vec::new(),
3865                    order_by: None,
3866                    limit: None,
3867                    offset: None,
3868                    distribute_by: None,
3869                    sort_by: None,
3870                    cluster_by: None,
3871                    lateral: false,
3872                    modifiers_inside: false,
3873                    trailing_comments: self.previous_trailing_comments().to_vec(),
3874                    inferred_type: None,
3875                }))
3876            }
3877        } else if self.is_identifier_token() || self.is_safe_keyword_as_identifier() || self.can_be_alias_keyword()
3878            || (matches!(self.config.dialect, Some(crate::dialects::DialectType::BigQuery)) && self.check(TokenType::Number))
3879            || self.is_mysql_numeric_identifier()
3880            // PIVOT/UNPIVOT can be table names when not followed by (
3881            || (self.check(TokenType::Pivot) && !self.check_next(TokenType::LParen))
3882            || (self.check(TokenType::Unpivot) && !self.check_next(TokenType::LParen))
3883            // ClickHouse: braced query parameters as table names {db:Identifier}.table
3884            || (matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse)) && self.check(TokenType::LBrace))
3885            // ClickHouse: allow union/except/intersect as table names when not followed by ALL/DISTINCT/SELECT/(
3886            || (matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse))
3887                && (self.check(TokenType::Union) || self.check(TokenType::Except) || self.check(TokenType::Intersect))
3888                && !self.check_next(TokenType::All) && !self.check_next(TokenType::Distinct)
3889                && !self.check_next(TokenType::Select) && !self.check_next(TokenType::LParen))
3890        {
3891            // Table name - could be simple, qualified, or table function
3892            // Also allow safe keywords (like 'table', 'view', 'case', 'all', etc.) as table names
3893            // BigQuery: also allows numeric table parts and hyphenated identifiers
3894            // MySQL: allows numeric-starting identifiers (e.g., 00f, 1d)
3895
3896            // DuckDB prefix alias syntax: alias: table (e.g., "foo: bar" means "bar AS foo")
3897            // Check if next token is COLON (but not :: which is DCOLON for casts)
3898            if matches!(
3899                self.config.dialect,
3900                Some(crate::dialects::DialectType::DuckDB)
3901            ) && self.check_next(TokenType::Colon)
3902                && !(self.current + 2 < self.tokens.len()
3903                    && self.tokens[self.current + 2].token_type == TokenType::Colon)
3904            {
3905                // Parse the alias identifier
3906                let alias_ident = self.parse_bigquery_table_part()?;
3907                let pre_alias_comments = self.previous_trailing_comments().to_vec();
3908                // Consume the colon
3909                self.expect(TokenType::Colon)?;
3910                let colon_comments = self.previous_trailing_comments().to_vec();
3911                // Parse the actual table expression recursively
3912                let mut table_expr = self.parse_table_expression()?;
3913                // Merge comments
3914                let mut all_comments = pre_alias_comments;
3915                all_comments.extend(colon_comments);
3916                // Apply the alias to the table expression
3917                match &mut table_expr {
3918                    Expression::Table(ref mut t) => {
3919                        t.alias = Some(alias_ident);
3920                        t.alias_explicit_as = true; // Output AS keyword (required by expected format)
3921                                                    // Store prefix alias comments - they should come BEFORE the table's trailing comments
3922                                                    // For "foo /* bla */: bar /* baz */", output is "bar AS foo /* bla */ /* baz */"
3923                                                    // So alias comments (/* bla */) come first, then table comments (/* baz */)
3924                        if !all_comments.is_empty() {
3925                            let existing_comments = std::mem::take(&mut t.trailing_comments);
3926                            t.trailing_comments = all_comments;
3927                            t.trailing_comments.extend(existing_comments);
3928                        }
3929                    }
3930                    Expression::Subquery(ref mut s) => {
3931                        s.alias = Some(alias_ident);
3932                    }
3933                    Expression::Function(ref mut _f) => {
3934                        // Wrap function in alias
3935                        return Ok(Expression::Alias(Box::new(Alias {
3936                            this: table_expr,
3937                            alias: alias_ident,
3938                            column_aliases: Vec::new(),
3939                            pre_alias_comments: all_comments,
3940                            trailing_comments: Vec::new(),
3941                            inferred_type: None,
3942                        })));
3943                    }
3944                    _ => {
3945                        // For other expressions, wrap in Alias
3946                        return Ok(Expression::Alias(Box::new(Alias {
3947                            this: table_expr,
3948                            alias: alias_ident,
3949                            column_aliases: Vec::new(),
3950                            pre_alias_comments: all_comments,
3951                            trailing_comments: Vec::new(),
3952                            inferred_type: None,
3953                        })));
3954                    }
3955                }
3956                return Ok(table_expr);
3957            }
3958
3959            let first_ident = self.parse_bigquery_table_part()?;
3960            let first_name = first_ident.name.clone();
3961
3962            // Check for qualified name (schema.table) or table function
3963            if self.match_token(TokenType::Dot) {
3964                // Handle TSQL a..b syntax (database..table with empty schema)
3965                if self.check(TokenType::Dot) {
3966                    // Two consecutive dots: a..b means catalog..table (empty schema)
3967                    self.skip(); // consume second dot
3968                    let table_ident = self.parse_bigquery_table_part()?;
3969                    let trailing_comments = self.previous_trailing_comments().to_vec();
3970                    return Ok(Expression::boxed_table(TableRef {
3971                        catalog: Some(first_ident),
3972                        schema: Some(Identifier::new("")), // Empty schema represents ..
3973                        name: table_ident,
3974                        alias: None,
3975                        alias_explicit_as: false,
3976                        column_aliases: Vec::new(),
3977                        leading_comments: Vec::new(),
3978                        trailing_comments,
3979                        when: None,
3980                        only: false,
3981                        final_: false,
3982                        table_sample: None,
3983                        hints: Vec::new(),
3984                        system_time: None,
3985                        partitions: Vec::new(),
3986                        identifier_func: None,
3987                        changes: None,
3988                        version: None,
3989                        span: None,
3990                    }));
3991                }
3992
3993                // BigQuery: handle x.* wildcard table reference (e.g., SELECT * FROM x.*)
3994                // After the first dot, if we see a Star token, it's a wildcard table name
3995                if matches!(
3996                    self.config.dialect,
3997                    Some(crate::dialects::DialectType::BigQuery)
3998                ) && self.check(TokenType::Star)
3999                {
4000                    self.skip(); // consume *
4001                    let trailing_comments = self.previous_trailing_comments().to_vec();
4002                    return Ok(Expression::boxed_table(TableRef {
4003                        catalog: None,
4004                        schema: Some(first_ident),
4005                        name: Identifier::new("*"),
4006                        alias: None,
4007                        alias_explicit_as: false,
4008                        column_aliases: Vec::new(),
4009                        leading_comments: Vec::new(),
4010                        trailing_comments,
4011                        when: None,
4012                        only: false,
4013                        final_: false,
4014                        table_sample: None,
4015                        hints: Vec::new(),
4016                        system_time: None,
4017                        partitions: Vec::new(),
4018                        identifier_func: None,
4019                        changes: None,
4020                        version: None,
4021                        span: None,
4022                    }));
4023                }
4024
4025                // schema.table or schema.function()
4026                // Allow keywords as table/schema names (e.g., schema.table, catalog.view)
4027                let second_ident = self.parse_bigquery_table_part()?;
4028                let second_name = second_ident.name.clone();
4029
4030                if self.match_token(TokenType::Dot) {
4031                    // BigQuery: handle a.b.* wildcard table reference
4032                    if matches!(
4033                        self.config.dialect,
4034                        Some(crate::dialects::DialectType::BigQuery)
4035                    ) && self.check(TokenType::Star)
4036                    {
4037                        self.skip(); // consume *
4038                        let trailing_comments = self.previous_trailing_comments().to_vec();
4039                        return Ok(Expression::boxed_table(TableRef {
4040                            catalog: Some(first_ident),
4041                            schema: Some(second_ident),
4042                            name: Identifier::new("*"),
4043                            alias: None,
4044                            alias_explicit_as: false,
4045                            column_aliases: Vec::new(),
4046                            leading_comments: Vec::new(),
4047                            trailing_comments,
4048                            when: None,
4049                            only: false,
4050                            final_: false,
4051                            table_sample: None,
4052                            hints: Vec::new(),
4053                            system_time: None,
4054                            partitions: Vec::new(),
4055                            identifier_func: None,
4056                            changes: None,
4057                            version: None,
4058                            span: None,
4059                        }));
4060                    }
4061                    // catalog.schema.table or catalog.schema.function()
4062                    let third_ident = self.parse_bigquery_table_part()?;
4063                    let third_name = third_ident.name.clone();
4064
4065                    // Check for 4-part name (e.g., project.dataset.INFORMATION_SCHEMA.TABLES)
4066                    if self.match_token(TokenType::Dot) {
4067                        let fourth_ident = self.parse_bigquery_table_part()?;
4068                        // BigQuery wildcard table suffix: a.b.c.d* matches all tables starting with d
4069                        let mut table_name = fourth_ident;
4070                        if matches!(
4071                            self.config.dialect,
4072                            Some(crate::dialects::DialectType::BigQuery)
4073                        ) && self.check(TokenType::Star)
4074                            && self.is_connected()
4075                        {
4076                            self.skip(); // consume *
4077                            table_name.name.push('*');
4078                        }
4079                        let trailing_comments = self.previous_trailing_comments().to_vec();
4080                        // For 4-part names, combine first two parts as catalog, third as schema
4081                        Expression::boxed_table(TableRef {
4082                            catalog: Some(Identifier::new(format!(
4083                                "{}.{}",
4084                                first_name, second_name
4085                            ))),
4086                            schema: Some(third_ident),
4087                            name: table_name,
4088                            alias: None,
4089                            alias_explicit_as: false,
4090                            column_aliases: Vec::new(),
4091                            leading_comments: Vec::new(),
4092                            trailing_comments,
4093                            when: None,
4094                            only: false,
4095                            final_: false,
4096                            table_sample: None,
4097                            hints: Vec::new(),
4098                            system_time: None,
4099                            partitions: Vec::new(),
4100                            identifier_func: None,
4101                            changes: None,
4102                            version: None,
4103                            span: None,
4104                        })
4105                    } else if self.match_token(TokenType::LParen) {
4106                        // catalog.schema.function() - table-valued function
4107                        let args = if self.check(TokenType::RParen) {
4108                            Vec::new()
4109                        } else {
4110                            self.parse_function_arguments()?
4111                        };
4112                        self.expect(TokenType::RParen)?;
4113                        let trailing_comments = self.previous_trailing_comments().to_vec();
4114                        Expression::Function(Box::new(Function {
4115                            name: format!("{}.{}.{}", first_name, second_name, third_name),
4116                            args,
4117                            distinct: false,
4118                            trailing_comments,
4119                            use_bracket_syntax: false,
4120                            no_parens: false,
4121                            quoted: false,
4122                            span: None,
4123                            inferred_type: None,
4124                        }))
4125                    } else {
4126                        // catalog.schema.table
4127                        // BigQuery wildcard table suffix: x.y.z* matches all tables starting with z
4128                        let mut table_name = third_ident;
4129                        if matches!(
4130                            self.config.dialect,
4131                            Some(crate::dialects::DialectType::BigQuery)
4132                        ) && self.check(TokenType::Star)
4133                            && self.is_connected()
4134                        {
4135                            self.skip(); // consume *
4136                            table_name.name.push('*');
4137                        }
4138                        let trailing_comments = self.previous_trailing_comments().to_vec();
4139                        Expression::boxed_table(TableRef {
4140                            catalog: Some(first_ident),
4141                            schema: Some(second_ident),
4142                            name: table_name,
4143                            alias: None,
4144                            alias_explicit_as: false,
4145                            column_aliases: Vec::new(),
4146                            leading_comments: Vec::new(),
4147                            trailing_comments,
4148                            when: None,
4149                            only: false,
4150                            final_: false,
4151                            table_sample: None,
4152                            hints: Vec::new(),
4153                            system_time: None,
4154                            partitions: Vec::new(),
4155                            identifier_func: None,
4156                            changes: None,
4157                            version: None,
4158                            span: None,
4159                        })
4160                    }
4161                } else if self.match_token(TokenType::LParen) {
4162                    // schema.function() - table-valued function
4163                    let args = if self.check(TokenType::RParen) {
4164                        Vec::new()
4165                    } else {
4166                        self.parse_function_arguments()?
4167                    };
4168                    self.expect(TokenType::RParen)?;
4169                    let trailing_comments = self.previous_trailing_comments().to_vec();
4170                    Expression::Function(Box::new(Function {
4171                        name: format!("{}.{}", first_name, second_name),
4172                        args,
4173                        distinct: false,
4174                        trailing_comments,
4175                        use_bracket_syntax: false,
4176                        no_parens: false,
4177                        quoted: false,
4178                        span: None,
4179                        inferred_type: None,
4180                    }))
4181                } else {
4182                    // schema.table
4183                    // BigQuery wildcard table suffix: x.y* matches all tables starting with y
4184                    let mut table_name = second_ident;
4185                    if matches!(
4186                        self.config.dialect,
4187                        Some(crate::dialects::DialectType::BigQuery)
4188                    ) && self.check(TokenType::Star)
4189                        && self.is_connected()
4190                    {
4191                        self.skip(); // consume *
4192                        table_name.name.push('*');
4193                    }
4194                    let trailing_comments = self.previous_trailing_comments().to_vec();
4195                    Expression::boxed_table(TableRef {
4196                        catalog: None,
4197                        schema: Some(first_ident),
4198                        name: table_name,
4199                        alias: None,
4200                        alias_explicit_as: false,
4201                        column_aliases: Vec::new(),
4202                        leading_comments: Vec::new(),
4203                        trailing_comments,
4204                        when: None,
4205                        only: false,
4206                        final_: false,
4207                        table_sample: None,
4208                        hints: Vec::new(),
4209                        system_time: None,
4210                        partitions: Vec::new(),
4211                        identifier_func: None,
4212                        changes: None,
4213                        version: None,
4214                        span: None,
4215                    })
4216                }
4217            } else if self.match_token(TokenType::LParen) {
4218                // Handle JSON_TABLE specially - it has COLUMNS clause syntax
4219                if first_name.eq_ignore_ascii_case("JSON_TABLE") {
4220                    // Parse the JSON expression (use parse_bitwise to avoid consuming FORMAT)
4221                    let this = self
4222                        .parse_bitwise()?
4223                        .unwrap_or(Expression::Null(crate::expressions::Null));
4224
4225                    // Check for FORMAT JSON after the expression
4226                    let this_with_format = if self.match_text_seq(&["FORMAT", "JSON"]) {
4227                        Expression::JSONFormat(Box::new(crate::expressions::JSONFormat {
4228                            this: Some(Box::new(this)),
4229                            options: Vec::new(),
4230                            is_json: None,
4231                            to_json: None,
4232                        }))
4233                    } else {
4234                        this
4235                    };
4236
4237                    // Parse path (after comma)
4238                    let path = if self.match_token(TokenType::Comma) {
4239                        if let Some(s) = self.parse_string()? {
4240                            Some(Box::new(s))
4241                        } else {
4242                            None
4243                        }
4244                    } else {
4245                        None
4246                    };
4247
4248                    // Oracle uses "ERROR ON ERROR" (value then behavior) instead of "ON ERROR ERROR"
4249                    // Parse error handling: ERROR ON ERROR or NULL ON ERROR
4250                    let error_handling = if self.match_identifier("ERROR")
4251                        && self.match_text_seq(&["ON", "ERROR"])
4252                    {
4253                        Some(Box::new(Expression::Var(Box::new(Var {
4254                            this: "ERROR ON ERROR".to_string(),
4255                        }))))
4256                    } else if self.match_text_seq(&["NULL", "ON", "ERROR"]) {
4257                        Some(Box::new(Expression::Var(Box::new(Var {
4258                            this: "NULL ON ERROR".to_string(),
4259                        }))))
4260                    } else {
4261                        None
4262                    };
4263
4264                    // Parse empty handling: ERROR ON EMPTY or NULL ON EMPTY
4265                    let empty_handling = if self.match_identifier("ERROR")
4266                        && self.match_text_seq(&["ON", "EMPTY"])
4267                    {
4268                        Some(Box::new(Expression::Var(Box::new(Var {
4269                            this: "ERROR ON EMPTY".to_string(),
4270                        }))))
4271                    } else if self.match_text_seq(&["NULL", "ON", "EMPTY"]) {
4272                        Some(Box::new(Expression::Var(Box::new(Var {
4273                            this: "NULL ON EMPTY".to_string(),
4274                        }))))
4275                    } else {
4276                        None
4277                    };
4278
4279                    // Parse COLUMNS clause
4280                    let schema = self.parse_json_table_columns()?;
4281
4282                    self.expect(TokenType::RParen)?;
4283
4284                    Expression::JSONTable(Box::new(JSONTable {
4285                        this: Box::new(this_with_format),
4286                        schema: schema.map(Box::new),
4287                        path,
4288                        error_handling,
4289                        empty_handling,
4290                    }))
4291                } else if first_name.eq_ignore_ascii_case("XMLTABLE") {
4292                    // Handle XMLTABLE specially - it has COLUMNS clause syntax
4293                    // XMLTABLE([XMLNAMESPACES(...),] '/xpath' PASSING xml_doc COLUMNS ...)
4294                    if let Some(xml_table) = self.parse_xml_table()? {
4295                        self.expect(TokenType::RParen)?;
4296                        xml_table
4297                    } else {
4298                        return Err(self.parse_error("Failed to parse XMLTABLE"));
4299                    }
4300                } else if first_name.eq_ignore_ascii_case("OPENJSON") {
4301                    // Handle OPENJSON specially - it has WITH clause for column definitions
4302                    // OPENJSON(json[, path]) [WITH (col1 type1 'path' [AS JSON], ...)]
4303                    if let Some(openjson_expr) = self.parse_open_json()? {
4304                        openjson_expr
4305                    } else {
4306                        return Err(self.parse_error("Failed to parse OPENJSON"));
4307                    }
4308                } else if first_name.eq_ignore_ascii_case("SEMANTIC_VIEW") {
4309                    // Handle SEMANTIC_VIEW specially - it has METRICS/DIMENSIONS/FACTS/WHERE syntax
4310                    // SEMANTIC_VIEW(table METRICS a.b, a.c DIMENSIONS a.b, a.c WHERE expr)
4311                    let semantic_view = self.parse_semantic_view()?;
4312                    self.expect(TokenType::RParen)?;
4313                    semantic_view
4314                } else if (first_name.eq_ignore_ascii_case("view")
4315                    || first_name.eq_ignore_ascii_case("merge"))
4316                    && (self.check(TokenType::Select) || self.check(TokenType::With))
4317                {
4318                    // ClickHouse: view(SELECT ...) and merge(SELECT ...) table functions
4319                    // contain a subquery as the argument
4320                    let query = self.parse_statement()?;
4321                    self.expect(TokenType::RParen)?;
4322                    let trailing_comments = self.previous_trailing_comments().to_vec();
4323                    Expression::Function(Box::new(Function {
4324                        name: first_name.to_string(),
4325                        args: vec![query],
4326                        distinct: false,
4327                        trailing_comments,
4328                        use_bracket_syntax: false,
4329                        no_parens: false,
4330                        quoted: false,
4331                        span: None,
4332                        inferred_type: None,
4333                    }))
4334                } else {
4335                    // Simple table function like UNNEST(), GAP_FILL(), etc.
4336                    let args = if self.check(TokenType::RParen) {
4337                        Vec::new()
4338                    } else {
4339                        self.parse_function_arguments()?
4340                    };
4341                    self.expect(TokenType::RParen)?;
4342                    let trailing_comments = self.previous_trailing_comments().to_vec();
4343
4344                    // Handle UNNEST specially to create UnnestFunc expression
4345                    if first_name.eq_ignore_ascii_case("UNNEST") {
4346                        // Check for WITH ORDINALITY (Presto) or WITH OFFSET (BigQuery)
4347                        // Both are semantically the same - provide an ordinal/offset column
4348                        let with_ordinality = self
4349                            .match_keywords(&[TokenType::With, TokenType::Ordinality])
4350                            || self.match_text_seq(&["WITH", "OFFSET"]);
4351                        // If WITH OFFSET matched, check for optional offset alias: WITH OFFSET AS y or WITH OFFSET y
4352                        let offset_alias = if with_ordinality
4353                            && matches!(
4354                                self.config.dialect,
4355                                Some(crate::dialects::DialectType::BigQuery)
4356                            ) {
4357                            let has_as = self.match_token(TokenType::As);
4358                            if has_as
4359                                || (self.check(TokenType::Identifier) || self.check(TokenType::Var))
4360                            {
4361                                let alias_name = self.advance().text;
4362                                Some(crate::expressions::Identifier {
4363                                    name: alias_name,
4364                                    quoted: false,
4365                                    trailing_comments: Vec::new(),
4366                                    span: None,
4367                                })
4368                            } else {
4369                                None
4370                            }
4371                        } else {
4372                            None
4373                        };
4374                        let mut args_iter = args.into_iter();
4375                        let this = args_iter
4376                            .next()
4377                            .ok_or_else(|| self.parse_error("Expected expression in UNNEST"))?;
4378                        let expressions: Vec<Expression> = args_iter.collect();
4379                        Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
4380                            this,
4381                            expressions,
4382                            with_ordinality,
4383                            alias: None,
4384                            offset_alias,
4385                        }))
4386                    } else {
4387                        // Check for WITH ORDINALITY after any table-valued function
4388                        let with_ordinality =
4389                            self.match_keywords(&[TokenType::With, TokenType::Ordinality]);
4390                        let func_name = if with_ordinality {
4391                            format!("{} WITH ORDINALITY", first_name)
4392                        } else {
4393                            first_name.clone()
4394                        };
4395                        let func = Function {
4396                            name: func_name,
4397                            args,
4398                            distinct: false,
4399                            trailing_comments,
4400                            use_bracket_syntax: false,
4401                            no_parens: false,
4402                            quoted: false,
4403                            span: None,
4404                            inferred_type: None,
4405                        };
4406                        let func_expr = Expression::Function(Box::new(func));
4407
4408                        // TSQL: OPENDATASOURCE(...).Catalog.schema.table
4409                        // After a table-valued function, dot-chained access produces
4410                        // a TableRef whose identifier_func holds the function call.
4411                        if self.check(TokenType::Dot) {
4412                            self.skip(); // consume first dot
4413                            let part1 = self.parse_bigquery_table_part()?;
4414                            if self.match_token(TokenType::Dot) {
4415                                let part2 = self.parse_bigquery_table_part()?;
4416                                if self.match_token(TokenType::Dot) {
4417                                    // func().a.b.c  → catalog=a, schema=b, name=c
4418                                    let part3 = self.parse_bigquery_table_part()?;
4419                                    let tc = self.previous_trailing_comments().to_vec();
4420                                    Expression::boxed_table(TableRef {
4421                                        catalog: Some(part1),
4422                                        schema: Some(part2),
4423                                        name: part3,
4424                                        alias: None,
4425                                        alias_explicit_as: false,
4426                                        column_aliases: Vec::new(),
4427                                        leading_comments: Vec::new(),
4428                                        trailing_comments: tc,
4429                                        when: None,
4430                                        only: false,
4431                                        final_: false,
4432                                        table_sample: None,
4433                                        hints: Vec::new(),
4434                                        system_time: None,
4435                                        partitions: Vec::new(),
4436                                        identifier_func: Some(Box::new(func_expr)),
4437                                        changes: None,
4438                                        version: None,
4439                                        span: None,
4440                                    })
4441                                } else {
4442                                    // func().a.b  → schema=a, name=b
4443                                    let tc = self.previous_trailing_comments().to_vec();
4444                                    Expression::boxed_table(TableRef {
4445                                        catalog: None,
4446                                        schema: Some(part1),
4447                                        name: part2,
4448                                        alias: None,
4449                                        alias_explicit_as: false,
4450                                        column_aliases: Vec::new(),
4451                                        leading_comments: Vec::new(),
4452                                        trailing_comments: tc,
4453                                        when: None,
4454                                        only: false,
4455                                        final_: false,
4456                                        table_sample: None,
4457                                        hints: Vec::new(),
4458                                        system_time: None,
4459                                        partitions: Vec::new(),
4460                                        identifier_func: Some(Box::new(func_expr)),
4461                                        changes: None,
4462                                        version: None,
4463                                        span: None,
4464                                    })
4465                                }
4466                            } else {
4467                                // func().a  → name=a
4468                                let tc = self.previous_trailing_comments().to_vec();
4469                                Expression::boxed_table(TableRef {
4470                                    catalog: None,
4471                                    schema: None,
4472                                    name: part1,
4473                                    alias: None,
4474                                    alias_explicit_as: false,
4475                                    column_aliases: Vec::new(),
4476                                    leading_comments: Vec::new(),
4477                                    trailing_comments: tc,
4478                                    when: None,
4479                                    only: false,
4480                                    final_: false,
4481                                    table_sample: None,
4482                                    hints: Vec::new(),
4483                                    system_time: None,
4484                                    partitions: Vec::new(),
4485                                    identifier_func: Some(Box::new(func_expr)),
4486                                    changes: None,
4487                                    version: None,
4488                                    span: None,
4489                                })
4490                            }
4491                        } else {
4492                            func_expr
4493                        }
4494                    }
4495                }
4496            } else {
4497                // Simple table name
4498                // BigQuery wildcard table suffix: x* matches all tables starting with x
4499                let mut table_name = first_ident;
4500                if matches!(
4501                    self.config.dialect,
4502                    Some(crate::dialects::DialectType::BigQuery)
4503                ) && self.check(TokenType::Star)
4504                    && self.is_connected()
4505                {
4506                    self.skip(); // consume *
4507                    table_name.name.push('*');
4508                }
4509                let trailing_comments = self.previous_trailing_comments().to_vec();
4510                Expression::boxed_table(TableRef {
4511                    catalog: None,
4512                    schema: None,
4513                    name: table_name,
4514                    alias: None,
4515                    alias_explicit_as: false,
4516                    column_aliases: Vec::new(),
4517                    leading_comments: Vec::new(),
4518                    trailing_comments,
4519                    when: None,
4520                    only: false,
4521                    final_: false,
4522                    table_sample: None,
4523                    hints: Vec::new(),
4524                    system_time: None,
4525                    partitions: Vec::new(),
4526                    identifier_func: None,
4527                    changes: None,
4528                    version: None,
4529                    span: None,
4530                })
4531            }
4532        } else if self.check(TokenType::LBrace) {
4533            // ClickHouse query parameter: {name: Type}
4534            if let Some(param) = self.parse_clickhouse_braced_parameter()? {
4535                param
4536            } else {
4537                // Spark/Databricks widget template variable: {name}
4538                self.skip(); // consume {
4539                if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
4540                    let name_token = self.advance();
4541                    self.expect(TokenType::RBrace)?;
4542                    Expression::Parameter(Box::new(Parameter {
4543                        name: Some(name_token.text.clone()),
4544                        index: None,
4545                        style: ParameterStyle::Brace,
4546                        quoted: false,
4547                        string_quoted: false,
4548                        expression: None,
4549                    }))
4550                } else {
4551                    return Err(self.parse_error("Expected identifier after {"));
4552                }
4553            }
4554        } else if self.check(TokenType::Dollar) && self.check_next(TokenType::LBrace) {
4555            // Template variable as table reference: ${variable_name} or ${kind:name}
4556            // This is used in Databricks/Hive for parameterized queries
4557            self.skip(); // consume $
4558            self.skip(); // consume {
4559            if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
4560                let name_token = self.advance();
4561                // Check for ${kind:name} syntax (e.g., ${hiveconf:some_var})
4562                let expression = if self.match_token(TokenType::Colon) {
4563                    if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
4564                        let expr_token = self.advance();
4565                        Some(expr_token.text.clone())
4566                    } else {
4567                        return Err(self.parse_error("Expected identifier after : in ${...}"));
4568                    }
4569                } else {
4570                    None
4571                };
4572                self.expect(TokenType::RBrace)?;
4573                Expression::Parameter(Box::new(Parameter {
4574                    name: Some(name_token.text.clone()),
4575                    index: None,
4576                    style: ParameterStyle::DollarBrace,
4577                    quoted: false,
4578                    string_quoted: false,
4579                    expression,
4580                }))
4581            } else {
4582                return Err(self.parse_error("Expected identifier after ${"));
4583            }
4584        } else if self.check(TokenType::String) {
4585            // DuckDB allows string literals as table names: SELECT * FROM 'x.y'
4586            // Convert to a quoted identifier
4587            let string_token = self.advance();
4588            let table_name = Identifier {
4589                name: string_token.text.clone(),
4590                quoted: true,
4591                trailing_comments: Vec::new(),
4592                span: None,
4593            };
4594            let trailing_comments = self.previous_trailing_comments().to_vec();
4595            Expression::boxed_table(TableRef {
4596                catalog: None,
4597                schema: None,
4598                name: table_name,
4599                alias: None,
4600                alias_explicit_as: false,
4601                column_aliases: Vec::new(),
4602                leading_comments: Vec::new(),
4603                trailing_comments,
4604                when: None,
4605                only: false,
4606                final_: false,
4607                table_sample: None,
4608                hints: Vec::new(),
4609                system_time: None,
4610                partitions: Vec::new(),
4611                identifier_func: None,
4612                changes: None,
4613                version: None,
4614                span: None,
4615            })
4616        } else {
4617            return Err(self.parse_error(format!(
4618                "Expected table name or subquery, got {:?}",
4619                self.peek().token_type
4620            )));
4621        };
4622
4623        // Postgres supports a wildcard (table) suffix operator, which is a no-op in this context.
4624        // e.g., FROM t1* means "include inherited tables". Matches Python sqlglot behavior.
4625        self.match_token(TokenType::Star);
4626
4627        // Check for Snowflake CHANGES clause: CHANGES (INFORMATION => ...) AT|BEFORE (...) END (...)
4628        // Must be checked before time travel since CHANGES includes its own AT/BEFORE clauses
4629        if self.check_keyword_text("CHANGES") {
4630            if let Some(changes_expr) = self.parse_changes()? {
4631                if let Expression::Table(ref mut table) = expr {
4632                    if let Expression::Changes(changes_box) = changes_expr {
4633                        table.changes = Some(changes_box);
4634                    }
4635                }
4636            }
4637        }
4638
4639        // Check for Snowflake time travel: BEFORE (STATEMENT => ...) or AT (TIMESTAMP => ...)
4640        if self.check(TokenType::Before) || self.check_keyword_text("AT") {
4641            if let Some(historical_expr) = self.parse_historical_data()? {
4642                // Attach historical data to the table expression
4643                if let Expression::Table(ref mut table) = expr {
4644                    if let Expression::HistoricalData(hd) = historical_expr {
4645                        table.when = Some(hd);
4646                    }
4647                }
4648            }
4649        }
4650
4651        // Check for TSQL FOR SYSTEM_TIME temporal clause (not BigQuery - handled post-alias)
4652        // Syntax: FOR SYSTEM_TIME AS OF expr
4653        //         FOR SYSTEM_TIME FROM expr TO expr
4654        //         FOR SYSTEM_TIME BETWEEN expr AND expr
4655        //         FOR SYSTEM_TIME CONTAINED IN (expr, expr)
4656        //         FOR SYSTEM_TIME ALL
4657        if !matches!(
4658            self.config.dialect,
4659            Some(crate::dialects::DialectType::BigQuery)
4660        ) && self.check(TokenType::For)
4661            && self.current + 1 < self.tokens.len()
4662            && self.tokens[self.current + 1]
4663                .text
4664                .eq_ignore_ascii_case("SYSTEM_TIME")
4665        {
4666            self.skip(); // consume FOR
4667            self.skip(); // consume SYSTEM_TIME
4668            let system_time_str = if self.match_token(TokenType::As) {
4669                // AS OF expr
4670                if self.check_keyword_text("OF") {
4671                    self.skip(); // consume OF
4672                    let start = self.current;
4673                    // Collect expression tokens until we hit a clause boundary
4674                    while !self.is_at_end()
4675                        && !self.check(TokenType::Semicolon)
4676                        && !self.check(TokenType::Where)
4677                        && !self.check(TokenType::Join)
4678                        && !self.check(TokenType::Left)
4679                        && !self.check(TokenType::Right)
4680                        && !self.check(TokenType::Inner)
4681                        && !self.check(TokenType::Outer)
4682                        && !self.check(TokenType::Full)
4683                        && !self.check(TokenType::Cross)
4684                        && !self.check(TokenType::Order)
4685                        && !self.check(TokenType::Group)
4686                        && !self.check(TokenType::Having)
4687                        && !self.check(TokenType::Limit)
4688                        && !self.check(TokenType::Union)
4689                        && !self.check(TokenType::Except)
4690                        && !self.check(TokenType::Intersect)
4691                        && !self.check(TokenType::As)
4692                        && !self.check(TokenType::Comma)
4693                        && !self.check(TokenType::RParen)
4694                        && !self.check(TokenType::With)
4695                        && !self.check(TokenType::Pivot)
4696                        && !self.check(TokenType::Unpivot)
4697                    {
4698                        self.skip();
4699                    }
4700                    let expr_text = self.tokens_to_sql_uppercased(start, self.current);
4701                    format!("FOR SYSTEM_TIME AS OF {}", expr_text)
4702                } else {
4703                    "FOR SYSTEM_TIME AS".to_string()
4704                }
4705            } else if self.match_token(TokenType::Between) {
4706                // BETWEEN expr AND expr
4707                let start = self.current;
4708                while !self.is_at_end() && !self.check(TokenType::And) {
4709                    self.skip();
4710                }
4711                let expr1_text = self.tokens_to_sql_uppercased(start, self.current);
4712                self.skip(); // consume AND
4713                let start2 = self.current;
4714                while !self.is_at_end()
4715                    && !self.check(TokenType::Semicolon)
4716                    && !self.check(TokenType::Where)
4717                    && !self.check(TokenType::Join)
4718                    && !self.check(TokenType::Left)
4719                    && !self.check(TokenType::Right)
4720                    && !self.check(TokenType::Inner)
4721                    && !self.check(TokenType::Outer)
4722                    && !self.check(TokenType::Full)
4723                    && !self.check(TokenType::Cross)
4724                    && !self.check(TokenType::Order)
4725                    && !self.check(TokenType::Group)
4726                    && !self.check(TokenType::Having)
4727                    && !self.check(TokenType::Limit)
4728                    && !self.check(TokenType::Union)
4729                    && !self.check(TokenType::Except)
4730                    && !self.check(TokenType::Intersect)
4731                    && !self.check(TokenType::As)
4732                    && !self.check(TokenType::Comma)
4733                    && !self.check(TokenType::RParen)
4734                    && !self.check(TokenType::With)
4735                    && !self.check(TokenType::Pivot)
4736                    && !self.check(TokenType::Unpivot)
4737                {
4738                    self.skip();
4739                }
4740                let expr2_text = self.tokens_to_sql_uppercased(start2, self.current);
4741                format!("FOR SYSTEM_TIME BETWEEN {} AND {}", expr1_text, expr2_text)
4742            } else if self.match_token(TokenType::From) {
4743                // FROM expr TO expr
4744                let start = self.current;
4745                while !self.is_at_end() && !self.check(TokenType::To) {
4746                    self.skip();
4747                }
4748                let expr1_text = self.tokens_to_sql_uppercased(start, self.current);
4749                self.skip(); // consume TO
4750                let start2 = self.current;
4751                while !self.is_at_end()
4752                    && !self.check(TokenType::Semicolon)
4753                    && !self.check(TokenType::Where)
4754                    && !self.check(TokenType::As)
4755                    && !self.check(TokenType::Comma)
4756                    && !self.check(TokenType::RParen)
4757                {
4758                    self.skip();
4759                }
4760                let expr2_text = self.tokens_to_sql_uppercased(start2, self.current);
4761                format!("FOR SYSTEM_TIME FROM {} TO {}", expr1_text, expr2_text)
4762            } else if self.check_identifier("CONTAINED") {
4763                self.skip(); // consume CONTAINED
4764                self.expect(TokenType::In)?;
4765                self.expect(TokenType::LParen)?;
4766                let start = self.current;
4767                let mut depth = 1;
4768                while !self.is_at_end() && depth > 0 {
4769                    if self.check(TokenType::LParen) {
4770                        depth += 1;
4771                    }
4772                    if self.check(TokenType::RParen) {
4773                        depth -= 1;
4774                        if depth == 0 {
4775                            break;
4776                        }
4777                    }
4778                    self.skip();
4779                }
4780                let inner_text = self.tokens_to_sql_uppercased(start, self.current);
4781                self.expect(TokenType::RParen)?;
4782                format!("FOR SYSTEM_TIME CONTAINED IN ({})", inner_text)
4783            } else if self.match_token(TokenType::All) {
4784                "FOR SYSTEM_TIME ALL".to_string()
4785            } else {
4786                "FOR SYSTEM_TIME".to_string()
4787            };
4788            if let Expression::Table(ref mut table) = expr {
4789                table.system_time = Some(system_time_str);
4790            }
4791        }
4792
4793        // Check for Presto/Trino time travel: FOR VERSION AS OF / FOR TIMESTAMP AS OF
4794        // Syntax: FOR VERSION AS OF <snapshot_id>
4795        //         FOR TIMESTAMP AS OF <timestamp_expr>
4796        if self.check(TokenType::For) && self.current + 1 < self.tokens.len() {
4797            let next_text = &self.tokens[self.current + 1].text;
4798            if next_text.eq_ignore_ascii_case("VERSION")
4799                || next_text.eq_ignore_ascii_case("TIMESTAMP")
4800            {
4801                self.skip(); // consume FOR
4802                let version_kind = self.advance().text.to_ascii_uppercase(); // consume VERSION or TIMESTAMP
4803
4804                // Expect AS OF
4805                if self.match_token(TokenType::As) && self.check_keyword_text("OF") {
4806                    self.skip(); // consume OF
4807
4808                    // Parse the expression value
4809                    if let Some(value_expr) = self.parse_bitwise()? {
4810                        let version = crate::expressions::Version {
4811                            this: Box::new(Expression::Identifier(Identifier::new(&version_kind))),
4812                            kind: "AS OF".to_string(),
4813                            expression: Some(Box::new(value_expr)),
4814                        };
4815                        if let Expression::Table(ref mut table) = expr {
4816                            table.version = Some(Box::new(version));
4817                        }
4818                    }
4819                }
4820            }
4821        }
4822
4823        // Check for Hive-style time travel: TIMESTAMP AS OF / VERSION AS OF (without FOR)
4824        // Syntax: TIMESTAMP AS OF <timestamp_expr>
4825        //         VERSION AS OF <snapshot_id>
4826        if self.current < self.tokens.len() {
4827            let current_text = &self.tokens[self.current].text;
4828            if (current_text.eq_ignore_ascii_case("TIMESTAMP")
4829                || current_text.eq_ignore_ascii_case("VERSION"))
4830                && self.current + 2 < self.tokens.len()
4831                && self.tokens[self.current + 1].token_type == TokenType::As
4832                && self.tokens[self.current + 2]
4833                    .text
4834                    .eq_ignore_ascii_case("OF")
4835            {
4836                let version_kind = self.advance().text.to_ascii_uppercase(); // consume TIMESTAMP or VERSION
4837                self.skip(); // consume AS
4838                self.skip(); // consume OF
4839
4840                // Parse the expression value
4841                if let Some(value_expr) = self.parse_bitwise()? {
4842                    let version = crate::expressions::Version {
4843                        this: Box::new(Expression::Identifier(Identifier::new(&version_kind))),
4844                        kind: "AS OF".to_string(),
4845                        expression: Some(Box::new(value_expr)),
4846                    };
4847                    if let Expression::Table(ref mut table) = expr {
4848                        table.version = Some(Box::new(version));
4849                    }
4850                }
4851            }
4852        }
4853
4854        // Check for MySQL PARTITION(p0, p1, ...) clause
4855        // Only supported by MySQL-compatible dialects (not generic dialect)
4856        let supports_partition_selection = matches!(
4857            self.config.dialect,
4858            Some(crate::dialects::DialectType::MySQL)
4859                | Some(crate::dialects::DialectType::SingleStore)
4860                | Some(crate::dialects::DialectType::Doris)
4861                | Some(crate::dialects::DialectType::StarRocks)
4862        );
4863        if supports_partition_selection && self.match_token(TokenType::Partition) {
4864            if self.match_token(TokenType::LParen) {
4865                let mut partitions = Vec::new();
4866                loop {
4867                    let partition_name = self.expect_identifier_or_keyword_with_quoted()?;
4868                    partitions.push(partition_name);
4869                    if !self.match_token(TokenType::Comma) {
4870                        break;
4871                    }
4872                }
4873                self.expect(TokenType::RParen)?;
4874                if let Expression::Table(ref mut table) = expr {
4875                    table.partitions = partitions;
4876                }
4877            }
4878        }
4879
4880        // Check for table-level TABLESAMPLE/SAMPLE: tbl TABLESAMPLE METHOD(size) or tbl SAMPLE ROW(0)
4881        // Snowflake supports both TABLESAMPLE and SAMPLE
4882        if self.check(TokenType::TableSample) || self.check(TokenType::Sample) {
4883            if let Some(sample) = self.parse_table_level_sample()? {
4884                if let Expression::Table(ref mut table) = expr {
4885                    table.table_sample = Some(Box::new(sample));
4886                } else {
4887                    // For non-Table expressions (subqueries, functions, etc.),
4888                    // wrap in TableSample expression node
4889                    expr = Expression::TableSample(Box::new(crate::expressions::TableSample {
4890                        this: Some(Box::new(expr)),
4891                        sample: Some(Box::new(sample)),
4892                        expressions: Vec::new(),
4893                        method: None,
4894                        bucket_numerator: None,
4895                        bucket_denominator: None,
4896                        bucket_field: None,
4897                        percent: None,
4898                        rows: None,
4899                        size: None,
4900                        seed: None,
4901                    }));
4902                }
4903            }
4904        }
4905
4906        // Check for TSQL table hints: WITH (TABLOCK, INDEX(myindex), ...)
4907        if self.check(TokenType::With) && self.check_next(TokenType::LParen) {
4908            if let Expression::Table(ref mut table) = expr {
4909                if let Some(hint_expr) = self.parse_table_hints()? {
4910                    // parse_table_hints returns a Tuple wrapping individual hint expressions.
4911                    // Extract the inner hints so we store them directly.
4912                    match hint_expr {
4913                        Expression::Tuple(tuple) => {
4914                            table.hints = tuple.expressions;
4915                        }
4916                        other => {
4917                            table.hints = vec![other];
4918                        }
4919                    }
4920                }
4921            }
4922        }
4923
4924        // Check for MySQL index hints: USE INDEX, IGNORE INDEX, FORCE INDEX
4925        if self.check_keyword_text("USE")
4926            || self.check(TokenType::Ignore)
4927            || self.check_keyword_text("FORCE")
4928        {
4929            // Peek ahead to see if next token after USE/IGNORE/FORCE is INDEX or KEY
4930            let next_idx = self.current + 1;
4931            let is_index_hint = next_idx < self.tokens.len() && {
4932                let next_text = &self.tokens[next_idx].text;
4933                next_text.eq_ignore_ascii_case("INDEX") || next_text.eq_ignore_ascii_case("KEY")
4934            };
4935            if is_index_hint {
4936                if let Expression::Table(ref mut table) = expr {
4937                    if let Some(hint_expr) = self.parse_table_hints()? {
4938                        match hint_expr {
4939                            Expression::Tuple(tuple) => {
4940                                table.hints = tuple.expressions;
4941                            }
4942                            other => {
4943                                table.hints = vec![other];
4944                            }
4945                        }
4946                    }
4947                }
4948            }
4949        }
4950
4951        // Check for SQLite INDEXED BY or NOT INDEXED table hints
4952        if self.check_identifier("INDEXED") {
4953            self.skip(); // consume INDEXED
4954            self.expect(TokenType::By)?;
4955            // Parse index name (can be qualified: schema.index)
4956            let first_part = self.expect_identifier_or_keyword()?;
4957            let index_name = if self.match_token(TokenType::Dot) {
4958                let second_part = self.expect_identifier_or_keyword()?;
4959                format!("{}.{}", first_part, second_part)
4960            } else {
4961                first_part
4962            };
4963            if let Expression::Table(ref mut table) = expr {
4964                table.hints.push(Expression::Identifier(Identifier {
4965                    name: format!("INDEXED BY {}", index_name),
4966                    quoted: false,
4967                    trailing_comments: Vec::new(),
4968                    span: None,
4969                }));
4970            }
4971        } else if self.check(TokenType::Not) && self.check_next_identifier("INDEXED") {
4972            self.skip(); // consume NOT
4973            self.skip(); // consume INDEXED
4974            if let Expression::Table(ref mut table) = expr {
4975                table.hints.push(Expression::Identifier(Identifier {
4976                    name: "NOT INDEXED".to_string(),
4977                    quoted: false,
4978                    trailing_comments: Vec::new(),
4979                    span: None,
4980                }));
4981            }
4982        }
4983
4984        // Check for PIVOT (can be followed by UNPIVOT)
4985        // Only treat as PIVOT clause when followed by ( — otherwise it's a table alias
4986        if self.check(TokenType::Pivot) && self.check_next(TokenType::LParen) {
4987            self.skip(); // consume PIVOT
4988            expr = self.parse_pivot(expr)?;
4989        }
4990        // Check for UNPIVOT (can follow PIVOT or be standalone)
4991        // Only treat as UNPIVOT clause when followed by (, INCLUDE, or EXCLUDE — otherwise it's a table alias
4992        if self.check(TokenType::Unpivot) && self.is_unpivot_clause_start() {
4993            self.skip(); // consume UNPIVOT
4994            expr = self.parse_unpivot(expr)?;
4995        }
4996        // Check for MATCH_RECOGNIZE
4997        else if self.check(TokenType::MatchRecognize)
4998            && !matches!(&expr, Expression::Pivot(_) | Expression::Unpivot(_))
4999        {
5000            self.skip();
5001            expr = self.parse_match_recognize(Some(expr))?;
5002        }
5003
5004        // Check for alias
5005        if self.match_token(TokenType::As) {
5006            // Handle AS (col1, col2) without alias name - used by POSEXPLODE etc.
5007            if self.check(TokenType::LParen) {
5008                self.skip(); // consume LParen
5009                let mut column_aliases = Vec::new();
5010                loop {
5011                    if self.check(TokenType::RParen) {
5012                        break;
5013                    }
5014                    column_aliases.push(Identifier::new(self.expect_identifier_or_keyword()?));
5015                    if !self.match_token(TokenType::Comma) {
5016                        break;
5017                    }
5018                }
5019                self.expect(TokenType::RParen)?;
5020                expr = Expression::Alias(Box::new(Alias {
5021                    this: expr,
5022                    alias: Identifier::new(String::new()),
5023                    column_aliases,
5024                    pre_alias_comments: Vec::new(),
5025                    trailing_comments: Vec::new(),
5026                    inferred_type: None,
5027                }));
5028            } else {
5029                let alias_ident_parsed = self.expect_identifier_or_alias_keyword_with_quoted()?;
5030                let alias = alias_ident_parsed.name;
5031                let alias_is_quoted = alias_ident_parsed.quoted;
5032                let make_alias_ident = |name: String| -> Identifier {
5033                    if alias_is_quoted {
5034                        Identifier::quoted(name)
5035                    } else {
5036                        Identifier::new(name)
5037                    }
5038                };
5039                // Check for column aliases: AS t(c1, c2) or AS t(c1 type1, c2 type2) for table functions
5040                if self.match_token(TokenType::LParen) {
5041                    // Check if this is typed column definitions (for table functions like JSON_TO_RECORDSET)
5042                    // by looking ahead: if we see identifier followed by another identifier/type (not comma/rparen),
5043                    // it's typed columns
5044                    let has_typed_columns = self.check_typed_column_list();
5045
5046                    if has_typed_columns {
5047                        // Parse typed column definitions like: (col1 type1, col2 type2)
5048                        let mut typed_cols = Vec::new();
5049                        loop {
5050                            if self.check(TokenType::RParen) {
5051                                break;
5052                            }
5053                            // Parse column name (can be quoted)
5054                            let col_name = self.expect_identifier_or_keyword_with_quoted()?;
5055                            // Parse column type
5056                            let col_type = self.parse_data_type()?;
5057                            // Create ColumnDef expression, preserving the quoted status
5058                            let mut col_def = ColumnDef::new(col_name.name.clone(), col_type);
5059                            col_def.name = col_name;
5060                            typed_cols.push(Expression::ColumnDef(Box::new(col_def)));
5061
5062                            if !self.match_token(TokenType::Comma) {
5063                                break;
5064                            }
5065                        }
5066                        self.expect(TokenType::RParen)?;
5067
5068                        // Create TableAlias with typed columns
5069                        let table_alias = Expression::TableAlias(Box::new(TableAlias {
5070                            this: Some(Box::new(Expression::Identifier(make_alias_ident(alias)))),
5071                            columns: typed_cols,
5072                        }));
5073
5074                        // Wrap function with TableAlias using Tuple pattern (like ROWS FROM)
5075                        expr = Expression::Tuple(Box::new(Tuple {
5076                            expressions: vec![expr, table_alias],
5077                        }));
5078                    } else {
5079                        // Parse simple column aliases: (c1, c2, ...)
5080                        // Use expect_identifier_or_keyword to allow keywords like KEY, INDEX, VALUE as column aliases
5081                        let mut aliases = Vec::new();
5082                        loop {
5083                            if self.check(TokenType::RParen) {
5084                                break;
5085                            }
5086                            aliases.push(Identifier::new(self.expect_identifier_or_keyword()?));
5087                            if !self.match_token(TokenType::Comma) {
5088                                break;
5089                            }
5090                        }
5091                        self.expect(TokenType::RParen)?;
5092
5093                        expr = match expr {
5094                            Expression::Table(mut t) => {
5095                                t.alias = Some(make_alias_ident(alias));
5096                                t.alias_explicit_as = true;
5097                                t.column_aliases = aliases;
5098                                Expression::Table(t)
5099                            }
5100                            Expression::Subquery(mut s) => {
5101                                s.alias = Some(make_alias_ident(alias));
5102                                s.column_aliases = aliases;
5103                                Expression::Subquery(s)
5104                            }
5105                            Expression::Pivot(mut p) => {
5106                                p.alias = Some(make_alias_ident(alias));
5107                                Expression::Pivot(p)
5108                            }
5109                            Expression::Unpivot(mut u) => {
5110                                u.alias = Some(make_alias_ident(alias));
5111                                Expression::Unpivot(u)
5112                            }
5113                            Expression::MatchRecognize(mut mr) => {
5114                                mr.alias = Some(make_alias_ident(alias));
5115                                mr.alias_explicit_as = true;
5116                                Expression::MatchRecognize(mr)
5117                            }
5118                            Expression::JoinedTable(mut jt) => {
5119                                jt.alias = Some(make_alias_ident(alias));
5120                                Expression::JoinedTable(jt)
5121                            }
5122                            _ => Expression::Alias(Box::new(Alias {
5123                                this: expr,
5124                                alias: make_alias_ident(alias),
5125                                column_aliases: aliases,
5126                                pre_alias_comments: Vec::new(),
5127                                trailing_comments: Vec::new(),
5128                                inferred_type: None,
5129                            })),
5130                        };
5131                    }
5132                } else {
5133                    // No column aliases, just simple alias
5134                    let default_column_aliases = if matches!(
5135                        self.config.dialect,
5136                        Some(crate::dialects::DialectType::ClickHouse)
5137                    ) && matches!(&expr, Expression::Function(func) if func.name.eq_ignore_ascii_case("generate_series"))
5138                    {
5139                        vec![Identifier::new("generate_series")]
5140                    } else {
5141                        Vec::new()
5142                    };
5143                    expr = match expr {
5144                        Expression::Table(mut t) => {
5145                            t.alias = Some(make_alias_ident(alias));
5146                            t.alias_explicit_as = true;
5147                            t.column_aliases = Vec::new();
5148                            Expression::Table(t)
5149                        }
5150                        Expression::Subquery(mut s) => {
5151                            s.alias = Some(make_alias_ident(alias));
5152                            s.column_aliases = Vec::new();
5153                            Expression::Subquery(s)
5154                        }
5155                        Expression::Pivot(mut p) => {
5156                            p.alias = Some(make_alias_ident(alias));
5157                            Expression::Pivot(p)
5158                        }
5159                        Expression::Unpivot(mut u) => {
5160                            u.alias = Some(make_alias_ident(alias));
5161                            Expression::Unpivot(u)
5162                        }
5163                        Expression::MatchRecognize(mut mr) => {
5164                            mr.alias = Some(make_alias_ident(alias));
5165                            mr.alias_explicit_as = true;
5166                            Expression::MatchRecognize(mr)
5167                        }
5168                        Expression::JoinedTable(mut jt) => {
5169                            jt.alias = Some(make_alias_ident(alias));
5170                            Expression::JoinedTable(jt)
5171                        }
5172                        _ => Expression::Alias(Box::new(Alias {
5173                            this: expr,
5174                            alias: make_alias_ident(alias),
5175                            column_aliases: default_column_aliases,
5176                            pre_alias_comments: Vec::new(),
5177                            trailing_comments: Vec::new(),
5178                            inferred_type: None,
5179                        })),
5180                    };
5181                }
5182            } // close the else for AS (col1, col2) handling
5183        } else if (self.check(TokenType::QuotedIdentifier)
5184            || (self.check(TokenType::Var) && !self.check_keyword() && !self.check_identifier("MATCH_CONDITION")
5185                && !(self.check_identifier("ARRAY") && self.check_next(TokenType::Join)
5186                     && matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse)))
5187                // TSQL: OPTION(LABEL = 'foo') is a query hint, not an alias
5188                && !(self.check_identifier("OPTION") && self.check_next(TokenType::LParen))
5189                // MySQL: LOCK IN SHARE MODE is a locking clause, not an alias
5190                && !(self.check_identifier("LOCK") && self.check_next(TokenType::In))
5191                // ClickHouse: PARALLEL WITH is a statement separator, not a table alias
5192                && !(self.check_identifier("PARALLEL") && self.check_next(TokenType::With)
5193                     && matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse)))
5194                // DuckDB: POSITIONAL JOIN is a join method, not a table alias
5195                && !(self.check_identifier("POSITIONAL") && self.check_next(TokenType::Join))))
5196            || self.is_command_keyword_as_alias()
5197            // ClickHouse: allow FIRST/LAST as implicit table aliases
5198            // (they're keywords used in NULLS FIRST/LAST but also valid as identifiers)
5199            || (matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse))
5200                && (self.check(TokenType::First) || self.check(TokenType::Last)))
5201            // PIVOT/UNPIVOT can be table aliases when not followed by clause-starting tokens
5202            || (self.check(TokenType::Pivot) && !self.check_next(TokenType::LParen))
5203            || (self.check(TokenType::Unpivot) && !self.is_unpivot_clause_start())
5204            // PARTITION can be a table alias when the dialect doesn't support partition selection
5205            || (self.check(TokenType::Partition) && !matches!(
5206                self.config.dialect,
5207                Some(crate::dialects::DialectType::MySQL)
5208                | Some(crate::dialects::DialectType::SingleStore)
5209                | Some(crate::dialects::DialectType::Doris)
5210                | Some(crate::dialects::DialectType::StarRocks)
5211            ))
5212            || (self.check(TokenType::Window) && {
5213                // WINDOW can be a table alias if NOT followed by an identifier (window definition)
5214                let next_pos = self.current + 1;
5215                next_pos >= self.tokens.len()
5216                    || (self.tokens[next_pos].token_type != TokenType::Var
5217                        && self.tokens[next_pos].token_type != TokenType::Identifier)
5218            })
5219        {
5220            // Implicit alias (but not MATCH_CONDITION which is a join condition keyword)
5221            // Also allow command keywords (GET, PUT, etc.) and WINDOW (when not a clause) as implicit table aliases
5222            let is_keyword_alias = self.peek().token_type.is_keyword();
5223            let is_quoted_alias = self.peek().token_type == TokenType::QuotedIdentifier;
5224            let alias = self.advance().text.clone();
5225            // Check for column aliases: t(c1, c2)
5226            // Use expect_identifier_or_keyword to allow keywords like KEY, INDEX, VALUE as column aliases
5227            let mut column_aliases = if self.match_token(TokenType::LParen) {
5228                let mut aliases = Vec::new();
5229                loop {
5230                    aliases.push(Identifier::new(self.expect_identifier_or_keyword()?));
5231                    if !self.match_token(TokenType::Comma) {
5232                        break;
5233                    }
5234                }
5235                self.expect(TokenType::RParen)?;
5236                aliases
5237            } else {
5238                Vec::new()
5239            };
5240            if column_aliases.is_empty()
5241                && matches!(
5242                    self.config.dialect,
5243                    Some(crate::dialects::DialectType::ClickHouse)
5244                )
5245                && matches!(&expr, Expression::Function(func) if func.name.eq_ignore_ascii_case("generate_series"))
5246            {
5247                column_aliases = vec![Identifier::new("generate_series")];
5248            }
5249            let make_alias_ident = |name: String| -> Identifier {
5250                if is_quoted_alias {
5251                    Identifier::quoted(name)
5252                } else {
5253                    Identifier::new(name)
5254                }
5255            };
5256            expr = match expr {
5257                Expression::Table(mut t) => {
5258                    t.alias = Some(make_alias_ident(alias));
5259                    t.alias_explicit_as = is_keyword_alias;
5260                    t.column_aliases = column_aliases;
5261                    Expression::Table(t)
5262                }
5263                Expression::Subquery(mut s) => {
5264                    s.alias = Some(make_alias_ident(alias));
5265                    s.column_aliases = column_aliases;
5266                    Expression::Subquery(s)
5267                }
5268                Expression::Pivot(mut p) => {
5269                    p.alias = Some(make_alias_ident(alias));
5270                    Expression::Pivot(p)
5271                }
5272                Expression::Unpivot(mut u) => {
5273                    u.alias = Some(make_alias_ident(alias));
5274                    Expression::Unpivot(u)
5275                }
5276                Expression::MatchRecognize(mut mr) => {
5277                    mr.alias = Some(make_alias_ident(alias));
5278                    Expression::MatchRecognize(mr)
5279                }
5280                Expression::JoinedTable(mut jt) => {
5281                    jt.alias = Some(make_alias_ident(alias));
5282                    Expression::JoinedTable(jt)
5283                }
5284                _ => Expression::Alias(Box::new(Alias {
5285                    this: expr,
5286                    alias: make_alias_ident(alias),
5287                    column_aliases,
5288                    pre_alias_comments: Vec::new(),
5289                    trailing_comments: Vec::new(),
5290                    inferred_type: None,
5291                })),
5292            };
5293        }
5294
5295        // ClickHouse: subquery column alias list without alias name: FROM (...) (c0, c1)
5296        if matches!(
5297            self.config.dialect,
5298            Some(crate::dialects::DialectType::ClickHouse)
5299        ) && self.check(TokenType::LParen)
5300            && matches!(&expr, Expression::Subquery(s) if s.alias.is_none())
5301        {
5302            // Lookahead: check if this is (identifier, identifier, ...) — column alias list
5303            let mut look = self.current + 1;
5304            let mut is_col_list = true;
5305            let mut col_count = 0;
5306            loop {
5307                if look >= self.tokens.len() {
5308                    is_col_list = false;
5309                    break;
5310                }
5311                let tt = self.tokens[look].token_type;
5312                if tt == TokenType::Identifier
5313                    || tt == TokenType::Var
5314                    || tt == TokenType::QuotedIdentifier
5315                    || tt.is_keyword()
5316                {
5317                    col_count += 1;
5318                    look += 1;
5319                } else {
5320                    is_col_list = false;
5321                    break;
5322                }
5323                if look >= self.tokens.len() {
5324                    is_col_list = false;
5325                    break;
5326                }
5327                if self.tokens[look].token_type == TokenType::Comma {
5328                    look += 1;
5329                } else if self.tokens[look].token_type == TokenType::RParen {
5330                    break;
5331                } else {
5332                    is_col_list = false;
5333                    break;
5334                }
5335            }
5336            if is_col_list && col_count >= 1 {
5337                self.skip(); // consume LParen
5338                let mut aliases = Vec::new();
5339                loop {
5340                    aliases.push(Identifier::new(self.advance().text.clone()));
5341                    if !self.match_token(TokenType::Comma) {
5342                        break;
5343                    }
5344                }
5345                self.expect(TokenType::RParen)?;
5346                if let Expression::Subquery(ref mut s) = expr {
5347                    s.column_aliases = aliases;
5348                }
5349            }
5350        }
5351
5352        // ClickHouse FINAL modifier: table [AS alias] FINAL
5353        if matches!(
5354            self.config.dialect,
5355            Some(crate::dialects::DialectType::ClickHouse)
5356        ) && self.match_token(TokenType::Final)
5357        {
5358            if let Expression::Table(ref mut table) = expr {
5359                table.final_ = true;
5360            }
5361        }
5362
5363        // Check for SQLite INDEXED BY after alias: t AS t INDEXED BY idx
5364        if self.check_identifier("INDEXED") {
5365            self.skip(); // consume INDEXED
5366            self.expect(TokenType::By)?;
5367            let first_part = self.expect_identifier_or_keyword()?;
5368            let index_name = if self.match_token(TokenType::Dot) {
5369                let second_part = self.expect_identifier_or_keyword()?;
5370                format!("{}.{}", first_part, second_part)
5371            } else {
5372                first_part
5373            };
5374            if let Expression::Table(ref mut table) = expr {
5375                table.hints.push(Expression::Identifier(Identifier {
5376                    name: format!("INDEXED BY {}", index_name),
5377                    quoted: false,
5378                    trailing_comments: Vec::new(),
5379                    span: None,
5380                }));
5381            }
5382        }
5383
5384        // Check for TSQL table hints after alias: t o WITH (NOLOCK), t AS a WITH (TABLOCK)
5385        if self.check(TokenType::With) && self.check_next(TokenType::LParen) {
5386            if let Expression::Table(ref mut table) = expr {
5387                if let Some(hint_expr) = self.parse_table_hints()? {
5388                    match hint_expr {
5389                        Expression::Tuple(tuple) => {
5390                            table.hints = tuple.expressions;
5391                        }
5392                        other => {
5393                            table.hints = vec![other];
5394                        }
5395                    }
5396                }
5397            }
5398        }
5399
5400        // Check for MySQL index hints after alias: t e USE INDEX (idx), t AS a IGNORE INDEX (idx)
5401        if self.check_keyword_text("USE")
5402            || self.check(TokenType::Ignore)
5403            || self.check_keyword_text("FORCE")
5404        {
5405            let next_idx = self.current + 1;
5406            let is_index_hint = next_idx < self.tokens.len() && {
5407                let next_text = &self.tokens[next_idx].text;
5408                next_text.eq_ignore_ascii_case("INDEX") || next_text.eq_ignore_ascii_case("KEY")
5409            };
5410            if is_index_hint {
5411                if let Expression::Table(ref mut table) = expr {
5412                    if let Some(hint_expr) = self.parse_table_hints()? {
5413                        match hint_expr {
5414                            Expression::Tuple(tuple) => {
5415                                table.hints = tuple.expressions;
5416                            }
5417                            other => {
5418                                table.hints = vec![other];
5419                            }
5420                        }
5421                    }
5422                }
5423            }
5424        }
5425
5426        // Check for PIVOT/UNPIVOT after alias (some dialects allow this order)
5427        // Only treat as PIVOT/UNPIVOT clause when followed by ( — otherwise it's a table alias
5428        if self.check(TokenType::Pivot) && self.check_next(TokenType::LParen) {
5429            self.skip(); // consume PIVOT
5430            expr = self.parse_pivot(expr)?;
5431        } else if self.check(TokenType::Unpivot) && self.is_unpivot_clause_start() {
5432            self.skip(); // consume UNPIVOT
5433            expr = self.parse_unpivot(expr)?;
5434        }
5435        // Handle PIVOT/UNPIVOT alias: PIVOT(...) AS pvt
5436        if matches!(&expr, Expression::Pivot(_) | Expression::Unpivot(_)) {
5437            if self.match_token(TokenType::As) {
5438                let alias = self.expect_identifier_or_alias_keyword_with_quoted()?;
5439                match &mut expr {
5440                    Expression::Pivot(p) => p.alias = Some(alias),
5441                    Expression::Unpivot(u) => u.alias = Some(alias),
5442                    _ => {}
5443                }
5444            } else if !self.check_keyword()
5445                && (self.check(TokenType::Var) || self.check(TokenType::QuotedIdentifier))
5446            {
5447                let tok = self.advance();
5448                let alias = if tok.token_type == TokenType::QuotedIdentifier {
5449                    Identifier::quoted(tok.text.clone())
5450                } else {
5451                    Identifier::new(tok.text.clone())
5452                };
5453                match &mut expr {
5454                    Expression::Pivot(p) => p.alias = Some(alias),
5455                    Expression::Unpivot(u) => u.alias = Some(alias),
5456                    _ => {}
5457                }
5458            }
5459        }
5460
5461        // Check for Redshift AT index clause for array unnesting
5462        // Syntax: table_alias.array_column AS element_alias AT index_alias
5463        // e.g., c.c_orders AS orders AT index
5464        // https://docs.aws.amazon.com/redshift/latest/dg/query-super.html
5465        if self.match_identifier("AT") {
5466            let index_alias = self.expect_identifier_or_keyword()?;
5467            // Convert the table expression to a column for AtIndex
5468            let column_expr = match expr {
5469                Expression::Table(t) => {
5470                    // Convert Table to Column reference
5471                    // For c.c_orders, table=c, name=c_orders -> column name should be c.c_orders
5472                    let mut parts = Vec::new();
5473                    if let Some(cat) = t.catalog {
5474                        parts.push(cat.name);
5475                    }
5476                    if let Some(schema) = t.schema {
5477                        parts.push(schema.name);
5478                    }
5479                    parts.push(t.name.name);
5480                    let col_name = parts.join(".");
5481                    let alias_expr = if let Some(alias) = t.alias {
5482                        Expression::Alias(Box::new(Alias {
5483                            this: Expression::boxed_column(Column {
5484                                name: Identifier::new(&col_name),
5485                                table: None,
5486                                join_mark: false,
5487                                trailing_comments: Vec::new(),
5488                                span: None,
5489                                inferred_type: None,
5490                            }),
5491                            alias,
5492                            column_aliases: t.column_aliases,
5493                            pre_alias_comments: Vec::new(),
5494                            trailing_comments: t.trailing_comments,
5495                            inferred_type: None,
5496                        }))
5497                    } else {
5498                        Expression::boxed_column(Column {
5499                            name: Identifier::new(&col_name),
5500                            table: None,
5501                            join_mark: false,
5502                            trailing_comments: t.trailing_comments,
5503                            span: None,
5504                            inferred_type: None,
5505                        })
5506                    };
5507                    alias_expr
5508                }
5509                other => other, // Keep as is for non-table expressions
5510            };
5511            expr = Expression::AtIndex(Box::new(AtIndex {
5512                this: Box::new(column_expr),
5513                expression: Box::new(Expression::Identifier(Identifier::new(index_alias))),
5514            }));
5515        }
5516
5517        // Check for TABLESAMPLE/SAMPLE after alias (Snowflake ALIAS_POST_TABLESAMPLE)
5518        // e.g., table2 AS t2 TABLESAMPLE BERNOULLI (50), table2 AS t2 SAMPLE ROW (0)
5519        if self.check(TokenType::TableSample) || self.check(TokenType::Sample) {
5520            if let Some(sample) = self.parse_table_level_sample()? {
5521                // Capture trailing comments after the SAMPLE clause (e.g., -- 25% of rows in table1)
5522                let post_sample_comments = self.previous_trailing_comments().to_vec();
5523                if let Expression::Table(ref mut table) = expr {
5524                    table.table_sample = Some(Box::new(sample));
5525                    if !post_sample_comments.is_empty() {
5526                        table.trailing_comments.extend(post_sample_comments);
5527                    }
5528                } else {
5529                    // For non-Table expressions, wrap in TableSample expression node
5530                    expr = Expression::TableSample(Box::new(crate::expressions::TableSample {
5531                        this: Some(Box::new(expr)),
5532                        sample: Some(Box::new(sample)),
5533                        expressions: Vec::new(),
5534                        method: None,
5535                        bucket_numerator: None,
5536                        bucket_denominator: None,
5537                        bucket_field: None,
5538                        percent: None,
5539                        rows: None,
5540                        size: None,
5541                        seed: None,
5542                    }));
5543                }
5544            }
5545        }
5546
5547        // Apply PostgreSQL ONLY modifier if present
5548        if has_only {
5549            if let Expression::Table(ref mut table) = expr {
5550                table.only = true;
5551            }
5552        }
5553
5554        // BigQuery: FOR SYSTEM_TIME AS OF after alias
5555        // e.g., FROM foo AS t0 FOR SYSTEM_TIME AS OF '2026-01-01'
5556        if self.check(TokenType::For)
5557            && self.current + 1 < self.tokens.len()
5558            && self.tokens[self.current + 1]
5559                .text
5560                .eq_ignore_ascii_case("SYSTEM_TIME")
5561        {
5562            self.skip(); // consume FOR
5563            self.skip(); // consume SYSTEM_TIME
5564            if self.match_token(TokenType::As) && self.check_keyword_text("OF") {
5565                self.skip(); // consume OF
5566                let start = self.current;
5567                // Collect expression tokens until clause boundary
5568                while !self.is_at_end()
5569                    && !self.check(TokenType::Semicolon)
5570                    && !self.check(TokenType::Where)
5571                    && !self.check(TokenType::Join)
5572                    && !self.check(TokenType::Left)
5573                    && !self.check(TokenType::Right)
5574                    && !self.check(TokenType::Inner)
5575                    && !self.check(TokenType::Outer)
5576                    && !self.check(TokenType::Full)
5577                    && !self.check(TokenType::Cross)
5578                    && !self.check(TokenType::Order)
5579                    && !self.check(TokenType::Group)
5580                    && !self.check(TokenType::Having)
5581                    && !self.check(TokenType::Limit)
5582                    && !self.check(TokenType::Union)
5583                    && !self.check(TokenType::Except)
5584                    && !self.check(TokenType::Intersect)
5585                    && !self.check(TokenType::Comma)
5586                    && !self.check(TokenType::RParen)
5587                {
5588                    self.skip();
5589                }
5590                let expr_text = self.tokens_to_sql(start, self.current);
5591                let system_time_str = format!("FOR SYSTEM_TIME AS OF {}", expr_text);
5592                if let Expression::Table(ref mut table) = expr {
5593                    table.system_time = Some(system_time_str);
5594                }
5595            }
5596        }
5597
5598        // BigQuery INFORMATION_SCHEMA handling
5599        // When INFORMATION_SCHEMA is part of a table reference, merge it with the table name
5600        // into a single quoted identifier and auto-add an alias if not present
5601        if matches!(
5602            self.config.dialect,
5603            Some(crate::dialects::DialectType::BigQuery)
5604        ) {
5605            if let Expression::Table(ref mut table) = expr {
5606                // Case 1: Single quoted identifier containing INFORMATION_SCHEMA (e.g., `proj.dataset.INFORMATION_SCHEMA.SOME_VIEW`)
5607                // Add an alias that is the same as the table name (only if no alias)
5608                if table.schema.is_none() && table.catalog.is_none() && table.alias.is_none() {
5609                    let name_upper = table.name.name.to_ascii_uppercase();
5610                    if name_upper.contains("INFORMATION_SCHEMA.") {
5611                        // Set alias to be the full quoted table name
5612                        table.alias = Some(table.name.clone());
5613                        table.alias_explicit_as = true;
5614                    }
5615                }
5616                // Case 2: Multi-part name where schema part is INFORMATION_SCHEMA
5617                // e.g., region_or_dataset.INFORMATION_SCHEMA.TABLES -> region_or_dataset.`INFORMATION_SCHEMA.TABLES` AS TABLES
5618                // e.g., proj.region_or_dataset.INFORMATION_SCHEMA.TABLES -> proj.region_or_dataset.`INFORMATION_SCHEMA.TABLES` AS TABLES
5619                // This applies even if an alias is already set (we still need to merge the parts)
5620                else if let Some(ref schema) = table.schema {
5621                    if schema.name.eq_ignore_ascii_case("INFORMATION_SCHEMA") {
5622                        // Merge schema (INFORMATION_SCHEMA) with table name into a single quoted identifier
5623                        let merged_name = format!("{}.{}", schema.name, table.name.name);
5624                        let original_table_name = table.name.name.clone();
5625
5626                        // Set alias to original table name (TABLES, VIEWS, etc.) only if no alias exists
5627                        if table.alias.is_none() {
5628                            table.alias = Some(Identifier::new(original_table_name));
5629                            table.alias_explicit_as = true;
5630                        }
5631
5632                        // Create new quoted identifier
5633                        table.name = Identifier {
5634                            name: merged_name,
5635                            quoted: true,
5636                            trailing_comments: Vec::new(),
5637                            span: None,
5638                        };
5639
5640                        // Shift: schema becomes catalog, catalog becomes None or stays
5641                        table.schema = table.catalog.take();
5642                        // catalog is now None
5643                    }
5644                }
5645            }
5646        }
5647
5648        Ok(expr)
5649    }
5650
5651    /// Parse standard PIVOT clause (in FROM clause)
5652    /// PIVOT(agg_func [AS alias], ... FOR column IN (value [AS alias], ...) [GROUP BY ...])
5653    fn parse_pivot(&mut self, source: Expression) -> Result<Expression> {
5654        self.expect(TokenType::LParen)?;
5655
5656        // Parse aggregation functions (comma-separated, may have aliases)
5657        // Stop when we see FOR keyword
5658        // Use parse_primary() to handle keyword function names like FIRST, LAST
5659        let mut expressions = Vec::new();
5660        loop {
5661            if self.check(TokenType::For) || self.check(TokenType::RParen) {
5662                break;
5663            }
5664            // Parse the aggregation expression using parse_primary (handles keyword functions)
5665            let func = self.parse_primary()?;
5666            // Check for alias (AS alias or just identifier after function)
5667            let expr = if self.match_token(TokenType::As) {
5668                // AS alias
5669                let alias_name = self.expect_identifier_or_keyword()?;
5670                Expression::Alias(Box::new(Alias::new(func, Identifier::new(alias_name))))
5671            } else if !self.check(TokenType::Comma)
5672                && !self.check(TokenType::For)
5673                && !self.check(TokenType::RParen)
5674            {
5675                // Implicit alias (no AS keyword): SUM(b) d
5676                if let Some(id) = self.parse_id_var()? {
5677                    let alias_name = match &id {
5678                        Expression::Identifier(ident) => ident.name.clone(),
5679                        Expression::Column(col) => col.name.name.clone(),
5680                        _ => String::new(),
5681                    };
5682                    if !alias_name.is_empty() {
5683                        Expression::Alias(Box::new(Alias::new(func, Identifier::new(alias_name))))
5684                    } else {
5685                        func
5686                    }
5687                } else {
5688                    func
5689                }
5690            } else {
5691                func
5692            };
5693            expressions.push(expr);
5694            if !self.match_token(TokenType::Comma) {
5695                break;
5696            }
5697            // After consuming comma, if next is FOR, break (comma before FOR is optional/dropped)
5698            if self.check(TokenType::For) {
5699                break;
5700            }
5701        }
5702
5703        // FOR column IN (values)
5704        self.expect(TokenType::For)?;
5705
5706        let mut fields = Vec::new();
5707        loop {
5708            let field = self.parse_standard_pivot_in()?;
5709            fields.push(field);
5710
5711            // Check for additional FOR clauses (rare but possible)
5712            if !self.match_token(TokenType::For) {
5713                break;
5714            }
5715        }
5716
5717        // Handle Snowflake's DEFAULT ON NULL (default_value) clause
5718        let default_on_null = if self.match_text_seq(&["DEFAULT", "ON", "NULL"]) {
5719            if self.match_token(TokenType::LParen) {
5720                let val = self.parse_expression()?;
5721                self.expect(TokenType::RParen)?;
5722                Some(Box::new(val))
5723            } else {
5724                None
5725            }
5726        } else {
5727            None
5728        };
5729
5730        // Parse optional GROUP BY inside PIVOT parens
5731        let group = self.parse_group()?;
5732
5733        self.expect(TokenType::RParen)?;
5734
5735        Ok(Expression::Pivot(Box::new(Pivot {
5736            this: source,
5737            expressions,
5738            fields,
5739            using: Vec::new(),
5740            group: group.map(Box::new),
5741            unpivot: false,
5742            into: None,
5743            alias: None,
5744            include_nulls: None,
5745            default_on_null,
5746            with: None,
5747        })))
5748    }
5749
5750    /// Parse FOR column IN (...) part of standard PIVOT
5751    fn parse_standard_pivot_in(&mut self) -> Result<Expression> {
5752        // Parse the column being pivoted
5753        let column = self.parse_primary()?;
5754
5755        // IN keyword
5756        self.expect(TokenType::In)?;
5757
5758        // IN values - can be parenthesized or bare identifier
5759        if self.match_token(TokenType::LParen) {
5760            // Check for ANY keyword
5761            let in_exprs = if self.match_text_seq(&["ANY"]) {
5762                let order = self.parse_order()?;
5763                vec![Expression::PivotAny(Box::new(PivotAny {
5764                    this: order.map(Box::new),
5765                }))]
5766            } else {
5767                // Parse comma-separated values with optional aliases
5768                let mut vals = Vec::new();
5769                loop {
5770                    if self.check(TokenType::RParen) {
5771                        break;
5772                    }
5773                    if let Some(val) = self.parse_select_or_expression()? {
5774                        // Check for alias - alias can be an identifier or an expression
5775                        // (e.g., 'PREFIX ' || CHR(38) || ' SUFFIX' in Oracle)
5776                        let val = if self.match_token(TokenType::As) {
5777                            // Parse the alias as an expression (not just an identifier)
5778                            // This allows for string concatenation aliases
5779                            let alias_expr = self.parse_bitwise()?.ok_or_else(|| {
5780                                self.parse_error(
5781                                    "Expected expression after AS in PIVOT/UNPIVOT IN clause",
5782                                )
5783                            })?;
5784                            Expression::PivotAlias(Box::new(PivotAlias {
5785                                this: val,
5786                                alias: alias_expr,
5787                            }))
5788                        } else {
5789                            val
5790                        };
5791                        vals.push(val);
5792                    }
5793                    if !self.match_token(TokenType::Comma) {
5794                        break;
5795                    }
5796                }
5797                vals
5798            };
5799            self.expect(TokenType::RParen)?;
5800            Ok(Expression::In(Box::new(In {
5801                this: column,
5802                expressions: in_exprs,
5803                query: None,
5804                not: false,
5805                global: false,
5806                unnest: None,
5807                is_field: false,
5808            })))
5809        } else {
5810            // Bare identifier: FOR foo IN y_enum (no parentheses)
5811            // Store in query field to distinguish from parenthesized IN
5812            let field_id = self.parse_id_var()?.unwrap_or(Expression::Null(Null));
5813            Ok(Expression::In(Box::new(In {
5814                this: column,
5815                expressions: Vec::new(),
5816                query: Some(field_id),
5817                not: false,
5818                global: false,
5819                unnest: None,
5820                is_field: true,
5821            })))
5822        }
5823    }
5824
5825    /// Parse UNPIVOT clause
5826    /// UNPIVOT (value_column FOR name_column IN (col1, col2, ...))
5827    /// UNPIVOT ((col1, col2) FOR name_column IN (col1, col2, ...))
5828    /// UNPIVOT INCLUDE NULLS (value_column FOR name_column IN (...))
5829    /// UNPIVOT EXCLUDE NULLS (value_column FOR name_column IN (...))
5830    fn parse_unpivot(&mut self, source: Expression) -> Result<Expression> {
5831        // Check for optional INCLUDE NULLS or EXCLUDE NULLS
5832        let include_nulls = if self.match_text_seq(&["INCLUDE", "NULLS"]) {
5833            Some(true)
5834        } else if self.match_text_seq(&["EXCLUDE", "NULLS"]) {
5835            Some(false)
5836        } else {
5837            None
5838        };
5839
5840        self.expect(TokenType::LParen)?;
5841
5842        // Value column(s) - can be identifier or (col1, col2, ...)
5843        // Allow keywords as identifiers (e.g., "values" is a common column name in UNPIVOT)
5844        let (value_column, value_column_parenthesized, extra_value_columns) =
5845            if self.match_token(TokenType::LParen) {
5846                // Parenthesized value column(s)
5847                let col = self.expect_identifier_or_keyword()?;
5848                let mut extra_cols = Vec::new();
5849                while self.match_token(TokenType::Comma) {
5850                    extra_cols.push(Identifier::new(self.expect_identifier_or_keyword()?));
5851                }
5852                self.expect(TokenType::RParen)?;
5853                (Identifier::new(col), true, extra_cols)
5854            } else {
5855                (
5856                    Identifier::new(self.expect_identifier_or_keyword()?),
5857                    false,
5858                    Vec::new(),
5859                )
5860            };
5861
5862        // FOR name_column
5863        self.expect(TokenType::For)?;
5864        let name_column = Identifier::new(self.expect_identifier_or_keyword()?);
5865
5866        // IN (columns with optional aliases)
5867        // Format: col1 [AS alias1], col2 [AS alias2], ...
5868        // Or tuple format: (col1, col2) [AS alias1], (col3, col4) [AS alias2], ...
5869        // Aliases can be expressions like 'PREFIX ' || CHR(38) || ' SUFFIX'
5870        self.expect(TokenType::In)?;
5871        self.expect(TokenType::LParen)?;
5872        let columns = {
5873            let mut cols = Vec::new();
5874            loop {
5875                if self.check(TokenType::RParen) {
5876                    break;
5877                }
5878                // Check if this is a tuple of columns: (col1, col2)
5879                let col_expr = if self.check(TokenType::LParen) {
5880                    // Could be a tuple of columns for multi-value unpivot
5881                    let saved = self.current;
5882                    self.skip(); // consume (
5883                                 // Try parsing as identifier list (tuple of columns)
5884                    let mut tuple_cols = Vec::new();
5885                    let first = self.expect_identifier_or_keyword();
5886                    if let Ok(first_id) = first {
5887                        tuple_cols.push(Expression::column(first_id));
5888                        while self.match_token(TokenType::Comma) {
5889                            if let Ok(id) = self.expect_identifier_or_keyword() {
5890                                tuple_cols.push(Expression::column(id));
5891                            } else {
5892                                break;
5893                            }
5894                        }
5895                        if self.match_token(TokenType::RParen) && tuple_cols.len() > 1 {
5896                            // Successful tuple parse
5897                            Some(Expression::Tuple(Box::new(Tuple {
5898                                expressions: tuple_cols,
5899                            })))
5900                        } else {
5901                            // Not a tuple, backtrack
5902                            self.current = saved;
5903                            self.parse_select_or_expression()?
5904                        }
5905                    } else {
5906                        // Not an identifier, backtrack
5907                        self.current = saved;
5908                        self.parse_select_or_expression()?
5909                    }
5910                } else {
5911                    self.parse_select_or_expression()?
5912                };
5913
5914                if let Some(col) = col_expr {
5915                    // Check for alias
5916                    let col = if self.match_token(TokenType::As) {
5917                        // Parse the alias as an expression (allows string concatenation)
5918                        let alias_expr = self.parse_bitwise()?.ok_or_else(|| {
5919                            self.parse_error("Expected expression after AS in UNPIVOT IN clause")
5920                        })?;
5921                        Expression::PivotAlias(Box::new(PivotAlias {
5922                            this: col,
5923                            alias: alias_expr,
5924                        }))
5925                    } else {
5926                        col
5927                    };
5928                    cols.push(col);
5929                }
5930                if !self.match_token(TokenType::Comma) {
5931                    break;
5932                }
5933            }
5934            cols
5935        };
5936        self.expect(TokenType::RParen)?;
5937
5938        self.expect(TokenType::RParen)?;
5939
5940        Ok(Expression::Unpivot(Box::new(Unpivot {
5941            this: source,
5942            value_column,
5943            name_column,
5944            columns,
5945            alias: None,
5946            value_column_parenthesized,
5947            include_nulls,
5948            extra_value_columns,
5949        })))
5950    }
5951
5952    /// Parse Redshift UNPIVOT in FROM clause for SUPER object traversal
5953    /// Syntax: UNPIVOT expr [AS val_alias AT attr_alias]
5954    /// Examples:
5955    ///   FROM t, UNPIVOT t.arr[0]
5956    ///   FROM t, UNPIVOT t.arr AS val AT attr
5957    fn parse_redshift_unpivot_table(&mut self) -> Result<Expression> {
5958        // Parse the expression (column reference with possible array subscript)
5959        // We need to parse a primary expression that can include:
5960        // - Simple column: c.c_orders
5961        // - Array subscript: c.c_orders[0]
5962        // - Multiple subscripts: c.c_orders[0].items[1]
5963        // Using parse_primary which handles column refs with subscripts
5964        let this = self.parse_primary()?;
5965
5966        // Check for optional AS val_alias AT attr_alias
5967        let alias = if self.match_token(TokenType::As) {
5968            let val_alias = self.expect_identifier_or_keyword()?;
5969            // Check for AT attr_alias
5970            if self.match_text_seq(&["AT"]) {
5971                let attr_alias = self.expect_identifier_or_keyword()?;
5972                // Create alias expression that captures both aliases
5973                // We'll use the val_alias as the main alias and store attr_alias in a way
5974                // the generator can reconstruct "AS val AT attr"
5975                Some(Identifier::new(format!("{} AT {}", val_alias, attr_alias)))
5976            } else {
5977                Some(Identifier::new(val_alias))
5978            }
5979        } else {
5980            None
5981        };
5982
5983        // Return a Pivot expression with unpivot=true
5984        // Use the simplified form pattern where:
5985        // - this: the expression being unpivoted
5986        // - expressions: empty (no ON expressions)
5987        // - unpivot: true
5988        // - alias: captured above
5989        Ok(Expression::Pivot(Box::new(Pivot {
5990            this,
5991            expressions: Vec::new(),
5992            fields: Vec::new(),
5993            using: Vec::new(),
5994            group: None,
5995            unpivot: true,
5996            into: None,
5997            alias,
5998            include_nulls: None,
5999            default_on_null: None,
6000            with: None,
6001        })))
6002    }
6003
6004    /// BigQuery: Parse a table part that may contain hyphens (e.g., project-id)
6005    /// Also handles numeric table parts (e.g., foo.bar.25 -> foo.bar.`25`)
6006    /// Returns the identifier, possibly with merged hyphenated parts and quoted flag set.
6007    fn parse_bigquery_table_part(&mut self) -> Result<Identifier> {
6008        use crate::dialects::DialectType;
6009
6010        // Try to parse a number for BigQuery numeric table parts (e.g., foo.bar.25)
6011        if matches!(self.config.dialect, Some(DialectType::BigQuery))
6012            && self.check(TokenType::Number)
6013        {
6014            let num_token = self.advance().clone();
6015            let mut name = num_token.text.clone();
6016
6017            // Check if followed by more connected tokens (e.g., 25x, 25_, 25ab)
6018            // Numbers followed immediately by identifiers without whitespace are merged
6019            while !self.is_at_end() && self.is_connected() {
6020                let tok = self.advance().clone();
6021                name.push_str(&tok.text);
6022            }
6023
6024            return Ok(Identifier {
6025                name,
6026                quoted: true,
6027                trailing_comments: Vec::new(),
6028                span: None,
6029            });
6030        }
6031
6032        // MySQL numeric-starting identifiers (e.g., 00f, 1d)
6033        if matches!(self.config.dialect, Some(DialectType::MySQL)) && self.check(TokenType::Number)
6034        {
6035            let num_token = self.advance().clone();
6036            let mut name = num_token.text.clone();
6037
6038            // Merge with connected identifier/var tokens only (not punctuation)
6039            while !self.is_at_end()
6040                && self.is_connected()
6041                && (self.check(TokenType::Var) || self.check(TokenType::Identifier))
6042            {
6043                let tok = self.advance().clone();
6044                name.push_str(&tok.text);
6045            }
6046
6047            return Ok(Identifier {
6048                name,
6049                quoted: true,
6050                trailing_comments: Vec::new(),
6051                span: None,
6052            });
6053        }
6054
6055        let mut ident = self.expect_identifier_or_keyword_with_quoted()?;
6056
6057        // BigQuery: merge hyphenated parts (e.g., pro-ject_id -> `pro-ject_id`)
6058        if matches!(self.config.dialect, Some(DialectType::BigQuery)) && !ident.quoted {
6059            // Check if next token is a dash and it looks connected (no space)
6060            if self.check(TokenType::Dash) && self.is_connected_dash() {
6061                let mut name = ident.name.clone();
6062
6063                while self.check(TokenType::Dash) && self.is_connected_dash() {
6064                    self.skip(); // consume dash
6065                    name.push('-');
6066                    // Consume the next part
6067                    let part = self.advance().clone();
6068                    name.push_str(&part.text);
6069                    // Continue consuming connected tokens (for things like a-b-c)
6070                    while !self.is_at_end()
6071                        && self.is_connected()
6072                        && !self.check(TokenType::Dot)
6073                        && !self.check(TokenType::Dash)
6074                        && !self.check(TokenType::LParen)
6075                        && !self.check(TokenType::RParen)
6076                    {
6077                        let tok = self.advance().clone();
6078                        name.push_str(&tok.text);
6079                    }
6080                }
6081
6082                ident = Identifier {
6083                    name,
6084                    quoted: false,
6085                    trailing_comments: Vec::new(),
6086                    span: None,
6087                };
6088            }
6089        }
6090
6091        Ok(ident)
6092    }
6093
6094    /// Check if the current dash token is "connected" to the next token
6095    /// (i.e., the dash and next token are part of a hyphenated identifier)
6096    fn is_connected_dash(&self) -> bool {
6097        if !self.check(TokenType::Dash) {
6098            return false;
6099        }
6100        if self.current + 1 >= self.tokens.len() {
6101            return false;
6102        }
6103        let dash_token = &self.tokens[self.current];
6104        let next_token = &self.tokens[self.current + 1];
6105
6106        // The next token after dash must be an identifier, number, or keyword
6107        // and it must be adjacent (no whitespace between dash and next token)
6108        let next_is_valid = matches!(
6109            next_token.token_type,
6110            TokenType::Identifier
6111                | TokenType::Var
6112                | TokenType::Number
6113                | TokenType::All
6114                | TokenType::Select
6115                | TokenType::From
6116                | TokenType::Where
6117        ) || next_token.token_type.is_keyword();
6118
6119        // Check adjacency: dash ends at dash.end, next starts at next.start
6120        let adjacent = dash_token.span.end + 1 == next_token.span.start
6121            || dash_token.span.end == next_token.span.start;
6122
6123        next_is_valid && adjacent
6124    }
6125
6126    /// Check if the current token is "connected" to the previous token (no whitespace)
6127    fn is_connected(&self) -> bool {
6128        if self.current == 0 || self.current >= self.tokens.len() {
6129            return false;
6130        }
6131        let prev_token = &self.tokens[self.current - 1];
6132        let curr_token = &self.tokens[self.current];
6133        // Tokens are connected if they are immediately adjacent (no characters between them)
6134        // span.end is exclusive, so if prev.end == curr.start, they are adjacent
6135        prev_token.span.end == curr_token.span.start
6136    }
6137
6138    /// Parse a table reference (schema.table format)
6139    fn parse_table_ref(&mut self) -> Result<TableRef> {
6140        // Capture leading comments on the first token (e.g., FROM \n/* comment */\n db.schema.tbl)
6141        let table_ref_leading_comments = self.current_leading_comments().to_vec();
6142        let mut result = self.parse_table_ref_inner()?;
6143        if !table_ref_leading_comments.is_empty() && result.leading_comments.is_empty() {
6144            result.leading_comments = table_ref_leading_comments;
6145        }
6146        Ok(result)
6147    }
6148
6149    fn parse_table_ref_inner(&mut self) -> Result<TableRef> {
6150        // Check for Snowflake IDENTIFIER() function: IDENTIFIER('string') or IDENTIFIER($var)
6151        if self.check_identifier("IDENTIFIER") && self.check_next(TokenType::LParen) {
6152            self.skip(); // consume IDENTIFIER
6153            self.skip(); // consume (
6154                         // Parse the argument: either a string literal, a variable ($foo), or identifier
6155            let arg = if self.check(TokenType::String) {
6156                let s = self.advance().text.clone();
6157                Expression::Literal(Box::new(Literal::String(s)))
6158            } else if self.check(TokenType::Parameter) {
6159                // ?-style parameter
6160                let var = self.advance().text.clone();
6161                Expression::Var(Box::new(crate::expressions::Var { this: var }))
6162            } else if self.check(TokenType::Dollar) {
6163                // $foo style variable - Dollar followed by identifier
6164                self.skip(); // consume $
6165                let var_name = self.expect_identifier()?;
6166                Expression::Var(Box::new(crate::expressions::Var {
6167                    this: format!("${}", var_name),
6168                }))
6169            } else {
6170                // Could be an identifier too
6171                let ident = self.expect_identifier()?;
6172                Expression::Identifier(Identifier::new(ident))
6173            };
6174            self.expect(TokenType::RParen)?;
6175            let trailing_comments = self.previous_trailing_comments().to_vec();
6176            // Create a Function expression to represent IDENTIFIER(arg)
6177            let identifier_func = Expression::Function(Box::new(crate::expressions::Function {
6178                name: "IDENTIFIER".to_string(),
6179                args: vec![arg],
6180                distinct: false,
6181                trailing_comments: Vec::new(),
6182                use_bracket_syntax: false,
6183                no_parens: false,
6184                quoted: false,
6185                span: None,
6186                inferred_type: None,
6187            }));
6188            return Ok(TableRef {
6189                catalog: None,
6190                schema: None,
6191                name: Identifier::empty(),
6192                alias: None,
6193                alias_explicit_as: false,
6194                column_aliases: Vec::new(),
6195                leading_comments: Vec::new(),
6196                trailing_comments,
6197                when: None,
6198                only: false,
6199                final_: false,
6200                table_sample: None,
6201                hints: Vec::new(),
6202                system_time: None,
6203                partitions: Vec::new(),
6204                identifier_func: Some(Box::new(identifier_func)),
6205                changes: None,
6206                version: None,
6207                span: None,
6208            });
6209        }
6210
6211        let first = self.parse_bigquery_table_part()?;
6212
6213        // Check for schema.table format
6214        if self.match_token(TokenType::Dot) {
6215            // Handle TSQL a..b syntax (database..table with empty schema)
6216            if self.check(TokenType::Dot) {
6217                // Two consecutive dots: a..b means catalog..table (empty schema)
6218                self.skip(); // consume second dot
6219                let table = self.parse_bigquery_table_part()?;
6220                let trailing_comments = self.previous_trailing_comments().to_vec();
6221                Ok(TableRef {
6222                    catalog: Some(first),
6223                    schema: Some(Identifier::new("")), // Empty schema represents ..
6224                    name: table,
6225                    alias: None,
6226                    alias_explicit_as: false,
6227                    column_aliases: Vec::new(),
6228                    leading_comments: Vec::new(),
6229                    trailing_comments,
6230                    when: None,
6231                    only: false,
6232                    final_: false,
6233                    table_sample: None,
6234                    hints: Vec::new(),
6235                    system_time: None,
6236                    partitions: Vec::new(),
6237                    identifier_func: None,
6238                    changes: None,
6239                    version: None,
6240                    span: None,
6241                })
6242            } else {
6243                // BigQuery: handle x.* wildcard table reference (e.g., SELECT * FROM x.*)
6244                // After the first dot, if we see a Star token, it's a wildcard table name
6245                if matches!(
6246                    self.config.dialect,
6247                    Some(crate::dialects::DialectType::BigQuery)
6248                ) && self.check(TokenType::Star)
6249                {
6250                    self.skip(); // consume *
6251                    let trailing_comments = self.previous_trailing_comments().to_vec();
6252                    return Ok(TableRef {
6253                        catalog: None,
6254                        schema: Some(first),
6255                        name: Identifier::new("*"),
6256                        alias: None,
6257                        alias_explicit_as: false,
6258                        column_aliases: Vec::new(),
6259                        leading_comments: Vec::new(),
6260                        trailing_comments,
6261                        when: None,
6262                        only: false,
6263                        final_: false,
6264                        table_sample: None,
6265                        hints: Vec::new(),
6266                        system_time: None,
6267                        partitions: Vec::new(),
6268                        identifier_func: None,
6269                        changes: None,
6270                        version: None,
6271                        span: None,
6272                    });
6273                }
6274                let table = self.parse_bigquery_table_part()?;
6275                // Check for catalog.schema.table format
6276                if self.match_token(TokenType::Dot) {
6277                    // BigQuery: handle a.b.* wildcard table reference
6278                    if matches!(
6279                        self.config.dialect,
6280                        Some(crate::dialects::DialectType::BigQuery)
6281                    ) && self.check(TokenType::Star)
6282                    {
6283                        self.skip(); // consume *
6284                        let trailing_comments = self.previous_trailing_comments().to_vec();
6285                        return Ok(TableRef {
6286                            catalog: Some(first),
6287                            schema: Some(table),
6288                            name: Identifier::new("*"),
6289                            alias: None,
6290                            alias_explicit_as: false,
6291                            column_aliases: Vec::new(),
6292                            leading_comments: Vec::new(),
6293                            trailing_comments,
6294                            when: None,
6295                            only: false,
6296                            final_: false,
6297                            table_sample: None,
6298                            hints: Vec::new(),
6299                            system_time: None,
6300                            partitions: Vec::new(),
6301                            identifier_func: None,
6302                            changes: None,
6303                            version: None,
6304                            span: None,
6305                        });
6306                    }
6307                    let actual_table = self.parse_bigquery_table_part()?;
6308                    let trailing_comments = self.previous_trailing_comments().to_vec();
6309                    Ok(TableRef {
6310                        catalog: Some(first),
6311                        schema: Some(table),
6312                        name: actual_table,
6313                        alias: None,
6314                        alias_explicit_as: false,
6315                        column_aliases: Vec::new(),
6316                        leading_comments: Vec::new(),
6317                        trailing_comments,
6318                        when: None,
6319                        only: false,
6320                        final_: false,
6321                        table_sample: None,
6322                        hints: Vec::new(),
6323                        system_time: None,
6324                        partitions: Vec::new(),
6325                        identifier_func: None,
6326                        changes: None,
6327                        version: None,
6328                        span: None,
6329                    })
6330                } else {
6331                    let trailing_comments = self.previous_trailing_comments().to_vec();
6332                    Ok(TableRef {
6333                        catalog: None,
6334                        schema: Some(first),
6335                        name: table,
6336                        alias: None,
6337                        alias_explicit_as: false,
6338                        column_aliases: Vec::new(),
6339                        leading_comments: Vec::new(),
6340                        trailing_comments,
6341                        when: None,
6342                        only: false,
6343                        final_: false,
6344                        table_sample: None,
6345                        hints: Vec::new(),
6346                        system_time: None,
6347                        partitions: Vec::new(),
6348                        identifier_func: None,
6349                        changes: None,
6350                        version: None,
6351                        span: None,
6352                    })
6353                }
6354            }
6355        } else {
6356            let trailing_comments = self.previous_trailing_comments().to_vec();
6357            Ok(TableRef {
6358                catalog: None,
6359                schema: None,
6360                name: first,
6361                alias: None,
6362                alias_explicit_as: false,
6363                column_aliases: Vec::new(),
6364                leading_comments: Vec::new(),
6365                trailing_comments,
6366                when: None,
6367                only: false,
6368                final_: false,
6369                table_sample: None,
6370                hints: Vec::new(),
6371                system_time: None,
6372                partitions: Vec::new(),
6373                identifier_func: None,
6374                changes: None,
6375                version: None,
6376                span: None,
6377            })
6378        }
6379    }
6380
6381    /// Parse a datetime field for EXTRACT function (YEAR, MONTH, DAY, etc.)
6382    fn parse_datetime_field(&mut self) -> Result<DateTimeField> {
6383        let token = self.advance();
6384        let original_name = token.text.clone();
6385        let name = original_name.to_ascii_uppercase();
6386        match name.as_str() {
6387            "YEAR" => Ok(DateTimeField::Year),
6388            "MONTH" => Ok(DateTimeField::Month),
6389            "DAY" => Ok(DateTimeField::Day),
6390            "HOUR" => Ok(DateTimeField::Hour),
6391            "MINUTE" => Ok(DateTimeField::Minute),
6392            "SECOND" => Ok(DateTimeField::Second),
6393            "MILLISECOND" => Ok(DateTimeField::Millisecond),
6394            "MICROSECOND" => Ok(DateTimeField::Microsecond),
6395            "DOW" | "DAYOFWEEK" => Ok(DateTimeField::DayOfWeek),
6396            "DOY" | "DAYOFYEAR" => Ok(DateTimeField::DayOfYear),
6397            "WEEK" => {
6398                // Check for modifier like WEEK(monday)
6399                if self.match_token(TokenType::LParen) {
6400                    let modifier = self.expect_identifier_or_keyword()?;
6401                    self.expect(TokenType::RParen)?;
6402                    Ok(DateTimeField::WeekWithModifier(modifier))
6403                } else {
6404                    Ok(DateTimeField::Week)
6405                }
6406            }
6407            "QUARTER" => Ok(DateTimeField::Quarter),
6408            "EPOCH" => Ok(DateTimeField::Epoch),
6409            "TIMEZONE" => Ok(DateTimeField::Timezone),
6410            "TIMEZONE_HOUR" => Ok(DateTimeField::TimezoneHour),
6411            "TIMEZONE_MINUTE" => Ok(DateTimeField::TimezoneMinute),
6412            "DATE" => Ok(DateTimeField::Date),
6413            "TIME" => Ok(DateTimeField::Time),
6414            // Allow arbitrary field names for dialect-specific functionality
6415            _ => Ok(DateTimeField::Custom(original_name)),
6416        }
6417    }
6418
6419    /// Parse a table expression followed by any joins
6420    /// Used for parenthesized join expressions like (tbl1 CROSS JOIN tbl2)
6421    fn parse_table_expression_with_joins(&mut self) -> Result<(Expression, Vec<Join>)> {
6422        // First parse the left table expression
6423        let left = self.parse_table_expression()?;
6424
6425        // Then parse any joins
6426        let joins = self.parse_joins()?;
6427
6428        Ok((left, joins))
6429    }
6430
6431    /// Parse JOIN clauses
6432    ///
6433    /// Supports right-associative chained JOINs where ON/USING clauses are assigned right-to-left:
6434    /// - `a JOIN b JOIN c ON cond1 ON cond2` means `a JOIN (b JOIN c ON cond1) ON cond2`
6435    /// - The rightmost ON applies to the rightmost unconditioned JOIN
6436    fn parse_joins(&mut self) -> Result<Vec<Join>> {
6437        let mut joins = Vec::with_capacity(2);
6438        let mut nesting_group: usize = 0;
6439
6440        // Loop: Phase 1 (parse JOINs) + Phase 2 (assign deferred conditions)
6441        // After phase 2, if there are more JOIN keywords, continue with another round
6442        loop {
6443            let joins_before = joins.len();
6444
6445            // Phase 1: Parse all JOINs with optional inline ON/USING conditions
6446            loop {
6447                let pos_before_join_kind = self.current;
6448                let join_kind_result = self.try_parse_join_kind();
6449                let (kind, needs_join_keyword, use_inner_keyword, use_outer_keyword, join_hint) =
6450                    match join_kind_result {
6451                        Some(r) => r,
6452                        None => break,
6453                    };
6454                // Collect comments from all tokens consumed by try_parse_join_kind:
6455                // - Leading comments on the first token (comments on a separate line before the join)
6456                // - Trailing comments between join keywords (e.g., INNER /* comment */ JOIN)
6457                let mut join_comments = Vec::new();
6458                // Capture leading comments from the first token of the join kind
6459                if pos_before_join_kind < self.tokens.len() {
6460                    join_comments
6461                        .extend(self.tokens[pos_before_join_kind].comments.iter().cloned());
6462                }
6463                for i in pos_before_join_kind..self.current {
6464                    if i < self.tokens.len() {
6465                        join_comments.extend(self.tokens[i].trailing_comments.iter().cloned());
6466                    }
6467                }
6468                // Snowflake: DIRECTED keyword before JOIN (e.g., CROSS DIRECTED JOIN)
6469                let directed = if needs_join_keyword && self.check_identifier("DIRECTED") {
6470                    self.skip();
6471                    true
6472                } else {
6473                    false
6474                };
6475                if needs_join_keyword {
6476                    self.expect(TokenType::Join)?;
6477                }
6478
6479                // ClickHouse: ARRAY JOIN uses expressions, not table references
6480                let table = if matches!(kind, JoinKind::Array | JoinKind::LeftArray) {
6481                    let mut items = Vec::new();
6482                    // Handle ARRAY JOIN with no arguments (intentional error test)
6483                    if !self.is_at_end()
6484                        && !self.check(TokenType::Semicolon)
6485                        && !self.check(TokenType::RParen)
6486                    {
6487                        loop {
6488                            let expr = self.parse_expression()?;
6489                            let item = if self.match_token(TokenType::As) {
6490                                let alias_name = self.expect_identifier_or_safe_keyword()?;
6491                                Expression::Alias(Box::new(Alias {
6492                                    this: expr,
6493                                    alias: Identifier::new(alias_name),
6494                                    column_aliases: Vec::new(),
6495                                    pre_alias_comments: Vec::new(),
6496                                    trailing_comments: Vec::new(),
6497                                    inferred_type: None,
6498                                }))
6499                            } else {
6500                                expr
6501                            };
6502                            items.push(item);
6503                            if !self.match_token(TokenType::Comma) {
6504                                break;
6505                            }
6506                        }
6507                    } // end if !is_at_end check
6508                    if items.len() == 1 {
6509                        items.pop().unwrap()
6510                    } else if items.is_empty() {
6511                        Expression::Null(Null)
6512                    } else {
6513                        Expression::Tuple(Box::new(Tuple { expressions: items }))
6514                    }
6515                } else {
6516                    self.parse_table_expression()?
6517                };
6518
6519                // Snowflake ASOF JOIN: OFFSET/LIMIT before MATCH_CONDITION are table aliases
6520                let table = if matches!(
6521                    kind,
6522                    JoinKind::AsOf | JoinKind::AsOfLeft | JoinKind::AsOfRight
6523                ) && (self.check(TokenType::Offset) || self.check(TokenType::Limit))
6524                    && self
6525                        .peek_nth(1)
6526                        .map(|t| t.text.eq_ignore_ascii_case("MATCH_CONDITION"))
6527                        == Some(true)
6528                {
6529                    let alias_name = self.advance().text.clone();
6530                    Expression::Alias(Box::new(Alias {
6531                        this: table,
6532                        alias: Identifier::new(alias_name),
6533                        column_aliases: Vec::new(),
6534                        pre_alias_comments: Vec::new(),
6535                        trailing_comments: Vec::new(),
6536                        inferred_type: None,
6537                    }))
6538                } else {
6539                    table
6540                };
6541
6542                // Try to parse inline MATCH_CONDITION/ON/USING (only if not followed by another JOIN)
6543                // We need to peek ahead to see if there's another JOIN keyword coming
6544                let has_match_condition = self.check_identifier("MATCH_CONDITION");
6545                let has_inline_condition = self.check(TokenType::On)
6546                    || self.check(TokenType::Using)
6547                    || has_match_condition;
6548                let next_is_join = self.check_join_keyword();
6549
6550                // Parse MATCH_CONDITION first (Snowflake ASOF JOIN can have MATCH_CONDITION before ON)
6551                let match_condition = if has_match_condition && !next_is_join {
6552                    if self.match_identifier("MATCH_CONDITION") {
6553                        self.expect(TokenType::LParen)?;
6554                        let condition = self.parse_expression()?;
6555                        self.expect(TokenType::RParen)?;
6556                        Some(condition)
6557                    } else {
6558                        None
6559                    }
6560                } else {
6561                    None
6562                };
6563
6564                let (on, using) = if (has_inline_condition || match_condition.is_some())
6565                    && !self.check_join_keyword()
6566                {
6567                    // Parse inline condition only if there's no more JOINs following
6568                    if self.match_token(TokenType::On) {
6569                        (Some(self.parse_expression()?), Vec::new())
6570                    } else if self.match_token(TokenType::Using) {
6571                        // ClickHouse allows USING without parentheses
6572                        let has_parens = self.match_token(TokenType::LParen);
6573                        // Use parse_using_column_list to handle qualified names like t1.col
6574                        let cols = self.parse_using_column_list()?;
6575                        if has_parens {
6576                            self.expect(TokenType::RParen)?;
6577                        }
6578                        (None, cols)
6579                    } else {
6580                        (None, Vec::new())
6581                    }
6582                } else {
6583                    (None, Vec::new())
6584                };
6585
6586                joins.push(Join {
6587                    this: table,
6588                    on,
6589                    using,
6590                    kind,
6591                    use_inner_keyword,
6592                    use_outer_keyword,
6593                    deferred_condition: false,
6594                    join_hint,
6595                    match_condition,
6596                    pivots: Vec::new(),
6597                    comments: join_comments,
6598                    nesting_group,
6599                    directed,
6600                });
6601            }
6602
6603            // Phase 2: Assign deferred ON/USING conditions to unconditioned joins (right-to-left)
6604            // Only consider joins from the current batch (joins_before..)
6605            let unconditioned: Vec<usize> = joins[joins_before..]
6606                .iter()
6607                .enumerate()
6608                .filter(|(_, j)| j.on.is_none() && j.using.is_empty())
6609                .map(|(i, _)| joins_before + i)
6610                .collect();
6611
6612            let mut idx = unconditioned.len();
6613            while idx > 0 {
6614                if self.match_token(TokenType::On) {
6615                    idx -= 1;
6616                    let join_idx = unconditioned[idx];
6617                    joins[join_idx].on = Some(self.parse_expression()?);
6618                    joins[join_idx].deferred_condition = true;
6619                } else if self.match_token(TokenType::Using) {
6620                    idx -= 1;
6621                    let join_idx = unconditioned[idx];
6622                    let has_parens = self.match_token(TokenType::LParen);
6623                    // Handle empty USING ()
6624                    let cols = if has_parens && self.check(TokenType::RParen) {
6625                        Vec::new()
6626                    } else {
6627                        // Use parse_using_column_list to handle qualified names like t1.col
6628                        self.parse_using_column_list()?
6629                    };
6630                    joins[join_idx].using = cols;
6631                    if has_parens {
6632                        self.expect(TokenType::RParen)?;
6633                    }
6634                    joins[join_idx].deferred_condition = true;
6635                } else {
6636                    break;
6637                }
6638            }
6639
6640            // If no new joins were parsed in this round, we're done
6641            if joins.len() == joins_before {
6642                break;
6643            }
6644
6645            // If there are more JOIN keywords after deferred conditions, continue with another round
6646            if !self.check_join_keyword() {
6647                break;
6648            }
6649            nesting_group += 1;
6650        }
6651
6652        Ok(joins)
6653    }
6654
6655    /// Check if the current token starts a JOIN clause
6656    fn check_join_keyword(&self) -> bool {
6657        self.check(TokenType::Join) ||
6658        self.check(TokenType::Inner) ||
6659        self.check(TokenType::Left) ||
6660        self.check(TokenType::Right) ||
6661        self.check(TokenType::Full) ||
6662        self.check(TokenType::Cross) ||
6663        self.check(TokenType::Natural) ||
6664        self.check(TokenType::Outer) ||
6665        // ClickHouse: ARRAY JOIN, GLOBAL JOIN, ALL JOIN, ANY JOIN, PASTE JOIN
6666        (matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse)) &&
6667            (self.check_identifier("ARRAY") || self.check_identifier("GLOBAL") || self.check(TokenType::All) || self.check(TokenType::Any) || self.check_identifier("PASTE")))
6668    }
6669
6670    /// Try to parse a JOIN kind
6671    /// Returns (JoinKind, needs_join_keyword, use_inner_keyword, use_outer_keyword, join_hint)
6672    fn try_parse_join_kind(&mut self) -> Option<(JoinKind, bool, bool, bool, Option<String>)> {
6673        if matches!(
6674            self.config.dialect,
6675            Some(crate::dialects::DialectType::ClickHouse)
6676        ) {
6677            let start = self.current;
6678            let mut global = false;
6679            let mut strictness: Option<String> = None;
6680            let mut kind: Option<JoinKind> = None;
6681            let mut use_outer = false;
6682            let mut use_inner = false;
6683
6684            if self.match_identifier("GLOBAL") {
6685                global = true;
6686            }
6687
6688            loop {
6689                if strictness.is_none() && self.match_token(TokenType::All) {
6690                    strictness = Some("ALL".to_string());
6691                    continue;
6692                }
6693                if strictness.is_none() && self.match_token(TokenType::Any) {
6694                    strictness = Some("ANY".to_string());
6695                    continue;
6696                }
6697                if strictness.is_none() && self.match_token(TokenType::AsOf) {
6698                    strictness = Some("ASOF".to_string());
6699                    continue;
6700                }
6701                if strictness.is_none() && self.match_token(TokenType::Semi) {
6702                    strictness = Some("SEMI".to_string());
6703                    continue;
6704                }
6705                if strictness.is_none() && self.match_token(TokenType::Anti) {
6706                    strictness = Some("ANTI".to_string());
6707                    continue;
6708                }
6709                if kind.is_none() && self.match_token(TokenType::Left) {
6710                    use_outer = self.match_token(TokenType::Outer);
6711                    use_inner = self.match_token(TokenType::Inner);
6712                    kind = Some(JoinKind::Left);
6713                    continue;
6714                }
6715                if kind.is_none() && self.match_token(TokenType::Right) {
6716                    use_outer = self.match_token(TokenType::Outer);
6717                    use_inner = self.match_token(TokenType::Inner);
6718                    kind = Some(JoinKind::Right);
6719                    continue;
6720                }
6721                if kind.is_none() && self.match_token(TokenType::Full) {
6722                    use_outer = self.match_token(TokenType::Outer);
6723                    kind = Some(JoinKind::Full);
6724                    continue;
6725                }
6726                if kind.is_none() && self.match_token(TokenType::Inner) {
6727                    use_inner = true;
6728                    kind = Some(JoinKind::Inner);
6729                    continue;
6730                }
6731                break;
6732            }
6733
6734            // ClickHouse: ARRAY JOIN or LEFT ARRAY JOIN
6735            if self.check_identifier("ARRAY") && self.check_next(TokenType::Join) {
6736                let array_kind = if matches!(kind, Some(JoinKind::Left)) {
6737                    JoinKind::LeftArray
6738                } else {
6739                    JoinKind::Array
6740                };
6741                self.skip(); // consume ARRAY
6742                             // JOIN will be consumed by caller
6743                return Some((array_kind, true, false, false, None));
6744            }
6745
6746            // ClickHouse: PASTE JOIN (positional join, no ON/USING)
6747            if self.check_identifier("PASTE") && self.check_next(TokenType::Join) {
6748                self.skip(); // consume PASTE
6749                             // JOIN will be consumed by caller
6750                return Some((JoinKind::Paste, true, false, false, None));
6751            }
6752
6753            if global || strictness.is_some() || kind.is_some() {
6754                if self.check(TokenType::Join) {
6755                    let join_kind = kind.unwrap_or(JoinKind::Inner);
6756                    let mut hints = Vec::new();
6757                    if global {
6758                        hints.push("GLOBAL".to_string());
6759                    }
6760                    if let Some(strict) = strictness {
6761                        hints.push(strict);
6762                    }
6763                    let join_hint = if hints.is_empty() {
6764                        None
6765                    } else {
6766                        Some(hints.join(" "))
6767                    };
6768                    return Some((join_kind, true, use_inner, use_outer, join_hint));
6769                } else {
6770                    self.current = start;
6771                }
6772            }
6773        }
6774
6775        // Check for ASOF first (DuckDB/Snowflake) - can be followed by LEFT/RIGHT/etc.
6776        if self.match_token(TokenType::AsOf) {
6777            // ASOF can be followed by LEFT, RIGHT, INNER, or standalone
6778            if self.match_token(TokenType::Left) {
6779                let use_outer = self.match_token(TokenType::Outer);
6780                Some((JoinKind::AsOfLeft, true, false, use_outer, None))
6781            } else if self.match_token(TokenType::Right) {
6782                let use_outer = self.match_token(TokenType::Outer);
6783                Some((JoinKind::AsOfRight, true, false, use_outer, None))
6784            } else if self.match_token(TokenType::Inner) {
6785                Some((JoinKind::AsOf, true, true, false, None))
6786            } else {
6787                // Standalone ASOF JOIN
6788                Some((JoinKind::AsOf, true, false, false, None))
6789            }
6790        } else if self.check(TokenType::Inner) {
6791            // Check if INNER is followed by a set operation (BigQuery INNER UNION/INTERSECT/EXCEPT)
6792            // In that case, don't treat it as a JOIN keyword
6793            let saved = self.current;
6794            self.skip(); // consume INNER
6795            if self.check(TokenType::Union)
6796                || self.check(TokenType::Intersect)
6797                || self.check(TokenType::Except)
6798            {
6799                self.current = saved; // backtrack
6800                return None;
6801            }
6802            // Check for TSQL join hints: INNER LOOP JOIN, INNER HASH JOIN, INNER MERGE JOIN
6803            let join_hint = self.parse_tsql_join_hint();
6804            Some((JoinKind::Inner, true, true, false, join_hint)) // INNER keyword was explicit
6805        } else if self.check(TokenType::Left) {
6806            // Check if LEFT is followed by a set operation (BigQuery LEFT UNION/INTERSECT/EXCEPT)
6807            let saved = self.current;
6808            self.skip(); // consume LEFT
6809                         // LEFT can be followed by OUTER/INNER then set op, or directly by set op
6810            let at_set_op = self.check(TokenType::Union)
6811                || self.check(TokenType::Intersect)
6812                || self.check(TokenType::Except);
6813            let at_inner_set_op = self.check(TokenType::Inner) && {
6814                let saved2 = self.current;
6815                self.skip();
6816                let is_setop = self.check(TokenType::Union)
6817                    || self.check(TokenType::Intersect)
6818                    || self.check(TokenType::Except);
6819                self.current = saved2;
6820                is_setop
6821            };
6822            if at_set_op || at_inner_set_op {
6823                self.current = saved; // backtrack
6824                return None;
6825            }
6826            // Continue with normal LEFT JOIN parsing
6827            self.current = saved;
6828            self.match_token(TokenType::Left); // re-consume LEFT
6829            let use_outer = self.match_token(TokenType::Outer);
6830            let use_inner = self.match_token(TokenType::Inner);
6831            let join_hint = self.parse_tsql_join_hint();
6832            // Check for SEMI, ANTI, or LATERAL
6833            if self.match_token(TokenType::Semi) {
6834                Some((JoinKind::LeftSemi, true, use_inner, use_outer, join_hint))
6835            } else if self.match_token(TokenType::Anti) {
6836                Some((JoinKind::LeftAnti, true, use_inner, use_outer, join_hint))
6837            } else if self.match_token(TokenType::Lateral) {
6838                Some((JoinKind::LeftLateral, true, use_inner, use_outer, join_hint))
6839            } else {
6840                Some((JoinKind::Left, true, use_inner, use_outer, join_hint))
6841            }
6842        } else if self.check(TokenType::Right) {
6843            // Check if RIGHT is followed by a set operation (BigQuery RIGHT UNION/INTERSECT/EXCEPT)
6844            let saved = self.current;
6845            self.skip(); // consume RIGHT
6846            let at_set_op = self.check(TokenType::Union)
6847                || self.check(TokenType::Intersect)
6848                || self.check(TokenType::Except);
6849            let at_inner_set_op = self.check(TokenType::Inner) && {
6850                let saved2 = self.current;
6851                self.skip();
6852                let is_setop = self.check(TokenType::Union)
6853                    || self.check(TokenType::Intersect)
6854                    || self.check(TokenType::Except);
6855                self.current = saved2;
6856                is_setop
6857            };
6858            if at_set_op || at_inner_set_op {
6859                self.current = saved; // backtrack
6860                return None;
6861            }
6862            // Continue with normal RIGHT JOIN parsing
6863            self.current = saved;
6864            self.match_token(TokenType::Right); // re-consume RIGHT
6865            let use_outer = self.match_token(TokenType::Outer);
6866            let use_inner = self.match_token(TokenType::Inner);
6867            let join_hint = self.parse_tsql_join_hint();
6868            // Check for SEMI or ANTI
6869            if self.match_token(TokenType::Semi) {
6870                Some((JoinKind::RightSemi, true, use_inner, use_outer, join_hint))
6871            } else if self.match_token(TokenType::Anti) {
6872                Some((JoinKind::RightAnti, true, use_inner, use_outer, join_hint))
6873            } else {
6874                Some((JoinKind::Right, true, use_inner, use_outer, join_hint))
6875            }
6876        } else if self.check(TokenType::Full) {
6877            // Check if FULL is followed by a set operation (BigQuery FULL UNION/INTERSECT/EXCEPT)
6878            let saved = self.current;
6879            self.skip(); // consume FULL
6880            let at_set_op = self.check(TokenType::Union)
6881                || self.check(TokenType::Intersect)
6882                || self.check(TokenType::Except);
6883            let at_inner_set_op = self.check(TokenType::Inner) && {
6884                let saved2 = self.current;
6885                self.skip();
6886                let is_setop = self.check(TokenType::Union)
6887                    || self.check(TokenType::Intersect)
6888                    || self.check(TokenType::Except);
6889                self.current = saved2;
6890                is_setop
6891            };
6892            if at_set_op || at_inner_set_op {
6893                self.current = saved; // backtrack
6894                return None;
6895            }
6896            // Continue with normal FULL JOIN parsing
6897            self.current = saved;
6898            self.match_token(TokenType::Full); // re-consume FULL
6899            let use_outer = self.match_token(TokenType::Outer);
6900            let join_hint = self.parse_tsql_join_hint();
6901            Some((JoinKind::Full, true, false, use_outer, join_hint))
6902        } else if self.match_token(TokenType::Cross) {
6903            // CROSS JOIN or CROSS APPLY
6904            if self.match_token(TokenType::Apply) {
6905                Some((JoinKind::CrossApply, false, false, false, None))
6906            } else {
6907                Some((JoinKind::Cross, true, false, false, None))
6908            }
6909        } else if self.match_token(TokenType::Natural) {
6910            // NATURAL can be followed by LEFT, RIGHT, INNER, FULL, or just JOIN
6911            if self.match_token(TokenType::Left) {
6912                let use_outer = self.match_token(TokenType::Outer);
6913                Some((JoinKind::NaturalLeft, true, false, use_outer, None))
6914            } else if self.match_token(TokenType::Right) {
6915                let use_outer = self.match_token(TokenType::Outer);
6916                Some((JoinKind::NaturalRight, true, false, use_outer, None))
6917            } else if self.match_token(TokenType::Full) {
6918                let use_outer = self.match_token(TokenType::Outer);
6919                Some((JoinKind::NaturalFull, true, false, use_outer, None))
6920            } else if self.match_token(TokenType::Inner) {
6921                Some((JoinKind::Natural, true, true, false, None))
6922            } else {
6923                Some((JoinKind::Natural, true, false, false, None))
6924            }
6925        } else if self.match_token(TokenType::Outer) {
6926            // OUTER APPLY or standalone OUTER JOIN
6927            if self.match_token(TokenType::Apply) {
6928                Some((JoinKind::OuterApply, false, false, true, None))
6929            } else {
6930                // Standalone OUTER JOIN (without LEFT/RIGHT/FULL)
6931                Some((JoinKind::Outer, true, false, true, None))
6932            }
6933        } else if self.check(TokenType::Lateral) {
6934            // Check if this is LATERAL VIEW (Hive/Spark syntax) vs LATERAL JOIN
6935            if self.current + 1 < self.tokens.len()
6936                && self.tokens[self.current + 1].token_type == TokenType::View
6937            {
6938                // LATERAL VIEW is not a JOIN type, return None
6939                None
6940            } else {
6941                self.skip(); // Consume LATERAL
6942                Some((JoinKind::Lateral, true, false, false, None))
6943            }
6944        } else if self.match_token(TokenType::Semi) {
6945            Some((JoinKind::Semi, true, false, false, None))
6946        } else if self.match_token(TokenType::Anti) {
6947            Some((JoinKind::Anti, true, false, false, None))
6948        } else if self.check_identifier("POSITIONAL") && self.check_next(TokenType::Join) {
6949            // DuckDB POSITIONAL JOIN
6950            self.skip(); // consume POSITIONAL
6951            Some((JoinKind::Positional, true, false, false, None))
6952        } else if self.match_token(TokenType::StraightJoin) {
6953            // STRAIGHT_JOIN in MySQL - doesn't need JOIN keyword after it
6954            Some((JoinKind::Straight, false, false, false, None))
6955        } else if self.check(TokenType::Join) {
6956            Some((JoinKind::Inner, true, false, false, None)) // Default JOIN is INNER (without explicit INNER keyword)
6957        } else if self.match_token(TokenType::Comma) {
6958            // Comma-separated tables: FROM a, b (old-style ANSI join syntax)
6959            Some((JoinKind::Implicit, false, false, false, None)) // No JOIN keyword needed
6960        } else {
6961            None
6962        }
6963    }
6964
6965    /// Parse TSQL join hints: LOOP, HASH, MERGE, REMOTE
6966    fn parse_tsql_join_hint(&mut self) -> Option<String> {
6967        if self.check_identifier("LOOP") {
6968            self.skip();
6969            Some("LOOP".to_string())
6970        } else if self.check_identifier("HASH") {
6971            self.skip();
6972            Some("HASH".to_string())
6973        } else if self.check_identifier("REMOTE") {
6974            self.skip();
6975            Some("REMOTE".to_string())
6976        } else if self.check(TokenType::Merge) && {
6977            // Be careful: MERGE is also a keyword for MERGE statement
6978            // Only treat as hint if followed by JOIN
6979            let next_pos = self.current + 1;
6980            next_pos < self.tokens.len() && self.tokens[next_pos].token_type == TokenType::Join
6981        } {
6982            self.skip();
6983            Some("MERGE".to_string())
6984        } else {
6985            None
6986        }
6987    }
6988
6989    /// Parse GROUP BY clause
6990    fn parse_group_by(&mut self) -> Result<GroupBy> {
6991        // Check for optional ALL/DISTINCT modifier
6992        // Some(true) = ALL, Some(false) = DISTINCT, None = no modifier
6993        let all = if self.match_token(TokenType::All) {
6994            Some(true)
6995        } else if self.match_token(TokenType::Distinct) {
6996            Some(false)
6997        } else {
6998            None
6999        };
7000
7001        let mut expressions = Vec::new();
7002
7003        // GROUP BY ALL / GROUP BY DISTINCT without following CUBE/ROLLUP/expressions
7004        // should return early (e.g., Snowflake's "GROUP BY ALL" without column list).
7005        // But in Presto/Trino, ALL/DISTINCT can be followed by CUBE/ROLLUP expressions.
7006        if all.is_some() && self.is_at_query_modifier_or_end() {
7007            return Ok(GroupBy {
7008                expressions,
7009                all,
7010                totals: false,
7011                comments: Vec::new(),
7012            });
7013        }
7014
7015        // GROUP BY ALL WITH ROLLUP/CUBE/TOTALS — skip expression parsing, go straight to modifiers
7016        if all.is_some()
7017            && self.check(TokenType::With)
7018            && (self.check_next(TokenType::Cube)
7019                || self.check_next(TokenType::Rollup)
7020                || self.check_next_identifier("TOTALS"))
7021        {
7022            let mut totals = false;
7023            // Process WITH ROLLUP/CUBE
7024            if self.check_next(TokenType::Cube) || self.check_next(TokenType::Rollup) {
7025                self.skip(); // consume WITH
7026                if self.match_token(TokenType::Cube) {
7027                    expressions.push(Expression::Cube(Box::new(Cube {
7028                        expressions: Vec::new(),
7029                    })));
7030                } else if self.match_token(TokenType::Rollup) {
7031                    expressions.push(Expression::Rollup(Box::new(Rollup {
7032                        expressions: Vec::new(),
7033                    })));
7034                }
7035            }
7036            // Check for WITH TOTALS (possibly chained after ROLLUP/CUBE)
7037            if self.check(TokenType::With) && self.check_next_identifier("TOTALS") {
7038                self.skip(); // WITH
7039                self.skip(); // TOTALS
7040                totals = true;
7041            }
7042            return Ok(GroupBy {
7043                expressions,
7044                all,
7045                totals,
7046                comments: Vec::new(),
7047            });
7048        }
7049
7050        loop {
7051            // Check for GROUPING SETS, CUBE, ROLLUP
7052            let expr = if self.check_identifier("GROUPING")
7053                && self
7054                    .peek_nth(1)
7055                    .map_or(false, |t| t.text.eq_ignore_ascii_case("SETS"))
7056                && {
7057                    self.skip();
7058                    self.skip();
7059                    true
7060                } {
7061                // GROUPING SETS (...)
7062                self.expect(TokenType::LParen)?;
7063                let args = self.parse_grouping_sets_args()?;
7064                self.expect(TokenType::RParen)?;
7065                Expression::Function(Box::new(Function {
7066                    name: "GROUPING SETS".to_string(),
7067                    args,
7068                    distinct: false,
7069                    trailing_comments: Vec::new(),
7070                    use_bracket_syntax: false,
7071                    no_parens: false,
7072                    quoted: false,
7073                    span: None,
7074                    inferred_type: None,
7075                }))
7076            } else if self.match_token(TokenType::Cube) {
7077                // CUBE (...)
7078                self.expect(TokenType::LParen)?;
7079                let args = self.parse_expression_list()?;
7080                self.expect(TokenType::RParen)?;
7081                Expression::Function(Box::new(Function {
7082                    name: "CUBE".to_string(),
7083                    args,
7084                    distinct: false,
7085                    trailing_comments: Vec::new(),
7086                    use_bracket_syntax: false,
7087                    no_parens: false,
7088                    quoted: false,
7089                    span: None,
7090                    inferred_type: None,
7091                }))
7092            } else if self.match_token(TokenType::Rollup) {
7093                // ROLLUP (...)
7094                self.expect(TokenType::LParen)?;
7095                let args = self.parse_expression_list()?;
7096                self.expect(TokenType::RParen)?;
7097                Expression::Function(Box::new(Function {
7098                    name: "ROLLUP".to_string(),
7099                    args,
7100                    distinct: false,
7101                    trailing_comments: Vec::new(),
7102                    use_bracket_syntax: false,
7103                    no_parens: false,
7104                    quoted: false,
7105                    span: None,
7106                    inferred_type: None,
7107                }))
7108            } else {
7109                self.parse_expression()?
7110            };
7111
7112            // ClickHouse: GROUP BY expr AS alias
7113            let expr = if matches!(
7114                self.config.dialect,
7115                Some(crate::dialects::DialectType::ClickHouse)
7116            ) && self.check(TokenType::As)
7117                && !self.check_next(TokenType::LParen)
7118            {
7119                self.skip(); // consume AS
7120                let alias = self.expect_identifier_or_keyword_with_quoted()?;
7121                Expression::Alias(Box::new(Alias::new(expr, alias)))
7122            } else {
7123                expr
7124            };
7125
7126            expressions.push(expr);
7127
7128            if !self.match_token(TokenType::Comma) {
7129                // Allow adjacent CUBE/ROLLUP/GROUPING SETS without comma separator
7130                // e.g., GROUP BY CUBE(a) ROLLUP(b), GROUPING SETS((c, d))
7131                if self.check(TokenType::Cube)
7132                    || self.check(TokenType::Rollup)
7133                    || (self.check_identifier("GROUPING")
7134                        && self
7135                            .peek_nth(1)
7136                            .map_or(false, |t| t.text.eq_ignore_ascii_case("SETS")))
7137                {
7138                    continue;
7139                }
7140                break;
7141            }
7142        }
7143
7144        // Check for trailing WITH CUBE or WITH ROLLUP (Hive/MySQL syntax)
7145        // This is different from CUBE(...) or ROLLUP(...) which are parsed inline above
7146        // Use lookahead to avoid consuming WITH if it's not followed by CUBE or ROLLUP
7147        // (e.g., Redshift's WITH NO SCHEMA BINDING should not be consumed here)
7148        if self.check(TokenType::With)
7149            && (self.check_next(TokenType::Cube) || self.check_next(TokenType::Rollup))
7150        {
7151            self.skip(); // consume WITH
7152            if self.match_token(TokenType::Cube) {
7153                // WITH CUBE - add Cube with empty expressions
7154                expressions.push(Expression::Cube(Box::new(Cube {
7155                    expressions: Vec::new(),
7156                })));
7157            } else if self.match_token(TokenType::Rollup) {
7158                // WITH ROLLUP - add Rollup with empty expressions
7159                expressions.push(Expression::Rollup(Box::new(Rollup {
7160                    expressions: Vec::new(),
7161                })));
7162            }
7163        }
7164
7165        // ClickHouse: WITH TOTALS
7166        let totals = if self.check(TokenType::With) && self.check_next_identifier("TOTALS") {
7167            self.skip(); // consume WITH
7168            self.skip(); // consume TOTALS
7169            true
7170        } else {
7171            false
7172        };
7173
7174        Ok(GroupBy {
7175            expressions,
7176            all,
7177            totals,
7178            comments: Vec::new(),
7179        })
7180    }
7181
7182    /// Parse GROUPING SETS arguments which can include tuples like (x, y), nested GROUPING SETS, CUBE, ROLLUP
7183    fn parse_grouping_sets_args(&mut self) -> Result<Vec<Expression>> {
7184        let mut args = Vec::new();
7185
7186        loop {
7187            // Check for nested GROUPING SETS, CUBE, ROLLUP
7188            let expr = if self.check_identifier("GROUPING")
7189                && self
7190                    .peek_nth(1)
7191                    .map_or(false, |t| t.text.eq_ignore_ascii_case("SETS"))
7192                && {
7193                    self.skip();
7194                    self.skip();
7195                    true
7196                } {
7197                // Nested GROUPING SETS (...)
7198                self.expect(TokenType::LParen)?;
7199                let inner_args = self.parse_grouping_sets_args()?;
7200                self.expect(TokenType::RParen)?;
7201                Expression::Function(Box::new(Function {
7202                    name: "GROUPING SETS".to_string(),
7203                    args: inner_args,
7204                    distinct: false,
7205                    trailing_comments: Vec::new(),
7206                    use_bracket_syntax: false,
7207                    no_parens: false,
7208                    quoted: false,
7209                    span: None,
7210                    inferred_type: None,
7211                }))
7212            } else if self.match_token(TokenType::Cube) {
7213                // CUBE (...)
7214                self.expect(TokenType::LParen)?;
7215                let inner_args = self.parse_expression_list()?;
7216                self.expect(TokenType::RParen)?;
7217                Expression::Function(Box::new(Function {
7218                    name: "CUBE".to_string(),
7219                    args: inner_args,
7220                    distinct: false,
7221                    trailing_comments: Vec::new(),
7222                    use_bracket_syntax: false,
7223                    no_parens: false,
7224                    quoted: false,
7225                    span: None,
7226                    inferred_type: None,
7227                }))
7228            } else if self.match_token(TokenType::Rollup) {
7229                // ROLLUP (...)
7230                self.expect(TokenType::LParen)?;
7231                let inner_args = self.parse_expression_list()?;
7232                self.expect(TokenType::RParen)?;
7233                Expression::Function(Box::new(Function {
7234                    name: "ROLLUP".to_string(),
7235                    args: inner_args,
7236                    distinct: false,
7237                    trailing_comments: Vec::new(),
7238                    use_bracket_syntax: false,
7239                    no_parens: false,
7240                    quoted: false,
7241                    span: None,
7242                    inferred_type: None,
7243                }))
7244            } else if self.check(TokenType::LParen) {
7245                // This could be a tuple like (x, y) or empty ()
7246                self.skip(); // consume (
7247                if self.check(TokenType::RParen) {
7248                    // Empty tuple ()
7249                    self.skip();
7250                    Expression::Tuple(Box::new(Tuple {
7251                        expressions: Vec::new(),
7252                    }))
7253                } else {
7254                    let inner = self.parse_expression_list()?;
7255                    self.expect(TokenType::RParen)?;
7256                    Expression::Tuple(Box::new(Tuple { expressions: inner }))
7257                }
7258            } else {
7259                self.parse_expression()?
7260            };
7261
7262            args.push(expr);
7263
7264            if !self.match_token(TokenType::Comma) {
7265                break;
7266            }
7267        }
7268
7269        Ok(args)
7270    }
7271
7272    /// Parse ORDER BY clause
7273    fn parse_order_by(&mut self) -> Result<OrderBy> {
7274        self.parse_order_by_with_siblings(false)
7275    }
7276
7277    /// Parse ORDER BY clause with optional siblings flag (Oracle ORDER SIBLINGS BY)
7278    fn parse_order_by_with_siblings(&mut self, siblings: bool) -> Result<OrderBy> {
7279        let mut expressions = Vec::new();
7280
7281        loop {
7282            let expr = self.parse_expression()?;
7283
7284            // ClickHouse: ORDER BY expr AS alias — allow AS alias before DESC/ASC
7285            // But NOT AS SELECT/WITH which would be CREATE TABLE ... AS SELECT
7286            let expr = if matches!(
7287                self.config.dialect,
7288                Some(crate::dialects::DialectType::ClickHouse)
7289            ) && self.check(TokenType::As)
7290                && !self.check_next(TokenType::LParen)
7291                && !self.check_next(TokenType::Select)
7292                && !self.check_next(TokenType::With)
7293            {
7294                self.skip(); // consume AS
7295                let alias = self.expect_identifier_or_keyword_with_quoted()?;
7296                Expression::Alias(Box::new(Alias::new(expr, alias)))
7297            } else {
7298                expr
7299            };
7300
7301            let (desc, explicit_asc) = if self.match_token(TokenType::Desc) {
7302                (true, false)
7303            } else if self.match_token(TokenType::Asc) {
7304                (false, true)
7305            } else {
7306                (false, false)
7307            };
7308
7309            let nulls_first = if self.match_token(TokenType::Nulls) {
7310                if self.match_token(TokenType::First) {
7311                    Some(true)
7312                } else if self.match_token(TokenType::Last) {
7313                    Some(false)
7314                } else {
7315                    return Err(self.parse_error("Expected FIRST or LAST after NULLS"));
7316                }
7317            } else {
7318                None
7319            };
7320
7321            // Parse optional WITH FILL clause (ClickHouse)
7322            let with_fill = if self.match_text_seq(&["WITH", "FILL"]) {
7323                let from_ = if self.match_token(TokenType::From) {
7324                    Some(Box::new(self.parse_or()?))
7325                } else {
7326                    None
7327                };
7328                let to = if self.match_text_seq(&["TO"]) {
7329                    Some(Box::new(self.parse_or()?))
7330                } else {
7331                    None
7332                };
7333                let step = if self.match_text_seq(&["STEP"]) {
7334                    Some(Box::new(self.parse_or()?))
7335                } else {
7336                    None
7337                };
7338                // ClickHouse: STALENESS [INTERVAL] expr
7339                let staleness = if self.match_text_seq(&["STALENESS"]) {
7340                    Some(Box::new(self.parse_or()?))
7341                } else {
7342                    None
7343                };
7344                let interpolate = if self.match_text_seq(&["INTERPOLATE"]) {
7345                    if self.match_token(TokenType::LParen) {
7346                        // Parse INTERPOLATE items: identifier [AS expression], ...
7347                        let mut items = Vec::new();
7348                        loop {
7349                            if self.check(TokenType::RParen) {
7350                                break;
7351                            }
7352                            let quoted = self.check(TokenType::QuotedIdentifier);
7353                            let name_text = self.expect_identifier_or_safe_keyword()?;
7354                            let name_id = Identifier {
7355                                name: name_text,
7356                                quoted,
7357                                trailing_comments: Vec::new(),
7358                                span: None,
7359                            };
7360                            let item = if self.match_token(TokenType::As) {
7361                                let expr = self.parse_expression()?;
7362                                // Store as Alias: this=expression, alias=name
7363                                Expression::Alias(Box::new(Alias {
7364                                    this: expr,
7365                                    alias: name_id,
7366                                    column_aliases: Vec::new(),
7367                                    pre_alias_comments: Vec::new(),
7368                                    trailing_comments: Vec::new(),
7369                                    inferred_type: None,
7370                                }))
7371                            } else {
7372                                Expression::Identifier(name_id)
7373                            };
7374                            items.push(item);
7375                            if !self.match_token(TokenType::Comma) {
7376                                break;
7377                            }
7378                        }
7379                        self.expect(TokenType::RParen)?;
7380                        if items.len() == 1 {
7381                            Some(Box::new(items.into_iter().next().unwrap()))
7382                        } else {
7383                            Some(Box::new(Expression::Tuple(Box::new(
7384                                crate::expressions::Tuple { expressions: items },
7385                            ))))
7386                        }
7387                    } else {
7388                        None
7389                    }
7390                } else {
7391                    None
7392                };
7393                Some(Box::new(WithFill {
7394                    from_,
7395                    to,
7396                    step,
7397                    staleness,
7398                    interpolate,
7399                }))
7400            } else {
7401                None
7402            };
7403
7404            expressions.push(Ordered {
7405                this: expr,
7406                desc,
7407                nulls_first,
7408                explicit_asc,
7409                with_fill,
7410            });
7411
7412            if !self.match_token(TokenType::Comma) {
7413                break;
7414            }
7415
7416            // Handle trailing comma: if at end of input or semicolon, break
7417            if self.is_at_end() || self.check(TokenType::Semicolon) {
7418                break;
7419            }
7420        }
7421
7422        Ok(OrderBy {
7423            expressions,
7424            siblings,
7425            comments: Vec::new(),
7426        })
7427    }
7428
7429    /// Parse query modifiers (ORDER BY, LIMIT, OFFSET, DISTRIBUTE BY, SORT BY, CLUSTER BY) for parenthesized queries
7430    /// e.g., (SELECT 1) ORDER BY x LIMIT 1 OFFSET 1
7431    /// e.g., (SELECT 1 UNION SELECT 2) DISTRIBUTE BY z SORT BY x
7432    fn parse_query_modifiers(&mut self, inner: Expression) -> Result<Expression> {
7433        // Parse DISTRIBUTE BY (Hive/Spark)
7434        let distribute_by = if self.match_keywords(&[TokenType::Distribute, TokenType::By]) {
7435            let exprs = self.parse_expression_list()?;
7436            Some(DistributeBy { expressions: exprs })
7437        } else {
7438            None
7439        };
7440
7441        // Parse SORT BY (Hive/Spark) or CLUSTER BY (Hive/Spark)
7442        let (sort_by, cluster_by) = if self.match_keywords(&[TokenType::Sort, TokenType::By]) {
7443            // SORT BY
7444            let mut orders = Vec::new();
7445            loop {
7446                if let Some(ordered) = self.parse_ordered_item()? {
7447                    orders.push(ordered);
7448                } else {
7449                    break;
7450                }
7451                if !self.match_token(TokenType::Comma) {
7452                    break;
7453                }
7454            }
7455            (
7456                Some(SortBy {
7457                    expressions: orders,
7458                }),
7459                None,
7460            )
7461        } else if self.match_keywords(&[TokenType::Cluster, TokenType::By]) {
7462            // CLUSTER BY
7463            let mut orders = Vec::new();
7464            loop {
7465                if let Some(ordered) = self.parse_ordered_item()? {
7466                    orders.push(ordered);
7467                } else {
7468                    break;
7469                }
7470                if !self.match_token(TokenType::Comma) {
7471                    break;
7472                }
7473            }
7474            (
7475                None,
7476                Some(ClusterBy {
7477                    expressions: orders,
7478                }),
7479            )
7480        } else {
7481            (None, None)
7482        };
7483
7484        // Parse ORDER BY
7485        let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
7486            Some(self.parse_order_by()?)
7487        } else {
7488            None
7489        };
7490
7491        // Parse LIMIT
7492        let limit = if self.match_token(TokenType::Limit) {
7493            Some(Limit {
7494                this: self.parse_expression()?,
7495                percent: false,
7496                comments: Vec::new(),
7497            })
7498        } else {
7499            None
7500        };
7501
7502        // Parse OFFSET
7503        let offset = if self.match_token(TokenType::Offset) {
7504            Some(Offset {
7505                this: self.parse_expression()?,
7506                rows: None,
7507            })
7508        } else {
7509            None
7510        };
7511
7512        // If we have any modifiers, wrap in a Subquery with the modifiers
7513        if order_by.is_some()
7514            || limit.is_some()
7515            || offset.is_some()
7516            || distribute_by.is_some()
7517            || sort_by.is_some()
7518            || cluster_by.is_some()
7519        {
7520            // If inner is already a Subquery, add modifiers to it instead of double-wrapping
7521            if let Expression::Subquery(mut subq) = inner {
7522                subq.order_by = order_by;
7523                subq.limit = limit;
7524                subq.offset = offset;
7525                subq.distribute_by = distribute_by;
7526                subq.sort_by = sort_by;
7527                subq.cluster_by = cluster_by;
7528                Ok(Expression::Subquery(subq))
7529            } else if let Expression::Paren(paren) = inner {
7530                // If inner is a Paren containing a Subquery or other query, unwrap it
7531                // and add modifiers to a new Subquery wrapping the Paren
7532                // This handles cases like ((SELECT 1)) LIMIT 1
7533                Ok(Expression::Subquery(Box::new(Subquery {
7534                    this: Expression::Paren(paren),
7535                    alias: None,
7536                    column_aliases: Vec::new(),
7537                    order_by,
7538                    limit,
7539                    offset,
7540                    distribute_by,
7541                    sort_by,
7542                    cluster_by,
7543                    lateral: false,
7544                    modifiers_inside: false,
7545                    trailing_comments: Vec::new(),
7546                    inferred_type: None,
7547                })))
7548            } else {
7549                Ok(Expression::Subquery(Box::new(Subquery {
7550                    this: inner,
7551                    alias: None,
7552                    column_aliases: Vec::new(),
7553                    order_by,
7554                    limit,
7555                    offset,
7556                    distribute_by,
7557                    sort_by,
7558                    cluster_by,
7559                    lateral: false,
7560                    modifiers_inside: false,
7561                    trailing_comments: Vec::new(),
7562                    inferred_type: None,
7563                })))
7564            }
7565        } else {
7566            // No modifiers - return inner as-is (don't double-wrap if already a Subquery)
7567            Ok(inner)
7568        }
7569    }
7570
7571    /// Parse ORDER BY expressions for use inside aggregate functions
7572    /// Returns Vec<Ordered> instead of OrderBy struct
7573    fn parse_order_by_list(&mut self) -> Result<Vec<Ordered>> {
7574        let mut expressions = Vec::new();
7575
7576        loop {
7577            let expr = self.parse_expression()?;
7578
7579            let (desc, explicit_asc) = if self.match_token(TokenType::Desc) {
7580                (true, false)
7581            } else if self.match_token(TokenType::Asc) {
7582                (false, true)
7583            } else {
7584                (false, false)
7585            };
7586
7587            let nulls_first = if self.match_token(TokenType::Nulls) {
7588                if self.match_token(TokenType::First) {
7589                    Some(true)
7590                } else if self.match_token(TokenType::Last) {
7591                    Some(false)
7592                } else {
7593                    return Err(self.parse_error("Expected FIRST or LAST after NULLS"));
7594                }
7595            } else {
7596                None
7597            };
7598
7599            expressions.push(Ordered {
7600                this: expr,
7601                desc,
7602                nulls_first,
7603                explicit_asc,
7604                with_fill: None,
7605            });
7606
7607            if !self.match_token(TokenType::Comma) {
7608                break;
7609            }
7610        }
7611
7612        Ok(expressions)
7613    }
7614
7615    /// Parse DISTRIBUTE BY clause (Hive/Spark)
7616    fn parse_distribute_by(&mut self) -> Result<DistributeBy> {
7617        let mut expressions = Vec::new();
7618
7619        loop {
7620            expressions.push(self.parse_expression()?);
7621            if !self.match_token(TokenType::Comma) {
7622                break;
7623            }
7624        }
7625
7626        Ok(DistributeBy { expressions })
7627    }
7628
7629    /// Parse CLUSTER BY clause (Hive/Spark)
7630    fn parse_cluster_by(&mut self) -> Result<ClusterBy> {
7631        let mut expressions = Vec::new();
7632
7633        loop {
7634            let expr = self.parse_expression()?;
7635
7636            let (desc, explicit_asc) = if self.match_token(TokenType::Desc) {
7637                (true, false)
7638            } else if self.match_token(TokenType::Asc) {
7639                (false, true)
7640            } else {
7641                (false, false)
7642            };
7643
7644            expressions.push(Ordered {
7645                this: expr,
7646                desc,
7647                nulls_first: None,
7648                explicit_asc,
7649                with_fill: None,
7650            });
7651
7652            if !self.match_token(TokenType::Comma) {
7653                break;
7654            }
7655        }
7656
7657        Ok(ClusterBy { expressions })
7658    }
7659
7660    /// Parse SORT BY clause (Hive/Spark)
7661    fn parse_sort_by(&mut self) -> Result<SortBy> {
7662        let mut expressions = Vec::new();
7663
7664        loop {
7665            let expr = self.parse_expression()?;
7666
7667            let (desc, explicit_asc) = if self.match_token(TokenType::Desc) {
7668                (true, false)
7669            } else if self.match_token(TokenType::Asc) {
7670                (false, true)
7671            } else {
7672                (false, false)
7673            };
7674
7675            let nulls_first = if self.match_token(TokenType::Nulls) {
7676                if self.match_token(TokenType::First) {
7677                    Some(true)
7678                } else if self.match_token(TokenType::Last) {
7679                    Some(false)
7680                } else {
7681                    return Err(self.parse_error("Expected FIRST or LAST after NULLS"));
7682                }
7683            } else {
7684                None
7685            };
7686
7687            expressions.push(Ordered {
7688                this: expr,
7689                desc,
7690                nulls_first,
7691                explicit_asc,
7692                with_fill: None,
7693            });
7694
7695            if !self.match_token(TokenType::Comma) {
7696                break;
7697            }
7698        }
7699
7700        Ok(SortBy { expressions })
7701    }
7702
7703    /// Parse FOR UPDATE/SHARE locking clauses or FOR XML/JSON (T-SQL)
7704    /// Syntax: FOR UPDATE|SHARE|NO KEY UPDATE|KEY SHARE [OF tables] [NOWAIT|WAIT n|SKIP LOCKED]
7705    /// Also handles: LOCK IN SHARE MODE (MySQL)
7706    /// Also handles: FOR XML PATH|RAW|AUTO|EXPLICIT [, options...] (T-SQL)
7707    /// Also handles: FOR JSON PATH|AUTO [, ROOT('name')] [, INCLUDE_NULL_VALUES] [, WITHOUT_ARRAY_WRAPPER] (T-SQL)
7708    fn parse_locks_and_for_xml(&mut self) -> Result<(Vec<Lock>, Vec<Expression>, Vec<Expression>)> {
7709        let mut locks = Vec::new();
7710        let mut for_xml = Vec::new();
7711        let mut for_json = Vec::new();
7712
7713        loop {
7714            let (update, key) = if self.match_keywords(&[TokenType::For, TokenType::Update]) {
7715                // FOR UPDATE
7716                (
7717                    Some(Box::new(Expression::Boolean(BooleanLiteral {
7718                        value: true,
7719                    }))),
7720                    None,
7721                )
7722            } else if self.check(TokenType::For) && self.check_next_identifier("XML") {
7723                // FOR XML (T-SQL) - parse XML options
7724                self.skip(); // consume FOR
7725                self.skip(); // consume XML
7726                for_xml = self.parse_for_xml_options()?;
7727                break; // FOR XML is always the last clause
7728            } else if self.check(TokenType::For) && self.check_next_identifier("JSON") {
7729                // FOR JSON (T-SQL) - parse JSON options
7730                self.skip(); // consume FOR
7731                self.skip(); // consume JSON
7732                for_json = self.parse_for_json_options()?;
7733                break; // FOR JSON is always the last clause
7734            } else if self.check(TokenType::For) && self.check_next_identifier("SHARE") {
7735                // FOR SHARE
7736                self.skip(); // consume FOR
7737                self.skip(); // consume SHARE
7738                (None, None)
7739            } else if self.check_identifier("LOCK") && self.check_next(TokenType::In) {
7740                // LOCK IN SHARE MODE (MySQL) -> converted to FOR SHARE
7741                self.skip(); // consume LOCK
7742                self.skip(); // consume IN
7743                if self.match_identifier("SHARE") {
7744                    let _ = self.match_identifier("MODE");
7745                }
7746                (None, None)
7747            } else if self.check(TokenType::For) && self.check_next(TokenType::Key) {
7748                // FOR KEY SHARE (PostgreSQL)
7749                self.skip(); // consume FOR
7750                self.skip(); // consume KEY
7751                if !self.match_identifier("SHARE") {
7752                    break; // Not a valid lock clause
7753                }
7754                (
7755                    None,
7756                    Some(Box::new(Expression::Boolean(BooleanLiteral {
7757                        value: true,
7758                    }))),
7759                )
7760            } else if self.check(TokenType::For) && self.check_next(TokenType::No) {
7761                // FOR NO KEY UPDATE (PostgreSQL)
7762                self.skip(); // consume FOR
7763                self.skip(); // consume NO
7764                if !self.match_identifier("KEY") || !self.match_token(TokenType::Update) {
7765                    break; // Not a valid lock clause
7766                }
7767                (
7768                    Some(Box::new(Expression::Boolean(BooleanLiteral {
7769                        value: true,
7770                    }))),
7771                    Some(Box::new(Expression::Boolean(BooleanLiteral {
7772                        value: true,
7773                    }))),
7774                )
7775            } else {
7776                // No more lock clauses
7777                break;
7778            };
7779
7780            // Parse optional OF clause: OF table1, table2
7781            let expressions = if self.match_token(TokenType::Of) {
7782                let mut tables = Vec::new();
7783                loop {
7784                    // Parse table reference (can be schema.table or just table)
7785                    let table = self.parse_table_ref()?;
7786                    tables.push(Expression::Table(Box::new(table)));
7787                    if !self.match_token(TokenType::Comma) {
7788                        break;
7789                    }
7790                }
7791                tables
7792            } else {
7793                Vec::new()
7794            };
7795
7796            // Parse wait option: NOWAIT, WAIT n, or SKIP LOCKED
7797            // Following Python sqlglot convention:
7798            // - NOWAIT -> Boolean(true)
7799            // - SKIP LOCKED -> Boolean(false)
7800            // - WAIT n -> Literal (the number)
7801            let wait = if self.match_identifier("NOWAIT") {
7802                // NOWAIT -> represented as Boolean(true)
7803                Some(Box::new(Expression::Boolean(BooleanLiteral {
7804                    value: true,
7805                })))
7806            } else if self.match_identifier("WAIT") {
7807                // WAIT n -> wait = expression (the number/literal)
7808                Some(Box::new(self.parse_primary()?))
7809            } else if self.match_identifier("SKIP") && self.match_identifier("LOCKED") {
7810                // SKIP LOCKED -> represented as Boolean(false)
7811                Some(Box::new(Expression::Boolean(BooleanLiteral {
7812                    value: false,
7813                })))
7814            } else {
7815                None
7816            };
7817
7818            locks.push(Lock {
7819                update,
7820                expressions,
7821                wait,
7822                key,
7823            });
7824        }
7825
7826        Ok((locks, for_xml, for_json))
7827    }
7828
7829    /// Parse FOR XML options (T-SQL)
7830    /// Syntax: FOR XML PATH|RAW|AUTO|EXPLICIT [('element')] [, BINARY BASE64] [, ELEMENTS [XSINIL|ABSENT]] [, TYPE] [, ROOT('name')]
7831    fn parse_for_xml_options(&mut self) -> Result<Vec<Expression>> {
7832        let mut options = Vec::new();
7833
7834        loop {
7835            // Parse XML option: could be a known option (PATH, RAW, AUTO, EXPLICIT, BINARY, ELEMENTS, TYPE, ROOT)
7836            // or an XMLKeyValueOption like PATH('element')
7837            if let Some(opt) = self.parse_for_xml_single_option()? {
7838                options.push(opt);
7839            } else {
7840                break;
7841            }
7842
7843            // Check for comma to continue parsing more options
7844            if !self.match_token(TokenType::Comma) {
7845                break;
7846            }
7847        }
7848
7849        Ok(options)
7850    }
7851
7852    /// Parse a single FOR XML option
7853    fn parse_for_xml_single_option(&mut self) -> Result<Option<Expression>> {
7854        // Known XML modes: PATH, RAW, AUTO, EXPLICIT
7855        // Known options: BINARY BASE64, ELEMENTS [XSINIL|ABSENT], TYPE, ROOT('name')
7856
7857        // Try to match known patterns
7858        if self.match_identifier("PATH") {
7859            let expression = if self.match_token(TokenType::LParen) {
7860                let expr = self.parse_string()?;
7861                self.expect(TokenType::RParen)?;
7862                expr
7863            } else {
7864                None
7865            };
7866            return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
7867                this: Box::new(Expression::Var(Box::new(Var {
7868                    this: "PATH".to_string(),
7869                }))),
7870                expression: expression.map(|e| Box::new(e)),
7871            }))));
7872        }
7873
7874        if self.match_identifier("RAW") {
7875            let expression = if self.match_token(TokenType::LParen) {
7876                let expr = self.parse_string()?;
7877                self.expect(TokenType::RParen)?;
7878                expr
7879            } else {
7880                None
7881            };
7882            return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
7883                this: Box::new(Expression::Var(Box::new(Var {
7884                    this: "RAW".to_string(),
7885                }))),
7886                expression: expression.map(|e| Box::new(e)),
7887            }))));
7888        }
7889
7890        if self.match_identifier("AUTO") {
7891            return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
7892                this: Box::new(Expression::Var(Box::new(Var {
7893                    this: "AUTO".to_string(),
7894                }))),
7895                expression: None,
7896            }))));
7897        }
7898
7899        if self.match_identifier("EXPLICIT") {
7900            return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
7901                this: Box::new(Expression::Var(Box::new(Var {
7902                    this: "EXPLICIT".to_string(),
7903                }))),
7904                expression: None,
7905            }))));
7906        }
7907
7908        if self.match_identifier("TYPE") || self.match_token(TokenType::Type) {
7909            return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
7910                this: Box::new(Expression::Var(Box::new(Var {
7911                    this: "TYPE".to_string(),
7912                }))),
7913                expression: None,
7914            }))));
7915        }
7916
7917        if self.match_identifier("BINARY") {
7918            // BINARY BASE64
7919            if self.match_identifier("BASE64") {
7920                return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
7921                    this: Box::new(Expression::Var(Box::new(Var {
7922                        this: "BINARY BASE64".to_string(),
7923                    }))),
7924                    expression: None,
7925                }))));
7926            } else {
7927                return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
7928                    this: Box::new(Expression::Var(Box::new(Var {
7929                        this: "BINARY".to_string(),
7930                    }))),
7931                    expression: None,
7932                }))));
7933            }
7934        }
7935
7936        if self.match_identifier("ELEMENTS") {
7937            // ELEMENTS [XSINIL|ABSENT]
7938            let suboption = if self.match_identifier("XSINIL") {
7939                Some("XSINIL".to_string())
7940            } else if self.match_identifier("ABSENT") {
7941                Some("ABSENT".to_string())
7942            } else {
7943                None
7944            };
7945            let option_name = match &suboption {
7946                Some(sub) => format!("ELEMENTS {}", sub),
7947                None => "ELEMENTS".to_string(),
7948            };
7949            return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
7950                this: Box::new(Expression::Var(Box::new(Var { this: option_name }))),
7951                expression: None,
7952            }))));
7953        }
7954
7955        if self.match_identifier("ROOT") {
7956            let expression = if self.match_token(TokenType::LParen) {
7957                let expr = self.parse_string()?;
7958                self.expect(TokenType::RParen)?;
7959                expr
7960            } else {
7961                None
7962            };
7963            return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
7964                this: Box::new(Expression::Var(Box::new(Var {
7965                    this: "ROOT".to_string(),
7966                }))),
7967                expression: expression.map(|e| Box::new(e)),
7968            }))));
7969        }
7970
7971        // No more options recognized
7972        Ok(None)
7973    }
7974
7975    /// Parse FOR JSON options (T-SQL)
7976    /// Syntax: FOR JSON PATH|AUTO [, ROOT('name')] [, INCLUDE_NULL_VALUES] [, WITHOUT_ARRAY_WRAPPER]
7977    fn parse_for_json_options(&mut self) -> Result<Vec<Expression>> {
7978        let mut options = Vec::new();
7979
7980        loop {
7981            if let Some(opt) = self.parse_for_json_single_option()? {
7982                options.push(opt);
7983            } else {
7984                break;
7985            }
7986            if !self.match_token(TokenType::Comma) {
7987                break;
7988            }
7989        }
7990
7991        Ok(options)
7992    }
7993
7994    /// Parse a single FOR JSON option
7995    fn parse_for_json_single_option(&mut self) -> Result<Option<Expression>> {
7996        if self.match_identifier("PATH") {
7997            return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
7998                this: Box::new(Expression::Var(Box::new(Var {
7999                    this: "PATH".to_string(),
8000                }))),
8001                expression: None,
8002            }))));
8003        }
8004
8005        if self.match_identifier("AUTO") {
8006            return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
8007                this: Box::new(Expression::Var(Box::new(Var {
8008                    this: "AUTO".to_string(),
8009                }))),
8010                expression: None,
8011            }))));
8012        }
8013
8014        if self.match_identifier("ROOT") {
8015            let expression = if self.match_token(TokenType::LParen) {
8016                let expr = self.parse_string()?;
8017                self.expect(TokenType::RParen)?;
8018                expr
8019            } else {
8020                None
8021            };
8022            return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
8023                this: Box::new(Expression::Var(Box::new(Var {
8024                    this: "ROOT".to_string(),
8025                }))),
8026                expression: expression.map(|e| Box::new(e)),
8027            }))));
8028        }
8029
8030        if self.match_identifier("INCLUDE_NULL_VALUES") {
8031            return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
8032                this: Box::new(Expression::Var(Box::new(Var {
8033                    this: "INCLUDE_NULL_VALUES".to_string(),
8034                }))),
8035                expression: None,
8036            }))));
8037        }
8038
8039        if self.match_identifier("WITHOUT_ARRAY_WRAPPER") {
8040            return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
8041                this: Box::new(Expression::Var(Box::new(Var {
8042                    this: "WITHOUT_ARRAY_WRAPPER".to_string(),
8043                }))),
8044                expression: None,
8045            }))));
8046        }
8047
8048        Ok(None)
8049    }
8050
8051    /// Parse CONNECT BY clause (Oracle hierarchical queries)
8052    /// Syntax: [START WITH condition] CONNECT BY [NOCYCLE] condition [START WITH condition]
8053    /// START WITH can appear before or after CONNECT BY
8054    fn parse_connect(&mut self) -> Result<Option<Connect>> {
8055        // Check for START WITH first (can appear before CONNECT BY)
8056        let start_before = if self.match_keywords(&[TokenType::Start, TokenType::With]) {
8057            Some(self.parse_expression()?)
8058        } else {
8059            None
8060        };
8061
8062        // Check for CONNECT BY
8063        if !self.match_keywords(&[TokenType::Connect, TokenType::By]) {
8064            if start_before.is_some() {
8065                return Err(self.parse_error("START WITH without CONNECT BY"));
8066            }
8067            return Ok(None);
8068        }
8069
8070        // Check for NOCYCLE
8071        let nocycle = self.match_token(TokenType::NoCycle);
8072
8073        // Parse the CONNECT BY condition with PRIOR support
8074        let connect = self.parse_connect_expression()?;
8075
8076        // START WITH can also appear after CONNECT BY
8077        let start = if start_before.is_some() {
8078            start_before
8079        } else if self.match_keywords(&[TokenType::Start, TokenType::With]) {
8080            Some(self.parse_expression()?)
8081        } else {
8082            None
8083        };
8084
8085        Ok(Some(Connect {
8086            start,
8087            connect,
8088            nocycle,
8089        }))
8090    }
8091
8092    /// Parse expression in CONNECT BY context, treating PRIOR as prefix operator
8093    fn parse_connect_expression(&mut self) -> Result<Expression> {
8094        self.parse_connect_or()
8095    }
8096
8097    /// Parse OR expression in CONNECT BY context
8098    fn parse_connect_or(&mut self) -> Result<Expression> {
8099        let mut left = self.parse_connect_and()?;
8100
8101        while self.match_token(TokenType::Or) {
8102            let right = self.parse_connect_and()?;
8103            left = Expression::Or(Box::new(BinaryOp::new(left, right)));
8104        }
8105
8106        Ok(Self::maybe_rebalance_boolean_chain(left, false))
8107    }
8108
8109    /// Parse AND expression in CONNECT BY context
8110    fn parse_connect_and(&mut self) -> Result<Expression> {
8111        let mut left = self.parse_connect_comparison()?;
8112
8113        while self.match_token(TokenType::And) {
8114            let right = self.parse_connect_comparison()?;
8115            left = Expression::And(Box::new(BinaryOp::new(left, right)));
8116        }
8117
8118        Ok(Self::maybe_rebalance_boolean_chain(left, true))
8119    }
8120
8121    /// Parse comparison in CONNECT BY context
8122    fn parse_connect_comparison(&mut self) -> Result<Expression> {
8123        let left = self.parse_connect_primary()?;
8124
8125        if self.match_token(TokenType::Eq) {
8126            let right = self.parse_connect_primary()?;
8127            return Ok(Expression::Eq(Box::new(BinaryOp::new(left, right))));
8128        }
8129        if self.match_token(TokenType::Neq) {
8130            let right = self.parse_connect_primary()?;
8131            return Ok(Expression::Neq(Box::new(BinaryOp::new(left, right))));
8132        }
8133        if self.match_token(TokenType::Lt) {
8134            let right = self.parse_connect_primary()?;
8135            return Ok(Expression::Lt(Box::new(BinaryOp::new(left, right))));
8136        }
8137        if self.match_token(TokenType::Lte) {
8138            let right = self.parse_connect_primary()?;
8139            return Ok(Expression::Lte(Box::new(BinaryOp::new(left, right))));
8140        }
8141        if self.match_token(TokenType::Gt) {
8142            let right = self.parse_connect_primary()?;
8143            return Ok(Expression::Gt(Box::new(BinaryOp::new(left, right))));
8144        }
8145        if self.match_token(TokenType::Gte) {
8146            let right = self.parse_connect_primary()?;
8147            return Ok(Expression::Gte(Box::new(BinaryOp::new(left, right))));
8148        }
8149
8150        Ok(left)
8151    }
8152
8153    /// Parse primary in CONNECT BY context with PRIOR support
8154    fn parse_connect_primary(&mut self) -> Result<Expression> {
8155        // Handle PRIOR as prefix operator
8156        if self.match_token(TokenType::Prior) {
8157            let expr = self.parse_primary()?;
8158            return Ok(Expression::Prior(Box::new(Prior { this: expr })));
8159        }
8160
8161        if let Some(connect_by_root) = self.try_parse_connect_by_root_expression()? {
8162            return Ok(connect_by_root);
8163        }
8164
8165        self.parse_primary()
8166    }
8167
8168    /// Parse Oracle CONNECT_BY_ROOT in either supported form:
8169    /// CONNECT_BY_ROOT col
8170    /// CONNECT_BY_ROOT(col)
8171    fn try_parse_connect_by_root_expression(&mut self) -> Result<Option<Expression>> {
8172        if !(self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("CONNECT_BY_ROOT"))
8173        {
8174            return Ok(None);
8175        }
8176
8177        self.skip();
8178
8179        let this = if self.match_token(TokenType::LParen) {
8180            let expr = self.parse_expression()?;
8181            self.expect(TokenType::RParen)?;
8182            expr
8183        } else {
8184            self.parse_column()?.ok_or_else(|| {
8185                self.parse_error("Expected expression or column after CONNECT_BY_ROOT")
8186            })?
8187        };
8188
8189        Ok(Some(Expression::ConnectByRoot(Box::new(ConnectByRoot {
8190            this,
8191        }))))
8192    }
8193
8194    /// Parse MATCH_RECOGNIZE clause (Oracle/Snowflake/Presto/Trino pattern matching)
8195    /// MATCH_RECOGNIZE ( [PARTITION BY ...] [ORDER BY ...] [MEASURES ...] [rows] [after] PATTERN (...) DEFINE ... )
8196    fn parse_match_recognize(&mut self, source: Option<Expression>) -> Result<Expression> {
8197        self.expect(TokenType::LParen)?;
8198
8199        // PARTITION BY (optional)
8200        let partition_by = if self.match_keywords(&[TokenType::Partition, TokenType::By]) {
8201            Some(self.parse_expression_list()?)
8202        } else {
8203            None
8204        };
8205
8206        // ORDER BY (optional)
8207        let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
8208            Some(self.parse_order_by()?.expressions)
8209        } else {
8210            None
8211        };
8212
8213        // MEASURES (optional)
8214        let measures = if self.match_token(TokenType::Measures) {
8215            Some(self.parse_match_recognize_measures()?)
8216        } else {
8217            None
8218        };
8219
8220        // Row semantics: ONE ROW PER MATCH / ALL ROWS PER MATCH
8221        let rows = self.parse_match_recognize_rows()?;
8222
8223        // AFTER MATCH SKIP
8224        let after = self.parse_match_recognize_after()?;
8225
8226        // PATTERN
8227        let pattern = if self.match_token(TokenType::Pattern) {
8228            Some(self.parse_match_recognize_pattern()?)
8229        } else {
8230            None
8231        };
8232
8233        // DEFINE
8234        let define = if self.match_token(TokenType::Define) {
8235            Some(self.parse_match_recognize_define()?)
8236        } else {
8237            None
8238        };
8239
8240        self.expect(TokenType::RParen)?;
8241
8242        // Alias is handled by the caller
8243
8244        Ok(Expression::MatchRecognize(Box::new(MatchRecognize {
8245            this: source.map(Box::new),
8246            partition_by,
8247            order_by,
8248            measures,
8249            rows,
8250            after,
8251            pattern,
8252            define,
8253            alias: None,
8254            alias_explicit_as: false,
8255        })))
8256    }
8257
8258    /// Parse MEASURES clause in MATCH_RECOGNIZE
8259    fn parse_match_recognize_measures(&mut self) -> Result<Vec<MatchRecognizeMeasure>> {
8260        let mut measures = Vec::new();
8261
8262        loop {
8263            // Check for RUNNING or FINAL
8264            let window_frame = if self.match_token(TokenType::Running) {
8265                Some(MatchRecognizeSemantics::Running)
8266            } else if self.match_token(TokenType::Final) {
8267                Some(MatchRecognizeSemantics::Final)
8268            } else {
8269                None
8270            };
8271
8272            let mut expr = self.parse_expression()?;
8273
8274            // Handle AS alias for measures
8275            if self.match_token(TokenType::As) {
8276                let alias = Identifier::new(self.expect_identifier()?);
8277                expr = Expression::Alias(Box::new(Alias::new(expr, alias)));
8278            }
8279
8280            measures.push(MatchRecognizeMeasure {
8281                this: expr,
8282                window_frame,
8283            });
8284
8285            if !self.match_token(TokenType::Comma) {
8286                break;
8287            }
8288        }
8289
8290        Ok(measures)
8291    }
8292
8293    /// Parse row semantics in MATCH_RECOGNIZE
8294    fn parse_match_recognize_rows(&mut self) -> Result<Option<MatchRecognizeRows>> {
8295        // ONE ROW PER MATCH
8296        if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("ONE") {
8297            self.skip(); // consume ONE
8298            if !self.match_token(TokenType::Row) {
8299                return Err(self.parse_error("Expected ROW after ONE"));
8300            }
8301            if !(self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("PER")) {
8302                return Err(self.parse_error("Expected PER after ONE ROW"));
8303            }
8304            self.skip(); // consume PER
8305            if !self.match_token(TokenType::Match) {
8306                return Err(self.parse_error("Expected MATCH after ONE ROW PER"));
8307            }
8308            return Ok(Some(MatchRecognizeRows::OneRowPerMatch));
8309        }
8310
8311        // ALL ROWS PER MATCH [variants]
8312        if self.match_token(TokenType::All) {
8313            if !self.match_token(TokenType::Rows) {
8314                return Err(self.parse_error("Expected ROWS after ALL"));
8315            }
8316            if !(self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("PER")) {
8317                return Err(self.parse_error("Expected PER after ALL ROWS"));
8318            }
8319            self.skip(); // consume PER
8320            if !self.match_token(TokenType::Match) {
8321                return Err(self.parse_error("Expected MATCH after ALL ROWS PER"));
8322            }
8323
8324            // Check for optional modifiers
8325            if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("SHOW") {
8326                self.skip();
8327                if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("EMPTY") {
8328                    self.skip();
8329                    if self.check(TokenType::Var)
8330                        && self.peek().text.eq_ignore_ascii_case("MATCHES")
8331                    {
8332                        self.skip();
8333                        return Ok(Some(MatchRecognizeRows::AllRowsPerMatchShowEmptyMatches));
8334                    }
8335                }
8336                return Err(self.parse_error("Expected EMPTY MATCHES after SHOW"));
8337            }
8338
8339            if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("OMIT") {
8340                self.skip();
8341                if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("EMPTY") {
8342                    self.skip();
8343                    if self.check(TokenType::Var)
8344                        && self.peek().text.eq_ignore_ascii_case("MATCHES")
8345                    {
8346                        self.skip();
8347                        return Ok(Some(MatchRecognizeRows::AllRowsPerMatchOmitEmptyMatches));
8348                    }
8349                }
8350                return Err(self.parse_error("Expected EMPTY MATCHES after OMIT"));
8351            }
8352
8353            if self.match_token(TokenType::With) {
8354                if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("UNMATCHED")
8355                {
8356                    self.skip();
8357                    if self.match_token(TokenType::Rows) {
8358                        return Ok(Some(MatchRecognizeRows::AllRowsPerMatchWithUnmatchedRows));
8359                    }
8360                }
8361                return Err(self.parse_error("Expected UNMATCHED ROWS after WITH"));
8362            }
8363
8364            return Ok(Some(MatchRecognizeRows::AllRowsPerMatch));
8365        }
8366
8367        Ok(None)
8368    }
8369
8370    /// Parse AFTER MATCH SKIP clause in MATCH_RECOGNIZE
8371    fn parse_match_recognize_after(&mut self) -> Result<Option<MatchRecognizeAfter>> {
8372        if !self.match_token(TokenType::After) {
8373            return Ok(None);
8374        }
8375
8376        if !self.match_token(TokenType::Match) {
8377            return Err(self.parse_error("Expected MATCH after AFTER"));
8378        }
8379
8380        // Check for SKIP (it might be an identifier)
8381        if !(self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("SKIP")) {
8382            return Err(self.parse_error("Expected SKIP after AFTER MATCH"));
8383        }
8384        self.skip(); // consume SKIP
8385
8386        // PAST LAST ROW
8387        if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("PAST") {
8388            self.skip();
8389            if self.match_token(TokenType::Last) {
8390                if self.match_token(TokenType::Row) {
8391                    return Ok(Some(MatchRecognizeAfter::PastLastRow));
8392                }
8393            }
8394            return Err(self.parse_error("Expected LAST ROW after PAST"));
8395        }
8396
8397        // TO NEXT ROW / TO FIRST x / TO LAST x
8398        if self.match_token(TokenType::To) {
8399            if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("NEXT") {
8400                self.skip();
8401                if self.match_token(TokenType::Row) {
8402                    return Ok(Some(MatchRecognizeAfter::ToNextRow));
8403                }
8404                return Err(self.parse_error("Expected ROW after NEXT"));
8405            }
8406
8407            if self.match_token(TokenType::First) {
8408                let name = self.expect_identifier()?;
8409                return Ok(Some(MatchRecognizeAfter::ToFirst(Identifier::new(name))));
8410            }
8411
8412            if self.match_token(TokenType::Last) {
8413                let name = self.expect_identifier()?;
8414                return Ok(Some(MatchRecognizeAfter::ToLast(Identifier::new(name))));
8415            }
8416
8417            return Err(self.parse_error("Expected NEXT ROW, FIRST x, or LAST x after TO"));
8418        }
8419
8420        Err(self.parse_error("Expected PAST LAST ROW or TO ... after AFTER MATCH SKIP"))
8421    }
8422
8423    /// Parse PATTERN clause in MATCH_RECOGNIZE using bracket counting
8424    fn parse_match_recognize_pattern(&mut self) -> Result<String> {
8425        self.expect(TokenType::LParen)?;
8426
8427        let mut depth = 1;
8428        let mut pattern = String::new();
8429
8430        while depth > 0 && !self.is_at_end() {
8431            let token = self.advance();
8432            match token.token_type {
8433                TokenType::LParen => {
8434                    depth += 1;
8435                    pattern.push('(');
8436                }
8437                TokenType::RParen => {
8438                    depth -= 1;
8439                    if depth > 0 {
8440                        pattern.push(')');
8441                    }
8442                }
8443                _ => {
8444                    // Pattern quantifiers (+, *, ?, {n,m}) should not have a space before them
8445                    let is_quantifier = matches!(token.text.as_str(), "+" | "*" | "?")
8446                        || token.text.starts_with('{');
8447
8448                    if !pattern.is_empty()
8449                        && !pattern.ends_with('(')
8450                        && !pattern.ends_with(' ')
8451                        && !is_quantifier
8452                    {
8453                        pattern.push(' ');
8454                    }
8455                    pattern.push_str(&token.text);
8456                }
8457            }
8458        }
8459
8460        if depth > 0 {
8461            return Err(self.parse_error("Unclosed parenthesis in PATTERN clause"));
8462        }
8463
8464        Ok(pattern.trim().to_string())
8465    }
8466
8467    /// Parse DEFINE clause in MATCH_RECOGNIZE
8468    fn parse_match_recognize_define(&mut self) -> Result<Vec<(Identifier, Expression)>> {
8469        let mut definitions = Vec::new();
8470
8471        loop {
8472            let name = Identifier::new(self.expect_identifier()?);
8473            self.expect(TokenType::As)?;
8474            let expr = self.parse_expression()?;
8475
8476            definitions.push((name, expr));
8477
8478            if !self.match_token(TokenType::Comma) {
8479                break;
8480            }
8481        }
8482
8483        Ok(definitions)
8484    }
8485
8486    /// Parse LATERAL VIEW clauses (Hive/Spark)
8487    /// Syntax: LATERAL VIEW [OUTER] generator_function(args) table_alias AS col1 [, col2, ...]
8488    fn parse_lateral_views(&mut self) -> Result<Vec<LateralView>> {
8489        let mut views = Vec::new();
8490
8491        while self.match_keywords(&[TokenType::Lateral, TokenType::View]) {
8492            // Check for OUTER keyword
8493            let outer = self.match_token(TokenType::Outer);
8494
8495            // Parse the generator function (EXPLODE, POSEXPLODE, INLINE, etc.)
8496            // This is a function call expression
8497            let this = self.parse_primary()?;
8498
8499            // Parse table alias (comes before AS)
8500            let table_alias = if self.check(TokenType::Var) && !self.check_keyword() {
8501                Some(Identifier::new(self.expect_identifier()?))
8502            } else {
8503                None
8504            };
8505
8506            // Parse column aliases after AS keyword
8507            // Supports both: AS a, b and AS (a, b)
8508            let column_aliases = if self.match_token(TokenType::As) {
8509                let mut aliases = Vec::new();
8510                // Check for parenthesized alias list: AS ("a", "b")
8511                if self.match_token(TokenType::LParen) {
8512                    loop {
8513                        aliases.push(Identifier::new(self.expect_identifier_or_keyword()?));
8514                        if !self.match_token(TokenType::Comma) {
8515                            break;
8516                        }
8517                    }
8518                    self.expect(TokenType::RParen)?;
8519                } else {
8520                    // Non-parenthesized aliases: AS a, b, c
8521                    // Use expect_identifier_or_keyword because aliases like "key", "value", "pos" may be keywords
8522                    loop {
8523                        aliases.push(Identifier::new(self.expect_identifier_or_keyword()?));
8524                        if !self.match_token(TokenType::Comma) {
8525                            break;
8526                        }
8527                        // Check if next token is still an identifier or keyword (column alias)
8528                        // vs starting a new LATERAL VIEW or other clause
8529                        if !self.is_identifier_or_keyword_token() {
8530                            break;
8531                        }
8532                        // Check for keywords that would end the column list
8533                        if self.peek().token_type == TokenType::Lateral
8534                            || self.peek().token_type == TokenType::Where
8535                            || self.peek().token_type == TokenType::Group
8536                            || self.peek().token_type == TokenType::Having
8537                            || self.peek().token_type == TokenType::Order
8538                            || self.peek().token_type == TokenType::Limit
8539                        {
8540                            break;
8541                        }
8542                    }
8543                }
8544                aliases
8545            } else {
8546                Vec::new()
8547            };
8548
8549            views.push(LateralView {
8550                this,
8551                table_alias,
8552                column_aliases,
8553                outer,
8554            });
8555        }
8556
8557        Ok(views)
8558    }
8559
8560    /// Parse named windows (WINDOW w AS (...), ...)
8561    fn parse_named_windows(&mut self) -> Result<Vec<NamedWindow>> {
8562        let mut windows = Vec::new();
8563
8564        loop {
8565            let name = self.expect_identifier()?;
8566            self.expect(TokenType::As)?;
8567            self.expect(TokenType::LParen)?;
8568
8569            // Parse optional base window name reference (e.g., w1 AS (w0 ORDER BY ...))
8570            let window_name = if (self.check(TokenType::Identifier)
8571                || self.check(TokenType::Var)
8572                || self.check(TokenType::QuotedIdentifier))
8573                && !self.check(TokenType::Partition)
8574                && !self.check(TokenType::Order)
8575                && self.peek_nth(1).map_or(true, |t| {
8576                    matches!(
8577                        t.token_type,
8578                        TokenType::Partition
8579                            | TokenType::Order
8580                            | TokenType::Rows
8581                            | TokenType::Range
8582                            | TokenType::Groups
8583                            | TokenType::RParen
8584                            | TokenType::Comma
8585                    )
8586                }) {
8587                Some(self.expect_identifier()?)
8588            } else {
8589                None
8590            };
8591
8592            // Parse window specification
8593            let partition_by = if self.match_keywords(&[TokenType::Partition, TokenType::By]) {
8594                Some(self.parse_expression_list()?)
8595            } else {
8596                None
8597            };
8598
8599            let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
8600                Some(self.parse_order_by()?)
8601            } else {
8602                None
8603            };
8604
8605            let frame = self.parse_window_frame()?;
8606
8607            self.expect(TokenType::RParen)?;
8608
8609            windows.push(NamedWindow {
8610                name: Identifier::new(name),
8611                spec: Over {
8612                    window_name: window_name.map(|n| Identifier::new(n)),
8613                    partition_by: partition_by.unwrap_or_default(),
8614                    order_by: order_by.map(|o| o.expressions).unwrap_or_default(),
8615                    frame,
8616                    alias: None,
8617                },
8618            });
8619
8620            if !self.match_token(TokenType::Comma) {
8621                break;
8622            }
8623        }
8624
8625        Ok(windows)
8626    }
8627
8628    /// Parse query hint /*+ ... */
8629    fn parse_hint(&mut self) -> Result<Hint> {
8630        let token = self.advance();
8631        let hint_text = token.text.clone();
8632
8633        // For now, parse as raw hint text
8634        // More sophisticated parsing can be added later
8635        let expressions = if hint_text.is_empty() {
8636            Vec::new()
8637        } else {
8638            vec![HintExpression::Raw(hint_text)]
8639        };
8640
8641        Ok(Hint { expressions })
8642    }
8643
8644    /// Parse SAMPLE / TABLESAMPLE / USING SAMPLE clause
8645    fn parse_sample_clause(&mut self) -> Result<Option<Sample>> {
8646        // Check for USING SAMPLE (DuckDB), SAMPLE, or TABLESAMPLE
8647        let is_using_sample = if self.check(TokenType::Using)
8648            && self.current + 1 < self.tokens.len()
8649            && self.tokens[self.current + 1].token_type == TokenType::Sample
8650        {
8651            self.skip(); // consume USING
8652            self.skip(); // consume SAMPLE
8653            true
8654        } else {
8655            false
8656        };
8657
8658        let use_sample_keyword = if is_using_sample {
8659            // USING SAMPLE acts like SAMPLE
8660            true
8661        } else if self.match_token(TokenType::Sample) {
8662            true
8663        } else if self.match_token(TokenType::TableSample) {
8664            false
8665        } else {
8666            return Ok(None);
8667        };
8668
8669        // Parse sampling method if specified (BERNOULLI, SYSTEM, BLOCK, ROW, RESERVOIR)
8670        let (method, method_before_size, explicit_method) =
8671            if self.match_token(TokenType::Bernoulli) {
8672                (SampleMethod::Bernoulli, true, true)
8673            } else if self.match_token(TokenType::System) {
8674                (SampleMethod::System, true, true)
8675            } else if self.match_token(TokenType::Block) {
8676                (SampleMethod::Block, true, true)
8677            } else if self.match_token(TokenType::Row) {
8678                (SampleMethod::Row, true, true)
8679            } else if self.check_identifier("RESERVOIR") {
8680                self.skip();
8681                (SampleMethod::Reservoir, true, true)
8682            } else {
8683                // Default to BERNOULLI for both SAMPLE and TABLESAMPLE
8684                // This matches Python SQLGlot's normalization behavior
8685                (SampleMethod::Bernoulli, false, false)
8686            };
8687
8688        // Parse size (can be in parentheses)
8689        let has_paren = self.match_token(TokenType::LParen);
8690
8691        // Check for BUCKET syntax: TABLESAMPLE (BUCKET 1 OUT OF 5 ON x)
8692        if self.match_identifier("BUCKET") {
8693            let bucket_numerator = self.parse_primary()?;
8694            self.match_identifier("OUT");
8695            self.match_token(TokenType::Of); // OF is a keyword token
8696            let bucket_denominator = self.parse_primary()?;
8697            let bucket_field = if self.match_token(TokenType::On) {
8698                Some(Box::new(self.parse_primary()?))
8699            } else {
8700                None
8701            };
8702            if has_paren {
8703                self.expect(TokenType::RParen)?;
8704            }
8705            return Ok(Some(Sample {
8706                method: SampleMethod::Bucket,
8707                size: bucket_numerator.clone(),
8708                seed: None,
8709                offset: None,
8710                unit_after_size: false,
8711                use_sample_keyword,
8712                explicit_method: true,     // BUCKET is always explicit
8713                method_before_size: false, // BUCKET appears inside parens
8714                use_seed_keyword: false,
8715                bucket_numerator: Some(Box::new(bucket_numerator)),
8716                bucket_denominator: Some(Box::new(bucket_denominator)),
8717                bucket_field,
8718                is_using_sample,
8719                is_percent: false,
8720                suppress_method_output: false,
8721            }));
8722        }
8723
8724        // Use parse_unary to avoid consuming PERCENT as modulo operator
8725        let size = self.parse_unary()?;
8726
8727        // Check for PERCENT/ROWS suffix after size (if not already part of the number)
8728        // Both "%" and "PERCENT" tokens map to TokenType::Percent - accept both as PERCENT modifier
8729        let (method, unit_after_size, is_percent) = if self.check(TokenType::Percent) {
8730            self.skip(); // consume PERCENT or %
8731                         // If method was already explicitly specified (e.g., SYSTEM), keep it
8732                         // PERCENT here is just the unit, not the sampling method
8733            if method_before_size {
8734                (method, true, true)
8735            } else {
8736                (SampleMethod::Percent, true, true)
8737            }
8738        } else if self.match_token(TokenType::Rows) {
8739            // If method was already explicitly specified, keep it
8740            if method_before_size {
8741                (method, true, false)
8742            } else {
8743                (SampleMethod::Row, true, false)
8744            }
8745        } else {
8746            // No explicit unit after size - preserve the original method
8747            (method, false, false)
8748        };
8749
8750        if has_paren {
8751            self.expect(TokenType::RParen)?;
8752        }
8753
8754        // DuckDB USING SAMPLE: method and optional seed can come in parens after size
8755        // e.g., "10 PERCENT (bernoulli)" or "10% (system, 377)"
8756        // DuckDB USING SAMPLE: method and optional seed can come in parens after size
8757        // e.g., "10 PERCENT (bernoulli)" or "10% (system, 377)"
8758        let (method, seed, use_seed_keyword, explicit_method) =
8759            if is_using_sample && self.check(TokenType::LParen) {
8760                self.skip(); // consume LParen
8761                             // Parse method name as identifier or keyword token
8762                             // BERNOULLI, SYSTEM, RESERVOIR can be tokenized as keywords, not identifiers
8763                let method_from_parens =
8764                    if self.check_identifier("BERNOULLI") || self.check(TokenType::Bernoulli) {
8765                        self.skip();
8766                        Some(SampleMethod::Bernoulli)
8767                    } else if self.check_identifier("SYSTEM") || self.check(TokenType::System) {
8768                        self.skip();
8769                        Some(SampleMethod::System)
8770                    } else if self.check_identifier("RESERVOIR") {
8771                        self.skip();
8772                        Some(SampleMethod::Reservoir)
8773                    } else {
8774                        None
8775                    };
8776                // Optional seed after comma
8777                let seed = if self.match_token(TokenType::Comma) {
8778                    Some(self.parse_expression()?)
8779                } else {
8780                    None
8781                };
8782                self.expect(TokenType::RParen)?;
8783                let final_method = method_from_parens.unwrap_or(method);
8784                (final_method, seed, false, true)
8785            } else {
8786                // Parse optional SEED / REPEATABLE
8787                let (seed, use_seed_keyword) = if self.match_token(TokenType::Seed) {
8788                    self.expect(TokenType::LParen)?;
8789                    let seed_value = self.parse_expression()?;
8790                    self.expect(TokenType::RParen)?;
8791                    (Some(seed_value), true)
8792                } else if self.match_token(TokenType::Repeatable) {
8793                    self.expect(TokenType::LParen)?;
8794                    let seed_value = self.parse_expression()?;
8795                    self.expect(TokenType::RParen)?;
8796                    (Some(seed_value), false)
8797                } else {
8798                    (None, false)
8799                };
8800                let explicit_method = explicit_method || unit_after_size;
8801                (method, seed, use_seed_keyword, explicit_method)
8802            };
8803
8804        // For DuckDB USING SAMPLE: apply default methods
8805        // - bare number -> RESERVOIR, ROWS
8806        // - percent -> SYSTEM, PERCENT
8807        let (method, unit_after_size) = if is_using_sample && !explicit_method {
8808            // No explicit method - apply defaults
8809            (SampleMethod::Reservoir, false) // default: RESERVOIR with ROWS
8810        } else if is_using_sample && unit_after_size && !method_before_size {
8811            // Unit was specified after size (e.g., "10 PERCENT") but no method before
8812            // Check if method was set in post-parens
8813            if matches!(method, SampleMethod::Percent) {
8814                // "10%" or "10 PERCENT" without method -> SYSTEM
8815                (SampleMethod::System, true)
8816            } else if matches!(method, SampleMethod::Row) {
8817                // "50 ROWS" without method -> RESERVOIR
8818                (SampleMethod::Reservoir, true)
8819            } else {
8820                (method, unit_after_size)
8821            }
8822        } else {
8823            (method, unit_after_size)
8824        };
8825
8826        // method_before_size: true for USING SAMPLE - we normalize to method-before-size format
8827        // e.g., "10 PERCENT (bernoulli)" becomes "BERNOULLI (10 PERCENT)"
8828        Ok(Some(Sample {
8829            method,
8830            size,
8831            seed,
8832            offset: None,
8833            unit_after_size,
8834            use_sample_keyword,
8835            explicit_method: true,    // For USING SAMPLE, always explicit
8836            method_before_size: true, // Normalize to method-before-size format
8837            use_seed_keyword,
8838            bucket_numerator: None,
8839            bucket_denominator: None,
8840            bucket_field: None,
8841            is_using_sample,
8842            is_percent,
8843            suppress_method_output: false,
8844        }))
8845    }
8846
8847    /// Parse table-level TABLESAMPLE/SAMPLE: TABLESAMPLE/SAMPLE METHOD(size [PERCENT|ROWS])
8848    /// e.g., TABLESAMPLE RESERVOIR(20%), SAMPLE BERNOULLI(10 PERCENT), SAMPLE ROW(0)
8849    fn parse_table_level_sample(&mut self) -> Result<Option<Sample>> {
8850        // Accept both TABLESAMPLE and SAMPLE (Snowflake supports both)
8851        let use_sample_keyword = if self.match_token(TokenType::Sample) {
8852            true
8853        } else if self.match_token(TokenType::TableSample) {
8854            false
8855        } else {
8856            return Ok(None);
8857        };
8858        // Track which keyword was used for identity output
8859        let _ = use_sample_keyword; // Used below for is_using_sample field
8860
8861        // Teradata: SAMPLE 5 or SAMPLE 0.33, .25, .1 (no parentheses)
8862        if matches!(
8863            self.config.dialect,
8864            Some(crate::dialects::DialectType::Teradata)
8865        ) && use_sample_keyword
8866            && !self.check(TokenType::LParen)
8867        {
8868            let mut expressions = vec![self.parse_unary()?];
8869            while self.match_token(TokenType::Comma) {
8870                expressions.push(self.parse_unary()?);
8871            }
8872            let size = if expressions.len() == 1 {
8873                expressions.into_iter().next().unwrap()
8874            } else {
8875                Expression::Tuple(Box::new(Tuple { expressions }))
8876            };
8877            return Ok(Some(Sample {
8878                method: SampleMethod::Percent,
8879                size,
8880                seed: None,
8881                offset: None,
8882                unit_after_size: false,
8883                use_sample_keyword,
8884                explicit_method: false,
8885                method_before_size: false,
8886                use_seed_keyword: false,
8887                bucket_numerator: None,
8888                bucket_denominator: None,
8889                bucket_field: None,
8890                is_using_sample: false,
8891                is_percent: false,
8892                suppress_method_output: false,
8893            }));
8894        }
8895
8896        // ClickHouse: SAMPLE 0.1 [OFFSET 0.2] (no parentheses)
8897        if matches!(
8898            self.config.dialect,
8899            Some(crate::dialects::DialectType::ClickHouse)
8900        ) && use_sample_keyword
8901            && !self.check(TokenType::LParen)
8902        {
8903            let size = self.parse_expression()?;
8904            let offset = if self.match_token(TokenType::Offset) {
8905                Some(self.parse_expression()?)
8906            } else {
8907                None
8908            };
8909            return Ok(Some(Sample {
8910                method: SampleMethod::Bernoulli,
8911                size,
8912                seed: None,
8913                offset,
8914                unit_after_size: false,
8915                use_sample_keyword,
8916                explicit_method: false,
8917                method_before_size: false,
8918                use_seed_keyword: false,
8919                bucket_numerator: None,
8920                bucket_denominator: None,
8921                bucket_field: None,
8922                is_using_sample: false,
8923                is_percent: false,
8924                suppress_method_output: false,
8925            }));
8926        }
8927
8928        // Parse method name (optional for table-level TABLESAMPLE)
8929        let (method, explicit_method, method_before_size) = if self.check_identifier("RESERVOIR") {
8930            self.skip();
8931            (SampleMethod::Reservoir, true, true)
8932        } else if self.match_token(TokenType::Bernoulli) {
8933            (SampleMethod::Bernoulli, true, true)
8934        } else if self.match_token(TokenType::System) {
8935            (SampleMethod::System, true, true)
8936        } else if self.match_token(TokenType::Block) {
8937            (SampleMethod::Block, true, true)
8938        } else if self.match_token(TokenType::Row) {
8939            (SampleMethod::Row, true, true)
8940        } else {
8941            // No explicit method - default to Bernoulli internally but track as not explicit
8942            (SampleMethod::Bernoulli, false, false)
8943        };
8944
8945        // Parse (size [PERCENT|ROWS])
8946        self.expect(TokenType::LParen)?;
8947
8948        // Check for BUCKET syntax: TABLESAMPLE (BUCKET 1 OUT OF 5 [ON col])
8949        if self.match_identifier("BUCKET") {
8950            let bucket_numerator = self.parse_primary()?;
8951            self.match_identifier("OUT");
8952            self.match_token(TokenType::Of);
8953            let bucket_denominator = self.parse_primary()?;
8954            let bucket_field = if self.match_token(TokenType::On) {
8955                Some(Box::new(self.parse_primary()?))
8956            } else {
8957                None
8958            };
8959            self.expect(TokenType::RParen)?;
8960            return Ok(Some(Sample {
8961                method: SampleMethod::Bucket,
8962                size: bucket_numerator.clone(),
8963                seed: None,
8964                offset: None,
8965                unit_after_size: false,
8966                use_sample_keyword,
8967                explicit_method: true,
8968                method_before_size: false,
8969                use_seed_keyword: false,
8970                bucket_numerator: Some(Box::new(bucket_numerator)),
8971                bucket_denominator: Some(Box::new(bucket_denominator)),
8972                bucket_field,
8973                is_using_sample: false,
8974                is_percent: false,
8975                suppress_method_output: false,
8976            }));
8977        }
8978
8979        let size = self.parse_unary()?;
8980
8981        // Check for PERCENT/ROWS suffix or % symbol
8982        let (method, unit_after_size, is_percent) =
8983            if self.check(TokenType::Percent) && self.peek().text.eq_ignore_ascii_case("PERCENT") {
8984                self.skip();
8985                // If no explicit method, use Percent to represent "PERCENT" unit
8986                if explicit_method {
8987                    (method, true, true)
8988                } else {
8989                    (SampleMethod::Percent, true, true)
8990                }
8991            } else if self.match_token(TokenType::Rows) {
8992                // If no explicit method, use Row to represent "ROWS" unit
8993                if explicit_method {
8994                    (method, true, false)
8995                } else {
8996                    (SampleMethod::Row, true, false)
8997                }
8998            } else if self.check(TokenType::Percent) && self.peek().text == "%" {
8999                // 20% -> consume the %, treat as PERCENT unit
9000                self.skip();
9001                if explicit_method {
9002                    (method, true, true)
9003                } else {
9004                    (SampleMethod::Percent, true, true)
9005                }
9006            } else {
9007                (method, false, false)
9008            };
9009
9010        self.expect(TokenType::RParen)?;
9011
9012        // Optional SEED/REPEATABLE
9013        let (seed, use_seed_keyword) = if self.match_token(TokenType::Seed) {
9014            self.expect(TokenType::LParen)?;
9015            let seed_value = self.parse_expression()?;
9016            self.expect(TokenType::RParen)?;
9017            (Some(seed_value), true)
9018        } else if self.match_token(TokenType::Repeatable) {
9019            self.expect(TokenType::LParen)?;
9020            let seed_value = self.parse_expression()?;
9021            self.expect(TokenType::RParen)?;
9022            (Some(seed_value), false)
9023        } else {
9024            (None, false)
9025        };
9026
9027        Ok(Some(Sample {
9028            method,
9029            size,
9030            seed,
9031            offset: None,
9032            unit_after_size,
9033            use_sample_keyword,
9034            explicit_method,
9035            method_before_size,
9036            use_seed_keyword,
9037            bucket_numerator: None,
9038            bucket_denominator: None,
9039            bucket_field: None,
9040            is_using_sample: false, // table-level uses TABLESAMPLE/SAMPLE keyword, not USING SAMPLE
9041            is_percent,
9042            suppress_method_output: false,
9043        }))
9044    }
9045
9046    /// Parse set operations (UNION, INTERSECT, EXCEPT)
9047    fn parse_set_operation(&mut self, left: Expression) -> Result<Expression> {
9048        let mut result = left;
9049        let mut found_set_op = false;
9050
9051        loop {
9052            // Check for BigQuery set operation modifiers BEFORE the set operation keyword
9053            // Pattern: SELECT ... [INNER|LEFT|RIGHT|FULL] UNION/INTERSECT/EXCEPT ...
9054            let (side, kind) = self.parse_set_operation_side_kind();
9055
9056            // Capture leading comments from the set operation keyword token (e.g., /*x*/ before UNION).
9057            // These comments appeared on a new line between the left SELECT and the set operation keyword.
9058            let set_op_leading_comments = if self.check(TokenType::Union)
9059                || self.check(TokenType::Intersect)
9060                || self.check(TokenType::Except)
9061            {
9062                self.current_leading_comments().to_vec()
9063            } else {
9064                Vec::new()
9065            };
9066
9067            // Wrap left expression with comments if needed
9068            let left = if !set_op_leading_comments.is_empty() {
9069                Expression::Annotated(Box::new(Annotated {
9070                    this: result,
9071                    trailing_comments: set_op_leading_comments,
9072                }))
9073            } else {
9074                result
9075            };
9076
9077            if self.match_token(TokenType::Union) {
9078                let all = self.match_token(TokenType::All);
9079                let distinct = if !all {
9080                    self.match_token(TokenType::Distinct)
9081                } else {
9082                    false
9083                };
9084
9085                let (by_name, strict, corresponding, on_columns) =
9086                    self.parse_set_operation_corresponding()?;
9087
9088                let kind = if corresponding && !strict && side.is_none() && kind.is_none() {
9089                    Some("INNER".to_string())
9090                } else {
9091                    kind
9092                };
9093
9094                let right = self.parse_select_or_paren_select()?;
9095                result = Expression::Union(Box::new(Union {
9096                    left,
9097                    right,
9098                    all,
9099                    distinct,
9100                    with: None,
9101                    order_by: None,
9102                    limit: None,
9103                    offset: None,
9104                    distribute_by: None,
9105                    sort_by: None,
9106                    cluster_by: None,
9107                    by_name,
9108                    side,
9109                    kind,
9110                    corresponding,
9111                    strict,
9112                    on_columns,
9113                }));
9114                found_set_op = true;
9115            } else if self.match_token(TokenType::Intersect) {
9116                let all = self.match_token(TokenType::All);
9117                let distinct = if !all {
9118                    self.match_token(TokenType::Distinct)
9119                } else {
9120                    false
9121                };
9122
9123                let (by_name, strict, corresponding, on_columns) =
9124                    self.parse_set_operation_corresponding()?;
9125
9126                let kind = if corresponding && !strict && side.is_none() && kind.is_none() {
9127                    Some("INNER".to_string())
9128                } else {
9129                    kind
9130                };
9131
9132                let right = self.parse_select_or_paren_select()?;
9133                result = Expression::Intersect(Box::new(Intersect {
9134                    left,
9135                    right,
9136                    all,
9137                    distinct,
9138                    with: None,
9139                    order_by: None,
9140                    limit: None,
9141                    offset: None,
9142                    distribute_by: None,
9143                    sort_by: None,
9144                    cluster_by: None,
9145                    by_name,
9146                    side,
9147                    kind,
9148                    corresponding,
9149                    strict,
9150                    on_columns,
9151                }));
9152                found_set_op = true;
9153            } else if self.match_token(TokenType::Except) {
9154                let all = self.match_token(TokenType::All);
9155                let distinct = if !all {
9156                    self.match_token(TokenType::Distinct)
9157                } else {
9158                    false
9159                };
9160
9161                let (by_name, strict, corresponding, on_columns) =
9162                    self.parse_set_operation_corresponding()?;
9163
9164                let kind = if corresponding && !strict && side.is_none() && kind.is_none() {
9165                    Some("INNER".to_string())
9166                } else {
9167                    kind
9168                };
9169
9170                let right = self.parse_select_or_paren_select()?;
9171                result = Expression::Except(Box::new(Except {
9172                    left,
9173                    right,
9174                    all,
9175                    distinct,
9176                    with: None,
9177                    order_by: None,
9178                    limit: None,
9179                    offset: None,
9180                    distribute_by: None,
9181                    sort_by: None,
9182                    cluster_by: None,
9183                    by_name,
9184                    side,
9185                    kind,
9186                    corresponding,
9187                    strict,
9188                    on_columns,
9189                }));
9190                found_set_op = true;
9191            } else if side.is_some() || kind.is_some() {
9192                return Err(self.parse_error(
9193                    "Expected UNION, INTERSECT, or EXCEPT after set operation modifier",
9194                ));
9195            } else {
9196                result = left;
9197                break;
9198            }
9199        }
9200
9201        // Parse ORDER BY, LIMIT, OFFSET for the outermost set operation
9202        if found_set_op {
9203            self.parse_set_operation_modifiers(&mut result)?;
9204        }
9205        Ok(result)
9206    }
9207
9208    /// Parse BigQuery set operation side (LEFT, RIGHT, FULL) and kind (INNER)
9209    /// These modifiers appear BEFORE the UNION/INTERSECT/EXCEPT keyword
9210    fn parse_set_operation_side_kind(&mut self) -> (Option<String>, Option<String>) {
9211        let mut side = None;
9212        let mut kind = None;
9213
9214        // Check for side: LEFT, RIGHT, FULL (reusing join side tokens)
9215        if self.check(TokenType::Left)
9216            || self.check(TokenType::Right)
9217            || self.check(TokenType::Full)
9218        {
9219            // Only consume if followed by UNION/INTERSECT/EXCEPT (or INNER which would be followed by them)
9220            let saved = self.current;
9221            let side_token = self.advance();
9222            let side_text = side_token.text.to_ascii_uppercase();
9223
9224            // Check if followed by set operation or INNER
9225            if self.check(TokenType::Union)
9226                || self.check(TokenType::Intersect)
9227                || self.check(TokenType::Except)
9228                || self.check(TokenType::Inner)
9229            {
9230                side = Some(side_text);
9231            } else {
9232                // Not a set operation modifier, backtrack
9233                self.current = saved;
9234                return (None, None);
9235            }
9236        }
9237
9238        // Check for kind: INNER
9239        if self.check(TokenType::Inner) {
9240            let saved = self.current;
9241            self.skip(); // consume INNER
9242
9243            // Check if followed by set operation
9244            if self.check(TokenType::Union)
9245                || self.check(TokenType::Intersect)
9246                || self.check(TokenType::Except)
9247            {
9248                kind = Some("INNER".to_string());
9249            } else {
9250                // Not a set operation modifier, backtrack
9251                self.current = saved;
9252                if side.is_some() {
9253                    // We already consumed a side token, need to backtrack that too
9254                    self.current = saved - 1;
9255                }
9256                return (None, None);
9257            }
9258        }
9259
9260        (side, kind)
9261    }
9262
9263    /// Parse CORRESPONDING/STRICT CORRESPONDING/BY NAME modifiers after ALL/DISTINCT
9264    /// Returns (by_name, strict, corresponding, on_columns)
9265    fn parse_set_operation_corresponding(&mut self) -> Result<(bool, bool, bool, Vec<Expression>)> {
9266        let mut by_name = false;
9267        let mut strict = false;
9268        let mut corresponding = false;
9269        let mut on_columns = Vec::new();
9270
9271        // Check for BY NAME (DuckDB style)
9272        if self.match_token(TokenType::By) && self.match_identifier("NAME") {
9273            by_name = true;
9274        }
9275        // Check for STRICT CORRESPONDING (BigQuery style)
9276        else if self.match_identifier("STRICT") {
9277            if self.match_identifier("CORRESPONDING") {
9278                strict = true;
9279                corresponding = true;
9280            } else {
9281                // STRICT without CORRESPONDING - backtrack
9282                self.current -= 1;
9283            }
9284        }
9285        // Check for CORRESPONDING (BigQuery style)
9286        else if self.match_identifier("CORRESPONDING") {
9287            corresponding = true;
9288        }
9289
9290        // If CORRESPONDING is set, check for BY (columns)
9291        if corresponding && self.match_token(TokenType::By) {
9292            self.expect(TokenType::LParen)?;
9293            on_columns = self
9294                .parse_identifier_list()?
9295                .into_iter()
9296                .map(|id| {
9297                    Expression::boxed_column(Column {
9298                        name: id,
9299                        table: None,
9300                        join_mark: false,
9301                        trailing_comments: Vec::new(),
9302                        span: None,
9303                        inferred_type: None,
9304                    })
9305                })
9306                .collect();
9307            self.expect(TokenType::RParen)?;
9308        }
9309
9310        Ok((by_name, strict, corresponding, on_columns))
9311    }
9312
9313    /// Parse ORDER BY, LIMIT, OFFSET modifiers for set operations
9314    fn parse_set_operation_modifiers(&mut self, expr: &mut Expression) -> Result<()> {
9315        // Parse ORDER BY
9316        let order_by = if self.match_token(TokenType::Order) {
9317            self.expect(TokenType::By)?;
9318            Some(self.parse_order_by()?)
9319        } else {
9320            None
9321        };
9322
9323        // Parse LIMIT
9324        let limit = if self.match_token(TokenType::Limit) {
9325            Some(Box::new(self.parse_expression()?))
9326        } else {
9327            None
9328        };
9329
9330        // Parse OFFSET
9331        let offset = if self.match_token(TokenType::Offset) {
9332            Some(Box::new(self.parse_expression()?))
9333        } else {
9334            None
9335        };
9336
9337        // Apply modifiers to the outermost set operation
9338        match expr {
9339            Expression::Union(ref mut union) => {
9340                if order_by.is_some() {
9341                    union.order_by = order_by;
9342                }
9343                if limit.is_some() {
9344                    union.limit = limit;
9345                }
9346                if offset.is_some() {
9347                    union.offset = offset;
9348                }
9349            }
9350            Expression::Intersect(ref mut intersect) => {
9351                if order_by.is_some() {
9352                    intersect.order_by = order_by;
9353                }
9354                if limit.is_some() {
9355                    intersect.limit = limit;
9356                }
9357                if offset.is_some() {
9358                    intersect.offset = offset;
9359                }
9360            }
9361            Expression::Except(ref mut except) => {
9362                if order_by.is_some() {
9363                    except.order_by = order_by;
9364                }
9365                if limit.is_some() {
9366                    except.limit = limit;
9367                }
9368                if offset.is_some() {
9369                    except.offset = offset;
9370                }
9371            }
9372            _ => {}
9373        }
9374        Ok(())
9375    }
9376
9377    /// Parse either a SELECT statement or a parenthesized SELECT/set operation
9378    fn parse_select_or_paren_select(&mut self) -> Result<Expression> {
9379        if self.match_token(TokenType::LParen) {
9380            // Could be (SELECT ...) or ((SELECT ...) UNION ...) or (FROM ...) for DuckDB
9381            if self.check(TokenType::Select)
9382                || self.check(TokenType::With)
9383                || self.check(TokenType::From)
9384            {
9385                let query = self.parse_statement()?;
9386                self.expect(TokenType::RParen)?;
9387                // Handle optional alias after subquery: (SELECT 1) AS a
9388                let alias = if self.match_token(TokenType::As) {
9389                    Some(Identifier::new(self.expect_identifier()?))
9390                } else {
9391                    None
9392                };
9393                // Wrap in Subquery to preserve parentheses
9394                Ok(Expression::Subquery(Box::new(Subquery {
9395                    this: query,
9396                    alias,
9397                    column_aliases: Vec::new(),
9398                    order_by: None,
9399                    limit: None,
9400                    offset: None,
9401                    lateral: false,
9402                    modifiers_inside: false,
9403                    trailing_comments: Vec::new(),
9404                    distribute_by: None,
9405                    sort_by: None,
9406                    cluster_by: None,
9407                    inferred_type: None,
9408                })))
9409            } else if self.check(TokenType::LParen) {
9410                // Nested parentheses like ((SELECT ...))
9411                let inner = self.parse_select_or_paren_select()?;
9412                // Check for set operations inside the parens
9413                let result = self.parse_set_operation(inner)?;
9414                self.expect(TokenType::RParen)?;
9415                // Handle optional alias after subquery
9416                let alias = if self.match_token(TokenType::As) {
9417                    Some(Identifier::new(self.expect_identifier()?))
9418                } else {
9419                    None
9420                };
9421                // Wrap in Subquery to preserve parentheses
9422                Ok(Expression::Subquery(Box::new(Subquery {
9423                    this: result,
9424                    alias,
9425                    column_aliases: Vec::new(),
9426                    order_by: None,
9427                    limit: None,
9428                    offset: None,
9429                    lateral: false,
9430                    modifiers_inside: false,
9431                    trailing_comments: Vec::new(),
9432                    distribute_by: None,
9433                    sort_by: None,
9434                    cluster_by: None,
9435                    inferred_type: None,
9436                })))
9437            } else {
9438                Err(self.parse_error("Expected SELECT or ( after ("))
9439            }
9440        } else if self.check(TokenType::From) {
9441            // DuckDB FROM-first syntax without parentheses: ... UNION FROM t
9442            self.parse_from_first_query()
9443        } else if self.check(TokenType::With) {
9444            // WITH CTE as right-hand side of UNION/INTERSECT/EXCEPT
9445            self.parse_statement()
9446        } else {
9447            // Use parse_select_body (not parse_select) to avoid mutual recursion:
9448            // parse_select calls parse_set_operation, which calls back here.
9449            // The caller (parse_set_operation's loop) handles set-op chaining.
9450            self.parse_select_body()
9451        }
9452    }
9453
9454    /// Parse INSERT statement
9455    fn parse_insert(&mut self) -> Result<Expression> {
9456        let insert_token = self.expect(TokenType::Insert)?;
9457        let leading_comments = insert_token.comments;
9458
9459        // Parse query hint /*+ ... */ if present (Oracle: INSERT /*+ APPEND */ INTO ...)
9460        let hint = if self.check(TokenType::Hint) {
9461            Some(self.parse_hint()?)
9462        } else {
9463            None
9464        };
9465
9466        // Handle SQLite conflict action: INSERT OR ABORT|FAIL|IGNORE|REPLACE|ROLLBACK INTO
9467        let conflict_action = if self.match_token(TokenType::Or) {
9468            if self.match_identifier("ABORT") {
9469                Some("ABORT".to_string())
9470            } else if self.match_identifier("FAIL") {
9471                Some("FAIL".to_string())
9472            } else if self.match_token(TokenType::Ignore) {
9473                Some("IGNORE".to_string())
9474            } else if self.match_token(TokenType::Replace) {
9475                Some("REPLACE".to_string())
9476            } else if self.match_token(TokenType::Rollback) {
9477                Some("ROLLBACK".to_string())
9478            } else {
9479                return Err(self.parse_error(
9480                    "Expected ABORT, FAIL, IGNORE, REPLACE, or ROLLBACK after INSERT OR",
9481                ));
9482            }
9483        } else {
9484            None
9485        };
9486
9487        // Handle INSERT IGNORE (MySQL)
9488        let ignore = conflict_action.is_none() && self.match_token(TokenType::Ignore);
9489
9490        // Handle OVERWRITE for Hive/Spark: INSERT OVERWRITE TABLE ...
9491        let overwrite = self.match_token(TokenType::Overwrite);
9492
9493        // Handle Oracle multi-table INSERT: INSERT ALL/FIRST ...
9494        // Must check before OVERWRITE handling since these are mutually exclusive
9495        if !overwrite && (self.match_token(TokenType::All) || self.match_token(TokenType::First)) {
9496            if let Some(multi_insert) = self.parse_multitable_inserts(leading_comments.clone())? {
9497                return Ok(multi_insert);
9498            }
9499        }
9500
9501        // Handle INTO or TABLE (OVERWRITE requires TABLE, INTO is standard)
9502        // Also handle INSERT OVERWRITE [LOCAL] DIRECTORY 'path'
9503        let local_directory = overwrite && self.match_token(TokenType::Local);
9504        let is_directory = (overwrite || local_directory) && self.match_identifier("DIRECTORY");
9505
9506        if is_directory {
9507            // INSERT OVERWRITE [LOCAL] DIRECTORY 'path' [ROW FORMAT ...] SELECT ...
9508            let path = self.expect_string()?;
9509            // Parse optional ROW FORMAT clause
9510            let row_format = if self.match_keywords(&[TokenType::Row, TokenType::Format]) {
9511                // ROW FORMAT DELIMITED ...
9512                let delimited = self.match_identifier("DELIMITED");
9513                let mut fields_terminated_by = None;
9514                let mut collection_items_terminated_by = None;
9515                let mut map_keys_terminated_by = None;
9516                let mut lines_terminated_by = None;
9517                let mut null_defined_as = None;
9518
9519                // Parse the various TERMINATED BY clauses
9520                loop {
9521                    if self.match_identifier("FIELDS") || self.match_identifier("FIELD") {
9522                        self.match_identifier("TERMINATED");
9523                        self.match_token(TokenType::By);
9524                        fields_terminated_by = Some(self.expect_string()?);
9525                    } else if self.match_identifier("COLLECTION") {
9526                        self.match_identifier("ITEMS");
9527                        self.match_identifier("TERMINATED");
9528                        self.match_token(TokenType::By);
9529                        collection_items_terminated_by = Some(self.expect_string()?);
9530                    } else if self.match_identifier("MAP") {
9531                        self.match_identifier("KEYS");
9532                        self.match_identifier("TERMINATED");
9533                        self.match_token(TokenType::By);
9534                        map_keys_terminated_by = Some(self.expect_string()?);
9535                    } else if self.match_identifier("LINES") {
9536                        self.match_identifier("TERMINATED");
9537                        self.match_token(TokenType::By);
9538                        lines_terminated_by = Some(self.expect_string()?);
9539                    } else if self.match_token(TokenType::Null) {
9540                        self.match_identifier("DEFINED");
9541                        self.match_token(TokenType::As);
9542                        null_defined_as = Some(self.expect_string()?);
9543                    } else {
9544                        break;
9545                    }
9546                }
9547
9548                Some(RowFormat {
9549                    delimited,
9550                    fields_terminated_by,
9551                    collection_items_terminated_by,
9552                    map_keys_terminated_by,
9553                    lines_terminated_by,
9554                    null_defined_as,
9555                })
9556            } else {
9557                None
9558            };
9559
9560            // Parse optional STORED AS clause
9561            let stored_as = if self.match_identifier("STORED") {
9562                self.expect(TokenType::As)?;
9563                Some(self.expect_identifier()?)
9564            } else {
9565                None
9566            };
9567
9568            // Parse the SELECT query
9569            let query = self.parse_statement()?;
9570
9571            return Ok(Expression::Insert(Box::new(Insert {
9572                table: TableRef::new(""),
9573                columns: Vec::new(),
9574                values: Vec::new(),
9575                query: Some(query),
9576                overwrite,
9577                partition: Vec::new(),
9578                directory: Some(DirectoryInsert {
9579                    local: local_directory,
9580                    path,
9581                    row_format,
9582                    stored_as,
9583                }),
9584                returning: Vec::new(),
9585                output: None,
9586                on_conflict: None,
9587                leading_comments,
9588                if_exists: false,
9589                with: None,
9590                ignore,
9591                source_alias: None,
9592                alias: None,
9593                alias_explicit_as: false,
9594                default_values: false,
9595                by_name: false,
9596                conflict_action: conflict_action.clone(),
9597                is_replace: false,
9598                replace_where: None,
9599                source: None,
9600                hint: hint.clone(),
9601                function_target: None,
9602                partition_by: None,
9603                settings: Vec::new(),
9604            })));
9605        }
9606
9607        if overwrite {
9608            // OVERWRITE can be followed by INTO (Snowflake) or TABLE (Hive/Spark)
9609            self.match_token(TokenType::Into);
9610            self.match_token(TokenType::Table);
9611        } else {
9612            self.expect(TokenType::Into)?;
9613            // Optional TABLE keyword after INTO
9614            self.match_token(TokenType::Table);
9615        }
9616
9617        // ClickHouse: INSERT INTO [TABLE] FUNCTION func_name(args...)
9618        let mut function_target: Option<Box<Expression>> = None;
9619        if self.match_token(TokenType::Function) {
9620            // Parse function call: func_name(args...)
9621            let func_name = self.expect_identifier_or_keyword()?;
9622            self.expect(TokenType::LParen)?;
9623            let args = if self.check(TokenType::RParen) {
9624                Vec::new()
9625            } else {
9626                self.parse_expression_list()?
9627            };
9628            self.expect(TokenType::RParen)?;
9629            function_target = Some(Box::new(Expression::Function(Box::new(Function {
9630                name: func_name,
9631                args,
9632                distinct: false,
9633                trailing_comments: Vec::new(),
9634                use_bracket_syntax: false,
9635                no_parens: false,
9636                quoted: false,
9637                span: None,
9638                inferred_type: None,
9639            }))));
9640        }
9641
9642        let table_name = if function_target.is_some() {
9643            // For FUNCTION targets, use empty table name
9644            Identifier::new(String::new())
9645        } else {
9646            // Allow keywords (like TABLE) as table names in INSERT statements
9647            self.expect_identifier_or_keyword_with_quoted()?
9648        };
9649        // Handle qualified table names like a.b
9650        let table = if self.match_token(TokenType::Dot) {
9651            let schema = table_name;
9652            let name = self.expect_identifier_or_keyword_with_quoted()?;
9653            let trailing_comments = self.previous_trailing_comments().to_vec();
9654            TableRef {
9655                name,
9656                schema: Some(schema),
9657                catalog: None,
9658                alias: None,
9659                alias_explicit_as: false,
9660                column_aliases: Vec::new(),
9661                leading_comments: Vec::new(),
9662                trailing_comments,
9663                when: None,
9664                only: false,
9665                final_: false,
9666                table_sample: None,
9667                hints: Vec::new(),
9668                system_time: None,
9669                partitions: Vec::new(),
9670                identifier_func: None,
9671                changes: None,
9672                version: None,
9673                span: None,
9674            }
9675        } else {
9676            let trailing_comments = self.previous_trailing_comments().to_vec();
9677            TableRef {
9678                name: table_name,
9679                schema: None,
9680                catalog: None,
9681                alias: None,
9682                alias_explicit_as: false,
9683                column_aliases: Vec::new(),
9684                leading_comments: Vec::new(),
9685                when: None,
9686                only: false,
9687                final_: false,
9688                table_sample: None,
9689                hints: Vec::new(),
9690                system_time: None,
9691                trailing_comments,
9692                partitions: Vec::new(),
9693                identifier_func: None,
9694                changes: None,
9695                version: None,
9696                span: None,
9697            }
9698        };
9699
9700        // Optional alias (PostgreSQL: INSERT INTO table AS t(...), Oracle: INSERT INTO table t ...)
9701        let (alias, alias_explicit_as) = if self.match_token(TokenType::As) {
9702            (Some(Identifier::new(self.expect_identifier()?)), true)
9703        } else if self.is_identifier_token()
9704            && !self.check(TokenType::Values)
9705            && !self.check(TokenType::Select)
9706            && !self.check(TokenType::Default)
9707            && !self.check(TokenType::By)
9708            && !self.check(TokenType::Partition)
9709            && !self.check(TokenType::Output)
9710            && !self.check(TokenType::If)
9711            && !self.check(TokenType::Replace)
9712            && !self.check(TokenType::Table)
9713            && !self.check(TokenType::LParen)
9714        {
9715            // Implicit alias without AS (e.g., INSERT INTO dest d VALUES ...)
9716            (Some(Identifier::new(self.expect_identifier()?)), false)
9717        } else {
9718            (None, false)
9719        };
9720
9721        // Optional IF EXISTS (Hive)
9722        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
9723
9724        // Optional REPLACE WHERE clause (Databricks): INSERT INTO a REPLACE WHERE cond VALUES ...
9725        let replace_where =
9726            if self.match_token(TokenType::Replace) && self.match_token(TokenType::Where) {
9727                Some(Box::new(self.parse_or()?))
9728            } else {
9729                None
9730            };
9731
9732        // Optional PARTITION clause
9733        // ClickHouse: PARTITION BY expr (no parens)
9734        // Hive/Spark: PARTITION (col1 = val1, col2)
9735        let mut partition_by_expr: Option<Box<Expression>> = None;
9736        let partition = if self.check(TokenType::Partition) && self.check_next(TokenType::By) {
9737            // ClickHouse PARTITION BY expr
9738            self.skip(); // consume PARTITION
9739            self.skip(); // consume BY
9740            partition_by_expr = Some(Box::new(self.parse_expression()?));
9741            Vec::new()
9742        } else if self.match_token(TokenType::Partition) {
9743            self.expect(TokenType::LParen)?;
9744            let mut parts = Vec::new();
9745            loop {
9746                let col = Identifier::new(self.expect_identifier()?);
9747                let value = if self.match_token(TokenType::Eq) {
9748                    Some(self.parse_expression()?)
9749                } else {
9750                    None
9751                };
9752                parts.push((col, value));
9753                if !self.match_token(TokenType::Comma) {
9754                    break;
9755                }
9756            }
9757            self.expect(TokenType::RParen)?;
9758            parts
9759        } else {
9760            Vec::new()
9761        };
9762
9763        // ClickHouse: SETTINGS key = val, ...
9764        let insert_settings = if self.match_token(TokenType::Settings) {
9765            let mut settings = Vec::new();
9766            loop {
9767                settings.push(self.parse_expression()?);
9768                if !self.match_token(TokenType::Comma) {
9769                    break;
9770                }
9771            }
9772            settings
9773        } else {
9774            Vec::new()
9775        };
9776
9777        // Optional column list OR parenthesized subquery
9778        // We need to check if ( is followed by SELECT/WITH (subquery) or identifiers (column list)
9779        let columns = if self.check(TokenType::LParen) {
9780            // Look ahead to see if this is a subquery or column list
9781            if self
9782                .peek_nth(1)
9783                .map(|t| t.token_type == TokenType::Select || t.token_type == TokenType::With)
9784                .unwrap_or(false)
9785            {
9786                // This is a parenthesized subquery, not a column list
9787                Vec::new()
9788            } else if matches!(
9789                self.config.dialect,
9790                Some(crate::dialects::DialectType::ClickHouse)
9791            ) && {
9792                // ClickHouse: INSERT INTO t (*), t(* EXCEPT ...), t(table.* EXCEPT ...), t(COLUMNS('pattern') EXCEPT ...)
9793                let peek1 = self.peek_nth(1).map(|t| t.token_type);
9794                peek1 == Some(TokenType::Star)
9795                    || (peek1 == Some(TokenType::Var)
9796                        && self.peek_nth(2).map(|t| t.token_type) == Some(TokenType::Dot)
9797                        && self.peek_nth(3).map(|t| t.token_type) == Some(TokenType::Star))
9798                    || (peek1 == Some(TokenType::Var)
9799                        && self
9800                            .peek_nth(1)
9801                            .map(|t| t.text.eq_ignore_ascii_case("COLUMNS"))
9802                            .unwrap_or(false))
9803            } {
9804                // Consume balanced parens and skip entire column specification
9805                self.skip(); // consume (
9806                let mut depth = 1i32;
9807                while !self.is_at_end() && depth > 0 {
9808                    if self.check(TokenType::LParen) {
9809                        depth += 1;
9810                    }
9811                    if self.check(TokenType::RParen) {
9812                        depth -= 1;
9813                        if depth == 0 {
9814                            break;
9815                        }
9816                    }
9817                    self.skip();
9818                }
9819                self.expect(TokenType::RParen)?;
9820                Vec::new() // Treat as "all columns"
9821            } else {
9822                self.skip(); // consume (
9823                let cols = self.parse_identifier_list()?;
9824                self.expect(TokenType::RParen)?;
9825                cols
9826            }
9827        } else {
9828            Vec::new()
9829        };
9830
9831        // Parse OUTPUT clause (TSQL)
9832        let output = if self.match_token(TokenType::Output) {
9833            Some(self.parse_output_clause()?)
9834        } else {
9835            None
9836        };
9837
9838        // Check for BY NAME (DuckDB): INSERT INTO x BY NAME SELECT ...
9839        let by_name = self.match_token(TokenType::By) && self.match_identifier("NAME");
9840
9841        // Check for DEFAULT VALUES (PostgreSQL)
9842        let default_values =
9843            self.match_token(TokenType::Default) && self.match_token(TokenType::Values);
9844
9845        // VALUES or SELECT or TABLE source (Hive/Spark) or DEFAULT VALUES (already consumed above)
9846        let (values, query) = if default_values {
9847            // DEFAULT VALUES: no values or query
9848            (Vec::new(), None)
9849        } else if matches!(
9850            self.config.dialect,
9851            Some(crate::dialects::DialectType::ClickHouse)
9852        ) && self.check(TokenType::Format)
9853            && self.peek_nth(1).is_some_and(|t| {
9854                !t.text.eq_ignore_ascii_case("VALUES")
9855                    && (t.token_type == TokenType::Var || t.token_type == TokenType::Identifier)
9856            })
9857        {
9858            // ClickHouse: FORMAT <format_name> followed by raw data (CSV, JSON, TSV, etc.)
9859            // Skip everything to next semicolon or end — the data is not SQL
9860            self.skip(); // consume FORMAT
9861            let format_name = self.advance().text.clone(); // consume format name
9862                                                           // Consume all remaining tokens until semicolon (raw data)
9863            while !self.is_at_end() && !self.check(TokenType::Semicolon) {
9864                self.skip();
9865            }
9866            // Store as empty values with the format name in the query as a command
9867            (
9868                Vec::new(),
9869                Some(Expression::Command(Box::new(crate::expressions::Command {
9870                    this: format!("FORMAT {}", format_name),
9871                }))),
9872            )
9873        } else if matches!(
9874            self.config.dialect,
9875            Some(crate::dialects::DialectType::ClickHouse)
9876        ) && self.match_text_seq(&["FORMAT", "VALUES"])
9877        {
9878            let mut all_values = Vec::new();
9879
9880            loop {
9881                self.expect(TokenType::LParen)?;
9882                let row = self.parse_expression_list()?;
9883                self.expect(TokenType::RParen)?;
9884                all_values.push(row);
9885
9886                if !self.match_token(TokenType::Comma) {
9887                    break;
9888                }
9889            }
9890
9891            (all_values, None)
9892        } else if self.match_token(TokenType::Values) {
9893            let mut all_values = Vec::new();
9894
9895            // ClickHouse: INSERT INTO t VALUES; — empty VALUES (clientError expected)
9896            if matches!(
9897                self.config.dialect,
9898                Some(crate::dialects::DialectType::ClickHouse)
9899            ) && (self.check(TokenType::Semicolon) || self.is_at_end())
9900            {
9901                // Return empty INSERT as Command to avoid needing all Insert fields
9902                return Ok(Expression::Command(Box::new(crate::expressions::Command {
9903                    this: "INSERT INTO VALUES".to_string(),
9904                })));
9905            }
9906
9907            // ClickHouse: allow bare VALUES without parens: VALUES 1, 2, 3
9908            if matches!(
9909                self.config.dialect,
9910                Some(crate::dialects::DialectType::ClickHouse)
9911            ) && !self.check(TokenType::LParen)
9912            {
9913                loop {
9914                    let val = self.parse_expression()?;
9915                    all_values.push(vec![val]);
9916                    if !self.match_token(TokenType::Comma) {
9917                        break;
9918                    }
9919                }
9920            } else {
9921                loop {
9922                    self.expect(TokenType::LParen)?;
9923                    // ClickHouse: allow empty VALUES () — empty tuple
9924                    let row = if self.check(TokenType::RParen) {
9925                        Vec::new()
9926                    } else {
9927                        self.parse_values_expression_list()?
9928                    };
9929                    self.expect(TokenType::RParen)?;
9930                    all_values.push(row);
9931
9932                    if !self.match_token(TokenType::Comma) {
9933                        // ClickHouse: allow tuples without commas: VALUES (1) (2) (3)
9934                        if matches!(
9935                            self.config.dialect,
9936                            Some(crate::dialects::DialectType::ClickHouse)
9937                        ) && self.check(TokenType::LParen)
9938                        {
9939                            continue;
9940                        }
9941                        break;
9942                    }
9943                    // ClickHouse: allow trailing comma after last tuple
9944                    if matches!(
9945                        self.config.dialect,
9946                        Some(crate::dialects::DialectType::ClickHouse)
9947                    ) && !self.check(TokenType::LParen)
9948                    {
9949                        break;
9950                    }
9951                }
9952            } // close else (parenthesized values)
9953
9954            (all_values, None)
9955        } else if self.check(TokenType::Table) {
9956            // Hive/Spark: INSERT OVERWRITE TABLE target TABLE source
9957            // The TABLE keyword here indicates source table, not a subquery
9958            (Vec::new(), None)
9959        } else {
9960            (Vec::new(), Some(self.parse_statement()?))
9961        };
9962
9963        // Parse source table (Hive/Spark): INSERT OVERWRITE TABLE target TABLE source
9964        let source = if self.match_token(TokenType::Table) {
9965            // Parse source table reference (similar to target table parsing)
9966            let source_name = self.expect_identifier_with_quoted()?;
9967            let source_table = if self.match_token(TokenType::Dot) {
9968                let schema = source_name;
9969                let name = self.expect_identifier_with_quoted()?;
9970                let trailing_comments = self.previous_trailing_comments().to_vec();
9971                TableRef {
9972                    name,
9973                    schema: Some(schema),
9974                    catalog: None,
9975                    alias: None,
9976                    alias_explicit_as: false,
9977                    column_aliases: Vec::new(),
9978                    leading_comments: Vec::new(),
9979                    trailing_comments,
9980                    when: None,
9981                    only: false,
9982                    final_: false,
9983                    table_sample: None,
9984                    hints: Vec::new(),
9985                    system_time: None,
9986                    partitions: Vec::new(),
9987                    identifier_func: None,
9988                    changes: None,
9989                    version: None,
9990                    span: None,
9991                }
9992            } else {
9993                let trailing_comments = self.previous_trailing_comments().to_vec();
9994                TableRef {
9995                    name: source_name,
9996                    schema: None,
9997                    catalog: None,
9998                    alias: None,
9999                    alias_explicit_as: false,
10000                    column_aliases: Vec::new(),
10001                    leading_comments: Vec::new(),
10002                    trailing_comments,
10003                    when: None,
10004                    only: false,
10005                    final_: false,
10006                    table_sample: None,
10007                    hints: Vec::new(),
10008                    system_time: None,
10009                    partitions: Vec::new(),
10010                    identifier_func: None,
10011                    changes: None,
10012                    version: None,
10013                    span: None,
10014                }
10015            };
10016            Some(Expression::Table(Box::new(source_table)))
10017        } else {
10018            None
10019        };
10020
10021        // Parse optional AS alias after VALUES (MySQL: INSERT ... VALUES (...) AS new_data)
10022        let source_alias = if self.match_token(TokenType::As) {
10023            Some(Identifier::new(self.expect_identifier()?))
10024        } else {
10025            None
10026        };
10027
10028        // Parse ON CONFLICT clause (PostgreSQL, SQLite) or ON DUPLICATE KEY UPDATE (MySQL)
10029        let on_conflict = if self.match_token(TokenType::On) {
10030            if self.match_identifier("CONFLICT") {
10031                Some(Box::new(self.parse_on_conflict()?))
10032            } else if self.match_identifier("DUPLICATE") {
10033                // MySQL: ON DUPLICATE KEY UPDATE
10034                self.expect(TokenType::Key)?;
10035                self.expect(TokenType::Update)?;
10036
10037                // Parse the UPDATE SET expressions
10038                let mut sets = Vec::new();
10039                loop {
10040                    // Parse column = expression
10041                    let col_name = self.expect_identifier_with_quoted()?;
10042                    // Handle qualified column: table.column
10043                    let column = if self.match_token(TokenType::Dot) {
10044                        let col = self.expect_identifier_with_quoted()?;
10045                        Expression::boxed_column(Column {
10046                            name: col,
10047                            table: Some(col_name),
10048                            join_mark: false,
10049                            trailing_comments: Vec::new(),
10050                            span: None,
10051                            inferred_type: None,
10052                        })
10053                    } else {
10054                        Expression::Identifier(col_name)
10055                    };
10056                    self.expect(TokenType::Eq)?;
10057                    let value = self.parse_expression()?;
10058                    sets.push(Expression::Eq(Box::new(BinaryOp {
10059                        left: column,
10060                        right: value,
10061                        left_comments: Vec::new(),
10062                        operator_comments: Vec::new(),
10063                        trailing_comments: Vec::new(),
10064                        inferred_type: None,
10065                    })));
10066                    if !self.match_token(TokenType::Comma) {
10067                        break;
10068                    }
10069                }
10070
10071                Some(Box::new(Expression::OnConflict(Box::new(OnConflict {
10072                    duplicate: Some(Box::new(Expression::Boolean(BooleanLiteral {
10073                        value: true,
10074                    }))),
10075                    expressions: sets,
10076                    action: None,
10077                    conflict_keys: None,
10078                    index_predicate: None,
10079                    constraint: None,
10080                    where_: None,
10081                }))))
10082            } else {
10083                // Unexpected token after ON
10084                return Err(self.parse_error("Expected CONFLICT or DUPLICATE after ON"));
10085            }
10086        } else {
10087            None
10088        };
10089
10090        // Parse RETURNING clause (PostgreSQL, SQLite)
10091        let returning = if self.match_token(TokenType::Returning) {
10092            self.parse_select_expressions()?
10093        } else {
10094            Vec::new()
10095        };
10096
10097        Ok(Expression::Insert(Box::new(Insert {
10098            table,
10099            columns,
10100            values,
10101            query,
10102            overwrite,
10103            partition,
10104            directory: None,
10105            returning,
10106            output,
10107            on_conflict,
10108            leading_comments,
10109            if_exists,
10110            with: None,
10111            ignore,
10112            source_alias,
10113            alias,
10114            alias_explicit_as,
10115            default_values,
10116            by_name,
10117            conflict_action,
10118            is_replace: false,
10119            replace_where,
10120            source: source.map(Box::new),
10121            hint,
10122            function_target,
10123            partition_by: partition_by_expr,
10124            settings: insert_settings,
10125        })))
10126    }
10127
10128    /// Parse ON CONFLICT clause for INSERT statements (PostgreSQL, SQLite)
10129    /// Syntax: ON CONFLICT [(conflict_target)] [WHERE predicate] DO NOTHING | DO UPDATE SET ...
10130    /// ON CONFLICT ON CONSTRAINT constraint_name DO ...
10131    fn parse_on_conflict(&mut self) -> Result<Expression> {
10132        // Check for ON CONSTRAINT variant
10133        let constraint =
10134            if self.match_token(TokenType::On) && self.match_token(TokenType::Constraint) {
10135                let name = self.expect_identifier()?;
10136                Some(Box::new(Expression::Identifier(Identifier::new(name))))
10137            } else {
10138                None
10139            };
10140
10141        // Parse optional conflict target (column list)
10142        let conflict_keys = if constraint.is_none() && self.match_token(TokenType::LParen) {
10143            let keys = self.parse_expression_list()?;
10144            self.expect(TokenType::RParen)?;
10145            Some(Box::new(Expression::Tuple(Box::new(Tuple {
10146                expressions: keys,
10147            }))))
10148        } else {
10149            None
10150        };
10151
10152        // Parse optional WHERE clause for conflict target
10153        let index_predicate = if self.match_token(TokenType::Where) {
10154            Some(Box::new(self.parse_expression()?))
10155        } else {
10156            None
10157        };
10158
10159        // Parse DO NOTHING or DO UPDATE
10160        if !self.match_identifier("DO") {
10161            return Err(self.parse_error("Expected DO after ON CONFLICT"));
10162        }
10163
10164        let action = if self.match_identifier("NOTHING") {
10165            // DO NOTHING
10166            Some(Box::new(Expression::Identifier(Identifier::new(
10167                "NOTHING".to_string(),
10168            ))))
10169        } else if self.match_token(TokenType::Update) {
10170            // DO UPDATE SET ...
10171            self.expect(TokenType::Set)?;
10172            let mut sets = Vec::new();
10173            loop {
10174                // Parse column = expression
10175                let col_name = self.expect_identifier_with_quoted()?;
10176                // Handle qualified column: table.column
10177                let column = if self.match_token(TokenType::Dot) {
10178                    let col = self.expect_identifier_with_quoted()?;
10179                    Expression::boxed_column(Column {
10180                        name: col,
10181                        table: Some(col_name),
10182                        join_mark: false,
10183                        trailing_comments: Vec::new(),
10184                        span: None,
10185                        inferred_type: None,
10186                    })
10187                } else {
10188                    Expression::Identifier(col_name)
10189                };
10190                self.expect(TokenType::Eq)?;
10191                let value = self.parse_expression()?;
10192                sets.push(Expression::Eq(Box::new(BinaryOp {
10193                    left: column,
10194                    right: value,
10195                    left_comments: Vec::new(),
10196                    operator_comments: Vec::new(),
10197                    trailing_comments: Vec::new(),
10198                    inferred_type: None,
10199                })));
10200                if !self.match_token(TokenType::Comma) {
10201                    break;
10202                }
10203            }
10204            Some(Box::new(Expression::Tuple(Box::new(Tuple {
10205                expressions: sets,
10206            }))))
10207        } else {
10208            return Err(self.parse_error("Expected NOTHING or UPDATE after DO"));
10209        };
10210
10211        // Parse optional WHERE clause for the UPDATE action
10212        let where_ = if self.match_token(TokenType::Where) {
10213            Some(Box::new(self.parse_expression()?))
10214        } else {
10215            None
10216        };
10217
10218        Ok(Expression::OnConflict(Box::new(OnConflict {
10219            duplicate: None,
10220            expressions: Vec::new(),
10221            action,
10222            conflict_keys,
10223            index_predicate,
10224            constraint,
10225            where_,
10226        })))
10227    }
10228
10229    /// Parse MySQL REPLACE [INTO] statement or REPLACE() function call
10230    fn parse_replace(&mut self) -> Result<Expression> {
10231        // Check if this is REPLACE() function call (REPLACE followed by '(')
10232        // or MySQL REPLACE INTO statement
10233        let replace_token = self.expect(TokenType::Replace)?;
10234        let leading_comments = replace_token.comments;
10235
10236        if self.check(TokenType::LParen) {
10237            // This is a REPLACE() function call, parse as expression
10238            self.expect(TokenType::LParen)?;
10239            let args = self.parse_expression_list()?;
10240            self.expect(TokenType::RParen)?;
10241            return Ok(Expression::Function(Box::new(Function {
10242                name: "REPLACE".to_string(),
10243                args,
10244                distinct: false,
10245                trailing_comments: Vec::new(),
10246                use_bracket_syntax: false,
10247                no_parens: false,
10248                quoted: false,
10249                span: None,
10250                inferred_type: None,
10251            })));
10252        }
10253
10254        // Teradata: REPLACE VIEW -> CREATE OR REPLACE VIEW
10255        if matches!(
10256            self.config.dialect,
10257            Some(crate::dialects::DialectType::Teradata)
10258        ) && self.check(TokenType::View)
10259        {
10260            return self.parse_create_view(true, false, false, false, None, None, None, false);
10261        }
10262
10263        // ClickHouse: REPLACE TABLE -> treat like CREATE OR REPLACE TABLE
10264        // Also handle REPLACE TEMPORARY TABLE
10265        if matches!(
10266            self.config.dialect,
10267            Some(crate::dialects::DialectType::ClickHouse)
10268        ) && (self.check(TokenType::Table) || self.check(TokenType::Temporary))
10269        {
10270            let temporary = self.match_token(TokenType::Temporary);
10271            return self.parse_create_table(true, temporary, leading_comments.clone(), None);
10272        }
10273
10274        // ClickHouse: REPLACE DICTIONARY -> consume as Command
10275        if matches!(
10276            self.config.dialect,
10277            Some(crate::dialects::DialectType::ClickHouse)
10278        ) && (self.check(TokenType::Dictionary) || self.check_identifier("DICTIONARY"))
10279        {
10280            let mut parts = vec!["REPLACE".to_string()];
10281            let mut _paren_depth = 0i32;
10282            while !self.is_at_end() && !self.check(TokenType::Semicolon) {
10283                let token = self.advance();
10284                if token.token_type == TokenType::LParen {
10285                    _paren_depth += 1;
10286                }
10287                if token.token_type == TokenType::RParen {
10288                    _paren_depth -= 1;
10289                }
10290                let text = if token.token_type == TokenType::String {
10291                    format!("'{}'", token.text)
10292                } else if token.token_type == TokenType::QuotedIdentifier {
10293                    format!("\"{}\"", token.text)
10294                } else {
10295                    token.text.clone()
10296                };
10297                parts.push(text);
10298            }
10299            return Ok(Expression::Command(Box::new(crate::expressions::Command {
10300                this: parts.join(" "),
10301            })));
10302        }
10303
10304        // Otherwise, this is MySQL/SQLite REPLACE INTO statement - parse similarly to INSERT
10305        self.match_token(TokenType::Into);
10306
10307        let table_name = self.expect_identifier_or_safe_keyword_with_quoted()?;
10308        let table = if self.match_token(TokenType::Dot) {
10309            let second_name = self.expect_identifier_or_safe_keyword_with_quoted()?;
10310            TableRef {
10311                name: second_name,
10312                schema: Some(table_name),
10313                catalog: None,
10314                alias: None,
10315                alias_explicit_as: false,
10316                column_aliases: Vec::new(),
10317                leading_comments: Vec::new(),
10318                trailing_comments: Vec::new(),
10319                when: None,
10320                only: false,
10321                final_: false,
10322                table_sample: None,
10323                hints: Vec::new(),
10324                system_time: None,
10325                partitions: Vec::new(),
10326                identifier_func: None,
10327                changes: None,
10328                version: None,
10329                span: None,
10330            }
10331        } else {
10332            TableRef::new(table_name.name)
10333        };
10334
10335        // Parse optional column list
10336        let columns = if self.match_token(TokenType::LParen) {
10337            let mut cols = Vec::new();
10338            loop {
10339                if self.check(TokenType::RParen) {
10340                    break;
10341                }
10342                let col = self.expect_identifier_with_quoted()?;
10343                cols.push(col);
10344                if !self.match_token(TokenType::Comma) {
10345                    break;
10346                }
10347            }
10348            self.expect(TokenType::RParen)?;
10349            cols
10350        } else {
10351            Vec::new()
10352        };
10353
10354        // Parse VALUES or SELECT query
10355        let mut values = Vec::new();
10356        let query = if self.match_token(TokenType::Values) {
10357            loop {
10358                self.expect(TokenType::LParen)?;
10359                let row = self.parse_expression_list()?;
10360                self.expect(TokenType::RParen)?;
10361                values.push(row);
10362                if !self.match_token(TokenType::Comma) {
10363                    break;
10364                }
10365            }
10366            None
10367        } else if !self.is_at_end() && !self.check(TokenType::Semicolon) {
10368            // SELECT or other statement as value source
10369            Some(self.parse_statement()?)
10370        } else {
10371            None
10372        };
10373
10374        Ok(Expression::Insert(Box::new(Insert {
10375            table,
10376            columns,
10377            values,
10378            query,
10379            overwrite: false,
10380            partition: Vec::new(),
10381            directory: None,
10382            returning: Vec::new(),
10383            output: None,
10384            on_conflict: None,
10385            leading_comments,
10386            if_exists: false,
10387            with: None,
10388            ignore: false,
10389            source_alias: None,
10390            alias: None,
10391            alias_explicit_as: false,
10392            default_values: false,
10393            by_name: false,
10394            conflict_action: None,
10395            is_replace: true,
10396            replace_where: None,
10397            source: None,
10398            hint: None,
10399            function_target: None,
10400            partition_by: None,
10401            settings: Vec::new(),
10402        })))
10403    }
10404
10405    /// Parse UPDATE statement
10406    fn parse_update(&mut self) -> Result<Expression> {
10407        let update_token = self.expect(TokenType::Update)?;
10408        let leading_comments = update_token.comments;
10409
10410        // TSQL: UPDATE STATISTICS table_name - parse as Command
10411        if self.check_identifier("STATISTICS") {
10412            let mut parts = vec!["UPDATE".to_string()];
10413            while !self.is_at_end() && !self.check(TokenType::Semicolon) {
10414                parts.push(self.advance().text);
10415            }
10416            return Ok(Expression::Command(Box::new(Command {
10417                this: parts.join(" "),
10418            })));
10419        }
10420
10421        // PostgreSQL ONLY modifier: UPDATE ONLY t SET ...
10422        let has_only = self.match_token(TokenType::Only);
10423
10424        // Parse table name (can be qualified: db.table_name)
10425        let first_name = self.expect_identifier_with_quoted()?;
10426        let mut table = if self.match_token(TokenType::Dot) {
10427            let second_name = self.expect_identifier_with_quoted()?;
10428            // Check for three-part name (catalog.schema.table)
10429            if self.match_token(TokenType::Dot) {
10430                let table_name = self.expect_identifier_with_quoted()?;
10431                TableRef {
10432                    name: table_name,
10433                    schema: Some(second_name),
10434                    catalog: Some(first_name),
10435                    alias: None,
10436                    alias_explicit_as: false,
10437                    column_aliases: Vec::new(),
10438                    leading_comments: Vec::new(),
10439                    trailing_comments: Vec::new(),
10440                    when: None,
10441                    only: false,
10442                    final_: false,
10443                    table_sample: None,
10444                    hints: Vec::new(),
10445                    system_time: None,
10446                    partitions: Vec::new(),
10447                    identifier_func: None,
10448                    changes: None,
10449                    version: None,
10450                    span: None,
10451                }
10452            } else {
10453                TableRef {
10454                    name: second_name,
10455                    schema: Some(first_name),
10456                    catalog: None,
10457                    alias: None,
10458                    alias_explicit_as: false,
10459                    column_aliases: Vec::new(),
10460                    leading_comments: Vec::new(),
10461                    trailing_comments: Vec::new(),
10462                    when: None,
10463                    only: false,
10464                    final_: false,
10465                    table_sample: None,
10466                    hints: Vec::new(),
10467                    system_time: None,
10468                    partitions: Vec::new(),
10469                    identifier_func: None,
10470                    changes: None,
10471                    version: None,
10472                    span: None,
10473                }
10474            }
10475        } else {
10476            TableRef::from_identifier(first_name)
10477        };
10478        table.trailing_comments = self.previous_trailing_comments().to_vec();
10479        if has_only {
10480            table.only = true;
10481        }
10482
10483        // Optional alias (with or without AS)
10484        if self.match_token(TokenType::As) {
10485            table.alias = Some(self.expect_identifier_with_quoted()?);
10486            table.alias_explicit_as = true;
10487        } else if self.is_identifier_token() && !self.check(TokenType::Set) {
10488            // Implicit alias (table t SET ...)
10489            table.alias = Some(self.expect_identifier_with_quoted()?);
10490            table.alias_explicit_as = false;
10491        }
10492
10493        // Handle multi-table UPDATE syntax: UPDATE t1, t2, t3 LEFT JOIN t4 ON ... SET ...
10494        // Capture additional tables
10495        let mut extra_tables = Vec::new();
10496        while self.match_token(TokenType::Comma) {
10497            // Parse additional table name
10498            let first_name = self.expect_identifier_with_quoted()?;
10499            let mut extra_table = if self.match_token(TokenType::Dot) {
10500                let second_name = self.expect_identifier_with_quoted()?;
10501                if self.match_token(TokenType::Dot) {
10502                    let table_name = self.expect_identifier_with_quoted()?;
10503                    TableRef {
10504                        name: table_name,
10505                        schema: Some(second_name),
10506                        catalog: Some(first_name),
10507                        alias: None,
10508                        alias_explicit_as: false,
10509                        column_aliases: Vec::new(),
10510                        leading_comments: Vec::new(),
10511                        trailing_comments: Vec::new(),
10512                        when: None,
10513                        only: false,
10514                        final_: false,
10515                        table_sample: None,
10516                        hints: Vec::new(),
10517                        system_time: None,
10518                        partitions: Vec::new(),
10519                        identifier_func: None,
10520                        changes: None,
10521                        version: None,
10522                        span: None,
10523                    }
10524                } else {
10525                    TableRef {
10526                        name: second_name,
10527                        schema: Some(first_name),
10528                        catalog: None,
10529                        alias: None,
10530                        alias_explicit_as: false,
10531                        column_aliases: Vec::new(),
10532                        leading_comments: Vec::new(),
10533                        trailing_comments: Vec::new(),
10534                        when: None,
10535                        only: false,
10536                        final_: false,
10537                        table_sample: None,
10538                        hints: Vec::new(),
10539                        system_time: None,
10540                        partitions: Vec::new(),
10541                        identifier_func: None,
10542                        changes: None,
10543                        version: None,
10544                        span: None,
10545                    }
10546                }
10547            } else {
10548                TableRef::from_identifier(first_name)
10549            };
10550            // Optional alias
10551            if self.match_token(TokenType::As) {
10552                extra_table.alias = Some(self.expect_identifier_with_quoted()?);
10553                extra_table.alias_explicit_as = true;
10554            } else if self.is_identifier_token()
10555                && !self.check(TokenType::Set)
10556                && !self.check_keyword()
10557            {
10558                extra_table.alias = Some(self.expect_identifier_with_quoted()?);
10559                extra_table.alias_explicit_as = false;
10560            }
10561            extra_tables.push(extra_table);
10562        }
10563
10564        // Handle JOINs before SET
10565        let mut table_joins = Vec::new();
10566        while let Some((kind, _, use_inner_keyword, use_outer_keyword, _join_hint)) =
10567            self.try_parse_join_kind()
10568        {
10569            if self.check(TokenType::Join) {
10570                self.skip(); // consume JOIN
10571            }
10572            // Parse joined table (supports subqueries, LATERAL, functions, etc.)
10573            let join_expr = self.parse_table_expression()?;
10574            // ON clause
10575            let on_condition = if self.match_token(TokenType::On) {
10576                Some(self.parse_expression()?)
10577            } else {
10578                None
10579            };
10580            table_joins.push(Join {
10581                this: join_expr,
10582                on: on_condition,
10583                using: Vec::new(),
10584                kind,
10585                use_inner_keyword,
10586                use_outer_keyword,
10587                deferred_condition: false,
10588                join_hint: None,
10589                match_condition: None,
10590                pivots: Vec::new(),
10591                comments: Vec::new(),
10592                nesting_group: 0,
10593                directed: false,
10594            });
10595        }
10596
10597        // Snowflake syntax: UPDATE table FROM (source) SET ... WHERE ...
10598        // Check if FROM comes before SET
10599        let (from_before_set, early_from_clause, early_from_joins) =
10600            if self.match_token(TokenType::From) {
10601                let from_clause = self.parse_from()?;
10602                let from_joins = self.parse_joins()?;
10603                (true, Some(from_clause), from_joins)
10604            } else {
10605                (false, None, Vec::new())
10606            };
10607
10608        self.expect(TokenType::Set)?;
10609
10610        let mut set = Vec::new();
10611        loop {
10612            // Column can be qualified for multi-table UPDATE (e.g., a.id = 1)
10613            // Use safe keyword variant to allow keywords like 'exists' as column names (ClickHouse)
10614            let mut col_ident = self.expect_identifier_or_safe_keyword_with_quoted()?;
10615            while self.match_token(TokenType::Dot) {
10616                let part = self.expect_identifier_or_safe_keyword_with_quoted()?;
10617                // For qualified columns, preserve both parts
10618                col_ident = Identifier {
10619                    name: format!("{}.{}", col_ident.name, part.name),
10620                    quoted: col_ident.quoted || part.quoted,
10621                    trailing_comments: Vec::new(),
10622                    span: None,
10623                };
10624            }
10625            self.expect(TokenType::Eq)?;
10626            let value = self.parse_expression()?;
10627            set.push((col_ident, value));
10628
10629            if !self.match_token(TokenType::Comma) {
10630                break;
10631            }
10632        }
10633
10634        // Parse OUTPUT clause (TSQL)
10635        let output = if self.match_token(TokenType::Output) {
10636            Some(self.parse_output_clause()?)
10637        } else {
10638            None
10639        };
10640
10641        // Parse FROM clause (PostgreSQL, SQL Server, Snowflake) - only if not already parsed before SET
10642        let (from_clause, from_joins) = if from_before_set {
10643            (early_from_clause, early_from_joins)
10644        } else if self.match_token(TokenType::From) {
10645            let from_clause = Some(self.parse_from()?);
10646            let from_joins = self.parse_joins()?;
10647            (from_clause, from_joins)
10648        } else {
10649            (None, Vec::new())
10650        };
10651
10652        let where_clause = if self.match_token(TokenType::Where) {
10653            Some(Where {
10654                this: self.parse_expression()?,
10655            })
10656        } else {
10657            None
10658        };
10659
10660        // Parse RETURNING clause (PostgreSQL, SQLite)
10661        let returning = if self.match_token(TokenType::Returning) {
10662            self.parse_select_expressions()?
10663        } else {
10664            Vec::new()
10665        };
10666
10667        // Parse ORDER BY clause (MySQL)
10668        let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
10669            Some(self.parse_order_by()?)
10670        } else {
10671            None
10672        };
10673
10674        // Parse LIMIT clause (MySQL)
10675        let limit = if self.match_token(TokenType::Limit) {
10676            Some(self.parse_expression()?)
10677        } else {
10678            None
10679        };
10680
10681        Ok(Expression::Update(Box::new(Update {
10682            table,
10683            extra_tables,
10684            table_joins,
10685            set,
10686            from_clause,
10687            from_joins,
10688            where_clause,
10689            returning,
10690            output,
10691            with: None,
10692            leading_comments,
10693            limit,
10694            order_by,
10695            from_before_set,
10696        })))
10697    }
10698
10699    /// Parse DELETE statement
10700    /// Handles:
10701    /// - Standard: DELETE FROM t WHERE ...
10702    /// - PostgreSQL USING: DELETE FROM t USING s WHERE ... RETURNING a
10703    /// - DuckDB USING: DELETE FROM t USING (VALUES ...) AS t1 WHERE ...
10704    /// - MySQL multi-table: DELETE t1 FROM t1 JOIN t2 ON ... WHERE ...
10705    /// - MySQL multi-table: DELETE t1, t2 FROM t1 JOIN t2 JOIN t3 WHERE ...
10706    /// - MySQL USING: DELETE FROM t1, t2 USING t1 JOIN t2 JOIN t3 WHERE ...
10707    /// - MySQL FORCE INDEX: DELETE FROM t FORCE INDEX (idx) WHERE ...
10708    fn parse_delete(&mut self) -> Result<Expression> {
10709        let delete_token = self.expect(TokenType::Delete)?;
10710        let leading_comments = delete_token.comments;
10711
10712        // Check if FROM is present. If not, this is MySQL multi-table: DELETE t1, t2 FROM ...
10713        // or TSQL: DELETE x OUTPUT x.a FROM z
10714        let mut tables = Vec::new();
10715        let mut early_output = None;
10716        let _has_from = if self.check(TokenType::From) {
10717            self.skip(); // consume FROM
10718            true
10719        } else {
10720            // MySQL multi-table: DELETE t1[, t2, ...] FROM ...
10721            // or TSQL: DELETE x OUTPUT x.a FROM z
10722            // or BigQuery/generic: DELETE table WHERE ... (no FROM required)
10723            // Parse target table list (supporting dotted names)
10724            loop {
10725                let tref = self.parse_table_ref()?;
10726                tables.push(tref);
10727                if !self.match_token(TokenType::Comma) {
10728                    break;
10729                }
10730            }
10731            // TSQL: OUTPUT clause can appear before FROM
10732            if self.match_token(TokenType::Output) {
10733                early_output = Some(self.parse_output_clause()?);
10734            }
10735            if self.check(TokenType::From) {
10736                self.skip(); // consume FROM
10737                true
10738            } else {
10739                // BigQuery-style: DELETE table WHERE ... (no FROM)
10740                false
10741            }
10742        };
10743
10744        // Now parse the main table after FROM (or use from no-FROM path)
10745        let has_only = self.match_token(TokenType::Only);
10746        let mut table = if _has_from {
10747            // Parse the main table(s) after FROM
10748            // Use parse_table_ref() to handle dotted names like db.table
10749            self.parse_table_ref()?
10750        } else {
10751            // BigQuery-style: table was already parsed into `tables`
10752            // Move it out to be the main table
10753            if !tables.is_empty() {
10754                tables.remove(0)
10755            } else {
10756                return Err(self.parse_error("Expected table name in DELETE statement"));
10757            }
10758        };
10759        if has_only {
10760            table.only = true;
10761        }
10762
10763        // ClickHouse: ON CLUSTER clause
10764        let on_cluster = self.parse_on_cluster_clause()?;
10765
10766        // Check for additional tables after the first: DELETE FROM t1, t2 USING ...
10767        let mut extra_from_tables = Vec::new();
10768        if _has_from
10769            && tables.is_empty()
10770            && self.check(TokenType::Comma)
10771            && !self.check(TokenType::Where)
10772        {
10773            // Could be multi-table: DELETE FROM t1, t2 USING ...
10774            // Check ahead if this is followed by USING or more tables
10775            while self.match_token(TokenType::Comma) {
10776                let extra_name = self.expect_identifier_with_quoted()?;
10777                let extra_ref = TableRef::from_identifier(extra_name);
10778                extra_from_tables.push(extra_ref);
10779            }
10780        }
10781
10782        // If we had DELETE FROM t1, t2 USING ..., the tables field stores t1, t2
10783        let mut tables_from_using = false;
10784        if !extra_from_tables.is_empty() {
10785            // The main table + extra tables form the multi-table target
10786            tables.push(table.clone());
10787            tables.append(&mut extra_from_tables);
10788            tables_from_using = true;
10789        }
10790
10791        // Check for FORCE INDEX hint (MySQL): DELETE FROM t FORCE INDEX (idx)
10792        let force_index = if self.match_text_seq(&["FORCE", "INDEX"]) {
10793            self.expect(TokenType::LParen)?;
10794            let idx_name = self.expect_identifier_with_quoted()?;
10795            self.expect(TokenType::RParen)?;
10796            Some(idx_name.name)
10797        } else {
10798            None
10799        };
10800
10801        // Check for optional alias (with or without AS)
10802        let (alias, alias_explicit_as) = if force_index.is_none() && self.match_token(TokenType::As)
10803        {
10804            (Some(self.expect_identifier_with_quoted()?), true)
10805        } else if force_index.is_none()
10806            && self.is_identifier_token()
10807            && !self.check(TokenType::Using)
10808            && !self.check(TokenType::Where)
10809            && !self.check(TokenType::Inner)
10810            && !self.check(TokenType::Left)
10811            && !self.check(TokenType::Right)
10812            && !self.check(TokenType::Cross)
10813            && !self.check(TokenType::Full)
10814            && !self.check(TokenType::Join)
10815            && !self.check_identifier("FORCE")
10816        {
10817            (Some(self.expect_identifier_with_quoted()?), false)
10818        } else {
10819            (None, false)
10820        };
10821
10822        // Parse JOINs for MySQL multi-table: DELETE t1 FROM t1 LEFT JOIN t2 ON ...
10823        let mut joins = self.parse_joins()?;
10824
10825        // Parse USING clause (PostgreSQL/DuckDB/MySQL)
10826        let mut using = Vec::new();
10827        if self.match_token(TokenType::Using) {
10828            loop {
10829                // Check for subquery: USING (SELECT ...) AS ... or (VALUES ...) AS ...
10830                if self.check(TokenType::LParen) {
10831                    // Check if next token after ( is VALUES
10832                    let is_values = self.current + 1 < self.tokens.len()
10833                        && self.tokens[self.current + 1].token_type == TokenType::Values;
10834                    let subquery = if is_values {
10835                        // Parse (VALUES ...) as parenthesized VALUES
10836                        self.skip(); // consume (
10837                        let values = self.parse_values()?;
10838                        self.expect(TokenType::RParen)?;
10839                        Expression::Paren(Box::new(Paren {
10840                            this: values,
10841                            trailing_comments: Vec::new(),
10842                        }))
10843                    } else {
10844                        // Parse as subquery (SELECT ...) or other expression
10845                        self.parse_primary()?
10846                    };
10847                    // Parse alias
10848                    let using_alias = if self.match_token(TokenType::As) {
10849                        let alias_name = self.expect_identifier_with_quoted()?;
10850                        // Check for column aliases: AS name(col1, col2)
10851                        let col_aliases = if self.match_token(TokenType::LParen) {
10852                            let aliases = self.parse_identifier_list()?;
10853                            self.expect(TokenType::RParen)?;
10854                            aliases
10855                        } else {
10856                            Vec::new()
10857                        };
10858                        Some((alias_name, col_aliases))
10859                    } else {
10860                        None
10861                    };
10862                    // Create a TableRef from the subquery with alias
10863                    let mut tref = TableRef::new("");
10864                    if let Some((alias_name, col_aliases)) = using_alias {
10865                        tref.alias = Some(alias_name);
10866                        tref.alias_explicit_as = true;
10867                        tref.column_aliases = col_aliases;
10868                    }
10869                    // Store the subquery in the table reference using hints (as a hack)
10870                    // Actually, we need a better approach - use the table ref hints to store the subquery
10871                    tref.hints = vec![subquery];
10872                    using.push(tref);
10873                } else {
10874                    let using_table = self.expect_identifier_with_quoted()?;
10875                    let mut using_ref = TableRef::from_identifier(using_table);
10876
10877                    // Check for JOINs: USING t1 INNER JOIN t2 INNER JOIN t3
10878                    if self.check_join_keyword() {
10879                        // Parse JOINs as part of USING
10880                        using.push(using_ref);
10881                        let mut using_joins = self.parse_joins()?;
10882                        joins.append(&mut using_joins);
10883                        break;
10884                    }
10885
10886                    // Optional alias for using table
10887                    if self.match_token(TokenType::As) {
10888                        using_ref.alias = Some(self.expect_identifier_with_quoted()?);
10889                        using_ref.alias_explicit_as = true;
10890                    } else if self.is_identifier_token()
10891                        && !self.check(TokenType::Comma)
10892                        && !self.check(TokenType::Where)
10893                    {
10894                        using_ref.alias = Some(self.expect_identifier_with_quoted()?);
10895                    }
10896                    using.push(using_ref);
10897                }
10898                if !self.match_token(TokenType::Comma) {
10899                    break;
10900                }
10901            }
10902        }
10903
10904        // ClickHouse: IN PARTITION 'partition_id' clause before WHERE
10905        if matches!(
10906            self.config.dialect,
10907            Some(crate::dialects::DialectType::ClickHouse)
10908        ) && self.check(TokenType::In)
10909            && self
10910                .peek_nth(1)
10911                .is_some_and(|t| t.text.eq_ignore_ascii_case("PARTITION"))
10912        {
10913            self.skip(); // consume IN
10914            self.skip(); // consume PARTITION
10915                         // Consume partition expression (string or identifier)
10916            let _partition = self.parse_primary()?;
10917        }
10918
10919        // Parse OUTPUT clause (TSQL) - may have been parsed early (before FROM)
10920        let output = if early_output.is_some() {
10921            early_output
10922        } else if self.match_token(TokenType::Output) {
10923            Some(self.parse_output_clause()?)
10924        } else {
10925            None
10926        };
10927
10928        let where_clause = if self.match_token(TokenType::Where) {
10929            Some(Where {
10930                this: self.parse_expression()?,
10931            })
10932        } else {
10933            None
10934        };
10935
10936        // Parse ORDER BY clause (MySQL)
10937        let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
10938            Some(self.parse_order_by()?)
10939        } else {
10940            None
10941        };
10942
10943        // Parse LIMIT clause (MySQL)
10944        let limit = if self.match_token(TokenType::Limit) {
10945            Some(self.parse_expression()?)
10946        } else {
10947            None
10948        };
10949
10950        // Parse RETURNING clause (PostgreSQL)
10951        let returning = if self.match_token(TokenType::Returning) {
10952            self.parse_select_expressions()?
10953        } else {
10954            Vec::new()
10955        };
10956
10957        Ok(Expression::Delete(Box::new(Delete {
10958            table,
10959            on_cluster,
10960            alias,
10961            alias_explicit_as,
10962            using,
10963            where_clause,
10964            output,
10965            leading_comments,
10966            with: None,
10967            limit,
10968            order_by,
10969            returning,
10970            tables,
10971            tables_from_using,
10972            joins,
10973            force_index,
10974            no_from: !_has_from,
10975        })))
10976    }
10977
10978    // ==================== DDL Parsing ====================
10979
10980    /// Parse a CREATE statement
10981    fn parse_create(&mut self) -> Result<Expression> {
10982        let create_pos = self.current; // position of CREATE token
10983        let create_token = self.expect(TokenType::Create)?;
10984        let leading_comments = create_token.comments;
10985
10986        // Handle OR REPLACE / OR ALTER (TSQL)
10987        let or_replace = self.match_keywords(&[TokenType::Or, TokenType::Replace]);
10988        let or_alter = !or_replace && self.match_text_seq(&["OR", "ALTER"]);
10989
10990        // Handle TEMPORARY
10991        let temporary = self.match_token(TokenType::Temporary);
10992
10993        // Handle MATERIALIZED
10994        let materialized = self.match_token(TokenType::Materialized);
10995
10996        // Parse MySQL-specific CREATE VIEW options: ALGORITHM, DEFINER, SQL SECURITY
10997        // CREATE ALGORITHM=... DEFINER=... SQL SECURITY DEFINER VIEW ...
10998        let mut algorithm: Option<String> = None;
10999        let mut definer: Option<String> = None;
11000        let mut security: Option<FunctionSecurity> = None;
11001
11002        while self.match_identifier("ALGORITHM")
11003            || self.match_identifier("DEFINER")
11004            || self.match_identifier("SQL")
11005        {
11006            let option_name = self.previous().text.to_ascii_uppercase();
11007
11008            if option_name == "ALGORITHM" && self.match_token(TokenType::Eq) {
11009                // ALGORITHM=UNDEFINED|MERGE|TEMPTABLE
11010                let value = self.expect_identifier_or_keyword()?;
11011                algorithm = Some(value.to_ascii_uppercase());
11012            } else if option_name == "DEFINER" && self.match_token(TokenType::Eq) {
11013                // DEFINER=user@host (can include @ and %)
11014                let mut definer_value = String::new();
11015                while !self.is_at_end()
11016                    && !self.check(TokenType::View)
11017                    && !self.check_identifier("ALGORITHM")
11018                    && !self.check_identifier("DEFINER")
11019                    && !self.check_identifier("SQL")
11020                    && !self.check_identifier("SECURITY")
11021                {
11022                    definer_value.push_str(&self.advance().text);
11023                }
11024                definer = Some(definer_value);
11025            } else if option_name == "SQL" && self.match_identifier("SECURITY") {
11026                // SQL SECURITY DEFINER/INVOKER
11027                if self.match_identifier("DEFINER") {
11028                    security = Some(FunctionSecurity::Definer);
11029                } else if self.match_identifier("INVOKER") {
11030                    security = Some(FunctionSecurity::Invoker);
11031                }
11032            }
11033        }
11034
11035        // Handle SECURE modifier for VIEW (Snowflake)
11036        let secure = self.match_identifier("SECURE");
11037
11038        // Handle table modifiers: DYNAMIC, ICEBERG, EXTERNAL, HYBRID, TRANSIENT (Snowflake), UNLOGGED (PostgreSQL)
11039        let mut table_modifier: Option<String> = if self.check_identifier("DYNAMIC") {
11040            self.skip();
11041            Some("DYNAMIC".to_string())
11042        } else if self.check_identifier("ICEBERG") {
11043            self.skip();
11044            Some("ICEBERG".to_string())
11045        } else if self.check_identifier("EXTERNAL") {
11046            self.skip();
11047            Some("EXTERNAL".to_string())
11048        } else if self.check_identifier("HYBRID") {
11049            self.skip();
11050            Some("HYBRID".to_string())
11051        } else if self.check_identifier("TRANSIENT") {
11052            self.skip();
11053            Some("TRANSIENT".to_string())
11054        } else if self.check_identifier("UNLOGGED") {
11055            self.skip();
11056            Some("UNLOGGED".to_string())
11057        } else if self.check_identifier("DICTIONARY") {
11058            self.skip();
11059            Some("DICTIONARY".to_string())
11060        } else if self.check(TokenType::Dictionary) {
11061            self.skip();
11062            Some("DICTIONARY".to_string())
11063        } else {
11064            None
11065        };
11066
11067        // Teradata: SET/MULTISET/VOLATILE/GLOBAL TEMPORARY modifiers before TABLE
11068        if matches!(
11069            self.config.dialect,
11070            Some(crate::dialects::DialectType::Teradata)
11071        ) {
11072            let mut parts = Vec::new();
11073            loop {
11074                if self.match_token(TokenType::Set) {
11075                    parts.push(self.previous().text.to_ascii_uppercase());
11076                } else if self.match_identifier("MULTISET") {
11077                    parts.push(self.previous().text.to_ascii_uppercase());
11078                } else if self.match_identifier("VOLATILE") {
11079                    parts.push(self.previous().text.to_ascii_uppercase());
11080                } else if self.match_identifier("GLOBAL") {
11081                    parts.push(self.previous().text.to_ascii_uppercase());
11082                } else if self.match_token(TokenType::Temporary) {
11083                    parts.push(self.previous().text.to_ascii_uppercase());
11084                } else {
11085                    break;
11086                }
11087            }
11088            if !parts.is_empty() {
11089                table_modifier = Some(parts.join(" "));
11090            }
11091        }
11092
11093        if table_modifier.as_deref() == Some("DICTIONARY") {
11094            return self.parse_create_table(
11095                or_replace,
11096                temporary,
11097                leading_comments,
11098                table_modifier.as_deref(),
11099            );
11100        }
11101
11102        match self.peek().token_type {
11103            TokenType::Table => {
11104                // Check if this is CREATE TABLE FUNCTION (BigQuery)
11105                if self.current + 1 < self.tokens.len()
11106                    && self.tokens[self.current + 1].token_type == TokenType::Function
11107                {
11108                    self.skip(); // consume TABLE
11109                    return self.parse_create_function(or_replace, or_alter, temporary, true);
11110                }
11111                let modifier = if materialized {
11112                    Some("MATERIALIZED")
11113                } else {
11114                    table_modifier.as_deref()
11115                };
11116                self.parse_create_table(or_replace, temporary, leading_comments, modifier)
11117            }
11118            TokenType::Dictionary => {
11119                self.parse_create_table(or_replace, temporary, leading_comments, Some("DICTIONARY"))
11120            }
11121            TokenType::View => self.parse_create_view(
11122                or_replace,
11123                or_alter,
11124                materialized,
11125                temporary,
11126                algorithm,
11127                definer,
11128                security,
11129                secure,
11130            ),
11131            TokenType::Unique => {
11132                self.skip(); // consume UNIQUE
11133                             // Check for CLUSTERED/NONCLUSTERED after UNIQUE (TSQL)
11134                let clustered = if self.check_identifier("CLUSTERED") {
11135                    self.skip();
11136                    Some("CLUSTERED".to_string())
11137                } else if self.check_identifier("NONCLUSTERED") {
11138                    self.skip();
11139                    Some("NONCLUSTERED".to_string())
11140                } else {
11141                    None
11142                };
11143                // Check for COLUMNSTORE (TSQL: CREATE UNIQUE NONCLUSTERED COLUMNSTORE INDEX)
11144                if self.check_identifier("COLUMNSTORE") {
11145                    self.skip();
11146                    // Prepend COLUMNSTORE to clustered
11147                    let clustered = clustered
11148                        .map(|c| format!("{} COLUMNSTORE", c))
11149                        .or_else(|| Some("COLUMNSTORE".to_string()));
11150                    self.parse_create_index_with_clustered(true, clustered)
11151                } else {
11152                    self.parse_create_index_with_clustered(true, clustered)
11153                }
11154            }
11155            TokenType::Index => self.parse_create_index_with_clustered(false, None),
11156            TokenType::Schema => self.parse_create_schema(leading_comments),
11157            TokenType::Database => self.parse_create_database(),
11158            TokenType::Function => {
11159                self.parse_create_function(or_replace, or_alter, temporary, false)
11160            }
11161            TokenType::Procedure => self.parse_create_procedure(or_replace, or_alter),
11162            TokenType::Sequence => self.parse_create_sequence(temporary, or_replace),
11163            TokenType::Trigger => {
11164                self.parse_create_trigger(or_replace, or_alter, false, create_pos)
11165            }
11166            TokenType::Constraint => {
11167                self.skip(); // consume CONSTRAINT
11168                self.parse_create_trigger(or_replace, or_alter, true, create_pos)
11169            }
11170            TokenType::Type => self.parse_create_type(),
11171            TokenType::Domain => self.parse_create_domain(),
11172            _ => {
11173                // Handle TSQL CLUSTERED/NONCLUSTERED [COLUMNSTORE] INDEX
11174                if self.check_identifier("CLUSTERED") || self.check_identifier("NONCLUSTERED") {
11175                    let clustered_text = self.advance().text.to_ascii_uppercase();
11176                    // Check for COLUMNSTORE after CLUSTERED/NONCLUSTERED
11177                    let clustered = if self.check_identifier("COLUMNSTORE") {
11178                        self.skip();
11179                        Some(format!("{} COLUMNSTORE", clustered_text))
11180                    } else {
11181                        Some(clustered_text)
11182                    };
11183                    return self.parse_create_index_with_clustered(false, clustered);
11184                }
11185                // Handle TSQL COLUMNSTORE INDEX (without CLUSTERED/NONCLUSTERED prefix)
11186                if self.check_identifier("COLUMNSTORE") && {
11187                    let pos = self.current;
11188                    let result = pos + 1 < self.tokens.len()
11189                        && self.tokens[pos + 1].token_type == TokenType::Index;
11190                    result
11191                } {
11192                    self.skip(); // consume COLUMNSTORE
11193                                 // COLUMNSTORE without prefix implies NONCLUSTERED
11194                    return self.parse_create_index_with_clustered(
11195                        false,
11196                        Some("NONCLUSTERED COLUMNSTORE".to_string()),
11197                    );
11198                }
11199                // Handle identifiers that aren't keywords: TAG, STAGE, STREAM, etc.
11200                if self.check_identifier("TAG") {
11201                    return self.parse_create_tag(or_replace);
11202                }
11203                if self.check_identifier("STAGE") {
11204                    return self.parse_create_stage(or_replace, temporary);
11205                }
11206                if self.check_identifier("STREAM") {
11207                    return self.parse_create_stream(or_replace);
11208                }
11209                if self.check_identifier("TASK") {
11210                    return self.parse_create_task(or_replace);
11211                }
11212                if (self.check_identifier("FILE") || self.check(TokenType::File)) && {
11213                    let next = self.current + 1;
11214                    next < self.tokens.len()
11215                        && (self.tokens[next].text.eq_ignore_ascii_case("FORMAT"))
11216                } {
11217                    return self.parse_create_file_format(or_replace, temporary);
11218                }
11219                // TSQL: CREATE SYNONYM name FOR target
11220                if self.check_identifier("SYNONYM") {
11221                    self.skip(); // consume SYNONYM
11222                    let name = self.parse_table_ref()?;
11223                    self.expect(TokenType::For)?;
11224                    let target = self.parse_table_ref()?;
11225                    return Ok(Expression::CreateSynonym(Box::new(
11226                        crate::expressions::CreateSynonym { name, target },
11227                    )));
11228                }
11229                // Fall back to Raw for unrecognized CREATE targets
11230                // (e.g., CREATE WAREHOUSE, CREATE STREAMLIT, CREATE STORAGE INTEGRATION, etc.)
11231                {
11232                    let start = self.current;
11233                    while !self.is_at_end() && !self.check(TokenType::Semicolon) {
11234                        self.skip();
11235                    }
11236                    let sql = self.tokens_to_sql(start, self.current);
11237                    let mut prefix = String::from("CREATE");
11238                    if or_replace {
11239                        prefix.push_str(" OR REPLACE");
11240                    }
11241                    if temporary {
11242                        prefix.push_str(" TEMPORARY");
11243                    }
11244                    if materialized {
11245                        prefix.push_str(" MATERIALIZED");
11246                    }
11247                    prefix.push(' ');
11248                    prefix.push_str(&sql);
11249                    Ok(Expression::Raw(Raw { sql: prefix }))
11250                }
11251            }
11252        }
11253    }
11254
11255    /// Parse CREATE TABLE
11256    fn parse_create_table(
11257        &mut self,
11258        or_replace: bool,
11259        temporary: bool,
11260        leading_comments: Vec<String>,
11261        table_modifier: Option<&str>,
11262    ) -> Result<Expression> {
11263        if table_modifier == Some("DICTIONARY") {
11264            let _ = self.match_token(TokenType::Dictionary);
11265        } else {
11266            self.expect(TokenType::Table)?;
11267        }
11268
11269        // Handle IF NOT EXISTS
11270        let if_not_exists =
11271            self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
11272
11273        let is_special_modifier = matches!(
11274            table_modifier,
11275            Some(
11276                "DYNAMIC"
11277                    | "ICEBERG"
11278                    | "EXTERNAL"
11279                    | "HYBRID"
11280                    | "UNLOGGED"
11281                    | "DICTIONARY"
11282                    | "MATERIALIZED"
11283            )
11284        ) || (table_modifier.is_some()
11285            && matches!(
11286                self.config.dialect,
11287                Some(crate::dialects::DialectType::Teradata)
11288            ));
11289        let is_clickhouse = matches!(
11290            self.config.dialect,
11291            Some(crate::dialects::DialectType::ClickHouse)
11292        );
11293
11294        // Parse table name
11295        let name = self.parse_table_ref()?;
11296
11297        // ClickHouse: UUID 'xxx' clause after table name
11298        let uuid = if matches!(
11299            self.config.dialect,
11300            Some(crate::dialects::DialectType::ClickHouse)
11301        ) && self.check_identifier("UUID")
11302        {
11303            self.skip(); // consume UUID
11304            let uuid_token = self.advance().clone();
11305            // Strip surrounding quotes from the UUID string
11306            let uuid_text = uuid_token.text.trim_matches('\'').to_string();
11307            Some(uuid_text)
11308        } else {
11309            None
11310        };
11311
11312        // ClickHouse: ON CLUSTER clause
11313        let on_cluster = self.parse_on_cluster_clause()?;
11314
11315        // Teradata: options after name before column list
11316        let teradata_post_name_options = if matches!(
11317            self.config.dialect,
11318            Some(crate::dialects::DialectType::Teradata)
11319        ) {
11320            self.parse_teradata_post_name_options()
11321        } else {
11322            Vec::new()
11323        };
11324
11325        // Handle PARTITION OF parent_table [(column_defs)] [FOR VALUES spec | DEFAULT] [PARTITION BY ...]
11326        if self.match_keywords(&[TokenType::Partition, TokenType::Of]) {
11327            return self.parse_create_table_partition_of(
11328                name,
11329                if_not_exists,
11330                temporary,
11331                or_replace,
11332                table_modifier,
11333                leading_comments,
11334            );
11335        }
11336
11337        // ClickHouse: EMPTY AS source_table — create empty table from source
11338        if matches!(
11339            self.config.dialect,
11340            Some(crate::dialects::DialectType::ClickHouse)
11341        ) && self.check_identifier("EMPTY")
11342        {
11343            if self.check_next(TokenType::As) {
11344                self.skip(); // consume EMPTY
11345                self.skip(); // consume AS
11346                             // Consume rest as Command
11347                let start = self.current;
11348                while !self.is_at_end() && !self.check(TokenType::Semicolon) {
11349                    self.skip();
11350                }
11351                let rest_sql = self.tokens_to_sql(start, self.current);
11352                let mut prefix = String::from("CREATE TABLE");
11353                if if_not_exists {
11354                    prefix.push_str(" IF NOT EXISTS");
11355                }
11356                prefix.push(' ');
11357                prefix.push_str(&name.name.name);
11358                prefix.push_str(" EMPTY AS ");
11359                prefix.push_str(&rest_sql);
11360                return Ok(Expression::Raw(Raw { sql: prefix }));
11361            }
11362        }
11363
11364        // Handle [SHALLOW | DEEP] CLONE source_table [AT(...) | BEFORE(...)]
11365        // Databricks/Delta Lake uses SHALLOW CLONE / DEEP CLONE
11366        // Snowflake uses just CLONE (which is equivalent to DEEP CLONE)
11367        let shallow_clone = self.check_identifier("SHALLOW");
11368        let deep_clone = self.check_identifier("DEEP");
11369        if shallow_clone || deep_clone {
11370            self.skip(); // consume SHALLOW or DEEP
11371        }
11372        // Also handle COPY (BigQuery: CREATE TABLE ... COPY source_table)
11373        // But NOT "COPY GRANTS" which is a Snowflake property
11374        let is_copy = self.check(TokenType::Copy) && !self.check_next_identifier("GRANTS");
11375        if self.check_identifier("CLONE") || is_copy {
11376            self.skip(); // consume CLONE or COPY
11377                         // ClickHouse: CLONE AS source_table (AS is part of the syntax, not an alias)
11378            if matches!(
11379                self.config.dialect,
11380                Some(crate::dialects::DialectType::ClickHouse)
11381            ) {
11382                let _ = self.match_token(TokenType::As);
11383            }
11384            let source = self.parse_table_ref()?;
11385            // Parse optional AT or BEFORE time travel clause
11386            // Note: BEFORE is a keyword token, AT is an identifier
11387            let at_clause = if self.match_identifier("AT") || self.match_token(TokenType::Before) {
11388                let keyword = self.previous().text.to_ascii_uppercase();
11389                self.expect(TokenType::LParen)?;
11390                // Parse the content: OFFSET => value or TIMESTAMP => value
11391                let mut result = format!("{} (", keyword);
11392                let mut prev_token_type: Option<TokenType> = None;
11393                let mut paren_depth = 1;
11394                while !self.is_at_end() && paren_depth > 0 {
11395                    let token = self.advance();
11396                    if token.token_type == TokenType::LParen {
11397                        paren_depth += 1;
11398                    } else if token.token_type == TokenType::RParen {
11399                        paren_depth -= 1;
11400                        if paren_depth == 0 {
11401                            break;
11402                        }
11403                    }
11404                    let needs_space = !result.ends_with('(')
11405                        && prev_token_type != Some(TokenType::Arrow)
11406                        && prev_token_type != Some(TokenType::Dash)
11407                        && prev_token_type != Some(TokenType::LParen)
11408                        && prev_token_type != Some(TokenType::Comma) // comma already adds trailing space
11409                        && token.token_type != TokenType::LParen; // no space before (
11410                    if needs_space
11411                        && token.token_type != TokenType::RParen
11412                        && token.token_type != TokenType::Comma
11413                    {
11414                        result.push(' ');
11415                    }
11416                    // Properly quote string literals
11417                    if token.token_type == TokenType::String {
11418                        result.push('\'');
11419                        result.push_str(&token.text.replace('\'', "''"));
11420                        result.push('\'');
11421                    } else {
11422                        result.push_str(&token.text);
11423                    }
11424                    if token.token_type == TokenType::Arrow || token.token_type == TokenType::Comma
11425                    {
11426                        result.push(' ');
11427                    }
11428                    prev_token_type = Some(token.token_type);
11429                }
11430                result.push(')');
11431                Some(Expression::Raw(Raw { sql: result }))
11432            } else {
11433                None
11434            };
11435            // Return the CLONE table immediately
11436            return Ok(Expression::CreateTable(Box::new(CreateTable {
11437                name,
11438                on_cluster: on_cluster.clone(),
11439                columns: Vec::new(),
11440                constraints: Vec::new(),
11441                if_not_exists,
11442                temporary,
11443                or_replace,
11444                table_modifier: table_modifier.map(|s| s.to_string()),
11445                as_select: None,
11446                as_select_parenthesized: false,
11447                on_commit: None,
11448                clone_source: Some(source),
11449                clone_at_clause: at_clause,
11450                shallow_clone,
11451                is_copy,
11452                leading_comments,
11453                with_properties: Vec::new(),
11454                teradata_post_name_options: teradata_post_name_options.clone(),
11455                with_data: None,
11456                with_statistics: None,
11457                teradata_indexes: Vec::new(),
11458                with_cte: None,
11459                properties: Vec::new(),
11460                partition_of: None,
11461                post_table_properties: Vec::new(),
11462                mysql_table_options: Vec::new(),
11463                inherits: Vec::new(),
11464                on_property: None,
11465                copy_grants: false,
11466                using_template: None,
11467                rollup: None,
11468                uuid: uuid.clone(),
11469            })));
11470        }
11471
11472        // Handle WITH properties before columns/AS (e.g., CREATE TABLE z WITH (FORMAT='parquet') AS SELECT 1)
11473        let with_properties = if self.match_token(TokenType::With) {
11474            self.parse_with_properties()?
11475        } else {
11476            Vec::new()
11477        };
11478
11479        // Snowflake: COPY GRANTS clause (before column list or AS)
11480        let copy_grants = self.match_text_seq(&["COPY", "GRANTS"]);
11481
11482        // Snowflake: USING TEMPLATE (expr) - allows schema inference from a query
11483        let using_template = if self.match_text_seq(&["USING", "TEMPLATE"]) {
11484            Some(Box::new(self.parse_primary()?))
11485        } else {
11486            None
11487        };
11488
11489        // If we have USING TEMPLATE, return early since it replaces AS SELECT
11490        if using_template.is_some() {
11491            return Ok(Expression::CreateTable(Box::new(CreateTable {
11492                name,
11493                on_cluster: on_cluster.clone(),
11494                columns: Vec::new(),
11495                constraints: Vec::new(),
11496                if_not_exists,
11497                temporary,
11498                or_replace,
11499                table_modifier: table_modifier.map(|s| s.to_string()),
11500                as_select: None,
11501                as_select_parenthesized: false,
11502                on_commit: None,
11503                clone_source: None,
11504                clone_at_clause: None,
11505                shallow_clone: false,
11506                is_copy: false,
11507                leading_comments,
11508                with_properties,
11509                teradata_post_name_options: teradata_post_name_options.clone(),
11510                with_data: None,
11511                with_statistics: None,
11512                teradata_indexes: Vec::new(),
11513                with_cte: None,
11514                properties: Vec::new(),
11515                partition_of: None,
11516                post_table_properties: Vec::new(),
11517                mysql_table_options: Vec::new(),
11518                inherits: Vec::new(),
11519                on_property: None,
11520                copy_grants,
11521                using_template,
11522                rollup: None,
11523                uuid: uuid.clone(),
11524            })));
11525        }
11526
11527        // Redshift: Parse DISTKEY, SORTKEY, DISTSTYLE, BACKUP before AS SELECT (CTAS without columns)
11528        // This handles: CREATE TABLE t BACKUP YES|NO AS SELECT ...
11529        let mut redshift_ctas_properties: Vec<Expression> = Vec::new();
11530        loop {
11531            if self.match_identifier("DISTKEY") {
11532                // DISTKEY(column)
11533                if self.match_token(TokenType::LParen) {
11534                    let col = self.expect_identifier()?;
11535                    self.expect(TokenType::RParen)?;
11536                    redshift_ctas_properties.push(Expression::DistKeyProperty(Box::new(
11537                        DistKeyProperty {
11538                            this: Box::new(Expression::boxed_column(Column {
11539                                name: Identifier::new(col),
11540                                table: None,
11541                                join_mark: false,
11542                                trailing_comments: Vec::new(),
11543                                span: None,
11544                                inferred_type: None,
11545                            })),
11546                        },
11547                    )));
11548                }
11549            } else if self.check_identifier("COMPOUND") || self.check_identifier("INTERLEAVED") {
11550                // COMPOUND SORTKEY(col, ...) or INTERLEAVED SORTKEY(col, ...)
11551                let modifier = self.advance().text.to_ascii_uppercase();
11552                if self.match_identifier("SORTKEY") && self.match_token(TokenType::LParen) {
11553                    let mut cols = Vec::new();
11554                    loop {
11555                        let col = self.expect_identifier()?;
11556                        cols.push(Expression::boxed_column(Column {
11557                            name: Identifier::new(col),
11558                            table: None,
11559                            join_mark: false,
11560                            trailing_comments: Vec::new(),
11561                            span: None,
11562                            inferred_type: None,
11563                        }));
11564                        if !self.match_token(TokenType::Comma) {
11565                            break;
11566                        }
11567                    }
11568                    self.expect(TokenType::RParen)?;
11569                    let compound_value = if modifier == "COMPOUND" {
11570                        Some(Box::new(Expression::Boolean(BooleanLiteral {
11571                            value: true,
11572                        })))
11573                    } else {
11574                        None
11575                    };
11576                    redshift_ctas_properties.push(Expression::SortKeyProperty(Box::new(
11577                        SortKeyProperty {
11578                            this: Box::new(Expression::Tuple(Box::new(Tuple {
11579                                expressions: cols,
11580                            }))),
11581                            compound: compound_value,
11582                        },
11583                    )));
11584                }
11585            } else if self.match_identifier("SORTKEY") {
11586                // SORTKEY(column, ...)
11587                if self.match_token(TokenType::LParen) {
11588                    let mut cols = Vec::new();
11589                    loop {
11590                        let col = self.expect_identifier()?;
11591                        cols.push(Expression::boxed_column(Column {
11592                            name: Identifier::new(col),
11593                            table: None,
11594                            join_mark: false,
11595                            trailing_comments: Vec::new(),
11596                            span: None,
11597                            inferred_type: None,
11598                        }));
11599                        if !self.match_token(TokenType::Comma) {
11600                            break;
11601                        }
11602                    }
11603                    self.expect(TokenType::RParen)?;
11604                    redshift_ctas_properties.push(Expression::SortKeyProperty(Box::new(
11605                        SortKeyProperty {
11606                            this: Box::new(Expression::Tuple(Box::new(Tuple {
11607                                expressions: cols,
11608                            }))),
11609                            compound: None,
11610                        },
11611                    )));
11612                }
11613            } else if self.match_identifier("DISTSTYLE") {
11614                // DISTSTYLE ALL|EVEN|AUTO|KEY
11615                if self.match_texts(&["ALL", "EVEN", "AUTO", "KEY"]) {
11616                    let style = self.previous().text.to_ascii_uppercase();
11617                    redshift_ctas_properties.push(Expression::DistStyleProperty(Box::new(
11618                        DistStyleProperty {
11619                            this: Box::new(Expression::Var(Box::new(Var { this: style }))),
11620                        },
11621                    )));
11622                }
11623            } else if self.match_identifier("BACKUP") {
11624                // BACKUP YES|NO
11625                if self.match_texts(&["YES", "NO"]) {
11626                    let value = self.previous().text.to_ascii_uppercase();
11627                    redshift_ctas_properties.push(Expression::BackupProperty(Box::new(
11628                        BackupProperty {
11629                            this: Box::new(Expression::Var(Box::new(Var { this: value }))),
11630                        },
11631                    )));
11632                }
11633            } else {
11634                break;
11635            }
11636        }
11637
11638        // Check for AS SELECT (CTAS)
11639        if self.match_token(TokenType::As) {
11640            // ClickHouse: CREATE TABLE t AS other_table [ENGINE = ...] — copy structure from another table
11641            // Also: CREATE TABLE t AS func_name(args...) — table from function (e.g., remote, merge)
11642            // Detect when AS is followed by an identifier (not SELECT/WITH/LParen)
11643            if is_clickhouse
11644                && !self.check(TokenType::Select)
11645                && !self.check(TokenType::With)
11646                && !self.check(TokenType::LParen)
11647                && (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
11648            {
11649                // Check if this is AS func_name(...) — table function
11650                let is_table_func = self.current + 1 < self.tokens.len()
11651                    && self.tokens[self.current + 1].token_type == TokenType::LParen;
11652                let source = if is_table_func {
11653                    // Parse as expression to consume function call with arguments
11654                    self.parse_primary()?;
11655                    let mut table_properties: Vec<Expression> = Vec::new();
11656                    self.parse_clickhouse_table_properties(&mut table_properties)?;
11657                    return Ok(Expression::CreateTable(Box::new(CreateTable {
11658                        name,
11659                        on_cluster: on_cluster.clone(),
11660                        columns: Vec::new(),
11661                        constraints: Vec::new(),
11662                        if_not_exists,
11663                        temporary,
11664                        or_replace,
11665                        table_modifier: table_modifier.map(|s| s.to_string()),
11666                        as_select: None,
11667                        as_select_parenthesized: false,
11668                        on_commit: None,
11669                        clone_source: None,
11670                        clone_at_clause: None,
11671                        shallow_clone: false,
11672                        is_copy: false,
11673                        leading_comments,
11674                        with_properties,
11675                        teradata_post_name_options: teradata_post_name_options.clone(),
11676                        with_data: None,
11677                        with_statistics: None,
11678                        teradata_indexes: Vec::new(),
11679                        with_cte: None,
11680                        properties: table_properties,
11681                        partition_of: None,
11682                        post_table_properties: redshift_ctas_properties,
11683                        mysql_table_options: Vec::new(),
11684                        inherits: Vec::new(),
11685                        on_property: None,
11686                        copy_grants,
11687                        using_template: None,
11688                        rollup: None,
11689                        uuid: uuid.clone(),
11690                    })));
11691                } else {
11692                    self.parse_table_ref()?
11693                };
11694                // Parse ClickHouse table properties after the source table
11695                let mut table_properties: Vec<Expression> = Vec::new();
11696                self.parse_clickhouse_table_properties(&mut table_properties)?;
11697                return Ok(Expression::CreateTable(Box::new(CreateTable {
11698                    name,
11699                    on_cluster: on_cluster.clone(),
11700                    columns: Vec::new(),
11701                    constraints: Vec::new(),
11702                    if_not_exists,
11703                    temporary,
11704                    or_replace,
11705                    table_modifier: table_modifier.map(|s| s.to_string()),
11706                    as_select: None,
11707                    as_select_parenthesized: false,
11708                    on_commit: None,
11709                    clone_source: Some(source),
11710                    clone_at_clause: None,
11711                    shallow_clone: false,
11712                    is_copy: false,
11713                    leading_comments,
11714                    with_properties,
11715                    teradata_post_name_options: teradata_post_name_options.clone(),
11716                    with_data: None,
11717                    with_statistics: None,
11718                    teradata_indexes: Vec::new(),
11719                    with_cte: None,
11720                    properties: table_properties,
11721                    partition_of: None,
11722                    post_table_properties: redshift_ctas_properties,
11723                    mysql_table_options: Vec::new(),
11724                    inherits: Vec::new(),
11725                    on_property: None,
11726                    copy_grants,
11727                    using_template: None,
11728                    rollup: None,
11729                    uuid: uuid.clone(),
11730                })));
11731            }
11732
11733            // The query can be:
11734            // - SELECT ... (simple case)
11735            // - (SELECT 1) UNION ALL (SELECT 2) (set operations)
11736            // - (WITH cte AS (SELECT 1) SELECT * FROM cte) (CTE in parens)
11737            let mut as_select_parenthesized = self.check(TokenType::LParen);
11738            let query = if as_select_parenthesized {
11739                // Parenthesized query - parse as expression which handles subqueries
11740                // Note: parse_primary will consume set operations like UNION internally
11741                let subquery = self.parse_primary()?;
11742                // If parse_primary returned a set operation, the outer parens weren't wrapping
11743                // the entire expression - they were part of the operands
11744                if matches!(
11745                    &subquery,
11746                    Expression::Union(_) | Expression::Intersect(_) | Expression::Except(_)
11747                ) {
11748                    as_select_parenthesized = false;
11749                    subquery
11750                } else {
11751                    // Just a parenthesized query without set ops
11752                    // Keep the Subquery wrapper if it has limit/offset/order_by
11753                    if let Expression::Subquery(ref sq) = subquery {
11754                        if sq.limit.is_some() || sq.offset.is_some() || sq.order_by.is_some() {
11755                            // Keep the Subquery to preserve the modifiers
11756                            subquery
11757                        } else {
11758                            // Extract the inner query
11759                            if let Expression::Subquery(sq) = subquery {
11760                                sq.this
11761                            } else {
11762                                subquery
11763                            }
11764                        }
11765                    } else if let Expression::Paren(p) = subquery {
11766                        p.this
11767                    } else {
11768                        subquery
11769                    }
11770                }
11771            } else if self.check(TokenType::With) {
11772                // Handle WITH ... SELECT ...
11773                self.parse_statement()?
11774            } else {
11775                self.parse_select()?
11776            };
11777
11778            // Parse any trailing Teradata options like "WITH DATA", "NO PRIMARY INDEX", etc.
11779            let (with_data, with_statistics, teradata_indexes) =
11780                self.parse_teradata_table_options();
11781            let on_commit = if matches!(
11782                self.config.dialect,
11783                Some(crate::dialects::DialectType::Teradata)
11784            ) && self.check(TokenType::On)
11785                && self.check_next(TokenType::Commit)
11786            {
11787                self.skip(); // ON
11788                self.skip(); // COMMIT
11789                if self.match_keywords(&[TokenType::Preserve, TokenType::Rows]) {
11790                    Some(OnCommit::PreserveRows)
11791                } else if self.match_keywords(&[TokenType::Delete, TokenType::Rows]) {
11792                    Some(OnCommit::DeleteRows)
11793                } else {
11794                    return Err(
11795                        self.parse_error("Expected PRESERVE ROWS or DELETE ROWS after ON COMMIT")
11796                    );
11797                }
11798            } else {
11799                None
11800            };
11801
11802            return Ok(Expression::CreateTable(Box::new(CreateTable {
11803                name,
11804                on_cluster: on_cluster.clone(),
11805                columns: Vec::new(),
11806                constraints: Vec::new(),
11807                if_not_exists,
11808                temporary,
11809                or_replace,
11810                table_modifier: table_modifier.map(|s| s.to_string()),
11811                as_select: Some(query),
11812                as_select_parenthesized,
11813                on_commit,
11814                clone_source: None,
11815                clone_at_clause: None,
11816                shallow_clone: false,
11817                is_copy: false,
11818                leading_comments,
11819                with_properties,
11820                teradata_post_name_options: teradata_post_name_options.clone(),
11821                with_data,
11822                with_statistics,
11823                teradata_indexes,
11824                with_cte: None,
11825                properties: Vec::new(),
11826                partition_of: None,
11827                post_table_properties: redshift_ctas_properties,
11828                mysql_table_options: Vec::new(),
11829                inherits: Vec::new(),
11830                on_property: None,
11831                copy_grants,
11832                using_template: None,
11833                rollup: None,
11834                uuid: uuid.clone(),
11835            })));
11836        }
11837
11838        // ClickHouse: allow table properties/AS SELECT without a column list
11839        if is_clickhouse && !self.check(TokenType::LParen) {
11840            let starts_props = self.check_identifier("ENGINE")
11841                || self.check(TokenType::Order)
11842                || self.check(TokenType::Sample)
11843                || self.check(TokenType::Settings)
11844                || self.check(TokenType::Comment)
11845                || self.check(TokenType::As);
11846
11847            if starts_props {
11848                let mut table_properties: Vec<Expression> = Vec::new();
11849                self.parse_clickhouse_table_properties(&mut table_properties)?;
11850
11851                let as_select = if self.match_token(TokenType::As) {
11852                    Some(self.parse_statement()?)
11853                } else {
11854                    None
11855                };
11856                let as_select_parenthesized = as_select.is_some();
11857
11858                if as_select.is_some() {
11859                    self.parse_clickhouse_table_properties(&mut table_properties)?;
11860                }
11861
11862                return Ok(Expression::CreateTable(Box::new(CreateTable {
11863                    name,
11864                    on_cluster: on_cluster.clone(),
11865                    columns: Vec::new(),
11866                    constraints: Vec::new(),
11867                    if_not_exists,
11868                    temporary,
11869                    or_replace,
11870                    table_modifier: table_modifier.map(|s| s.to_string()),
11871                    as_select,
11872                    as_select_parenthesized,
11873                    on_commit: None,
11874                    clone_source: None,
11875                    clone_at_clause: None,
11876                    shallow_clone: false,
11877                    is_copy: false,
11878                    leading_comments,
11879                    with_properties,
11880                    teradata_post_name_options: teradata_post_name_options.clone(),
11881                    with_data: None,
11882                    with_statistics: None,
11883                    teradata_indexes: Vec::new(),
11884                    with_cte: None,
11885                    properties: table_properties,
11886                    partition_of: None,
11887                    post_table_properties: Vec::new(),
11888                    mysql_table_options: Vec::new(),
11889                    inherits: Vec::new(),
11890                    on_property: None,
11891                    copy_grants,
11892                    using_template: None,
11893                    rollup: None,
11894                    uuid: uuid.clone(),
11895                })));
11896            }
11897        }
11898
11899        // For DYNAMIC/ICEBERG/EXTERNAL tables, columns might be optional (use AS SELECT or other syntax)
11900        // Check if we have a left paren for columns or if we're going straight to options
11901        if !self.check(TokenType::LParen) && is_special_modifier {
11902            // No columns - parse options and AS SELECT
11903            let mut extra_options = Vec::new();
11904            // Parse key=value options until AS or end
11905            // Note: WAREHOUSE is a keyword token type, so check for it explicitly
11906            while !self.is_at_end()
11907                && !self.check(TokenType::As)
11908                && !self.check(TokenType::Semicolon)
11909            {
11910                if self.is_identifier_token()
11911                    || self.is_safe_keyword_as_identifier()
11912                    || self.check(TokenType::Warehouse)
11913                {
11914                    let key = self.advance().text;
11915                    if self.match_token(TokenType::Eq) {
11916                        // Capture value
11917                        let value = if self.check(TokenType::String) {
11918                            let v = format!("'{}'", self.peek().text);
11919                            self.skip();
11920                            v
11921                        } else if self.is_identifier_token() || self.is_safe_keyword_as_identifier()
11922                        {
11923                            self.advance().text
11924                        } else {
11925                            break;
11926                        };
11927                        extra_options.push((key, value));
11928                    } else {
11929                        // Just a keyword without value (like WAREHOUSE mywh)
11930                        if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
11931                            let value = self.advance().text;
11932                            extra_options.push((key, value));
11933                        }
11934                    }
11935                } else {
11936                    break;
11937                }
11938            }
11939            // Check for AS SELECT
11940            let as_select = if self.match_token(TokenType::As) {
11941                Some(self.parse_statement()?)
11942            } else {
11943                None
11944            };
11945            return Ok(Expression::CreateTable(Box::new(CreateTable {
11946                name,
11947                on_cluster: on_cluster.clone(),
11948                columns: Vec::new(),
11949                constraints: Vec::new(),
11950                if_not_exists,
11951                temporary,
11952                or_replace,
11953                table_modifier: table_modifier.map(|s| s.to_string()),
11954                as_select,
11955                as_select_parenthesized: false,
11956                on_commit: None,
11957                clone_source: None,
11958                clone_at_clause: None,
11959                shallow_clone: false,
11960                is_copy: false,
11961                leading_comments,
11962                with_properties: extra_options,
11963                teradata_post_name_options: teradata_post_name_options.clone(),
11964                with_data: None,
11965                with_statistics: None,
11966                teradata_indexes: Vec::new(),
11967                with_cte: None,
11968                properties: Vec::new(),
11969                partition_of: None,
11970                post_table_properties: Vec::new(),
11971                mysql_table_options: Vec::new(),
11972                inherits: Vec::new(),
11973                on_property: None,
11974                copy_grants,
11975                using_template: None,
11976                rollup: None,
11977                uuid: uuid.clone(),
11978            })));
11979        }
11980
11981        // MySQL: CREATE TABLE A LIKE B (without parentheses)
11982        if self.check(TokenType::Like) {
11983            self.skip(); // consume LIKE
11984            let source_ref = self.parse_table_ref()?;
11985            return Ok(Expression::CreateTable(Box::new(CreateTable {
11986                name,
11987                on_cluster: on_cluster.clone(),
11988                columns: Vec::new(),
11989                constraints: vec![TableConstraint::Like {
11990                    source: source_ref,
11991                    options: Vec::new(),
11992                }],
11993                if_not_exists,
11994                temporary,
11995                or_replace,
11996                table_modifier: table_modifier.map(|s| s.to_string()),
11997                as_select: None,
11998                as_select_parenthesized: false,
11999                on_commit: None,
12000                clone_source: None,
12001                clone_at_clause: None,
12002                shallow_clone: false,
12003                is_copy: false,
12004                leading_comments,
12005                with_properties,
12006                teradata_post_name_options: teradata_post_name_options.clone(),
12007                with_data: None,
12008                with_statistics: None,
12009                teradata_indexes: Vec::new(),
12010                with_cte: None,
12011                properties: Vec::new(),
12012                partition_of: None,
12013                post_table_properties: Vec::new(),
12014                mysql_table_options: Vec::new(),
12015                inherits: Vec::new(),
12016                on_property: None,
12017                copy_grants,
12018                using_template: None,
12019                rollup: None,
12020                uuid: uuid.clone(),
12021            })));
12022        }
12023
12024        // Snowflake: CREATE TABLE a TAG (key='value', ...) without column definitions
12025        if self.match_keyword("TAG")
12026            || (self.match_token(TokenType::With) && self.match_keyword("TAG"))
12027        {
12028            let tags = self.parse_tags()?;
12029            return Ok(Expression::CreateTable(Box::new(CreateTable {
12030                name,
12031                on_cluster: on_cluster.clone(),
12032                columns: Vec::new(),
12033                constraints: vec![TableConstraint::Tags(tags)],
12034                if_not_exists,
12035                temporary,
12036                or_replace,
12037                table_modifier: table_modifier.map(|s| s.to_string()),
12038                as_select: None,
12039                as_select_parenthesized: false,
12040                on_commit: None,
12041                clone_source: None,
12042                clone_at_clause: None,
12043                shallow_clone: false,
12044                is_copy: false,
12045                leading_comments,
12046                with_properties,
12047                teradata_post_name_options: teradata_post_name_options.clone(),
12048                with_data: None,
12049                with_statistics: None,
12050                teradata_indexes: Vec::new(),
12051                with_cte: None,
12052                properties: Vec::new(),
12053                partition_of: None,
12054                post_table_properties: Vec::new(),
12055                mysql_table_options: Vec::new(),
12056                inherits: Vec::new(),
12057                on_property: None,
12058                copy_grants,
12059                using_template: None,
12060                rollup: None,
12061                uuid: uuid.clone(),
12062            })));
12063        }
12064
12065        // Hive/Spark/Databricks: CREATE TABLE t TBLPROPERTIES (...) without column definitions
12066        // Check for Hive-style table properties before expecting column definitions
12067        if self.check_identifier("TBLPROPERTIES")
12068            || self.check_identifier("LOCATION")
12069            || self.check_identifier("STORED")
12070            || self.check(TokenType::Row)
12071            || self.check(TokenType::Using)
12072            || self.check_identifier("CLUSTERED")
12073            || self.check_identifier("PARTITIONED")
12074            || self.check_identifier("COMMENT")
12075        {
12076            // Parse Hive table properties without column definitions
12077            let hive_properties = self.parse_hive_table_properties()?;
12078
12079            // Check for AS SELECT (CTAS) after properties
12080            let as_select = if self.match_token(TokenType::As) {
12081                Some(self.parse_statement()?)
12082            } else {
12083                None
12084            };
12085
12086            return Ok(Expression::CreateTable(Box::new(CreateTable {
12087                name,
12088                on_cluster: on_cluster.clone(),
12089                columns: Vec::new(),
12090                constraints: Vec::new(),
12091                if_not_exists,
12092                temporary,
12093                or_replace,
12094                table_modifier: table_modifier.map(|s| s.to_string()),
12095                as_select,
12096                as_select_parenthesized: false,
12097                on_commit: None,
12098                clone_source: None,
12099                clone_at_clause: None,
12100                shallow_clone: false,
12101                is_copy: false,
12102                leading_comments,
12103                with_properties,
12104                teradata_post_name_options: teradata_post_name_options.clone(),
12105                with_data: None,
12106                with_statistics: None,
12107                teradata_indexes: Vec::new(),
12108                with_cte: None,
12109                properties: hive_properties,
12110                partition_of: None,
12111                post_table_properties: Vec::new(),
12112                mysql_table_options: Vec::new(),
12113                inherits: Vec::new(),
12114                on_property: None,
12115                copy_grants,
12116                using_template: None,
12117                rollup: None,
12118                uuid: uuid.clone(),
12119            })));
12120        }
12121
12122        // Check if (SELECT ...) or (WITH ...) follows - this is CTAS without explicit AS keyword
12123        if self.check(TokenType::LParen) {
12124            let saved = self.current;
12125            self.skip(); // consume (
12126            let is_ctas = self.check(TokenType::Select) || self.check(TokenType::With);
12127            self.current = saved;
12128            if is_ctas {
12129                // Parse as subquery
12130                let subquery = self.parse_primary()?;
12131                let query = if let Expression::Subquery(sq) = subquery {
12132                    sq.this
12133                } else if let Expression::Paren(p) = subquery {
12134                    p.this
12135                } else {
12136                    subquery
12137                };
12138                return Ok(Expression::CreateTable(Box::new(CreateTable {
12139                    name,
12140                    on_cluster: on_cluster.clone(),
12141                    columns: Vec::new(),
12142                    constraints: Vec::new(),
12143                    if_not_exists,
12144                    temporary,
12145                    or_replace,
12146                    table_modifier: table_modifier.map(|s| s.to_string()),
12147                    as_select: Some(query),
12148                    as_select_parenthesized: true,
12149                    on_commit: None,
12150                    clone_source: None,
12151                    clone_at_clause: None,
12152                    shallow_clone: false,
12153                    is_copy: false,
12154                    leading_comments,
12155                    with_properties,
12156                    teradata_post_name_options: teradata_post_name_options.clone(),
12157                    with_data: None,
12158                    with_statistics: None,
12159                    teradata_indexes: Vec::new(),
12160                    with_cte: None,
12161                    properties: Vec::new(),
12162                    partition_of: None,
12163                    post_table_properties: Vec::new(),
12164                    mysql_table_options: Vec::new(),
12165                    inherits: Vec::new(),
12166                    on_property: None,
12167                    copy_grants,
12168                    using_template: None,
12169                    rollup: None,
12170                    uuid: uuid.clone(),
12171                })));
12172            }
12173        }
12174
12175        // BigQuery (and others): CREATE TABLE t PARTITION BY ... CLUSTER BY ... OPTIONS(...) AS (SELECT ...)
12176        // When there are no column definitions, skip straight to property/AS parsing
12177        let no_column_defs = !self.check(TokenType::LParen)
12178            && (self.check(TokenType::Partition)
12179                || self.check(TokenType::PartitionBy)
12180                || self.check(TokenType::Cluster)
12181                || self.check_identifier("OPTIONS")
12182                || self.check(TokenType::As));
12183
12184        // Parse column definitions
12185        if !no_column_defs {
12186            self.expect(TokenType::LParen)?;
12187        }
12188
12189        // For DYNAMIC TABLE, column list contains only names without types
12190        // e.g., CREATE DYNAMIC TABLE t (col1, col2, col3) TARGET_LAG=... AS SELECT ...
12191        let (columns, constraints) = if no_column_defs {
12192            (Vec::new(), Vec::new())
12193        } else if table_modifier == Some("DYNAMIC") {
12194            // Check if this looks like a simple column name list (just identifiers separated by commas)
12195            // by peeking ahead - if next token after identifier is comma or rparen, it's a name-only list
12196            let saved = self.current;
12197            let is_name_only_list =
12198                if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
12199                    self.skip();
12200                    let result = self.check(TokenType::Comma) || self.check(TokenType::RParen);
12201                    self.current = saved;
12202                    result
12203                } else {
12204                    false
12205                };
12206
12207            if is_name_only_list {
12208                // Parse column names without types
12209                let mut cols = Vec::new();
12210                loop {
12211                    let name = self.expect_identifier_or_safe_keyword_with_quoted()?;
12212                    // Create a column def with an empty/placeholder type
12213                    let mut col_def = ColumnDef::new(
12214                        name.name.clone(),
12215                        DataType::Custom {
12216                            name: String::new(),
12217                        },
12218                    );
12219                    col_def.name = name;
12220                    cols.push(col_def);
12221                    if !self.match_token(TokenType::Comma) {
12222                        break;
12223                    }
12224                }
12225                (cols, Vec::new())
12226            } else {
12227                // Regular column definitions with types
12228                self.parse_column_definitions()?
12229            }
12230        } else {
12231            self.parse_column_definitions()?
12232        };
12233
12234        if !no_column_defs {
12235            self.expect(TokenType::RParen)?;
12236        }
12237
12238        // Parse COMMENT before WITH properties (Presto: CREATE TABLE x (...) COMMENT 'text' WITH (...))
12239        let pre_with_comment = if self.check(TokenType::Comment) {
12240            let saved = self.current;
12241            self.skip(); // consume COMMENT
12242            if self.check(TokenType::String) {
12243                let comment_text = self.advance().text.clone();
12244                Some(comment_text)
12245            } else {
12246                self.current = saved;
12247                None
12248            }
12249        } else {
12250            None
12251        };
12252
12253        // Handle WITH properties after columns (e.g., CREATE TABLE z (z INT) WITH (...))
12254        // But skip if this is WITH(SYSTEM_VERSIONING=...) which is handled by parse_post_table_properties
12255        let with_properties_after = if self.check(TokenType::With) {
12256            // Lookahead: check if this is WITH(SYSTEM_VERSIONING=...)
12257            let saved = self.current;
12258            self.skip(); // consume WITH
12259            let is_system_versioning = if self.check(TokenType::LParen) {
12260                let saved2 = self.current;
12261                self.skip(); // consume (
12262                let result = self.check_identifier("SYSTEM_VERSIONING");
12263                self.current = saved2; // retreat to before (
12264                result
12265            } else {
12266                false
12267            };
12268            if is_system_versioning {
12269                // Retreat back before WITH, let parse_post_table_properties handle it
12270                self.current = saved;
12271                Vec::new()
12272            } else {
12273                // Normal WITH properties parsing
12274                self.parse_with_properties()?
12275            }
12276        } else {
12277            Vec::new()
12278        };
12279
12280        // Combine properties from before and after columns
12281        let mut all_with_properties = with_properties;
12282        all_with_properties.extend(with_properties_after);
12283
12284        // For DYNAMIC/ICEBERG/EXTERNAL tables with columns, parse Snowflake-specific options
12285        // like TARGET_LAG, WAREHOUSE, CATALOG, EXTERNAL_VOLUME, LOCATION etc.
12286        if is_special_modifier {
12287            while !self.is_at_end()
12288                && !self.check(TokenType::As)
12289                && !self.check(TokenType::Semicolon)
12290            {
12291                // Check for known Snowflake table options (WAREHOUSE is a keyword, others are identifiers)
12292                // These are Snowflake-style options that use KEY=VALUE or KEY VALUE (without =)
12293                // Hive-style LOCATION/TBLPROPERTIES (without =) should NOT be matched here
12294                let is_snowflake_option = self.check(TokenType::Warehouse)
12295                    || self.check_identifier("TARGET_LAG")
12296                    || self.check_identifier("CATALOG")
12297                    || self.check_identifier("EXTERNAL_VOLUME")
12298                    || self.check_identifier("BASE_LOCATION")
12299                    || self.check_identifier("REFRESH_MODE")
12300                    || self.check_identifier("INITIALIZE")
12301                    || self.check_identifier("DATA_RETENTION_TIME_IN_DAYS")
12302                    || self.check_identifier("LOCATION")
12303                    || self.check_identifier("PARTITION")
12304                    || self.check_identifier("FILE_FORMAT")
12305                    || self.check_identifier("AUTO_REFRESH");
12306                if is_snowflake_option {
12307                    // Save position before consuming key - we might need to retreat for Hive-style syntax
12308                    let saved = self.current;
12309                    let key = self.advance().text;
12310                    if self.match_token(TokenType::Eq) {
12311                        // Capture value - could be string, identifier, stage path @..., keyword, or parenthesized options
12312                        let value = if self.check(TokenType::LParen) {
12313                            // Parenthesized option list like file_format = (type = parquet compression = gzip)
12314                            self.skip(); // consume (
12315                            let mut options = String::from("(");
12316                            let mut depth = 1;
12317                            while !self.is_at_end() && depth > 0 {
12318                                let tok = self.advance();
12319                                if tok.token_type == TokenType::LParen {
12320                                    depth += 1;
12321                                } else if tok.token_type == TokenType::RParen {
12322                                    depth -= 1;
12323                                }
12324                                // Add space before tokens that need it (not after open paren, not before close paren)
12325                                if !options.ends_with('(')
12326                                    && !options.ends_with(' ')
12327                                    && tok.token_type != TokenType::RParen
12328                                {
12329                                    options.push(' ');
12330                                }
12331                                options.push_str(&tok.text);
12332                            }
12333                            options
12334                        } else if self.check(TokenType::String) {
12335                            let v = format!("'{}'", self.peek().text);
12336                            self.skip();
12337                            v
12338                        } else if self.check(TokenType::DAt) {
12339                            // Stage path like @s1/logs/
12340                            self.skip(); // consume @
12341                            let mut path = String::from("@");
12342                            if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
12343                                path.push_str(&self.advance().text);
12344                            }
12345                            // Parse path segments, but stop before Snowflake option keywords
12346                            while self.check(TokenType::Slash) {
12347                                // Peek ahead to see if next identifier is a Snowflake option keyword
12348                                if self.current + 1 < self.tokens.len() {
12349                                    let next = &self.tokens[self.current + 1];
12350                                    if next.text.eq_ignore_ascii_case("FILE_FORMAT")
12351                                        || next.text.eq_ignore_ascii_case("PARTITION_TYPE")
12352                                        || next.text.eq_ignore_ascii_case("AUTO_REFRESH")
12353                                        || next.text.eq_ignore_ascii_case("LOCATION")
12354                                        || next.text.eq_ignore_ascii_case("PARTITION")
12355                                        || next.text.eq_ignore_ascii_case("WAREHOUSE")
12356                                    {
12357                                        // Consume the trailing slash before the keyword
12358                                        self.skip();
12359                                        path.push('/');
12360                                        break;
12361                                    }
12362                                }
12363                                self.skip();
12364                                path.push('/');
12365                                if self.is_identifier_token()
12366                                    || self.is_safe_keyword_as_identifier()
12367                                {
12368                                    path.push_str(&self.advance().text);
12369                                }
12370                            }
12371                            path
12372                        } else if self.check(TokenType::Var) && self.peek().text.starts_with('@') {
12373                            // Stage path tokenized as Var (e.g., @s2/logs/)
12374                            // When @ is followed by alphanumeric, tokenizer creates a Var token
12375                            let mut path = self.advance().text;
12376                            // Parse path segments, but stop before Snowflake option keywords
12377                            while self.check(TokenType::Slash) {
12378                                // Peek ahead to see if next identifier is a Snowflake option keyword
12379                                if self.current + 1 < self.tokens.len() {
12380                                    let next = &self.tokens[self.current + 1];
12381                                    if next.text.eq_ignore_ascii_case("FILE_FORMAT")
12382                                        || next.text.eq_ignore_ascii_case("PARTITION_TYPE")
12383                                        || next.text.eq_ignore_ascii_case("AUTO_REFRESH")
12384                                        || next.text.eq_ignore_ascii_case("LOCATION")
12385                                        || next.text.eq_ignore_ascii_case("PARTITION")
12386                                        || next.text.eq_ignore_ascii_case("WAREHOUSE")
12387                                    {
12388                                        // Consume the trailing slash before the keyword
12389                                        self.skip();
12390                                        path.push('/');
12391                                        break;
12392                                    }
12393                                }
12394                                self.skip();
12395                                path.push('/');
12396                                if self.is_identifier_token()
12397                                    || self.is_safe_keyword_as_identifier()
12398                                {
12399                                    path.push_str(&self.advance().text);
12400                                }
12401                            }
12402                            path
12403                        } else if self.check(TokenType::Warehouse) {
12404                            self.advance().text
12405                        } else if self.is_identifier_token() || self.is_safe_keyword_as_identifier()
12406                        {
12407                            self.advance().text
12408                        } else {
12409                            // No valid value after =, retreat and let Hive parsing try
12410                            self.current = saved;
12411                            break;
12412                        };
12413                        all_with_properties.push((key, value));
12414                    } else if self.is_identifier_token()
12415                        || self.is_safe_keyword_as_identifier()
12416                        || self.check(TokenType::Warehouse)
12417                    {
12418                        // WAREHOUSE mywh (without =)
12419                        let value = self.advance().text;
12420                        all_with_properties.push((key, value));
12421                    } else {
12422                        // Not a Snowflake-style option (e.g., Hive LOCATION 'path' without =)
12423                        // Retreat and let Hive parsing try
12424                        self.current = saved;
12425                        break;
12426                    }
12427                } else {
12428                    break;
12429                }
12430            }
12431        }
12432
12433        // Parse MySQL table options: ENGINE=val, AUTO_INCREMENT=val, DEFAULT CHARSET=val, etc.
12434        let mysql_table_options = if is_clickhouse {
12435            Vec::new()
12436        } else {
12437            self.parse_mysql_table_options()
12438        };
12439
12440        // Parse StarRocks ROLLUP property: ROLLUP (r1(col1, col2), r2(col1))
12441        let rollup = if self.match_token(TokenType::Rollup) {
12442            self.expect(TokenType::LParen)?;
12443            let mut indices = Vec::new();
12444            loop {
12445                let name = self.expect_identifier_or_keyword_with_quoted()?;
12446                let cols = if self.match_token(TokenType::LParen) {
12447                    let mut col_list = Vec::new();
12448                    loop {
12449                        col_list.push(self.expect_identifier_or_keyword_with_quoted()?);
12450                        if !self.match_token(TokenType::Comma) {
12451                            break;
12452                        }
12453                    }
12454                    self.expect(TokenType::RParen)?;
12455                    col_list
12456                } else {
12457                    Vec::new()
12458                };
12459                indices.push(crate::expressions::RollupIndex {
12460                    name,
12461                    expressions: cols,
12462                });
12463                if !self.match_token(TokenType::Comma) {
12464                    break;
12465                }
12466            }
12467            self.expect(TokenType::RParen)?;
12468            Some(crate::expressions::RollupProperty {
12469                expressions: indices,
12470            })
12471        } else {
12472            None
12473        };
12474
12475        // Parse Hive table properties: ROW FORMAT, STORED AS/BY, LOCATION, TBLPROPERTIES
12476        let hive_properties = self.parse_hive_table_properties()?;
12477        let is_teradata = matches!(
12478            self.config.dialect,
12479            Some(crate::dialects::DialectType::Teradata)
12480        );
12481
12482        // Handle ON COMMIT PRESERVE ROWS or ON COMMIT DELETE ROWS
12483        // Also handle TSQL ON filegroup or ON filegroup (partition_column)
12484        let (mut on_commit, on_property) = if is_teradata {
12485            (None, None)
12486        } else if self.match_token(TokenType::On) {
12487            if self.match_token(TokenType::Commit) {
12488                if self.match_keywords(&[TokenType::Preserve, TokenType::Rows]) {
12489                    (Some(OnCommit::PreserveRows), None)
12490                } else if self.match_keywords(&[TokenType::Delete, TokenType::Rows]) {
12491                    (Some(OnCommit::DeleteRows), None)
12492                } else {
12493                    return Err(
12494                        self.parse_error("Expected PRESERVE ROWS or DELETE ROWS after ON COMMIT")
12495                    );
12496                }
12497            } else {
12498                // TSQL: ON filegroup or ON filegroup (partition_column)
12499                // Parse filegroup name as schema which allows filegroup(column) syntax
12500                let filegroup = self.parse_schema_identifier()?;
12501                (
12502                    None,
12503                    Some(OnProperty {
12504                        this: Box::new(filegroup),
12505                    }),
12506                )
12507            }
12508        } else {
12509            (None, None)
12510        };
12511
12512        // Parse table properties like DEFAULT COLLATE (BigQuery)
12513        let mut table_properties = hive_properties;
12514
12515        // If COMMENT was found before WITH, add it to table_properties as SchemaCommentProperty
12516        if let Some(comment_text) = pre_with_comment {
12517            table_properties.push(Expression::SchemaCommentProperty(Box::new(
12518                SchemaCommentProperty {
12519                    this: Box::new(Expression::Literal(Box::new(Literal::String(comment_text)))),
12520                },
12521            )));
12522        }
12523
12524        if self.match_token(TokenType::Default) && self.match_token(TokenType::Collate) {
12525            let collation = self.parse_primary()?;
12526            table_properties.push(Expression::CollateProperty(Box::new(CollateProperty {
12527                this: Box::new(collation),
12528                default: Some(Box::new(Expression::Boolean(BooleanLiteral {
12529                    value: true,
12530                }))),
12531            })));
12532        }
12533
12534        // BigQuery: OPTIONS (key=value, ...) on table - comes after column definitions
12535        if matches!(
12536            self.config.dialect,
12537            Some(crate::dialects::DialectType::BigQuery)
12538        ) {
12539            if let Some(options_property) = self.parse_bigquery_options_property()? {
12540                table_properties.push(options_property);
12541            }
12542        } else if self.match_identifier("OPTIONS") {
12543            let options = self.parse_options_list()?;
12544            table_properties.push(Expression::Properties(Box::new(Properties {
12545                expressions: options,
12546            })));
12547        }
12548
12549        // Doris/StarRocks: PROPERTIES ('key'='value', ...) - comes after column definitions
12550        let is_doris_starrocks = matches!(
12551            self.config.dialect,
12552            Some(crate::dialects::DialectType::Doris)
12553                | Some(crate::dialects::DialectType::StarRocks)
12554        );
12555        if is_doris_starrocks && self.match_identifier("PROPERTIES") {
12556            // Use parse_options_list which handles 'key'='value' format
12557            let props = self.parse_options_list()?;
12558            if !props.is_empty() {
12559                table_properties.push(Expression::Properties(Box::new(Properties {
12560                    expressions: props,
12561                })));
12562            }
12563        }
12564
12565        // Redshift: Parse DISTKEY, SORTKEY, DISTSTYLE, BACKUP after column definitions
12566        // These can appear in any order and multiple times
12567        loop {
12568            if self.match_identifier("DISTKEY") {
12569                // DISTKEY(column)
12570                if let Some(distkey) = self.parse_distkey()? {
12571                    table_properties.push(distkey);
12572                }
12573            } else if self.match_text_seq(&["COMPOUND", "SORTKEY"]) {
12574                // COMPOUND SORTKEY(col1, col2, ...)
12575                if let Some(sortkey) = self.parse_sortkey()? {
12576                    // Set compound flag
12577                    if let Expression::SortKeyProperty(mut skp) = sortkey {
12578                        skp.compound = Some(Box::new(Expression::Boolean(BooleanLiteral {
12579                            value: true,
12580                        })));
12581                        table_properties.push(Expression::SortKeyProperty(skp));
12582                    }
12583                }
12584            } else if self.match_identifier("SORTKEY") {
12585                // SORTKEY(col1, col2, ...)
12586                if let Some(sortkey) = self.parse_sortkey()? {
12587                    table_properties.push(sortkey);
12588                }
12589            } else if self.match_identifier("DISTSTYLE") {
12590                // DISTSTYLE ALL|EVEN|AUTO|KEY
12591                if self.match_texts(&["ALL", "EVEN", "AUTO", "KEY"]) {
12592                    let style = self.previous().text.to_ascii_uppercase();
12593                    table_properties.push(Expression::DistStyleProperty(Box::new(
12594                        DistStyleProperty {
12595                            this: Box::new(Expression::Var(Box::new(Var { this: style }))),
12596                        },
12597                    )));
12598                }
12599            } else if self.match_identifier("BACKUP") {
12600                // BACKUP YES|NO
12601                if self.match_texts(&["YES", "NO"]) {
12602                    let value = self.previous().text.to_ascii_uppercase();
12603                    table_properties.push(Expression::BackupProperty(Box::new(BackupProperty {
12604                        this: Box::new(Expression::Var(Box::new(Var { this: value }))),
12605                    })));
12606                }
12607            } else {
12608                break;
12609            }
12610        }
12611
12612        // Teradata: PRIMARY/UNIQUE/INDEX and PARTITION BY clauses after columns
12613        if is_teradata {
12614            loop {
12615                // Consume optional comma separator between index specs (only if followed by an index keyword)
12616                if self.check(TokenType::Comma) {
12617                    let saved_comma = self.current;
12618                    self.skip(); // consume comma
12619                    let is_index_keyword = self.check(TokenType::Unique)
12620                        || self.check(TokenType::PrimaryKey)
12621                        || self.check(TokenType::Index)
12622                        || self.check(TokenType::No);
12623                    if !is_index_keyword {
12624                        self.current = saved_comma; // retreat
12625                    }
12626                }
12627                if self.match_token(TokenType::Unique) {
12628                    let primary = self.match_token(TokenType::PrimaryKey);
12629                    let amp = self.match_identifier("AMP");
12630                    self.match_token(TokenType::Index);
12631                    let params = if self.match_token(TokenType::LParen) {
12632                        let cols = self.parse_identifier_list()?;
12633                        self.expect(TokenType::RParen)?;
12634                        cols.into_iter()
12635                            .map(|id| {
12636                                Expression::boxed_column(Column {
12637                                    name: id,
12638                                    table: None,
12639                                    join_mark: false,
12640                                    trailing_comments: Vec::new(),
12641                                    span: None,
12642                                    inferred_type: None,
12643                                })
12644                            })
12645                            .collect()
12646                    } else {
12647                        Vec::new()
12648                    };
12649                    table_properties.push(Expression::Index(Box::new(Index {
12650                        this: None,
12651                        table: None,
12652                        unique: true,
12653                        primary: if primary {
12654                            Some(Box::new(Expression::Boolean(BooleanLiteral {
12655                                value: true,
12656                            })))
12657                        } else {
12658                            None
12659                        },
12660                        amp: if amp {
12661                            Some(Box::new(Expression::Boolean(BooleanLiteral {
12662                                value: true,
12663                            })))
12664                        } else {
12665                            None
12666                        },
12667                        params,
12668                    })));
12669                    continue;
12670                }
12671                if self.match_token(TokenType::PrimaryKey) {
12672                    let amp = self.match_identifier("AMP");
12673                    self.match_token(TokenType::Index);
12674                    let params = if self.match_token(TokenType::LParen) {
12675                        let cols = self.parse_identifier_list()?;
12676                        self.expect(TokenType::RParen)?;
12677                        cols.into_iter()
12678                            .map(|id| {
12679                                Expression::boxed_column(Column {
12680                                    name: id,
12681                                    table: None,
12682                                    join_mark: false,
12683                                    trailing_comments: Vec::new(),
12684                                    span: None,
12685                                    inferred_type: None,
12686                                })
12687                            })
12688                            .collect()
12689                    } else {
12690                        Vec::new()
12691                    };
12692                    table_properties.push(Expression::Index(Box::new(Index {
12693                        this: None,
12694                        table: None,
12695                        unique: false,
12696                        primary: Some(Box::new(Expression::Boolean(BooleanLiteral {
12697                            value: true,
12698                        }))),
12699                        amp: if amp {
12700                            Some(Box::new(Expression::Boolean(BooleanLiteral {
12701                                value: true,
12702                            })))
12703                        } else {
12704                            None
12705                        },
12706                        params,
12707                    })));
12708                    continue;
12709                }
12710                if self.match_token(TokenType::Index) {
12711                    let params = if self.match_token(TokenType::LParen) {
12712                        let cols = self.parse_identifier_list()?;
12713                        self.expect(TokenType::RParen)?;
12714                        cols.into_iter()
12715                            .map(|id| {
12716                                Expression::boxed_column(Column {
12717                                    name: id,
12718                                    table: None,
12719                                    join_mark: false,
12720                                    trailing_comments: Vec::new(),
12721                                    span: None,
12722                                    inferred_type: None,
12723                                })
12724                            })
12725                            .collect()
12726                    } else {
12727                        Vec::new()
12728                    };
12729                    table_properties.push(Expression::Index(Box::new(Index {
12730                        this: None,
12731                        table: None,
12732                        unique: false,
12733                        primary: None,
12734                        amp: None,
12735                        params,
12736                    })));
12737                    continue;
12738                }
12739                if self.match_keywords(&[TokenType::Partition, TokenType::By]) {
12740                    let expr = self.parse_primary()?;
12741                    table_properties.push(Expression::PartitionedByProperty(Box::new(
12742                        PartitionedByProperty {
12743                            this: Box::new(expr),
12744                        },
12745                    )));
12746                    continue;
12747                }
12748                break;
12749            }
12750
12751            if on_commit.is_none()
12752                && self.check(TokenType::On)
12753                && self.check_next(TokenType::Commit)
12754            {
12755                self.skip(); // ON
12756                self.skip(); // COMMIT
12757                if self.match_keywords(&[TokenType::Preserve, TokenType::Rows]) {
12758                    on_commit = Some(OnCommit::PreserveRows);
12759                } else if self.match_keywords(&[TokenType::Delete, TokenType::Rows]) {
12760                    on_commit = Some(OnCommit::DeleteRows);
12761                } else {
12762                    return Err(
12763                        self.parse_error("Expected PRESERVE ROWS or DELETE ROWS after ON COMMIT")
12764                    );
12765                }
12766            }
12767        }
12768
12769        // ClickHouse: table properties after column definitions
12770        if is_clickhouse {
12771            self.parse_clickhouse_table_properties(&mut table_properties)?;
12772        }
12773
12774        // ClickHouse: EMPTY AS SELECT
12775        if matches!(
12776            self.config.dialect,
12777            Some(crate::dialects::DialectType::ClickHouse)
12778        ) && self.match_identifier("EMPTY")
12779        {
12780            table_properties.push(Expression::Var(Box::new(Var {
12781                this: "EMPTY".to_string(),
12782            })));
12783        }
12784
12785        // Handle AS SELECT after columns/WITH (CTAS with column definitions)
12786        // When there are no column definitions, AS comes after PARTITION BY/CLUSTER BY/OPTIONS
12787        let as_select = if !no_column_defs && self.match_token(TokenType::As) {
12788            Some(self.parse_statement()?)
12789        } else {
12790            None
12791        };
12792
12793        if is_clickhouse && as_select.is_some() {
12794            self.parse_clickhouse_table_properties(&mut table_properties)?;
12795        }
12796
12797        // Parse PARTITION BY RANGE/LIST/HASH(columns) for regular CREATE TABLE
12798        let is_bigquery = matches!(
12799            self.config.dialect,
12800            Some(crate::dialects::DialectType::BigQuery)
12801        );
12802        if !is_teradata && (self.check(TokenType::Partition) || self.check(TokenType::PartitionBy))
12803        {
12804            let parsed_bigquery_partition = if is_bigquery {
12805                if let Some(partition_property) = self.parse_bigquery_partition_by_property()? {
12806                    table_properties.push(partition_property);
12807                    true
12808                } else {
12809                    false
12810                }
12811            } else {
12812                false
12813            };
12814
12815            if !parsed_bigquery_partition {
12816                let saved = self.current;
12817                let is_partition_by = if self.match_token(TokenType::PartitionBy) {
12818                    true
12819                } else if self.match_token(TokenType::Partition) {
12820                    self.match_token(TokenType::By)
12821                } else {
12822                    false
12823                };
12824                if is_partition_by {
12825                    let partition_kind = if self.check(TokenType::Range) {
12826                        self.skip();
12827                        Some("RANGE".to_string())
12828                    } else if self.check(TokenType::List) {
12829                        self.skip();
12830                        Some("LIST".to_string())
12831                    } else if (self.check(TokenType::Identifier) || self.check(TokenType::Var))
12832                        && self.check_next(TokenType::LParen)
12833                    {
12834                        // Only treat identifier as partition method (like HASH) if followed by (
12835                        Some(self.advance().text.to_ascii_uppercase())
12836                    } else {
12837                        // No explicit partition method (RANGE/LIST/HASH), just PARTITION BY (cols)
12838                        None
12839                    };
12840
12841                    // StarRocks/Doris: PARTITION BY func(), col (bare expressions without RANGE/LIST)
12842                    // When the partition_kind was consumed as an identifier that's actually a function call
12843                    // and the content after the parenthesized args includes a comma, it's a bare expression list
12844                    if is_doris_starrocks
12845                        && partition_kind.is_some()
12846                        && !matches!(
12847                            partition_kind.as_deref(),
12848                            Some("RANGE") | Some("LIST") | Some("HASH") | Some("KEY")
12849                        )
12850                    {
12851                        // Backtrack: re-parse as bare PARTITION BY with comma-separated expressions
12852                        let func_name = partition_kind.unwrap();
12853                        let mut raw_sql = format!("PARTITION BY {}", func_name);
12854                        // Helper closure for consuming parenthesized content with proper spacing
12855                        fn consume_parens(parser: &mut Parser, raw_sql: &mut String) {
12856                            if !parser.check(TokenType::LParen) {
12857                                return;
12858                            }
12859                            parser.advance();
12860                            raw_sql.push('(');
12861                            let mut depth = 1;
12862                            let mut last_type: Option<TokenType> = None;
12863                            while !parser.is_at_end() && depth > 0 {
12864                                let tok = parser.advance();
12865                                if tok.token_type == TokenType::LParen {
12866                                    depth += 1;
12867                                } else if tok.token_type == TokenType::RParen {
12868                                    depth -= 1;
12869                                    if depth == 0 {
12870                                        break;
12871                                    }
12872                                }
12873                                // Add space after commas
12874                                if matches!(last_type, Some(TokenType::Comma)) {
12875                                    raw_sql.push(' ');
12876                                }
12877                                if tok.token_type == TokenType::String {
12878                                    raw_sql.push('\'');
12879                                    raw_sql.push_str(&tok.text);
12880                                    raw_sql.push('\'');
12881                                } else {
12882                                    raw_sql.push_str(&tok.text);
12883                                }
12884                                last_type = Some(tok.token_type.clone());
12885                            }
12886                            raw_sql.push(')');
12887                        }
12888                        consume_parens(self, &mut raw_sql);
12889                        // Consume more comma-separated expressions
12890                        while self.match_token(TokenType::Comma) {
12891                            raw_sql.push_str(", ");
12892                            let tok = self.advance();
12893                            raw_sql.push_str(&tok.text);
12894                            consume_parens(self, &mut raw_sql);
12895                        }
12896                        table_properties.push(Expression::Raw(Raw { sql: raw_sql }));
12897                    } else
12898                    // For Doris/StarRocks/MySQL RANGE/LIST, use structured parsing
12899                    if (is_doris_starrocks
12900                        || matches!(
12901                            self.config.dialect,
12902                            Some(crate::dialects::DialectType::MySQL)
12903                                | Some(crate::dialects::DialectType::SingleStore)
12904                                | Some(crate::dialects::DialectType::TiDB)
12905                        ))
12906                        && matches!(partition_kind.as_deref(), Some("RANGE") | Some("LIST"))
12907                    {
12908                        let partition_expr = self.parse_doris_partition_by_range_or_list(
12909                            partition_kind
12910                                .as_ref()
12911                                .map(|s| s.as_str())
12912                                .unwrap_or("RANGE"),
12913                        )?;
12914                        table_properties.push(partition_expr);
12915                    } else {
12916                        // Generic raw SQL parsing for other dialects
12917                        let no_partition_kind = partition_kind.is_none();
12918                        let mut raw_sql = match partition_kind {
12919                            Some(kind) => format!("PARTITION BY {}", kind),
12920                            None => "PARTITION BY ".to_string(),
12921                        };
12922                        if self.check(TokenType::LParen) {
12923                            self.skip();
12924                            raw_sql.push('(');
12925                            let mut depth = 1;
12926                            let mut last_tok_type: Option<TokenType> = None;
12927                            while !self.is_at_end() && depth > 0 {
12928                                let tok = self.advance();
12929                                if tok.token_type == TokenType::LParen {
12930                                    depth += 1;
12931                                } else if tok.token_type == TokenType::RParen {
12932                                    depth -= 1;
12933                                    if depth == 0 {
12934                                        break;
12935                                    }
12936                                }
12937                                // Add space before token if needed for proper formatting
12938                                let needs_space = match (&last_tok_type, &tok.token_type) {
12939                                    // Add space after comma
12940                                    (Some(TokenType::Comma), _) => true,
12941                                    // Add space after identifiers/keywords before other identifiers/keywords
12942                                    (Some(TokenType::Identifier), TokenType::Identifier) => true,
12943                                    _ => false,
12944                                };
12945                                if needs_space {
12946                                    raw_sql.push(' ');
12947                                }
12948                                // Handle string literals - preserve quotes
12949                                if tok.token_type == TokenType::String {
12950                                    raw_sql.push('\'');
12951                                    raw_sql.push_str(&tok.text);
12952                                    raw_sql.push('\'');
12953                                } else {
12954                                    raw_sql.push_str(&tok.text);
12955                                }
12956                                last_tok_type = Some(tok.token_type.clone());
12957                            }
12958                            raw_sql.push(')');
12959                        } else if no_partition_kind {
12960                            // Bare PARTITION BY expression list without a partition method
12961                            let mut first = true;
12962                            while !self.is_at_end()
12963                                && !self.check(TokenType::Cluster)
12964                                && !self.check(TokenType::As)
12965                                && !self.check(TokenType::Semicolon)
12966                                && !self.check(TokenType::RParen)
12967                                && !self.check_identifier("OPTIONS")
12968                            {
12969                                if !first {
12970                                    raw_sql.push_str(", ");
12971                                }
12972                                first = false;
12973                                let tok = self.advance();
12974                                raw_sql.push_str(&tok.text);
12975                                // Handle function calls: PARTITION BY DATE(col)
12976                                if self.check(TokenType::LParen) {
12977                                    self.skip();
12978                                    raw_sql.push('(');
12979                                    let mut depth = 1;
12980                                    while !self.is_at_end() && depth > 0 {
12981                                        let t = self.advance();
12982                                        if t.token_type == TokenType::LParen {
12983                                            depth += 1;
12984                                        } else if t.token_type == TokenType::RParen {
12985                                            depth -= 1;
12986                                            if depth == 0 {
12987                                                break;
12988                                            }
12989                                        }
12990                                        raw_sql.push_str(&t.text);
12991                                    }
12992                                    raw_sql.push(')');
12993                                }
12994                                if !self.match_token(TokenType::Comma) {
12995                                    break;
12996                                }
12997                            }
12998                        }
12999                        table_properties.push(Expression::Raw(Raw { sql: raw_sql }));
13000                    }
13001                } else {
13002                    self.current = saved;
13003                }
13004            }
13005        }
13006
13007        // Parse CLUSTER BY (BigQuery) after PARTITION BY
13008        if is_bigquery {
13009            if let Some(cluster_property) = self.parse_bigquery_cluster_by_property()? {
13010                table_properties.push(cluster_property);
13011            }
13012        } else if self.match_keywords(&[TokenType::Cluster, TokenType::By]) {
13013            let mut cluster_names = Vec::new();
13014            loop {
13015                let name = self.expect_identifier_or_keyword()?;
13016                cluster_names.push(name);
13017                if !self.match_token(TokenType::Comma) {
13018                    break;
13019                }
13020            }
13021            table_properties.push(Expression::Raw(Raw {
13022                sql: format!("CLUSTER BY {}", cluster_names.join(", ")),
13023            }));
13024        }
13025
13026        // No-column-defs path: OPTIONS and AS SELECT come after PARTITION BY / CLUSTER BY
13027        if no_column_defs {
13028            if matches!(
13029                self.config.dialect,
13030                Some(crate::dialects::DialectType::BigQuery)
13031            ) {
13032                if let Some(options_property) = self.parse_bigquery_options_property()? {
13033                    table_properties.push(options_property);
13034                }
13035            } else if self.match_identifier("OPTIONS") {
13036                let options = self.parse_options_list()?;
13037                table_properties.push(Expression::Properties(Box::new(Properties {
13038                    expressions: options,
13039                })));
13040            }
13041        }
13042
13043        let as_select = if no_column_defs && self.match_token(TokenType::As) {
13044            Some(self.parse_statement()?)
13045        } else {
13046            as_select
13047        };
13048
13049        // For EXTERNAL tables, parse additional Snowflake options that may come after PARTITION BY
13050        // (location=@s2/logs/, partition_type = user_specified, file_format = (...), etc.)
13051        if is_special_modifier {
13052            while !self.is_at_end()
13053                && !self.check(TokenType::As)
13054                && !self.check(TokenType::Semicolon)
13055            {
13056                let is_snowflake_option = self.check(TokenType::Warehouse)
13057                    || self.check_identifier("TARGET_LAG")
13058                    || self.check_identifier("CATALOG")
13059                    || self.check_identifier("EXTERNAL_VOLUME")
13060                    || self.check_identifier("BASE_LOCATION")
13061                    || self.check_identifier("REFRESH_MODE")
13062                    || self.check_identifier("INITIALIZE")
13063                    || self.check_identifier("DATA_RETENTION_TIME_IN_DAYS")
13064                    || self.check_identifier("LOCATION")
13065                    || self.check_identifier("PARTITION_TYPE")
13066                    || self.check_identifier("FILE_FORMAT")
13067                    || self.check_identifier("AUTO_REFRESH");
13068                if is_snowflake_option {
13069                    let key = self.advance().text;
13070                    if self.match_token(TokenType::Eq) {
13071                        let value = if self.check(TokenType::LParen) {
13072                            // Parenthesized option list
13073                            self.skip();
13074                            let mut options = String::from("(");
13075                            let mut depth = 1;
13076                            while !self.is_at_end() && depth > 0 {
13077                                let tok = self.advance();
13078                                if tok.token_type == TokenType::LParen {
13079                                    depth += 1;
13080                                } else if tok.token_type == TokenType::RParen {
13081                                    depth -= 1;
13082                                }
13083                                if !options.ends_with('(')
13084                                    && !options.ends_with(' ')
13085                                    && tok.token_type != TokenType::RParen
13086                                {
13087                                    options.push(' ');
13088                                }
13089                                options.push_str(&tok.text);
13090                            }
13091                            options
13092                        } else if self.check(TokenType::String) {
13093                            let v = format!("'{}'", self.peek().text);
13094                            self.skip();
13095                            v
13096                        } else if self.check(TokenType::DAt) {
13097                            // Stage path like @s1/logs/
13098                            self.skip();
13099                            let mut path = String::from("@");
13100                            if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
13101                                path.push_str(&self.advance().text);
13102                            }
13103                            while self.check(TokenType::Slash) {
13104                                if self.current + 1 < self.tokens.len() {
13105                                    let next = &self.tokens[self.current + 1];
13106                                    if next.text.eq_ignore_ascii_case("FILE_FORMAT")
13107                                        || next.text.eq_ignore_ascii_case("PARTITION_TYPE")
13108                                        || next.text.eq_ignore_ascii_case("AUTO_REFRESH")
13109                                        || next.text.eq_ignore_ascii_case("LOCATION")
13110                                        || next.text.eq_ignore_ascii_case("PARTITION")
13111                                        || next.text.eq_ignore_ascii_case("WAREHOUSE")
13112                                    {
13113                                        self.skip();
13114                                        path.push('/');
13115                                        break;
13116                                    }
13117                                }
13118                                self.skip();
13119                                path.push('/');
13120                                if self.is_identifier_token()
13121                                    || self.is_safe_keyword_as_identifier()
13122                                {
13123                                    path.push_str(&self.advance().text);
13124                                }
13125                            }
13126                            path
13127                        } else if self.check(TokenType::Var) && self.peek().text.starts_with('@') {
13128                            let mut path = self.advance().text;
13129                            while self.check(TokenType::Slash) {
13130                                if self.current + 1 < self.tokens.len() {
13131                                    let next = &self.tokens[self.current + 1];
13132                                    if next.text.eq_ignore_ascii_case("FILE_FORMAT")
13133                                        || next.text.eq_ignore_ascii_case("PARTITION_TYPE")
13134                                        || next.text.eq_ignore_ascii_case("AUTO_REFRESH")
13135                                        || next.text.eq_ignore_ascii_case("LOCATION")
13136                                        || next.text.eq_ignore_ascii_case("PARTITION")
13137                                        || next.text.eq_ignore_ascii_case("WAREHOUSE")
13138                                    {
13139                                        self.skip();
13140                                        path.push('/');
13141                                        break;
13142                                    }
13143                                }
13144                                self.skip();
13145                                path.push('/');
13146                                if self.is_identifier_token()
13147                                    || self.is_safe_keyword_as_identifier()
13148                                {
13149                                    path.push_str(&self.advance().text);
13150                                }
13151                            }
13152                            path
13153                        } else if self.check(TokenType::Warehouse) {
13154                            self.advance().text
13155                        } else if self.is_identifier_token() || self.is_safe_keyword_as_identifier()
13156                        {
13157                            self.advance().text
13158                        } else {
13159                            break;
13160                        };
13161                        all_with_properties.push((key, value));
13162                    } else if self.is_identifier_token()
13163                        || self.is_safe_keyword_as_identifier()
13164                        || self.check(TokenType::Warehouse)
13165                    {
13166                        let value = self.advance().text;
13167                        all_with_properties.push((key, value));
13168                    }
13169                } else {
13170                    break;
13171                }
13172            }
13173        }
13174
13175        // Parse TSQL table-level WITH(SYSTEM_VERSIONING=ON(...)) after columns
13176        // This is different from the earlier WITH properties parsing.
13177        // TSQL uses WITH(...) after columns for system versioning.
13178        let post_table_properties = self.parse_post_table_properties()?;
13179
13180        // PostgreSQL: INHERITS (parent1, parent2, ...)
13181        let inherits = if self.match_identifier("INHERITS") {
13182            self.expect(TokenType::LParen)?;
13183            let mut parents = Vec::new();
13184            loop {
13185                parents.push(self.parse_table_ref()?);
13186                if !self.match_token(TokenType::Comma) {
13187                    break;
13188                }
13189            }
13190            self.expect(TokenType::RParen)?;
13191            parents
13192        } else {
13193            Vec::new()
13194        };
13195
13196        Ok(Expression::CreateTable(Box::new(CreateTable {
13197            name,
13198            on_cluster,
13199            columns,
13200            constraints,
13201            if_not_exists,
13202            temporary,
13203            or_replace,
13204            table_modifier: table_modifier.map(|s| s.to_string()),
13205            as_select,
13206            as_select_parenthesized: false,
13207            on_commit,
13208            clone_source: None,
13209            clone_at_clause: None,
13210            shallow_clone: false,
13211            is_copy: false,
13212            leading_comments,
13213            with_properties: all_with_properties,
13214            teradata_post_name_options: teradata_post_name_options.clone(),
13215            with_data: None,
13216            with_statistics: None,
13217            teradata_indexes: Vec::new(),
13218            with_cte: None,
13219            properties: table_properties,
13220            partition_of: None,
13221            post_table_properties,
13222            mysql_table_options,
13223            inherits,
13224            on_property,
13225            copy_grants,
13226            using_template: None,
13227            rollup,
13228            uuid,
13229        })))
13230    }
13231
13232    /// Parse CREATE TABLE ... PARTITION OF parent_table [(cols)] [FOR VALUES spec | DEFAULT] [PARTITION BY ...]
13233    fn parse_create_table_partition_of(
13234        &mut self,
13235        name: TableRef,
13236        if_not_exists: bool,
13237        temporary: bool,
13238        or_replace: bool,
13239        table_modifier: Option<&str>,
13240        leading_comments: Vec<String>,
13241    ) -> Result<Expression> {
13242        // Parse parent table name
13243        let parent_table = self.parse_table_ref()?;
13244
13245        // Optionally parse column constraints in parens: (unitsales DEFAULT 0) or (CONSTRAINT ...)
13246        // This must come before FOR VALUES or DEFAULT. We distinguish from other uses
13247        // by checking if the first token after LParen is CONSTRAINT or an identifier
13248        // that is not a string literal.
13249        let (columns, constraints) = if self.check(TokenType::LParen) {
13250            // Peek ahead: current is LParen, current+1 is first token inside parens
13251            let first_inside = self.current + 1;
13252            // Check if this is a partition column specification: (colname DEFAULT value)
13253            // Column names tokenize as Var (unquoted) or QuotedIdentifier (quoted)
13254            let is_column_defs = first_inside < self.tokens.len()
13255                && (self.tokens[first_inside].token_type == TokenType::Constraint
13256                    || ((self.tokens[first_inside].token_type == TokenType::Var
13257                        || self.tokens[first_inside].token_type == TokenType::QuotedIdentifier
13258                        || self.tokens[first_inside].token_type == TokenType::Identifier)
13259                        && first_inside + 1 < self.tokens.len()
13260                        && self.tokens[first_inside + 1].token_type == TokenType::Default));
13261
13262            if is_column_defs {
13263                self.skip(); // consume LParen
13264                             // Use special parsing for partition column specs - they don't have data types,
13265                             // just column names with constraint overrides like DEFAULT
13266                let (cols, constrs) = self.parse_partition_column_specs()?;
13267                self.expect(TokenType::RParen)?;
13268                (cols, constrs)
13269            } else {
13270                (Vec::new(), Vec::new())
13271            }
13272        } else {
13273            (Vec::new(), Vec::new())
13274        };
13275
13276        // Parse DEFAULT or FOR VALUES spec
13277        let partition_bound: Expression = if self.match_token(TokenType::Default) {
13278            // DEFAULT partition
13279            Expression::Var(Box::new(Var {
13280                this: "DEFAULT".to_string(),
13281            }))
13282        } else if self.match_token(TokenType::For) {
13283            // FOR VALUES ...
13284            self.expect(TokenType::Values)?;
13285            self.parse_partition_bound_spec()?
13286        } else {
13287            // Neither DEFAULT nor FOR VALUES - could be an error
13288            // but we'll be lenient and just create a DEFAULT
13289            Expression::Var(Box::new(Var {
13290                this: "DEFAULT".to_string(),
13291            }))
13292        };
13293
13294        let partition_of_expr =
13295            Expression::PartitionedOfProperty(Box::new(PartitionedOfProperty {
13296                this: Box::new(Expression::Table(Box::new(parent_table))),
13297                expression: Box::new(partition_bound),
13298            }));
13299
13300        // Optionally parse trailing PARTITION BY RANGE/LIST/HASH(columns)
13301        let mut table_properties: Vec<Expression> = Vec::new();
13302        if self.match_token(TokenType::Partition) || self.match_token(TokenType::PartitionBy) {
13303            // Could be PARTITION BY or just PartitionBy token
13304            if self.previous().token_type == TokenType::Partition {
13305                self.expect(TokenType::By)?;
13306            }
13307            // Parse RANGE/LIST/HASH(columns)
13308            let partition_kind = if self.check(TokenType::Identifier) || self.check(TokenType::Var)
13309            {
13310                let kind_text = self.advance().text.to_ascii_uppercase();
13311                kind_text
13312            } else if self.check(TokenType::Range) {
13313                self.skip();
13314                "RANGE".to_string()
13315            } else if self.check(TokenType::List) {
13316                self.skip();
13317                "LIST".to_string()
13318            } else {
13319                "RANGE".to_string()
13320            };
13321            // Parse (columns)
13322            let mut raw_sql = format!("PARTITION BY {}", partition_kind);
13323            if self.check(TokenType::LParen) {
13324                self.skip(); // consume LParen
13325                raw_sql.push('(');
13326                let mut depth = 1;
13327                while !self.is_at_end() && depth > 0 {
13328                    let tok = self.advance();
13329                    if tok.token_type == TokenType::LParen {
13330                        depth += 1;
13331                    } else if tok.token_type == TokenType::RParen {
13332                        depth -= 1;
13333                        if depth == 0 {
13334                            break;
13335                        }
13336                    }
13337                    raw_sql.push_str(&tok.text);
13338                }
13339                raw_sql.push(')');
13340            }
13341            table_properties.push(Expression::Raw(Raw { sql: raw_sql }));
13342        }
13343
13344        Ok(Expression::CreateTable(Box::new(CreateTable {
13345            name,
13346            on_cluster: None,
13347            columns,
13348            constraints,
13349            if_not_exists,
13350            temporary,
13351            or_replace,
13352            table_modifier: table_modifier.map(|s| s.to_string()),
13353            as_select: None,
13354            as_select_parenthesized: false,
13355            on_commit: None,
13356            clone_source: None,
13357            clone_at_clause: None,
13358            shallow_clone: false,
13359            is_copy: false,
13360            leading_comments,
13361            with_properties: Vec::new(),
13362            teradata_post_name_options: Vec::new(),
13363            with_data: None,
13364            with_statistics: None,
13365            teradata_indexes: Vec::new(),
13366            with_cte: None,
13367            properties: table_properties,
13368            partition_of: Some(partition_of_expr),
13369            post_table_properties: Vec::new(),
13370            mysql_table_options: Vec::new(),
13371            inherits: Vec::new(),
13372            on_property: None,
13373            copy_grants: false,
13374            using_template: None,
13375            rollup: None,
13376            uuid: None,
13377        })))
13378    }
13379
13380    /// Parse partition bound spec for PARTITION OF: IN (...), FROM (...) TO (...), or WITH (MODULUS n, REMAINDER n)
13381    fn parse_partition_bound_spec(&mut self) -> Result<Expression> {
13382        if self.match_token(TokenType::In) {
13383            // IN (val, val, ...)
13384            self.expect(TokenType::LParen)?;
13385            let mut values = Vec::new();
13386            loop {
13387                let val = self.parse_expression()?;
13388                values.push(val);
13389                if !self.match_token(TokenType::Comma) {
13390                    break;
13391                }
13392            }
13393            self.expect(TokenType::RParen)?;
13394            // Use Tuple for multiple values (generator strips parens for partition bounds)
13395            let this_expr = if values.len() == 1 {
13396                values.into_iter().next().unwrap()
13397            } else {
13398                Expression::Tuple(Box::new(Tuple {
13399                    expressions: values,
13400                }))
13401            };
13402            Ok(Expression::PartitionBoundSpec(Box::new(
13403                PartitionBoundSpec {
13404                    this: Some(Box::new(this_expr)),
13405                    expression: None,
13406                    from_expressions: None,
13407                    to_expressions: None,
13408                },
13409            )))
13410        } else if self.match_token(TokenType::From) {
13411            // FROM (val, ...) TO (val, ...)
13412            self.expect(TokenType::LParen)?;
13413            let mut from_vals = Vec::new();
13414            loop {
13415                let val = self.parse_partition_bound_value()?;
13416                from_vals.push(val);
13417                if !self.match_token(TokenType::Comma) {
13418                    break;
13419                }
13420            }
13421            self.expect(TokenType::RParen)?;
13422
13423            self.expect(TokenType::To)?;
13424            self.expect(TokenType::LParen)?;
13425            let mut to_vals = Vec::new();
13426            loop {
13427                let val = self.parse_partition_bound_value()?;
13428                to_vals.push(val);
13429                if !self.match_token(TokenType::Comma) {
13430                    break;
13431                }
13432            }
13433            self.expect(TokenType::RParen)?;
13434
13435            let from_expr = if from_vals.len() == 1 {
13436                from_vals.into_iter().next().unwrap()
13437            } else {
13438                Expression::Tuple(Box::new(Tuple {
13439                    expressions: from_vals,
13440                }))
13441            };
13442            let to_expr = if to_vals.len() == 1 {
13443                to_vals.into_iter().next().unwrap()
13444            } else {
13445                Expression::Tuple(Box::new(Tuple {
13446                    expressions: to_vals,
13447                }))
13448            };
13449
13450            Ok(Expression::PartitionBoundSpec(Box::new(
13451                PartitionBoundSpec {
13452                    this: None,
13453                    expression: None,
13454                    from_expressions: Some(Box::new(from_expr)),
13455                    to_expressions: Some(Box::new(to_expr)),
13456                },
13457            )))
13458        } else if self.match_token(TokenType::With) {
13459            // WITH (MODULUS n, REMAINDER n)
13460            self.expect(TokenType::LParen)?;
13461            self.match_text_seq(&["MODULUS"]);
13462            let modulus = self.parse_expression()?;
13463            self.expect(TokenType::Comma)?;
13464            self.match_text_seq(&["REMAINDER"]);
13465            let remainder = self.parse_expression()?;
13466            self.expect(TokenType::RParen)?;
13467
13468            Ok(Expression::PartitionBoundSpec(Box::new(
13469                PartitionBoundSpec {
13470                    this: Some(Box::new(modulus)),
13471                    expression: Some(Box::new(remainder)),
13472                    from_expressions: None,
13473                    to_expressions: None,
13474                },
13475            )))
13476        } else {
13477            Err(self.parse_error("Expected IN, FROM, or WITH after FOR VALUES in PARTITION OF"))
13478        }
13479    }
13480
13481    /// Parse a single partition bound value (number, string, MINVALUE, MAXVALUE)
13482    fn parse_partition_bound_value(&mut self) -> Result<Expression> {
13483        if self.match_token(TokenType::Minvalue) {
13484            Ok(Expression::Var(Box::new(Var {
13485                this: "MINVALUE".to_string(),
13486            })))
13487        } else if self.match_token(TokenType::Maxvalue) {
13488            Ok(Expression::Var(Box::new(Var {
13489                this: "MAXVALUE".to_string(),
13490            })))
13491        } else {
13492            self.parse_expression()
13493        }
13494    }
13495
13496    /// Parse column specifications for PostgreSQL PARTITION OF syntax.
13497    /// Unlike regular column definitions, these don't have data types - just column names
13498    /// with constraint overrides like DEFAULT, NOT NULL, or table-level CONSTRAINT clauses.
13499    /// Example: (unitsales DEFAULT 0) or (CONSTRAINT check_date CHECK (logdate >= '2016-07-01'))
13500    fn parse_partition_column_specs(&mut self) -> Result<(Vec<ColumnDef>, Vec<TableConstraint>)> {
13501        let mut columns = Vec::new();
13502        let mut constraints = Vec::new();
13503
13504        loop {
13505            // Check for table-level constraint (CONSTRAINT name ...)
13506            if self.check(TokenType::Constraint) {
13507                constraints.push(self.parse_table_constraint()?);
13508            } else if self.check(TokenType::PrimaryKey)
13509                || self.check(TokenType::ForeignKey)
13510                || self.check(TokenType::Unique)
13511                || self.check(TokenType::Check)
13512                || self.check(TokenType::Exclude)
13513            {
13514                constraints.push(self.parse_table_constraint()?);
13515            } else {
13516                // Parse column name with optional constraints (no data type)
13517                columns.push(self.parse_partition_column_spec()?);
13518            }
13519
13520            if !self.match_token(TokenType::Comma) {
13521                break;
13522            }
13523            // ClickHouse allows a trailing comma before the closing ')'
13524            if matches!(
13525                self.config.dialect,
13526                Some(crate::dialects::DialectType::ClickHouse)
13527            ) && self.check(TokenType::RParen)
13528            {
13529                break;
13530            }
13531        }
13532
13533        Ok((columns, constraints))
13534    }
13535
13536    /// Parse a single partition column specification: column_name [DEFAULT value] [NOT NULL] [NULL] [WITH OPTIONS ...]
13537    fn parse_partition_column_spec(&mut self) -> Result<ColumnDef> {
13538        // Parse column name
13539        let name = self.expect_identifier_or_safe_keyword_with_quoted()?;
13540
13541        // Create column def with Unknown data type (data type comes from parent table)
13542        let mut col_def = ColumnDef::new(name.name.clone(), DataType::Unknown);
13543        col_def.name = name;
13544
13545        // Parse column constraints (no data type expected)
13546        loop {
13547            if self.match_token(TokenType::Default) {
13548                // DEFAULT value
13549                let default_val = self.parse_expression()?;
13550                col_def.default = Some(default_val.clone());
13551                col_def
13552                    .constraints
13553                    .push(ColumnConstraint::Default(default_val));
13554                col_def.constraint_order.push(ConstraintType::Default);
13555            } else if self.match_keywords(&[TokenType::Not, TokenType::Null]) {
13556                col_def.nullable = Some(false);
13557                col_def.constraint_order.push(ConstraintType::NotNull);
13558            } else if self.match_token(TokenType::Null) {
13559                col_def.nullable = Some(true);
13560                col_def.constraint_order.push(ConstraintType::Null);
13561            } else if self.match_token(TokenType::Constraint) {
13562                // Inline CONSTRAINT name ... for this column
13563                let constraint_name = self.expect_identifier_or_safe_keyword()?;
13564                if self.match_keywords(&[TokenType::Not, TokenType::Null]) {
13565                    col_def.nullable = Some(false);
13566                    col_def.not_null_constraint_name = Some(constraint_name);
13567                    col_def.constraint_order.push(ConstraintType::NotNull);
13568                } else if self.match_token(TokenType::Check) {
13569                    col_def.check_constraint_name = Some(constraint_name);
13570                    if self.match_token(TokenType::LParen) {
13571                        let check_expr = self.parse_expression()?;
13572                        self.expect(TokenType::RParen)?;
13573                        col_def
13574                            .constraints
13575                            .push(ColumnConstraint::Check(check_expr));
13576                    }
13577                    col_def.constraint_order.push(ConstraintType::Check);
13578                } else if self.match_token(TokenType::Default) {
13579                    let default_val = self.parse_expression()?;
13580                    col_def.default = Some(default_val.clone());
13581                    col_def
13582                        .constraints
13583                        .push(ColumnConstraint::Default(default_val));
13584                    col_def.constraint_order.push(ConstraintType::Default);
13585                }
13586            } else if self.match_text_seq(&["WITH", "OPTIONS"]) {
13587                // PostgreSQL: WITH OPTIONS allows specifying more options
13588                // For now, just skip this - it's rarely used
13589                break;
13590            } else {
13591                break;
13592            }
13593        }
13594
13595        Ok(col_def)
13596    }
13597
13598    /// Parse WITH properties for CREATE TABLE (e.g., WITH (FORMAT='parquet', x='2'))
13599    /// Returns a list of (key, value) pairs
13600    fn parse_with_properties(&mut self) -> Result<Vec<(String, String)>> {
13601        self.expect(TokenType::LParen)?;
13602        let mut properties = Vec::new();
13603
13604        loop {
13605            if self.check(TokenType::RParen) {
13606                break;
13607            }
13608
13609            // Parse property name (can be keywords like FORMAT, TABLE_FORMAT)
13610            let mut key = self.expect_identifier_or_keyword()?;
13611
13612            // Handle multi-word keys like "PARTITIONED BY" -> "PARTITIONED_BY"
13613            if key.eq_ignore_ascii_case("PARTITIONED") && self.check(TokenType::By) {
13614                self.skip(); // consume BY
13615                key = "PARTITIONED_BY".to_string();
13616            }
13617
13618            // Expect = or special case for PARTITIONED_BY=(...)
13619            self.expect(TokenType::Eq)?;
13620
13621            // Parse property value - can be string, identifier, or parenthesized expression
13622            let value = if self.check(TokenType::String) {
13623                // Store string with quotes to preserve format
13624                let val = format!("'{}'", self.peek().text);
13625                self.skip();
13626                val
13627            } else if self.match_token(TokenType::LParen) {
13628                // Handle PARTITIONED_BY=(x INT, y INT) or similar
13629                let mut depth = 1;
13630                let mut result = String::from("(");
13631                let mut need_space = false;
13632                while !self.is_at_end() && depth > 0 {
13633                    if self.check(TokenType::LParen) {
13634                        depth += 1;
13635                    } else if self.check(TokenType::RParen) {
13636                        depth -= 1;
13637                        if depth == 0 {
13638                            break;
13639                        }
13640                    }
13641                    let token = self.peek();
13642                    let text = &token.text;
13643                    let token_type = token.token_type;
13644
13645                    // Determine if we need a space before this token
13646                    let is_punctuation = matches!(
13647                        token_type,
13648                        TokenType::Comma | TokenType::LParen | TokenType::RParen
13649                    );
13650                    if need_space && !is_punctuation {
13651                        result.push(' ');
13652                    }
13653
13654                    result.push_str(text);
13655
13656                    // Determine if we need a space after this token
13657                    need_space = token_type == TokenType::Comma
13658                        || (!is_punctuation
13659                            && !matches!(
13660                                token_type,
13661                                TokenType::LParen | TokenType::RParen | TokenType::Comma
13662                            ));
13663                    self.skip();
13664                }
13665                self.expect(TokenType::RParen)?;
13666                result.push(')');
13667                result
13668            } else if self.check_identifier("ARRAY")
13669                && self
13670                    .peek_nth(1)
13671                    .is_some_and(|t| t.token_type == TokenType::LBracket)
13672            {
13673                // Handle ARRAY['value', 'value', ...] syntax (Athena/Presto)
13674                let mut result = self.advance().text.clone(); // consume ARRAY
13675                self.expect(TokenType::LBracket)?;
13676                result.push('[');
13677                let mut first = true;
13678                while !self.is_at_end() && !self.check(TokenType::RBracket) {
13679                    if !first {
13680                        if self.match_token(TokenType::Comma) {
13681                            result.push_str(", ");
13682                        } else {
13683                            break;
13684                        }
13685                    }
13686                    first = false;
13687                    // Parse array element (usually a string)
13688                    if self.check(TokenType::String) {
13689                        result.push('\'');
13690                        result.push_str(&self.advance().text);
13691                        result.push('\'');
13692                    } else if self.is_identifier_token() {
13693                        result.push_str(&self.advance().text);
13694                    } else {
13695                        break;
13696                    }
13697                }
13698                self.expect(TokenType::RBracket)?;
13699                result.push(']');
13700                result
13701            } else if self.check(TokenType::Number) {
13702                // Numeric value (e.g., bucket_count=64)
13703                self.advance().text.clone()
13704            } else {
13705                // Just an identifier or keyword (e.g., allow_page_locks=on)
13706                self.expect_identifier_or_keyword()?
13707            };
13708
13709            properties.push((key, value));
13710
13711            if !self.match_token(TokenType::Comma) {
13712                break;
13713            }
13714        }
13715
13716        self.expect(TokenType::RParen)?;
13717        Ok(properties)
13718    }
13719
13720    /// Parse column definitions and table constraints
13721    fn parse_column_definitions(&mut self) -> Result<(Vec<ColumnDef>, Vec<TableConstraint>)> {
13722        let mut columns = Vec::new();
13723        let mut constraints = Vec::new();
13724
13725        loop {
13726            if self.check(TokenType::RParen) {
13727                break;
13728            }
13729            // Check for LIKE clause (PostgreSQL)
13730            if self.check(TokenType::Like) {
13731                constraints.push(self.parse_like_clause()?);
13732            }
13733            // Check for table-level constraint
13734            // For CHECK, only treat as constraint if followed by '(' (NOT in ClickHouse — there
13735            // CHECK/ASSUME without CONSTRAINT keyword is not supported, and 'check' can be a column name).
13736            // Otherwise, 'check' is a column name (e.g., CREATE TABLE t (check INT)).
13737            else if self.check(TokenType::Constraint)
13738                || self.check(TokenType::PrimaryKey)
13739                || self.check(TokenType::ForeignKey)
13740                || self.check(TokenType::Unique)
13741                || (self.check(TokenType::Check)
13742                    && !matches!(
13743                        self.config.dialect,
13744                        Some(crate::dialects::DialectType::ClickHouse)
13745                    )
13746                    && self
13747                        .peek_nth(1)
13748                        .map_or(false, |t| t.token_type == TokenType::LParen))
13749                || self.check(TokenType::Exclude)
13750            {
13751                constraints.push(self.parse_table_constraint()?);
13752            } else if matches!(
13753                self.config.dialect,
13754                Some(crate::dialects::DialectType::ClickHouse)
13755            ) && self.check(TokenType::Index)
13756            {
13757                // ClickHouse: INDEX name expr TYPE type_func(args) GRANULARITY n
13758                self.skip(); // consume INDEX
13759                let name = self.expect_identifier_or_keyword_with_quoted()?;
13760                // Use parse_conjunction to handle comparisons like c0 < (SELECT _table)
13761                let expression = self.parse_conjunction()?.ok_or_else(|| {
13762                    self.parse_error("Expected expression in ClickHouse INDEX definition")
13763                })?;
13764                let index_type = if self.match_token(TokenType::Type) {
13765                    // Parse function or identifier for type (e.g., bloom_filter(0.001), set(100), minmax)
13766                    // Handle keywords like 'set' that are tokenized as TokenType::Set
13767                    if let Some(func) = self.parse_function()? {
13768                        Some(Box::new(func))
13769                    } else if !self.check(TokenType::Identifier)
13770                        && !self.check(TokenType::Var)
13771                        && !self.is_at_end()
13772                    {
13773                        // Handle keywords as index type names (e.g., set, minmax)
13774                        let type_name = self.advance().text.clone();
13775                        if self.check(TokenType::LParen) {
13776                            // It's a function call like set(100)
13777                            self.skip(); // consume (
13778                            let mut args = Vec::new();
13779                            if !self.check(TokenType::RParen) {
13780                                args.push(self.parse_expression()?);
13781                                while self.match_token(TokenType::Comma) {
13782                                    args.push(self.parse_expression()?);
13783                                }
13784                            }
13785                            self.expect(TokenType::RParen)?;
13786                            Some(Box::new(Expression::Function(Box::new(Function::new(
13787                                type_name, args,
13788                            )))))
13789                        } else {
13790                            // Just an identifier
13791                            Some(Box::new(Expression::Identifier(Identifier::new(type_name))))
13792                        }
13793                    } else if let Some(id) = self.parse_id_var()? {
13794                        Some(Box::new(id))
13795                    } else {
13796                        None
13797                    }
13798                } else {
13799                    None
13800                };
13801                let granularity = if self.match_identifier("GRANULARITY") {
13802                    let gran_val = self.parse_expression()?;
13803                    Some(Box::new(gran_val))
13804                } else {
13805                    None
13806                };
13807                constraints.push(TableConstraint::Index {
13808                    name: Some(name),
13809                    columns: Vec::new(),
13810                    kind: None,
13811                    modifiers: ConstraintModifiers::default(),
13812                    use_key_keyword: false,
13813                    expression: Some(Box::new(expression)),
13814                    index_type,
13815                    granularity,
13816                });
13817            } else if !matches!(
13818                self.config.dialect,
13819                Some(crate::dialects::DialectType::ClickHouse)
13820            ) && (self.check(TokenType::Index)
13821                || self.check(TokenType::Key)
13822                || self.check_identifier("FULLTEXT")
13823                || self.check_identifier("SPATIAL"))
13824            {
13825                // INDEX/KEY constraint (MySQL). Guard KEY <type> as a normal column definition
13826                // (e.g. ClickHouse: `key UInt64`).
13827                let looks_like_key_constraint = if self.check(TokenType::Key) {
13828                    self.check_next(TokenType::LParen)
13829                        || ((self.check_next(TokenType::Identifier)
13830                            || self.check_next(TokenType::Var)
13831                            || self.check_next(TokenType::QuotedIdentifier))
13832                            && self.current + 2 < self.tokens.len()
13833                            && self.tokens[self.current + 2].token_type == TokenType::LParen)
13834                } else {
13835                    true
13836                };
13837
13838                if looks_like_key_constraint {
13839                    constraints.push(self.parse_index_table_constraint()?);
13840                } else {
13841                    columns.push(self.parse_column_def()?);
13842                }
13843            } else if self.check_identifier("PERIOD") {
13844                // TSQL: PERIOD FOR SYSTEM_TIME (start_col, end_col)
13845                if let Some(period_constraint) =
13846                    self.parse_period_for_system_time_table_constraint()?
13847                {
13848                    constraints.push(period_constraint);
13849                } else {
13850                    // Not actually PERIOD FOR SYSTEM_TIME, treat as column definition
13851                    columns.push(self.parse_column_def()?);
13852                }
13853            } else if self.check_identifier("INITIALLY") {
13854                // PostgreSQL: INITIALLY DEFERRED / INITIALLY IMMEDIATE as table-level setting
13855                self.skip(); // consume INITIALLY
13856                if self.match_identifier("DEFERRED") {
13857                    constraints.push(TableConstraint::InitiallyDeferred { deferred: true });
13858                } else if self.match_identifier("IMMEDIATE") {
13859                    constraints.push(TableConstraint::InitiallyDeferred { deferred: false });
13860                } else {
13861                    return Err(self.parse_error("Expected DEFERRED or IMMEDIATE after INITIALLY"));
13862                }
13863            } else if matches!(
13864                self.config.dialect,
13865                Some(crate::dialects::DialectType::ClickHouse)
13866            ) && self.check_identifier("PROJECTION")
13867            {
13868                // ClickHouse: PROJECTION name (SELECT ...) or PROJECTION name INDEX expr TYPE type_name
13869                self.skip(); // consume PROJECTION
13870                let name = self.expect_identifier_or_keyword_with_quoted()?;
13871                if self.match_token(TokenType::LParen) {
13872                    let expression = self.parse_statement()?;
13873                    self.expect(TokenType::RParen)?;
13874                    // ClickHouse: PROJECTION name (SELECT ...) WITH SETTINGS (key=value, ...)
13875                    if self.check(TokenType::With)
13876                        && self.current + 1 < self.tokens.len()
13877                        && self.tokens[self.current + 1].token_type == TokenType::Settings
13878                    {
13879                        self.skip(); // consume WITH
13880                        self.skip(); // consume SETTINGS
13881                        if self.match_token(TokenType::LParen) {
13882                            // Consume key=value pairs
13883                            loop {
13884                                if self.check(TokenType::RParen) {
13885                                    break;
13886                                }
13887                                if self.is_identifier_token()
13888                                    || self.is_safe_keyword_as_identifier()
13889                                {
13890                                    self.skip(); // key
13891                                }
13892                                if self.match_token(TokenType::Eq) {
13893                                    let _ = self.parse_primary()?; // value
13894                                }
13895                                if !self.match_token(TokenType::Comma) {
13896                                    break;
13897                                }
13898                            }
13899                            self.expect(TokenType::RParen)?;
13900                        }
13901                    }
13902                    constraints.push(TableConstraint::Projection { name, expression });
13903                } else if self.match_token(TokenType::Index) {
13904                    // PROJECTION name INDEX expr TYPE type_name
13905                    let expr = self.parse_bitwise()?.ok_or_else(|| {
13906                        self.parse_error(
13907                            "Expected expression in ClickHouse PROJECTION INDEX definition",
13908                        )
13909                    })?;
13910                    let type_str = if self.match_token(TokenType::Type) {
13911                        if !self.is_at_end()
13912                            && !self.check(TokenType::Comma)
13913                            && !self.check(TokenType::RParen)
13914                        {
13915                            self.advance().text.clone()
13916                        } else {
13917                            String::new()
13918                        }
13919                    } else {
13920                        String::new()
13921                    };
13922                    let raw_sql = if type_str.is_empty() {
13923                        format!("INDEX {} ", expr)
13924                    } else {
13925                        format!("INDEX {} TYPE {}", expr, type_str)
13926                    };
13927                    constraints.push(TableConstraint::Projection {
13928                        name,
13929                        expression: Expression::Raw(Raw { sql: raw_sql }),
13930                    });
13931                } else {
13932                    constraints.push(TableConstraint::Projection {
13933                        name,
13934                        expression: Expression::Null(Null),
13935                    });
13936                }
13937            } else {
13938                // Parse column definition
13939                columns.push(self.parse_column_def()?);
13940            }
13941
13942            if !self.match_token(TokenType::Comma) {
13943                break;
13944            }
13945            // ClickHouse: allow trailing comma before closing paren
13946            if matches!(
13947                self.config.dialect,
13948                Some(crate::dialects::DialectType::ClickHouse)
13949            ) && self.check(TokenType::RParen)
13950            {
13951                break;
13952            }
13953        }
13954
13955        Ok((columns, constraints))
13956    }
13957
13958    /// Parse LIKE clause in CREATE TABLE: LIKE source_table [INCLUDING|EXCLUDING options]
13959    fn parse_like_clause(&mut self) -> Result<TableConstraint> {
13960        self.expect(TokenType::Like)?;
13961        let source = self.parse_table_ref()?;
13962        let mut options = Vec::new();
13963
13964        // Parse optional INCLUDING/EXCLUDING modifiers
13965        loop {
13966            if self.match_identifier("INCLUDING") {
13967                let prop = self.expect_identifier_or_keyword()?.to_ascii_uppercase();
13968                options.push((LikeOptionAction::Including, prop));
13969            } else if self.match_identifier("EXCLUDING") {
13970                let prop = self.expect_identifier_or_keyword()?.to_ascii_uppercase();
13971                options.push((LikeOptionAction::Excluding, prop));
13972            } else {
13973                break;
13974            }
13975        }
13976
13977        Ok(TableConstraint::Like { source, options })
13978    }
13979
13980    /// Parse a single column definition
13981    fn parse_column_def(&mut self) -> Result<ColumnDef> {
13982        // Column names can be keywords like 'end', 'truncate', 'view', etc.
13983        // ClickHouse allows any keyword as column name (from, select, etc.)
13984        let mut name = if matches!(
13985            self.config.dialect,
13986            Some(crate::dialects::DialectType::ClickHouse)
13987        ) {
13988            self.expect_identifier_or_keyword_with_quoted()?
13989        } else {
13990            self.expect_identifier_or_safe_keyword_with_quoted()?
13991        };
13992        // ClickHouse: Nested column names like n.b for Nested() columns
13993        if matches!(
13994            self.config.dialect,
13995            Some(crate::dialects::DialectType::ClickHouse)
13996        ) {
13997            while self.match_token(TokenType::Dot) {
13998                let sub = self.expect_identifier_or_safe_keyword_with_quoted()?;
13999                name = Identifier {
14000                    name: format!("{}.{}", name.name, sub.name),
14001                    quoted: name.quoted,
14002                    trailing_comments: sub.trailing_comments,
14003                    span: None,
14004                };
14005            }
14006        }
14007
14008        // TSQL computed columns have no data type: column_name AS (expression) [PERSISTED]
14009        // Check if AS follows immediately (no data type)
14010        if self.check(TokenType::As) {
14011            let mut col_def = ColumnDef::new(
14012                name.name.clone(),
14013                DataType::Custom {
14014                    name: String::new(),
14015                },
14016            );
14017            col_def.name = name;
14018            // Consume AS and parse computed column expression
14019            self.skip(); // consume AS
14020            if self.check(TokenType::LParen) {
14021                self.parse_as_computed_column(&mut col_def)?;
14022            }
14023            return Ok(col_def);
14024        }
14025
14026        // SQLite allows column definitions without types: CREATE TABLE t (x, y)
14027        // ClickHouse allows typeless columns with DEFAULT/MATERIALIZED/ALIAS/EPHEMERAL
14028        // Check if the next token indicates no type (comma, rparen, or constraint keyword)
14029        let no_type = self.check(TokenType::Comma)
14030            || self.check(TokenType::RParen)
14031            || (matches!(
14032                self.config.dialect,
14033                Some(crate::dialects::DialectType::ClickHouse)
14034            ) && (self.check(TokenType::Default)
14035                || self.check(TokenType::Materialized)
14036                || self.check_identifier("ALIAS")
14037                || self.check_identifier("EPHEMERAL")));
14038        let data_type = if no_type {
14039            // No type specified - use empty custom type
14040            DataType::Custom {
14041                name: String::new(),
14042            }
14043        } else {
14044            self.parse_data_type()?
14045        };
14046
14047        let mut col_def = ColumnDef::new(name.name.clone(), data_type);
14048        col_def.name = name;
14049        col_def.no_type = no_type;
14050
14051        // Parse MySQL type modifiers (UNSIGNED, ZEROFILL)
14052        // These come after the data type but before other constraints
14053        while self.match_identifier("UNSIGNED")
14054            || self.match_identifier("ZEROFILL")
14055            || self.match_identifier("SIGNED")
14056        {
14057            let modifier = self.previous().text.to_ascii_uppercase();
14058            if modifier == "UNSIGNED" {
14059                col_def.unsigned = true;
14060            } else if modifier == "ZEROFILL" {
14061                col_def.zerofill = true;
14062            }
14063            // SIGNED is the default, no action needed
14064        }
14065
14066        // BigQuery: OPTIONS (key=value, ...) on column - comes right after type
14067        if self.match_identifier("OPTIONS") {
14068            col_def.options = self.parse_options_list()?;
14069        }
14070
14071        // Parse column constraints
14072        loop {
14073            if self.match_keywords(&[TokenType::Not, TokenType::Null]) {
14074                col_def.nullable = Some(false);
14075                col_def.constraint_order.push(ConstraintType::NotNull);
14076            } else if self.match_token(TokenType::Null) {
14077                col_def.nullable = Some(true);
14078                col_def.constraint_order.push(ConstraintType::Null);
14079            } else if self.match_keywords(&[TokenType::PrimaryKey, TokenType::Key]) {
14080                // Handle PRIMARY KEY [ASC|DESC]
14081                col_def.primary_key = true;
14082                // Capture ASC/DESC after PRIMARY KEY
14083                if self.match_token(TokenType::Asc) {
14084                    col_def.primary_key_order = Some(SortOrder::Asc);
14085                } else if self.match_token(TokenType::Desc) {
14086                    col_def.primary_key_order = Some(SortOrder::Desc);
14087                }
14088                col_def.constraint_order.push(ConstraintType::PrimaryKey);
14089            } else if self.match_token(TokenType::Constraint) {
14090                // Inline CONSTRAINT name ... (e.g., CONSTRAINT fk_name REFERENCES ...)
14091                let constraint_name = self.expect_identifier()?;
14092                // After constraint name, expect REFERENCES, PRIMARY KEY, UNIQUE, CHECK, NOT NULL, NULL, etc.
14093                if self.match_token(TokenType::References) {
14094                    let mut fk_ref = self.parse_foreign_key_ref()?;
14095                    fk_ref.constraint_name = Some(constraint_name);
14096                    col_def
14097                        .constraints
14098                        .push(ColumnConstraint::References(fk_ref));
14099                    col_def.constraint_order.push(ConstraintType::References);
14100                } else if self.match_keywords(&[TokenType::PrimaryKey, TokenType::Key]) {
14101                    col_def.primary_key = true;
14102                    col_def.primary_key_constraint_name = Some(constraint_name);
14103                    col_def.constraint_order.push(ConstraintType::PrimaryKey);
14104                } else if self.match_token(TokenType::Unique) {
14105                    col_def.unique = true;
14106                    col_def.unique_constraint_name = Some(constraint_name);
14107                    // Check for NULLS NOT DISTINCT (PostgreSQL 15+ feature)
14108                    if self.match_text_seq(&["NULLS", "NOT", "DISTINCT"]) {
14109                        col_def.unique_nulls_not_distinct = true;
14110                    }
14111                    col_def.constraint_order.push(ConstraintType::Unique);
14112                } else if self.match_keywords(&[TokenType::Not, TokenType::Null]) {
14113                    col_def.nullable = Some(false);
14114                    col_def.not_null_constraint_name = Some(constraint_name);
14115                    col_def.constraint_order.push(ConstraintType::NotNull);
14116                } else if self.match_token(TokenType::Check) {
14117                    col_def.check_constraint_name = Some(constraint_name);
14118                    // Parse CHECK constraint expression
14119                    if self.match_token(TokenType::LParen) {
14120                        let check_expr = self.parse_expression()?;
14121                        self.expect(TokenType::RParen)?;
14122                        col_def
14123                            .constraints
14124                            .push(ColumnConstraint::Check(check_expr));
14125                    } else if matches!(
14126                        self.config.dialect,
14127                        Some(crate::dialects::DialectType::ClickHouse)
14128                    ) {
14129                        // ClickHouse: CHECK expr without parens
14130                        let check_expr = self.parse_or()?;
14131                        col_def
14132                            .constraints
14133                            .push(ColumnConstraint::Check(check_expr));
14134                    }
14135                    col_def.constraint_order.push(ConstraintType::Check);
14136                }
14137            } else if self.match_token(TokenType::Unique) {
14138                col_def.unique = true;
14139                // Check for NULLS NOT DISTINCT (PostgreSQL 15+ feature)
14140                if self.match_text_seq(&["NULLS", "NOT", "DISTINCT"]) {
14141                    col_def.unique_nulls_not_distinct = true;
14142                }
14143                col_def.constraint_order.push(ConstraintType::Unique);
14144            } else if self.match_token(TokenType::Check) {
14145                // Standalone CHECK (expr) constraint (without CONSTRAINT name)
14146                if self.match_token(TokenType::LParen) {
14147                    let check_expr = self.parse_expression()?;
14148                    self.expect(TokenType::RParen)?;
14149                    col_def
14150                        .constraints
14151                        .push(ColumnConstraint::Check(check_expr));
14152                    col_def.constraint_order.push(ConstraintType::Check);
14153                } else if matches!(
14154                    self.config.dialect,
14155                    Some(crate::dialects::DialectType::ClickHouse)
14156                ) {
14157                    // ClickHouse: CHECK expr without parens
14158                    let check_expr = self.parse_or()?;
14159                    col_def
14160                        .constraints
14161                        .push(ColumnConstraint::Check(check_expr));
14162                    col_def.constraint_order.push(ConstraintType::Check);
14163                }
14164            } else if self.match_token(TokenType::AutoIncrement) || self.match_keyword("IDENTITY") {
14165                col_def.auto_increment = true;
14166                col_def.constraint_order.push(ConstraintType::AutoIncrement);
14167                // Handle IDENTITY/AUTOINCREMENT options: START n INCREMENT m [ORDER|NOORDER] or (start, increment)
14168                if self.match_keyword("START") {
14169                    col_def.auto_increment_start = Some(Box::new(self.parse_primary()?));
14170                    if self.match_keyword("INCREMENT") {
14171                        col_def.auto_increment_increment = Some(Box::new(self.parse_primary()?));
14172                    }
14173                    // Snowflake: ORDER or NOORDER option
14174                    if self.match_token(TokenType::Order) {
14175                        col_def.auto_increment_order = Some(true);
14176                    } else if self.match_identifier("NOORDER") {
14177                        col_def.auto_increment_order = Some(false);
14178                    }
14179                } else if self.match_token(TokenType::LParen) {
14180                    // IDENTITY(start, increment) or AUTOINCREMENT(start, increment)
14181                    col_def.auto_increment_start = Some(Box::new(self.parse_primary()?));
14182                    if self.match_token(TokenType::Comma) {
14183                        col_def.auto_increment_increment = Some(Box::new(self.parse_primary()?));
14184                    }
14185                    self.expect(TokenType::RParen)?;
14186                }
14187            } else if self.match_token(TokenType::Default) {
14188                // ClickHouse: DEFAULT expressions can be complex (today(), a + 1, cond ? x : y, etc.)
14189                col_def.default = if matches!(
14190                    self.config.dialect,
14191                    Some(crate::dialects::DialectType::ClickHouse)
14192                ) {
14193                    Some(self.parse_expression()?)
14194                } else {
14195                    Some(self.parse_unary()?)
14196                };
14197                col_def.constraint_order.push(ConstraintType::Default);
14198            } else if self.match_keywords(&[TokenType::ForeignKey, TokenType::Key]) {
14199                // Snowflake/SQL Server: FOREIGN KEY REFERENCES table(columns)
14200                // The FOREIGN KEY keywords are followed by REFERENCES
14201                self.expect(TokenType::References)?;
14202                let mut fk_ref = self.parse_foreign_key_ref()?;
14203                fk_ref.has_foreign_key_keywords = true;
14204                col_def
14205                    .constraints
14206                    .push(ColumnConstraint::References(fk_ref));
14207                col_def.constraint_order.push(ConstraintType::References);
14208            } else if self.match_token(TokenType::References) {
14209                let fk_ref = self.parse_foreign_key_ref()?;
14210                col_def
14211                    .constraints
14212                    .push(ColumnConstraint::References(fk_ref));
14213                col_def.constraint_order.push(ConstraintType::References);
14214            } else if self.match_token(TokenType::Generated) {
14215                // GENERATED [BY DEFAULT [ON NULL] | ALWAYS] AS ...
14216                // Could be: AS IDENTITY, AS (expr) STORED|VIRTUAL, AS ROW START|END
14217                self.parse_generated_column_constraint(&mut col_def)?;
14218            } else if self.match_token(TokenType::Collate) {
14219                // COLLATE collation_name (may be quoted like "de_DE")
14220                // Also handle dotted names like pg_catalog."default"
14221                let mut collation = self.expect_identifier_or_keyword_with_quoted()?;
14222                // Check for dotted collation names: pg_catalog."default"
14223                while self.match_token(TokenType::Dot) {
14224                    let next = self.expect_identifier_or_keyword_with_quoted()?;
14225                    let sep = if next.quoted {
14226                        format!("{}.\"{}\"", collation.name, next.name)
14227                    } else {
14228                        format!("{}.{}", collation.name, next.name)
14229                    };
14230                    collation = Identifier {
14231                        name: sep,
14232                        quoted: false,
14233                        trailing_comments: Vec::new(),
14234                        span: None,
14235                    };
14236                }
14237                col_def
14238                    .constraints
14239                    .push(ColumnConstraint::Collate(collation));
14240                col_def.constraint_order.push(ConstraintType::Collate);
14241            } else if self.match_token(TokenType::Comment) {
14242                // COMMENT 'comment text'
14243                let comment_text = self.expect_string()?;
14244                col_def
14245                    .constraints
14246                    .push(ColumnConstraint::Comment(comment_text));
14247                col_def.constraint_order.push(ConstraintType::Comment);
14248            } else if self.match_keywords(&[TokenType::On, TokenType::Update]) {
14249                // MySQL: ON UPDATE expression (e.g., ON UPDATE CURRENT_TIMESTAMP)
14250                let expr = self.parse_unary()?;
14251                col_def.on_update = Some(expr);
14252                col_def.constraint_order.push(ConstraintType::OnUpdate);
14253            } else if self.match_identifier("ENCODE") {
14254                // Redshift: ENCODE encoding_type (e.g., ZSTD, DELTA, LZO, etc.)
14255                let encoding = self.expect_identifier_or_keyword()?;
14256                col_def.encoding = Some(encoding);
14257                col_def.constraint_order.push(ConstraintType::Encode);
14258            } else if !matches!(
14259                self.config.dialect,
14260                Some(crate::dialects::DialectType::ClickHouse)
14261            ) && self.match_token(TokenType::Format)
14262            {
14263                // Teradata: FORMAT 'pattern' (not ClickHouse — FORMAT there is statement-level)
14264                let format_str = self.expect_string()?;
14265                col_def.format = Some(format_str);
14266            } else if self.match_identifier("TITLE") {
14267                // Teradata: TITLE 'title'
14268                let title_str = self.expect_string()?;
14269                col_def.title = Some(title_str);
14270            } else if self.match_identifier("INLINE") {
14271                // Teradata: INLINE LENGTH n
14272                self.match_identifier("LENGTH");
14273                let length = self.expect_number()?;
14274                col_def.inline_length = Some(length as u64);
14275            } else if self.match_identifier("COMPRESS") {
14276                // Teradata: COMPRESS or COMPRESS (values) or COMPRESS 'value'
14277                if self.match_token(TokenType::LParen) {
14278                    let values = self.parse_expression_list()?;
14279                    self.expect(TokenType::RParen)?;
14280                    col_def.compress = Some(values);
14281                } else if self.check(TokenType::String) {
14282                    // COMPRESS 'value'
14283                    let value = self.parse_primary()?;
14284                    col_def.compress = Some(vec![value]);
14285                } else {
14286                    // COMPRESS without values
14287                    col_def.compress = Some(Vec::new());
14288                }
14289            } else if self.match_identifier("CHARACTER") {
14290                // Teradata: CHARACTER SET name
14291                self.match_token(TokenType::Set);
14292                let charset = self.expect_identifier_or_keyword()?;
14293                col_def.character_set = Some(charset);
14294            } else if self.match_identifier("UPPERCASE") {
14295                // Teradata: UPPERCASE
14296                col_def.uppercase = true;
14297            } else if self.match_identifier("CASESPECIFIC") {
14298                // Teradata: CASESPECIFIC
14299                col_def.casespecific = Some(true);
14300            } else if self.match_text_seq(&["NOT", "FOR", "REPLICATION"]) {
14301                // TSQL: NOT FOR REPLICATION - skip this modifier (not preserved in output for non-TSQL)
14302                col_def.not_for_replication = true;
14303            } else if self.match_token(TokenType::Not) && self.match_identifier("CASESPECIFIC") {
14304                // Teradata: NOT CASESPECIFIC
14305                col_def.casespecific = Some(false);
14306            } else if self.match_keyword("TAG")
14307                || (self.match_token(TokenType::With) && self.match_keyword("TAG"))
14308            {
14309                // Snowflake: TAG (key='value', ...) or WITH TAG (key='value', ...)
14310                let tags = self.parse_tags()?;
14311                col_def.constraints.push(ColumnConstraint::Tags(tags));
14312                col_def.constraint_order.push(ConstraintType::Tags);
14313            } else if self.match_token(TokenType::As) {
14314                // Computed column: AS (expression) [STORED|VIRTUAL|PERSISTED] [NOT NULL]
14315                // TSQL: AS (expression) [PERSISTED] [NOT NULL]
14316                // MySQL shorthand: AS (expression) [STORED|VIRTUAL]
14317                // Also: Snowflake External Table virtual column expression
14318                if self.check(TokenType::LParen) {
14319                    self.parse_as_computed_column(&mut col_def)?;
14320                }
14321            } else if self.match_identifier("CODEC") {
14322                // ClickHouse: CODEC(LZ4HC(9), ZSTD, DELTA)
14323                self.expect(TokenType::LParen)?;
14324                let start = self.current;
14325                let mut depth = 1;
14326                while !self.is_at_end() && depth > 0 {
14327                    if self.check(TokenType::LParen) {
14328                        depth += 1;
14329                    }
14330                    if self.check(TokenType::RParen) {
14331                        depth -= 1;
14332                        if depth == 0 {
14333                            break;
14334                        }
14335                    }
14336                    self.skip();
14337                }
14338                let codec_text = self.tokens_to_sql(start, self.current);
14339                self.expect(TokenType::RParen)?;
14340                col_def.codec = Some(codec_text);
14341            } else if self.match_identifier("STATISTICS") {
14342                // ClickHouse: STATISTICS(tdigest, minmax, uniq, ...)
14343                self.expect(TokenType::LParen)?;
14344                let mut depth = 1;
14345                while !self.is_at_end() && depth > 0 {
14346                    if self.check(TokenType::LParen) {
14347                        depth += 1;
14348                    }
14349                    if self.check(TokenType::RParen) {
14350                        depth -= 1;
14351                        if depth == 0 {
14352                            break;
14353                        }
14354                    }
14355                    self.skip();
14356                }
14357                self.expect(TokenType::RParen)?;
14358                // Statistics info is stored but we don't need it for transpilation
14359            } else if self.match_identifier("EPHEMERAL") {
14360                // ClickHouse: EPHEMERAL [expr] [type]
14361                // EPHEMERAL can optionally be followed by an expression, then optionally a data type
14362                if !self.check(TokenType::Comma)
14363                    && !self.check(TokenType::RParen)
14364                    && !self.is_at_end()
14365                    && !self.check_identifier("CODEC")
14366                    && !self.check_identifier("TTL")
14367                    && !self.check(TokenType::Comment)
14368                {
14369                    let expr = self.parse_bitwise()?.unwrap_or(Expression::Null(Null));
14370                    col_def.ephemeral = Some(Some(Box::new(expr)));
14371                    // ClickHouse: type can follow EPHEMERAL expression (e.g., b EPHEMERAL 'a' String)
14372                    if col_def.no_type
14373                        && !self.check(TokenType::Comma)
14374                        && !self.check(TokenType::RParen)
14375                        && !self.is_at_end()
14376                        && !self.check_identifier("CODEC")
14377                        && !self.check_identifier("TTL")
14378                        && !self.check(TokenType::Comment)
14379                    {
14380                        col_def.data_type = self.parse_data_type()?;
14381                        col_def.no_type = false;
14382                    }
14383                } else {
14384                    col_def.ephemeral = Some(None);
14385                }
14386            } else if self.check(TokenType::Materialized) && !self.check_next(TokenType::View) {
14387                // ClickHouse: MATERIALIZED expr (but not MATERIALIZED VIEW)
14388                self.skip(); // consume MATERIALIZED
14389                let expr = self.parse_or()?;
14390                col_def.materialized_expr = Some(Box::new(expr));
14391            } else if self.match_identifier("ALIAS") {
14392                // ClickHouse: ALIAS expr
14393                let expr = self.parse_or()?;
14394                col_def.alias_expr = Some(Box::new(expr));
14395            } else if matches!(
14396                self.config.dialect,
14397                Some(crate::dialects::DialectType::ClickHouse)
14398            ) && self.check_identifier("EXPRESSION")
14399            {
14400                // ClickHouse dictionary column: EXPRESSION expr
14401                self.skip(); // consume EXPRESSION
14402                let expr = self.parse_or()?;
14403                col_def.materialized_expr = Some(Box::new(expr));
14404            } else if matches!(
14405                self.config.dialect,
14406                Some(crate::dialects::DialectType::ClickHouse)
14407            ) && (self.match_identifier("HIERARCHICAL")
14408                || self.match_identifier("IS_OBJECT_ID")
14409                || self.match_identifier("INJECTIVE")
14410                || self.match_identifier("BIDIRECTIONAL"))
14411            {
14412                // ClickHouse dictionary column attributes: HIERARCHICAL, IS_OBJECT_ID, INJECTIVE, BIDIRECTIONAL
14413                // These are flag-like attributes with no value, just skip them
14414            } else if self.match_identifier("TTL") {
14415                // ClickHouse: TTL expr
14416                let expr = self.parse_expression()?;
14417                col_def.ttl_expr = Some(Box::new(expr));
14418            } else if matches!(
14419                self.config.dialect,
14420                Some(crate::dialects::DialectType::ClickHouse)
14421            ) && self.check(TokenType::Settings)
14422                && self.check_next(TokenType::LParen)
14423            {
14424                // ClickHouse: SETTINGS (key = value, ...) on column definition
14425                // Only match parenthesized form; non-parenthesized SETTINGS is statement-level
14426                self.skip(); // consume SETTINGS
14427                self.expect(TokenType::LParen)?;
14428                let mut depth = 1i32;
14429                while !self.is_at_end() && depth > 0 {
14430                    if self.check(TokenType::LParen) {
14431                        depth += 1;
14432                    }
14433                    if self.check(TokenType::RParen) {
14434                        depth -= 1;
14435                        if depth == 0 {
14436                            break;
14437                        }
14438                    }
14439                    self.skip();
14440                }
14441                self.expect(TokenType::RParen)?;
14442            } else {
14443                // Skip unknown column modifiers (DEFERRABLE, CHARACTER SET, etc.)
14444                // to allow parsing to continue
14445                if self.skip_column_modifier() {
14446                    continue;
14447                }
14448                break;
14449            }
14450        }
14451
14452        Ok(col_def)
14453    }
14454
14455    /// Skip optional column modifiers that we don't need to preserve
14456    fn skip_column_modifier(&mut self) -> bool {
14457        // NOT DEFERRABLE, NOT CASESPECIFIC - handle NOT followed by specific keywords
14458        // (NOT NULL is handled earlier in the constraint loop)
14459        if self.check(TokenType::Not) {
14460            // Check what follows NOT
14461            if self.check_next_identifier("DEFERRABLE")
14462                || self.check_next_identifier("CASESPECIFIC")
14463            {
14464                self.skip(); // consume NOT
14465                self.skip(); // consume DEFERRABLE/CASESPECIFIC
14466                return true;
14467            }
14468        }
14469        // DEFERRABLE / NOT DEFERRABLE / INITIALLY DEFERRED / INITIALLY IMMEDIATE
14470        if self.match_identifier("DEFERRABLE")
14471            || self.match_identifier("DEFERRED")
14472            || self.match_identifier("IMMEDIATE")
14473        {
14474            return true;
14475        }
14476        // CHARACTER SET name
14477        if self.match_identifier("CHARACTER") {
14478            self.match_token(TokenType::Set);
14479            // Consume charset name (can be multiple parts like LATIN, utf8_bin, etc.)
14480            let _ = self.match_token(TokenType::Var) || self.match_token(TokenType::Identifier);
14481            return true;
14482        }
14483        // UPPERCASE, CASESPECIFIC
14484        if self.match_identifier("UPPERCASE") || self.match_identifier("CASESPECIFIC") {
14485            return true;
14486        }
14487        // Note: COMPRESS, FORMAT, TITLE, and INLINE LENGTH are now properly parsed and stored in ColumnDef
14488        false
14489    }
14490
14491    /// Parse Teradata-specific table options after CREATE TABLE AS
14492    /// Returns (with_data, with_statistics, teradata_indexes)
14493    fn parse_teradata_table_options(&mut self) -> (Option<bool>, Option<bool>, Vec<TeradataIndex>) {
14494        let mut with_data = None;
14495        let mut with_statistics = None;
14496        let mut teradata_indexes = Vec::new();
14497
14498        loop {
14499            // WITH DATA [AND STATISTICS] / WITH NO DATA [AND NO STATISTICS]
14500            if self.match_token(TokenType::With) {
14501                let no = self.match_token(TokenType::No); // optional NO
14502                self.match_identifier("DATA");
14503                with_data = Some(!no); // WITH DATA = true, WITH NO DATA = false
14504                                       // Optional AND [NO] STATISTICS
14505                if self.match_token(TokenType::And) {
14506                    let no_stats = self.match_token(TokenType::No); // optional NO
14507                    self.match_identifier("STATISTICS");
14508                    with_statistics = Some(!no_stats); // AND STATISTICS = true, AND NO STATISTICS = false
14509                }
14510                continue;
14511            }
14512            // NO PRIMARY INDEX
14513            if self.match_token(TokenType::No) {
14514                self.match_token(TokenType::PrimaryKey);
14515                self.match_token(TokenType::Index);
14516                teradata_indexes.push(TeradataIndex {
14517                    kind: TeradataIndexKind::NoPrimary,
14518                    name: None,
14519                    columns: Vec::new(),
14520                });
14521                // Consume optional comma separator between index specs
14522                self.match_token(TokenType::Comma);
14523                continue;
14524            }
14525            // PRIMARY AMP INDEX / PRIMARY INDEX
14526            if self.match_token(TokenType::PrimaryKey) {
14527                let is_amp = self.match_identifier("AMP");
14528                self.match_token(TokenType::Index);
14529                // Optional index name
14530                let name = if self.is_identifier_token() && !self.check(TokenType::LParen) {
14531                    Some(self.advance().text)
14532                } else {
14533                    None
14534                };
14535                // Optional column list
14536                let columns = if self.match_token(TokenType::LParen) {
14537                    let cols = self.parse_identifier_list_raw();
14538                    self.match_token(TokenType::RParen);
14539                    cols
14540                } else {
14541                    Vec::new()
14542                };
14543                teradata_indexes.push(TeradataIndex {
14544                    kind: if is_amp {
14545                        TeradataIndexKind::PrimaryAmp
14546                    } else {
14547                        TeradataIndexKind::Primary
14548                    },
14549                    name,
14550                    columns,
14551                });
14552                // Consume optional comma separator between index specs
14553                self.match_token(TokenType::Comma);
14554                continue;
14555            }
14556            // UNIQUE [PRIMARY] INDEX
14557            if self.match_token(TokenType::Unique) {
14558                let is_primary = self.match_token(TokenType::PrimaryKey);
14559                self.match_token(TokenType::Index);
14560                // Optional index name
14561                let name = if self.is_identifier_token() {
14562                    Some(self.advance().text)
14563                } else {
14564                    None
14565                };
14566                // Optional column list
14567                let columns = if self.match_token(TokenType::LParen) {
14568                    let cols = self.parse_identifier_list_raw();
14569                    self.match_token(TokenType::RParen);
14570                    cols
14571                } else {
14572                    Vec::new()
14573                };
14574                teradata_indexes.push(TeradataIndex {
14575                    kind: if is_primary {
14576                        TeradataIndexKind::UniquePrimary
14577                    } else {
14578                        TeradataIndexKind::Unique
14579                    },
14580                    name,
14581                    columns,
14582                });
14583                // Consume optional comma separator between index specs
14584                self.match_token(TokenType::Comma);
14585                continue;
14586            }
14587            // Plain INDEX (non-primary, non-unique)
14588            if self.match_token(TokenType::Index) {
14589                // Optional index name
14590                let name = if self.is_identifier_token() && !self.check(TokenType::LParen) {
14591                    Some(self.advance().text)
14592                } else {
14593                    None
14594                };
14595                // Optional column list
14596                let columns = if self.match_token(TokenType::LParen) {
14597                    let cols = self.parse_identifier_list_raw();
14598                    self.match_token(TokenType::RParen);
14599                    cols
14600                } else {
14601                    Vec::new()
14602                };
14603                teradata_indexes.push(TeradataIndex {
14604                    kind: TeradataIndexKind::Secondary,
14605                    name,
14606                    columns,
14607                });
14608                // Consume optional comma separator between index specs
14609                self.match_token(TokenType::Comma);
14610                continue;
14611            }
14612            break;
14613        }
14614
14615        (with_data, with_statistics, teradata_indexes)
14616    }
14617
14618    /// Parse Teradata table options after name before column list (comma-separated)
14619    fn parse_teradata_post_name_options(&mut self) -> Vec<String> {
14620        // Options begin with a comma after the table name.
14621        if !self.match_token(TokenType::Comma) {
14622            return Vec::new();
14623        }
14624
14625        let mut options = Vec::new();
14626        let mut current_tokens: Vec<(String, TokenType)> = Vec::new();
14627        let mut paren_depth = 0;
14628        let mut in_value = false;
14629
14630        while !self.is_at_end() {
14631            if self.check(TokenType::LParen) && paren_depth == 0 {
14632                if !in_value {
14633                    // Column list begins
14634                    break;
14635                }
14636                let mut is_terminal = false;
14637                if let Some((last_text, last_type)) = current_tokens.last() {
14638                    let last_upper = last_text.to_ascii_uppercase();
14639                    is_terminal = matches!(last_type, TokenType::Number | TokenType::String)
14640                        || matches!(
14641                            last_upper.as_str(),
14642                            "ON" | "OFF"
14643                                | "DEFAULT"
14644                                | "NEVER"
14645                                | "ALWAYS"
14646                                | "MINIMUM"
14647                                | "MAXIMUM"
14648                                | "BYTES"
14649                                | "KBYTES"
14650                                | "KILOBYTES"
14651                                | "PERCENT"
14652                        );
14653                }
14654                if is_terminal {
14655                    break;
14656                }
14657            }
14658
14659            let token = self.advance();
14660
14661            match token.token_type {
14662                TokenType::LParen => {
14663                    paren_depth += 1;
14664                }
14665                TokenType::RParen => {
14666                    if paren_depth > 0 {
14667                        paren_depth -= 1;
14668                        if paren_depth == 0 && in_value {
14669                            in_value = false;
14670                        }
14671                    }
14672                }
14673                TokenType::Eq => {
14674                    if paren_depth == 0 {
14675                        in_value = true;
14676                    }
14677                }
14678                TokenType::Comma => {
14679                    if paren_depth == 0 {
14680                        let option = self.join_teradata_option_tokens(current_tokens);
14681                        if !option.is_empty() {
14682                            options.push(option);
14683                        }
14684                        current_tokens = Vec::new();
14685                        in_value = false;
14686                        continue;
14687                    }
14688                }
14689                _ => {}
14690            }
14691
14692            let text = if token.token_type == TokenType::QuotedIdentifier {
14693                let quote_char = if self.config.dialect == Some(crate::dialects::DialectType::MySQL)
14694                    || self.config.dialect == Some(crate::dialects::DialectType::SingleStore)
14695                    || self.config.dialect == Some(crate::dialects::DialectType::Doris)
14696                    || self.config.dialect == Some(crate::dialects::DialectType::StarRocks)
14697                {
14698                    '`'
14699                } else {
14700                    '"'
14701                };
14702                format!("{}{}{}", quote_char, token.text, quote_char)
14703            } else if token.token_type == TokenType::String {
14704                format!("'{}'", token.text)
14705            } else {
14706                token.text.clone()
14707            };
14708
14709            let mut join_type = token.token_type;
14710            if join_type == TokenType::Percent && token.text.eq_ignore_ascii_case("PERCENT") {
14711                // Treat PERCENT as an identifier to preserve spacing (e.g., "1 PERCENT")
14712                join_type = TokenType::Identifier;
14713            }
14714            current_tokens.push((text, join_type));
14715        }
14716
14717        if !current_tokens.is_empty() {
14718            let option = self.join_teradata_option_tokens(current_tokens);
14719            if !option.is_empty() {
14720                options.push(option);
14721            }
14722        }
14723
14724        options
14725    }
14726
14727    /// Parse identifier list for Teradata indexes, returning raw strings
14728    fn parse_identifier_list_raw(&mut self) -> Vec<String> {
14729        let mut identifiers = Vec::new();
14730        loop {
14731            if self.is_identifier_token() || self.is_identifier_or_keyword_token() {
14732                identifiers.push(self.advance().text);
14733            }
14734            if !self.match_token(TokenType::Comma) {
14735                break;
14736            }
14737        }
14738        identifiers
14739    }
14740
14741    /// Parse GENERATED column constraint after GENERATED token has been consumed.
14742    /// Handles three forms:
14743    /// 1. GENERATED [BY DEFAULT | ALWAYS] AS IDENTITY [...] -> GeneratedAsIdentity
14744    /// 2. GENERATED ALWAYS AS (expr) [STORED|VIRTUAL] -> ComputedColumn
14745    /// 3. GENERATED ALWAYS AS ROW START|END [HIDDEN] -> GeneratedAsRow
14746    fn parse_generated_column_constraint(&mut self, col_def: &mut ColumnDef) -> Result<()> {
14747        let always;
14748        let mut on_null = false;
14749
14750        // BY DEFAULT [ON NULL] | ALWAYS
14751        if self.match_token(TokenType::By) {
14752            self.expect(TokenType::Default)?;
14753            on_null = self.match_keywords(&[TokenType::On, TokenType::Null]);
14754            always = false;
14755        } else {
14756            self.expect(TokenType::Always)?;
14757            always = true;
14758        }
14759
14760        // Expect AS
14761        self.expect(TokenType::As)?;
14762
14763        // Check what follows AS
14764        if self.check(TokenType::Row) {
14765            // GENERATED ALWAYS AS ROW START|END [HIDDEN]
14766            self.skip(); // consume ROW
14767            let start = if self.match_token(TokenType::Start) {
14768                true
14769            } else {
14770                self.expect(TokenType::End)?;
14771                false
14772            };
14773            let hidden = self.match_identifier("HIDDEN");
14774            col_def
14775                .constraints
14776                .push(ColumnConstraint::GeneratedAsRow(GeneratedAsRow {
14777                    start,
14778                    hidden,
14779                }));
14780            col_def
14781                .constraint_order
14782                .push(ConstraintType::GeneratedAsRow);
14783        } else if self.check(TokenType::Identity) {
14784            // GENERATED [BY DEFAULT | ALWAYS] AS IDENTITY [(...)]
14785            self.skip(); // consume IDENTITY
14786
14787            let mut start = None;
14788            let mut increment = None;
14789            let mut minvalue = None;
14790            let mut maxvalue = None;
14791            let mut cycle = None;
14792
14793            // Optional sequence options in parentheses
14794            if self.match_token(TokenType::LParen) {
14795                loop {
14796                    if self.match_token(TokenType::Start) {
14797                        self.match_token(TokenType::With);
14798                        start = Some(Box::new(self.parse_unary()?));
14799                    } else if self.match_token(TokenType::Increment) {
14800                        self.match_token(TokenType::By);
14801                        increment = Some(Box::new(self.parse_unary()?));
14802                    } else if self.match_token(TokenType::Minvalue) {
14803                        minvalue = Some(Box::new(self.parse_unary()?));
14804                    } else if self.match_token(TokenType::Maxvalue) {
14805                        maxvalue = Some(Box::new(self.parse_unary()?));
14806                    } else if self.match_token(TokenType::Cycle) {
14807                        cycle = Some(true);
14808                    } else if self.match_keywords(&[TokenType::No, TokenType::Cycle]) {
14809                        cycle = Some(false);
14810                    } else if self.check(TokenType::RParen) {
14811                        break;
14812                    } else {
14813                        self.skip();
14814                    }
14815                }
14816                self.expect(TokenType::RParen)?;
14817            }
14818
14819            col_def
14820                .constraints
14821                .push(ColumnConstraint::GeneratedAsIdentity(GeneratedAsIdentity {
14822                    always,
14823                    on_null,
14824                    start,
14825                    increment,
14826                    minvalue,
14827                    maxvalue,
14828                    cycle,
14829                }));
14830            col_def
14831                .constraint_order
14832                .push(ConstraintType::GeneratedAsIdentity);
14833        } else if self.check(TokenType::LParen) {
14834            // GENERATED ALWAYS AS (expr) [STORED|VIRTUAL]
14835            self.skip(); // consume LParen
14836            let expr = self.parse_expression()?;
14837            self.expect(TokenType::RParen)?;
14838
14839            // Check for STORED or VIRTUAL
14840            let (persisted, persistence_kind) = if self.match_identifier("STORED") {
14841                (true, Some("STORED".to_string()))
14842            } else if self.match_identifier("VIRTUAL") {
14843                (false, Some("VIRTUAL".to_string()))
14844            } else {
14845                (false, None)
14846            };
14847
14848            col_def
14849                .constraints
14850                .push(ColumnConstraint::ComputedColumn(ComputedColumn {
14851                    expression: Box::new(expr),
14852                    persisted,
14853                    not_null: false,
14854                    persistence_kind,
14855                    data_type: None,
14856                }));
14857            col_def
14858                .constraint_order
14859                .push(ConstraintType::ComputedColumn);
14860        } else {
14861            // Fallback: treat as GENERATED AS IDENTITY without explicit IDENTITY keyword
14862            col_def
14863                .constraints
14864                .push(ColumnConstraint::GeneratedAsIdentity(GeneratedAsIdentity {
14865                    always,
14866                    on_null,
14867                    start: None,
14868                    increment: None,
14869                    minvalue: None,
14870                    maxvalue: None,
14871                    cycle: None,
14872                }));
14873            col_def
14874                .constraint_order
14875                .push(ConstraintType::GeneratedAsIdentity);
14876        }
14877        Ok(())
14878    }
14879
14880    /// Parse AS (expr) [STORED|VIRTUAL|PERSISTED] [TYPE] [NOT NULL] for computed columns.
14881    /// Called after AS token has been consumed and we've confirmed LParen follows.
14882    /// SingleStore: AS (expr) PERSISTED TYPE NOT NULL
14883    fn parse_as_computed_column(&mut self, col_def: &mut ColumnDef) -> Result<()> {
14884        self.expect(TokenType::LParen)?;
14885        let expr = self.parse_expression()?;
14886        self.expect(TokenType::RParen)?;
14887
14888        // Check for STORED, VIRTUAL, or PERSISTED
14889        let (persisted, persistence_kind) = if self.match_identifier("STORED") {
14890            (true, Some("STORED".to_string()))
14891        } else if self.match_identifier("VIRTUAL") {
14892            (false, Some("VIRTUAL".to_string()))
14893        } else if self.match_identifier("PERSISTED") {
14894            (true, Some("PERSISTED".to_string()))
14895        } else {
14896            (false, None)
14897        };
14898
14899        // For PERSISTED columns, check for optional data type (SingleStore: PERSISTED TYPE NOT NULL)
14900        // Also check for AUTO keyword for SingleStore: PERSISTED AUTO NOT NULL
14901        let data_type = if persistence_kind.as_deref() == Some("PERSISTED") {
14902            // Check if next token looks like a data type (not NOT, not end of input, not comma/rparen)
14903            if !self.is_at_end()
14904                && !self.check(TokenType::Not)
14905                && !self.check(TokenType::Comma)
14906                && !self.check(TokenType::RParen)
14907                && !self.check(TokenType::Semicolon)
14908            {
14909                let tok = self.peek();
14910                // Check for AUTO keyword (SingleStore: PERSISTED AUTO)
14911                if tok.text.eq_ignore_ascii_case("AUTO") {
14912                    self.skip(); // consume AUTO
14913                    None // AUTO is not a data type, just a modifier
14914                } else if tok.token_type.is_keyword()
14915                    || tok.token_type == TokenType::Identifier
14916                    || tok.token_type == TokenType::Var
14917                {
14918                    Some(self.parse_data_type()?)
14919                } else {
14920                    None
14921                }
14922            } else {
14923                None
14924            }
14925        } else {
14926            None
14927        };
14928
14929        // For PERSISTED columns, check for NOT NULL
14930        let not_null = if persistence_kind.as_deref() == Some("PERSISTED") {
14931            self.match_keywords(&[TokenType::Not, TokenType::Null])
14932        } else {
14933            false
14934        };
14935
14936        col_def
14937            .constraints
14938            .push(ColumnConstraint::ComputedColumn(ComputedColumn {
14939                expression: Box::new(expr),
14940                persisted,
14941                not_null,
14942                persistence_kind,
14943                data_type,
14944            }));
14945        col_def
14946            .constraint_order
14947            .push(ConstraintType::ComputedColumn);
14948        Ok(())
14949    }
14950
14951    /// Parse PERIOD FOR SYSTEM_TIME (start_col, end_col) as a table constraint.
14952    /// Returns None if this is not actually PERIOD FOR SYSTEM_TIME (e.g., just a column named PERIOD).
14953    fn parse_period_for_system_time_table_constraint(&mut self) -> Result<Option<TableConstraint>> {
14954        // Save position for possible retreat
14955        let saved = self.current;
14956
14957        if self.match_identifier("PERIOD") {
14958            // Check if followed by FOR SYSTEM_TIME
14959            if self.match_token(TokenType::For) {
14960                if self.match_identifier("SYSTEM_TIME") {
14961                    // Parse (start_col, end_col)
14962                    self.expect(TokenType::LParen)?;
14963                    let start_name = self.expect_identifier_or_safe_keyword_with_quoted()?;
14964                    self.expect(TokenType::Comma)?;
14965                    let end_name = self.expect_identifier_or_safe_keyword_with_quoted()?;
14966                    self.expect(TokenType::RParen)?;
14967                    return Ok(Some(TableConstraint::PeriodForSystemTime {
14968                        start_col: start_name,
14969                        end_col: end_name,
14970                    }));
14971                }
14972            }
14973        }
14974
14975        // Not PERIOD FOR SYSTEM_TIME, retreat
14976        self.current = saved;
14977        Ok(None)
14978    }
14979
14980    /// Parse MySQL table options that appear after the closing paren of column definitions.
14981    /// Handles ENGINE=val, AUTO_INCREMENT=val, DEFAULT CHARSET=val, ROW_FORMAT=val,
14982    /// COMMENT='val', COLLATE=val, etc.
14983    fn parse_mysql_table_options(&mut self) -> Vec<(String, String)> {
14984        let mut options = Vec::new();
14985        loop {
14986            // Skip optional commas between options
14987            self.match_token(TokenType::Comma);
14988
14989            // DEFAULT CHARSET=val or DEFAULT CHARACTER SET=val
14990            if self.check(TokenType::Default) {
14991                let saved = self.current;
14992                self.skip(); // consume DEFAULT
14993                if self.check_identifier("CHARSET") || self.check_identifier("CHARACTER") {
14994                    let is_character = self.check_identifier("CHARACTER");
14995                    let key_part = self.advance().text.to_ascii_uppercase();
14996                    if is_character {
14997                        // CHARACTER SET
14998                        self.match_token(TokenType::Set);
14999                    }
15000                    if self.match_token(TokenType::Eq) {
15001                        let value = if self.check(TokenType::String) {
15002                            let v = format!("'{}'", self.peek().text);
15003                            self.skip();
15004                            v
15005                        } else if self.is_identifier_token()
15006                            || self.is_safe_keyword_as_identifier()
15007                            || self.check(TokenType::Number)
15008                        {
15009                            self.advance().text
15010                        } else {
15011                            self.current = saved;
15012                            break;
15013                        };
15014                        // Normalize CHARSET -> CHARACTER SET
15015                        let key = if is_character || key_part == "CHARSET" {
15016                            "DEFAULT CHARACTER SET".to_string()
15017                        } else {
15018                            format!("DEFAULT {}", key_part)
15019                        };
15020                        options.push((key, value));
15021                        continue;
15022                    }
15023                }
15024                self.current = saved;
15025                break;
15026            }
15027
15028            // ENGINE=val, AUTO_INCREMENT=val, ROW_FORMAT=val, COLLATE=val, KEY_BLOCK_SIZE=val
15029            let is_known_option = self.check_identifier("ENGINE")
15030                || self.check(TokenType::AutoIncrement)
15031                || self.check_identifier("ROW_FORMAT")
15032                || self.check(TokenType::Collate)
15033                || self.check_identifier("KEY_BLOCK_SIZE")
15034                || self.check_identifier("PACK_KEYS")
15035                || self.check_identifier("STATS_AUTO_RECALC")
15036                || self.check_identifier("STATS_PERSISTENT")
15037                || self.check_identifier("STATS_SAMPLE_PAGES")
15038                || self.check_identifier("MAX_ROWS")
15039                || self.check_identifier("MIN_ROWS")
15040                || self.check_identifier("CHECKSUM")
15041                || self.check_identifier("DELAY_KEY_WRITE")
15042                || self.check_identifier("COMPRESSION")
15043                || self.check_identifier("CONNECTION")
15044                || self.check_identifier("TABLESPACE")
15045                || self.check_identifier("ENCRYPTION");
15046
15047            if is_known_option {
15048                let key = self.advance().text.to_ascii_uppercase();
15049                if self.match_token(TokenType::Eq) {
15050                    let value = if self.check(TokenType::String) {
15051                        let v = format!("'{}'", self.peek().text);
15052                        self.skip();
15053                        v
15054                    } else if self.check(TokenType::Number) {
15055                        self.advance().text
15056                    } else if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
15057                        self.advance().text
15058                    } else {
15059                        break;
15060                    };
15061                    options.push((key, value));
15062                    continue;
15063                }
15064                break;
15065            }
15066
15067            // COMMENT='val' (Comment is a keyword token type)
15068            if self.check(TokenType::Comment) {
15069                let saved = self.current;
15070                self.skip(); // consume COMMENT
15071                if self.match_token(TokenType::Eq) {
15072                    if self.check(TokenType::String) {
15073                        let v = format!("'{}'", self.peek().text);
15074                        self.skip();
15075                        options.push(("COMMENT".to_string(), v));
15076                        continue;
15077                    }
15078                } else if self.check(TokenType::String) {
15079                    let v = format!("'{}'", self.peek().text);
15080                    self.skip();
15081                    options.push(("COMMENT".to_string(), v));
15082                    continue;
15083                }
15084                self.current = saved;
15085                break;
15086            }
15087
15088            // CHARACTER SET=val or CHARSET=val (without DEFAULT prefix)
15089            if self.check_identifier("CHARACTER") || self.check_identifier("CHARSET") {
15090                let saved = self.current;
15091                let is_character = self.check_identifier("CHARACTER");
15092                self.skip(); // consume CHARACTER or CHARSET
15093                if is_character {
15094                    // CHARACTER SET
15095                    if !self.match_token(TokenType::Set) {
15096                        self.current = saved;
15097                        break;
15098                    }
15099                }
15100                if self.match_token(TokenType::Eq) {
15101                    let value = if self.check(TokenType::String) {
15102                        let v = format!("'{}'", self.peek().text);
15103                        self.skip();
15104                        v
15105                    } else if self.is_identifier_token()
15106                        || self.is_safe_keyword_as_identifier()
15107                        || self.check(TokenType::Number)
15108                    {
15109                        self.advance().text
15110                    } else {
15111                        self.current = saved;
15112                        break;
15113                    };
15114                    options.push(("CHARACTER SET".to_string(), value));
15115                    continue;
15116                }
15117                self.current = saved;
15118                break;
15119            }
15120
15121            break;
15122        }
15123        options
15124    }
15125
15126    /// Parse Hive-specific table properties that appear after column definitions.
15127    /// Handles: ROW FORMAT (SERDE/DELIMITED), STORED AS/BY, LOCATION, TBLPROPERTIES
15128    fn parse_hive_table_properties(&mut self) -> Result<Vec<Expression>> {
15129        let mut properties = Vec::new();
15130
15131        loop {
15132            // ROW FORMAT SERDE 'class' [WITH SERDEPROPERTIES (...)]
15133            // ROW FORMAT DELIMITED [FIELDS TERMINATED BY ...] [...]
15134            if self.match_token(TokenType::Row) {
15135                if let Some(row_format) = self.parse_row()? {
15136                    properties.push(row_format);
15137                    continue;
15138                }
15139            }
15140
15141            // STORED AS INPUTFORMAT 'input' OUTPUTFORMAT 'output'
15142            // STORED AS format_name
15143            // STORED BY 'storage_handler_class'
15144            if self.match_identifier("STORED") {
15145                if self.match_token(TokenType::By) {
15146                    // STORED BY 'storage_handler_class'
15147                    let handler = self.parse_string()?.unwrap_or(Expression::Null(Null));
15148                    properties.push(Expression::StorageHandlerProperty(Box::new(
15149                        StorageHandlerProperty {
15150                            this: Box::new(handler),
15151                        },
15152                    )));
15153                    continue;
15154                } else if self.match_token(TokenType::As) {
15155                    // STORED AS INPUTFORMAT 'x' OUTPUTFORMAT 'y' or STORED AS format
15156                    if self.match_token(TokenType::InputFormat) {
15157                        let input_format = self.parse_string()?;
15158                        let output_format = if self.match_identifier("OUTPUTFORMAT") {
15159                            self.parse_string()?
15160                        } else {
15161                            None
15162                        };
15163                        // Use InputOutputFormat inside FileFormatProperty.this
15164                        let io_format =
15165                            Expression::InputOutputFormat(Box::new(InputOutputFormat {
15166                                input_format: input_format.map(Box::new),
15167                                output_format: output_format.map(Box::new),
15168                            }));
15169                        properties.push(Expression::FileFormatProperty(Box::new(
15170                            FileFormatProperty {
15171                                this: Some(Box::new(io_format)),
15172                                expressions: vec![],
15173                                hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
15174                                    value: true,
15175                                }))),
15176                            },
15177                        )));
15178                        continue;
15179                    } else {
15180                        // STORED AS format_name (e.g., STORED AS TEXTFILE, STORED AS ORC)
15181                        let format = if self.check(TokenType::String) {
15182                            Expression::Literal(Box::new(Literal::String(
15183                                self.advance().text.clone(),
15184                            )))
15185                        } else if self.is_identifier_token() || self.is_safe_keyword_as_identifier()
15186                        {
15187                            Expression::Identifier(Identifier::new(self.advance().text.clone()))
15188                        } else {
15189                            break;
15190                        };
15191                        properties.push(Expression::FileFormatProperty(Box::new(
15192                            FileFormatProperty {
15193                                this: Some(Box::new(format)),
15194                                expressions: vec![],
15195                                hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
15196                                    value: true,
15197                                }))),
15198                            },
15199                        )));
15200                        continue;
15201                    }
15202                }
15203            }
15204
15205            // USING format_name (Databricks/Spark) e.g., USING DELTA, USING PARQUET
15206            // This is similar to STORED AS but uses different syntax
15207            if self.match_token(TokenType::Using) {
15208                // Parse the format name (e.g., DELTA, PARQUET, ICEBERG, etc.)
15209                let format = if self.check(TokenType::String) {
15210                    Expression::Literal(Box::new(Literal::String(self.advance().text.clone())))
15211                } else if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
15212                    Expression::Identifier(Identifier::new(self.advance().text.clone()))
15213                } else {
15214                    break;
15215                };
15216                // Create FileFormatProperty WITHOUT hive_format to signal USING syntax
15217                properties.push(Expression::FileFormatProperty(Box::new(
15218                    FileFormatProperty {
15219                        this: Some(Box::new(format)),
15220                        expressions: vec![],
15221                        hive_format: None, // None indicates USING syntax (not STORED AS)
15222                    },
15223                )));
15224                continue;
15225            }
15226
15227            // LOCATION 'path'
15228            if self.match_identifier("LOCATION") {
15229                let path = self.parse_string()?.unwrap_or(Expression::Null(Null));
15230                properties.push(Expression::LocationProperty(Box::new(LocationProperty {
15231                    this: Box::new(path),
15232                })));
15233                continue;
15234            }
15235
15236            // TBLPROPERTIES ('key'='value', ...)
15237            if self.match_identifier("TBLPROPERTIES") {
15238                // Parse the property list manually since parse_property doesn't handle key=value
15239                self.expect(TokenType::LParen)?;
15240                let mut prop_exprs = Vec::new();
15241                loop {
15242                    if self.check(TokenType::RParen) {
15243                        break;
15244                    }
15245                    // Parse 'key'='value' or key=value
15246                    let key = self.parse_primary()?;
15247                    if self.match_token(TokenType::Eq) {
15248                        let value = self.parse_primary()?;
15249                        prop_exprs.push(Expression::Eq(Box::new(BinaryOp::new(key, value))));
15250                    } else {
15251                        prop_exprs.push(key);
15252                    }
15253                    if !self.match_token(TokenType::Comma) {
15254                        break;
15255                    }
15256                }
15257                self.expect(TokenType::RParen)?;
15258                properties.push(Expression::Properties(Box::new(Properties {
15259                    expressions: prop_exprs,
15260                })));
15261                continue;
15262            }
15263
15264            // DISTRIBUTED BY HASH (col1, col2) [BUCKETS n] (StarRocks/Doris)
15265            if self.match_identifier("DISTRIBUTED") {
15266                if let Some(dist_prop) = self.parse_distributed_property()? {
15267                    properties.push(dist_prop);
15268                    continue;
15269                }
15270            }
15271
15272            // CLUSTERED BY (col, col, ...) [SORTED BY (col, col, ...)] INTO n BUCKETS (Hive/Athena)
15273            if self.match_identifier("CLUSTERED") {
15274                self.expect(TokenType::By)?;
15275                self.expect(TokenType::LParen)?;
15276                let expressions = self.parse_expression_list()?;
15277                self.expect(TokenType::RParen)?;
15278
15279                // Optional SORTED BY (col, col, ...)
15280                let sorted_by = if self.match_identifier("SORTED") {
15281                    self.expect(TokenType::By)?;
15282                    self.expect(TokenType::LParen)?;
15283                    let sorted_exprs = self.parse_expression_list()?;
15284                    self.expect(TokenType::RParen)?;
15285                    Some(Box::new(Expression::Tuple(Box::new(Tuple {
15286                        expressions: sorted_exprs,
15287                    }))))
15288                } else {
15289                    None
15290                };
15291
15292                // INTO n BUCKETS
15293                let buckets = if self.match_token(TokenType::Into) {
15294                    let num = self.parse_expression()?;
15295                    if !self.match_identifier("BUCKETS") {
15296                        return Err(self.parse_error("Expected BUCKETS after INTO <n>"));
15297                    }
15298                    Some(Box::new(num))
15299                } else {
15300                    None
15301                };
15302
15303                properties.push(Expression::ClusteredByProperty(Box::new(
15304                    ClusteredByProperty {
15305                        expressions,
15306                        sorted_by,
15307                        buckets,
15308                    },
15309                )));
15310                continue;
15311            }
15312
15313            // PARTITIONED BY (col, col, ...) or PARTITIONED BY (col, BUCKET(n, col), ...) (Hive/Athena/Iceberg)
15314            if self.match_identifier("PARTITIONED") {
15315                self.expect(TokenType::By)?;
15316                self.expect(TokenType::LParen)?;
15317
15318                let mut partition_exprs = Vec::new();
15319                loop {
15320                    if self.check(TokenType::RParen) {
15321                        break;
15322                    }
15323
15324                    // Check for transform functions like BUCKET(n, col), TRUNCATE(n, col), etc.
15325                    if self.check_identifier("BUCKET") || self.check_identifier("TRUNCATE") {
15326                        let func_name = self.advance().text.clone();
15327                        self.expect(TokenType::LParen)?;
15328                        let args = self.parse_expression_list()?;
15329                        self.expect(TokenType::RParen)?;
15330
15331                        // Create a Function expression for BUCKET/TRUNCATE
15332                        partition_exprs.push(Expression::Function(Box::new(Function {
15333                            name: func_name,
15334                            args,
15335                            distinct: false,
15336                            trailing_comments: Vec::new(),
15337                            use_bracket_syntax: false,
15338                            no_parens: false,
15339                            quoted: false,
15340                            span: None,
15341                            inferred_type: None,
15342                        })));
15343                    } else {
15344                        // Try to parse as column definition (name data_type) for Hive-style partitioned by
15345                        // e.g., PARTITIONED BY (y INT, z STRING)
15346                        let saved_pos = self.current;
15347                        let mut parsed_as_column = false;
15348                        // Allow type keywords (like DATE, TIMESTAMP) as column names in PARTITIONED BY
15349                        if self.check(TokenType::Var)
15350                            || self.check(TokenType::Identifier)
15351                            || self.check(TokenType::Date)
15352                            || self.check(TokenType::Timestamp)
15353                            || self.check(TokenType::Int)
15354                            || self.check(TokenType::BigInt)
15355                            || self.check(TokenType::SmallInt)
15356                            || self.check(TokenType::TinyInt)
15357                            || self.check(TokenType::Float)
15358                            || self.check(TokenType::Double)
15359                            || self.check(TokenType::Boolean)
15360                        {
15361                            let col_name = self.advance().text.clone();
15362                            // Check if next token looks like a data type
15363                            if self.check(TokenType::Var)
15364                                || self.check(TokenType::Identifier)
15365                                || self.check(TokenType::Int)
15366                                || self.check(TokenType::BigInt)
15367                                || self.check(TokenType::SmallInt)
15368                                || self.check(TokenType::TinyInt)
15369                                || self.check(TokenType::Float)
15370                                || self.check(TokenType::Double)
15371                                || self.check(TokenType::Boolean)
15372                                || self.check(TokenType::Date)
15373                                || self.check(TokenType::Timestamp)
15374                            {
15375                                let type_text = self.peek().text.to_ascii_uppercase();
15376                                let is_type = matches!(
15377                                    type_text.as_str(),
15378                                    "INT"
15379                                        | "INTEGER"
15380                                        | "BIGINT"
15381                                        | "SMALLINT"
15382                                        | "TINYINT"
15383                                        | "FLOAT"
15384                                        | "DOUBLE"
15385                                        | "DECIMAL"
15386                                        | "NUMERIC"
15387                                        | "STRING"
15388                                        | "VARCHAR"
15389                                        | "CHAR"
15390                                        | "BINARY"
15391                                        | "BOOLEAN"
15392                                        | "DATE"
15393                                        | "TIMESTAMP"
15394                                        | "DATETIME"
15395                                        | "ARRAY"
15396                                        | "MAP"
15397                                        | "STRUCT"
15398                                );
15399                                if is_type {
15400                                    // Parse as column definition
15401                                    let data_type = self.parse_data_type()?;
15402                                    // Store as ColumnDef expression
15403                                    partition_exprs.push(Expression::ColumnDef(Box::new(
15404                                        crate::expressions::ColumnDef::new(col_name, data_type),
15405                                    )));
15406                                    parsed_as_column = true;
15407                                }
15408                            }
15409                        }
15410                        if !parsed_as_column {
15411                            // Backtrack and parse as regular expression
15412                            self.current = saved_pos;
15413                            partition_exprs.push(self.parse_expression()?);
15414                        }
15415                    }
15416
15417                    if !self.match_token(TokenType::Comma) {
15418                        break;
15419                    }
15420                }
15421                self.expect(TokenType::RParen)?;
15422
15423                properties.push(Expression::PartitionedByProperty(Box::new(
15424                    PartitionedByProperty {
15425                        this: Box::new(Expression::Tuple(Box::new(Tuple {
15426                            expressions: partition_exprs,
15427                        }))),
15428                    },
15429                )));
15430                continue;
15431            }
15432
15433            // No more Hive properties
15434            break;
15435        }
15436
15437        Ok(properties)
15438    }
15439
15440    /// Parse table-level properties that appear after the closing paren of column definitions.
15441    /// Currently handles TSQL WITH(SYSTEM_VERSIONING=ON(...)).
15442    fn parse_post_table_properties(&mut self) -> Result<Vec<Expression>> {
15443        let mut properties = Vec::new();
15444
15445        // Doris/StarRocks: UNIQUE KEY (cols) or DUPLICATE KEY (cols) after column definitions
15446        // These are table key properties that define the distribution/sort key
15447        let is_doris_starrocks = matches!(
15448            self.config.dialect,
15449            Some(crate::dialects::DialectType::Doris)
15450                | Some(crate::dialects::DialectType::StarRocks)
15451        );
15452        if is_doris_starrocks {
15453            // UNIQUE KEY (c1, c2, ...) - defines unique key columns
15454            if self.match_text_seq(&["UNIQUE", "KEY"]) {
15455                let exprs = self.parse_composite_key_expressions()?;
15456                properties.push(Expression::UniqueKeyProperty(Box::new(
15457                    crate::expressions::UniqueKeyProperty { expressions: exprs },
15458                )));
15459            }
15460            // DUPLICATE KEY (c1, c2, ...) - defines duplicate key columns
15461            else if self.match_text_seq(&["DUPLICATE", "KEY"]) {
15462                let exprs = self.parse_composite_key_expressions()?;
15463                properties.push(Expression::DuplicateKeyProperty(Box::new(
15464                    crate::expressions::DuplicateKeyProperty { expressions: exprs },
15465                )));
15466            }
15467
15468            // DISTRIBUTED BY HASH (col1, col2) [BUCKETS n] - comes after UNIQUE KEY / DUPLICATE KEY
15469            if self.match_identifier("DISTRIBUTED") {
15470                if let Some(dist_prop) = self.parse_distributed_property()? {
15471                    properties.push(dist_prop);
15472                }
15473            }
15474
15475            // PROPERTIES ('key'='value', ...) - comes after DISTRIBUTED BY
15476            if self.match_identifier("PROPERTIES") {
15477                let props = self.parse_options_list()?;
15478                if !props.is_empty() {
15479                    properties.push(Expression::Properties(Box::new(Properties {
15480                        expressions: props,
15481                    })));
15482                }
15483            }
15484        }
15485
15486        // Check for WITH( that might contain SYSTEM_VERSIONING
15487        // We need to be careful not to consume a WITH that is meant for WITH properties
15488        // or other purposes. We only handle WITH(SYSTEM_VERSIONING=...) here.
15489        if self.check(TokenType::With) {
15490            // Look ahead: WITH followed by ( followed by SYSTEM_VERSIONING
15491            let saved = self.current;
15492            if self.match_token(TokenType::With) {
15493                if self.match_token(TokenType::LParen) {
15494                    if self.check_identifier("SYSTEM_VERSIONING") {
15495                        self.skip(); // consume SYSTEM_VERSIONING
15496                        self.expect(TokenType::Eq)?;
15497
15498                        let on = if self.match_token(TokenType::On) {
15499                            true
15500                        } else if self.match_identifier("OFF") {
15501                            false
15502                        } else {
15503                            return Err(
15504                                self.parse_error("Expected ON or OFF after SYSTEM_VERSIONING=")
15505                            );
15506                        };
15507
15508                        let mut history_table = None;
15509                        let mut data_consistency = None;
15510
15511                        // Optional parameters: ON(HISTORY_TABLE=..., DATA_CONSISTENCY_CHECK=...)
15512                        if on && self.match_token(TokenType::LParen) {
15513                            loop {
15514                                if self.check(TokenType::RParen) {
15515                                    break;
15516                                }
15517                                if self.match_identifier("HISTORY_TABLE") {
15518                                    self.expect(TokenType::Eq)?;
15519                                    // Parse table reference (could be [dbo].[table])
15520                                    let table_ref = self.parse_table_ref()?;
15521                                    history_table = Some(Expression::Table(Box::new(table_ref)));
15522                                } else if self.match_identifier("DATA_CONSISTENCY_CHECK") {
15523                                    self.expect(TokenType::Eq)?;
15524                                    let val = self.expect_identifier_or_keyword()?;
15525                                    data_consistency = Some(Expression::Identifier(
15526                                        crate::expressions::Identifier::new(val),
15527                                    ));
15528                                } else if self.check(TokenType::RParen) {
15529                                    break;
15530                                } else {
15531                                    self.skip();
15532                                }
15533                                self.match_token(TokenType::Comma);
15534                            }
15535                            self.expect(TokenType::RParen)?;
15536                        }
15537
15538                        self.expect(TokenType::RParen)?; // close WITH(...)
15539
15540                        properties.push(Expression::WithSystemVersioningProperty(Box::new(
15541                            WithSystemVersioningProperty {
15542                                on: if on {
15543                                    Some(Box::new(Expression::Boolean(
15544                                        crate::expressions::BooleanLiteral { value: true },
15545                                    )))
15546                                } else {
15547                                    None
15548                                },
15549                                this: history_table.map(Box::new),
15550                                data_consistency: data_consistency.map(Box::new),
15551                                retention_period: None,
15552                                with_: Some(Box::new(Expression::Boolean(
15553                                    crate::expressions::BooleanLiteral { value: true },
15554                                ))),
15555                            },
15556                        )));
15557                    } else {
15558                        // Not SYSTEM_VERSIONING, retreat
15559                        self.current = saved;
15560                    }
15561                } else {
15562                    // Not WITH(...), retreat
15563                    self.current = saved;
15564                }
15565            }
15566        }
15567
15568        Ok(properties)
15569    }
15570
15571    /// Parse composite key expressions for UNIQUE KEY (cols) or DUPLICATE KEY (cols)
15572    /// Returns a vector of column identifiers
15573    fn parse_composite_key_expressions(&mut self) -> Result<Vec<Expression>> {
15574        self.expect(TokenType::LParen)?;
15575        let mut expressions = Vec::new();
15576        loop {
15577            if let Some(id) = self.parse_id_var()? {
15578                expressions.push(id);
15579            } else {
15580                break;
15581            }
15582            if !self.match_token(TokenType::Comma) {
15583                break;
15584            }
15585        }
15586        self.expect(TokenType::RParen)?;
15587        Ok(expressions)
15588    }
15589
15590    /// Parse a table-level constraint
15591    fn parse_table_constraint(&mut self) -> Result<TableConstraint> {
15592        // Optional constraint name
15593        let name = if self.match_token(TokenType::Constraint) {
15594            // Use safe keyword version to accept keywords as constraint names (e.g., CONSTRAINT identity CHECK ...)
15595            Some(self.expect_identifier_or_safe_keyword_with_quoted()?)
15596        } else {
15597            None
15598        };
15599
15600        self.parse_constraint_definition(name)
15601    }
15602
15603    /// Parse constraint definition (after optional CONSTRAINT name)
15604    fn parse_constraint_definition(&mut self, name: Option<Identifier>) -> Result<TableConstraint> {
15605        if self.match_keywords(&[TokenType::PrimaryKey, TokenType::Key]) {
15606            // PRIMARY KEY [CLUSTERED|NONCLUSTERED] [name] (col1, col2) [INCLUDE (col3, col4)]
15607            // MySQL allows: PRIMARY KEY pk_name (col1, col2)
15608            // TSQL allows: PRIMARY KEY CLUSTERED (col1, col2)
15609
15610            // Check for TSQL CLUSTERED/NONCLUSTERED modifier
15611            let clustered = if self.check_identifier("CLUSTERED") {
15612                self.skip();
15613                Some("CLUSTERED".to_string())
15614            } else if self.check_identifier("NONCLUSTERED") {
15615                self.skip();
15616                Some("NONCLUSTERED".to_string())
15617            } else {
15618                None
15619            };
15620
15621            let actual_name = if name.is_none() && !self.check(TokenType::LParen) {
15622                if matches!(
15623                    self.config.dialect,
15624                    Some(crate::dialects::DialectType::ClickHouse)
15625                ) {
15626                    // ClickHouse: PRIMARY KEY col (without parentheses)
15627                    None
15628                } else if self.is_identifier_token() || self.check(TokenType::QuotedIdentifier) {
15629                    Some(self.expect_identifier_with_quoted()?)
15630                } else if self.check(TokenType::String)
15631                    && matches!(
15632                        self.config.dialect,
15633                        Some(crate::dialects::DialectType::MySQL)
15634                    )
15635                {
15636                    // MySQL: double-quoted strings can be used as constraint names
15637                    // e.g., PRIMARY KEY "pk_name" (id) -> PRIMARY KEY `pk_name` (id)
15638                    let s = self.advance().text.clone();
15639                    Some(Identifier {
15640                        name: s,
15641                        quoted: true,
15642                        trailing_comments: Vec::new(),
15643                        span: None,
15644                    })
15645                } else {
15646                    None
15647                }
15648            } else {
15649                name.clone()
15650            };
15651            // ClickHouse: PRIMARY KEY col without parens — parse single column
15652            let columns = if matches!(
15653                self.config.dialect,
15654                Some(crate::dialects::DialectType::ClickHouse)
15655            ) && !self.check(TokenType::LParen)
15656                && (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
15657            {
15658                let col_name = self.expect_identifier_or_keyword_with_quoted()?;
15659                vec![col_name]
15660            } else {
15661                self.expect(TokenType::LParen)?;
15662                // ClickHouse: allow empty PRIMARY KEY ()
15663                let cols = if self.check(TokenType::RParen) {
15664                    Vec::new()
15665                } else if matches!(
15666                    self.config.dialect,
15667                    Some(crate::dialects::DialectType::ClickHouse)
15668                ) {
15669                    // ClickHouse: PRIMARY KEY(v1, gcd(v1, v2)) - expressions allowed
15670                    let mut exprs = Vec::new();
15671                    loop {
15672                        let expr = self.parse_expression()?;
15673                        let name = self.expression_to_sql(&expr);
15674                        exprs.push(Identifier::new(name));
15675                        if !self.match_token(TokenType::Comma) {
15676                            break;
15677                        }
15678                    }
15679                    exprs
15680                } else {
15681                    self.parse_index_identifier_list()?
15682                };
15683                self.expect(TokenType::RParen)?;
15684                cols
15685            };
15686            // Parse optional INCLUDE (columns)
15687            let include_columns = if self.match_identifier("INCLUDE") {
15688                self.expect(TokenType::LParen)?;
15689                let cols = self.parse_identifier_list()?;
15690                self.expect(TokenType::RParen)?;
15691                cols
15692            } else {
15693                Vec::new()
15694            };
15695            // Parse optional constraint modifiers (ENFORCED, DEFERRABLE, etc.)
15696            let mut modifiers = self.parse_constraint_modifiers();
15697            modifiers.clustered = clustered;
15698            let has_constraint_keyword = name.is_some();
15699            Ok(TableConstraint::PrimaryKey {
15700                name: actual_name.or(name),
15701                columns,
15702                include_columns,
15703                modifiers,
15704                has_constraint_keyword,
15705            })
15706        } else if self.match_token(TokenType::Unique) {
15707            // UNIQUE [CLUSTERED|NONCLUSTERED] [KEY|INDEX] [NULLS NOT DISTINCT] [name] (col1, col2) or UNIQUE column_name
15708            // MySQL allows: UNIQUE KEY name (cols), UNIQUE INDEX name (cols), UNIQUE (cols)
15709            // TSQL allows: UNIQUE CLUSTERED (cols)
15710            // PostgreSQL 15+: UNIQUE NULLS NOT DISTINCT (cols)
15711
15712            // Check for TSQL CLUSTERED/NONCLUSTERED modifier
15713            let clustered = if self.check_identifier("CLUSTERED") {
15714                self.skip();
15715                Some("CLUSTERED".to_string())
15716            } else if self.check_identifier("NONCLUSTERED") {
15717                self.skip();
15718                Some("NONCLUSTERED".to_string())
15719            } else {
15720                None
15721            };
15722
15723            let use_key_keyword =
15724                self.match_token(TokenType::Key) || self.match_token(TokenType::Index);
15725
15726            // Check for NULLS NOT DISTINCT (PostgreSQL 15+ feature)
15727            let nulls_not_distinct = self.match_text_seq(&["NULLS", "NOT", "DISTINCT"]);
15728
15729            // Check for optional constraint name (before columns)
15730            let actual_name = if name.is_none()
15731                && self.is_identifier_token()
15732                && !self.check_next(TokenType::Comma)
15733            {
15734                // Name might be here: UNIQUE KEY idx_name (cols)
15735                if self.check_next(TokenType::LParen) {
15736                    Some(self.expect_identifier_with_quoted()?)
15737                } else {
15738                    None
15739                }
15740            } else {
15741                name.clone()
15742            };
15743
15744            if self.match_token(TokenType::LParen) {
15745                let columns = self.parse_index_identifier_list()?;
15746                self.expect(TokenType::RParen)?;
15747                let mut modifiers = self.parse_constraint_modifiers();
15748                modifiers.clustered = clustered;
15749                if use_key_keyword {
15750                    // UNIQUE KEY/INDEX - use Index constraint type with UNIQUE kind
15751                    Ok(TableConstraint::Index {
15752                        name: actual_name.or(name),
15753                        columns,
15754                        kind: Some("UNIQUE".to_string()),
15755                        modifiers,
15756                        use_key_keyword,
15757                        expression: None,
15758                        index_type: None,
15759                        granularity: None,
15760                    })
15761                } else {
15762                    let has_constraint_keyword = name.is_some();
15763                    Ok(TableConstraint::Unique {
15764                        name: actual_name.or(name),
15765                        columns,
15766                        columns_parenthesized: true,
15767                        modifiers,
15768                        has_constraint_keyword,
15769                        nulls_not_distinct,
15770                    })
15771                }
15772            } else {
15773                // Single column unique (for ALTER TABLE ADD CONSTRAINT name UNIQUE colname)
15774                let col_name = self.expect_identifier()?;
15775                let mut modifiers = self.parse_constraint_modifiers();
15776                modifiers.clustered = clustered;
15777                let has_constraint_keyword = name.is_some();
15778                Ok(TableConstraint::Unique {
15779                    name: actual_name.or(name),
15780                    columns: vec![Identifier::new(col_name)],
15781                    columns_parenthesized: false,
15782                    modifiers,
15783                    has_constraint_keyword,
15784                    nulls_not_distinct,
15785                })
15786            }
15787        } else if self.match_keywords(&[TokenType::ForeignKey, TokenType::Key]) {
15788            // FOREIGN KEY (col1) [REFERENCES other_table(col2)] [ON DELETE ...] [ON UPDATE ...]
15789            self.expect(TokenType::LParen)?;
15790            let columns = self.parse_identifier_list()?;
15791            self.expect(TokenType::RParen)?;
15792            if self.match_token(TokenType::References) {
15793                let references = self.parse_foreign_key_ref()?;
15794                let modifiers = self.parse_constraint_modifiers();
15795                Ok(TableConstraint::ForeignKey {
15796                    name,
15797                    columns,
15798                    references: Some(references),
15799                    on_delete: None,
15800                    on_update: None,
15801                    modifiers,
15802                })
15803            } else {
15804                // No REFERENCES - parse optional ON DELETE/ON UPDATE directly
15805                let mut on_delete = None;
15806                let mut on_update = None;
15807                loop {
15808                    if self.check(TokenType::On) {
15809                        let saved = self.current;
15810                        self.skip(); // consume ON
15811                        if self.match_token(TokenType::Delete) {
15812                            on_delete = Some(self.parse_referential_action()?);
15813                        } else if self.match_token(TokenType::Update) {
15814                            on_update = Some(self.parse_referential_action()?);
15815                        } else {
15816                            self.current = saved;
15817                            break;
15818                        }
15819                    } else {
15820                        break;
15821                    }
15822                }
15823                let modifiers = self.parse_constraint_modifiers();
15824                Ok(TableConstraint::ForeignKey {
15825                    name,
15826                    columns,
15827                    references: None,
15828                    on_delete,
15829                    on_update,
15830                    modifiers,
15831                })
15832            }
15833        } else if self.match_token(TokenType::Check) {
15834            // CHECK (expression) or CHECK (SELECT ...) or ClickHouse: CHECK expression (without parens)
15835            let expression = if self.match_token(TokenType::LParen) {
15836                let expr = if self.check(TokenType::Select) || self.check(TokenType::With) {
15837                    // SELECT/WITH in CHECK constraint — parse directly, no Subquery wrapper
15838                    // The generator already wraps CHECK content in parens
15839                    self.parse_statement()?
15840                } else {
15841                    self.parse_expression()?
15842                };
15843                self.expect(TokenType::RParen)?;
15844                expr
15845            } else if matches!(
15846                self.config.dialect,
15847                Some(crate::dialects::DialectType::ClickHouse)
15848            ) {
15849                self.parse_or()?
15850            } else {
15851                self.expect(TokenType::LParen)?;
15852                unreachable!()
15853            };
15854            let modifiers = self.parse_constraint_modifiers();
15855            Ok(TableConstraint::Check {
15856                name,
15857                expression,
15858                modifiers,
15859            })
15860        } else if self.match_token(TokenType::Exclude) {
15861            // PostgreSQL EXCLUDE constraint
15862            // EXCLUDE [USING method] (element WITH operator, ...) [INCLUDE (cols)] [WHERE (expr)] [WITH (params)]
15863            let using = if self.match_token(TokenType::Using) {
15864                Some(self.expect_identifier()?)
15865            } else {
15866                None
15867            };
15868
15869            self.expect(TokenType::LParen)?;
15870            let mut elements = Vec::new();
15871            loop {
15872                // Parse element expression: may be a function call like INT4RANGE(vid, nid)
15873                // or column name possibly with operator class, ASC/DESC, NULLS FIRST/LAST
15874                let mut expr_parts = Vec::new();
15875                let mut paren_depth = 0;
15876                while !self.is_at_end() {
15877                    if self.check(TokenType::LParen) {
15878                        paren_depth += 1;
15879                        expr_parts.push(self.advance().text);
15880                    } else if self.check(TokenType::RParen) {
15881                        if paren_depth == 0 {
15882                            break;
15883                        }
15884                        paren_depth -= 1;
15885                        expr_parts.push(self.advance().text);
15886                    } else if paren_depth == 0 && self.check(TokenType::With) {
15887                        break;
15888                    } else if self.check(TokenType::String) {
15889                        // Preserve string literal quotes
15890                        let token = self.advance();
15891                        expr_parts.push(format!("'{}'", token.text));
15892                    } else {
15893                        expr_parts.push(self.advance().text);
15894                    }
15895                }
15896                let expression = expr_parts
15897                    .join(" ")
15898                    .replace(" (", "(")
15899                    .replace(" )", ")")
15900                    .replace("( ", "(")
15901                    .replace(" ,", ",");
15902
15903                // Parse WITH operator
15904                self.expect(TokenType::With)?;
15905                let operator = self.advance().text.clone();
15906
15907                elements.push(ExcludeElement {
15908                    expression,
15909                    operator,
15910                });
15911
15912                if !self.match_token(TokenType::Comma) {
15913                    break;
15914                }
15915            }
15916            self.expect(TokenType::RParen)?;
15917
15918            // Parse optional INCLUDE (columns)
15919            let include_columns = if self.match_identifier("INCLUDE") {
15920                self.expect(TokenType::LParen)?;
15921                let cols = self.parse_identifier_list()?;
15922                self.expect(TokenType::RParen)?;
15923                cols
15924            } else {
15925                Vec::new()
15926            };
15927
15928            // Parse optional WITH (storage_parameters)
15929            let with_params = if self.match_token(TokenType::With) {
15930                self.expect(TokenType::LParen)?;
15931                let mut params = Vec::new();
15932                loop {
15933                    let key = self.expect_identifier()?;
15934                    self.expect(TokenType::Eq)?;
15935                    let val = self.advance().text.clone();
15936                    params.push((key, val));
15937                    if !self.match_token(TokenType::Comma) {
15938                        break;
15939                    }
15940                }
15941                self.expect(TokenType::RParen)?;
15942                params
15943            } else {
15944                Vec::new()
15945            };
15946
15947            // Parse optional USING INDEX TABLESPACE tablespace_name
15948            let using_index_tablespace =
15949                if self.check(TokenType::Using) && self.check_next(TokenType::Index) {
15950                    self.skip(); // consume USING
15951                    self.skip(); // consume INDEX
15952                    if self.match_identifier("TABLESPACE") {
15953                        Some(self.expect_identifier()?)
15954                    } else {
15955                        None
15956                    }
15957                } else {
15958                    None
15959                };
15960
15961            // Parse optional WHERE clause
15962            let where_clause = if self.match_token(TokenType::Where) {
15963                self.expect(TokenType::LParen)?;
15964                let expr = self.parse_expression()?;
15965                self.expect(TokenType::RParen)?;
15966                Some(Box::new(expr))
15967            } else {
15968                None
15969            };
15970
15971            let modifiers = self.parse_constraint_modifiers();
15972            Ok(TableConstraint::Exclude {
15973                name,
15974                using,
15975                elements,
15976                include_columns,
15977                where_clause,
15978                with_params,
15979                using_index_tablespace,
15980                modifiers,
15981            })
15982        } else if matches!(
15983            self.config.dialect,
15984            Some(crate::dialects::DialectType::ClickHouse)
15985        ) && self.check_identifier("ASSUME")
15986        {
15987            // ClickHouse: CONSTRAINT name ASSUME expression
15988            // Used for query optimization assumptions
15989            self.skip(); // consume ASSUME
15990            let expression = if self.match_token(TokenType::LParen) {
15991                // ASSUME (expr) or ASSUME (SELECT ...)
15992                let expr = if self.check(TokenType::Select) || self.check(TokenType::With) {
15993                    self.parse_statement()?
15994                } else {
15995                    self.parse_expression()?
15996                };
15997                self.expect(TokenType::RParen)?;
15998                expr
15999            } else {
16000                self.parse_expression()?
16001            };
16002            Ok(TableConstraint::Assume { name, expression })
16003        } else if self.match_token(TokenType::Default) {
16004            // TSQL: CONSTRAINT name DEFAULT value FOR column
16005            let expression = self.parse_expression()?;
16006            self.expect(TokenType::For)?;
16007            let column = self.expect_identifier_with_quoted()?;
16008            Ok(TableConstraint::Default {
16009                name,
16010                expression,
16011                column,
16012            })
16013        } else {
16014            Err(self.parse_error("Expected PRIMARY KEY, UNIQUE, FOREIGN KEY, CHECK, or EXCLUDE"))
16015        }
16016    }
16017
16018    /// Parse INDEX/KEY table constraint for MySQL
16019    /// Syntax: [FULLTEXT|SPATIAL] {INDEX|KEY} [name] [USING {BTREE|HASH}] (columns)
16020    ///     or: [FULLTEXT|SPATIAL] {INDEX|KEY} [USING {BTREE|HASH}] (columns)  -- no name
16021    fn parse_index_table_constraint(&mut self) -> Result<TableConstraint> {
16022        // Check for FULLTEXT or SPATIAL prefix
16023        let kind = if self.match_identifier("FULLTEXT") {
16024            Some("FULLTEXT".to_string())
16025        } else if self.match_identifier("SPATIAL") {
16026            Some("SPATIAL".to_string())
16027        } else {
16028            None
16029        };
16030
16031        // Consume INDEX or KEY keyword, track which was used
16032        let use_key_keyword = if self.match_token(TokenType::Key) {
16033            true
16034        } else {
16035            self.match_token(TokenType::Index);
16036            false
16037        };
16038
16039        // Check for USING before index name (MySQL allows: INDEX USING BTREE (col))
16040        let early_using = if self.check(TokenType::Using) {
16041            self.match_token(TokenType::Using);
16042            if self.match_identifier("BTREE") {
16043                Some("BTREE".to_string())
16044            } else if self.match_identifier("HASH") {
16045                Some("HASH".to_string())
16046            } else {
16047                None
16048            }
16049        } else {
16050            None
16051        };
16052
16053        // Optional index name (only if next token is not LParen or Using)
16054        let name = if !self.check(TokenType::LParen)
16055            && !self.check(TokenType::Using)
16056            && self.is_identifier_token()
16057        {
16058            Some(Identifier::new(self.advance().text))
16059        } else {
16060            None
16061        };
16062
16063        // Check for USING after index name (if not already parsed)
16064        let late_using = if early_using.is_none() && self.match_token(TokenType::Using) {
16065            if self.match_identifier("BTREE") {
16066                Some("BTREE".to_string())
16067            } else if self.match_identifier("HASH") {
16068                Some("HASH".to_string())
16069            } else {
16070                None
16071            }
16072        } else {
16073            None
16074        };
16075
16076        // Parse columns (with optional prefix length and DESC)
16077        self.expect(TokenType::LParen)?;
16078        let columns = self.parse_index_identifier_list()?;
16079        self.expect(TokenType::RParen)?;
16080
16081        // Parse optional constraint modifiers (USING after columns, COMMENT, etc.)
16082        let mut modifiers = self.parse_constraint_modifiers();
16083
16084        // Set the using value from wherever we found it
16085        // Both early_using (before name) and late_using (after name, before columns) mean USING is before columns
16086        if early_using.is_some() {
16087            modifiers.using = early_using;
16088            modifiers.using_before_columns = true;
16089        } else if late_using.is_some() {
16090            modifiers.using = late_using;
16091            modifiers.using_before_columns = true; // USING was after name but before columns
16092        }
16093        // If using was found in parse_constraint_modifiers (after columns), using_before_columns stays false
16094
16095        Ok(TableConstraint::Index {
16096            name,
16097            columns,
16098            kind,
16099            modifiers,
16100            use_key_keyword,
16101            expression: None,
16102            index_type: None,
16103            granularity: None,
16104        })
16105    }
16106
16107    /// Parse constraint modifiers like ENFORCED, DEFERRABLE, NORELY, USING, etc.
16108    fn parse_constraint_modifiers(&mut self) -> ConstraintModifiers {
16109        let mut modifiers = ConstraintModifiers::default();
16110        loop {
16111            if self.match_token(TokenType::Not) {
16112                // NOT ENFORCED, NOT DEFERRABLE, NOT VALID
16113                if self.match_identifier("ENFORCED") {
16114                    modifiers.enforced = Some(false);
16115                } else if self.match_identifier("DEFERRABLE") {
16116                    modifiers.deferrable = Some(false);
16117                } else if self.match_identifier("VALID") {
16118                    modifiers.not_valid = true;
16119                }
16120            } else if self.match_identifier("ENFORCED") {
16121                modifiers.enforced = Some(true);
16122            } else if self.match_identifier("DEFERRABLE") {
16123                modifiers.deferrable = Some(true);
16124            } else if self.match_identifier("INITIALLY") {
16125                // INITIALLY DEFERRED or INITIALLY IMMEDIATE
16126                if self.match_identifier("DEFERRED") {
16127                    modifiers.initially_deferred = Some(true);
16128                } else if self.match_identifier("IMMEDIATE") {
16129                    modifiers.initially_deferred = Some(false);
16130                }
16131            } else if self.match_identifier("NORELY") {
16132                modifiers.norely = true;
16133            } else if self.match_identifier("RELY") {
16134                modifiers.rely = true;
16135            } else if self.match_token(TokenType::Using) {
16136                // USING BTREE or USING HASH (MySQL)
16137                if self.match_identifier("BTREE") {
16138                    modifiers.using = Some("BTREE".to_string());
16139                } else if self.match_identifier("HASH") {
16140                    modifiers.using = Some("HASH".to_string());
16141                }
16142            } else if self.match_token(TokenType::Comment) {
16143                // MySQL index COMMENT 'text'
16144                if self.check(TokenType::String) {
16145                    modifiers.comment = Some(self.advance().text);
16146                }
16147            } else if self.match_identifier("VISIBLE") {
16148                modifiers.visible = Some(true);
16149            } else if self.match_identifier("INVISIBLE") {
16150                modifiers.visible = Some(false);
16151            } else if self.match_identifier("ENGINE_ATTRIBUTE") {
16152                // MySQL ENGINE_ATTRIBUTE = 'value'
16153                self.match_token(TokenType::Eq);
16154                if self.check(TokenType::String) {
16155                    modifiers.engine_attribute = Some(self.advance().text);
16156                }
16157            } else if self.check(TokenType::With) {
16158                let saved_with = self.current;
16159                self.skip(); // consume WITH
16160                if self.match_identifier("PARSER") {
16161                    // MySQL WITH PARSER name
16162                    if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
16163                        modifiers.with_parser = Some(self.advance().text);
16164                    }
16165                } else if self.check(TokenType::LParen) {
16166                    // TSQL: WITH (PAD_INDEX=ON, STATISTICS_NORECOMPUTE=OFF, ...)
16167                    // Parse and store the options
16168                    self.skip(); // consume (
16169                    loop {
16170                        if self.check(TokenType::RParen) || self.is_at_end() {
16171                            break;
16172                        }
16173                        // Parse KEY=VALUE pair
16174                        let key = self.advance().text.clone();
16175                        if self.match_token(TokenType::Eq) {
16176                            let value = self.advance().text.clone();
16177                            modifiers.with_options.push((key, value));
16178                        }
16179                        if !self.match_token(TokenType::Comma) {
16180                            break;
16181                        }
16182                    }
16183                    let _ = self.match_token(TokenType::RParen);
16184                } else {
16185                    // Not WITH PARSER or WITH (...), backtrack
16186                    self.current = saved_with;
16187                    break;
16188                }
16189            } else if self.check(TokenType::On) {
16190                let saved_on = self.current;
16191                self.skip(); // consume ON
16192                if self.match_identifier("CONFLICT") {
16193                    // SQLite ON CONFLICT action: ROLLBACK, ABORT, FAIL, IGNORE, REPLACE
16194                    if self.match_token(TokenType::Rollback) {
16195                        modifiers.on_conflict = Some("ROLLBACK".to_string());
16196                    } else if self.match_identifier("ABORT") {
16197                        modifiers.on_conflict = Some("ABORT".to_string());
16198                    } else if self.match_identifier("FAIL") {
16199                        modifiers.on_conflict = Some("FAIL".to_string());
16200                    } else if self.match_token(TokenType::Ignore) {
16201                        modifiers.on_conflict = Some("IGNORE".to_string());
16202                    } else if self.match_token(TokenType::Replace) {
16203                        modifiers.on_conflict = Some("REPLACE".to_string());
16204                    }
16205                } else if self.is_identifier_token() || self.check(TokenType::QuotedIdentifier) {
16206                    // TSQL: ON [filegroup] - parse and store
16207                    let quoted = self.check(TokenType::QuotedIdentifier);
16208                    let name = self.advance().text.clone();
16209                    modifiers.on_filegroup = Some(Identifier {
16210                        name,
16211                        quoted,
16212                        trailing_comments: Vec::new(),
16213                        span: None,
16214                    });
16215                } else {
16216                    // Unknown ON clause, backtrack
16217                    self.current = saved_on;
16218                    break;
16219                }
16220            } else {
16221                break;
16222            }
16223        }
16224        modifiers
16225    }
16226
16227    /// Parse foreign key reference
16228    fn parse_foreign_key_ref(&mut self) -> Result<ForeignKeyRef> {
16229        let table = self.parse_table_ref()?;
16230
16231        let columns = if self.match_token(TokenType::LParen) {
16232            let cols = self.parse_identifier_list()?;
16233            self.expect(TokenType::RParen)?;
16234            cols
16235        } else {
16236            Vec::new()
16237        };
16238
16239        // Handle optional MATCH clause (MATCH FULL, MATCH PARTIAL, MATCH SIMPLE)
16240        // MATCH clause comes BEFORE ON DELETE/ON UPDATE in PostgreSQL
16241        let match_type = if self.match_token(TokenType::Match) {
16242            if self.check(TokenType::Full) {
16243                self.skip();
16244                Some(MatchType::Full)
16245            } else if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
16246                let text = self.advance().text.to_ascii_uppercase();
16247                match text.as_str() {
16248                    "PARTIAL" => Some(MatchType::Partial),
16249                    "SIMPLE" => Some(MatchType::Simple),
16250                    _ => None,
16251                }
16252            } else {
16253                None
16254            }
16255        } else {
16256            None
16257        };
16258
16259        // ON DELETE and ON UPDATE can appear in either order
16260        let mut on_delete = None;
16261        let mut on_update = None;
16262        let mut on_update_first = false;
16263        let mut first_clause = true;
16264
16265        // Try parsing up to 2 ON clauses
16266        for _ in 0..2 {
16267            if on_delete.is_none() && self.match_keywords(&[TokenType::On, TokenType::Delete]) {
16268                on_delete = Some(self.parse_referential_action()?);
16269            } else if on_update.is_none()
16270                && self.match_keywords(&[TokenType::On, TokenType::Update])
16271            {
16272                if first_clause {
16273                    on_update_first = true;
16274                }
16275                on_update = Some(self.parse_referential_action()?);
16276            } else {
16277                break;
16278            }
16279            first_clause = false;
16280        }
16281
16282        // MATCH clause can also appear after ON DELETE/ON UPDATE
16283        let mut match_after_actions = false;
16284        let match_type = if match_type.is_none() && self.match_token(TokenType::Match) {
16285            match_after_actions = on_delete.is_some() || on_update.is_some();
16286            if self.check(TokenType::Full) {
16287                self.skip();
16288                Some(MatchType::Full)
16289            } else if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
16290                let text = self.advance().text.to_ascii_uppercase();
16291                match text.as_str() {
16292                    "PARTIAL" => Some(MatchType::Partial),
16293                    "SIMPLE" => Some(MatchType::Simple),
16294                    _ => None,
16295                }
16296            } else {
16297                None
16298            }
16299        } else {
16300            match_type
16301        };
16302
16303        // Handle optional DEFERRABLE / NOT DEFERRABLE
16304        let deferrable = if self.match_identifier("DEFERRABLE") {
16305            Some(true)
16306        } else if self.match_token(TokenType::Not) && self.match_identifier("DEFERRABLE") {
16307            Some(false)
16308        } else {
16309            None
16310        };
16311
16312        Ok(ForeignKeyRef {
16313            table,
16314            columns,
16315            on_delete,
16316            on_update,
16317            on_update_first,
16318            match_type,
16319            match_after_actions,
16320            constraint_name: None, // Will be set by caller if CONSTRAINT was used
16321            deferrable,
16322            has_foreign_key_keywords: false, // Will be set by caller if FOREIGN KEY preceded REFERENCES
16323        })
16324    }
16325
16326    /// Parse referential action (CASCADE, SET NULL, etc.)
16327    fn parse_referential_action(&mut self) -> Result<ReferentialAction> {
16328        if self.match_token(TokenType::Cascade) {
16329            Ok(ReferentialAction::Cascade)
16330        } else if self.match_keywords(&[TokenType::Set, TokenType::Null]) {
16331            Ok(ReferentialAction::SetNull)
16332        } else if self.match_keywords(&[TokenType::Set, TokenType::Default]) {
16333            Ok(ReferentialAction::SetDefault)
16334        } else if self.match_token(TokenType::Restrict) {
16335            Ok(ReferentialAction::Restrict)
16336        } else if self.match_token(TokenType::No) {
16337            // NO ACTION - NO is a token, ACTION is an identifier
16338            if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("ACTION") {
16339                self.skip();
16340            }
16341            Ok(ReferentialAction::NoAction)
16342        } else {
16343            Err(self.parse_error("Expected CASCADE, SET NULL, SET DEFAULT, RESTRICT, or NO ACTION"))
16344        }
16345    }
16346
16347    /// Parse Snowflake TAG clause: TAG (key='value', key2='value2')
16348    fn parse_tags(&mut self) -> Result<Tags> {
16349        self.expect(TokenType::LParen)?;
16350        let mut expressions = Vec::new();
16351
16352        loop {
16353            // Parse key = 'value' as a Property expression
16354            let key = self.expect_identifier_or_keyword()?;
16355            self.expect(TokenType::Eq)?;
16356            let value = self.parse_primary()?;
16357
16358            // Create a Property expression: key = value
16359            expressions.push(Expression::Property(Box::new(Property {
16360                this: Box::new(Expression::Identifier(Identifier::new(key))),
16361                value: Some(Box::new(value)),
16362            })));
16363
16364            if !self.match_token(TokenType::Comma) {
16365                break;
16366            }
16367        }
16368
16369        self.expect(TokenType::RParen)?;
16370
16371        Ok(Tags { expressions })
16372    }
16373
16374    /// Parse CREATE VIEW
16375    fn parse_create_view(
16376        &mut self,
16377        or_replace: bool,
16378        or_alter: bool,
16379        materialized: bool,
16380        temporary: bool,
16381        algorithm: Option<String>,
16382        definer: Option<String>,
16383        security: Option<FunctionSecurity>,
16384        secure: bool,
16385    ) -> Result<Expression> {
16386        self.expect(TokenType::View)?;
16387
16388        // Handle IF NOT EXISTS
16389        let if_not_exists =
16390            self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
16391
16392        let name = self.parse_table_ref()?;
16393
16394        // ClickHouse: UUID 'xxx' clause after view name
16395        if matches!(
16396            self.config.dialect,
16397            Some(crate::dialects::DialectType::ClickHouse)
16398        ) && self.check_identifier("UUID")
16399        {
16400            self.skip(); // consume UUID
16401            let _ = self.advance(); // consume UUID string value
16402        }
16403
16404        // ClickHouse: ON CLUSTER clause (after view name)
16405        let on_cluster = self.parse_on_cluster_clause()?;
16406
16407        // ClickHouse: TO destination_table clause
16408        let to_table = if self.match_token(TokenType::To) {
16409            Some(self.parse_table_ref()?)
16410        } else {
16411            None
16412        };
16413
16414        // Snowflake: COPY GRANTS (before column list)
16415        let copy_grants = self.match_text_seq(&["COPY", "GRANTS"]);
16416
16417        // For materialized views, column definitions can include data types: (c1 INT, c2 INT)
16418        // This applies to Doris, ClickHouse, and potentially other dialects
16419        // We need to parse this as a schema instead of simple column names
16420        // Track if we parsed a schema (with types) vs simple columns
16421        let mut schema: Option<Schema> = None;
16422        let mut unique_key: Option<UniqueKeyProperty> = None;
16423
16424        // Optional column list with optional COMMENT and OPTIONS per column
16425        let columns = if self.check(TokenType::LParen) {
16426            // For materialized views or ClickHouse views, try to parse as schema with typed columns
16427            if materialized
16428                || matches!(
16429                    self.config.dialect,
16430                    Some(crate::dialects::DialectType::ClickHouse)
16431                )
16432            {
16433                // Save position to backtrack if needed
16434                let saved_pos = self.current;
16435
16436                // Try to parse as schema (with typed columns)
16437                if let Some(Expression::Schema(parsed_schema)) = self.parse_schema()? {
16438                    schema = Some(*parsed_schema);
16439
16440                    // Doris: KEY (columns) after schema
16441                    if self.match_text_seq(&["KEY"]) {
16442                        let exprs = self.parse_composite_key_expressions()?;
16443                        unique_key = Some(UniqueKeyProperty { expressions: exprs });
16444                    }
16445
16446                    Vec::new() // Use schema instead of columns
16447                } else {
16448                    // Backtrack and parse as simple columns
16449                    self.current = saved_pos;
16450                    self.parse_view_columns()?
16451                }
16452            } else {
16453                self.parse_view_columns()?
16454            }
16455        } else {
16456            Vec::new()
16457        };
16458
16459        // Snowflake: COPY GRANTS can also appear after column list
16460        let copy_grants = copy_grants || self.match_text_seq(&["COPY", "GRANTS"]);
16461
16462        // Presto/Trino/StarRocks: SECURITY DEFINER/INVOKER/NONE (after view name, before AS)
16463        // MySQL also allows SQL SECURITY DEFINER/INVOKER after the view name
16464        // This differs from MySQL's SQL SECURITY which can also come before VIEW keyword
16465        let (security, security_sql_style, security_after_name) = if security.is_some() {
16466            // MySQL-style SQL SECURITY was parsed before VIEW keyword
16467            (security, true, false)
16468        } else if self.check_identifier("SQL")
16469            && self.current + 1 < self.tokens.len()
16470            && self.tokens[self.current + 1]
16471                .text
16472                .eq_ignore_ascii_case("SECURITY")
16473        {
16474            // SQL SECURITY after view name
16475            self.skip(); // consume SQL
16476            self.skip(); // consume SECURITY
16477            let sec = if self.match_identifier("DEFINER") {
16478                Some(FunctionSecurity::Definer)
16479            } else if self.match_identifier("INVOKER") {
16480                Some(FunctionSecurity::Invoker)
16481            } else if self.match_identifier("NONE") {
16482                Some(FunctionSecurity::None)
16483            } else {
16484                None
16485            };
16486            (sec, true, true)
16487        } else if self.match_identifier("SECURITY") {
16488            // Presto-style SECURITY after view name
16489            let sec = if self.match_identifier("DEFINER") {
16490                Some(FunctionSecurity::Definer)
16491            } else if self.match_identifier("INVOKER") {
16492                Some(FunctionSecurity::Invoker)
16493            } else if self.match_identifier("NONE") {
16494                Some(FunctionSecurity::None)
16495            } else {
16496                None
16497            };
16498            (sec, false, false)
16499        } else {
16500            (None, true, false)
16501        };
16502
16503        // Snowflake: COMMENT = 'text'
16504        let view_comment = if self.match_token(TokenType::Comment) {
16505            // Match = or skip if not present (some dialects use COMMENT='text')
16506            let _ = self.match_token(TokenType::Eq);
16507            Some(self.expect_string()?)
16508        } else {
16509            None
16510        };
16511
16512        // Snowflake: TAG (name='value', ...)
16513        let tags = if self.match_identifier("TAG") {
16514            let mut tag_list = Vec::new();
16515            if self.match_token(TokenType::LParen) {
16516                loop {
16517                    let tag_name = self.expect_identifier()?;
16518                    let tag_value = if self.match_token(TokenType::Eq) {
16519                        self.expect_string()?
16520                    } else {
16521                        String::new()
16522                    };
16523                    tag_list.push((tag_name, tag_value));
16524                    if !self.match_token(TokenType::Comma) {
16525                        break;
16526                    }
16527                }
16528                self.expect(TokenType::RParen)?;
16529            }
16530            tag_list
16531        } else {
16532            Vec::new()
16533        };
16534
16535        // BigQuery: OPTIONS (key=value, ...)
16536        let options = if self.match_identifier("OPTIONS") {
16537            self.parse_options_list()?
16538        } else {
16539            Vec::new()
16540        };
16541
16542        // Doris: BUILD IMMEDIATE/DEFERRED for materialized views
16543        let build = if self.match_identifier("BUILD") {
16544            if self.match_identifier("IMMEDIATE") {
16545                Some("IMMEDIATE".to_string())
16546            } else if self.match_identifier("DEFERRED") {
16547                Some("DEFERRED".to_string())
16548            } else {
16549                // Unexpected token after BUILD - try to consume it
16550                let value = self.expect_identifier_or_keyword()?;
16551                Some(value.to_ascii_uppercase())
16552            }
16553        } else {
16554            None
16555        };
16556
16557        // Doris: REFRESH COMPLETE/AUTO ON MANUAL/COMMIT/SCHEDULE [EVERY n UNIT] [STARTS 'datetime']
16558        // ClickHouse: REFRESH AFTER interval / REFRESH EVERY interval [OFFSET interval] [RANDOMIZE FOR interval] [APPEND]
16559        let refresh = if self.match_token(TokenType::Refresh) {
16560            if matches!(
16561                self.config.dialect,
16562                Some(crate::dialects::DialectType::ClickHouse)
16563            ) {
16564                // ClickHouse REFRESH syntax: consume tokens until AS/POPULATE/TO/ENGINE or end
16565                while !self.is_at_end()
16566                    && !self.check(TokenType::As)
16567                    && !self.check_identifier("POPULATE")
16568                    && !self.check_identifier("TO")
16569                    && !self.check_identifier("APPEND")
16570                    && !self.check_identifier("ENGINE")
16571                    && !self.check(TokenType::Semicolon)
16572                {
16573                    self.skip();
16574                }
16575                // Consume APPEND if present (REFRESH ... APPEND TO target)
16576                let _ = self.match_identifier("APPEND");
16577                None
16578            } else {
16579                Some(Box::new(self.parse_refresh_trigger_property()?))
16580            }
16581        } else {
16582            None
16583        };
16584
16585        // ClickHouse: TO destination_table after REFRESH ... APPEND
16586        // e.g., CREATE MATERIALIZED VIEW v REFRESH AFTER 1 SECOND APPEND TO tab (cols) EMPTY AS ...
16587        let to_table = if to_table.is_none() && self.match_token(TokenType::To) {
16588            Some(self.parse_table_ref()?)
16589        } else {
16590            to_table
16591        };
16592
16593        // ClickHouse: column definitions after REFRESH ... APPEND TO tab (cols)
16594        if schema.is_none()
16595            && self.check(TokenType::LParen)
16596            && matches!(
16597                self.config.dialect,
16598                Some(crate::dialects::DialectType::ClickHouse)
16599            )
16600        {
16601            let saved_pos = self.current;
16602            if let Some(Expression::Schema(parsed_schema)) = self.parse_schema()? {
16603                schema = Some(*parsed_schema);
16604            } else {
16605                self.current = saved_pos;
16606            }
16607        }
16608
16609        // Redshift: AUTO REFRESH YES|NO for materialized views
16610        let auto_refresh = if self.match_text_seq(&["AUTO", "REFRESH"]) {
16611            if self.match_identifier("YES") {
16612                Some(true)
16613            } else if self.match_identifier("NO") {
16614                Some(false)
16615            } else {
16616                None
16617            }
16618        } else {
16619            None
16620        };
16621
16622        // ClickHouse: Parse table properties (ENGINE, ORDER BY, SAMPLE, SETTINGS, TTL, etc.)
16623        // These appear after column definitions but before AS clause for materialized views
16624        let mut table_properties = Vec::new();
16625        if materialized
16626            && matches!(
16627                self.config.dialect,
16628                Some(crate::dialects::DialectType::ClickHouse)
16629            )
16630        {
16631            self.parse_clickhouse_table_properties(&mut table_properties)?;
16632        }
16633
16634        // ClickHouse: POPULATE / EMPTY keywords before AS in materialized views
16635        if materialized
16636            && matches!(
16637                self.config.dialect,
16638                Some(crate::dialects::DialectType::ClickHouse)
16639            )
16640        {
16641            let _ = self.match_identifier("POPULATE");
16642            let _ = self.match_identifier("EMPTY");
16643        }
16644
16645        // AS is optional - some dialects (e.g., Presto) allow SELECT without AS
16646        let has_as = self.match_token(TokenType::As);
16647        if !has_as && !self.check(TokenType::Select) && !self.check(TokenType::With) {
16648            // No AS and no SELECT/WITH means no query - return empty view (for partial statements)
16649            return Ok(Expression::CreateView(Box::new(CreateView {
16650                name,
16651                columns,
16652                query: Expression::Null(Null), // Placeholder for incomplete VIEW
16653                or_replace,
16654                or_alter,
16655                if_not_exists,
16656                materialized,
16657                temporary,
16658                secure,
16659                algorithm,
16660                definer,
16661                security,
16662                security_sql_style,
16663                security_after_name,
16664                query_parenthesized: false,
16665                locking_mode: None,
16666                locking_access: None,
16667                copy_grants,
16668                comment: view_comment,
16669                tags,
16670                options,
16671                build,
16672                refresh,
16673                schema: schema.map(Box::new),
16674                unique_key: unique_key.map(Box::new),
16675                no_schema_binding: false,
16676                auto_refresh,
16677                on_cluster,
16678                to_table,
16679                table_properties,
16680            })));
16681        }
16682
16683        // Parse Teradata LOCKING clause: LOCKING ROW|TABLE|DATABASE FOR ACCESS|READ|WRITE
16684        let mut locking_mode: Option<String> = None;
16685        let mut locking_access: Option<String> = None;
16686        if self.match_token(TokenType::Lock) || self.match_identifier("LOCKING") {
16687            // Capture: ROW, TABLE, DATABASE, etc.
16688            if self.match_token(TokenType::Row) {
16689                locking_mode = Some("ROW".to_string());
16690            } else if self.match_token(TokenType::Table) {
16691                locking_mode = Some("TABLE".to_string());
16692            } else if self.match_token(TokenType::Database) || self.match_identifier("DATABASE") {
16693                locking_mode = Some("DATABASE".to_string());
16694            }
16695            // Capture FOR ACCESS|READ|WRITE
16696            if self.match_token(TokenType::For) {
16697                if self.match_identifier("ACCESS") {
16698                    locking_access = Some("ACCESS".to_string());
16699                } else if self.match_identifier("READ") {
16700                    locking_access = Some("READ".to_string());
16701                } else if self.match_identifier("WRITE") {
16702                    locking_access = Some("WRITE".to_string());
16703                }
16704            }
16705        }
16706
16707        // Use parse_statement to handle SELECT, WITH...SELECT, or (SELECT...)
16708        let query_parenthesized = self.check(TokenType::LParen);
16709        let query = if self.check(TokenType::With) {
16710            self.parse_statement()?
16711        } else if query_parenthesized {
16712            // Handle (SELECT ...) or (WITH ... SELECT ...) - parenthesized query
16713            self.skip(); // consume (
16714            let inner = if self.check(TokenType::With) {
16715                self.parse_statement()?
16716            } else {
16717                self.parse_select()?
16718            };
16719            self.expect(TokenType::RParen)?;
16720            inner
16721        } else {
16722            self.parse_select()?
16723        };
16724
16725        // Redshift: WITH NO SCHEMA BINDING (after the query)
16726        let no_schema_binding = self.match_text_seq(&["WITH", "NO", "SCHEMA", "BINDING"]);
16727
16728        Ok(Expression::CreateView(Box::new(CreateView {
16729            name,
16730            columns,
16731            query,
16732            or_replace,
16733            or_alter,
16734            if_not_exists,
16735            materialized,
16736            temporary,
16737            secure,
16738            algorithm,
16739            definer,
16740            security,
16741            security_sql_style,
16742            security_after_name,
16743            query_parenthesized,
16744            locking_mode,
16745            locking_access,
16746            copy_grants,
16747            comment: view_comment,
16748            tags,
16749            options,
16750            build,
16751            refresh,
16752            schema: schema.map(Box::new),
16753            unique_key: unique_key.map(Box::new),
16754            no_schema_binding,
16755            auto_refresh,
16756            on_cluster,
16757            to_table,
16758            table_properties,
16759        })))
16760    }
16761
16762    /// Parse view column list: (col1, col2 OPTIONS(...) COMMENT 'text', ...)
16763    /// For simple view definitions without data types
16764    fn parse_view_columns(&mut self) -> Result<Vec<ViewColumn>> {
16765        self.expect(TokenType::LParen)?;
16766        let mut cols = Vec::new();
16767        loop {
16768            let col_name = self.expect_identifier()?;
16769            // BigQuery: OPTIONS (key=value, ...) on view column
16770            let options = if self.match_identifier("OPTIONS") {
16771                self.parse_options_list()?
16772            } else {
16773                Vec::new()
16774            };
16775            // Optional COMMENT 'text'
16776            let comment = if self.match_token(TokenType::Comment) {
16777                Some(self.expect_string()?)
16778            } else {
16779                None
16780            };
16781            cols.push(ViewColumn {
16782                name: Identifier::new(col_name),
16783                comment,
16784                options,
16785            });
16786            if !self.match_token(TokenType::Comma) {
16787                break;
16788            }
16789        }
16790        self.expect(TokenType::RParen)?;
16791        Ok(cols)
16792    }
16793
16794    /// Parse CREATE [CLUSTERED|NONCLUSTERED] INDEX
16795    fn parse_create_index_with_clustered(
16796        &mut self,
16797        unique: bool,
16798        clustered: Option<String>,
16799    ) -> Result<Expression> {
16800        self.expect(TokenType::Index)?;
16801
16802        // PostgreSQL: CREATE INDEX CONCURRENTLY idx ON t(c)
16803        let concurrently = self.match_identifier("CONCURRENTLY");
16804
16805        // Handle IF NOT EXISTS
16806        let if_not_exists =
16807            self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
16808
16809        // Index name is optional when IF NOT EXISTS is specified (PostgreSQL)
16810        let name = if if_not_exists && self.check(TokenType::On) {
16811            Identifier::new("") // Empty name when omitted
16812        } else {
16813            self.expect_identifier_with_quoted()?
16814        };
16815        self.expect(TokenType::On)?;
16816        let table = self.parse_table_ref()?;
16817
16818        // Optional USING clause
16819        let using = if self.match_token(TokenType::Using) {
16820            Some(self.expect_identifier()?)
16821        } else {
16822            None
16823        };
16824
16825        // Parse index columns (optional for COLUMNSTORE indexes)
16826        let columns = if self.match_token(TokenType::LParen) {
16827            let cols = self.parse_index_columns()?;
16828            self.expect(TokenType::RParen)?;
16829            cols
16830        } else if clustered
16831            .as_ref()
16832            .is_some_and(|c| c.contains("COLUMNSTORE"))
16833        {
16834            // COLUMNSTORE indexes don't require a column list
16835            Vec::new()
16836        } else if matches!(
16837            self.config.dialect,
16838            Some(crate::dialects::DialectType::ClickHouse)
16839        ) {
16840            // ClickHouse: CREATE INDEX idx ON table expr TYPE minmax GRANULARITY 1
16841            // No parentheses around the expression — consume to semicolon as Command
16842            let mut parts = vec![
16843                "CREATE".to_string(),
16844                if unique {
16845                    "UNIQUE INDEX".to_string()
16846                } else {
16847                    "INDEX".to_string()
16848                },
16849                name.name.clone(),
16850                "ON".to_string(),
16851            ];
16852            // Rebuild table name
16853            if let Some(ref s) = table.schema {
16854                parts.push(format!("{}.{}", s.name, table.name.name));
16855            } else {
16856                parts.push(table.name.name.clone());
16857            }
16858            while !self.is_at_end() && !self.check(TokenType::Semicolon) {
16859                let token = self.advance();
16860                if token.token_type == TokenType::String {
16861                    parts.push(format!("'{}'", token.text));
16862                } else if token.token_type == TokenType::QuotedIdentifier {
16863                    parts.push(format!("\"{}\"", token.text));
16864                } else {
16865                    parts.push(token.text.clone());
16866                }
16867            }
16868            return Ok(Expression::Command(Box::new(crate::expressions::Command {
16869                this: parts.join(" "),
16870            })));
16871        } else {
16872            self.expect(TokenType::LParen)?;
16873            let cols = self.parse_index_columns()?;
16874            self.expect(TokenType::RParen)?;
16875            cols
16876        };
16877
16878        // PostgreSQL: INCLUDE (col1, col2) clause
16879        let include_columns = if self.match_identifier("INCLUDE") {
16880            self.expect(TokenType::LParen)?;
16881            let mut cols = Vec::new();
16882            loop {
16883                cols.push(self.expect_identifier_with_quoted()?);
16884                if !self.match_token(TokenType::Comma) {
16885                    break;
16886                }
16887            }
16888            self.expect(TokenType::RParen)?;
16889            cols
16890        } else {
16891            Vec::new()
16892        };
16893
16894        // TSQL: WITH (option=value, ...) clause for index options
16895        let with_options = if self.check(TokenType::With) {
16896            // parse_with_properties expects the WITH keyword to NOT be consumed
16897            // but we need to check if we have WITH followed by LParen
16898            if self
16899                .peek_nth(1)
16900                .is_some_and(|t| t.token_type == TokenType::LParen)
16901            {
16902                self.skip(); // consume WITH
16903                self.parse_with_properties()?
16904            } else {
16905                Vec::new()
16906            }
16907        } else {
16908            Vec::new()
16909        };
16910
16911        // PostgreSQL: WHERE clause for partial indexes
16912        let where_clause = if self.match_token(TokenType::Where) {
16913            Some(Box::new(self.parse_expression()?))
16914        } else {
16915            None
16916        };
16917
16918        // TSQL: ON filegroup or partition scheme clause
16919        // e.g., ON PRIMARY, ON X([y])
16920        let on_filegroup = if self.match_token(TokenType::On) {
16921            // Get the filegroup/partition scheme name
16922            let token = self.advance();
16923            let mut filegroup = token.text.clone();
16924            // Check for partition scheme with column: ON partition_scheme(column)
16925            if self.match_token(TokenType::LParen) {
16926                filegroup.push('(');
16927                // Parse the partition column(s)
16928                loop {
16929                    let col_token = self.advance();
16930                    // For TSQL, use bracket quoting for quoted identifiers
16931                    if col_token.token_type == TokenType::QuotedIdentifier {
16932                        filegroup.push('[');
16933                        filegroup.push_str(&col_token.text);
16934                        filegroup.push(']');
16935                    } else {
16936                        filegroup.push_str(&col_token.text);
16937                    }
16938                    if !self.match_token(TokenType::Comma) {
16939                        break;
16940                    }
16941                    filegroup.push_str(", ");
16942                }
16943                self.expect(TokenType::RParen)?;
16944                filegroup.push(')');
16945            }
16946            Some(filegroup)
16947        } else {
16948            None
16949        };
16950
16951        Ok(Expression::CreateIndex(Box::new(CreateIndex {
16952            name,
16953            table,
16954            columns,
16955            unique,
16956            if_not_exists,
16957            using,
16958            clustered,
16959            concurrently,
16960            where_clause,
16961            include_columns,
16962            with_options,
16963            on_filegroup,
16964        })))
16965    }
16966
16967    /// Parse index columns - can be identifiers or expressions (like function calls)
16968    fn parse_index_columns(&mut self) -> Result<Vec<IndexColumn>> {
16969        let mut columns = Vec::new();
16970        loop {
16971            // Parse as expression to handle function calls like BOX(location, location)
16972            let expr = self.parse_expression()?;
16973
16974            // Extract column name from expression
16975            let column = match &expr {
16976                Expression::Identifier(ident) => ident.clone(),
16977                Expression::Column(col) => {
16978                    // For column expressions (e.g., simple identifier like [Col]),
16979                    // extract the identifier directly to preserve quoting
16980                    col.name.clone()
16981                }
16982                Expression::Function(_func) => {
16983                    // For function expressions, create an identifier from the function call
16984                    Identifier::new(self.expression_to_sql(&expr))
16985                }
16986                _ => Identifier::new(self.expression_to_sql(&expr)),
16987            };
16988
16989            // Parse optional PostgreSQL operator class (e.g., varchar_pattern_ops, public.gin_trgm_ops)
16990            // An opclass is an identifier that appears before ASC/DESC/NULLS and is not a keyword
16991            let opclass = if self.is_identifier_token()
16992                && !self.check(TokenType::Asc)
16993                && !self.check(TokenType::Desc)
16994                && !self.check(TokenType::Nulls)
16995            {
16996                let mut opclass_name = self.advance().text;
16997                // Handle qualified opclass names like public.gin_trgm_ops
16998                while self.match_token(TokenType::Dot) {
16999                    opclass_name.push('.');
17000                    if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
17001                        opclass_name.push_str(&self.advance().text);
17002                    }
17003                }
17004                Some(opclass_name)
17005            } else {
17006                None
17007            };
17008
17009            let desc = self.match_token(TokenType::Desc);
17010            let asc = if !desc {
17011                self.match_token(TokenType::Asc)
17012            } else {
17013                false
17014            };
17015            let nulls_first = if self.match_token(TokenType::Nulls) {
17016                if self.match_token(TokenType::First) {
17017                    Some(true)
17018                } else if self.match_token(TokenType::Last) {
17019                    Some(false)
17020                } else {
17021                    None
17022                }
17023            } else {
17024                None
17025            };
17026            columns.push(IndexColumn {
17027                column,
17028                desc,
17029                asc,
17030                nulls_first,
17031                opclass,
17032            });
17033            if !self.match_token(TokenType::Comma) {
17034                break;
17035            }
17036        }
17037        Ok(columns)
17038    }
17039
17040    /// Convert an expression to its SQL string representation (simple version for index expressions)
17041    fn expression_to_sql(&self, expr: &Expression) -> String {
17042        match expr {
17043            Expression::Identifier(ident) => ident.name.clone(),
17044            Expression::Function(func) => {
17045                let args = func
17046                    .args
17047                    .iter()
17048                    .map(|a| self.expression_to_sql(a))
17049                    .collect::<Vec<_>>()
17050                    .join(", ");
17051                format!("{}({})", func.name, args)
17052            }
17053            Expression::Column(col) => {
17054                if let Some(ref table) = col.table {
17055                    format!("{}.{}", table, col.name)
17056                } else {
17057                    col.name.to_string()
17058                }
17059            }
17060            Expression::Literal(lit) => match lit.as_ref() {
17061                Literal::String(s) => format!("'{}'", s),
17062                Literal::Number(n) => n.clone(),
17063                _ => "?".to_string(),
17064            },
17065            Expression::Null(_) => "NULL".to_string(),
17066            Expression::Boolean(b) => {
17067                if b.value {
17068                    "TRUE".to_string()
17069                } else {
17070                    "FALSE".to_string()
17071                }
17072            }
17073            _ => "?".to_string(),
17074        }
17075    }
17076
17077    /// Parse DROP statement
17078    fn parse_drop(&mut self) -> Result<Expression> {
17079        // Capture leading comments from the DROP token (e.g., "-- comment\nDROP TABLE ...")
17080        let leading_comments = self.current_leading_comments().to_vec();
17081        self.expect(TokenType::Drop)?;
17082
17083        // ClickHouse: DROP TEMPORARY TABLE / DROP TEMPORARY VIEW
17084        if self.check(TokenType::Temporary)
17085            && matches!(
17086                self.config.dialect,
17087                Some(crate::dialects::DialectType::ClickHouse)
17088            )
17089        {
17090            self.skip(); // consume TEMPORARY
17091            if self.check(TokenType::View) {
17092                return self.parse_drop_view(false);
17093            }
17094            return self.parse_drop_table_with_iceberg(leading_comments.clone(), false);
17095        }
17096
17097        // Snowflake: DROP ICEBERG TABLE
17098        if self.check_identifier("ICEBERG")
17099            && self.current + 1 < self.tokens.len()
17100            && self.tokens[self.current + 1].token_type == TokenType::Table
17101        {
17102            self.skip(); // consume ICEBERG
17103            return self.parse_drop_table_with_iceberg(leading_comments, true);
17104        }
17105
17106        match self.peek().token_type {
17107            TokenType::Table => self.parse_drop_table_with_iceberg(leading_comments, false),
17108            TokenType::View => self.parse_drop_view(false),
17109            TokenType::Materialized => {
17110                self.skip(); // consume MATERIALIZED
17111                self.parse_drop_view(true)
17112            }
17113            TokenType::Index => self.parse_drop_index(),
17114            TokenType::Schema => self.parse_drop_schema(),
17115            TokenType::Database => self.parse_drop_database(),
17116            TokenType::Function => self.parse_drop_function(),
17117            TokenType::Procedure => self.parse_drop_procedure(),
17118            TokenType::Sequence => self.parse_drop_sequence(),
17119            TokenType::Trigger => self.parse_drop_trigger(),
17120            TokenType::Type => self.parse_drop_type(),
17121            TokenType::Domain => {
17122                // DROP DOMAIN is similar to DROP TYPE
17123                self.skip();
17124                let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
17125                let name = self.parse_table_ref()?;
17126                let cascade = self.match_token(TokenType::Cascade);
17127                if !cascade {
17128                    self.match_token(TokenType::Restrict);
17129                }
17130                Ok(Expression::DropType(Box::new(DropType {
17131                    name,
17132                    if_exists,
17133                    cascade,
17134                })))
17135            }
17136            TokenType::Namespace => {
17137                // DROP NAMESPACE is similar to DROP SCHEMA (Spark/Databricks)
17138                self.skip();
17139                let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
17140                // Parse potentially qualified namespace name (a.b.c)
17141                let mut name_parts = vec![self.expect_identifier()?];
17142                while self.match_token(TokenType::Dot) {
17143                    name_parts.push(self.expect_identifier()?);
17144                }
17145                let name = Identifier::new(name_parts.join("."));
17146                let cascade = self.match_token(TokenType::Cascade);
17147                if !cascade {
17148                    self.match_token(TokenType::Restrict);
17149                }
17150                Ok(Expression::DropNamespace(Box::new(DropNamespace {
17151                    name,
17152                    if_exists,
17153                    cascade,
17154                })))
17155            }
17156            _ => {
17157                // ClickHouse: DROP DICTIONARY, DROP USER, DROP QUOTA, DROP ROLE,
17158                // DROP ROW POLICY, DROP SETTINGS PROFILE, DROP NAMED COLLECTION
17159                if matches!(
17160                    self.config.dialect,
17161                    Some(crate::dialects::DialectType::ClickHouse)
17162                ) {
17163                    let text_upper = self.peek().text.to_ascii_uppercase();
17164                    if matches!(
17165                        text_upper.as_str(),
17166                        "DICTIONARY"
17167                            | "USER"
17168                            | "QUOTA"
17169                            | "ROLE"
17170                            | "ROW"
17171                            | "POLICY"
17172                            | "NAMED"
17173                            | "WORKLOAD"
17174                            | "RESOURCE"
17175                            | "PROFILE"
17176                    ) || self.check(TokenType::Settings)
17177                        || self.check(TokenType::Partition)
17178                    {
17179                        self.skip(); // consume keyword, previous() is now set
17180                        let mut tokens: Vec<(String, TokenType)> = vec![
17181                            ("DROP".to_string(), TokenType::Var),
17182                            (
17183                                self.previous().text.to_ascii_uppercase(),
17184                                self.previous().token_type,
17185                            ),
17186                        ];
17187                        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
17188                            let token = self.advance();
17189                            let text = if token.token_type == TokenType::QuotedIdentifier {
17190                                format!("\"{}\"", token.text)
17191                            } else if token.token_type == TokenType::String {
17192                                format!("'{}'", token.text)
17193                            } else {
17194                                token.text.clone()
17195                            };
17196                            tokens.push((text, token.token_type));
17197                        }
17198                        return Ok(Expression::Command(Box::new(Command {
17199                            this: self.join_command_tokens(tokens),
17200                        })));
17201                    }
17202                }
17203                // Snowflake: DROP STREAM, DROP TASK, DROP STAGE, DROP WAREHOUSE, etc.
17204                if matches!(
17205                    self.config.dialect,
17206                    Some(crate::dialects::DialectType::Snowflake)
17207                ) {
17208                    let text_upper = self.peek().text.to_ascii_uppercase();
17209                    let is_snowflake_drop = matches!(
17210                        text_upper.as_str(),
17211                        "STREAM"
17212                            | "TASK"
17213                            | "STAGE"
17214                            | "WAREHOUSE"
17215                            | "PIPE"
17216                            | "INTEGRATION"
17217                            | "TAG"
17218                            | "NETWORK"
17219                            | "SHARE"
17220                    ) || (text_upper == "FILE"
17221                        && self.current + 1 < self.tokens.len()
17222                        && self.tokens[self.current + 1]
17223                            .text
17224                            .eq_ignore_ascii_case("FORMAT"));
17225                    if is_snowflake_drop {
17226                        self.skip(); // consume the object type keyword
17227                        let mut tokens: Vec<(String, TokenType)> = vec![
17228                            ("DROP".to_string(), TokenType::Var),
17229                            (
17230                                self.previous().text.to_ascii_uppercase(),
17231                                self.previous().token_type,
17232                            ),
17233                        ];
17234                        // For FILE FORMAT, also consume FORMAT
17235                        if text_upper == "FILE" {
17236                            let fmt = self.advance();
17237                            tokens.push((fmt.text.to_ascii_uppercase(), fmt.token_type));
17238                        }
17239                        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
17240                            let token = self.advance();
17241                            let text = if token.token_type == TokenType::QuotedIdentifier {
17242                                format!("\"{}\"", token.text)
17243                            } else if token.token_type == TokenType::String {
17244                                format!("'{}'", token.text)
17245                            } else {
17246                                token.text.clone()
17247                            };
17248                            tokens.push((text, token.token_type));
17249                        }
17250                        return Ok(Expression::Command(Box::new(Command {
17251                            this: self.join_command_tokens(tokens),
17252                        })));
17253                    }
17254                }
17255                Err(self.parse_error(format!(
17256                    "Expected TABLE, VIEW, INDEX, SCHEMA, DATABASE, FUNCTION, PROCEDURE, SEQUENCE, TRIGGER, TYPE, or NAMESPACE after DROP, got {:?}",
17257                    self.peek().token_type
17258                )))
17259            }
17260        }
17261    }
17262
17263    /// Parse DROP TABLE
17264    fn parse_drop_table_with_iceberg(
17265        &mut self,
17266        leading_comments: Vec<String>,
17267        iceberg: bool,
17268    ) -> Result<Expression> {
17269        self.expect(TokenType::Table)?;
17270
17271        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
17272
17273        // ClickHouse: IF EMPTY
17274        if !if_exists
17275            && matches!(
17276                self.config.dialect,
17277                Some(crate::dialects::DialectType::ClickHouse)
17278            )
17279        {
17280            if self.check(TokenType::If)
17281                && self.current + 1 < self.tokens.len()
17282                && self.tokens[self.current + 1]
17283                    .text
17284                    .eq_ignore_ascii_case("EMPTY")
17285            {
17286                self.skip(); // consume IF
17287                self.skip(); // consume EMPTY
17288            }
17289        }
17290
17291        // Parse table names (can be multiple)
17292        let mut names = Vec::new();
17293        loop {
17294            names.push(self.parse_table_ref()?);
17295            if !self.match_token(TokenType::Comma) {
17296                break;
17297            }
17298        }
17299
17300        // Handle CASCADE [CONSTRAINTS] or RESTRICT
17301        let mut cascade = false;
17302        let mut cascade_constraints = false;
17303        let mut restrict = false;
17304        if self.match_token(TokenType::Cascade) {
17305            if self.match_identifier("CONSTRAINTS") {
17306                cascade_constraints = true;
17307            } else {
17308                cascade = true;
17309            }
17310        } else {
17311            restrict = self.match_token(TokenType::Restrict);
17312        }
17313
17314        // Handle PURGE (Oracle)
17315        let purge = self.match_identifier("PURGE");
17316
17317        // ClickHouse: ON CLUSTER clause
17318        if matches!(
17319            self.config.dialect,
17320            Some(crate::dialects::DialectType::ClickHouse)
17321        ) {
17322            let _ = self.parse_on_cluster_clause()?;
17323        }
17324
17325        // ClickHouse: SYNC keyword
17326        let sync = if matches!(
17327            self.config.dialect,
17328            Some(crate::dialects::DialectType::ClickHouse)
17329        ) {
17330            let s = self.match_identifier("SYNC");
17331            self.match_identifier("NO");
17332            self.match_identifier("DELAY");
17333            s
17334        } else {
17335            false
17336        };
17337
17338        Ok(Expression::DropTable(Box::new(DropTable {
17339            names,
17340            if_exists,
17341            cascade,
17342            cascade_constraints,
17343            purge,
17344            leading_comments,
17345            object_id_args: None,
17346            sync,
17347            iceberg,
17348            restrict,
17349        })))
17350    }
17351
17352    /// Parse DROP VIEW
17353    fn parse_drop_view(&mut self, materialized: bool) -> Result<Expression> {
17354        self.expect(TokenType::View)?;
17355
17356        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
17357        let name = self.parse_table_ref()?;
17358
17359        // ClickHouse: ON CLUSTER clause
17360        if matches!(
17361            self.config.dialect,
17362            Some(crate::dialects::DialectType::ClickHouse)
17363        ) {
17364            let _ = self.parse_on_cluster_clause()?;
17365            self.match_identifier("SYNC");
17366        }
17367
17368        Ok(Expression::DropView(Box::new(DropView {
17369            name,
17370            if_exists,
17371            materialized,
17372        })))
17373    }
17374
17375    /// Parse DROP INDEX
17376    fn parse_drop_index(&mut self) -> Result<Expression> {
17377        self.expect(TokenType::Index)?;
17378
17379        // PostgreSQL CONCURRENTLY modifier
17380        let concurrently = self.match_identifier("CONCURRENTLY");
17381
17382        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
17383
17384        // Parse potentially qualified index name (a.b.c)
17385        let mut name_parts = vec![self.expect_identifier()?];
17386        while self.match_token(TokenType::Dot) {
17387            name_parts.push(self.expect_identifier()?);
17388        }
17389        let name = Identifier::new(name_parts.join("."));
17390
17391        // Optional ON table
17392        let table = if self.match_token(TokenType::On) {
17393            Some(self.parse_table_ref()?)
17394        } else {
17395            None
17396        };
17397
17398        Ok(Expression::DropIndex(Box::new(DropIndex {
17399            name,
17400            table,
17401            if_exists,
17402            concurrently,
17403        })))
17404    }
17405
17406    /// Parse ALTER statement
17407    fn parse_alter(&mut self) -> Result<Expression> {
17408        self.expect(TokenType::Alter)?;
17409
17410        // Check for ICEBERG modifier before TABLE
17411        let alter_table_modifier = if self.check_identifier("ICEBERG") {
17412            self.skip();
17413            Some("ICEBERG".to_string())
17414        } else {
17415            None
17416        };
17417
17418        match self.peek().token_type {
17419            TokenType::Table => {
17420                self.skip();
17421                // Handle IF EXISTS after ALTER TABLE
17422                let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
17423                // Handle PostgreSQL ONLY modifier: ALTER TABLE ONLY "Album" ...
17424                let has_only = self.match_token(TokenType::Only);
17425                let mut name = self.parse_table_ref()?;
17426                if has_only {
17427                    name.only = true;
17428                }
17429
17430                // ClickHouse: ON CLUSTER clause
17431                let on_cluster = self.parse_on_cluster_clause()?;
17432
17433                // Hive: PARTITION(key=value, ...) clause before actions
17434                let partition = if self.match_token(TokenType::Partition) {
17435                    self.expect(TokenType::LParen)?;
17436                    let mut parts = Vec::new();
17437                    loop {
17438                        let key = self.expect_identifier()?;
17439                        self.expect(TokenType::Eq)?;
17440                        let value = self.parse_expression()?;
17441                        parts.push((Identifier::new(key), value));
17442                        if !self.match_token(TokenType::Comma) {
17443                            break;
17444                        }
17445                    }
17446                    self.expect(TokenType::RParen)?;
17447                    Some(parts)
17448                } else {
17449                    None
17450                };
17451
17452                let mut actions = Vec::new();
17453                let mut last_was_add_column = false;
17454                let mut with_check_modifier: Option<String> = None;
17455
17456                loop {
17457                    // Check for MySQL trailing options (ALGORITHM=val, LOCK=val)
17458                    // before trying to parse as a column def or action.
17459                    // The comma before ALGORITHM was consumed at the bottom of the previous iteration.
17460                    if self.check_identifier("ALGORITHM") || self.check_identifier("LOCK") {
17461                        break;
17462                    }
17463
17464                    // TSQL: WITH CHECK / WITH NOCHECK before ADD CONSTRAINT
17465                    if self.check(TokenType::With) {
17466                        let saved = self.current;
17467                        self.skip(); // consume WITH
17468                        if self.check(TokenType::Check) {
17469                            self.skip(); // consume CHECK
17470                            with_check_modifier = Some("WITH CHECK".to_string());
17471                            // Continue to parse the actual action (ADD CONSTRAINT, etc.)
17472                        } else if self.check_identifier("NOCHECK") {
17473                            self.skip(); // consume NOCHECK
17474                            with_check_modifier = Some("WITH NOCHECK".to_string());
17475                            // Continue to parse the actual action (ADD CONSTRAINT, etc.)
17476                        } else {
17477                            // Not WITH CHECK/NOCHECK, restore position
17478                            self.current = saved;
17479                        }
17480                    }
17481
17482                    // If last action was ADD COLUMN and we just saw a comma,
17483                    // check if this is another column definition (not a new action keyword)
17484                    if last_was_add_column
17485                        && !self.check(TokenType::Add)
17486                        && !self.check(TokenType::Drop)
17487                        && !self.check(TokenType::Alter)
17488                        && !self.check(TokenType::Rename)
17489                        && !self.check(TokenType::Set)
17490                        && !self.check_identifier("MODIFY")
17491                        && !self.check(TokenType::Delete)
17492                        && !self.check(TokenType::Update)
17493                        && !self.check_identifier("DETACH")
17494                        && !self.check_identifier("ATTACH")
17495                        && !self.check_identifier("FREEZE")
17496                        && !self.check_identifier("CLEAR")
17497                        && !self.check_identifier("MATERIALIZE")
17498                        && !self.check(TokenType::Comment)
17499                        && !self.check(TokenType::Replace)
17500                        && !self.check_identifier("MOVE")
17501                        && !self.check_identifier("REMOVE")
17502                        && !self.check_identifier("APPLY")
17503                    {
17504                        // Parse additional column definition
17505                        self.match_token(TokenType::Column); // optional COLUMN keyword
17506                        let if_not_exists = self.match_keywords(&[
17507                            TokenType::If,
17508                            TokenType::Not,
17509                            TokenType::Exists,
17510                        ]);
17511                        let col_def = self.parse_column_def()?;
17512                        let position = if self.match_token(TokenType::First) {
17513                            Some(ColumnPosition::First)
17514                        } else if self.match_token(TokenType::After) {
17515                            let after_col = self.expect_identifier()?;
17516                            // ClickHouse: AFTER n.a (dotted nested column name)
17517                            let after_name = if self.match_token(TokenType::Dot) {
17518                                let field = self.expect_identifier()?;
17519                                format!("{}.{}", after_col, field)
17520                            } else {
17521                                after_col
17522                            };
17523                            Some(ColumnPosition::After(Identifier::new(after_name)))
17524                        } else {
17525                            None
17526                        };
17527                        actions.push(AlterTableAction::AddColumn {
17528                            column: col_def,
17529                            if_not_exists,
17530                            position,
17531                        });
17532                        // last_was_add_column remains true
17533                    } else {
17534                        // Check for MySQL trailing options (ALGORITHM=val, LOCK=val)
17535                        // before trying to parse as an action
17536                        if self.check_identifier("ALGORITHM") || self.check_identifier("LOCK") {
17537                            // Retreat one to re-process the comma in the trailing options loop
17538                            self.current -= 1; // back up past the comma consumed in loop
17539                            break;
17540                        }
17541                        let action = self.parse_alter_action()?;
17542                        last_was_add_column = matches!(action, AlterTableAction::AddColumn { .. });
17543                        actions.push(action);
17544                    }
17545                    if !self.match_token(TokenType::Comma) {
17546                        break;
17547                    }
17548                }
17549
17550                // Parse trailing MySQL ALTER TABLE options: ALGORITHM=val, LOCK=val
17551                // These can appear after actions separated by commas (comma already consumed)
17552                // or directly if no actions were parsed
17553                let mut algorithm = None;
17554                let mut lock = None;
17555                loop {
17556                    // First check without consuming comma (comma may have been consumed by action loop)
17557                    if self.check_identifier("ALGORITHM") {
17558                        self.skip();
17559                        self.expect(TokenType::Eq)?;
17560                        algorithm = Some(self.expect_identifier_or_keyword()?.to_ascii_uppercase());
17561                        self.match_token(TokenType::Comma); // optional trailing comma
17562                    } else if self.check_identifier("LOCK") {
17563                        self.skip();
17564                        self.expect(TokenType::Eq)?;
17565                        lock = Some(self.expect_identifier_or_keyword()?.to_ascii_uppercase());
17566                        self.match_token(TokenType::Comma); // optional trailing comma
17567                    } else if self.match_token(TokenType::Comma) {
17568                        // Try after comma
17569                        if self.check_identifier("ALGORITHM") {
17570                            self.skip();
17571                            self.expect(TokenType::Eq)?;
17572                            algorithm =
17573                                Some(self.expect_identifier_or_keyword()?.to_ascii_uppercase());
17574                        } else if self.check_identifier("LOCK") {
17575                            self.skip();
17576                            self.expect(TokenType::Eq)?;
17577                            lock = Some(self.expect_identifier_or_keyword()?.to_ascii_uppercase());
17578                        } else {
17579                            self.current -= 1;
17580                            break;
17581                        }
17582                    } else {
17583                        break;
17584                    }
17585                }
17586
17587                // ClickHouse: consume optional trailing SETTINGS clause
17588                // e.g., ALTER TABLE t ADD COLUMN c Int64 SETTINGS mutations_sync=2, alter_sync=2
17589                if matches!(
17590                    self.config.dialect,
17591                    Some(crate::dialects::DialectType::ClickHouse)
17592                ) && self.check(TokenType::Settings)
17593                {
17594                    self.skip(); // consume SETTINGS
17595                    let _ = self.parse_settings_property()?;
17596                }
17597
17598                Ok(Expression::AlterTable(Box::new(AlterTable {
17599                    name,
17600                    actions,
17601                    if_exists,
17602                    algorithm,
17603                    lock,
17604                    with_check: with_check_modifier,
17605                    partition,
17606                    on_cluster,
17607                    table_modifier: alter_table_modifier,
17608                })))
17609            }
17610            TokenType::View => self.parse_alter_view_with_modifiers(None, None, None),
17611            TokenType::Index => self.parse_alter_index(),
17612            TokenType::Sequence => self.parse_alter_sequence(),
17613            _ if self.check_identifier("SESSION") => {
17614                // ALTER SESSION SET/UNSET (Snowflake)
17615                self.skip(); // consume SESSION
17616                match self.parse_alter_session()? {
17617                    Some(expr) => Ok(expr),
17618                    None => {
17619                        // Fall back to command
17620                        Ok(Expression::Command(Box::new(Command {
17621                            this: "ALTER SESSION".to_string(),
17622                        })))
17623                    }
17624                }
17625            }
17626            _ => {
17627                // MySQL: ALTER ALGORITHM = val VIEW, ALTER DEFINER = val VIEW,
17628                // ALTER SQL SECURITY = val VIEW
17629                let mut view_algorithm = None;
17630                let mut view_definer = None;
17631                let mut view_sql_security = None;
17632
17633                loop {
17634                    if self.check_identifier("ALGORITHM") {
17635                        self.skip();
17636                        self.expect(TokenType::Eq)?;
17637                        view_algorithm =
17638                            Some(self.expect_identifier_or_keyword()?.to_ascii_uppercase());
17639                    } else if self.check_identifier("DEFINER") {
17640                        self.skip();
17641                        self.expect(TokenType::Eq)?;
17642                        // Parse user@host format: 'admin'@'localhost'
17643                        let mut definer_str = String::new();
17644                        if self.check(TokenType::String) {
17645                            definer_str.push_str(&format!("'{}'", self.advance().text));
17646                        } else {
17647                            definer_str.push_str(&self.expect_identifier_or_keyword()?);
17648                        }
17649                        // Check for @ separator
17650                        if !self.is_at_end() && self.peek().text == "@" {
17651                            definer_str.push_str(&self.advance().text);
17652                            if self.check(TokenType::String) {
17653                                definer_str.push_str(&format!("'{}'", self.advance().text));
17654                            } else if !self.is_at_end() {
17655                                definer_str.push_str(&self.advance().text);
17656                            }
17657                        }
17658                        view_definer = Some(definer_str);
17659                    } else if self.check_identifier("SQL") {
17660                        self.skip();
17661                        if self.match_identifier("SECURITY") {
17662                            self.match_token(TokenType::Eq);
17663                            view_sql_security =
17664                                Some(self.expect_identifier_or_keyword()?.to_ascii_uppercase());
17665                        }
17666                    } else {
17667                        break;
17668                    }
17669                }
17670
17671                if self.check(TokenType::View) {
17672                    self.parse_alter_view_with_modifiers(
17673                        view_algorithm,
17674                        view_definer,
17675                        view_sql_security,
17676                    )
17677                } else {
17678                    // Fall back to Raw for unrecognized ALTER targets
17679                    let start = self.current;
17680                    while !self.is_at_end() && !self.check(TokenType::Semicolon) {
17681                        self.skip();
17682                    }
17683                    let sql = self.tokens_to_sql(start, self.current);
17684                    Ok(Expression::Raw(Raw {
17685                        sql: format!("ALTER {}", sql),
17686                    }))
17687                }
17688            }
17689        }
17690    }
17691
17692    /// Parse a single ALTER TABLE action (e.g. `ADD ...` or `DROP ...`) into an [`AlterTableAction`].
17693    fn parse_alter_action(&mut self) -> Result<AlterTableAction> {
17694        if self.match_token(TokenType::Add) {
17695            // ClickHouse: ADD INDEX idx expr TYPE minmax GRANULARITY 1
17696            // ClickHouse: ADD PROJECTION name (SELECT ...)
17697            // ClickHouse: ADD STATISTICS col1, col2 TYPE tdigest, uniq
17698            // These have different syntax from MySQL ADD INDEX, so consume as Raw
17699            if matches!(
17700                self.config.dialect,
17701                Some(crate::dialects::DialectType::ClickHouse)
17702            ) && (self.check(TokenType::Index)
17703                || self.check_identifier("PROJECTION")
17704                || self.check_identifier("STATISTICS"))
17705            {
17706                let is_statistics = self.check_identifier("STATISTICS");
17707                let mut tokens: Vec<(String, TokenType)> =
17708                    vec![("ADD".to_string(), TokenType::Add)];
17709                let mut paren_depth = 0i32;
17710                while !self.is_at_end() && !self.check(TokenType::Semicolon) {
17711                    // STATISTICS uses commas internally (col1, col2 TYPE t1, t2), don't break at comma
17712                    if self.check(TokenType::Comma) && paren_depth == 0 && !is_statistics {
17713                        break;
17714                    }
17715                    let token = self.advance();
17716                    if token.token_type == TokenType::LParen {
17717                        paren_depth += 1;
17718                    }
17719                    if token.token_type == TokenType::RParen {
17720                        paren_depth -= 1;
17721                    }
17722                    let text = if token.token_type == TokenType::QuotedIdentifier {
17723                        format!("\"{}\"", token.text)
17724                    } else if token.token_type == TokenType::String {
17725                        format!("'{}'", token.text)
17726                    } else {
17727                        token.text.clone()
17728                    };
17729                    tokens.push((text, token.token_type));
17730                }
17731                return Ok(AlterTableAction::Raw {
17732                    sql: self.join_command_tokens(tokens),
17733                });
17734            }
17735            // ADD SEARCH OPTIMIZATION [ON method(columns), ...] — Snowflake
17736            if self.check_identifier("SEARCH")
17737                && self
17738                    .peek_nth(1)
17739                    .map(|t| t.text.eq_ignore_ascii_case("OPTIMIZATION"))
17740                    == Some(true)
17741            {
17742                let mut tokens: Vec<(String, TokenType)> =
17743                    vec![("ADD".to_string(), TokenType::Add)];
17744                let mut paren_depth = 0i32;
17745                while !self.is_at_end() && !self.check(TokenType::Semicolon) {
17746                    if self.check(TokenType::Comma) && paren_depth == 0 {
17747                        break;
17748                    }
17749                    let token = self.advance();
17750                    if token.token_type == TokenType::LParen {
17751                        paren_depth += 1;
17752                    }
17753                    if token.token_type == TokenType::RParen {
17754                        paren_depth -= 1;
17755                    }
17756                    tokens.push((token.text.clone(), token.token_type));
17757                }
17758                return Ok(AlterTableAction::Raw {
17759                    sql: self.join_command_tokens(tokens),
17760                });
17761            }
17762            // ADD ROW ACCESS POLICY name ON (columns) — Snowflake
17763            if self.check_identifier("ROW")
17764                && self
17765                    .peek_nth(1)
17766                    .map(|t| t.text.eq_ignore_ascii_case("ACCESS"))
17767                    == Some(true)
17768            {
17769                let mut tokens: Vec<(String, TokenType)> =
17770                    vec![("ADD".to_string(), TokenType::Add)];
17771                let mut paren_depth = 0i32;
17772                while !self.is_at_end() && !self.check(TokenType::Semicolon) {
17773                    if self.check(TokenType::Comma) && paren_depth == 0 {
17774                        break;
17775                    }
17776                    let token = self.advance();
17777                    if token.token_type == TokenType::LParen {
17778                        paren_depth += 1;
17779                    }
17780                    if token.token_type == TokenType::RParen {
17781                        paren_depth -= 1;
17782                    }
17783                    tokens.push((token.text.clone(), token.token_type));
17784                }
17785                return Ok(AlterTableAction::Raw {
17786                    sql: self.join_command_tokens(tokens),
17787                });
17788            }
17789            // ADD CONSTRAINT or ADD COLUMN or ADD INDEX
17790            if self.match_token(TokenType::Constraint) {
17791                // ADD CONSTRAINT name ...
17792                let name = Some(self.expect_identifier_with_quoted()?);
17793                let constraint = self.parse_constraint_definition(name)?;
17794                Ok(AlterTableAction::AddConstraint(constraint))
17795            } else if self.check(TokenType::PrimaryKey)
17796                || self.check(TokenType::ForeignKey)
17797                || self.check(TokenType::Check)
17798            {
17799                // ADD PRIMARY KEY / FOREIGN KEY / CHECK (without CONSTRAINT keyword)
17800                let constraint = self.parse_table_constraint()?;
17801                Ok(AlterTableAction::AddConstraint(constraint))
17802            } else if self.check(TokenType::Index)
17803                || self.check(TokenType::Key)
17804                || self.check(TokenType::Unique)
17805                || self.check_identifier("FULLTEXT")
17806                || self.check_identifier("SPATIAL")
17807            {
17808                // ADD [UNIQUE|FULLTEXT|SPATIAL] [{INDEX|KEY}] [name] (columns) [USING {BTREE|HASH}]
17809                let kind = if self.match_token(TokenType::Unique) {
17810                    Some("UNIQUE".to_string())
17811                } else if self.match_identifier("FULLTEXT") {
17812                    Some("FULLTEXT".to_string())
17813                } else if self.match_identifier("SPATIAL") {
17814                    Some("SPATIAL".to_string())
17815                } else {
17816                    None
17817                };
17818                // Consume optional INDEX or KEY keyword, track which was used
17819                let use_key_keyword = if self.match_token(TokenType::Key) {
17820                    true
17821                } else {
17822                    self.match_token(TokenType::Index);
17823                    false
17824                };
17825
17826                // Optional index name (before the columns)
17827                let name = if !self.check(TokenType::LParen) && !self.check(TokenType::Using) {
17828                    Some(self.expect_identifier_with_quoted()?)
17829                } else {
17830                    None
17831                };
17832
17833                // Parse columns (with optional prefix length and DESC)
17834                self.expect(TokenType::LParen)?;
17835                let columns = self.parse_index_identifier_list()?;
17836                self.expect(TokenType::RParen)?;
17837
17838                // Parse optional USING BTREE|HASH
17839                let modifiers = self.parse_constraint_modifiers();
17840
17841                Ok(AlterTableAction::AddConstraint(TableConstraint::Index {
17842                    name,
17843                    columns,
17844                    kind,
17845                    modifiers,
17846                    use_key_keyword,
17847                    expression: None,
17848                    index_type: None,
17849                    granularity: None,
17850                }))
17851            } else if self.match_identifier("COLUMNS") {
17852                // ADD COLUMNS (col1 TYPE, col2 TYPE, ...) [CASCADE] - Hive/Spark syntax
17853                self.expect(TokenType::LParen)?;
17854                let mut columns = Vec::new();
17855                loop {
17856                    let col_def = self.parse_column_def()?;
17857                    columns.push(col_def);
17858                    if !self.match_token(TokenType::Comma) {
17859                        break;
17860                    }
17861                }
17862                self.expect(TokenType::RParen)?;
17863                let cascade = self.match_token(TokenType::Cascade);
17864                Ok(AlterTableAction::AddColumns { columns, cascade })
17865            } else if self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]) {
17866                // ADD IF NOT EXISTS PARTITION(key = value) - Hive/Spark syntax
17867                // ADD IF NOT EXISTS col1 INT, col2 INT - Snowflake syntax
17868                if self.match_token(TokenType::Partition) {
17869                    self.expect(TokenType::LParen)?;
17870                    let mut partition_exprs = Vec::new();
17871                    loop {
17872                        if let Some(expr) = self.parse_conjunction()? {
17873                            partition_exprs.push(expr);
17874                        }
17875                        if !self.match_token(TokenType::Comma) {
17876                            break;
17877                        }
17878                    }
17879                    self.expect(TokenType::RParen)?;
17880                    let partition =
17881                        Expression::Partition(Box::new(crate::expressions::Partition {
17882                            expressions: partition_exprs,
17883                            subpartition: false,
17884                        }));
17885                    let location = if self.match_text_seq(&["LOCATION"]) {
17886                        self.parse_property()?
17887                    } else {
17888                        None
17889                    };
17890                    return Ok(AlterTableAction::AddPartition {
17891                        partition,
17892                        if_not_exists: true,
17893                        location,
17894                    });
17895                } else {
17896                    // Snowflake: ADD IF NOT EXISTS col1 INT, [IF NOT EXISTS] col2 INT
17897                    // Parse just the first column; the caller's comma loop handles the rest
17898                    let col_def = self.parse_column_def()?;
17899                    return Ok(AlterTableAction::AddColumn {
17900                        column: col_def,
17901                        if_not_exists: true,
17902                        position: None,
17903                    });
17904                }
17905            } else if self.check(TokenType::Partition) {
17906                // ADD PARTITION(key = value) - Hive/Spark syntax
17907                self.skip(); // consume PARTITION
17908                self.expect(TokenType::LParen)?;
17909                let mut partition_exprs = Vec::new();
17910                loop {
17911                    if let Some(expr) = self.parse_conjunction()? {
17912                        partition_exprs.push(expr);
17913                    }
17914                    if !self.match_token(TokenType::Comma) {
17915                        break;
17916                    }
17917                }
17918                self.expect(TokenType::RParen)?;
17919                let partition = Expression::Partition(Box::new(crate::expressions::Partition {
17920                    expressions: partition_exprs,
17921                    subpartition: false,
17922                }));
17923                let location = if self.match_text_seq(&["LOCATION"]) {
17924                    // Parse the LOCATION value (typically a string literal like 'path')
17925                    Some(self.parse_primary()?)
17926                } else {
17927                    None
17928                };
17929                Ok(AlterTableAction::AddPartition {
17930                    partition,
17931                    if_not_exists: false,
17932                    location,
17933                })
17934            } else {
17935                // ADD COLUMN or ADD (col1 TYPE, col2 TYPE) for Oracle
17936                let has_column_keyword = self.match_token(TokenType::Column); // optional COLUMN keyword
17937
17938                // Check for Oracle-style ADD (col1 TYPE, col2 TYPE, ...) without COLUMN keyword
17939                if !has_column_keyword && self.check(TokenType::LParen) {
17940                    // Oracle multi-column ADD syntax: ADD (col1 TYPE, col2 TYPE, ...)
17941                    self.skip(); // consume '('
17942                    let mut columns = Vec::new();
17943                    loop {
17944                        let col_def = self.parse_column_def()?;
17945                        columns.push(col_def);
17946                        if !self.match_token(TokenType::Comma) {
17947                            break;
17948                        }
17949                    }
17950                    self.expect(TokenType::RParen)?;
17951                    // Use AddColumns with cascade=false for Oracle syntax
17952                    Ok(AlterTableAction::AddColumns {
17953                        columns,
17954                        cascade: false,
17955                    })
17956                } else {
17957                    // Handle IF NOT EXISTS for ADD COLUMN
17958                    let if_not_exists =
17959                        self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
17960                    let col_def = self.parse_column_def()?;
17961                    // Check for FIRST or AFTER position modifiers (MySQL/MariaDB)
17962                    let position = if self.match_token(TokenType::First) {
17963                        Some(ColumnPosition::First)
17964                    } else if self.match_token(TokenType::After) {
17965                        let after_col = self.expect_identifier()?;
17966                        // ClickHouse: AFTER n.a (dotted nested column name)
17967                        let after_name = if self.match_token(TokenType::Dot) {
17968                            let field = self.expect_identifier()?;
17969                            format!("{}.{}", after_col, field)
17970                        } else {
17971                            after_col
17972                        };
17973                        Some(ColumnPosition::After(Identifier::new(after_name)))
17974                    } else {
17975                        None
17976                    };
17977                    Ok(AlterTableAction::AddColumn {
17978                        column: col_def,
17979                        if_not_exists,
17980                        position,
17981                    })
17982                }
17983            }
17984        } else if self.match_token(TokenType::Drop) {
17985            // ClickHouse: DROP INDEX idx, DROP PROJECTION name, DROP STATISTICS, etc.
17986            // These have different syntax from MySQL, so consume as Raw
17987            if matches!(
17988                self.config.dialect,
17989                Some(crate::dialects::DialectType::ClickHouse)
17990            ) && (self.check(TokenType::Index)
17991                || self.check_identifier("PROJECTION")
17992                || self.check_identifier("STATISTICS")
17993                || self.check_identifier("DETACHED")
17994                || self.check_identifier("PART"))
17995            {
17996                let is_statistics = self.check_identifier("STATISTICS");
17997                let mut tokens: Vec<(String, TokenType)> =
17998                    vec![("DROP".to_string(), TokenType::Drop)];
17999                let mut paren_depth = 0i32;
18000                while !self.is_at_end() && !self.check(TokenType::Semicolon) {
18001                    if self.check(TokenType::Comma) && paren_depth == 0 && !is_statistics {
18002                        break;
18003                    }
18004                    let token = self.advance();
18005                    if token.token_type == TokenType::LParen {
18006                        paren_depth += 1;
18007                    }
18008                    if token.token_type == TokenType::RParen {
18009                        paren_depth -= 1;
18010                    }
18011                    let text = if token.token_type == TokenType::QuotedIdentifier {
18012                        format!("\"{}\"", token.text)
18013                    } else if token.token_type == TokenType::String {
18014                        format!("'{}'", token.text)
18015                    } else {
18016                        token.text.clone()
18017                    };
18018                    tokens.push((text, token.token_type));
18019                }
18020                return Ok(AlterTableAction::Raw {
18021                    sql: self.join_command_tokens(tokens),
18022                });
18023            }
18024            // DROP SEARCH OPTIMIZATION [ON method(columns), ...] — Snowflake
18025            if self.check_identifier("SEARCH")
18026                && self
18027                    .peek_nth(1)
18028                    .map(|t| t.text.eq_ignore_ascii_case("OPTIMIZATION"))
18029                    == Some(true)
18030            {
18031                let mut tokens: Vec<(String, TokenType)> =
18032                    vec![("DROP".to_string(), TokenType::Drop)];
18033                let mut paren_depth = 0i32;
18034                while !self.is_at_end() && !self.check(TokenType::Semicolon) {
18035                    if self.check(TokenType::Comma) && paren_depth == 0 {
18036                        break;
18037                    }
18038                    let token = self.advance();
18039                    if token.token_type == TokenType::LParen {
18040                        paren_depth += 1;
18041                    }
18042                    if token.token_type == TokenType::RParen {
18043                        paren_depth -= 1;
18044                    }
18045                    tokens.push((token.text.clone(), token.token_type));
18046                }
18047                return Ok(AlterTableAction::Raw {
18048                    sql: self.join_command_tokens(tokens),
18049                });
18050            }
18051
18052            // DROP [ALL] ROW ACCESS POLICY/POLICIES — Snowflake
18053            if (self.check_identifier("ROW")
18054                && self
18055                    .peek_nth(1)
18056                    .map(|t| t.text.eq_ignore_ascii_case("ACCESS"))
18057                    == Some(true))
18058                || (self.check_identifier("ALL")
18059                    && self
18060                        .peek_nth(1)
18061                        .map(|t| t.text.eq_ignore_ascii_case("ROW"))
18062                        == Some(true))
18063            {
18064                let mut tokens: Vec<(String, TokenType)> =
18065                    vec![("DROP".to_string(), TokenType::Drop)];
18066                let mut paren_depth = 0i32;
18067                while !self.is_at_end() && !self.check(TokenType::Semicolon) {
18068                    if self.check(TokenType::Comma) && paren_depth == 0 {
18069                        break;
18070                    }
18071                    let token = self.advance();
18072                    if token.token_type == TokenType::LParen {
18073                        paren_depth += 1;
18074                    }
18075                    if token.token_type == TokenType::RParen {
18076                        paren_depth -= 1;
18077                    }
18078                    tokens.push((token.text.clone(), token.token_type));
18079                }
18080                return Ok(AlterTableAction::Raw {
18081                    sql: self.join_command_tokens(tokens),
18082                });
18083            }
18084
18085            // Handle IF EXISTS before determining what to drop
18086            let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
18087
18088            if self.match_token(TokenType::Partition) {
18089                // DROP [IF EXISTS] PARTITION expr [, PARTITION expr ...]
18090                // ClickHouse supports: PARTITION 201901, PARTITION ALL,
18091                // PARTITION tuple(...), PARTITION ID '...'
18092                let mut partitions = Vec::new();
18093                loop {
18094                    if self.check(TokenType::LParen) {
18095                        // ClickHouse: PARTITION (expr) or PARTITION (expr, expr, ...)
18096                        // Standard SQL: PARTITION (key=value, ...)
18097                        // Peek ahead: if LParen is followed by String/Number (not identifier=),
18098                        // parse as expression
18099                        let is_ch_expr = matches!(
18100                            self.config.dialect,
18101                            Some(crate::dialects::DialectType::ClickHouse)
18102                        ) && self.current + 1 < self.tokens.len()
18103                            && (self.tokens[self.current + 1].token_type == TokenType::String
18104                                || self.tokens[self.current + 1].token_type == TokenType::Number
18105                                || self.tokens[self.current + 1].token_type == TokenType::LParen
18106                                || (self.current + 2 < self.tokens.len()
18107                                    && self.tokens[self.current + 2].token_type != TokenType::Eq));
18108                        if is_ch_expr {
18109                            // Parse as tuple expression
18110                            let expr = self.parse_expression()?;
18111                            partitions.push(vec![(Identifier::new("__expr__".to_string()), expr)]);
18112                        } else {
18113                            self.skip(); // consume (
18114                            let mut parts = Vec::new();
18115                            loop {
18116                                let key = self.expect_identifier()?;
18117                                self.expect(TokenType::Eq)?;
18118                                let value = self.parse_expression()?;
18119                                parts.push((Identifier::new(key), value));
18120                                if !self.match_token(TokenType::Comma) {
18121                                    break;
18122                                }
18123                            }
18124                            self.expect(TokenType::RParen)?;
18125                            partitions.push(parts);
18126                        }
18127                    } else if self.match_text_seq(&["ALL"]) {
18128                        // ClickHouse: PARTITION ALL
18129                        partitions.push(vec![(
18130                            Identifier::new("ALL".to_string()),
18131                            Expression::Boolean(BooleanLiteral { value: true }),
18132                        )]);
18133                    } else if self.match_text_seq(&["ID"]) {
18134                        // ClickHouse: PARTITION ID 'string'
18135                        let id_val = self.parse_expression()?;
18136                        partitions.push(vec![(Identifier::new("ID".to_string()), id_val)]);
18137                    } else {
18138                        // ClickHouse: PARTITION <expression> (number, tuple(...), etc.)
18139                        let expr = self.parse_expression()?;
18140                        partitions.push(vec![(Identifier::new("__expr__".to_string()), expr)]);
18141                    }
18142                    // Check for ", PARTITION" for multiple partitions
18143                    if self.match_token(TokenType::Comma) {
18144                        if !self.match_token(TokenType::Partition) {
18145                            break;
18146                        }
18147                    } else {
18148                        break;
18149                    }
18150                }
18151                Ok(AlterTableAction::DropPartition {
18152                    partitions,
18153                    if_exists,
18154                })
18155            } else if self.match_token(TokenType::Column) {
18156                // DROP [IF EXISTS] COLUMN [IF EXISTS] name [CASCADE]
18157                // Check for IF EXISTS after COLUMN as well
18158                let if_exists =
18159                    if_exists || self.match_keywords(&[TokenType::If, TokenType::Exists]);
18160                let mut name = self.expect_identifier_with_quoted()?;
18161                // ClickHouse: nested column names like n.ui8
18162                if matches!(
18163                    self.config.dialect,
18164                    Some(crate::dialects::DialectType::ClickHouse)
18165                ) && self.match_token(TokenType::Dot)
18166                {
18167                    let sub = self.expect_identifier_with_quoted()?;
18168                    name.name = format!("{}.{}", name.name, sub.name);
18169                }
18170                let cascade = self.match_token(TokenType::Cascade);
18171                Ok(AlterTableAction::DropColumn {
18172                    name,
18173                    if_exists,
18174                    cascade,
18175                })
18176            } else if self.match_token(TokenType::Constraint) {
18177                // DROP [IF EXISTS] CONSTRAINT name
18178                let name = self.expect_identifier_with_quoted()?;
18179                Ok(AlterTableAction::DropConstraint { name, if_exists })
18180            } else if self.match_keywords(&[TokenType::ForeignKey, TokenType::Key]) {
18181                // DROP FOREIGN KEY name (Oracle/MySQL)
18182                let name = self.expect_identifier_with_quoted()?;
18183                Ok(AlterTableAction::DropForeignKey { name })
18184            } else if self.check_identifier("COLUMNS") && self.check_next(TokenType::LParen) {
18185                // DROP COLUMNS (col1, col2, ...) - Spark/Databricks syntax
18186                self.skip(); // consume COLUMNS
18187                self.expect(TokenType::LParen)?;
18188                let mut names = Vec::new();
18189                loop {
18190                    let name = self.expect_identifier_with_quoted()?;
18191                    names.push(name);
18192                    if !self.match_token(TokenType::Comma) {
18193                        break;
18194                    }
18195                }
18196                self.expect(TokenType::RParen)?;
18197                Ok(AlterTableAction::DropColumns { names })
18198            } else {
18199                // DROP [IF EXISTS] name (implicit column) [CASCADE]
18200                let mut name = self.expect_identifier_with_quoted()?;
18201                // ClickHouse: nested column names like n.ui8
18202                if matches!(
18203                    self.config.dialect,
18204                    Some(crate::dialects::DialectType::ClickHouse)
18205                ) && self.match_token(TokenType::Dot)
18206                {
18207                    let sub = self.expect_identifier_with_quoted()?;
18208                    name.name = format!("{}.{}", name.name, sub.name);
18209                }
18210                let cascade = self.match_token(TokenType::Cascade);
18211                Ok(AlterTableAction::DropColumn {
18212                    name,
18213                    if_exists,
18214                    cascade,
18215                })
18216            }
18217        } else if self.match_token(TokenType::Rename) {
18218            if self.match_token(TokenType::Column) {
18219                // RENAME COLUMN [IF EXISTS] old TO new
18220                let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
18221                let mut old_name = self.expect_identifier_or_safe_keyword_with_quoted()?;
18222                // ClickHouse: nested column names like n.x
18223                if matches!(
18224                    self.config.dialect,
18225                    Some(crate::dialects::DialectType::ClickHouse)
18226                ) && self.match_token(TokenType::Dot)
18227                {
18228                    let field = self.expect_identifier_with_quoted()?;
18229                    old_name = Identifier {
18230                        name: format!("{}.{}", old_name.name, field.name),
18231                        quoted: false,
18232                        trailing_comments: Vec::new(),
18233                        span: None,
18234                    };
18235                }
18236                self.expect(TokenType::To)?;
18237                let mut new_name = self.expect_identifier_or_safe_keyword_with_quoted()?;
18238                // ClickHouse: nested column names like n.y
18239                if matches!(
18240                    self.config.dialect,
18241                    Some(crate::dialects::DialectType::ClickHouse)
18242                ) && self.match_token(TokenType::Dot)
18243                {
18244                    let field = self.expect_identifier_or_safe_keyword_with_quoted()?;
18245                    new_name = Identifier {
18246                        name: format!("{}.{}", new_name.name, field.name),
18247                        quoted: false,
18248                        trailing_comments: Vec::new(),
18249                        span: None,
18250                    };
18251                }
18252                Ok(AlterTableAction::RenameColumn {
18253                    old_name,
18254                    new_name,
18255                    if_exists,
18256                })
18257            } else if self.match_token(TokenType::To) {
18258                // RENAME TO new_table
18259                let new_name = self.parse_table_ref()?;
18260                Ok(AlterTableAction::RenameTable(new_name))
18261            } else if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
18262                // StarRocks/Doris: RENAME new_name (without TO)
18263                // SQLite: RENAME old_name TO new_name (without COLUMN keyword)
18264                let first_name = self.expect_identifier_with_quoted()?;
18265                if self.match_token(TokenType::To) {
18266                    let new_name = self.expect_identifier_with_quoted()?;
18267                    Ok(AlterTableAction::RenameColumn {
18268                        old_name: first_name,
18269                        new_name,
18270                        if_exists: false,
18271                    })
18272                } else {
18273                    // No TO keyword: treat as RENAME TABLE (StarRocks/Doris)
18274                    Ok(AlterTableAction::RenameTable(TableRef::new(
18275                        first_name.name,
18276                    )))
18277                }
18278            } else {
18279                Err(self.parse_error("Expected COLUMN or TO after RENAME"))
18280            }
18281        } else if self.match_token(TokenType::Alter) {
18282            // Check for ALTER INDEX (MySQL: ALTER TABLE t ALTER INDEX i VISIBLE/INVISIBLE)
18283            if self.match_token(TokenType::Index) {
18284                let name = self.expect_identifier_with_quoted()?;
18285                let visible = if self.match_identifier("VISIBLE") {
18286                    true
18287                } else if self.match_identifier("INVISIBLE") {
18288                    false
18289                } else {
18290                    return Err(
18291                        self.parse_error("Expected VISIBLE or INVISIBLE after ALTER INDEX name")
18292                    );
18293                };
18294                Ok(AlterTableAction::AlterIndex { name, visible })
18295            } else if self.check_identifier("SORTKEY") {
18296                // Redshift: ALTER TABLE t ALTER SORTKEY AUTO|NONE|(col1, col2)
18297                self.skip(); // consume SORTKEY
18298                if self.match_texts(&["AUTO", "NONE"]) {
18299                    let style = self.previous().text.to_ascii_uppercase();
18300                    Ok(AlterTableAction::AlterSortKey {
18301                        this: Some(style),
18302                        expressions: Vec::new(),
18303                        compound: false,
18304                    })
18305                } else if self.check(TokenType::LParen) {
18306                    // (col1, col2) syntax
18307                    let wrapped = self.parse_wrapped_id_vars()?;
18308                    let expressions = if let Some(Expression::Tuple(t)) = wrapped {
18309                        t.expressions
18310                    } else {
18311                        Vec::new()
18312                    };
18313                    Ok(AlterTableAction::AlterSortKey {
18314                        this: None,
18315                        expressions,
18316                        compound: false,
18317                    })
18318                } else {
18319                    Err(self.parse_error("Expected AUTO, NONE, or (columns) after SORTKEY"))
18320                }
18321            } else if self.check_identifier("COMPOUND") {
18322                // Redshift: ALTER TABLE t ALTER COMPOUND SORTKEY (col1, col2)
18323                self.skip(); // consume COMPOUND
18324                if !self.match_identifier("SORTKEY") {
18325                    return Err(self.parse_error("Expected SORTKEY after COMPOUND"));
18326                }
18327                if self.check(TokenType::LParen) {
18328                    let wrapped = self.parse_wrapped_id_vars()?;
18329                    let expressions = if let Some(Expression::Tuple(t)) = wrapped {
18330                        t.expressions
18331                    } else {
18332                        Vec::new()
18333                    };
18334                    Ok(AlterTableAction::AlterSortKey {
18335                        this: None,
18336                        expressions,
18337                        compound: true,
18338                    })
18339                } else {
18340                    Err(self.parse_error("Expected (columns) after COMPOUND SORTKEY"))
18341                }
18342            } else if self.check_identifier("DISTSTYLE") {
18343                // Redshift: ALTER TABLE t ALTER DISTSTYLE ALL|EVEN|AUTO|KEY [DISTKEY col]
18344                self.skip(); // consume DISTSTYLE
18345                if self.match_texts(&["ALL", "EVEN", "AUTO"]) {
18346                    let style = self.previous().text.to_ascii_uppercase();
18347                    Ok(AlterTableAction::AlterDistStyle {
18348                        style,
18349                        distkey: None,
18350                    })
18351                } else if self.match_token(TokenType::Key) || self.match_identifier("KEY") {
18352                    // DISTSTYLE KEY DISTKEY col
18353                    if !self.match_identifier("DISTKEY") {
18354                        return Err(self.parse_error("Expected DISTKEY after DISTSTYLE KEY"));
18355                    }
18356                    let col = self.expect_identifier_with_quoted()?;
18357                    Ok(AlterTableAction::AlterDistStyle {
18358                        style: "KEY".to_string(),
18359                        distkey: Some(col),
18360                    })
18361                } else {
18362                    Err(self.parse_error("Expected ALL, EVEN, AUTO, or KEY after DISTSTYLE"))
18363                }
18364            } else if self.check_identifier("DISTKEY") {
18365                // Redshift: ALTER TABLE t ALTER DISTKEY col (shorthand for DISTSTYLE KEY DISTKEY col)
18366                self.skip(); // consume DISTKEY
18367                let col = self.expect_identifier_with_quoted()?;
18368                Ok(AlterTableAction::AlterDistStyle {
18369                    style: "KEY".to_string(),
18370                    distkey: Some(col),
18371                })
18372            } else {
18373                // ALTER COLUMN
18374                self.match_token(TokenType::Column); // optional COLUMN keyword
18375                let name = self.expect_identifier_with_quoted()?;
18376                let action = self.parse_alter_column_action()?;
18377                Ok(AlterTableAction::AlterColumn {
18378                    name,
18379                    action,
18380                    use_modify_keyword: false,
18381                })
18382            }
18383        } else if self.match_identifier("MODIFY") {
18384            // ClickHouse: MODIFY ORDER BY, MODIFY SETTING, MODIFY TTL, MODIFY QUERY,
18385            // MODIFY COLUMN name type [DEFAULT|MATERIALIZED|ALIAS] [CODEC] [TTL] [COMMENT], etc.
18386            // These are ClickHouse-specific and have richer syntax than MySQL MODIFY COLUMN.
18387            // Consume all ClickHouse MODIFY actions as Raw.
18388            if matches!(
18389                self.config.dialect,
18390                Some(crate::dialects::DialectType::ClickHouse)
18391            ) {
18392                // MODIFY SETTING uses commas between settings (not action separators)
18393                let is_setting =
18394                    self.check(TokenType::Settings) || self.check_identifier("SETTING");
18395                let mut tokens: Vec<(String, TokenType)> =
18396                    vec![("MODIFY".to_string(), TokenType::Var)];
18397                let mut paren_depth = 0i32;
18398                while !self.is_at_end() && !self.check(TokenType::Semicolon) {
18399                    if self.check(TokenType::Comma) && paren_depth == 0 && !is_setting {
18400                        break;
18401                    }
18402                    let token = self.advance();
18403                    if token.token_type == TokenType::LParen {
18404                        paren_depth += 1;
18405                    }
18406                    if token.token_type == TokenType::RParen {
18407                        paren_depth -= 1;
18408                    }
18409                    let text = if token.token_type == TokenType::QuotedIdentifier {
18410                        format!("\"{}\"", token.text)
18411                    } else if token.token_type == TokenType::String {
18412                        format!("'{}'", token.text)
18413                    } else {
18414                        token.text.clone()
18415                    };
18416                    tokens.push((text, token.token_type));
18417                }
18418                return Ok(AlterTableAction::Raw {
18419                    sql: self.join_command_tokens(tokens),
18420                });
18421            }
18422            // MODIFY COLUMN (MySQL/Snowflake syntax — routes through same action parser as ALTER COLUMN)
18423            self.match_token(TokenType::Column); // optional COLUMN keyword
18424            let name = Identifier::new(self.expect_identifier()?);
18425            let action = self.parse_alter_column_action()?;
18426            Ok(AlterTableAction::AlterColumn {
18427                name,
18428                action,
18429                use_modify_keyword: true,
18430            })
18431        } else if self.match_identifier("CHANGE") {
18432            // CHANGE [COLUMN] old_name new_name [data_type] [COMMENT 'comment'] - Hive/MySQL/SingleStore syntax
18433            // In SingleStore, data_type can be omitted for simple renames
18434            self.match_token(TokenType::Column); // optional COLUMN keyword
18435            let old_name = Identifier::new(self.expect_identifier()?);
18436            let new_name = Identifier::new(self.expect_identifier()?);
18437            // Try to parse data type - it's optional in SingleStore
18438            let data_type = if !self.is_at_end()
18439                && !self.check(TokenType::Comment)
18440                && !self.check(TokenType::Comma)
18441                && !self.check(TokenType::Semicolon)
18442            {
18443                // Check if next token could start a data type
18444                let tok = self.peek();
18445                if tok.token_type.is_keyword()
18446                    || tok.token_type == TokenType::Identifier
18447                    || tok.token_type == TokenType::Var
18448                {
18449                    Some(self.parse_data_type()?)
18450                } else {
18451                    None
18452                }
18453            } else {
18454                None
18455            };
18456            let comment = if self.match_token(TokenType::Comment) {
18457                Some(self.expect_string()?)
18458            } else {
18459                None
18460            };
18461            let cascade = self.match_text_seq(&["CASCADE"]);
18462            // Also check for RESTRICT (the opposite, just consume it)
18463            if !cascade {
18464                self.match_text_seq(&["RESTRICT"]);
18465            }
18466            Ok(AlterTableAction::ChangeColumn {
18467                old_name,
18468                new_name,
18469                data_type,
18470                comment,
18471                cascade,
18472            })
18473        } else if self.match_token(TokenType::Constraint) {
18474            // CONSTRAINT name ... (implicit ADD, CONSTRAINT already consumed)
18475            // Parse the constraint name and then the constraint definition
18476            let name = Some(self.expect_identifier_with_quoted()?);
18477            let constraint = self.parse_constraint_definition(name)?;
18478            Ok(AlterTableAction::AddConstraint(constraint))
18479        } else if self.check(TokenType::PrimaryKey)
18480            || self.check(TokenType::ForeignKey)
18481            || self.check(TokenType::Unique)
18482        {
18483            // ADD CONSTRAINT (implicit ADD, no CONSTRAINT keyword)
18484            let constraint = self.parse_table_constraint()?;
18485            Ok(AlterTableAction::AddConstraint(constraint))
18486        } else if self.match_token(TokenType::Delete) {
18487            // ALTER TABLE t DELETE WHERE x = 1 (BigQuery syntax)
18488            self.expect(TokenType::Where)?;
18489            let where_clause = self.parse_expression()?;
18490            Ok(AlterTableAction::Delete { where_clause })
18491        } else if self.match_keyword("SWAP") {
18492            // Snowflake: ALTER TABLE a SWAP WITH b
18493            self.expect(TokenType::With)?;
18494            let target = self.parse_table_ref()?;
18495            Ok(AlterTableAction::SwapWith(target))
18496        } else if self.match_token(TokenType::Set) {
18497            // TSQL: ALTER TABLE t SET (SYSTEM_VERSIONING=ON, DATA_DELETION=ON, ...)
18498            if self.check(TokenType::LParen) {
18499                self.skip(); // consume (
18500                let mut expressions = Vec::new();
18501                loop {
18502                    if self.check(TokenType::RParen) {
18503                        break;
18504                    }
18505                    if self.check_identifier("SYSTEM_VERSIONING") {
18506                        let expr = self.parse_system_versioning_option()?;
18507                        expressions.push(expr);
18508                    } else if self.check_identifier("DATA_DELETION") {
18509                        let expr = self.parse_data_deletion_option()?;
18510                        expressions.push(expr);
18511                    } else {
18512                        // Generic key=value (e.g., FILESTREAM_ON = 'test')
18513                        let expr = self.parse_expression()?;
18514                        expressions.push(expr);
18515                    }
18516                    if !self.match_token(TokenType::Comma) {
18517                        break;
18518                    }
18519                }
18520                self.expect(TokenType::RParen)?;
18521                Ok(AlterTableAction::SetOptions { expressions })
18522            } else if self.match_keyword("TAG") {
18523                // Snowflake: SET TAG key='value', ... (key can be qualified like schema.tagname)
18524                let mut tags = Vec::new();
18525                loop {
18526                    // Parse qualified tag name (e.g., foo.bar or just bar)
18527                    let mut key = self.expect_identifier_or_keyword()?;
18528                    while self.match_token(TokenType::Dot) {
18529                        let next = self.expect_identifier_or_keyword()?;
18530                        key = format!("{}.{}", key, next);
18531                    }
18532                    self.expect(TokenType::Eq)?;
18533                    let value = self.parse_primary()?;
18534                    tags.push((key, value));
18535                    if !self.match_token(TokenType::Comma) {
18536                        break;
18537                    }
18538                }
18539                Ok(AlterTableAction::SetTag { expressions: tags })
18540            } else if self.check_identifier("LOGGED") {
18541                // PostgreSQL: ALTER TABLE t SET LOGGED
18542                self.skip();
18543                Ok(AlterTableAction::SetAttribute {
18544                    attribute: "LOGGED".to_string(),
18545                })
18546            } else if self.check_identifier("UNLOGGED") {
18547                // PostgreSQL: ALTER TABLE t SET UNLOGGED
18548                self.skip();
18549                Ok(AlterTableAction::SetAttribute {
18550                    attribute: "UNLOGGED".to_string(),
18551                })
18552            } else if self.match_identifier("WITHOUT") {
18553                // PostgreSQL: ALTER TABLE t SET WITHOUT CLUSTER/OIDS
18554                let what = self.expect_identifier_or_keyword()?;
18555                Ok(AlterTableAction::SetAttribute {
18556                    attribute: format!("WITHOUT {}", what),
18557                })
18558            } else if self.check_identifier("ACCESS") {
18559                // PostgreSQL: ALTER TABLE t SET ACCESS METHOD method
18560                self.skip();
18561                // Consume "METHOD"
18562                if !self.match_identifier("METHOD") {
18563                    return Err(self.parse_error("Expected METHOD after ACCESS"));
18564                }
18565                let method = self.expect_identifier_or_keyword()?;
18566                Ok(AlterTableAction::SetAttribute {
18567                    attribute: format!("ACCESS METHOD {}", method),
18568                })
18569            } else if self.check_identifier("TABLESPACE") {
18570                // PostgreSQL: ALTER TABLE t SET TABLESPACE tablespace
18571                self.skip();
18572                let name = self.expect_identifier_or_keyword()?;
18573                Ok(AlterTableAction::SetAttribute {
18574                    attribute: format!("TABLESPACE {}", name),
18575                })
18576            } else if self.check_identifier("STAGE_FILE_FORMAT") {
18577                // Snowflake: ALTER TABLE t SET STAGE_FILE_FORMAT = (options)
18578                self.skip();
18579                let options = self.parse_wrapped_options()?;
18580                Ok(AlterTableAction::SetStageFileFormat { options })
18581            } else if self.check_identifier("STAGE_COPY_OPTIONS") {
18582                // Snowflake: ALTER TABLE t SET STAGE_COPY_OPTIONS = (options)
18583                self.skip();
18584                let options = self.parse_wrapped_options()?;
18585                Ok(AlterTableAction::SetStageCopyOptions { options })
18586            } else if self.match_token(TokenType::Authorization) {
18587                // Trino: ALTER TABLE t SET AUTHORIZATION [ROLE] user
18588                let mut auth_text = String::new();
18589                if self.match_texts(&["ROLE"]) {
18590                    auth_text.push_str("ROLE ");
18591                }
18592                let user = self.expect_identifier_or_keyword()?;
18593                auth_text.push_str(&user);
18594                Ok(AlterTableAction::SetAttribute {
18595                    attribute: format!("AUTHORIZATION {}", auth_text),
18596                })
18597            } else if self.match_identifier("PROPERTIES") {
18598                // Trino: ALTER TABLE t SET PROPERTIES x = 'y', ...
18599                let mut properties = Vec::new();
18600                loop {
18601                    // Parse property name (could be identifier or string literal)
18602                    let key = if self.check(TokenType::String) {
18603                        self.expect_string()?
18604                    } else {
18605                        self.expect_identifier_or_keyword()?
18606                    };
18607                    self.expect(TokenType::Eq)?;
18608                    // Parse value (could be DEFAULT or an expression)
18609                    let value = if self.match_token(TokenType::Default) {
18610                        // Use Var instead of Identifier so it won't be quoted
18611                        Expression::Var(Box::new(crate::expressions::Var {
18612                            this: "DEFAULT".to_string(),
18613                        }))
18614                    } else {
18615                        self.parse_expression()?
18616                    };
18617                    properties.push((key, value));
18618                    if !self.match_token(TokenType::Comma) {
18619                        break;
18620                    }
18621                }
18622                Ok(AlterTableAction::SetProperty { properties })
18623            } else if self.match_text_seq(&["TABLE", "PROPERTIES"]) {
18624                // Redshift: ALTER TABLE t SET TABLE PROPERTIES ('a' = '5', 'b' = 'c')
18625                self.expect(TokenType::LParen)?;
18626                let mut properties = Vec::new();
18627                loop {
18628                    if self.check(TokenType::RParen) {
18629                        break;
18630                    }
18631                    // Parse key (string literal)
18632                    let key = self.parse_primary()?;
18633                    self.expect(TokenType::Eq)?;
18634                    // Parse value (string literal)
18635                    let value = self.parse_primary()?;
18636                    properties.push((key, value));
18637                    if !self.match_token(TokenType::Comma) {
18638                        break;
18639                    }
18640                }
18641                self.expect(TokenType::RParen)?;
18642                Ok(AlterTableAction::SetTableProperties { properties })
18643            } else if self.match_text_seq(&["LOCATION"]) {
18644                // Redshift: ALTER TABLE t SET LOCATION 's3://bucket/folder/'
18645                let location = self.expect_string()?;
18646                Ok(AlterTableAction::SetLocation { location })
18647            } else if self.match_text_seq(&["FILE", "FORMAT"]) {
18648                // Redshift: ALTER TABLE t SET FILE FORMAT AVRO
18649                let format = self.expect_identifier_or_keyword()?;
18650                Ok(AlterTableAction::SetFileFormat { format })
18651            } else if self.peek_nth(1).map(|t| t.token_type) != Some(TokenType::Eq) {
18652                // SET <multi-word clause> (e.g., SET PROJECTION POLICY name) — consume as Raw
18653                let mut tokens: Vec<(String, TokenType)> =
18654                    vec![("SET".to_string(), TokenType::Set)];
18655                while !self.is_at_end() && !self.check(TokenType::Semicolon) {
18656                    if self.check(TokenType::Comma) {
18657                        break;
18658                    }
18659                    tokens.push((self.advance().text.clone(), TokenType::Var));
18660                }
18661                Ok(AlterTableAction::Raw {
18662                    sql: self.join_command_tokens(tokens),
18663                })
18664            } else {
18665                // Snowflake: SET property=value, ...
18666                let mut properties = Vec::new();
18667                loop {
18668                    let key = self.expect_identifier_or_keyword()?;
18669                    self.expect(TokenType::Eq)?;
18670                    let value = self.parse_expression()?;
18671                    properties.push((key, value));
18672                    if !self.match_token(TokenType::Comma) {
18673                        break;
18674                    }
18675                }
18676                Ok(AlterTableAction::SetProperty { properties })
18677            }
18678        } else if self.match_keyword("UNSET") {
18679            // Snowflake: ALTER TABLE t UNSET property or UNSET TAG key
18680            if self.match_keyword("TAG") {
18681                // UNSET TAG key1, key2 (keys can be qualified like schema.tagname)
18682                let mut names = Vec::new();
18683                loop {
18684                    let mut name = self.expect_identifier_or_keyword()?;
18685                    while self.match_token(TokenType::Dot) {
18686                        let next = self.expect_identifier_or_keyword()?;
18687                        name = format!("{}.{}", name, next);
18688                    }
18689                    names.push(name);
18690                    if !self.match_token(TokenType::Comma) {
18691                        break;
18692                    }
18693                }
18694                Ok(AlterTableAction::UnsetTag { names })
18695            } else if self.peek_nth(1).map(|t| {
18696                t.token_type != TokenType::Comma
18697                    && t.token_type != TokenType::Semicolon
18698                    && t.token_type != TokenType::Eof
18699            }) == Some(true)
18700                && !self.is_at_end()
18701                && self.peek_nth(1).map(|t| t.token_type != TokenType::Eq) == Some(true)
18702            {
18703                // UNSET <multi-word clause> (e.g., UNSET PROJECTION POLICY) — consume as Raw
18704                let mut tokens: Vec<(String, TokenType)> =
18705                    vec![("UNSET".to_string(), TokenType::Var)];
18706                while !self.is_at_end() && !self.check(TokenType::Semicolon) {
18707                    if self.check(TokenType::Comma) {
18708                        break;
18709                    }
18710                    tokens.push((self.advance().text.clone(), TokenType::Var));
18711                }
18712                Ok(AlterTableAction::Raw {
18713                    sql: self.join_command_tokens(tokens),
18714                })
18715            } else {
18716                // UNSET property1, property2
18717                let mut properties = Vec::new();
18718                loop {
18719                    let name = self.expect_identifier_or_keyword()?;
18720                    properties.push(name);
18721                    if !self.match_token(TokenType::Comma) {
18722                        break;
18723                    }
18724                }
18725                Ok(AlterTableAction::UnsetProperty { properties })
18726            }
18727        } else if self.match_keyword("CLUSTER") {
18728            // Snowflake: ALTER TABLE t CLUSTER BY (col1, col2 DESC)
18729            self.expect(TokenType::By)?;
18730            self.expect(TokenType::LParen)?;
18731            // Parse ordered expressions (can have ASC/DESC modifiers)
18732            let ordered = self.parse_order_by_list()?;
18733            // Convert Ordered to Expression (wrapping in Ordered if it has ordering)
18734            let expressions: Vec<Expression> = ordered
18735                .into_iter()
18736                .map(|o| Expression::Ordered(Box::new(o)))
18737                .collect();
18738            self.expect(TokenType::RParen)?;
18739            Ok(AlterTableAction::ClusterBy { expressions })
18740        } else if self.match_token(TokenType::Replace) {
18741            // ClickHouse: REPLACE PARTITION expr FROM table
18742            if self.match_token(TokenType::Partition) {
18743                let partition_expr = if self.match_text_seq(&["ALL"]) {
18744                    Expression::Identifier(Identifier::new("ALL".to_string()))
18745                } else if self.match_text_seq(&["ID"]) {
18746                    let id_val = self.parse_expression()?;
18747                    // Store as Raw to preserve "ID <value>" format
18748                    let id_str = match &id_val {
18749                        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
18750                            let Literal::String(s) = lit.as_ref() else {
18751                                unreachable!()
18752                            };
18753                            format!("ID '{}'", s)
18754                        }
18755                        _ => format!("ID {}", "?"),
18756                    };
18757                    Expression::Raw(Raw { sql: id_str })
18758                } else {
18759                    self.parse_expression()?
18760                };
18761                let source = if self.match_token(TokenType::From) {
18762                    let tref = self.parse_table_ref()?;
18763                    Some(Box::new(Expression::Table(Box::new(tref))))
18764                } else {
18765                    None
18766                };
18767                Ok(AlterTableAction::ReplacePartition {
18768                    partition: partition_expr,
18769                    source,
18770                })
18771            } else {
18772                Err(self.parse_error("Expected PARTITION after REPLACE in ALTER TABLE"))
18773            }
18774        } else if matches!(
18775            self.config.dialect,
18776            Some(crate::dialects::DialectType::ClickHouse)
18777        ) {
18778            // ClickHouse-specific ALTER TABLE mutations: UPDATE, DELETE, DETACH, ATTACH,
18779            // FREEZE, UNFREEZE, MATERIALIZE, CLEAR, COMMENT COLUMN, MODIFY ORDER BY,
18780            // MOVE PARTITION, FETCH PARTITION, ADD INDEX, DROP INDEX, CLEAR INDEX
18781            // For ClickHouse, consume any unrecognized ALTER TABLE action as Raw
18782            // (covers UPDATE, DELETE, DETACH, ATTACH, FREEZE, MOVE, FETCH, etc.)
18783            {
18784                let keyword = self.advance().text.clone();
18785                let mut tokens: Vec<(String, TokenType)> = vec![(keyword, TokenType::Var)];
18786                let mut paren_depth = 0i32;
18787                while !self.is_at_end() && !self.check(TokenType::Semicolon) {
18788                    // Stop at comma only when at top-level (not inside parens) — it separates ALTER actions
18789                    if self.check(TokenType::Comma) && paren_depth == 0 {
18790                        break;
18791                    }
18792                    let token = self.advance();
18793                    if token.token_type == TokenType::LParen {
18794                        paren_depth += 1;
18795                    }
18796                    if token.token_type == TokenType::RParen {
18797                        paren_depth -= 1;
18798                    }
18799                    let text = if token.token_type == TokenType::QuotedIdentifier {
18800                        format!("\"{}\"", token.text)
18801                    } else if token.token_type == TokenType::String {
18802                        format!("'{}'", token.text)
18803                    } else {
18804                        token.text.clone()
18805                    };
18806                    tokens.push((text, token.token_type));
18807                }
18808                Ok(AlterTableAction::Raw {
18809                    sql: self.join_command_tokens(tokens),
18810                })
18811            }
18812        } else if self.check_identifier("REORGANIZE")
18813            || self.check_identifier("COALESCE")
18814            || self.check_identifier("EXCHANGE")
18815            || self.check_identifier("ANALYZE")
18816            || self.check_identifier("OPTIMIZE")
18817            || self.check_identifier("REBUILD")
18818            || self.check_identifier("REPAIR")
18819            || self.check_identifier("DISCARD")
18820            || self.check_identifier("IMPORT")
18821        {
18822            // MySQL partition operations: REORGANIZE PARTITION, COALESCE PARTITION, etc.
18823            // Consume as Raw, respecting parenthesis depth
18824            let keyword = self.advance().text.clone();
18825            let mut tokens: Vec<(String, TokenType)> = vec![(keyword, TokenType::Var)];
18826            let mut paren_depth = 0i32;
18827            while !self.is_at_end() && !self.check(TokenType::Semicolon) {
18828                if self.check(TokenType::Comma) && paren_depth == 0 {
18829                    break;
18830                }
18831                let token = self.advance();
18832                if token.token_type == TokenType::LParen {
18833                    paren_depth += 1;
18834                }
18835                if token.token_type == TokenType::RParen {
18836                    paren_depth -= 1;
18837                    if paren_depth < 0 {
18838                        break;
18839                    }
18840                }
18841                let text = if token.token_type == TokenType::QuotedIdentifier {
18842                    format!("\"{}\"", token.text)
18843                } else if token.token_type == TokenType::String {
18844                    format!("'{}'", token.text)
18845                } else {
18846                    token.text.clone()
18847                };
18848                tokens.push((text, token.token_type));
18849            }
18850            Ok(AlterTableAction::Raw {
18851                sql: self.join_command_tokens(tokens),
18852            })
18853        } else {
18854            Err(self.parse_error(format!(
18855                "Expected ADD, DROP, RENAME, ALTER, SET, UNSET, SWAP, CLUSTER, or REPLACE in ALTER TABLE, got {:?}",
18856                self.peek().token_type
18857            )))
18858        }
18859    }
18860
18861    /// Parse TSQL SYSTEM_VERSIONING option in ALTER TABLE SET (...)
18862    /// Handles: SYSTEM_VERSIONING=OFF, SYSTEM_VERSIONING=ON, SYSTEM_VERSIONING=ON(HISTORY_TABLE=..., ...)
18863    fn parse_system_versioning_option(&mut self) -> Result<Expression> {
18864        self.skip(); // consume SYSTEM_VERSIONING
18865        self.expect(TokenType::Eq)?;
18866
18867        let mut prop = WithSystemVersioningProperty {
18868            on: None,
18869            this: None,
18870            data_consistency: None,
18871            retention_period: None,
18872            with_: None,
18873        };
18874
18875        if self.match_identifier("OFF") {
18876            // SYSTEM_VERSIONING=OFF
18877            // on is None => generates OFF
18878            return Ok(Expression::WithSystemVersioningProperty(Box::new(prop)));
18879        }
18880
18881        // SYSTEM_VERSIONING=ON or SYSTEM_VERSIONING=ON(...)
18882        if self.match_token(TokenType::On) || self.match_identifier("ON") {
18883            prop.on = Some(Box::new(Expression::Boolean(BooleanLiteral {
18884                value: true,
18885            })));
18886        }
18887
18888        if self.match_token(TokenType::LParen) {
18889            // Parse options inside ON(...)
18890            loop {
18891                if self.check(TokenType::RParen) {
18892                    break;
18893                }
18894                if self.match_identifier("HISTORY_TABLE") {
18895                    self.expect(TokenType::Eq)?;
18896                    let table = self.parse_table_ref()?;
18897                    prop.this = Some(Box::new(Expression::Table(Box::new(table))));
18898                } else if self.match_identifier("DATA_CONSISTENCY_CHECK") {
18899                    self.expect(TokenType::Eq)?;
18900                    let val = self.expect_identifier_or_keyword()?;
18901                    prop.data_consistency = Some(Box::new(Expression::Identifier(
18902                        Identifier::new(val.to_ascii_uppercase()),
18903                    )));
18904                } else if self.match_identifier("HISTORY_RETENTION_PERIOD") {
18905                    self.expect(TokenType::Eq)?;
18906                    if let Some(rp) = self.parse_retention_period()? {
18907                        prop.retention_period = Some(Box::new(rp));
18908                    }
18909                } else {
18910                    // Skip unknown options
18911                    self.skip();
18912                }
18913                if !self.match_token(TokenType::Comma) {
18914                    break;
18915                }
18916            }
18917            self.expect(TokenType::RParen)?;
18918        }
18919
18920        Ok(Expression::WithSystemVersioningProperty(Box::new(prop)))
18921    }
18922
18923    /// Parse TSQL DATA_DELETION option in ALTER TABLE SET (...)
18924    /// Handles: DATA_DELETION=ON, DATA_DELETION=OFF, DATA_DELETION=ON(FILTER_COLUMN=..., RETENTION_PERIOD=...)
18925    fn parse_data_deletion_option(&mut self) -> Result<Expression> {
18926        self.skip(); // consume DATA_DELETION
18927        self.expect(TokenType::Eq)?;
18928
18929        let on = if self.match_identifier("ON") || self.match_token(TokenType::On) {
18930            true
18931        } else if self.match_identifier("OFF") {
18932            false
18933        } else {
18934            false
18935        };
18936
18937        let on_expr = Box::new(Expression::Boolean(BooleanLiteral { value: on }));
18938        let mut filter_column = None;
18939        let mut retention_period = None;
18940
18941        if self.match_token(TokenType::LParen) {
18942            loop {
18943                if self.check(TokenType::RParen) {
18944                    break;
18945                }
18946                if self.match_identifier("FILTER_COLUMN") {
18947                    self.expect(TokenType::Eq)?;
18948                    let col = self.expect_identifier_or_keyword()?;
18949                    filter_column = Some(Box::new(Expression::boxed_column(Column {
18950                        name: Identifier::new(col),
18951                        table: None,
18952                        join_mark: false,
18953                        trailing_comments: Vec::new(),
18954                        span: None,
18955                        inferred_type: None,
18956                    })));
18957                } else if self.match_identifier("RETENTION_PERIOD") {
18958                    self.expect(TokenType::Eq)?;
18959                    if let Some(rp) = self.parse_retention_period()? {
18960                        retention_period = Some(Box::new(rp));
18961                    }
18962                } else {
18963                    self.skip();
18964                }
18965                if !self.match_token(TokenType::Comma) {
18966                    break;
18967                }
18968            }
18969            self.expect(TokenType::RParen)?;
18970        }
18971
18972        Ok(Expression::DataDeletionProperty(Box::new(
18973            DataDeletionProperty {
18974                on: on_expr,
18975                filter_column,
18976                retention_period,
18977            },
18978        )))
18979    }
18980
18981    /// Parse ALTER COLUMN action
18982    fn parse_alter_column_action(&mut self) -> Result<AlterColumnAction> {
18983        if self.match_token(TokenType::Set) {
18984            if self.match_keywords(&[TokenType::Not, TokenType::Null]) {
18985                Ok(AlterColumnAction::SetNotNull)
18986            } else if self.match_token(TokenType::Default) {
18987                let expr = self.parse_primary()?;
18988                Ok(AlterColumnAction::SetDefault(expr))
18989            } else if self.match_identifier("DATA") {
18990                // SET DATA TYPE
18991                // TYPE can be a keyword token or identifier
18992                let _ = self.match_token(TokenType::Type) || self.match_identifier("TYPE");
18993                let data_type = self.parse_data_type()?;
18994                // Optional COLLATE (can be identifier or string literal like 'binary')
18995                let collate = if self.match_token(TokenType::Collate) {
18996                    if self.check(TokenType::String) {
18997                        let text = self.advance().text.clone();
18998                        Some(format!("'{}'", text))
18999                    } else {
19000                        Some(self.expect_identifier_or_keyword()?)
19001                    }
19002                } else {
19003                    None
19004                };
19005                // Optional USING expression
19006                let using = if self.match_token(TokenType::Using) {
19007                    Some(self.parse_expression()?)
19008                } else {
19009                    None
19010                };
19011                Ok(AlterColumnAction::SetDataType {
19012                    data_type,
19013                    using,
19014                    collate,
19015                })
19016            } else if self.match_identifier("VISIBLE") {
19017                Ok(AlterColumnAction::SetVisible)
19018            } else if self.match_identifier("INVISIBLE") {
19019                Ok(AlterColumnAction::SetInvisible)
19020            } else {
19021                Err(self.parse_error("Expected NOT NULL, DEFAULT, VISIBLE, or INVISIBLE after SET"))
19022            }
19023        } else if self.match_token(TokenType::Drop) {
19024            if self.match_keywords(&[TokenType::Not, TokenType::Null]) {
19025                Ok(AlterColumnAction::DropNotNull)
19026            } else if self.match_token(TokenType::Default) {
19027                Ok(AlterColumnAction::DropDefault)
19028            } else {
19029                Err(self.parse_error("Expected NOT NULL or DEFAULT after DROP"))
19030            }
19031        } else if self.match_token(TokenType::Comment) {
19032            // ALTER COLUMN col COMMENT 'comment'
19033            let comment = self.expect_string()?;
19034            Ok(AlterColumnAction::Comment(comment))
19035        } else if self.match_token(TokenType::Type)
19036            || self.match_identifier("TYPE")
19037            || self.is_identifier_token()
19038        {
19039            // TYPE data_type or just data_type (PostgreSQL/Redshift: ALTER COLUMN col TYPE datatype)
19040            let data_type = self.parse_data_type()?;
19041            // Optional COLLATE (can be identifier or string literal like 'binary')
19042            let collate = if self.match_token(TokenType::Collate) {
19043                if self.check(TokenType::String) {
19044                    Some(self.advance().text.clone())
19045                } else {
19046                    Some(self.expect_identifier_or_keyword()?)
19047                }
19048            } else {
19049                None
19050            };
19051            // Optional USING expression
19052            let using = if self.match_token(TokenType::Using) {
19053                Some(self.parse_expression()?)
19054            } else {
19055                None
19056            };
19057            Ok(AlterColumnAction::SetDataType {
19058                data_type,
19059                using,
19060                collate,
19061            })
19062        } else {
19063            Err(self.parse_error("Expected SET, DROP, or TYPE in ALTER COLUMN"))
19064        }
19065    }
19066
19067    /// Parse TRUNCATE statement
19068    fn parse_truncate(&mut self) -> Result<Expression> {
19069        self.expect(TokenType::Truncate)?;
19070
19071        // ClickHouse: TRUNCATE ALL TABLES FROM [IF EXISTS] db
19072        if matches!(
19073            self.config.dialect,
19074            Some(crate::dialects::DialectType::ClickHouse)
19075        ) && self.check_identifier("ALL")
19076            && self.current + 1 < self.tokens.len()
19077            && self.tokens[self.current + 1]
19078                .text
19079                .eq_ignore_ascii_case("TABLES")
19080        {
19081            // Consume remaining tokens as Command
19082            let mut parts = vec!["TRUNCATE".to_string()];
19083            while !self.is_at_end() && !self.check(TokenType::Semicolon) {
19084                let token = self.advance();
19085                if token.token_type == TokenType::String {
19086                    parts.push(format!("'{}'", token.text));
19087                } else {
19088                    parts.push(token.text.clone());
19089                }
19090            }
19091            return Ok(Expression::Command(Box::new(crate::expressions::Command {
19092                this: parts.join(" "),
19093            })));
19094        }
19095
19096        let target = if self.match_token(TokenType::Database) {
19097            TruncateTarget::Database
19098        } else {
19099            // ClickHouse: TRUNCATE TEMPORARY TABLE t
19100            self.match_token(TokenType::Temporary);
19101            self.match_token(TokenType::Table); // optional TABLE keyword
19102            TruncateTarget::Table
19103        };
19104
19105        // Parse optional IF EXISTS
19106        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
19107
19108        // Parse first table with optional ONLY modifier
19109        let has_only = self.match_token(TokenType::Only);
19110        let mut table = self.parse_table_ref()?;
19111        if has_only {
19112            table.only = true;
19113        }
19114
19115        // Check for * suffix on table name (PostgreSQL: inherit children)
19116        let first_star = self.match_token(TokenType::Star);
19117
19118        // TSQL: WITH (PARTITIONS(1, 2 TO 5, 10 TO 20, 84))
19119        if self.check(TokenType::With) && self.check_next(TokenType::LParen) {
19120            if let Some(hint_expr) = self.parse_truncate_table_hints()? {
19121                match hint_expr {
19122                    Expression::Tuple(tuple) => {
19123                        table.hints = tuple.expressions;
19124                    }
19125                    other => {
19126                        table.hints = vec![other];
19127                    }
19128                }
19129            }
19130        }
19131
19132        // ClickHouse: ON CLUSTER clause
19133        let on_cluster = self.parse_on_cluster_clause()?;
19134
19135        // Parse additional tables for multi-table TRUNCATE
19136        let mut extra_tables = Vec::new();
19137        if first_star {
19138            // The first table has a * suffix, so create an entry for it
19139            extra_tables.push(TruncateTableEntry {
19140                table: table.clone(),
19141                star: true,
19142            });
19143        }
19144        while self.match_token(TokenType::Comma) {
19145            let extra_only = self.match_token(TokenType::Only);
19146            let mut extra_table = self.parse_table_ref()?;
19147            if extra_only {
19148                extra_table.only = true;
19149            }
19150            let extra_star = self.match_token(TokenType::Star);
19151            extra_tables.push(TruncateTableEntry {
19152                table: extra_table,
19153                star: extra_star,
19154            });
19155        }
19156
19157        // Parse RESTART IDENTITY / CONTINUE IDENTITY
19158        // RESTART is TokenType::Restart keyword, IDENTITY is TokenType::Identity keyword
19159        let identity = if self.match_token(TokenType::Restart) {
19160            self.match_token(TokenType::Identity);
19161            Some(TruncateIdentity::Restart)
19162        } else if self.match_identifier("CONTINUE") {
19163            self.match_token(TokenType::Identity);
19164            Some(TruncateIdentity::Continue)
19165        } else {
19166            None
19167        };
19168
19169        // Parse CASCADE or RESTRICT
19170        // CASCADE is TokenType::Cascade keyword, RESTRICT is TokenType::Restrict keyword
19171        let cascade = self.match_token(TokenType::Cascade);
19172        let restrict = if !cascade {
19173            self.match_token(TokenType::Restrict)
19174        } else {
19175            false
19176        };
19177
19178        // Parse Hive PARTITION clause: PARTITION(key = value, ...)
19179        // parse_partition consumes the PARTITION keyword itself
19180        let partition = self.parse_partition()?;
19181
19182        // ClickHouse: TRUNCATE TABLE t SETTINGS key=value, ...
19183        if matches!(
19184            self.config.dialect,
19185            Some(crate::dialects::DialectType::ClickHouse)
19186        ) && self.match_token(TokenType::Settings)
19187        {
19188            // Consume settings expressions (they're not stored in the AST for TRUNCATE)
19189            loop {
19190                let _ = self.parse_expression()?;
19191                if !self.match_token(TokenType::Comma) {
19192                    break;
19193                }
19194            }
19195        }
19196
19197        Ok(Expression::Truncate(Box::new(Truncate {
19198            target,
19199            if_exists,
19200            table,
19201            on_cluster,
19202            cascade,
19203            extra_tables,
19204            identity,
19205            restrict,
19206            partition: partition.map(Box::new),
19207        })))
19208    }
19209
19210    /// Parse VALUES table constructor: VALUES (1, 'a'), (2, 'b')
19211    fn parse_values(&mut self) -> Result<Expression> {
19212        self.expect(TokenType::Values)?;
19213
19214        let mut expressions = Vec::new();
19215
19216        // Handle bare VALUES without parentheses: VALUES 1, 2, 3 -> VALUES (1), (2), (3)
19217        if !self.check(TokenType::LParen) {
19218            loop {
19219                let val = self.parse_expression()?;
19220                expressions.push(Tuple {
19221                    expressions: vec![val],
19222                });
19223                if !self.match_token(TokenType::Comma) {
19224                    break;
19225                }
19226            }
19227        } else {
19228            loop {
19229                self.expect(TokenType::LParen)?;
19230                // Parse VALUES tuple elements with optional AS aliases (Hive syntax)
19231                let row_values = self.parse_values_expression_list()?;
19232                self.expect(TokenType::RParen)?;
19233
19234                expressions.push(Tuple {
19235                    expressions: row_values,
19236                });
19237
19238                if !self.match_token(TokenType::Comma) {
19239                    break;
19240                }
19241                // ClickHouse: allow trailing comma after last tuple
19242                if matches!(
19243                    self.config.dialect,
19244                    Some(crate::dialects::DialectType::ClickHouse)
19245                ) && !self.check(TokenType::LParen)
19246                {
19247                    break;
19248                }
19249            }
19250        }
19251
19252        // Check for alias: VALUES (1, 2) AS new_data or VALUES (1, 2) new_data
19253        let (alias, column_aliases) = if self.match_token(TokenType::As) {
19254            let alias_name = self.expect_identifier()?;
19255            let alias = Some(Identifier::new(alias_name));
19256
19257            // Check for column aliases: AS new_data(a, b)
19258            let col_aliases = if self.match_token(TokenType::LParen) {
19259                let aliases = self.parse_identifier_list()?;
19260                self.expect(TokenType::RParen)?;
19261                aliases
19262            } else {
19263                Vec::new()
19264            };
19265            (alias, col_aliases)
19266        } else if self.check(TokenType::Var) && !self.check_keyword() {
19267            // Implicit alias: VALUES (0) foo(bar)
19268            let alias_name = self.advance().text.clone();
19269            let alias = Some(Identifier::new(alias_name));
19270            let col_aliases = if self.match_token(TokenType::LParen) {
19271                let aliases = self.parse_identifier_list()?;
19272                self.expect(TokenType::RParen)?;
19273                aliases
19274            } else {
19275                Vec::new()
19276            };
19277            (alias, col_aliases)
19278        } else {
19279            (None, Vec::new())
19280        };
19281
19282        // VALUES can be followed by set operations (UNION, etc.)
19283        let values_expr = Expression::Values(Box::new(Values {
19284            expressions,
19285            alias,
19286            column_aliases,
19287        }));
19288
19289        // Check for set operations after VALUES
19290        self.parse_set_operation(values_expr)
19291    }
19292
19293    /// Parse USE statement: USE db, USE DATABASE x, USE SCHEMA x.y, USE ROLE x, etc.
19294    fn parse_use(&mut self) -> Result<Expression> {
19295        self.expect(TokenType::Use)?;
19296
19297        // Check for Snowflake: USE SECONDARY ROLES ALL|NONE|role1, role2, ...
19298        if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("SECONDARY") {
19299            self.skip(); // consume SECONDARY
19300                         // Check for ROLES
19301            if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("ROLES") {
19302                self.skip(); // consume ROLES
19303                             // Parse ALL, NONE, or comma-separated role list
19304                let mut roles = Vec::new();
19305                loop {
19306                    if self.check(TokenType::Var)
19307                        || self.check(TokenType::All)
19308                        || self.check(TokenType::Identifier)
19309                    {
19310                        let role = self.advance().text.clone();
19311                        roles.push(role);
19312                        if !self.match_token(TokenType::Comma) {
19313                            break;
19314                        }
19315                    } else {
19316                        break;
19317                    }
19318                }
19319                let name = if roles.is_empty() {
19320                    "ALL".to_string()
19321                } else {
19322                    roles.join(", ")
19323                };
19324                return Ok(Expression::Use(Box::new(Use {
19325                    kind: Some(UseKind::SecondaryRoles),
19326                    this: Identifier::new(name),
19327                })));
19328            }
19329        }
19330
19331        // Check for kind: DATABASE, SCHEMA, ROLE, WAREHOUSE, CATALOG
19332        // Note: ROLE and CATALOG are not keywords, so we check the text
19333        let kind = if self.match_token(TokenType::Database) {
19334            Some(UseKind::Database)
19335        } else if self.match_token(TokenType::Schema) {
19336            Some(UseKind::Schema)
19337        } else if self.match_token(TokenType::Warehouse) {
19338            Some(UseKind::Warehouse)
19339        } else if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("ROLE") {
19340            self.skip();
19341            Some(UseKind::Role)
19342        } else if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("CATALOG") {
19343            self.skip();
19344            Some(UseKind::Catalog)
19345        } else {
19346            None
19347        };
19348
19349        // Parse the name (can be qualified like x.y)
19350        // Use expect_identifier_or_keyword_with_quoted because names like "default", "system" are valid
19351        let mut ident = self.expect_identifier_or_keyword_with_quoted()?;
19352
19353        // Handle qualified names like schema.table for USE SCHEMA x.y
19354        if self.match_token(TokenType::Dot) {
19355            let second_part = self.expect_identifier_or_keyword_with_quoted()?;
19356            ident.name = format!("{}.{}", ident.name, second_part.name);
19357        }
19358
19359        Ok(Expression::Use(Box::new(Use { kind, this: ident })))
19360    }
19361
19362    /// Parse EXPORT DATA statement (BigQuery)
19363    /// EXPORT DATA [WITH CONNECTION connection] OPTIONS (...) AS SELECT ...
19364    fn parse_export_data(&mut self) -> Result<Expression> {
19365        self.skip(); // consume EXPORT
19366
19367        // Expect DATA
19368        if !self.match_identifier("DATA") {
19369            return Err(self.parse_error("Expected DATA after EXPORT"));
19370        }
19371
19372        // Optional: WITH CONNECTION connection
19373        let connection = if self.match_text_seq(&["WITH", "CONNECTION"]) {
19374            // Parse connection identifier (can be qualified: project.location.connection)
19375            let first = self.expect_identifier()?;
19376            let connection_name = if self.match_token(TokenType::Dot) {
19377                let second = self.expect_identifier()?;
19378                if self.match_token(TokenType::Dot) {
19379                    let third = self.expect_identifier()?;
19380                    format!("{}.{}.{}", first, second, third)
19381                } else {
19382                    format!("{}.{}", first, second)
19383                }
19384            } else {
19385                first
19386            };
19387            Some(Box::new(Expression::Identifier(Identifier::new(
19388                connection_name,
19389            ))))
19390        } else {
19391            None
19392        };
19393
19394        // Expect OPTIONS (...)
19395        let options = if self.match_identifier("OPTIONS") {
19396            self.parse_options_list()?
19397        } else {
19398            Vec::new()
19399        };
19400
19401        // Expect AS
19402        self.expect(TokenType::As)?;
19403
19404        // Parse the SELECT query
19405        let query = self.parse_statement()?;
19406
19407        Ok(Expression::Export(Box::new(Export {
19408            this: Box::new(query),
19409            connection,
19410            options,
19411        })))
19412    }
19413
19414    /// Parse CACHE TABLE statement (Spark)
19415    /// CACHE [LAZY] TABLE name [OPTIONS(...)] [AS query]
19416    fn parse_cache(&mut self) -> Result<Expression> {
19417        self.expect(TokenType::Cache)?;
19418
19419        // Check for LAZY keyword
19420        let lazy = self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("LAZY");
19421        if lazy {
19422            self.skip();
19423        }
19424
19425        self.expect(TokenType::Table)?;
19426        let table = Identifier::new(self.expect_identifier()?);
19427
19428        // Check for OPTIONS clause
19429        let options =
19430            if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("OPTIONS") {
19431                self.skip();
19432                self.expect(TokenType::LParen)?;
19433                let mut opts = Vec::new();
19434                loop {
19435                    // Parse key = value pairs (key can be string literal or identifier)
19436                    let key = if self.check(TokenType::NationalString) {
19437                        let token = self.advance();
19438                        Expression::Literal(Box::new(Literal::NationalString(token.text)))
19439                    } else if self.check(TokenType::String) {
19440                        let token = self.advance();
19441                        Expression::Literal(Box::new(Literal::String(token.text)))
19442                    } else {
19443                        Expression::Identifier(Identifier::new(self.expect_identifier()?))
19444                    };
19445                    // Eq is optional - Spark allows space-separated key value pairs
19446                    // e.g., OPTIONS ('storageLevel' 'DISK_ONLY') or OPTIONS ('key' = 'value')
19447                    let _ = self.match_token(TokenType::Eq);
19448                    let value = self.parse_expression()?;
19449                    opts.push((key, value));
19450                    if !self.match_token(TokenType::Comma) {
19451                        break;
19452                    }
19453                }
19454                self.expect(TokenType::RParen)?;
19455                opts
19456            } else {
19457                Vec::new()
19458            };
19459
19460        // Check for AS clause or implicit query (SELECT without AS in Spark)
19461        let query = if self.match_token(TokenType::As) {
19462            Some(self.parse_statement()?)
19463        } else if self.check(TokenType::Select) || self.check(TokenType::With) {
19464            // Spark allows SELECT without AS keyword after CACHE TABLE
19465            Some(self.parse_statement()?)
19466        } else {
19467            None
19468        };
19469
19470        Ok(Expression::Cache(Box::new(Cache {
19471            table,
19472            lazy,
19473            options,
19474            query,
19475        })))
19476    }
19477
19478    /// Parse UNCACHE TABLE statement (Spark)
19479    /// UNCACHE TABLE [IF EXISTS] name
19480    fn parse_uncache(&mut self) -> Result<Expression> {
19481        self.expect(TokenType::Uncache)?;
19482        self.expect(TokenType::Table)?;
19483
19484        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
19485        let table = Identifier::new(self.expect_identifier()?);
19486
19487        Ok(Expression::Uncache(Box::new(Uncache { table, if_exists })))
19488    }
19489
19490    /// Parse LOAD DATA statement (Hive)
19491    /// LOAD DATA [LOCAL] INPATH 'path' [OVERWRITE] INTO TABLE table_name
19492    /// [PARTITION (col=val, ...)] [INPUTFORMAT 'format'] [SERDE 'serde']
19493    fn parse_load_data(&mut self) -> Result<Expression> {
19494        self.expect(TokenType::Load)?;
19495
19496        // Expect DATA keyword
19497        let data_token = self.advance();
19498        if !data_token.text.eq_ignore_ascii_case("DATA") {
19499            return Err(self.parse_error("Expected DATA after LOAD"));
19500        }
19501
19502        // Check for LOCAL keyword
19503        let local = self.match_token(TokenType::Local);
19504
19505        // Expect INPATH
19506        self.expect(TokenType::Inpath)?;
19507
19508        // Parse the path (string literal)
19509        let inpath = if self.check(TokenType::String) {
19510            self.advance().text
19511        } else {
19512            return Err(self.parse_error("Expected string literal after INPATH"));
19513        };
19514
19515        // Check for OVERWRITE keyword
19516        let overwrite = self.match_token(TokenType::Overwrite);
19517
19518        // Expect INTO TABLE
19519        self.expect(TokenType::Into)?;
19520        self.expect(TokenType::Table)?;
19521
19522        // Parse table name (can be qualified)
19523        let table = Expression::Table(Box::new(self.parse_table_ref()?));
19524
19525        // Check for PARTITION clause
19526        let partition = if self.match_token(TokenType::Partition) {
19527            self.expect(TokenType::LParen)?;
19528            let mut partitions = Vec::new();
19529            loop {
19530                let col = Identifier::new(self.expect_identifier_or_keyword()?);
19531                self.expect(TokenType::Eq)?;
19532                let val = self.parse_expression()?;
19533                partitions.push((col, val));
19534                if !self.match_token(TokenType::Comma) {
19535                    break;
19536                }
19537            }
19538            self.expect(TokenType::RParen)?;
19539            partitions
19540        } else {
19541            Vec::new()
19542        };
19543
19544        // Check for INPUTFORMAT clause
19545        let input_format = if self.match_token(TokenType::InputFormat) {
19546            if self.check(TokenType::String) {
19547                Some(self.advance().text)
19548            } else {
19549                return Err(self.parse_error("Expected string literal after INPUTFORMAT"));
19550            }
19551        } else {
19552            None
19553        };
19554
19555        // Check for SERDE clause
19556        let serde = if self.match_token(TokenType::Serde) {
19557            if self.check(TokenType::String) {
19558                Some(self.advance().text)
19559            } else {
19560                return Err(self.parse_error("Expected string literal after SERDE"));
19561            }
19562        } else {
19563            None
19564        };
19565
19566        Ok(Expression::LoadData(Box::new(LoadData {
19567            local,
19568            inpath,
19569            overwrite,
19570            table,
19571            partition,
19572            input_format,
19573            serde,
19574        })))
19575    }
19576
19577    /// Parse PRAGMA statement (SQLite)
19578    /// PRAGMA [schema.]name [= value | (args...)]
19579    fn parse_pragma(&mut self) -> Result<Expression> {
19580        self.expect(TokenType::Pragma)?;
19581
19582        // Parse schema.name or just name
19583        let first_name = self.expect_identifier_or_keyword()?;
19584
19585        let (schema, name) = if self.match_token(TokenType::Dot) {
19586            // First name was schema
19587            let pragma_name = self.expect_identifier_or_keyword()?;
19588            (
19589                Some(Identifier::new(first_name)),
19590                Identifier::new(pragma_name),
19591            )
19592        } else {
19593            (None, Identifier::new(first_name))
19594        };
19595
19596        // Check for assignment or function call
19597        let (value, args) = if self.match_token(TokenType::Eq) {
19598            // PRAGMA name = value
19599            let val = self.parse_expression()?;
19600            (Some(val), Vec::new())
19601        } else if self.match_token(TokenType::LParen) {
19602            // PRAGMA name(args...)
19603            let mut arguments = Vec::new();
19604            if !self.check(TokenType::RParen) {
19605                loop {
19606                    arguments.push(self.parse_expression()?);
19607                    if !self.match_token(TokenType::Comma) {
19608                        break;
19609                    }
19610                }
19611            }
19612            self.expect(TokenType::RParen)?;
19613            (None, arguments)
19614        } else {
19615            (None, Vec::new())
19616        };
19617
19618        Ok(Expression::Pragma(Box::new(Pragma {
19619            schema,
19620            name,
19621            value,
19622            args,
19623        })))
19624    }
19625
19626    /// Parse ROLLBACK statement
19627    /// ROLLBACK [TO [SAVEPOINT] <name>]
19628    fn parse_rollback(&mut self) -> Result<Expression> {
19629        self.expect(TokenType::Rollback)?;
19630
19631        // Check for optional TRANSACTION, TRAN, or WORK keyword
19632        let has_transaction = self.match_token(TokenType::Transaction)
19633            || self.match_identifier("TRAN")
19634            || self.match_identifier("WORK");
19635
19636        // Check for TO SAVEPOINT (standard SQL) or transaction name (TSQL)
19637        let (savepoint, this) = if self.match_token(TokenType::To) {
19638            // Optional SAVEPOINT keyword
19639            self.match_token(TokenType::Savepoint);
19640            // Savepoint name
19641            if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
19642                let name = self.advance().text;
19643                (
19644                    Some(Box::new(Expression::Identifier(Identifier::new(name)))),
19645                    None,
19646                )
19647            } else {
19648                (None, None)
19649            }
19650        } else if has_transaction
19651            && (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
19652        {
19653            // TSQL: ROLLBACK TRANSACTION transaction_name
19654            let name = self.advance().text;
19655            (
19656                None,
19657                Some(Box::new(Expression::Identifier(Identifier::new(name)))),
19658            )
19659        } else if has_transaction {
19660            // Just ROLLBACK TRANSACTION - store marker
19661            (
19662                None,
19663                Some(Box::new(Expression::Identifier(Identifier::new(
19664                    "TRANSACTION".to_string(),
19665                )))),
19666            )
19667        } else {
19668            (None, None)
19669        };
19670
19671        Ok(Expression::Rollback(Box::new(Rollback { savepoint, this })))
19672    }
19673
19674    /// Parse COMMIT statement
19675    /// COMMIT [TRANSACTION|TRAN|WORK] [transaction_name] [WITH (DELAYED_DURABILITY = ON|OFF)] [AND [NO] CHAIN]
19676    fn parse_commit(&mut self) -> Result<Expression> {
19677        self.expect(TokenType::Commit)?;
19678
19679        // Check for optional TRANSACTION, TRAN, or WORK keyword
19680        let has_transaction = self.match_token(TokenType::Transaction)
19681            || self.match_identifier("TRAN")
19682            || self.match_identifier("WORK");
19683
19684        // Parse optional transaction name (TSQL)
19685        let this = if has_transaction
19686            && (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
19687            && !self.check(TokenType::With)
19688            && !self.check(TokenType::And)
19689        {
19690            let name = self.advance().text;
19691            Some(Box::new(Expression::Identifier(Identifier::new(name))))
19692        } else if has_transaction {
19693            // Store marker that TRANSACTION keyword was present
19694            Some(Box::new(Expression::Identifier(Identifier::new(
19695                "TRANSACTION".to_string(),
19696            ))))
19697        } else {
19698            None
19699        };
19700
19701        // Parse WITH (DELAYED_DURABILITY = ON|OFF) for TSQL
19702        let durability = if self.match_token(TokenType::With) && self.match_token(TokenType::LParen)
19703        {
19704            // Check for DELAYED_DURABILITY
19705            if self.match_identifier("DELAYED_DURABILITY") && self.match_token(TokenType::Eq) {
19706                // ON is a keyword (TokenType::On), OFF is an identifier
19707                let on = self.match_token(TokenType::On) || self.match_identifier("ON");
19708                if !on {
19709                    self.match_identifier("OFF");
19710                }
19711                self.expect(TokenType::RParen)?;
19712                Some(Box::new(Expression::Boolean(BooleanLiteral { value: on })))
19713            } else {
19714                // Skip to RParen
19715                while !self.check(TokenType::RParen) && !self.is_at_end() {
19716                    self.skip();
19717                }
19718                self.match_token(TokenType::RParen);
19719                None
19720            }
19721        } else {
19722            None
19723        };
19724
19725        // Parse AND [NO] CHAIN
19726        let chain = if self.match_token(TokenType::And) {
19727            let no_chain = self.match_token(TokenType::No);
19728            self.match_identifier("CHAIN");
19729            if no_chain {
19730                // AND NO CHAIN - explicit false
19731                Some(Box::new(Expression::Boolean(BooleanLiteral {
19732                    value: false,
19733                })))
19734            } else {
19735                // AND CHAIN - explicit true
19736                Some(Box::new(Expression::Boolean(BooleanLiteral {
19737                    value: true,
19738                })))
19739            }
19740        } else {
19741            None
19742        };
19743
19744        Ok(Expression::Commit(Box::new(Commit {
19745            chain,
19746            this,
19747            durability,
19748        })))
19749    }
19750
19751    /// Parse END statement (PostgreSQL alias for COMMIT)
19752    /// END [WORK|TRANSACTION] [AND [NO] CHAIN]
19753    fn parse_end_transaction(&mut self) -> Result<Expression> {
19754        self.expect(TokenType::End)?;
19755
19756        // Check for optional WORK or TRANSACTION keyword
19757        let _has_work = self.match_identifier("WORK") || self.match_token(TokenType::Transaction);
19758
19759        // Parse AND [NO] CHAIN
19760        let chain = if self.match_token(TokenType::And) {
19761            let no_chain = self.match_token(TokenType::No);
19762            self.match_identifier("CHAIN");
19763            if no_chain {
19764                // AND NO CHAIN - explicit false
19765                Some(Box::new(Expression::Boolean(BooleanLiteral {
19766                    value: false,
19767                })))
19768            } else {
19769                // AND CHAIN - explicit true
19770                Some(Box::new(Expression::Boolean(BooleanLiteral {
19771                    value: true,
19772                })))
19773            }
19774        } else {
19775            None
19776        };
19777
19778        // Return as COMMIT since END is an alias
19779        Ok(Expression::Commit(Box::new(Commit {
19780            chain,
19781            this: None,
19782            durability: None,
19783        })))
19784    }
19785
19786    /// Parse BEGIN/START TRANSACTION statement
19787    /// BEGIN [DEFERRED|IMMEDIATE|EXCLUSIVE] [TRANSACTION|TRAN|WORK] [transaction_name] [WITH MARK 'description']
19788    /// Also handles procedural BEGIN blocks (BigQuery, etc.): BEGIN statement_list END
19789    fn parse_transaction(&mut self) -> Result<Expression> {
19790        self.expect(TokenType::Begin)?;
19791
19792        // Check if this is a procedural BEGIN block rather than a transaction
19793        // If next token is not a transaction keyword and we have more tokens, it's a procedural block
19794        let is_transaction = self.is_at_end()
19795            || self.check(TokenType::Semicolon)
19796            || self.check(TokenType::Transaction)
19797            || self.check_identifier("TRAN")
19798            || self.check_identifier("WORK")
19799            || self.check_identifier("DEFERRED")
19800            || self.check_identifier("IMMEDIATE")
19801            || self.check_identifier("EXCLUSIVE");
19802
19803        if !is_transaction {
19804            // TSQL: BEGIN TRY ... END TRY [BEGIN CATCH ... END CATCH]
19805            // These are block-structured constructs that may contain semicolons,
19806            // so we can't use parse_command() which stops at the first semicolon.
19807            let is_try = self.check_identifier("TRY");
19808            let is_catch = self.check_identifier("CATCH");
19809            if is_try || is_catch {
19810                let block_kind = if is_try { "TRY" } else { "CATCH" };
19811                self.skip(); // consume TRY or CATCH
19812                let mut tokens: Vec<(String, TokenType)> = vec![
19813                    ("BEGIN".to_string(), TokenType::Begin),
19814                    (block_kind.to_string(), TokenType::Var),
19815                ];
19816                // Collect tokens until matching END TRY / END CATCH
19817                while !self.is_at_end() {
19818                    if self.check(TokenType::End)
19819                        && self.current + 1 < self.tokens.len()
19820                        && self.tokens[self.current + 1]
19821                            .text
19822                            .eq_ignore_ascii_case(block_kind)
19823                    {
19824                        tokens.push(("END".to_string(), TokenType::End));
19825                        self.skip(); // consume END
19826                        tokens.push((block_kind.to_string(), TokenType::Var));
19827                        self.skip(); // consume TRY/CATCH
19828                        break;
19829                    }
19830                    let token = self.advance();
19831                    let text = if token.token_type == TokenType::String {
19832                        format!("'{}'", token.text)
19833                    } else if token.token_type == TokenType::QuotedIdentifier {
19834                        format!("\"{}\"", token.text)
19835                    } else {
19836                        token.text.clone()
19837                    };
19838                    tokens.push((text, token.token_type));
19839                }
19840                let mut result = Expression::Command(Box::new(Command {
19841                    this: self.join_command_tokens(tokens),
19842                }));
19843
19844                // If this was a TRY block, check for a following BEGIN CATCH block
19845                if is_try
19846                    && self.check(TokenType::Begin)
19847                    && self.current + 1 < self.tokens.len()
19848                    && self.tokens[self.current + 1]
19849                        .text
19850                        .eq_ignore_ascii_case("CATCH")
19851                {
19852                    // Recursively parse the BEGIN CATCH block
19853                    let catch_block = self.parse_transaction()?;
19854                    // Combine TRY and CATCH into a single command
19855                    if let (Expression::Command(try_cmd), Expression::Command(catch_cmd)) =
19856                        (&result, &catch_block)
19857                    {
19858                        result = Expression::Command(Box::new(Command {
19859                            this: format!("{} {}", try_cmd.this, catch_cmd.this),
19860                        }));
19861                    }
19862                }
19863
19864                return Ok(result);
19865            }
19866
19867            // This is a procedural BEGIN block - parse as Command
19868            // Collect remaining tokens until end of statement
19869            return self
19870                .parse_command()?
19871                .ok_or_else(|| self.parse_error("Failed to parse BEGIN block"));
19872        }
19873
19874        // Check for transaction kind: DEFERRED, IMMEDIATE, EXCLUSIVE (SQLite)
19875        let kind = if self.match_identifier("DEFERRED")
19876            || self.match_identifier("IMMEDIATE")
19877            || self.match_identifier("EXCLUSIVE")
19878        {
19879            Some(self.previous().text.clone())
19880        } else {
19881            None
19882        };
19883
19884        // Check for TRANSACTION, TRAN, or WORK keyword
19885        let has_transaction_keyword = self.match_token(TokenType::Transaction)
19886            || self.match_identifier("TRAN")
19887            || self.match_identifier("WORK");
19888
19889        // Parse optional transaction name (TSQL style: BEGIN TRANSACTION trans_name)
19890        let trans_name = if has_transaction_keyword
19891            && (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
19892            && !self.check(TokenType::With)
19893        {
19894            // Could be a transaction name or @variable
19895            let name = self.advance().text;
19896            Some(name)
19897        } else {
19898            None
19899        };
19900
19901        // Combine kind and trans_name into `this`
19902        let this = if let Some(name) = trans_name {
19903            Some(Box::new(Expression::Identifier(Identifier::new(name))))
19904        } else if let Some(k) = kind {
19905            Some(Box::new(Expression::Identifier(Identifier::new(k))))
19906        } else {
19907            None
19908        };
19909
19910        // Parse WITH MARK 'description' (TSQL)
19911        let mark = if self.match_token(TokenType::With) && self.match_identifier("MARK") {
19912            if self.check(TokenType::String) {
19913                let desc = self.advance().text;
19914                Some(Box::new(Expression::Literal(Box::new(Literal::String(
19915                    desc,
19916                )))))
19917            } else {
19918                Some(Box::new(Expression::Literal(Box::new(Literal::String(
19919                    "".to_string(),
19920                )))))
19921            }
19922        } else if has_transaction_keyword {
19923            // Store "TRANSACTION" marker to preserve round-trip
19924            Some(Box::new(Expression::Identifier(Identifier::new(
19925                "TRANSACTION".to_string(),
19926            ))))
19927        } else {
19928            None
19929        };
19930
19931        // Parse any additional transaction modes (isolation levels, etc.)
19932        let mut mode_parts: Vec<String> = Vec::new();
19933        while self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
19934            let mut mode_tokens: Vec<String> = Vec::new();
19935            while (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
19936                && !self.check(TokenType::Comma)
19937            {
19938                mode_tokens.push(self.advance().text);
19939            }
19940            if !mode_tokens.is_empty() {
19941                mode_parts.push(mode_tokens.join(" "));
19942            }
19943            if !self.match_token(TokenType::Comma) {
19944                break;
19945            }
19946        }
19947
19948        let modes = if !mode_parts.is_empty() {
19949            Some(Box::new(Expression::Identifier(Identifier::new(
19950                mode_parts.join(", "),
19951            ))))
19952        } else {
19953            None
19954        };
19955
19956        Ok(Expression::Transaction(Box::new(Transaction {
19957            this,
19958            modes,
19959            mark,
19960        })))
19961    }
19962
19963    /// Parse START TRANSACTION statement
19964    /// START TRANSACTION [READ ONLY | READ WRITE] [, ISOLATION LEVEL ...]
19965    fn parse_start_transaction(&mut self) -> Result<Expression> {
19966        self.expect(TokenType::Start)?;
19967
19968        // Expect TRANSACTION keyword
19969        self.expect(TokenType::Transaction)?;
19970
19971        // Parse any transaction modes (READ ONLY, READ WRITE, ISOLATION LEVEL, etc.)
19972        let mut mode_parts: Vec<String> = Vec::new();
19973        while self.is_identifier_token()
19974            || self.is_safe_keyword_as_identifier()
19975            || self.match_identifier("READ")
19976        {
19977            // If we matched READ, add it to tokens
19978            let read_matched = if self.previous().text.eq_ignore_ascii_case("READ") {
19979                true
19980            } else {
19981                false
19982            };
19983            let mut mode_tokens: Vec<String> = Vec::new();
19984            if read_matched {
19985                mode_tokens.push("READ".to_string());
19986            }
19987            while (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
19988                && !self.check(TokenType::Comma)
19989            {
19990                mode_tokens.push(self.advance().text);
19991            }
19992            if !mode_tokens.is_empty() {
19993                mode_parts.push(mode_tokens.join(" "));
19994            }
19995            if !self.match_token(TokenType::Comma) {
19996                break;
19997            }
19998        }
19999
20000        let modes = if !mode_parts.is_empty() {
20001            Some(Box::new(Expression::Identifier(Identifier::new(
20002                mode_parts.join(", "),
20003            ))))
20004        } else {
20005            None
20006        };
20007
20008        Ok(Expression::Transaction(Box::new(Transaction {
20009            this: None, // START TRANSACTION doesn't have a kind like DEFERRED/IMMEDIATE
20010            modes,
20011            // Mark as START to differentiate from BEGIN
20012            mark: Some(Box::new(Expression::Identifier(Identifier::new(
20013                "START".to_string(),
20014            )))),
20015        })))
20016    }
20017
20018    /// Parse DESCRIBE statement
20019    /// DESCRIBE [EXTENDED|FORMATTED|ANALYZE] <table_or_query>
20020    /// Also handles EXPLAIN (parsed as Describe)
20021    fn parse_describe(&mut self) -> Result<Expression> {
20022        // Accept DESCRIBE, DESC, and EXPLAIN (Var token)
20023        // Capture leading comments from the first token
20024        let leading_comments = if self.check(TokenType::Describe) {
20025            let token = self.advance();
20026            token.comments
20027        } else if self.check(TokenType::Desc) {
20028            let token = self.advance();
20029            token.comments
20030        } else if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("EXPLAIN") {
20031            let token = self.advance(); // consume EXPLAIN
20032            token.comments
20033        } else {
20034            return Err(self.parse_error("Expected DESCRIBE, DESC, or EXPLAIN"));
20035        };
20036
20037        // Check for EXTENDED or FORMATTED keywords
20038        let extended = self.match_identifier("EXTENDED");
20039        let formatted = if !extended {
20040            self.match_identifier("FORMATTED")
20041        } else {
20042            false
20043        };
20044
20045        // Check for style keywords like ANALYZE, HISTORY
20046        // ClickHouse: EXPLAIN SYNTAX/AST/PLAN/PIPELINE/ESTIMATE/TABLE OVERRIDE/CURRENT TRANSACTION
20047        // For HISTORY, we need to look ahead to ensure it's not part of a schema-qualified
20048        // table name like "history.tbl". If the next token is a Dot, "history" is a schema name.
20049        let style = if !extended && !formatted && self.match_identifier("ANALYZE") {
20050            Some("ANALYZE".to_string())
20051        } else if !extended
20052            && !formatted
20053            && matches!(
20054                self.config.dialect,
20055                Some(crate::dialects::DialectType::ClickHouse)
20056            )
20057        {
20058            // ClickHouse EXPLAIN styles
20059            let text_upper = if !self.is_at_end() {
20060                self.peek().text.to_ascii_uppercase()
20061            } else {
20062                String::new()
20063            };
20064            match text_upper.as_str() {
20065                "SYNTAX" | "AST" | "PLAN" | "PIPELINE" | "ESTIMATE" | "QUERY" | "CURRENT" => {
20066                    self.skip();
20067                    let mut style_str = text_upper;
20068                    // Handle multi-word: TABLE OVERRIDE, CURRENT TRANSACTION, QUERY TREE
20069                    if style_str == "CURRENT" && self.check_identifier("TRANSACTION") {
20070                        style_str.push_str(" TRANSACTION");
20071                        self.skip();
20072                    }
20073                    if style_str == "QUERY" && self.check_identifier("TREE") {
20074                        style_str.push_str(" TREE");
20075                        self.skip();
20076                    }
20077                    Some(style_str)
20078                }
20079                _ if self.check(TokenType::Table) => {
20080                    // EXPLAIN TABLE OVERRIDE
20081                    self.skip(); // consume TABLE
20082                    if self.check_identifier("OVERRIDE") {
20083                        self.skip();
20084                        Some("TABLE OVERRIDE".to_string())
20085                    } else {
20086                        // Not TABLE OVERRIDE, backtrack
20087                        self.current -= 1;
20088                        None
20089                    }
20090                }
20091                _ => None,
20092            }
20093        } else if !extended
20094            && !formatted
20095            && (self.check(TokenType::Identifier)
20096                || self.check(TokenType::Var)
20097                || self.check(TokenType::QuotedIdentifier))
20098            && self.peek().text.eq_ignore_ascii_case("HISTORY")
20099            && self.peek_nth(1).map(|t| t.token_type) != Some(TokenType::Dot)
20100        {
20101            self.skip(); // consume HISTORY
20102            Some("HISTORY".to_string())
20103        } else {
20104            None
20105        };
20106
20107        // Check for object kind like SEMANTIC VIEW, TABLE, INPUT, OUTPUT, etc.
20108        let kind = if self.match_identifier("SEMANTIC") {
20109            if self.match_token(TokenType::View) {
20110                Some("SEMANTIC VIEW".to_string())
20111            } else {
20112                Some("SEMANTIC".to_string())
20113            }
20114        } else if self.match_token(TokenType::Table) {
20115            Some("TABLE".to_string())
20116        } else if self.match_token(TokenType::View) {
20117            Some("VIEW".to_string())
20118        } else if self.match_identifier("DATABASE") {
20119            Some("DATABASE".to_string())
20120        } else if self.match_identifier("SCHEMA") {
20121            Some("SCHEMA".to_string())
20122        } else if self.match_token(TokenType::Procedure) {
20123            Some("PROCEDURE".to_string())
20124        } else if self.match_token(TokenType::Function) {
20125            Some("FUNCTION".to_string())
20126        } else if self.match_token(TokenType::Input) {
20127            Some("INPUT".to_string())
20128        } else if self.match_token(TokenType::Output) {
20129            Some("OUTPUT".to_string())
20130        } else {
20131            None
20132        };
20133
20134        // ClickHouse: parse EXPLAIN settings before the target statement
20135        // e.g., EXPLAIN actions=1, description=0 SELECT ...
20136        // e.g., EXPLAIN PLAN actions=1 SELECT ...
20137        let mut properties = Vec::new();
20138        if matches!(
20139            self.config.dialect,
20140            Some(crate::dialects::DialectType::ClickHouse)
20141        ) {
20142            while !self.is_at_end() && !self.check(TokenType::Semicolon) {
20143                // Look for key=value pairs before a statement keyword
20144                if (self.is_identifier_token()
20145                    || self.is_safe_keyword_as_identifier()
20146                    || self.check(TokenType::Type))
20147                    && self.current + 1 < self.tokens.len()
20148                    && self.tokens[self.current + 1].token_type == TokenType::Eq
20149                {
20150                    let name = self.advance().text.to_lowercase();
20151                    self.skip(); // consume =
20152                    let value = self.advance().text.clone();
20153                    properties.push((name, value));
20154                    self.match_token(TokenType::Comma); // optional comma between settings
20155                } else {
20156                    break;
20157                }
20158            }
20159        }
20160
20161        // Parse target - could be a table name or a SELECT/INSERT/other statement
20162        // ClickHouse: EXPLAIN/DESC can precede any statement or subquery
20163        let target = if self.check(TokenType::Select) || self.check(TokenType::With) {
20164            self.parse_statement()?
20165        } else if self.check(TokenType::LParen) && {
20166            // Look through nested parens for SELECT/WITH
20167            let mut depth = 0usize;
20168            let mut found_select = false;
20169            for i in 0..100 {
20170                match self.peek_nth(i).map(|t| t.token_type) {
20171                    Some(TokenType::LParen) => depth += 1,
20172                    Some(TokenType::Select) | Some(TokenType::With) if depth > 0 => {
20173                        found_select = true;
20174                        break;
20175                    }
20176                    _ => break,
20177                }
20178            }
20179            found_select
20180        } {
20181            // DESC (((SELECT ...))) — deeply nested parenthesized subquery
20182            self.parse_statement()?
20183        } else if matches!(
20184            self.config.dialect,
20185            Some(crate::dialects::DialectType::ClickHouse)
20186        ) && (self.check(TokenType::Insert)
20187            || self.check(TokenType::Create)
20188            || self.check(TokenType::Alter)
20189            || self.check(TokenType::Drop)
20190            || self.check(TokenType::Set)
20191            || self.check(TokenType::System))
20192        {
20193            self.parse_statement()?
20194        } else if matches!(
20195            self.config.dialect,
20196            Some(crate::dialects::DialectType::ClickHouse)
20197        ) && (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
20198            && self.peek_nth(1).map(|t| t.token_type) == Some(TokenType::LParen)
20199        {
20200            // ClickHouse: DESC format(Values, '(123)') — function call as target
20201            self.parse_expression()?
20202        } else {
20203            // Parse as table reference
20204            let table = self.parse_table_ref()?;
20205            Expression::Table(Box::new(table))
20206        };
20207
20208        // Parse optional parenthesized type signature for PROCEDURE/FUNCTION
20209        // e.g., DESCRIBE PROCEDURE get_employees(INT, VARCHAR)
20210        let params = if matches!(kind.as_deref(), Some("PROCEDURE") | Some("FUNCTION"))
20211            && self.match_token(TokenType::LParen)
20212        {
20213            let mut type_args = Vec::new();
20214            if !self.check(TokenType::RParen) {
20215                loop {
20216                    // Collect tokens for this type until comma or closing paren
20217                    let mut parts = Vec::new();
20218                    let mut paren_depth = 0usize;
20219                    while !self.is_at_end() {
20220                        if self.check(TokenType::LParen) {
20221                            paren_depth += 1;
20222                            parts.push(self.advance().text.clone());
20223                        } else if self.check(TokenType::RParen) {
20224                            if paren_depth == 0 {
20225                                break;
20226                            }
20227                            paren_depth -= 1;
20228                            parts.push(self.advance().text.clone());
20229                        } else if self.check(TokenType::Comma) && paren_depth == 0 {
20230                            break;
20231                        } else {
20232                            parts.push(self.advance().text.clone());
20233                        }
20234                    }
20235                    type_args.push(parts.join(" ").trim().to_uppercase());
20236                    if !self.match_token(TokenType::Comma) {
20237                        break;
20238                    }
20239                }
20240            }
20241            self.expect(TokenType::RParen)?;
20242            type_args
20243        } else {
20244            Vec::new()
20245        };
20246
20247        // Parse optional PARTITION clause (Spark/Hive)
20248        let partition = if self.match_token(TokenType::Partition) {
20249            // PARTITION(key = value, ...)
20250            self.expect(TokenType::LParen)?;
20251            // Parse partition expressions (e.g., ds = '2024-01-01')
20252            let mut partition_exprs = Vec::new();
20253            loop {
20254                if let Some(expr) = self.parse_conjunction()? {
20255                    partition_exprs.push(expr);
20256                }
20257                if !self.match_token(TokenType::Comma) {
20258                    break;
20259                }
20260            }
20261            self.expect(TokenType::RParen)?;
20262            let partition = Expression::Partition(Box::new(crate::expressions::Partition {
20263                expressions: partition_exprs,
20264                subpartition: false,
20265            }));
20266            Some(Box::new(partition))
20267        } else {
20268            None
20269        };
20270
20271        // ClickHouse: consume optional SETTINGS clause after target
20272        // e.g., DESC format(CSV, '...') SETTINGS key='val', key2='val2'
20273        if matches!(
20274            self.config.dialect,
20275            Some(crate::dialects::DialectType::ClickHouse)
20276        ) && self.check(TokenType::Settings)
20277        {
20278            self.skip(); // consume SETTINGS
20279            let _ = self.parse_settings_property()?;
20280        }
20281
20282        // Databricks: DESCRIBE ... AS JSON
20283        let as_json = if self.check(TokenType::As)
20284            && self
20285                .peek_nth(1)
20286                .map(|t| t.text.eq_ignore_ascii_case("JSON"))
20287                == Some(true)
20288        {
20289            self.skip(); // consume AS
20290            self.skip(); // consume JSON
20291            true
20292        } else {
20293            false
20294        };
20295
20296        // Parse optional post-target properties like type=stage (non-ClickHouse)
20297        if properties.is_empty() {
20298            while !self.is_at_end() && !self.check(TokenType::Semicolon) {
20299                // Check for identifier or keyword that could be a property name
20300                if self.check(TokenType::Var) || self.check(TokenType::Type) || self.check_keyword()
20301                {
20302                    let name = self.advance().text.to_lowercase();
20303                    if self.match_token(TokenType::Eq) {
20304                        let value = self.advance().text.clone();
20305                        properties.push((name, value));
20306                    } else {
20307                        // Not a property, put it back (can't easily undo, so break)
20308                        break;
20309                    }
20310                } else {
20311                    break;
20312                }
20313            }
20314        }
20315
20316        Ok(Expression::Describe(Box::new(Describe {
20317            target,
20318            extended,
20319            formatted,
20320            kind,
20321            properties,
20322            style,
20323            partition,
20324            leading_comments,
20325            as_json,
20326            params,
20327        })))
20328    }
20329
20330    /// Parse SHOW statement
20331    /// SHOW [TERSE] <object_type> [HISTORY] [FOR target] [LIKE pattern] [IN <scope>] [STARTS WITH pattern] [LIMIT n] [FROM object]
20332    fn parse_show(&mut self) -> Result<Expression> {
20333        self.expect(TokenType::Show)?;
20334
20335        // Check for TERSE
20336        let terse = self.match_identifier("TERSE");
20337
20338        // Parse the thing to show (DATABASES, TABLES, SCHEMAS, etc.)
20339        // This can be multiple words like "PRIMARY KEYS" or "IMPORTED KEYS"
20340        let mut this_parts = Vec::new();
20341        let mut target: Option<Expression> = None;
20342        let mut mutex: Option<bool> = None;
20343
20344        // Consume identifier tokens until we hit a keyword like LIKE, IN, FROM, LIMIT, HISTORY
20345        // Special handling for SingleStore SHOW variations
20346        while !self.is_at_end() {
20347            let current = self.peek();
20348            // Stop at keywords that start clauses
20349            if matches!(
20350                current.token_type,
20351                TokenType::Like
20352                    | TokenType::In
20353                    | TokenType::From
20354                    | TokenType::Limit
20355                    | TokenType::Semicolon
20356                    | TokenType::Eof
20357                    | TokenType::Where
20358                    | TokenType::For
20359                    | TokenType::Offset
20360                    | TokenType::Settings
20361            ) {
20362                // ClickHouse: SHOW CREATE SETTINGS PROFILE - don't stop at SETTINGS
20363                if current.token_type == TokenType::Settings
20364                    && matches!(
20365                        self.config.dialect,
20366                        Some(crate::dialects::DialectType::ClickHouse)
20367                    )
20368                    && this_parts.join(" ") == "CREATE"
20369                {
20370                    // Fall through to process SETTINGS as part of the type name
20371                } else {
20372                    break;
20373                }
20374            }
20375            // Handle comma-separated profile types (e.g., SHOW PROFILE BLOCK IO, PAGE FAULTS)
20376            // Append comma to the last part to preserve spacing
20377            if current.token_type == TokenType::Comma {
20378                if !this_parts.is_empty() {
20379                    let last = this_parts.pop().unwrap();
20380                    this_parts.push(format!("{},", last));
20381                }
20382                self.skip();
20383                continue;
20384            }
20385            // Stop at HISTORY keyword (but not as the first word)
20386            if !this_parts.is_empty() && current.text.eq_ignore_ascii_case("HISTORY") {
20387                break;
20388            }
20389            // Stop at STARTS keyword
20390            if current.text.eq_ignore_ascii_case("STARTS") {
20391                break;
20392            }
20393            // SingleStore: SHOW PLAN <id> - handle number directly (before Var/keyword check)
20394            // This is needed because numbers don't pass the Var/keyword check
20395            let joined_check = this_parts.join(" ");
20396            if joined_check == "PLAN" && current.token_type == TokenType::Number {
20397                let id = self.advance().text;
20398                target = Some(Expression::Literal(Box::new(Literal::Number(id))));
20399                break;
20400            }
20401            // Accept identifiers and keywords as part of the object type
20402            if current.token_type == TokenType::Var || current.token_type.is_keyword() {
20403                let joined = this_parts.join(" ");
20404
20405                // SingleStore: SHOW CREATE <type> <name> - preserve case for name
20406                // Types: AGGREGATE, PIPELINE, PROJECTION
20407                if matches!(
20408                    joined.as_str(),
20409                    "CREATE AGGREGATE" | "CREATE PIPELINE" | "CREATE PROJECTION"
20410                ) {
20411                    let name = self.advance().text;
20412                    target = Some(Expression::Identifier(Identifier::new(name)));
20413                    break;
20414                }
20415
20416                // SingleStore: SHOW <type> ON <name> - preserve case for name after ON
20417                // Check if current token is "ON" (but not at start)
20418                if current.text.eq_ignore_ascii_case("ON") && !this_parts.is_empty() {
20419                    this_parts.push("ON".to_string());
20420                    self.skip();
20421                    // Parse the name after ON, preserving case
20422                    if !self.is_at_end() {
20423                        let next = self.peek();
20424                        // Handle "ON TABLE name" pattern
20425                        if next.text.eq_ignore_ascii_case("TABLE") {
20426                            this_parts.push("TABLE".to_string());
20427                            self.skip();
20428                        }
20429                        // Parse the actual name
20430                        if !self.is_at_end() {
20431                            let name_tok = self.peek();
20432                            if name_tok.token_type == TokenType::Var
20433                                || name_tok.token_type.is_keyword()
20434                            {
20435                                let name = self.advance().text;
20436                                target = Some(Expression::Identifier(Identifier::new(name)));
20437                            }
20438                        }
20439                    }
20440                    break;
20441                }
20442
20443                // SingleStore: SHOW REPRODUCTION INTO OUTFILE 'filename'
20444                if current.text.eq_ignore_ascii_case("INTO") && joined == "REPRODUCTION" {
20445                    this_parts.push("INTO".to_string());
20446                    self.skip();
20447                    if !self.is_at_end() && self.peek().text.eq_ignore_ascii_case("OUTFILE") {
20448                        this_parts.push("OUTFILE".to_string());
20449                        self.skip();
20450                        // Parse the filename
20451                        if !self.is_at_end() && self.check(TokenType::String) {
20452                            let filename = self.advance().text;
20453                            target = Some(Expression::Literal(Box::new(Literal::String(filename))));
20454                        }
20455                    }
20456                    break;
20457                }
20458
20459                // SingleStore: SHOW PLAN [JSON] <id> - capture the numeric ID
20460                if joined == "PLAN" {
20461                    // Check if current is "JSON" - if so, push it and check for number
20462                    if current.text.eq_ignore_ascii_case("JSON") {
20463                        this_parts.push("JSON".to_string());
20464                        self.skip();
20465                        // Now check for number
20466                        if !self.is_at_end() && self.check(TokenType::Number) {
20467                            let id = self.advance().text;
20468                            target = Some(Expression::Literal(Box::new(Literal::Number(id))));
20469                        }
20470                        break;
20471                    }
20472                    // Check if current is a number (plan ID)
20473                    if current.token_type == TokenType::Number {
20474                        let id = self.advance().text;
20475                        target = Some(Expression::Literal(Box::new(Literal::Number(id))));
20476                        break;
20477                    }
20478                }
20479
20480                this_parts.push(current.text.to_ascii_uppercase());
20481                self.skip();
20482
20483                // ClickHouse: SHOW CREATE TABLE/VIEW/DICTIONARY <qualified_name>
20484                // After detecting CREATE TABLE/VIEW/DICTIONARY, parse the next as a table ref
20485                let joined = this_parts.join(" ");
20486                if matches!(
20487                    joined.as_str(),
20488                    "CREATE TABLE"
20489                        | "CREATE VIEW"
20490                        | "CREATE DICTIONARY"
20491                        | "CREATE DATABASE"
20492                        | "CREATE MATERIALIZED VIEW"
20493                        | "CREATE LIVE VIEW"
20494                ) {
20495                    if !self.is_at_end()
20496                        && (self.check(TokenType::Var)
20497                            || self.check(TokenType::QuotedIdentifier)
20498                            || self.is_safe_keyword_as_identifier())
20499                    {
20500                        let table = self.parse_table_ref()?;
20501                        target = Some(Expression::Table(Box::new(table)));
20502                    }
20503                    break;
20504                }
20505
20506                // ClickHouse: SHOW CREATE ROLE/PROFILE/QUOTA/ROW POLICY/POLICY with multi-name or ON clause
20507                // These have complex syntax (comma-separated names, ON db.table) - consume as raw text
20508                if matches!(
20509                    self.config.dialect,
20510                    Some(crate::dialects::DialectType::ClickHouse)
20511                ) && (matches!(
20512                    joined.as_str(),
20513                    "CREATE ROLE"
20514                        | "CREATE QUOTA"
20515                        | "CREATE SETTINGS PROFILE"
20516                        | "CREATE PROFILE"
20517                        | "CREATE ROW POLICY"
20518                        | "CREATE POLICY"
20519                        | "CREATE USER"
20520                ) || matches!(
20521                    joined.as_str(),
20522                    "SHOW CREATE ROLE"
20523                        | "SHOW CREATE QUOTA"
20524                        | "SHOW CREATE SETTINGS PROFILE"
20525                        | "SHOW CREATE PROFILE"
20526                        | "SHOW CREATE ROW POLICY"
20527                        | "SHOW CREATE POLICY"
20528                        | "SHOW CREATE USER"
20529                )) {
20530                    let mut parts = Vec::new();
20531                    while !self.is_at_end() && self.peek().token_type != TokenType::Semicolon {
20532                        parts.push(self.advance().text.clone());
20533                    }
20534                    target = Some(Expression::Identifier(Identifier::new(parts.join(" "))));
20535                    break;
20536                }
20537
20538                // ClickHouse: SHOW CREATE <qualified_name> (without TABLE/VIEW keyword)
20539                // e.g., SHOW CREATE INFORMATION_SCHEMA.COLUMNS
20540                if joined == "CREATE"
20541                    && matches!(
20542                        self.config.dialect,
20543                        Some(crate::dialects::DialectType::ClickHouse)
20544                    )
20545                    && !self.is_at_end()
20546                    && (self.check(TokenType::Var) || self.check(TokenType::QuotedIdentifier))
20547                    && !matches!(
20548                        self.peek().text.to_ascii_uppercase().as_str(),
20549                        "TABLE"
20550                            | "VIEW"
20551                            | "DICTIONARY"
20552                            | "DATABASE"
20553                            | "MATERIALIZED"
20554                            | "LIVE"
20555                            | "TEMPORARY"
20556                            | "ROLE"
20557                            | "QUOTA"
20558                            | "POLICY"
20559                            | "PROFILE"
20560                            | "USER"
20561                            | "ROW"
20562                            | "SETTINGS"
20563                    )
20564                {
20565                    let table = self.parse_table_ref()?;
20566                    target = Some(Expression::Table(Box::new(table)));
20567                    break;
20568                }
20569
20570                // Special handling for ENGINE: the next token is the engine name (case-preserved)
20571                // followed by STATUS or MUTEX
20572                if joined == "ENGINE" {
20573                    // Parse engine name (case-preserved)
20574                    if !self.is_at_end() {
20575                        let engine_tok = self.peek();
20576                        if engine_tok.token_type == TokenType::Var
20577                            || engine_tok.token_type.is_keyword()
20578                        {
20579                            let engine_name = self.advance().text;
20580                            target = Some(Expression::Identifier(Identifier::new(engine_name)));
20581                            // Parse STATUS or MUTEX
20582                            if !self.is_at_end() {
20583                                let next = self.peek();
20584                                let next_upper = next.text.to_ascii_uppercase();
20585                                if next_upper == "STATUS" {
20586                                    self.skip();
20587                                    mutex = Some(false);
20588                                } else if next_upper == "MUTEX" {
20589                                    self.skip();
20590                                    mutex = Some(true);
20591                                }
20592                            }
20593                        }
20594                    }
20595                    break;
20596                }
20597            } else {
20598                break;
20599            }
20600        }
20601
20602        let this = this_parts.join(" ");
20603
20604        // Check for HISTORY
20605        let history = self.match_identifier("HISTORY");
20606
20607        // Check for FOR target (MySQL: SHOW GRANTS FOR foo, SHOW PROFILE ... FOR QUERY 5)
20608        // SingleStore: SHOW GROUPS FOR ROLE 'role_name', SHOW GROUPS FOR USER 'username'
20609        let for_target = if self.match_token(TokenType::For) {
20610            // Parse the target (can be multi-word like QUERY 5, or ROLE 'name')
20611            let mut parts = Vec::new();
20612            while !self.is_at_end() {
20613                let tok = self.peek();
20614                if matches!(
20615                    tok.token_type,
20616                    TokenType::Like
20617                        | TokenType::In
20618                        | TokenType::From
20619                        | TokenType::Limit
20620                        | TokenType::Semicolon
20621                        | TokenType::Eof
20622                        | TokenType::Where
20623                ) {
20624                    break;
20625                }
20626                if tok.token_type == TokenType::Var
20627                    || tok.token_type.is_keyword()
20628                    || tok.token_type == TokenType::Number
20629                {
20630                    parts.push(self.advance().text);
20631                } else if tok.token_type == TokenType::String {
20632                    // Handle string literals (e.g., SHOW GROUPS FOR ROLE 'role_name')
20633                    let text = self.advance().text;
20634                    parts.push(format!("'{}'", text));
20635                } else {
20636                    break;
20637                }
20638            }
20639            if parts.is_empty() {
20640                None
20641            } else {
20642                Some(Expression::Identifier(Identifier::new(parts.join(" "))))
20643            }
20644        } else {
20645            None
20646        };
20647
20648        // Check for LIKE pattern
20649        let like = if self.match_token(TokenType::Like) {
20650            Some(self.parse_primary()?)
20651        } else {
20652            None
20653        };
20654
20655        // Check for IN scope
20656        let (scope_kind, scope) = if self.match_token(TokenType::In) {
20657            // Parse scope kind and optionally scope object
20658            // Check for keywords: ACCOUNT, DATABASE, SCHEMA, TABLE, CLASS, APPLICATION
20659            let (kind, scope_obj) = if self.match_keyword("ACCOUNT") {
20660                (Some("ACCOUNT".to_string()), None)
20661            } else if self.match_token(TokenType::Database) {
20662                // IN DATABASE [name]
20663                let scope_obj = if !self.is_at_end()
20664                    && !self.check(TokenType::Like)
20665                    && !self.check(TokenType::Limit)
20666                    && !self.check(TokenType::Semicolon)
20667                    && !self.check_keyword_text("STARTS")
20668                {
20669                    let table = self.parse_table_ref()?;
20670                    Some(Expression::Table(Box::new(table)))
20671                } else {
20672                    None
20673                };
20674                (Some("DATABASE".to_string()), scope_obj)
20675            } else if self.match_token(TokenType::Schema) {
20676                // IN SCHEMA [name]
20677                let scope_obj = if !self.is_at_end()
20678                    && !self.check(TokenType::Like)
20679                    && !self.check(TokenType::Limit)
20680                    && !self.check(TokenType::Semicolon)
20681                    && !self.check_keyword_text("STARTS")
20682                {
20683                    let table = self.parse_table_ref()?;
20684                    Some(Expression::Table(Box::new(table)))
20685                } else {
20686                    None
20687                };
20688                (Some("SCHEMA".to_string()), scope_obj)
20689            } else if self.match_token(TokenType::Table) {
20690                // IN TABLE [name]
20691                let scope_obj = if !self.is_at_end()
20692                    && !self.check(TokenType::Like)
20693                    && !self.check(TokenType::Limit)
20694                    && !self.check(TokenType::Semicolon)
20695                    && !self.check_keyword_text("STARTS")
20696                {
20697                    let table = self.parse_table_ref()?;
20698                    Some(Expression::Table(Box::new(table)))
20699                } else {
20700                    None
20701                };
20702                (Some("TABLE".to_string()), scope_obj)
20703            } else if self.match_token(TokenType::View) {
20704                // IN VIEW [name]
20705                let scope_obj = if !self.is_at_end()
20706                    && !self.check(TokenType::Like)
20707                    && !self.check(TokenType::Limit)
20708                    && !self.check(TokenType::Semicolon)
20709                    && !self.check_keyword_text("STARTS")
20710                {
20711                    let table = self.parse_table_ref()?;
20712                    Some(Expression::Table(Box::new(table)))
20713                } else {
20714                    None
20715                };
20716                (Some("VIEW".to_string()), scope_obj)
20717            } else if self.match_keyword("CLASS") {
20718                // IN CLASS name
20719                let scope_obj = if !self.is_at_end() {
20720                    let table = self.parse_table_ref()?;
20721                    Some(Expression::Table(Box::new(table)))
20722                } else {
20723                    None
20724                };
20725                (Some("CLASS".to_string()), scope_obj)
20726            } else if self.match_keyword("APPLICATION") {
20727                // IN APPLICATION [PACKAGE] name
20728                let kind = if self.match_keyword("PACKAGE") {
20729                    "APPLICATION PACKAGE".to_string()
20730                } else {
20731                    "APPLICATION".to_string()
20732                };
20733                let scope_obj = if !self.is_at_end() {
20734                    let table = self.parse_table_ref()?;
20735                    Some(Expression::Table(Box::new(table)))
20736                } else {
20737                    None
20738                };
20739                (Some(kind), scope_obj)
20740            } else {
20741                // Default - infer scope_kind based on what we're showing
20742                // Python SQLGlot: SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}
20743                let table = self.parse_table_ref()?;
20744                let inferred_kind = match this.as_str() {
20745                    "OBJECTS" | "TABLES" | "VIEWS" | "SEQUENCES" | "UNIQUE KEYS"
20746                    | "IMPORTED KEYS" => "SCHEMA",
20747                    "PRIMARY KEYS" => "TABLE",
20748                    _ => "SCHEMA", // Default to SCHEMA for unknown types
20749                };
20750                (
20751                    Some(inferred_kind.to_string()),
20752                    Some(Expression::Table(Box::new(table))),
20753                )
20754            };
20755            (kind, scope_obj)
20756        } else {
20757            (None, None)
20758        };
20759
20760        // Check for STARTS WITH
20761        let starts_with = if self.match_keyword("STARTS") {
20762            self.match_token(TokenType::With); // WITH is a keyword token
20763            Some(self.parse_primary()?)
20764        } else {
20765            None
20766        };
20767
20768        // Check for LIMIT
20769        let limit = if self.match_token(TokenType::Limit) {
20770            Some(Box::new(Limit {
20771                this: self.parse_expression()?,
20772                percent: false,
20773                comments: Vec::new(),
20774            }))
20775        } else {
20776            None
20777        };
20778
20779        // Check for FROM (can be a string literal or identifier)
20780        // For MySQL SHOW COLUMNS/INDEX, the first FROM is the target table,
20781        // and the second FROM is the database
20782        let mut from = if self.match_token(TokenType::From) {
20783            Some(self.parse_primary()?)
20784        } else {
20785            None
20786        };
20787
20788        // Check for second FROM clause (MySQL: SHOW COLUMNS FROM tbl FROM db, SHOW INDEX FROM foo FROM bar)
20789        let mut db = if from.is_some() && self.match_token(TokenType::From) {
20790            Some(self.parse_primary()?)
20791        } else {
20792            None
20793        };
20794
20795        // Normalize MySQL SHOW INDEX/COLUMNS FROM db.tbl -> FROM tbl FROM db.
20796        if matches!(this.as_str(), "INDEX" | "COLUMNS") && db.is_none() {
20797            if let Some(from_expr) = from.take() {
20798                match from_expr {
20799                    Expression::Table(mut t) => {
20800                        if let Some(db_ident) = t.schema.take().or(t.catalog.take()) {
20801                            db = Some(Expression::Identifier(db_ident));
20802                            from = Some(Expression::Identifier(t.name));
20803                        } else {
20804                            from = Some(Expression::Table(t));
20805                        }
20806                    }
20807                    Expression::Column(c) => {
20808                        if let Some(table_ident) = c.table {
20809                            db = Some(Expression::Identifier(table_ident));
20810                            from = Some(Expression::Identifier(c.name));
20811                        } else {
20812                            from = Some(Expression::Column(c));
20813                        }
20814                    }
20815                    Expression::Identifier(id) => {
20816                        if let Some((db_name, table_name)) = id.name.split_once('.') {
20817                            db = Some(Expression::Identifier(Identifier::new(db_name)));
20818                            from = Some(Expression::Identifier(Identifier {
20819                                name: table_name.to_string(),
20820                                quoted: id.quoted,
20821                                trailing_comments: id.trailing_comments,
20822                                span: None,
20823                            }));
20824                        } else {
20825                            from = Some(Expression::Identifier(id));
20826                        }
20827                    }
20828                    other => {
20829                        from = Some(other);
20830                    }
20831                }
20832            }
20833        }
20834
20835        // MySQL: SHOW TABLES FROM db LIKE 'pattern' (LIKE can come after FROM)
20836        let like = if like.is_none() && self.match_token(TokenType::Like) {
20837            Some(self.parse_primary()?)
20838        } else {
20839            like
20840        };
20841
20842        // ClickHouse: SHOW ... NOT LIKE 'pattern' / NOT ILIKE 'pattern'
20843        if matches!(
20844            self.config.dialect,
20845            Some(crate::dialects::DialectType::ClickHouse)
20846        ) && self.check(TokenType::Not)
20847        {
20848            if self.current + 1 < self.tokens.len()
20849                && matches!(
20850                    self.tokens[self.current + 1].token_type,
20851                    TokenType::Like | TokenType::ILike
20852                )
20853            {
20854                self.skip(); // consume NOT
20855                self.skip(); // consume LIKE/ILIKE
20856                let _ = self.parse_primary()?; // consume pattern
20857            }
20858        }
20859
20860        // ClickHouse: SHOW ... ILIKE 'pattern'
20861        if matches!(
20862            self.config.dialect,
20863            Some(crate::dialects::DialectType::ClickHouse)
20864        ) && self.match_token(TokenType::ILike)
20865        {
20866            let _ = self.parse_primary()?; // consume pattern
20867        }
20868
20869        // Check for WHERE clause (MySQL: SHOW STATUS WHERE condition)
20870        let where_clause = if self.match_token(TokenType::Where) {
20871            Some(self.parse_expression()?)
20872        } else {
20873            None
20874        };
20875
20876        // Check for WITH PRIVILEGES clause (Snowflake: SHOW ... WITH PRIVILEGES USAGE, MODIFY)
20877        let privileges = if self.match_token(TokenType::With) && self.match_keyword("PRIVILEGES") {
20878            // Parse comma-separated list of privilege names (no parentheses)
20879            let mut privs = Vec::new();
20880            loop {
20881                if self.is_at_end() || self.check(TokenType::Semicolon) {
20882                    break;
20883                }
20884                let tok = self.peek();
20885                if tok.token_type == TokenType::Var || tok.token_type.is_keyword() {
20886                    privs.push(self.advance().text.to_ascii_uppercase());
20887                    // Check for comma to continue
20888                    if !self.match_token(TokenType::Comma) {
20889                        break;
20890                    }
20891                } else {
20892                    break;
20893                }
20894            }
20895            privs
20896        } else {
20897            Vec::new()
20898        };
20899
20900        // ClickHouse: SHOW ... SETTINGS key=val, key=val
20901        if matches!(
20902            self.config.dialect,
20903            Some(crate::dialects::DialectType::ClickHouse)
20904        ) {
20905            self.parse_clickhouse_settings_clause()?;
20906        }
20907
20908        Ok(Expression::Show(Box::new(Show {
20909            this,
20910            terse,
20911            history,
20912            like,
20913            scope_kind,
20914            scope,
20915            starts_with,
20916            limit,
20917            from,
20918            where_clause,
20919            for_target,
20920            db,
20921            target,
20922            mutex,
20923            privileges,
20924        })))
20925    }
20926
20927    /// Parse COPY statement (Snowflake, PostgreSQL)
20928    /// COPY INTO <table> FROM <source> [(<parameters>)]
20929    /// COPY INTO <location> FROM <table> [(<parameters>)]
20930    fn parse_copy(&mut self) -> Result<Expression> {
20931        self.expect(TokenType::Copy)?;
20932
20933        // Check for INTO (Snowflake/TSQL style: COPY INTO)
20934        let is_into = self.match_token(TokenType::Into);
20935
20936        // Parse target table or location (possibly with column list)
20937        let this = if self.check(TokenType::LParen) {
20938            // Subquery: COPY (SELECT ...) TO ...
20939            self.parse_primary()?
20940        } else if self.check(TokenType::DAt)
20941            || self.check(TokenType::String)
20942            || self.is_stage_reference()
20943        {
20944            // Stage or file destination (for exports): COPY INTO @stage or COPY INTO 's3://...'
20945            self.parse_file_location()?
20946        } else {
20947            // Table reference, possibly with column list: COPY table (col1, col2)
20948            let table = self.parse_table_ref()?;
20949            // Check for column list
20950            if self.check(TokenType::LParen) {
20951                // Peek ahead to see if this is a column list or a subquery
20952                // Column list won't start with SELECT
20953                let has_column_list = {
20954                    let start = self.current;
20955                    self.skip(); // consume (
20956                    let is_select = self.check(TokenType::Select);
20957                    self.current = start; // backtrack
20958                    !is_select
20959                };
20960                if has_column_list {
20961                    self.skip(); // consume (
20962                    let mut columns = Vec::new();
20963                    loop {
20964                        let col_name = self.expect_identifier_or_keyword()?;
20965                        columns.push(col_name);
20966                        if !self.match_token(TokenType::Comma) {
20967                            break;
20968                        }
20969                    }
20970                    self.expect(TokenType::RParen)?;
20971                    // Create a schema expression with the table and columns
20972                    Expression::Schema(Box::new(Schema {
20973                        this: Some(Box::new(Expression::Table(Box::new(table)))),
20974                        expressions: columns
20975                            .into_iter()
20976                            .map(|c| {
20977                                Expression::boxed_column(Column {
20978                                    name: Identifier::new(c),
20979                                    table: None,
20980                                    join_mark: false,
20981                                    trailing_comments: Vec::new(),
20982                                    span: None,
20983                                    inferred_type: None,
20984                                })
20985                            })
20986                            .collect(),
20987                    }))
20988                } else {
20989                    Expression::Table(Box::new(table))
20990                }
20991            } else {
20992                Expression::Table(Box::new(table))
20993            }
20994        };
20995
20996        // Determine direction: FROM means loading into table, TO means exporting
20997        let kind = self.match_token(TokenType::From);
20998        let has_to = if !kind {
20999            // Try TO keyword for export (TO is a keyword token, not an identifier)
21000            self.match_token(TokenType::To)
21001        } else {
21002            false
21003        };
21004
21005        // Parse source/destination files or stage only if FROM/TO was found
21006        // and we're not at a parameter (which would start with identifier = ...)
21007        let mut files = Vec::new();
21008        if kind
21009            || has_to
21010            || self.check(TokenType::String)
21011            || self.is_stage_reference()
21012            || self.check(TokenType::LParen)
21013        {
21014            // Check for subquery: FROM (SELECT ...)
21015            if self.check(TokenType::LParen) {
21016                // Peek ahead to see if this is a subquery
21017                let start = self.current;
21018                self.skip(); // consume (
21019                let is_select = self.check(TokenType::Select);
21020                self.current = start; // backtrack
21021                if is_select {
21022                    // Parse the subquery
21023                    let subquery = self.parse_primary()?;
21024                    files.push(subquery);
21025                }
21026            }
21027            // Parse file location(s) until we hit a parameter or end
21028            while !self.is_at_end() && !self.check(TokenType::Semicolon) && files.is_empty()
21029                || (self.check(TokenType::Comma) && !files.is_empty())
21030            {
21031                // Consume comma if present (for multiple files)
21032                if !files.is_empty() && !self.match_token(TokenType::Comma) {
21033                    break;
21034                }
21035                // Check if this looks like a parameter (identifier followed by =)
21036                // But stage references (@stage) are not parameters
21037                if (self.check(TokenType::Var) || self.check_keyword())
21038                    && !self.is_stage_reference()
21039                {
21040                    let lookahead = self.current + 1;
21041                    if lookahead < self.tokens.len()
21042                        && self.tokens[lookahead].token_type == TokenType::Eq
21043                    {
21044                        break; // This is a parameter, stop parsing files
21045                    }
21046                }
21047                // Check for WITH keyword - stop parsing files
21048                if self.check(TokenType::With) {
21049                    break;
21050                }
21051                // Stop if we don't see a file location start
21052                // Include QuotedIdentifier for Databricks backtick-quoted paths like `s3://link`
21053                if !self.check(TokenType::String)
21054                    && !self.is_stage_reference()
21055                    && !self.check(TokenType::Var)
21056                    && !self.check_keyword()
21057                    && !self.check(TokenType::QuotedIdentifier)
21058                {
21059                    break;
21060                }
21061                // For COPY INTO ... FROM table_name, handle dotted table references
21062                // If the next token is a Var/Identifier and the one after is a Dot, parse as table reference
21063                if (self.check(TokenType::Var) || self.is_identifier_token())
21064                    && !self.is_stage_reference()
21065                {
21066                    let lookahead = self.current + 1;
21067                    let has_dot = lookahead < self.tokens.len()
21068                        && self.tokens[lookahead].token_type == TokenType::Dot;
21069                    if has_dot {
21070                        let table = self.parse_table_ref()?;
21071                        files.push(Expression::Table(Box::new(table)));
21072                        continue;
21073                    }
21074                }
21075                let location = self.parse_file_location()?;
21076                files.push(location);
21077            }
21078        }
21079
21080        // Parse credentials and parameters
21081        let mut params = Vec::new();
21082        let mut credentials = None;
21083        let mut with_wrapped = false;
21084
21085        // Parse Snowflake-style parameters: KEY = VALUE or KEY = (nested values)
21086        // or DuckDB/PostgreSQL WITH (KEY VALUE, ...) format
21087        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
21088            // Match WITH keyword if present (some dialects use WITH before params)
21089            let had_with = self.match_token(TokenType::With);
21090
21091            // Check for wrapped parameters in parentheses
21092            if self.match_token(TokenType::LParen) {
21093                if had_with {
21094                    with_wrapped = true;
21095                }
21096                while !self.check(TokenType::RParen) && !self.is_at_end() {
21097                    let param = self.parse_copy_parameter()?;
21098                    params.push(param);
21099                    // Consume optional comma between params
21100                    self.match_token(TokenType::Comma);
21101                }
21102                self.expect(TokenType::RParen)?;
21103                break;
21104            }
21105
21106            // Parse individual parameter: NAME = value
21107            if self.check(TokenType::Var) || self.check_keyword() {
21108                let param = self.parse_copy_parameter()?;
21109
21110                // Handle special CREDENTIALS parameter (case-insensitive)
21111                if param.name.eq_ignore_ascii_case("CREDENTIALS") {
21112                    // For Redshift-style CREDENTIALS 'string' (single string value)
21113                    // vs Snowflake-style CREDENTIALS = (KEY='value', KEY2='value')
21114                    if let Some(Expression::Literal(lit)) = &param.value {
21115                        if let Literal::String(s) = lit.as_ref() {
21116                            // Redshift style: store as a simple credentials string
21117                            let creds = Credentials {
21118                                credentials: vec![("".to_string(), s.clone())],
21119                                storage: None,
21120                                encryption: None,
21121                            };
21122                            credentials = Some(Box::new(creds));
21123                        }
21124                    } else {
21125                        // Snowflake style: key=value pairs
21126                        let creds = Credentials {
21127                            credentials: param
21128                                .values
21129                                .iter()
21130                                .filter_map(|v| {
21131                                    if let Expression::Eq(eq) = v {
21132                                        let key = if let Expression::Column(c) = &eq.left {
21133                                            c.name.name.clone()
21134                                        } else {
21135                                            return None;
21136                                        };
21137                                        let val = if let Expression::Literal(lit) = &eq.right {
21138                                            if let Literal::String(s) = lit.as_ref() {
21139                                                s.clone()
21140                                            } else {
21141                                                String::new()
21142                                            }
21143                                        } else {
21144                                            return None;
21145                                        };
21146                                        Some((key, val))
21147                                    } else {
21148                                        None
21149                                    }
21150                                })
21151                                .collect(),
21152                            storage: None,
21153                            encryption: None,
21154                        };
21155                        credentials = Some(Box::new(creds));
21156                    }
21157                } else if param.name.eq_ignore_ascii_case("STORAGE_INTEGRATION") {
21158                    // Store STORAGE_INTEGRATION as a regular parameter only
21159                    // Don't use the credentials.storage field for this
21160                    params.push(param);
21161                } else {
21162                    params.push(param);
21163                }
21164            } else {
21165                break;
21166            }
21167        }
21168
21169        Ok(Expression::Copy(Box::new(CopyStmt {
21170            this,
21171            kind,
21172            files,
21173            params,
21174            credentials,
21175            is_into,
21176            with_wrapped,
21177        })))
21178    }
21179
21180    /// Parse a single COPY parameter: NAME = value, NAME = (nested values), or NAME value (no =)
21181    fn parse_copy_parameter(&mut self) -> Result<CopyParameter> {
21182        // Preserve original case for parameter name (important for Redshift COPY options)
21183        let name = self.expect_identifier_or_keyword()?;
21184
21185        let mut value = None;
21186        let mut values = Vec::new();
21187
21188        let has_eq = self.match_token(TokenType::Eq);
21189
21190        if has_eq {
21191            if self.match_token(TokenType::LParen) {
21192                // Nested parameter list: KEY = (nested_key=value, ...) or KEY = (value1, value2)
21193                // Check if this is a list of simple values (like strings) or key=value pairs
21194                // If the first token is a string/number, it's a list of values
21195                if self.check(TokenType::String) || self.check(TokenType::Number) {
21196                    // Simple value list: FILES = ('test1.csv', 'test2.csv')
21197                    while !self.check(TokenType::RParen) && !self.is_at_end() {
21198                        values.push(self.parse_primary()?);
21199                        if !self.match_token(TokenType::Comma) {
21200                            break;
21201                        }
21202                    }
21203                } else {
21204                    // Key=value pairs: CREDENTIALS = (AWS_KEY_ID='id' AWS_SECRET_KEY='key')
21205                    while !self.check(TokenType::RParen) && !self.is_at_end() {
21206                        // Parse nested key=value pairs
21207                        let nested_key = self.expect_identifier_or_keyword()?.to_ascii_uppercase();
21208                        if self.match_token(TokenType::Eq) {
21209                            let nested_value = self.parse_copy_param_value()?;
21210                            // Create an Eq expression for the nested key=value
21211                            values.push(Expression::Eq(Box::new(BinaryOp {
21212                                left: Expression::boxed_column(Column {
21213                                    name: Identifier::new(nested_key),
21214                                    table: None,
21215                                    join_mark: false,
21216                                    trailing_comments: Vec::new(),
21217                                    span: None,
21218                                    inferred_type: None,
21219                                }),
21220                                right: nested_value,
21221                                left_comments: Vec::new(),
21222                                operator_comments: Vec::new(),
21223                                trailing_comments: Vec::new(),
21224                                inferred_type: None,
21225                            })));
21226                        } else {
21227                            // Just a keyword/value without =
21228                            values.push(Expression::boxed_column(Column {
21229                                name: Identifier::new(nested_key),
21230                                table: None,
21231                                join_mark: false,
21232                                trailing_comments: Vec::new(),
21233                                span: None,
21234                                inferred_type: None,
21235                            }));
21236                        }
21237                        // Consume optional comma between nested values
21238                        self.match_token(TokenType::Comma);
21239                    }
21240                }
21241                self.expect(TokenType::RParen)?;
21242            } else {
21243                // Simple value: KEY = value
21244                value = Some(self.parse_copy_param_value()?);
21245            }
21246        } else {
21247            // No = sign: DuckDB/PostgreSQL format (KEY value or KEY (col1, col2))
21248            // Check if followed by a value: string, number, boolean, identifier, or tuple
21249            if self.check(TokenType::LParen) {
21250                // Check if this is a COPY_INTO_VARLEN_OPTIONS parameter
21251                // These are Databricks/Snowflake options that contain key='value' pairs without = before (
21252                let is_varlen_option = matches!(
21253                    name.as_str(),
21254                    "FORMAT_OPTIONS" | "COPY_OPTIONS" | "FILE_FORMAT" | "CREDENTIAL"
21255                );
21256
21257                self.skip(); // consume (
21258
21259                if is_varlen_option {
21260                    // Parse as key='value' pairs: FORMAT_OPTIONS ('opt1'='true', 'opt2'='test')
21261                    while !self.check(TokenType::RParen) && !self.is_at_end() {
21262                        if self.check(TokenType::String) {
21263                            // Parse 'key'='value' pair
21264                            let key_token = self.advance();
21265                            let key = key_token.text.clone();
21266                            if self.match_token(TokenType::Eq) {
21267                                let val = self.parse_copy_param_value()?;
21268                                values.push(Expression::Eq(Box::new(BinaryOp {
21269                                    left: Expression::Literal(Box::new(Literal::String(key))),
21270                                    right: val,
21271                                    left_comments: Vec::new(),
21272                                    operator_comments: Vec::new(),
21273                                    trailing_comments: Vec::new(),
21274                                    inferred_type: None,
21275                                })));
21276                            } else {
21277                                // Just a string without =
21278                                values.push(Expression::Literal(Box::new(Literal::String(key))));
21279                            }
21280                        } else if self.check(TokenType::Var)
21281                            || self.check_keyword()
21282                            || self.is_identifier_token()
21283                        {
21284                            // Parse identifier='value' pair (unquoted key)
21285                            let key = self.advance().text.clone();
21286                            if self.match_token(TokenType::Eq) {
21287                                let val = self.parse_copy_param_value()?;
21288                                values.push(Expression::Eq(Box::new(BinaryOp {
21289                                    left: Expression::boxed_column(Column {
21290                                        name: Identifier::new(key),
21291                                        table: None,
21292                                        join_mark: false,
21293                                        trailing_comments: Vec::new(),
21294                                        span: None,
21295                                        inferred_type: None,
21296                                    }),
21297                                    right: val,
21298                                    left_comments: Vec::new(),
21299                                    operator_comments: Vec::new(),
21300                                    trailing_comments: Vec::new(),
21301                                    inferred_type: None,
21302                                })));
21303                            } else {
21304                                // Just an identifier without =
21305                                values.push(Expression::boxed_column(Column {
21306                                    name: Identifier::new(key),
21307                                    table: None,
21308                                    join_mark: false,
21309                                    trailing_comments: Vec::new(),
21310                                    span: None,
21311                                    inferred_type: None,
21312                                }));
21313                            }
21314                        } else {
21315                            break;
21316                        }
21317                        self.match_token(TokenType::Comma);
21318                    }
21319                } else {
21320                    // Tuple value: FORCE_NOT_NULL (col1, col2)
21321                    let mut items = Vec::new();
21322                    while !self.check(TokenType::RParen) && !self.is_at_end() {
21323                        items.push(self.parse_primary()?);
21324                        if !self.match_token(TokenType::Comma) {
21325                            break;
21326                        }
21327                    }
21328                    value = Some(Expression::Tuple(Box::new(Tuple { expressions: items })));
21329                }
21330                self.expect(TokenType::RParen)?;
21331            } else if self.check(TokenType::LBrace) {
21332                // Map literal: KV_METADATA {'key': 'value', ...}
21333                value = Some(self.parse_primary()?);
21334            } else if self.check(TokenType::String) || self.check(TokenType::Number) {
21335                // String or number value
21336                value = Some(self.parse_copy_param_value()?);
21337            } else if self.check(TokenType::True) || self.check(TokenType::False) {
21338                // Boolean value (TRUE/FALSE are keyword tokens)
21339                value = Some(self.parse_copy_param_value()?);
21340            } else if !self.check(TokenType::Comma)
21341                && !self.check(TokenType::RParen)
21342                && !self.is_at_end()
21343                && !self.check(TokenType::Semicolon)
21344            {
21345                // Identifier value: FORMAT JSON, HEADER MATCH, etc.
21346                // But skip if this is a known flag-only parameter (Redshift COPY options that take no value)
21347                let name_upper = name.to_ascii_uppercase();
21348                let is_flag_param = matches!(
21349                    name_upper.as_str(),
21350                    "EMPTYASNULL"
21351                        | "BLANKSASNULL"
21352                        | "ACCEPTINVCHARS"
21353                        | "COMPUPDATE"
21354                        | "STATUPDATE"
21355                        | "NOLOAD"
21356                        | "ESCAPE"
21357                        | "REMOVEQUOTES"
21358                        | "EXPLICIT_IDS"
21359                        | "FILLRECORD"
21360                        | "TRIMBLANKS"
21361                        | "TRUNCATECOLUMNS"
21362                        | "ROUNDEC"
21363                        | "IGNOREHEADER"
21364                        | "IGNOREBLANKLINES"
21365                        | "ACCEPTANYDATE"
21366                );
21367                if !is_flag_param && (self.check(TokenType::Var) || self.check_keyword()) {
21368                    value = Some(self.parse_copy_param_value()?);
21369                }
21370            }
21371            // If nothing matched, it's a bare flag parameter with no value (allowed)
21372        }
21373
21374        Ok(CopyParameter {
21375            name,
21376            value,
21377            values,
21378            eq: has_eq,
21379        })
21380    }
21381
21382    /// Parse a value for COPY parameters (handles strings, identifiers, numbers, lists)
21383    fn parse_copy_param_value(&mut self) -> Result<Expression> {
21384        // Handle lists like ('file1', 'file2')
21385        if self.match_token(TokenType::LParen) {
21386            let mut items = Vec::new();
21387            while !self.check(TokenType::RParen) && !self.is_at_end() {
21388                items.push(self.parse_primary()?);
21389                if !self.match_token(TokenType::Comma) {
21390                    break;
21391                }
21392            }
21393            self.expect(TokenType::RParen)?;
21394            return Ok(Expression::Tuple(Box::new(Tuple { expressions: items })));
21395        }
21396
21397        // Handle string, number, boolean, identifier
21398        if self.check(TokenType::String) {
21399            let token = self.advance();
21400            return Ok(Expression::Literal(Box::new(Literal::String(
21401                token.text.clone(),
21402            ))));
21403        }
21404        // Handle quoted identifier (e.g., STORAGE_INTEGRATION = "storage")
21405        if self.check(TokenType::QuotedIdentifier) {
21406            let token = self.advance();
21407            return Ok(Expression::boxed_column(Column {
21408                name: Identifier::quoted(token.text.clone()),
21409                table: None,
21410                join_mark: false,
21411                trailing_comments: Vec::new(),
21412                span: None,
21413                inferred_type: None,
21414            }));
21415        }
21416        if self.check(TokenType::Number) {
21417            let token = self.advance();
21418            return Ok(Expression::Literal(Box::new(Literal::Number(
21419                token.text.clone(),
21420            ))));
21421        }
21422        if self.match_token(TokenType::True) {
21423            return Ok(Expression::Boolean(BooleanLiteral { value: true }));
21424        }
21425        if self.match_token(TokenType::False) {
21426            return Ok(Expression::Boolean(BooleanLiteral { value: false }));
21427        }
21428        // Identifier (e.g., FORMAT_NAME=my_format)
21429        if self.check(TokenType::Var) || self.check_keyword() {
21430            // Could be a qualified name like MY_DATABASE.MY_SCHEMA.MY_FORMAT
21431            let first = self.advance().text.clone();
21432            if self.match_token(TokenType::Dot) {
21433                let second = self.expect_identifier_or_keyword()?;
21434                if self.match_token(TokenType::Dot) {
21435                    let third = self.expect_identifier_or_keyword()?;
21436                    return Ok(Expression::boxed_column(Column {
21437                        name: Identifier::new(format!("{}.{}.{}", first, second, third)),
21438                        table: None,
21439                        join_mark: false,
21440                        trailing_comments: Vec::new(),
21441                        span: None,
21442                        inferred_type: None,
21443                    }));
21444                }
21445                return Ok(Expression::boxed_column(Column {
21446                    name: Identifier::new(format!("{}.{}", first, second)),
21447                    table: None,
21448                    join_mark: false,
21449                    trailing_comments: Vec::new(),
21450                    span: None,
21451                    inferred_type: None,
21452                }));
21453            }
21454            return Ok(Expression::boxed_column(Column {
21455                name: Identifier::new(first),
21456                table: None,
21457                join_mark: false,
21458                trailing_comments: Vec::new(),
21459                span: None,
21460                inferred_type: None,
21461            }));
21462        }
21463
21464        Err(self.parse_error("Expected value for COPY parameter"))
21465    }
21466
21467    /// Parse Snowflake stage reference when tokenized as String (e.g., '@mystage', '@external/location')
21468    /// Handles: '@mystage', '@external/location'
21469    fn parse_stage_reference_from_string(&mut self) -> Result<Expression> {
21470        use crate::expressions::StageReference;
21471
21472        // The String token contains @ and the entire path
21473        let string_token = self.advance();
21474        let full_path = string_token.text.clone();
21475
21476        // Split on / to get stage name and path
21477        let parts: Vec<&str> = full_path.splitn(2, '/').collect();
21478        let name = parts[0].to_string();
21479        let path = if parts.len() > 1 {
21480            Some(format!("/{}", parts[1]))
21481        } else {
21482            None
21483        };
21484
21485        // Handle optional parameters: (FILE_FORMAT => 'fmt', PATTERN => '*.csv')
21486        let (file_format, pattern) = if self.match_token(TokenType::LParen) {
21487            let mut ff = None;
21488            let mut pat = None;
21489
21490            loop {
21491                if self.match_identifier("FILE_FORMAT") {
21492                    self.expect(TokenType::FArrow)?; // =>
21493                    ff = Some(self.parse_primary()?);
21494                } else if self.match_identifier("PATTERN") || self.match_token(TokenType::Pattern) {
21495                    // PATTERN can be tokenized as keyword or identifier
21496                    self.expect(TokenType::FArrow)?; // =>
21497                    if let Expression::Literal(lit) = self.parse_primary()? {
21498                        if let Literal::String(s) = lit.as_ref() {
21499                            pat = Some(s.clone());
21500                        }
21501                    }
21502                } else {
21503                    break;
21504                }
21505
21506                if !self.match_token(TokenType::Comma) {
21507                    break;
21508                }
21509            }
21510
21511            self.expect(TokenType::RParen)?;
21512            (ff, pat)
21513        } else {
21514            (None, None)
21515        };
21516
21517        Ok(Expression::StageReference(Box::new(StageReference {
21518            name,
21519            path,
21520            file_format,
21521            pattern,
21522            quoted: true, // Stage reference came from a quoted string
21523        })))
21524    }
21525
21526    /// Parse Snowflake stage reference when tokenized as Var (e.g., @mystage becomes Var token)
21527    /// Handles: @mystage, @mystage/path/to/file.csv
21528    fn parse_stage_reference_from_var(&mut self) -> Result<Expression> {
21529        use crate::expressions::StageReference;
21530
21531        // The Var token already contains @ and the stage name
21532        let var_token = self.advance();
21533        let mut name = var_token.text.clone();
21534
21535        // Handle qualified names: @namespace.stage
21536        while self.match_token(TokenType::Dot) {
21537            name.push('.');
21538            if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
21539                name.push_str(&self.advance().text);
21540            } else if self.check(TokenType::Percent) {
21541                // Handle table stage in qualified path: @namespace.%table_name
21542                self.skip();
21543                name.push('%');
21544                if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
21545                    name.push_str(&self.advance().text);
21546                }
21547            } else {
21548                break;
21549            }
21550        }
21551
21552        // Handle path after stage: @stage/path/to/file.csv
21553        let path = if self.match_token(TokenType::Slash) {
21554            let mut path_str = String::from("/");
21555            // Consume path components until we hit whitespace/paren/etc.
21556            while !self.is_at_end() {
21557                if self.check(TokenType::Identifier)
21558                    || self.check(TokenType::Var)
21559                    || self.check(TokenType::Number)
21560                    || self.check(TokenType::Dot)
21561                    || self.check(TokenType::Dash)
21562                    || self.check(TokenType::Star)
21563                    || self.check(TokenType::To)
21564                    || self.is_safe_keyword_as_identifier()
21565                {
21566                    path_str.push_str(&self.advance().text);
21567                } else if self.match_token(TokenType::Slash) {
21568                    path_str.push('/');
21569                } else {
21570                    break;
21571                }
21572            }
21573            Some(path_str)
21574        } else {
21575            None
21576        };
21577
21578        // Handle optional parameters: (FILE_FORMAT => 'fmt', PATTERN => '*.csv')
21579        let (file_format, pattern) = if self.match_token(TokenType::LParen) {
21580            let mut ff = None;
21581            let mut pat = None;
21582
21583            loop {
21584                if self.match_identifier("FILE_FORMAT") {
21585                    self.expect(TokenType::FArrow)?; // =>
21586                    ff = Some(self.parse_primary()?);
21587                } else if self.match_identifier("PATTERN") || self.match_token(TokenType::Pattern) {
21588                    // PATTERN can be tokenized as keyword or identifier
21589                    self.expect(TokenType::FArrow)?; // =>
21590                    if let Expression::Literal(lit) = self.parse_primary()? {
21591                        if let Literal::String(s) = lit.as_ref() {
21592                            pat = Some(s.clone());
21593                        }
21594                    }
21595                } else {
21596                    break;
21597                }
21598
21599                if !self.match_token(TokenType::Comma) {
21600                    break;
21601                }
21602            }
21603
21604            self.expect(TokenType::RParen)?;
21605            (ff, pat)
21606        } else {
21607            (None, None)
21608        };
21609
21610        Ok(Expression::StageReference(Box::new(StageReference {
21611            name,
21612            path,
21613            file_format,
21614            pattern,
21615            quoted: false,
21616        })))
21617    }
21618
21619    /// Parse Snowflake stage reference in FROM clause
21620    /// Handles: @stage, @"stage", @namespace.stage, @stage/path/file.csv, @~, @%table
21621    fn parse_stage_reference(&mut self) -> Result<Expression> {
21622        use crate::expressions::StageReference;
21623
21624        self.expect(TokenType::DAt)?; // consume @
21625
21626        // Build the stage name - can include dots, slashes, etc.
21627        let mut name = String::from("@");
21628
21629        // Handle special stage types:
21630        // @~ = user stage
21631        // @% = table stage (followed by table name)
21632        if self.check(TokenType::Tilde) {
21633            self.skip();
21634            name.push('~');
21635        } else if self.check(TokenType::Percent) {
21636            self.skip();
21637            name.push('%');
21638            // Table name follows (can be qualified: schema.table)
21639            loop {
21640                if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
21641                    name.push_str(&self.advance().text);
21642                } else {
21643                    break;
21644                }
21645                // Handle qualified table names: %db.schema.table
21646                if self.match_token(TokenType::Dot) {
21647                    name.push('.');
21648                } else {
21649                    break;
21650                }
21651            }
21652        } else {
21653            // Handle quoted or unquoted stage names
21654            loop {
21655                if self.check(TokenType::QuotedIdentifier) {
21656                    // Preserve quotes for quoted identifiers
21657                    let text = self.advance().text;
21658                    name.push('"');
21659                    name.push_str(&text);
21660                    name.push('"');
21661                } else if self.check(TokenType::Percent) {
21662                    // Handle table stage in qualified path: @namespace.%table_name
21663                    self.skip();
21664                    name.push('%');
21665                    if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
21666                        name.push_str(&self.advance().text);
21667                    }
21668                } else if self.check(TokenType::Identifier)
21669                    || self.check(TokenType::Var)
21670                    || self.is_safe_keyword_as_identifier()
21671                {
21672                    name.push_str(&self.advance().text);
21673                } else {
21674                    break;
21675                }
21676
21677                // Handle dots for qualified names: @namespace.stage or @"schema"."stage"
21678                if self.match_token(TokenType::Dot) {
21679                    name.push('.');
21680                } else {
21681                    break;
21682                }
21683            }
21684        }
21685
21686        // Handle path after stage: @stage/path/to/file.csv
21687        let path = if self.match_token(TokenType::Slash) {
21688            let mut path_str = String::from("/");
21689            // Consume path components until we hit whitespace/paren/etc.
21690            // Note: path can include keywords like 'to', 'data', etc.
21691            while !self.is_at_end() {
21692                if self.check(TokenType::Identifier)
21693                    || self.check(TokenType::Var)
21694                    || self.check(TokenType::Number)
21695                    || self.check(TokenType::Dot)
21696                    || self.check(TokenType::Dash)
21697                    || self.check(TokenType::Star)
21698                    || self.check(TokenType::To)
21699                    || self.is_safe_keyword_as_identifier()
21700                {
21701                    path_str.push_str(&self.advance().text);
21702                } else if self.match_token(TokenType::Slash) {
21703                    path_str.push('/');
21704                } else {
21705                    break;
21706                }
21707            }
21708            Some(path_str)
21709        } else {
21710            None
21711        };
21712
21713        // Handle optional parameters: (FILE_FORMAT => 'fmt', PATTERN => '*.csv')
21714        let (file_format, pattern) = if self.match_token(TokenType::LParen) {
21715            let mut ff = None;
21716            let mut pat = None;
21717
21718            loop {
21719                if self.match_identifier("FILE_FORMAT") {
21720                    self.expect(TokenType::FArrow)?; // =>
21721                    ff = Some(self.parse_primary()?);
21722                } else if self.match_identifier("PATTERN") || self.match_token(TokenType::Pattern) {
21723                    // PATTERN can be tokenized as keyword or identifier
21724                    self.expect(TokenType::FArrow)?; // =>
21725                    if let Expression::Literal(lit) = self.parse_primary()? {
21726                        if let Literal::String(s) = lit.as_ref() {
21727                            pat = Some(s.clone());
21728                        }
21729                    }
21730                } else {
21731                    break;
21732                }
21733
21734                if !self.match_token(TokenType::Comma) {
21735                    break;
21736                }
21737            }
21738
21739            self.expect(TokenType::RParen)?;
21740            (ff, pat)
21741        } else {
21742            (None, None)
21743        };
21744
21745        Ok(Expression::StageReference(Box::new(StageReference {
21746            name,
21747            path,
21748            file_format,
21749            pattern,
21750            quoted: false,
21751        })))
21752    }
21753
21754    /// Parse file location for COPY/PUT statements
21755    /// Handles: @stage, @db.schema.stage, @stage/path, 's3://bucket/path', file:///path
21756    fn parse_file_location(&mut self) -> Result<Expression> {
21757        // Stage reference starting with @ (tokenized as DAt or as a Var starting with @)
21758        if self.check(TokenType::DAt) {
21759            self.skip(); // consume @
21760            let mut stage_path = String::from("@");
21761
21762            // Handle table stage prefix: @%table
21763            if self.check(TokenType::Percent) || self.check(TokenType::Mod) {
21764                stage_path.push('%');
21765                self.skip(); // consume %
21766            }
21767            // Handle user stage: @~
21768            else if self.check(TokenType::Tilde) {
21769                stage_path.push('~');
21770                self.skip(); // consume ~
21771            }
21772
21773            // Get stage name
21774            if self.check(TokenType::Var) || self.check_keyword() || self.is_identifier_token() {
21775                stage_path.push_str(&self.advance().text);
21776            }
21777            // Parse qualified name parts: .schema.stage
21778            while self.check(TokenType::Dot) {
21779                self.skip(); // consume .
21780                stage_path.push('.');
21781                if self.check(TokenType::Var) || self.check_keyword() || self.is_identifier_token()
21782                {
21783                    stage_path.push_str(&self.advance().text);
21784                }
21785            }
21786            // Parse path after stage: /path/to/file.csv
21787            // Consume all connected path components (dots, dashes, numbers, etc.)
21788            // matching the logic in parse_stage_reference.
21789            if self.match_token(TokenType::Slash) {
21790                stage_path.push('/');
21791                while !self.is_at_end() {
21792                    if (self.check(TokenType::Var)
21793                        || self.check(TokenType::Identifier)
21794                        || self.check(TokenType::Number)
21795                        || self.check(TokenType::Dot)
21796                        || self.check(TokenType::Dash)
21797                        || self.check(TokenType::Star)
21798                        || self.check(TokenType::To)
21799                        || self.is_safe_keyword_as_identifier())
21800                        && !self.check_next(TokenType::Eq)
21801                    {
21802                        stage_path.push_str(&self.advance().text);
21803                    } else if self.match_token(TokenType::Slash) {
21804                        stage_path.push('/');
21805                    } else {
21806                        break;
21807                    }
21808                }
21809            }
21810            return Ok(Expression::Literal(Box::new(Literal::String(stage_path))));
21811        }
21812
21813        // Stage reference tokenized as a Var starting with @ (e.g., @random_stage)
21814        // This happens when the tokenizer combines @ with the following identifier
21815        if self.check(TokenType::Var) && self.peek().text.starts_with('@') {
21816            let mut stage_path = self.advance().text.clone();
21817            // Parse qualified name parts: .schema.stage
21818            while self.check(TokenType::Dot) {
21819                self.skip(); // consume .
21820                stage_path.push('.');
21821                if self.check(TokenType::Var) || self.check_keyword() || self.is_identifier_token()
21822                {
21823                    stage_path.push_str(&self.advance().text);
21824                }
21825            }
21826            // Parse path after stage: /path/to/file.csv
21827            if self.match_token(TokenType::Slash) {
21828                stage_path.push('/');
21829                while !self.is_at_end() {
21830                    if (self.check(TokenType::Var)
21831                        || self.check(TokenType::Identifier)
21832                        || self.check(TokenType::Number)
21833                        || self.check(TokenType::Dot)
21834                        || self.check(TokenType::Dash)
21835                        || self.check(TokenType::Star)
21836                        || self.check(TokenType::To)
21837                        || self.is_safe_keyword_as_identifier())
21838                        && !self.check_next(TokenType::Eq)
21839                    {
21840                        stage_path.push_str(&self.advance().text);
21841                    } else if self.match_token(TokenType::Slash) {
21842                        stage_path.push('/');
21843                    } else {
21844                        break;
21845                    }
21846                }
21847            }
21848            return Ok(Expression::Literal(Box::new(Literal::String(stage_path))));
21849        }
21850
21851        // String literal (file path or URL)
21852        if self.check(TokenType::String) {
21853            let token = self.advance();
21854            return Ok(Expression::Literal(Box::new(Literal::String(
21855                token.text.clone(),
21856            ))));
21857        }
21858
21859        // Backtick-quoted identifier (Databricks style: `s3://link`)
21860        if self.check(TokenType::QuotedIdentifier) {
21861            let token = self.advance();
21862            return Ok(Expression::Identifier(Identifier::quoted(
21863                token.text.clone(),
21864            )));
21865        }
21866
21867        // Identifier (could be a stage name without @)
21868        if self.check(TokenType::Var) || self.check_keyword() {
21869            let ident = self.advance().text.clone();
21870            return Ok(Expression::boxed_column(Column {
21871                name: Identifier::new(ident),
21872                table: None,
21873                join_mark: false,
21874                trailing_comments: Vec::new(),
21875                span: None,
21876                inferred_type: None,
21877            }));
21878        }
21879
21880        Err(self.parse_error("Expected file location"))
21881    }
21882
21883    /// Parse Snowflake stage reference as a string for PUT/GET/COPY statements
21884    /// Handles: @stage, @%table, @~, @db.schema.stage, @"quoted"."stage", @stage/path
21885    /// Returns a Literal::String containing the stage path
21886    fn parse_stage_reference_as_string(&mut self) -> Result<Expression> {
21887        // Stage reference starting with @ (tokenized as DAt)
21888        if self.check(TokenType::DAt) {
21889            self.skip(); // consume @
21890            let mut stage_path = String::from("@");
21891
21892            // Handle table stage prefix: @%table
21893            if self.check(TokenType::Percent) || self.check(TokenType::Mod) {
21894                stage_path.push('%');
21895                self.skip(); // consume %
21896            }
21897            // Handle user stage: @~
21898            else if self.check(TokenType::Tilde) {
21899                stage_path.push('~');
21900                self.skip(); // consume ~
21901                             // After @~, parse any path segments
21902                while self.check(TokenType::Slash) {
21903                    self.skip(); // consume /
21904                    stage_path.push('/');
21905                    if (self.check(TokenType::Var)
21906                        || self.check_keyword()
21907                        || self.is_identifier_token())
21908                        && !self.check_next(TokenType::Eq)
21909                    {
21910                        stage_path.push_str(&self.advance().text);
21911                    }
21912                }
21913                return Ok(Expression::Literal(Box::new(Literal::String(stage_path))));
21914            }
21915
21916            // Get stage name (could be quoted identifier)
21917            if self.check(TokenType::QuotedIdentifier) {
21918                // Preserve quoted identifier with quotes
21919                let text = &self.peek().text;
21920                stage_path.push('"');
21921                stage_path.push_str(text);
21922                stage_path.push('"');
21923                self.skip();
21924            } else if self.check(TokenType::Var)
21925                || self.check_keyword()
21926                || self.check(TokenType::Identifier)
21927            {
21928                stage_path.push_str(&self.advance().text);
21929            }
21930
21931            // Parse qualified name parts: .schema.stage (may include quoted identifiers)
21932            while self.check(TokenType::Dot) {
21933                self.skip(); // consume .
21934                stage_path.push('.');
21935                if self.check(TokenType::QuotedIdentifier) {
21936                    // Preserve quoted identifier with quotes
21937                    let text = &self.peek().text;
21938                    stage_path.push('"');
21939                    stage_path.push_str(text);
21940                    stage_path.push('"');
21941                    self.skip();
21942                } else if self.check(TokenType::Var)
21943                    || self.check_keyword()
21944                    || self.check(TokenType::Identifier)
21945                {
21946                    stage_path.push_str(&self.advance().text);
21947                }
21948            }
21949
21950            // Parse path segments: /path/to/file
21951            while self.check(TokenType::Slash) {
21952                self.skip(); // consume /
21953                stage_path.push('/');
21954                // Get path segment but don't consume if followed by = (that's a parameter)
21955                if (self.check(TokenType::Var)
21956                    || self.check_keyword()
21957                    || self.is_identifier_token())
21958                    && !self.check_next(TokenType::Eq)
21959                {
21960                    stage_path.push_str(&self.advance().text);
21961                }
21962            }
21963            return Ok(Expression::Literal(Box::new(Literal::String(stage_path))));
21964        }
21965
21966        // Stage reference tokenized as a Var starting with @ (e.g., @s1)
21967        if self.check(TokenType::Var) && self.peek().text.starts_with('@') {
21968            let mut stage_path = self.advance().text.clone();
21969
21970            // Parse qualified name parts: .schema.stage (may include quoted identifiers)
21971            while self.check(TokenType::Dot) {
21972                self.skip(); // consume .
21973                stage_path.push('.');
21974                if self.check(TokenType::QuotedIdentifier) {
21975                    let text = &self.peek().text;
21976                    stage_path.push('"');
21977                    stage_path.push_str(text);
21978                    stage_path.push('"');
21979                    self.skip();
21980                } else if self.check(TokenType::Var)
21981                    || self.check_keyword()
21982                    || self.check(TokenType::Identifier)
21983                {
21984                    stage_path.push_str(&self.advance().text);
21985                }
21986            }
21987
21988            // Parse path segments: /path/to/file
21989            while self.check(TokenType::Slash) {
21990                self.skip(); // consume /
21991                stage_path.push('/');
21992                if (self.check(TokenType::Var)
21993                    || self.check_keyword()
21994                    || self.is_identifier_token())
21995                    && !self.check_next(TokenType::Eq)
21996                {
21997                    stage_path.push_str(&self.advance().text);
21998                }
21999            }
22000            return Ok(Expression::Literal(Box::new(Literal::String(stage_path))));
22001        }
22002
22003        Err(self.parse_error("Expected stage reference starting with @"))
22004    }
22005
22006    /// Parse PUT statement (Snowflake)
22007    /// PUT file://<path> @<stage> [AUTO_COMPRESS = TRUE|FALSE] ...
22008    fn parse_put(&mut self) -> Result<Expression> {
22009        self.expect(TokenType::Put)?;
22010
22011        // Parse source file path (usually file:///path/to/file)
22012        let (source, source_quoted) = if self.check(TokenType::String) {
22013            (self.advance().text.clone(), true)
22014        } else {
22015            // Handle file://path syntax (parsed as identifier + colon + etc.)
22016            // Stop when we see @ (start of stage reference)
22017            let mut source_parts = Vec::new();
22018            while !self.is_at_end() {
22019                // Stop if we see @ (DAt token or Var starting with @)
22020                if self.check(TokenType::DAt) {
22021                    break;
22022                }
22023                if self.check(TokenType::Var) && self.peek().text.starts_with('@') {
22024                    break;
22025                }
22026                let token = self.advance();
22027                source_parts.push(token.text.clone());
22028            }
22029            (source_parts.join(""), false)
22030        };
22031
22032        // Parse target stage (@stage_name)
22033        let target = self.parse_stage_reference_as_string()?;
22034
22035        // Parse optional parameters
22036        // Note: Some parameter names like OVERWRITE are keywords, so we check for those explicitly
22037        // Preserve original casing for identity tests
22038        let mut params = Vec::new();
22039        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
22040            let is_param_name = self.check(TokenType::Var)
22041                || self.check_keyword()
22042                || self.check(TokenType::Overwrite);
22043            if is_param_name {
22044                let name = self.advance().text.clone();
22045                let value = if self.match_token(TokenType::Eq) {
22046                    Some(self.parse_primary()?)
22047                } else {
22048                    None
22049                };
22050                params.push(CopyParameter {
22051                    name,
22052                    value,
22053                    values: Vec::new(),
22054                    eq: true,
22055                });
22056            } else {
22057                break;
22058            }
22059        }
22060
22061        Ok(Expression::Put(Box::new(PutStmt {
22062            source,
22063            source_quoted,
22064            target,
22065            params,
22066        })))
22067    }
22068
22069    /// Helper to join command tokens with smart spacing
22070    /// Preserves the structure of file paths, stage references, etc.
22071    fn join_command_tokens(&self, tokens: Vec<(String, TokenType)>) -> String {
22072        let mut result = String::new();
22073        let mut prev_token_type: Option<TokenType> = None;
22074        let mut prev_prev_token_type: Option<TokenType> = None;
22075
22076        for (i, (text, token_type)) in tokens.iter().enumerate() {
22077            let needs_space = if result.is_empty() {
22078                false
22079            } else {
22080                match (prev_token_type, *token_type) {
22081                    // No space after @ (stage references: @stage, @%, @~)
22082                    (Some(TokenType::DAt), _) => false,
22083                    // No space around dots (identifiers: a.b.c)
22084                    (Some(TokenType::Dot), _) => false,
22085                    (_, TokenType::Dot) => false,
22086                    // No space around parentheses
22087                    (Some(TokenType::LParen), _) => false,
22088                    (_, TokenType::LParen) => false,
22089                    (_, TokenType::RParen) => false,
22090                    // No space around square brackets (array access: arr[i])
22091                    (Some(TokenType::LBracket), _) => false,
22092                    (_, TokenType::LBracket) => false,
22093                    (_, TokenType::RBracket) => false,
22094                    // No space before ,
22095                    (_, TokenType::Comma) => false,
22096                    // No space around / (paths: @s1/test)
22097                    (Some(TokenType::Slash), _) => false,
22098                    (_, TokenType::Slash) => false,
22099                    // No space around : (file://path)
22100                    (Some(TokenType::Colon), _) => false,
22101                    (_, TokenType::Colon) => false,
22102                    // No space around % (table stage: @%table)
22103                    (Some(TokenType::Mod), _) => false,
22104                    (_, TokenType::Mod) => false,
22105                    (Some(TokenType::Percent), _) => false,
22106                    (_, TokenType::Percent) => false,
22107                    // Handle = contextually:
22108                    // - No space around = in simple KEY=VALUE patterns where value is terminal
22109                    //   (PARALLEL=1, ENABLED=TRUE, FILE_FORMAT='csv')
22110                    // - Keep space for expressions like SET x = x + 1
22111                    (Some(TokenType::Var), TokenType::Eq) => {
22112                        // If the var starts with @ (parameter like @id = 123), always use spaces
22113                        if i >= 1 && tokens[i - 1].0.starts_with('@') {
22114                            true
22115                        } else if i + 1 < tokens.len() {
22116                            // Check what follows: Var=Number where number is terminal (end or followed by Var)
22117                            let next_type = tokens[i + 1].1;
22118                            // Is the value terminal (end of tokens, or followed by another Var=... pattern)?
22119                            let is_terminal_value =
22120                                i + 2 >= tokens.len() || tokens[i + 2].1 == TokenType::Var;
22121                            match next_type {
22122                                // No space for terminal numbers/bools: PARALLEL=1, ENABLED=TRUE
22123                                // Return false (no space) when terminal
22124                                TokenType::Number | TokenType::True | TokenType::False => {
22125                                    !is_terminal_value
22126                                }
22127                                // No space for terminal strings: FILE_FORMAT='csv'
22128                                TokenType::String => !is_terminal_value,
22129                                // Always space if followed by Var (SET x = y ...)
22130                                _ => true,
22131                            }
22132                        } else {
22133                            true
22134                        }
22135                    }
22136                    // No space after = in terminal KEY=VALUE patterns
22137                    (Some(TokenType::Eq), TokenType::Number)
22138                    | (Some(TokenType::Eq), TokenType::True)
22139                    | (Some(TokenType::Eq), TokenType::False)
22140                    | (Some(TokenType::Eq), TokenType::String) => {
22141                        // Is this a terminal value (end or followed by another Var=...)?
22142                        let is_terminal =
22143                            i + 1 >= tokens.len() || tokens[i + 1].1 == TokenType::Var;
22144                        match prev_prev_token_type {
22145                            // No space (return false) when terminal, space otherwise
22146                            // But always space if the var before = was preceded by @ (parameter)
22147                            Some(TokenType::Var) => {
22148                                // Always space if the var before = starts with @ (parameter)
22149                                if i >= 2 && tokens[i - 2].0.starts_with('@') {
22150                                    true
22151                                } else {
22152                                    !is_terminal
22153                                }
22154                            }
22155                            _ => true, // Space for other cases
22156                        }
22157                    }
22158                    // Always space after = when followed by Var (SET x = y, could be expression)
22159                    (Some(TokenType::Eq), TokenType::Var) => true,
22160                    // No space around :: (cast)
22161                    (Some(TokenType::DColon), _) => false,
22162                    (_, TokenType::DColon) => false,
22163                    // Default: add space
22164                    _ => true,
22165                }
22166            };
22167
22168            if needs_space {
22169                result.push(' ');
22170            }
22171            result.push_str(text);
22172            prev_prev_token_type = prev_token_type;
22173            prev_token_type = Some(*token_type);
22174        }
22175        result
22176    }
22177
22178    /// Join Teradata table option tokens with Teradata-specific spacing
22179    /// - No spaces around '='
22180    /// - No spaces around dots or parentheses
22181    /// - Space-separated words otherwise
22182    fn join_teradata_option_tokens(&self, tokens: Vec<(String, TokenType)>) -> String {
22183        let mut result = String::new();
22184        let mut prev_token_type: Option<TokenType> = None;
22185
22186        for (text, token_type) in tokens {
22187            let needs_space = if result.is_empty() {
22188                false
22189            } else {
22190                match (prev_token_type, token_type) {
22191                    (Some(TokenType::Dot), _) => false,
22192                    (_, TokenType::Dot) => false,
22193                    (Some(TokenType::LParen), _) => false,
22194                    (_, TokenType::LParen) => false,
22195                    (_, TokenType::RParen) => false,
22196                    (_, TokenType::Comma) => false,
22197                    (Some(TokenType::Eq), _) => false,
22198                    (_, TokenType::Eq) => false,
22199                    _ => true,
22200                }
22201            };
22202
22203            if needs_space {
22204                result.push(' ');
22205            }
22206            result.push_str(&text);
22207            prev_token_type = Some(token_type);
22208        }
22209
22210        result
22211    }
22212
22213    /// Parse RM or REMOVE command (Snowflake)
22214    /// RM @stage_name / REMOVE @stage_name
22215    fn parse_rm_command(&mut self) -> Result<Expression> {
22216        let command_token = self.advance(); // RM or REMOVE
22217        let command_name = command_token.text.to_ascii_uppercase();
22218
22219        // Collect remaining tokens with their types
22220        let mut tokens = vec![(command_name, command_token.token_type)];
22221        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
22222            let token = self.advance();
22223            tokens.push((token.text.clone(), token.token_type));
22224        }
22225
22226        Ok(Expression::Command(Box::new(Command {
22227            this: self.join_command_tokens(tokens),
22228        })))
22229    }
22230
22231    /// Parse GET command (Snowflake)
22232    /// GET @stage_name 'file:///path'
22233    fn parse_get_command(&mut self) -> Result<Expression> {
22234        let get_token = self.advance(); // consume GET (it's already matched)
22235
22236        // Collect remaining tokens with their types, preserving quotes
22237        let mut tokens = vec![("GET".to_string(), get_token.token_type)];
22238        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
22239            let token = self.advance();
22240            // Re-add quotes around string and quoted identifier tokens
22241            let text = match token.token_type {
22242                TokenType::String => format!("'{}'", token.text),
22243                TokenType::QuotedIdentifier => format!("\"{}\"", token.text),
22244                _ => token.text.clone(),
22245            };
22246            tokens.push((text, token.token_type));
22247        }
22248
22249        Ok(Expression::Command(Box::new(Command {
22250            this: self.join_command_tokens(tokens),
22251        })))
22252    }
22253
22254    /// Parse CALL statement (stored procedure call)
22255    /// CALL procedure_name(args, ...)
22256    fn parse_call(&mut self) -> Result<Expression> {
22257        let call_token = self.advance(); // consume CALL
22258
22259        // Collect remaining tokens with their types
22260        let mut tokens = vec![("CALL".to_string(), call_token.token_type)];
22261        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
22262            let token = self.advance();
22263            tokens.push((token.text.clone(), token.token_type));
22264        }
22265
22266        Ok(Expression::Command(Box::new(Command {
22267            this: self.join_command_tokens(tokens),
22268        })))
22269    }
22270
22271    /// Parse KILL statement (MySQL/MariaDB)
22272    /// KILL [CONNECTION | QUERY] <id>
22273    fn parse_kill(&mut self) -> Result<Expression> {
22274        self.expect(TokenType::Kill)?;
22275
22276        // Check for optional kind: CONNECTION or QUERY
22277        let kind = if self.match_identifier("CONNECTION") {
22278            Some("CONNECTION".to_string())
22279        } else if self.match_identifier("QUERY") {
22280            Some("QUERY".to_string())
22281        } else {
22282            None
22283        };
22284
22285        // Parse the target (process ID - usually a number or string)
22286        let this = self.parse_primary()?;
22287
22288        Ok(Expression::Kill(Box::new(Kill { this, kind })))
22289    }
22290
22291    /// Parse EXEC/EXECUTE statement (TSQL stored procedure call)
22292    /// EXEC [schema.]procedure_name [@param=value, ...]
22293    fn parse_execute(&mut self) -> Result<Expression> {
22294        self.expect(TokenType::Execute)?;
22295
22296        // Dynamic SQL: EXEC(@sql) or EXEC (@sql)
22297        let this = if self.check(TokenType::LParen) {
22298            self.skip(); // consume (
22299            let expr = self
22300                .parse_disjunction()?
22301                .unwrap_or(Expression::Null(crate::expressions::Null));
22302            self.expect(TokenType::RParen)?;
22303            Expression::Paren(Box::new(crate::expressions::Paren {
22304                this: expr,
22305                trailing_comments: Vec::new(),
22306            }))
22307        } else {
22308            // Parse procedure name (can be qualified: schema.proc_name)
22309            let proc_name = self.parse_table_ref()?;
22310            Expression::Table(Box::new(proc_name))
22311        };
22312
22313        // Parse optional parameters: @param=value [OUTPUT], ...
22314        let mut parameters = Vec::new();
22315
22316        // Check if there are parameters (starts with @ or identifier)
22317        while self.check(TokenType::Var) || self.check(TokenType::Parameter) {
22318            // Get the parameter name (starts with @)
22319            let token = self.advance();
22320            let param_name = if token.text.starts_with('@') {
22321                token.text.clone()
22322            } else {
22323                format!("@{}", token.text)
22324            };
22325
22326            // Check for = (named parameter) or positional parameter
22327            if self.match_token(TokenType::Eq) {
22328                // Named parameter: @param = value
22329                let value = self.parse_primary()?;
22330                let output = self.match_token(TokenType::Output);
22331                parameters.push(ExecuteParameter {
22332                    name: param_name,
22333                    value,
22334                    positional: false,
22335                    output,
22336                });
22337            } else {
22338                // Positional parameter: @var (no = sign)
22339                let output = self.match_token(TokenType::Output);
22340                parameters.push(ExecuteParameter {
22341                    name: param_name.clone(),
22342                    value: Expression::boxed_column(Column {
22343                        name: Identifier::new(&param_name),
22344                        table: None,
22345                        join_mark: false,
22346                        trailing_comments: Vec::new(),
22347                        span: None,
22348                        inferred_type: None,
22349                    }),
22350                    positional: true,
22351                    output,
22352                });
22353            }
22354
22355            // Check for comma to continue
22356            if !self.match_token(TokenType::Comma) {
22357                break;
22358            }
22359        }
22360
22361        // TSQL: WITH RESULT SETS ((...), ...) or WITH RECOMPILE etc.
22362        let suffix = if self.check(TokenType::With) {
22363            let start = self.current;
22364            // Collect remaining tokens until semicolon or end
22365            while !self.is_at_end() && !self.check(TokenType::Semicolon) {
22366                self.skip();
22367            }
22368            Some(self.tokens_to_sql(start, self.current))
22369        } else {
22370            None
22371        };
22372
22373        Ok(Expression::Execute(Box::new(ExecuteStatement {
22374            this,
22375            parameters,
22376            suffix,
22377        })))
22378    }
22379
22380    /// Parse GRANT statement
22381    /// GRANT <privileges> ON [<kind>] <object> TO <principals> [WITH GRANT OPTION]
22382    fn parse_grant(&mut self) -> Result<Expression> {
22383        self.expect(TokenType::Grant)?;
22384
22385        // ClickHouse: GRANT can grant roles (no ON clause), grant privileges (has ON clause),
22386        // or use complex syntax. If we see TO before ON, treat as command.
22387        // Also: multi-privilege grants (multiple ON), wildcard grants (test*.*),
22388        // WITH REPLACE OPTION all parse as commands.
22389        if matches!(
22390            self.config.dialect,
22391            Some(crate::dialects::DialectType::ClickHouse)
22392        ) {
22393            // Save position after GRANT keyword
22394            let saved_pos = self.current;
22395            // Scan ahead to check grant structure
22396            let mut depth = 0i32;
22397            let mut on_count = 0;
22398            let mut found_to = false;
22399            let mut has_star_in_name = false;
22400            let mut has_replace_option = false;
22401            let mut i = self.current;
22402            while i < self.tokens.len() && self.tokens[i].token_type != TokenType::Semicolon {
22403                match self.tokens[i].token_type {
22404                    TokenType::LParen => depth += 1,
22405                    TokenType::RParen => depth -= 1,
22406                    TokenType::On if depth == 0 => on_count += 1,
22407                    TokenType::To if depth == 0 => {
22408                        found_to = true;
22409                    }
22410                    TokenType::Star if depth == 0 && on_count > 0 && !found_to => {
22411                        // Check if star is part of a wildcard name (e.g., test*.*)
22412                        if i > 0
22413                            && self.tokens[i - 1].token_type != TokenType::Dot
22414                            && self.tokens[i - 1].token_type != TokenType::On
22415                        {
22416                            has_star_in_name = true;
22417                        }
22418                    }
22419                    TokenType::Replace if depth == 0 && found_to => {
22420                        has_replace_option = true;
22421                    }
22422                    _ => {}
22423                }
22424                i += 1;
22425            }
22426            if (found_to && on_count == 0) || on_count > 1 || has_star_in_name || has_replace_option
22427            {
22428                // Role grant, multi-privilege grant, wildcard grant, or REPLACE OPTION — parse as command
22429                self.current = saved_pos;
22430                return self
22431                    .parse_command()?
22432                    .ok_or_else(|| self.parse_error("Failed to parse GRANT statement"));
22433            }
22434            self.current = saved_pos;
22435        }
22436
22437        // Parse privileges (e.g., SELECT, INSERT, UPDATE)
22438        let privileges = self.parse_privileges()?;
22439
22440        // Expect ON
22441        self.expect(TokenType::On)?;
22442
22443        // Parse optional kind (TABLE, SCHEMA, FUNCTION, etc.)
22444        let kind = self.parse_object_kind()?;
22445
22446        // Parse securable (the object) - may be dot-separated qualified name
22447        let securable = self.parse_securable_name()?;
22448
22449        // Parse optional function parameter types: func(type1, type2, ...)
22450        let function_params = if self.check(TokenType::LParen) {
22451            self.parse_function_param_types()?
22452        } else {
22453            Vec::new()
22454        };
22455
22456        // Expect TO
22457        self.expect(TokenType::To)?;
22458
22459        // Parse principals
22460        let principals = self.parse_principals()?;
22461
22462        // Check for WITH GRANT OPTION
22463        let grant_option = self.match_token(TokenType::With)
22464            && self.check(TokenType::Grant)
22465            && {
22466                self.skip();
22467                self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("OPTION")
22468            }
22469            && {
22470                self.skip();
22471                true
22472            };
22473
22474        // Check for TSQL AS principal clause
22475        let as_principal = if self.match_token(TokenType::As) {
22476            let name = self.expect_identifier_or_keyword()?;
22477            Some(Identifier::new(name))
22478        } else {
22479            None
22480        };
22481
22482        Ok(Expression::Grant(Box::new(Grant {
22483            privileges,
22484            kind,
22485            securable,
22486            function_params,
22487            principals,
22488            grant_option,
22489            as_principal,
22490        })))
22491    }
22492
22493    /// Parse REVOKE statement
22494    /// REVOKE [GRANT OPTION FOR] <privileges> ON [<kind>] <object> FROM <principals> [CASCADE]
22495    fn parse_revoke(&mut self) -> Result<Expression> {
22496        self.expect(TokenType::Revoke)?;
22497
22498        // ClickHouse: REVOKE role FROM user (no ON clause), multi-privilege, or wildcard — parse as command
22499        if matches!(
22500            self.config.dialect,
22501            Some(crate::dialects::DialectType::ClickHouse)
22502        ) {
22503            let saved_pos = self.current;
22504            let mut depth = 0i32;
22505            let mut on_count = 0;
22506            let mut found_from = false;
22507            let mut has_star_in_name = false;
22508            let mut i = self.current;
22509            while i < self.tokens.len() && self.tokens[i].token_type != TokenType::Semicolon {
22510                match self.tokens[i].token_type {
22511                    TokenType::LParen => depth += 1,
22512                    TokenType::RParen => depth -= 1,
22513                    TokenType::On if depth == 0 => on_count += 1,
22514                    TokenType::From if depth == 0 => {
22515                        found_from = true;
22516                    }
22517                    TokenType::Star if depth == 0 && on_count > 0 && !found_from => {
22518                        if i > 0
22519                            && self.tokens[i - 1].token_type != TokenType::Dot
22520                            && self.tokens[i - 1].token_type != TokenType::On
22521                        {
22522                            has_star_in_name = true;
22523                        }
22524                    }
22525                    _ => {}
22526                }
22527                i += 1;
22528            }
22529            if (found_from && on_count == 0) || on_count > 1 || has_star_in_name {
22530                self.current = saved_pos;
22531                return self
22532                    .parse_command()?
22533                    .ok_or_else(|| self.parse_error("Failed to parse REVOKE statement"));
22534            }
22535            self.current = saved_pos;
22536        }
22537
22538        // Check for GRANT OPTION FOR
22539        let grant_option = if self.check(TokenType::Grant) {
22540            self.skip();
22541            if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("OPTION") {
22542                self.skip();
22543                self.expect(TokenType::For)?;
22544                true
22545            } else {
22546                return Err(self.parse_error("Expected OPTION after GRANT in REVOKE"));
22547            }
22548        } else {
22549            false
22550        };
22551
22552        // Parse privileges
22553        let privileges = self.parse_privileges()?;
22554
22555        // Expect ON
22556        self.expect(TokenType::On)?;
22557
22558        // Parse optional kind
22559        let kind = self.parse_object_kind()?;
22560
22561        // Parse securable - may be dot-separated qualified name
22562        let securable = self.parse_securable_name()?;
22563
22564        // Parse optional function parameter types: func(type1, type2, ...)
22565        let function_params = if self.check(TokenType::LParen) {
22566            self.parse_function_param_types()?
22567        } else {
22568            Vec::new()
22569        };
22570
22571        // Expect FROM
22572        self.expect(TokenType::From)?;
22573
22574        // Parse principals
22575        let principals = self.parse_principals()?;
22576
22577        // Check for CASCADE or RESTRICT
22578        let cascade = self.match_token(TokenType::Cascade);
22579        let restrict = if !cascade {
22580            self.match_token(TokenType::Restrict)
22581        } else {
22582            false
22583        };
22584
22585        Ok(Expression::Revoke(Box::new(Revoke {
22586            privileges,
22587            kind,
22588            securable,
22589            function_params,
22590            principals,
22591            grant_option,
22592            cascade,
22593            restrict,
22594        })))
22595    }
22596
22597    /// Parse privilege list for GRANT/REVOKE
22598    /// Handles multi-word privileges like "ALL PRIVILEGES" and column-level privileges like "SELECT(col1, col2)"
22599    fn parse_privileges(&mut self) -> Result<Vec<Privilege>> {
22600        let mut privileges = Vec::new();
22601        loop {
22602            let mut priv_parts = Vec::new();
22603            // Collect privilege words until we hit ON, comma, LParen, or similar terminator
22604            while !self.is_at_end() {
22605                if self.check(TokenType::On)
22606                    || self.check(TokenType::Comma)
22607                    || self.check(TokenType::LParen)
22608                {
22609                    break;
22610                }
22611                if self.is_identifier_or_keyword_token() {
22612                    priv_parts.push(self.advance().text.to_ascii_uppercase());
22613                } else {
22614                    break;
22615                }
22616            }
22617            if priv_parts.is_empty() {
22618                break;
22619            }
22620            let priv_name = priv_parts.join(" ");
22621
22622            // Check for column list in parentheses: SELECT(col1, col2)
22623            let columns = if self.match_token(TokenType::LParen) {
22624                let mut cols = Vec::new();
22625                loop {
22626                    // Parse column name (identifier)
22627                    if self.is_identifier_or_keyword_token() {
22628                        cols.push(self.advance().text.to_string());
22629                    } else if self.check(TokenType::RParen) {
22630                        break;
22631                    } else {
22632                        break;
22633                    }
22634                    if !self.match_token(TokenType::Comma) {
22635                        break;
22636                    }
22637                }
22638                self.expect(TokenType::RParen)?;
22639                cols
22640            } else {
22641                Vec::new()
22642            };
22643
22644            privileges.push(Privilege {
22645                name: priv_name,
22646                columns,
22647            });
22648            if !self.match_token(TokenType::Comma) {
22649                break;
22650            }
22651        }
22652        Ok(privileges)
22653    }
22654
22655    /// Parse object kind (TABLE, SCHEMA, FUNCTION, PROCEDURE, SEQUENCE, etc.)
22656    fn parse_object_kind(&mut self) -> Result<Option<String>> {
22657        if self.check(TokenType::Table) {
22658            self.skip();
22659            Ok(Some("TABLE".to_string()))
22660        } else if self.check(TokenType::Schema) {
22661            self.skip();
22662            Ok(Some("SCHEMA".to_string()))
22663        } else if self.check(TokenType::Database) {
22664            self.skip();
22665            Ok(Some("DATABASE".to_string()))
22666        } else if self.check(TokenType::Function) {
22667            self.skip();
22668            Ok(Some("FUNCTION".to_string()))
22669        } else if self.check(TokenType::View) {
22670            self.skip();
22671            Ok(Some("VIEW".to_string()))
22672        } else if self.check(TokenType::Procedure) {
22673            self.skip();
22674            Ok(Some("PROCEDURE".to_string()))
22675        } else if self.check(TokenType::Sequence) {
22676            self.skip();
22677            Ok(Some("SEQUENCE".to_string()))
22678        } else if self.check(TokenType::Warehouse) {
22679            self.skip();
22680            Ok(Some("WAREHOUSE".to_string()))
22681        } else if self.check_identifier("STAGE")
22682            || self.check_identifier("INTEGRATION")
22683            || self.check_identifier("TASK")
22684            || self.check_identifier("STREAM")
22685            || self.check_identifier("PIPE")
22686            || self.check_identifier("TAG")
22687            || self.check_identifier("SHARE")
22688        {
22689            let kind = self.advance().text.to_ascii_uppercase();
22690            Ok(Some(kind))
22691        } else if self.check_identifier("FILE")
22692            && self.current + 1 < self.tokens.len()
22693            && self.tokens[self.current + 1]
22694                .text
22695                .eq_ignore_ascii_case("FORMAT")
22696        {
22697            self.skip(); // consume FILE
22698            self.skip(); // consume FORMAT
22699            Ok(Some("FILE FORMAT".to_string()))
22700        } else if self.check_identifier("NETWORK")
22701            && self.current + 1 < self.tokens.len()
22702            && self.tokens[self.current + 1]
22703                .text
22704                .eq_ignore_ascii_case("POLICY")
22705        {
22706            self.skip(); // consume NETWORK
22707            self.skip(); // consume POLICY
22708            Ok(Some("NETWORK POLICY".to_string()))
22709        } else {
22710            Ok(None)
22711        }
22712    }
22713
22714    /// Parse principal list for GRANT/REVOKE
22715    fn parse_principals(&mut self) -> Result<Vec<GrantPrincipal>> {
22716        let mut principals = Vec::new();
22717        loop {
22718            // Check for ROLE keyword (TokenType::Var with text "ROLE")
22719            let is_role =
22720                if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("ROLE") {
22721                    self.skip();
22722                    true
22723                } else {
22724                    false
22725                };
22726            // Check for GROUP keyword (Redshift) - TokenType::Group
22727            let is_group = if !is_role && self.check(TokenType::Group) {
22728                self.skip();
22729                true
22730            } else {
22731                false
22732            };
22733            // Check for SHARE keyword (Snowflake)
22734            let is_share = if !is_role && !is_group && self.check_identifier("SHARE") {
22735                self.skip();
22736                true
22737            } else {
22738                false
22739            };
22740            // Parse principal name (with quoted flag preserved for backtick-quoted identifiers)
22741            let name = self.expect_identifier_or_keyword_with_quoted()?;
22742            principals.push(GrantPrincipal {
22743                name,
22744                is_role,
22745                is_group,
22746                is_share,
22747            });
22748            if !self.match_token(TokenType::Comma) {
22749                break;
22750            }
22751        }
22752        Ok(principals)
22753    }
22754
22755    /// Parse a securable name (potentially dot-separated qualified name)
22756    /// e.g., "mydb.myschema.ADD5" -> Identifier("mydb.myschema.ADD5")
22757    fn parse_securable_name(&mut self) -> Result<Identifier> {
22758        // Accept * as a name part (e.g., GRANT ON *.* or GRANT ON db.*)
22759        let first = if self.match_token(TokenType::Star) {
22760            "*".to_string()
22761        } else {
22762            self.expect_identifier_or_keyword()?
22763        };
22764        let mut parts = vec![first];
22765
22766        while self.match_token(TokenType::Dot) {
22767            let next = if self.match_token(TokenType::Star) {
22768                "*".to_string()
22769            } else {
22770                self.expect_identifier_or_keyword()?
22771            };
22772            parts.push(next);
22773        }
22774
22775        Ok(Identifier::new(parts.join(".")))
22776    }
22777
22778    /// Parse function parameter types for GRANT/REVOKE ON FUNCTION
22779    /// e.g., "(number, varchar)" -> vec!["number", "varchar"]
22780    fn parse_function_param_types(&mut self) -> Result<Vec<String>> {
22781        self.expect(TokenType::LParen)?;
22782
22783        let mut params = Vec::new();
22784        if !self.check(TokenType::RParen) {
22785            loop {
22786                // Parse parameter type - can be a keyword (INT, VARCHAR) or identifier
22787                let param_type = self.expect_identifier_or_keyword()?;
22788                params.push(param_type);
22789                if !self.match_token(TokenType::Comma) {
22790                    break;
22791                }
22792            }
22793        }
22794
22795        self.expect(TokenType::RParen)?;
22796        Ok(params)
22797    }
22798
22799    /// Parse COMMENT ON statement
22800    fn parse_comment(&mut self) -> Result<Expression> {
22801        self.expect(TokenType::Comment)?;
22802
22803        // Check for IF EXISTS
22804        let exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
22805
22806        // Expect ON
22807        self.expect(TokenType::On)?;
22808
22809        // Check for MATERIALIZED (can be TokenType::Materialized or TokenType::Var)
22810        let materialized = if self.match_token(TokenType::Materialized) {
22811            true
22812        } else if self.check(TokenType::Var)
22813            && self.peek().text.eq_ignore_ascii_case("MATERIALIZED")
22814        {
22815            self.skip();
22816            true
22817        } else {
22818            false
22819        };
22820
22821        // Parse the object kind (COLUMN, TABLE, DATABASE, PROCEDURE, etc.)
22822        let kind = self.expect_identifier_or_keyword()?.to_ascii_uppercase();
22823
22824        // Parse the object name (can be qualified like schema.table.column)
22825        // For PROCEDURE/FUNCTION, we need to handle the parameter list like my_proc(integer, integer)
22826        let this = if kind == "PROCEDURE" || kind == "FUNCTION" {
22827            // Parse name possibly with parameter types, preserving original case
22828            let name_token = self.advance();
22829            let mut name_str = name_token.text.clone();
22830
22831            // Parse additional qualified parts
22832            while self.match_token(TokenType::Dot) {
22833                let next = self.advance();
22834                name_str.push('.');
22835                name_str.push_str(&next.text);
22836            }
22837
22838            // Check for parameter types in parentheses
22839            if self.match_token(TokenType::LParen) {
22840                name_str.push('(');
22841                let mut first = true;
22842                while !self.check(TokenType::RParen) && !self.is_at_end() {
22843                    if !first {
22844                        name_str.push_str(", ");
22845                    }
22846                    first = false;
22847                    let param_token = self.advance();
22848                    name_str.push_str(&param_token.text);
22849                    self.match_token(TokenType::Comma);
22850                }
22851                self.expect(TokenType::RParen)?;
22852                name_str.push(')');
22853            }
22854
22855            Expression::Identifier(Identifier::new(name_str))
22856        } else {
22857            self.parse_qualified_name()?
22858        };
22859
22860        // Expect IS
22861        if self.check(TokenType::Is) {
22862            self.skip();
22863        } else {
22864            return Err(self.parse_error("Expected IS in COMMENT ON statement"));
22865        }
22866
22867        // Parse the comment expression (usually a string literal)
22868        let expression = self.parse_primary()?;
22869
22870        Ok(Expression::Comment(Box::new(Comment {
22871            this,
22872            kind,
22873            expression,
22874            exists,
22875            materialized,
22876        })))
22877    }
22878
22879    /// Parse SET statement
22880    fn parse_set(&mut self) -> Result<Expression> {
22881        self.expect(TokenType::Set)?;
22882
22883        let mut items = Vec::new();
22884
22885        // ClickHouse: SET DEFAULT ROLE ... TO user - parse as command
22886        if matches!(
22887            self.config.dialect,
22888            Some(crate::dialects::DialectType::ClickHouse)
22889        ) && self.check(TokenType::Default)
22890        {
22891            let mut parts = vec!["SET".to_string()];
22892            while !self.is_at_end() && self.peek().token_type != TokenType::Semicolon {
22893                parts.push(self.advance().text.clone());
22894            }
22895            return Ok(Expression::Command(Box::new(crate::expressions::Command {
22896                this: parts.join(" "),
22897            })));
22898        }
22899
22900        // Teradata: SET QUERY_BAND = ... [UPDATE] [FOR scope]
22901        if matches!(
22902            self.config.dialect,
22903            Some(crate::dialects::DialectType::Teradata)
22904        ) && self.match_identifier("QUERY_BAND")
22905        {
22906            return self.parse_query_band();
22907        }
22908
22909        // Handle MySQL SET CHARACTER SET / SET NAMES
22910        if self.match_identifier("CHARACTER") {
22911            // SET CHARACTER SET <charset> | SET CHARACTER SET DEFAULT
22912            self.expect(TokenType::Set)?;
22913            let value = if self.match_token(TokenType::Default) {
22914                Expression::Identifier(Identifier::new("DEFAULT".to_string()))
22915            } else {
22916                self.parse_primary()?
22917            };
22918            items.push(SetItem {
22919                name: Expression::Identifier(Identifier::new("CHARACTER SET".to_string())),
22920                value,
22921                kind: None,
22922                no_equals: false,
22923            });
22924            return Ok(Expression::SetStatement(Box::new(SetStatement { items })));
22925        }
22926
22927        if self.match_identifier("NAMES") {
22928            // SET NAMES <charset> [COLLATE <collation>] | SET NAMES DEFAULT
22929            let value = if self.match_token(TokenType::Default) {
22930                Expression::Identifier(Identifier::new("DEFAULT".to_string()))
22931            } else {
22932                self.parse_primary()?
22933            };
22934            // Check for optional COLLATE clause
22935            let collation = if self.match_identifier("COLLATE") {
22936                Some(self.parse_primary()?)
22937            } else {
22938                None
22939            };
22940            items.push(SetItem {
22941                name: Expression::Identifier(Identifier::new("NAMES".to_string())),
22942                value,
22943                kind: None,
22944                no_equals: false,
22945            });
22946            if let Some(coll) = collation {
22947                items.push(SetItem {
22948                    name: Expression::Identifier(Identifier::new("COLLATE".to_string())),
22949                    value: coll,
22950                    kind: None,
22951                    no_equals: false,
22952                });
22953            }
22954            return Ok(Expression::SetStatement(Box::new(SetStatement { items })));
22955        }
22956
22957        // Track whether SET VAR/VARIABLE was used (only first item gets the VARIABLE kind)
22958        let mut set_is_variable = if self.check(TokenType::Var) {
22959            let text = self.peek().text.to_uppercase();
22960            if text == "VARIABLE" || text == "VAR" {
22961                // Look ahead: VAR/VARIABLE should be followed by another name, not by = or TO
22962                if let Some(next) = self.tokens.get(self.current + 1) {
22963                    if next.token_type != TokenType::Eq
22964                        && next.token_type != TokenType::To
22965                        && next.token_type != TokenType::ColonEq
22966                    {
22967                        self.skip(); // consume VAR/VARIABLE
22968                        true
22969                    } else {
22970                        false
22971                    }
22972                } else {
22973                    false
22974                }
22975            } else {
22976                false
22977            }
22978        } else {
22979            false
22980        };
22981
22982        loop {
22983            // Check for GLOBAL, LOCAL, SESSION, PERSIST, PERSIST_ONLY modifiers
22984            // LOCAL is a token type, others are identifiers
22985            let kind = if self.match_identifier("GLOBAL") {
22986                Some("GLOBAL".to_string())
22987            } else if self.match_token(TokenType::Local) {
22988                Some("LOCAL".to_string())
22989            } else if self.match_identifier("SESSION") {
22990                Some("SESSION".to_string())
22991            } else if self.match_identifier("PERSIST") {
22992                Some("PERSIST".to_string())
22993            } else if self.match_identifier("PERSIST_ONLY") {
22994                Some("PERSIST_ONLY".to_string())
22995            } else if set_is_variable {
22996                set_is_variable = false; // Only first item gets VARIABLE kind
22997                Some("VARIABLE".to_string())
22998            } else {
22999                None
23000            };
23001
23002            // Check for SET [GLOBAL|SESSION] TRANSACTION (MySQL)
23003            if self.match_token(TokenType::Transaction) {
23004                // Parse transaction characteristics (ISOLATION LEVEL, READ ONLY, READ WRITE)
23005                let mut characteristics = Vec::new();
23006                loop {
23007                    let mut char_tokens = Vec::new();
23008                    // Parse ISOLATION LEVEL ... or READ ONLY/WRITE
23009                    // Must handle keywords like ONLY, REPEATABLE, SERIALIZABLE, etc.
23010                    while !self.is_at_end()
23011                        && !self.check(TokenType::Comma)
23012                        && !self.check(TokenType::Semicolon)
23013                    {
23014                        // Allow identifiers and common transaction-related keywords
23015                        if self.is_identifier_token()
23016                            || self.is_safe_keyword_as_identifier()
23017                            || self.check(TokenType::Only)
23018                            || self.check(TokenType::Repeatable)
23019                        {
23020                            char_tokens.push(self.advance().text);
23021                        } else {
23022                            break;
23023                        }
23024                    }
23025                    if !char_tokens.is_empty() {
23026                        characteristics.push(char_tokens.join(" "));
23027                    }
23028                    if !self.match_token(TokenType::Comma) {
23029                        break;
23030                    }
23031                }
23032
23033                let name = Expression::Identifier(Identifier::new("TRANSACTION".to_string()));
23034                let value = if characteristics.is_empty() {
23035                    Expression::Identifier(Identifier::new("".to_string()))
23036                } else {
23037                    Expression::Identifier(Identifier::new(characteristics.join(", ")))
23038                };
23039
23040                items.push(SetItem {
23041                    name,
23042                    value,
23043                    kind,
23044                    no_equals: false,
23045                });
23046                break;
23047            }
23048
23049            // Parse variable name - use a simple approach to avoid expression parsing issues
23050            // Variable names can be dotted identifiers or keywords used as names
23051            let name = {
23052                if self.check(TokenType::AtAt) {
23053                    // @@SCOPE.variable or @@variable syntax (MySQL system variables)
23054                    self.skip(); // consume @@
23055                    let mut name_str = "@@".to_string();
23056                    let first = self.advance().text.clone();
23057                    name_str.push_str(&first);
23058                    // Handle @@scope.variable (e.g., @@GLOBAL.max_connections)
23059                    while self.match_token(TokenType::Dot) {
23060                        let next = self.advance().text.clone();
23061                        name_str.push('.');
23062                        name_str.push_str(&next);
23063                    }
23064                    Expression::Identifier(Identifier::new(name_str))
23065                } else if self.check(TokenType::DAt) {
23066                    // @variable syntax (MySQL user variables)
23067                    self.skip(); // consume @
23068                    let mut name_str = "@".to_string();
23069                    let first = self.advance().text.clone();
23070                    name_str.push_str(&first);
23071                    Expression::Identifier(Identifier::new(name_str))
23072                } else if self.check(TokenType::LParen) {
23073                    // Tuple of variable names: SET VARIABLE (v1, v2) = (SELECT ...)
23074                    self.skip(); // consume (
23075                    let mut vars = Vec::new();
23076                    loop {
23077                        let var_name = self.advance().text.clone();
23078                        vars.push(Expression::Column(Box::new(Column {
23079                            name: Identifier::new(var_name),
23080                            table: None,
23081                            join_mark: false,
23082                            trailing_comments: Vec::new(),
23083                            span: None,
23084                            inferred_type: None,
23085                        })));
23086                        if !self.match_token(TokenType::Comma) {
23087                            break;
23088                        }
23089                    }
23090                    self.expect(TokenType::RParen)?;
23091                    Expression::Tuple(Box::new(crate::expressions::Tuple { expressions: vars }))
23092                } else {
23093                    let first = self.advance().text.clone();
23094                    let mut name_str = first;
23095                    // Handle dotted identifiers (e.g., schema.variable)
23096                    while self.match_token(TokenType::Dot) {
23097                        let next = self.advance().text.clone();
23098                        name_str.push('.');
23099                        name_str.push_str(&next);
23100                    }
23101                    // Handle Hive-style colon-separated names (e.g., hiveconf:some_var)
23102                    // But not := which is assignment
23103                    while self.check(TokenType::Colon) && !self.check_next(TokenType::Eq) {
23104                        self.skip(); // consume :
23105                        let next = self.advance().text.clone();
23106                        name_str.push(':');
23107                        name_str.push_str(&next);
23108                    }
23109                    Expression::Identifier(Identifier::new(name_str))
23110                }
23111            };
23112
23113            // Expect = or := or TO
23114            if self.match_token(TokenType::Eq) || self.match_token(TokenType::ColonEq) {
23115                // ok - standard assignment
23116            } else if self.match_token(TokenType::To) {
23117                // PostgreSQL uses SET var TO value
23118            } else if self.is_at_end()
23119                || self.check(TokenType::Semicolon)
23120                || self.check(TokenType::Comma)
23121            {
23122                // SET x ON/OFF without = (TSQL: SET XACT_ABORT ON)
23123                // The ON/OFF was already parsed as part of the name expression
23124                // Handle as a name-only set (value is empty)
23125                items.push(SetItem {
23126                    name,
23127                    value: Expression::Identifier(Identifier::new("".to_string())),
23128                    kind,
23129                    no_equals: false,
23130                });
23131                if !self.match_token(TokenType::Comma) {
23132                    break;
23133                }
23134                continue;
23135            } else {
23136                // Check if the next token looks like a value (ON/OFF without =)
23137                // TSQL: SET XACT_ABORT ON, SET NOCOUNT ON
23138                if self.check(TokenType::On) || self.check_keyword_text("OFF") {
23139                    let val = self.advance().text;
23140                    // Include ON/OFF in the name so generator doesn't add "="
23141                    let name_with_val = match &name {
23142                        Expression::Column(col) => format!("{} {}", col.name.name, val),
23143                        Expression::Identifier(id) => format!("{} {}", id.name, val),
23144                        _ => val.clone(),
23145                    };
23146                    items.push(SetItem {
23147                        name: Expression::Identifier(Identifier::new(name_with_val)),
23148                        value: Expression::Identifier(Identifier::new("".to_string())),
23149                        kind,
23150                        no_equals: false,
23151                    });
23152                    if !self.match_token(TokenType::Comma) {
23153                        break;
23154                    }
23155                    continue;
23156                }
23157                // TSQL/Generic: SET key value (without = or TO)
23158                // Parse the next token as the value
23159                if !self.is_at_end() && !self.check(TokenType::Semicolon) {
23160                    let value = self.parse_expression()?;
23161                    items.push(SetItem {
23162                        name,
23163                        value,
23164                        kind,
23165                        no_equals: true,
23166                    });
23167                    if !self.match_token(TokenType::Comma) {
23168                        break;
23169                    }
23170                    continue;
23171                }
23172                return Err(self.parse_error("Expected '=' or 'TO' in SET statement"));
23173            }
23174
23175            // Parse value - handle ON/OFF keywords as identifiers (MySQL: SET autocommit = ON)
23176            let value = if self.check(TokenType::On) || self.check_keyword_text("OFF") {
23177                Expression::Identifier(Identifier::new(self.advance().text.clone()))
23178            } else if self.match_token(TokenType::Default) {
23179                Expression::Identifier(Identifier::new("DEFAULT".to_string()))
23180            } else {
23181                self.parse_expression()?
23182            };
23183
23184            items.push(SetItem {
23185                name,
23186                value,
23187                kind,
23188                no_equals: false,
23189            });
23190
23191            if !self.match_token(TokenType::Comma) {
23192                break;
23193            }
23194        }
23195
23196        Ok(Expression::SetStatement(Box::new(SetStatement { items })))
23197    }
23198
23199    /// Parse Teradata SET QUERY_BAND statement
23200    fn parse_query_band(&mut self) -> Result<Expression> {
23201        self.expect(TokenType::Eq)?;
23202
23203        let value = if self.match_identifier("NONE") {
23204            Expression::Var(Box::new(Var {
23205                this: "NONE".to_string(),
23206            }))
23207        } else if self.check(TokenType::String) {
23208            Expression::Literal(Box::new(Literal::String(self.expect_string()?)))
23209        } else {
23210            self.parse_primary()?
23211        };
23212
23213        let update = if self.match_token(TokenType::Update) || self.match_identifier("UPDATE") {
23214            Some(Box::new(Expression::Boolean(BooleanLiteral {
23215                value: true,
23216            })))
23217        } else {
23218            None
23219        };
23220
23221        let _ = self.match_token(TokenType::For);
23222
23223        let scope = if self.match_token(TokenType::Session) || self.match_identifier("SESSION") {
23224            if self.match_identifier("VOLATILE") {
23225                Some("SESSION VOLATILE".to_string())
23226            } else {
23227                Some("SESSION".to_string())
23228            }
23229        } else if self.match_token(TokenType::Transaction) || self.match_identifier("TRANSACTION") {
23230            Some("TRANSACTION".to_string())
23231        } else if self.match_identifier("VOLATILE") {
23232            Some("VOLATILE".to_string())
23233        } else {
23234            None
23235        };
23236
23237        Ok(Expression::QueryBand(Box::new(QueryBand {
23238            this: Box::new(value),
23239            scope: scope.map(|s| Box::new(Expression::Var(Box::new(Var { this: s })))),
23240            update,
23241        })))
23242    }
23243
23244    /// Parse FETCH FIRST/NEXT clause
23245    fn parse_fetch(&mut self) -> Result<Fetch> {
23246        // FETCH [FIRST|NEXT] [count] [PERCENT] [ROW|ROWS] [ONLY|WITH TIES]
23247
23248        // FIRST or NEXT
23249        let direction = if self.match_token(TokenType::First) {
23250            "FIRST".to_string()
23251        } else if self.match_token(TokenType::Next) {
23252            "NEXT".to_string()
23253        } else {
23254            "FIRST".to_string() // Default
23255        };
23256
23257        // Optional count - but check if next token is ROW/ROWS/PERCENT/ONLY (no count)
23258        let count = if !self.check(TokenType::Row)
23259            && !self.check(TokenType::Rows)
23260            && !self.check(TokenType::Percent)
23261            && !self.check(TokenType::Only)
23262        {
23263            // Accept number, parenthesized expression, or TSQL @variable (Var token)
23264            if self.check(TokenType::Number)
23265                || self.check(TokenType::LParen)
23266                || self.check(TokenType::DAt)
23267                || self.check(TokenType::Var)
23268            {
23269                Some(self.parse_primary()?)
23270            } else {
23271                None
23272            }
23273        } else {
23274            None
23275        };
23276
23277        // PERCENT modifier
23278        let percent = self.match_token(TokenType::Percent);
23279
23280        // ROW or ROWS
23281        let rows = self.match_token(TokenType::Row) || self.match_token(TokenType::Rows);
23282
23283        // ONLY or WITH TIES
23284        self.match_token(TokenType::Only);
23285        let with_ties = self.match_keywords(&[TokenType::With, TokenType::Ties]);
23286
23287        Ok(Fetch {
23288            direction,
23289            count,
23290            percent,
23291            rows,
23292            with_ties,
23293        })
23294    }
23295
23296    /// Parse a qualified name (schema.table.column or just table)
23297    fn parse_qualified_name(&mut self) -> Result<Expression> {
23298        let first = self.expect_identifier_or_keyword()?;
23299        let mut parts = vec![first];
23300
23301        while self.match_token(TokenType::Dot) {
23302            let next = self.expect_identifier_or_keyword()?;
23303            parts.push(next);
23304        }
23305
23306        if parts.len() == 1 {
23307            Ok(Expression::Identifier(Identifier::new(parts.remove(0))))
23308        } else if parts.len() == 2 {
23309            Ok(Expression::boxed_column(Column {
23310                table: Some(Identifier::new(parts[0].clone())),
23311                name: Identifier::new(parts[1].clone()),
23312                join_mark: false,
23313                trailing_comments: Vec::new(),
23314                span: None,
23315                inferred_type: None,
23316            }))
23317        } else {
23318            // For 3+ parts, create a Column with concatenated table parts
23319            let column_name = parts.pop().unwrap();
23320            let table_name = parts.join(".");
23321            Ok(Expression::boxed_column(Column {
23322                table: Some(Identifier::new(table_name)),
23323                name: Identifier::new(column_name),
23324                join_mark: false,
23325                trailing_comments: Vec::new(),
23326                span: None,
23327                inferred_type: None,
23328            }))
23329        }
23330    }
23331
23332    // ==================== Phase 4: Additional DDL Parsing ====================
23333
23334    /// Parse CREATE SCHEMA statement
23335    fn parse_create_schema(&mut self, leading_comments: Vec<String>) -> Result<Expression> {
23336        self.expect(TokenType::Schema)?;
23337
23338        let if_not_exists =
23339            self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
23340        let name = self.parse_identifier_parts()?;
23341
23342        // Parse CLONE clause (Snowflake)
23343        let clone_from = if self.match_identifier("CLONE") {
23344            Some(self.parse_identifier_parts()?)
23345        } else {
23346            None
23347        };
23348
23349        // Parse AT/BEFORE clause for time travel (Snowflake)
23350        // Note: BEFORE is a keyword token, AT is an identifier
23351        let at_clause = if self.match_identifier("AT") || self.match_token(TokenType::Before) {
23352            let keyword = self.previous().text.to_ascii_uppercase();
23353            self.expect(TokenType::LParen)?;
23354            // Parse the content: OFFSET => value or TIMESTAMP => value
23355            let mut result = format!("{} (", keyword);
23356            let mut prev_token_type: Option<TokenType> = None;
23357            let mut paren_depth = 1; // Track nested parens
23358            while !self.is_at_end() && paren_depth > 0 {
23359                let token = self.advance();
23360                if token.token_type == TokenType::LParen {
23361                    paren_depth += 1;
23362                } else if token.token_type == TokenType::RParen {
23363                    paren_depth -= 1;
23364                    if paren_depth == 0 {
23365                        break; // Don't include the closing paren in result yet
23366                    }
23367                }
23368                // Smart spacing: no space after ( or => or - and no space before (
23369                let needs_space = !result.ends_with('(')
23370                    && prev_token_type != Some(TokenType::Arrow)
23371                    && prev_token_type != Some(TokenType::Dash)
23372                    && prev_token_type != Some(TokenType::LParen)
23373                    && token.token_type != TokenType::LParen; // no space before (
23374                if needs_space
23375                    && token.token_type != TokenType::RParen
23376                    && token.token_type != TokenType::Comma
23377                {
23378                    result.push(' ');
23379                }
23380                // Properly quote string literals
23381                if token.token_type == TokenType::String {
23382                    result.push('\'');
23383                    result.push_str(&token.text.replace('\'', "''"));
23384                    result.push('\'');
23385                } else {
23386                    result.push_str(&token.text);
23387                }
23388                if token.token_type == TokenType::Arrow || token.token_type == TokenType::Comma {
23389                    result.push(' ');
23390                }
23391                prev_token_type = Some(token.token_type);
23392            }
23393            result.push(')');
23394            Some(Expression::Raw(Raw { sql: result }))
23395        } else {
23396            None
23397        };
23398
23399        let authorization = if self.match_token(TokenType::Authorization) {
23400            Some(Identifier::new(self.expect_identifier()?))
23401        } else {
23402            None
23403        };
23404
23405        // Parse schema properties like DEFAULT COLLATE or WITH (properties)
23406        let mut properties = Vec::new();
23407
23408        // Parse WITH (prop1=val1, prop2=val2, ...) (Trino/Presto)
23409        if self.match_token(TokenType::With) {
23410            self.expect(TokenType::LParen)?;
23411            loop {
23412                // Parse property name (identifier or string)
23413                let prop_name = if self.check(TokenType::String) {
23414                    Expression::Literal(Box::new(Literal::String(self.expect_string()?)))
23415                } else {
23416                    Expression::Identifier(Identifier::new(self.expect_identifier_or_keyword()?))
23417                };
23418                self.expect(TokenType::Eq)?;
23419                // Parse property value
23420                let prop_value = self.parse_expression()?;
23421                // Create Property expression: key=value
23422                properties.push(Expression::Property(Box::new(Property {
23423                    this: Box::new(prop_name),
23424                    value: Some(Box::new(prop_value)),
23425                })));
23426                if !self.match_token(TokenType::Comma) {
23427                    break;
23428                }
23429            }
23430            self.expect(TokenType::RParen)?;
23431        }
23432
23433        // Parse DEFAULT COLLATE 'value' (BigQuery)
23434        if self.match_token(TokenType::Default) && self.match_token(TokenType::Collate) {
23435            // Parse the collation value (could be string literal or identifier)
23436            let collation = self.parse_primary()?;
23437            properties.push(Expression::CollateProperty(Box::new(CollateProperty {
23438                this: Box::new(collation),
23439                default: Some(Box::new(Expression::Boolean(BooleanLiteral {
23440                    value: true,
23441                }))),
23442            })));
23443        }
23444
23445        Ok(Expression::CreateSchema(Box::new(CreateSchema {
23446            name,
23447            if_not_exists,
23448            authorization,
23449            clone_from,
23450            at_clause,
23451            properties,
23452            leading_comments,
23453        })))
23454    }
23455
23456    /// Parse DROP SCHEMA statement
23457    fn parse_drop_schema(&mut self) -> Result<Expression> {
23458        self.expect(TokenType::Schema)?;
23459
23460        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
23461        let name = Identifier::new(self.expect_identifier()?);
23462
23463        let cascade = self.match_token(TokenType::Cascade);
23464        if !cascade {
23465            self.match_token(TokenType::Restrict);
23466        }
23467
23468        Ok(Expression::DropSchema(Box::new(DropSchema {
23469            name,
23470            if_exists,
23471            cascade,
23472        })))
23473    }
23474
23475    /// Parse CREATE DATABASE statement
23476    fn parse_create_database(&mut self) -> Result<Expression> {
23477        self.expect(TokenType::Database)?;
23478
23479        let if_not_exists =
23480            self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
23481        let name = Identifier::new(self.expect_identifier()?);
23482
23483        // Check for Snowflake CLONE clause
23484        let clone_from = if self.match_identifier("CLONE") {
23485            Some(Identifier::new(self.expect_identifier()?))
23486        } else {
23487            None
23488        };
23489
23490        // Parse AT/BEFORE clause for time travel (Snowflake)
23491        // Note: BEFORE is a keyword token, AT is an identifier
23492        let at_clause = if self.match_identifier("AT") || self.match_token(TokenType::Before) {
23493            let keyword = self.previous().text.to_ascii_uppercase();
23494            self.expect(TokenType::LParen)?;
23495            // Parse the content: OFFSET => value or TIMESTAMP => value
23496            let mut result = format!("{} (", keyword);
23497            let mut prev_token_type: Option<TokenType> = None;
23498            let mut paren_depth = 1; // Track nested parens
23499            while !self.is_at_end() && paren_depth > 0 {
23500                let token = self.advance();
23501                if token.token_type == TokenType::LParen {
23502                    paren_depth += 1;
23503                } else if token.token_type == TokenType::RParen {
23504                    paren_depth -= 1;
23505                    if paren_depth == 0 {
23506                        break; // Don't include the closing paren in result yet
23507                    }
23508                }
23509                // Smart spacing: no space after ( or => or - and no space before (
23510                let needs_space = !result.ends_with('(')
23511                    && prev_token_type != Some(TokenType::Arrow)
23512                    && prev_token_type != Some(TokenType::Dash)
23513                    && prev_token_type != Some(TokenType::LParen)
23514                    && token.token_type != TokenType::LParen; // no space before (
23515                if needs_space
23516                    && token.token_type != TokenType::RParen
23517                    && token.token_type != TokenType::Comma
23518                {
23519                    result.push(' ');
23520                }
23521                // Properly quote string literals
23522                if token.token_type == TokenType::String {
23523                    result.push('\'');
23524                    result.push_str(&token.text.replace('\'', "''"));
23525                    result.push('\'');
23526                } else {
23527                    result.push_str(&token.text);
23528                }
23529                if token.token_type == TokenType::Arrow || token.token_type == TokenType::Comma {
23530                    result.push(' ');
23531                }
23532                prev_token_type = Some(token.token_type);
23533            }
23534            result.push(')');
23535            Some(Expression::Raw(Raw { sql: result }))
23536        } else {
23537            None
23538        };
23539
23540        // ClickHouse: ON CLUSTER clause
23541        let _on_cluster = self.parse_on_cluster_clause()?;
23542
23543        let mut options = Vec::new();
23544
23545        // Parse database options
23546        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
23547            if self.match_identifier("OWNER") || self.match_token(TokenType::Eq) {
23548                self.match_token(TokenType::Eq);
23549                options.push(DatabaseOption::Owner(Identifier::new(
23550                    self.expect_identifier()?,
23551                )));
23552            } else if self.match_identifier("TEMPLATE") {
23553                self.match_token(TokenType::Eq);
23554                options.push(DatabaseOption::Template(Identifier::new(
23555                    self.expect_identifier()?,
23556                )));
23557            } else if self.match_identifier("ENCODING") {
23558                self.match_token(TokenType::Eq);
23559                let encoding = if self.check(TokenType::String) {
23560                    let tok = self.advance();
23561                    tok.text.trim_matches('\'').to_string()
23562                } else {
23563                    self.expect_identifier()?
23564                };
23565                options.push(DatabaseOption::Encoding(encoding));
23566            } else if self.match_identifier("CHARACTER") {
23567                self.match_token(TokenType::Set);
23568                self.match_token(TokenType::Eq);
23569                let charset = if self.check(TokenType::String) {
23570                    let tok = self.advance();
23571                    tok.text.trim_matches('\'').to_string()
23572                } else {
23573                    self.expect_identifier()?
23574                };
23575                options.push(DatabaseOption::CharacterSet(charset));
23576            } else if self.match_identifier("COLLATE") {
23577                self.match_token(TokenType::Eq);
23578                let collate = if self.check(TokenType::String) {
23579                    let tok = self.advance();
23580                    tok.text.trim_matches('\'').to_string()
23581                } else {
23582                    self.expect_identifier()?
23583                };
23584                options.push(DatabaseOption::Collate(collate));
23585            } else if self.match_identifier("LOCATION") {
23586                self.match_token(TokenType::Eq);
23587                let loc = if self.check(TokenType::String) {
23588                    let tok = self.advance();
23589                    tok.text.trim_matches('\'').to_string()
23590                } else {
23591                    self.expect_identifier()?
23592                };
23593                options.push(DatabaseOption::Location(loc));
23594            } else {
23595                break;
23596            }
23597        }
23598
23599        Ok(Expression::CreateDatabase(Box::new(CreateDatabase {
23600            name,
23601            if_not_exists,
23602            options,
23603            clone_from,
23604            at_clause,
23605        })))
23606    }
23607
23608    /// Parse DROP DATABASE statement
23609    fn parse_drop_database(&mut self) -> Result<Expression> {
23610        self.expect(TokenType::Database)?;
23611
23612        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
23613
23614        // ClickHouse: IF EMPTY
23615        if !if_exists
23616            && matches!(
23617                self.config.dialect,
23618                Some(crate::dialects::DialectType::ClickHouse)
23619            )
23620        {
23621            if self.check(TokenType::If)
23622                && self.current + 1 < self.tokens.len()
23623                && self.tokens[self.current + 1]
23624                    .text
23625                    .eq_ignore_ascii_case("EMPTY")
23626            {
23627                self.skip(); // consume IF
23628                self.skip(); // consume EMPTY
23629            }
23630        }
23631        let name = Identifier::new(self.expect_identifier()?);
23632
23633        // ClickHouse: ON CLUSTER clause
23634        let sync = if matches!(
23635            self.config.dialect,
23636            Some(crate::dialects::DialectType::ClickHouse)
23637        ) {
23638            let _ = self.parse_on_cluster_clause()?;
23639            self.match_identifier("SYNC")
23640        } else {
23641            false
23642        };
23643
23644        Ok(Expression::DropDatabase(Box::new(DropDatabase {
23645            name,
23646            if_exists,
23647            sync,
23648        })))
23649    }
23650
23651    /// Parse CREATE FUNCTION statement
23652    fn parse_create_function(
23653        &mut self,
23654        or_replace: bool,
23655        or_alter: bool,
23656        temporary: bool,
23657        is_table_function: bool,
23658    ) -> Result<Expression> {
23659        self.expect(TokenType::Function)?;
23660
23661        let if_not_exists =
23662            self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
23663        let name = self.parse_table_ref()?;
23664
23665        // Parse parameters (optional - some dialects allow CREATE FUNCTION f AS 'body')
23666        let (parameters, has_parens) = if self.match_token(TokenType::LParen) {
23667            let params = self.parse_function_parameters()?;
23668            self.expect(TokenType::RParen)?;
23669            (params, true)
23670        } else {
23671            (Vec::new(), false)
23672        };
23673
23674        // Track if LANGUAGE appears before RETURNS
23675        let mut language_first = false;
23676        let mut return_type = None;
23677        let mut language = None;
23678        let mut sql_data_access = None;
23679
23680        // Check for LANGUAGE before RETURNS
23681        if self.match_token(TokenType::Language) {
23682            language = Some(self.expect_identifier_or_keyword()?);
23683            language_first = true;
23684        }
23685
23686        // Parse RETURNS clause (may come before or after LANGUAGE)
23687        let mut returns_table_body: Option<String> = None;
23688        if self.match_token(TokenType::Returns) {
23689            if self.check(TokenType::Var) && self.peek().text.starts_with('@') {
23690                // TSQL: RETURNS @var TABLE (col_defs)
23691                let var_name = self.advance().text.clone();
23692                if self.check(TokenType::Table) {
23693                    self.skip(); // consume TABLE
23694                    return_type = Some(DataType::Custom {
23695                        name: "TABLE".to_string(),
23696                    });
23697                    // Parse column definitions
23698                    if self.match_token(TokenType::LParen) {
23699                        let start = self.current;
23700                        let mut depth = 1;
23701                        while depth > 0 && !self.is_at_end() {
23702                            if self.check(TokenType::LParen) {
23703                                depth += 1;
23704                            }
23705                            if self.check(TokenType::RParen) {
23706                                depth -= 1;
23707                                if depth == 0 {
23708                                    break;
23709                                }
23710                            }
23711                            self.skip();
23712                        }
23713                        // Reconstruct the column definitions with proper spacing
23714                        let mut col_defs_str = String::new();
23715                        for (i, tok) in self.tokens[start..self.current].iter().enumerate() {
23716                            // Don't add space before comma, LParen, RParen
23717                            // Don't add space after LParen
23718                            let prev_tok = if i > 0 {
23719                                Some(&self.tokens[start + i - 1])
23720                            } else {
23721                                None
23722                            };
23723                            let needs_space = i > 0
23724                                && tok.token_type != TokenType::Comma
23725                                && tok.token_type != TokenType::RParen
23726                                && tok.token_type != TokenType::LParen
23727                                && prev_tok
23728                                    .map(|p| p.token_type != TokenType::LParen)
23729                                    .unwrap_or(true);
23730                            if needs_space {
23731                                col_defs_str.push(' ');
23732                            }
23733                            col_defs_str.push_str(&tok.text);
23734                        }
23735                        returns_table_body = Some(format!("{} TABLE ({})", var_name, col_defs_str));
23736                        self.expect(TokenType::RParen)?;
23737                    } else {
23738                        returns_table_body = Some(format!("{} TABLE", var_name));
23739                    }
23740                } else {
23741                    // Parse data type after var name
23742                    return_type = Some(self.parse_data_type()?);
23743                }
23744            } else if self.check(TokenType::Table) {
23745                // Could be:
23746                // - TSQL: RETURNS TABLE AS RETURN ...
23747                // - BigQuery: RETURNS TABLE <col1 TYPE, col2 TYPE>
23748                // - Snowflake: RETURNS TABLE(col1 TYPE, col2 TYPE)
23749                self.skip(); // consume TABLE
23750                if self.check(TokenType::Lt) {
23751                    // BigQuery: RETURNS TABLE <col1 TYPE, col2 TYPE>
23752                    self.skip(); // consume <
23753                    let mut cols = Vec::new();
23754                    loop {
23755                        let col_name = self.expect_identifier()?;
23756                        let col_type = self.parse_data_type()?;
23757                        cols.push(format!(
23758                            "{} {}",
23759                            col_name,
23760                            self.data_type_to_string(&col_type)
23761                        ));
23762                        if !self.match_token(TokenType::Comma) {
23763                            break;
23764                        }
23765                    }
23766                    if !self.match_token(TokenType::Gt) {
23767                        return Err(self.parse_error("Expected > after TABLE column definitions"));
23768                    }
23769                    returns_table_body = Some(format!("TABLE <{}>", cols.join(", ")));
23770                } else if self.check(TokenType::LParen) {
23771                    // Snowflake: RETURNS TABLE(col1 TYPE, col2 TYPE)
23772                    self.skip(); // consume (
23773                    let mut cols = Vec::new();
23774                    loop {
23775                        let col_name = self.expect_identifier()?;
23776                        let col_type = self.parse_data_type()?;
23777                        cols.push(format!(
23778                            "{} {}",
23779                            col_name,
23780                            self.data_type_to_string(&col_type)
23781                        ));
23782                        if !self.match_token(TokenType::Comma) {
23783                            break;
23784                        }
23785                    }
23786                    self.expect(TokenType::RParen)?;
23787                    returns_table_body = Some(format!("TABLE ({})", cols.join(", ")));
23788                } else {
23789                    // TSQL: RETURNS TABLE AS RETURN ...
23790                    return_type = Some(DataType::Custom {
23791                        name: "TABLE".to_string(),
23792                    });
23793                }
23794            } else {
23795                // Use parse_function_return_type to preserve original type names like 'integer'
23796                return_type = Some(self.parse_function_return_type()?);
23797            }
23798        }
23799
23800        let mut deterministic = None;
23801        let mut returns_null_on_null_input = None;
23802        let mut strict = false;
23803        let mut security = None;
23804        let mut body = None;
23805        let mut set_options: Vec<FunctionSetOption> = Vec::new();
23806        let mut property_order: Vec<FunctionPropertyKind> = Vec::new();
23807        let mut options: Vec<Expression> = Vec::new();
23808        let mut environment: Vec<Expression> = Vec::new();
23809        let mut handler: Option<String> = None;
23810        let mut parameter_style: Option<String> = None;
23811
23812        // Parse function options
23813        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
23814            if self.check(TokenType::Returns)
23815                && self.current + 1 < self.tokens.len()
23816                && self.tokens[self.current + 1].token_type == TokenType::Null
23817            {
23818                // RETURNS NULL ON NULL INPUT
23819                self.skip(); // consume RETURNS
23820                self.skip(); // consume NULL
23821                self.match_token(TokenType::On);
23822                self.match_token(TokenType::Null);
23823                self.match_token(TokenType::Input);
23824                returns_null_on_null_input = Some(true);
23825                if !property_order.contains(&FunctionPropertyKind::NullInput) {
23826                    property_order.push(FunctionPropertyKind::NullInput);
23827                }
23828            } else if self.match_token(TokenType::Returns) {
23829                // RETURNS can come after LANGUAGE
23830                return_type = Some(self.parse_data_type()?);
23831            } else if self.match_token(TokenType::Language) {
23832                // Language can be SQL, PLPGSQL, PYTHON, etc.
23833                language = Some(self.expect_identifier_or_keyword()?);
23834                if !property_order.contains(&FunctionPropertyKind::Language) {
23835                    property_order.push(FunctionPropertyKind::Language);
23836                }
23837            } else if self.match_token(TokenType::Not) && self.match_identifier("DETERMINISTIC") {
23838                deterministic = Some(false);
23839                if !property_order.contains(&FunctionPropertyKind::Determinism) {
23840                    property_order.push(FunctionPropertyKind::Determinism);
23841                }
23842            } else if self.match_identifier("DETERMINISTIC") {
23843                deterministic = Some(true);
23844                if !property_order.contains(&FunctionPropertyKind::Determinism) {
23845                    property_order.push(FunctionPropertyKind::Determinism);
23846                }
23847            } else if self.match_identifier("IMMUTABLE") {
23848                deterministic = Some(true);
23849                if !property_order.contains(&FunctionPropertyKind::Determinism) {
23850                    property_order.push(FunctionPropertyKind::Determinism);
23851                }
23852            } else if self.match_identifier("STABLE") || self.match_identifier("VOLATILE") {
23853                deterministic = Some(false);
23854                if !property_order.contains(&FunctionPropertyKind::Determinism) {
23855                    property_order.push(FunctionPropertyKind::Determinism);
23856                }
23857            } else if self.match_identifier("STRICT") {
23858                returns_null_on_null_input = Some(true);
23859                strict = true;
23860                if !property_order.contains(&FunctionPropertyKind::NullInput) {
23861                    property_order.push(FunctionPropertyKind::NullInput);
23862                }
23863            } else if self.match_identifier("CALLED") {
23864                self.match_token(TokenType::On);
23865                self.match_token(TokenType::Null);
23866                self.match_token(TokenType::Input);
23867                returns_null_on_null_input = Some(false);
23868                if !property_order.contains(&FunctionPropertyKind::NullInput) {
23869                    property_order.push(FunctionPropertyKind::NullInput);
23870                }
23871            } else if self.match_identifier("SECURITY") {
23872                if self.match_identifier("DEFINER") {
23873                    security = Some(FunctionSecurity::Definer);
23874                } else if self.match_identifier("INVOKER") {
23875                    security = Some(FunctionSecurity::Invoker);
23876                }
23877                if !property_order.contains(&FunctionPropertyKind::Security) {
23878                    property_order.push(FunctionPropertyKind::Security);
23879                }
23880            } else if self.match_identifier("CONTAINS") {
23881                // CONTAINS SQL
23882                self.match_identifier("SQL");
23883                sql_data_access = Some(SqlDataAccess::ContainsSql);
23884                if !property_order.contains(&FunctionPropertyKind::SqlDataAccess) {
23885                    property_order.push(FunctionPropertyKind::SqlDataAccess);
23886                }
23887            } else if self.match_identifier("READS") {
23888                // READS SQL DATA
23889                self.match_identifier("SQL");
23890                self.match_identifier("DATA");
23891                sql_data_access = Some(SqlDataAccess::ReadsSqlData);
23892                if !property_order.contains(&FunctionPropertyKind::SqlDataAccess) {
23893                    property_order.push(FunctionPropertyKind::SqlDataAccess);
23894                }
23895            } else if self.match_identifier("MODIFIES") {
23896                // MODIFIES SQL DATA
23897                self.match_identifier("SQL");
23898                self.match_identifier("DATA");
23899                sql_data_access = Some(SqlDataAccess::ModifiesSqlData);
23900                if !property_order.contains(&FunctionPropertyKind::SqlDataAccess) {
23901                    property_order.push(FunctionPropertyKind::SqlDataAccess);
23902                }
23903            } else if self.match_token(TokenType::No) && self.match_identifier("SQL") {
23904                // NO SQL
23905                sql_data_access = Some(SqlDataAccess::NoSql);
23906                if !property_order.contains(&FunctionPropertyKind::SqlDataAccess) {
23907                    property_order.push(FunctionPropertyKind::SqlDataAccess);
23908                }
23909            } else if self.match_token(TokenType::Set) {
23910                // PostgreSQL: SET key = value / SET key TO value / SET key FROM CURRENT
23911                let opt_name = self.expect_identifier_or_keyword()?;
23912                let value = if self.match_token(TokenType::From) {
23913                    // SET key FROM CURRENT
23914                    if !self.match_token(TokenType::Current) {
23915                        return Err(self.parse_error("Expected CURRENT after FROM in SET option"));
23916                    }
23917                    FunctionSetValue::FromCurrent
23918                } else {
23919                    // SET key = value or SET key TO value
23920                    let use_to = self.match_token(TokenType::To);
23921                    if !use_to && !self.match_token(TokenType::Eq) {
23922                        return Err(self.parse_error("Expected = or TO after SET key"));
23923                    }
23924                    // Value can be a string literal or identifier
23925                    let val = if self.check(TokenType::String) {
23926                        let tok = self.advance();
23927                        format!("'{}'", tok.text)
23928                    } else {
23929                        self.expect_identifier_or_keyword()?
23930                    };
23931                    FunctionSetValue::Value { value: val, use_to }
23932                };
23933                set_options.push(FunctionSetOption {
23934                    name: opt_name,
23935                    value,
23936                });
23937                if !property_order.contains(&FunctionPropertyKind::Set) {
23938                    property_order.push(FunctionPropertyKind::Set);
23939                }
23940            } else if self.match_token(TokenType::As) {
23941                // Parse function body: AS RETURN x, AS $$ ... $$, AS BEGIN ... END, AS 'body'
23942                if !property_order.contains(&FunctionPropertyKind::As) {
23943                    property_order.push(FunctionPropertyKind::As);
23944                }
23945                if self.match_identifier("RETURN") {
23946                    // AS RETURN expression (or SELECT statement for TSQL TVFs)
23947                    let expr = if self.check(TokenType::Select) || self.check(TokenType::With) {
23948                        // TSQL: AS RETURN SELECT ... for table-valued functions
23949                        self.parse_statement()?
23950                    } else {
23951                        self.parse_expression()?
23952                    };
23953                    body = Some(FunctionBody::Return(expr));
23954                } else if self.check(TokenType::Select) || self.check(TokenType::With) {
23955                    // TSQL: AS SELECT ... for table-valued functions (without RETURN keyword)
23956                    let stmt = self.parse_statement()?;
23957                    body = Some(FunctionBody::Expression(stmt));
23958                } else if self.check(TokenType::DollarString) {
23959                    let tok = self.advance();
23960                    // Parse the dollar string token to extract tag and content
23961                    let (tag, content) = crate::tokens::parse_dollar_string_token(&tok.text);
23962                    body = Some(FunctionBody::DollarQuoted { content, tag });
23963                } else if self.check(TokenType::String) {
23964                    let tok = self.advance();
23965                    body = Some(FunctionBody::StringLiteral(tok.text.clone()));
23966                } else if self.match_token(TokenType::Begin) {
23967                    // Parse BEGIN...END block
23968                    let mut block_content = String::new();
23969                    let mut depth = 1;
23970                    while depth > 0 && !self.is_at_end() {
23971                        let tok = self.advance();
23972                        if tok.token_type == TokenType::Begin {
23973                            depth += 1;
23974                        } else if tok.token_type == TokenType::End {
23975                            depth -= 1;
23976                            if depth == 0 {
23977                                break;
23978                            }
23979                        }
23980                        block_content.push_str(&tok.text);
23981                        block_content.push(' ');
23982                    }
23983                    body = Some(FunctionBody::Block(block_content.trim().to_string()));
23984                } else if self.check(TokenType::Table) {
23985                    // DuckDB: AS TABLE SELECT ... (table macro)
23986                    self.advance(); // consume TABLE
23987                    if return_type.is_none() {
23988                        return_type = Some(DataType::Custom {
23989                            name: "TABLE".to_string(),
23990                        });
23991                    }
23992                    let stmt = self.parse_statement()?;
23993                    body = Some(FunctionBody::Return(stmt));
23994                } else {
23995                    // Expression-based body
23996                    let expr = self.parse_expression()?;
23997                    body = Some(FunctionBody::Expression(expr));
23998                }
23999            } else if self.match_identifier("RETURN") {
24000                // RETURN expression (or SELECT statement for TSQL TVFs)
24001                let expr = if self.check(TokenType::Select) || self.check(TokenType::With) {
24002                    self.parse_statement()?
24003                } else {
24004                    self.parse_expression()?
24005                };
24006                body = Some(FunctionBody::Return(expr));
24007            } else if self.match_identifier("EXTERNAL") {
24008                self.match_identifier("NAME");
24009                let ext_name = if self.check(TokenType::String) {
24010                    let tok = self.advance();
24011                    tok.text.trim_matches('\'').to_string()
24012                } else {
24013                    self.expect_identifier()?
24014                };
24015                body = Some(FunctionBody::External(ext_name));
24016            } else if self.match_identifier("OPTIONS") {
24017                // BigQuery: OPTIONS (key=value, ...) - track in property_order
24018                let parsed_options = self.parse_options_list()?;
24019                options.extend(parsed_options);
24020                if !property_order.contains(&FunctionPropertyKind::Options) {
24021                    property_order.push(FunctionPropertyKind::Options);
24022                }
24023            } else if self.match_identifier("ENVIRONMENT") {
24024                // Databricks: ENVIRONMENT (dependencies = '...', environment_version = '...')
24025                let parsed_env = self.parse_environment_list()?;
24026                environment.extend(parsed_env);
24027                if !property_order.contains(&FunctionPropertyKind::Environment) {
24028                    property_order.push(FunctionPropertyKind::Environment);
24029                }
24030            } else if self.match_identifier("HANDLER") {
24031                // Databricks: HANDLER 'handler_function'
24032                if self.check(TokenType::String) {
24033                    let tok = self.advance();
24034                    handler = Some(tok.text.clone());
24035                }
24036                if !property_order.contains(&FunctionPropertyKind::Handler) {
24037                    property_order.push(FunctionPropertyKind::Handler);
24038                }
24039            } else if self.match_text_seq(&["PARAMETER", "STYLE"]) {
24040                // Databricks: PARAMETER STYLE PANDAS
24041                let style = self.expect_identifier_or_keyword()?;
24042                parameter_style = Some(style.to_ascii_uppercase());
24043                if !property_order.contains(&FunctionPropertyKind::ParameterStyle) {
24044                    property_order.push(FunctionPropertyKind::ParameterStyle);
24045                }
24046            } else if self.check_identifier("SQL")
24047                && self.current + 1 < self.tokens.len()
24048                && self.tokens[self.current + 1]
24049                    .text
24050                    .eq_ignore_ascii_case("SECURITY")
24051            {
24052                // SQL SECURITY DEFINER/INVOKER
24053                self.skip(); // consume SQL
24054                self.skip(); // consume SECURITY
24055                if self.match_identifier("DEFINER") {
24056                    security = Some(FunctionSecurity::Definer);
24057                } else if self.match_identifier("INVOKER") {
24058                    security = Some(FunctionSecurity::Invoker);
24059                }
24060                if !property_order.contains(&FunctionPropertyKind::Security) {
24061                    property_order.push(FunctionPropertyKind::Security);
24062                }
24063            } else if self.check(TokenType::Select) || self.check(TokenType::With) {
24064                // Bare SELECT/WITH body (without AS keyword) - e.g., MySQL
24065                let stmt = self.parse_statement()?;
24066                body = Some(FunctionBody::Expression(stmt));
24067                if !property_order.contains(&FunctionPropertyKind::As) {
24068                    property_order.push(FunctionPropertyKind::As);
24069                }
24070            } else {
24071                break;
24072            }
24073        }
24074
24075        // BigQuery: OPTIONS (key=value, ...) can also appear after AS body (legacy position)
24076        if options.is_empty() && self.match_identifier("OPTIONS") {
24077            let parsed_options = self.parse_options_list()?;
24078            options.extend(parsed_options);
24079            if !property_order.contains(&FunctionPropertyKind::Options) {
24080                property_order.push(FunctionPropertyKind::Options);
24081            }
24082        }
24083
24084        Ok(Expression::CreateFunction(Box::new(CreateFunction {
24085            name,
24086            parameters,
24087            return_type,
24088            body,
24089            or_replace,
24090            or_alter,
24091            if_not_exists,
24092            temporary,
24093            language,
24094            deterministic,
24095            returns_null_on_null_input,
24096            security,
24097            has_parens,
24098            sql_data_access,
24099            returns_table_body,
24100            language_first,
24101            set_options,
24102            strict,
24103            options,
24104            is_table_function,
24105            property_order,
24106            environment,
24107            handler,
24108            parameter_style,
24109        })))
24110    }
24111
24112    /// Parse function parameters
24113    fn parse_function_parameters(&mut self) -> Result<Vec<FunctionParameter>> {
24114        let mut params = Vec::new();
24115
24116        if self.check(TokenType::RParen) {
24117            return Ok(params);
24118        }
24119
24120        loop {
24121            let mut mode = None;
24122            let mut mode_text: Option<String> = None;
24123
24124            // Check for parameter mode (IN, OUT, INOUT, VARIADIC)
24125            // Note: OUT, INOUT, VARIADIC are tokenized as Var, not as dedicated keywords
24126            if self.match_token(TokenType::In) {
24127                // IN or IN OUT
24128                if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("OUT") {
24129                    let out_text = self.advance().text.clone(); // consume OUT
24130                    mode_text = Some(format!("IN {}", out_text));
24131                    mode = Some(ParameterMode::InOut);
24132                } else {
24133                    mode_text = Some("IN".to_string());
24134                    mode = Some(ParameterMode::In);
24135                }
24136            } else if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("OUT") {
24137                let text = self.advance().text.clone();
24138                mode_text = Some(text);
24139                mode = Some(ParameterMode::Out);
24140            } else if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("INOUT") {
24141                let text = self.advance().text.clone();
24142                mode_text = Some(text);
24143                mode = Some(ParameterMode::InOut);
24144            } else if self.check(TokenType::Var)
24145                && self.peek().text.eq_ignore_ascii_case("VARIADIC")
24146            {
24147                let text = self.advance().text.clone();
24148                mode_text = Some(text);
24149                mode = Some(ParameterMode::Variadic);
24150            }
24151
24152            // Try to parse name and type
24153            // After a mode keyword (VARIADIC, OUT, etc.), the next thing could be:
24154            //   - a type directly (e.g., VARIADIC INT[], OUT INT)
24155            //   - a name then a type (e.g., VARIADIC a INT[], OUT result INT)
24156            //
24157            // Strategy: use backtracking. Save position, try parsing as data type.
24158            // If the result is followed by , or ) or DEFAULT, it was a type-only param.
24159            // Otherwise, restore position and parse as name + type.
24160            let (name, data_type) = if mode.is_some() {
24161                let saved = self.current;
24162                // Try parsing as a data type directly
24163                let type_result = self.parse_data_type();
24164                if let Ok(dt) = type_result {
24165                    if self.check(TokenType::Comma)
24166                        || self.check(TokenType::RParen)
24167                        || self.check(TokenType::Default)
24168                        || self.check(TokenType::Eq)
24169                    {
24170                        // Successfully parsed as a type-only parameter
24171                        (None, dt)
24172                    } else {
24173                        // Not followed by comma/rparen — restore and parse as name + type
24174                        self.current = saved;
24175                        let first_ident =
24176                            if self.check(TokenType::Input) || self.check(TokenType::Output) {
24177                                let token = self.advance();
24178                                Identifier {
24179                                    name: token.text,
24180                                    quoted: false,
24181                                    trailing_comments: Vec::new(),
24182                                    span: None,
24183                                }
24184                            } else {
24185                                self.expect_identifier_with_quoted()?
24186                            };
24187                        self.match_token(TokenType::As);
24188                        let dt = self.parse_data_type()?;
24189                        (Some(first_ident), dt)
24190                    }
24191                } else {
24192                    // Type parse failed — restore and try as name + type
24193                    self.current = saved;
24194                    let first_ident =
24195                        if self.check(TokenType::Input) || self.check(TokenType::Output) {
24196                            let token = self.advance();
24197                            Identifier {
24198                                name: token.text,
24199                                quoted: false,
24200                                trailing_comments: Vec::new(),
24201                                span: None,
24202                            }
24203                        } else {
24204                            self.expect_identifier_with_quoted()?
24205                        };
24206                    if self.check(TokenType::Comma)
24207                        || self.check(TokenType::RParen)
24208                        || self.check(TokenType::Default)
24209                    {
24210                        (None, self.identifier_to_datatype(&first_ident.name)?)
24211                    } else {
24212                        self.match_token(TokenType::As);
24213                        let dt = self.parse_data_type()?;
24214                        (Some(first_ident), dt)
24215                    }
24216                }
24217            } else {
24218                // No mode keyword — original logic
24219                // Handle keywords like INPUT that may be used as parameter names
24220                let first_ident = if self.check(TokenType::Input) || self.check(TokenType::Output) {
24221                    let token = self.advance();
24222                    Identifier {
24223                        name: token.text,
24224                        quoted: false,
24225                        trailing_comments: Vec::new(),
24226                        span: None,
24227                    }
24228                } else {
24229                    self.expect_identifier_with_quoted()?
24230                };
24231
24232                // Check if next token is a type or if this was the type
24233                if self.check(TokenType::Comma)
24234                    || self.check(TokenType::RParen)
24235                    || self.check(TokenType::Default)
24236                {
24237                    // This was the type, no name
24238                    (None, self.identifier_to_datatype(&first_ident.name)?)
24239                } else {
24240                    // This was the name, next is type
24241                    // TSQL allows: @param AS type (optional AS keyword)
24242                    self.match_token(TokenType::As);
24243                    let dt = self.parse_data_type()?;
24244                    (Some(first_ident), dt)
24245                }
24246            };
24247
24248            let default = if self.match_token(TokenType::Default) || self.match_token(TokenType::Eq)
24249            {
24250                Some(self.parse_expression()?)
24251            } else {
24252                None
24253            };
24254
24255            params.push(FunctionParameter {
24256                name,
24257                data_type,
24258                mode,
24259                default,
24260                mode_text: mode_text.clone(),
24261            });
24262
24263            if !self.match_token(TokenType::Comma) {
24264                break;
24265            }
24266        }
24267
24268        Ok(params)
24269    }
24270
24271    /// Parse TSQL-style unparenthesized procedure parameters
24272    /// Format: @param1 TYPE, @param2 TYPE, ... AS
24273    fn parse_tsql_procedure_params(&mut self) -> Result<Vec<FunctionParameter>> {
24274        let mut params = Vec::new();
24275        loop {
24276            if !self.check(TokenType::Var) {
24277                break;
24278            }
24279            let name = self.advance().text.clone();
24280            // Skip optional AS keyword between name and type
24281            self.match_token(TokenType::As);
24282            let data_type = self.parse_data_type()?;
24283            let default = if self.match_token(TokenType::Default) || self.match_token(TokenType::Eq)
24284            {
24285                Some(self.parse_expression()?)
24286            } else {
24287                None
24288            };
24289            params.push(FunctionParameter {
24290                name: Some(Identifier::new(name)),
24291                data_type,
24292                mode: None,
24293                default,
24294                mode_text: None,
24295            });
24296            if !self.match_token(TokenType::Comma) {
24297                break;
24298            }
24299        }
24300        Ok(params)
24301    }
24302
24303    /// Convert identifier to DataType for function parameters.
24304    /// Preserves the original identifier name to maintain exact type name as written.
24305    /// This matches Python sqlglot's behavior where function parameter types like 'integer'
24306    /// are stored as Identifiers rather than normalized DataTypes.
24307    fn identifier_to_datatype(&self, ident: &str) -> Result<DataType> {
24308        // Always use DataType::Custom to preserve the exact type name as written.
24309        // This is important for identity tests where e.g. 'integer' should not be normalized to 'INT'.
24310        Ok(DataType::Custom {
24311            name: ident.to_string(),
24312        })
24313    }
24314
24315    /// Parse a data type for function RETURNS clause, preserving original type names.
24316    /// For simple type names like 'integer', preserves the original name rather than
24317    /// normalizing to INT. This matches Python sqlglot's behavior.
24318    /// For MySQL, uses standard parse_data_type() to ensure proper type mapping (e.g., VARCHAR -> TEXT).
24319    fn parse_function_return_type(&mut self) -> Result<DataType> {
24320        // MySQL needs standard data type parsing for proper type mapping
24321        if matches!(
24322            self.config.dialect,
24323            Some(crate::dialects::DialectType::MySQL)
24324        ) {
24325            return self.parse_data_type();
24326        }
24327
24328        // Check if it's a simple identifier that could be a type name
24329        if (self.check(TokenType::Identifier) || self.check(TokenType::Var))
24330            && !self.check_next(TokenType::LParen)  // Not a parameterized type like VARCHAR(10)
24331            && !self.check_next(TokenType::LBracket)
24332        // Not an array type
24333        {
24334            let type_name = self.advance().text.clone();
24335            // Check if the next token indicates we should use parse_data_type instead
24336            // For complex types, fall through to parse_data_type
24337            return Ok(DataType::Custom { name: type_name });
24338        }
24339
24340        // For complex types, use standard parsing
24341        self.parse_data_type()
24342    }
24343
24344    /// Parse DROP FUNCTION statement
24345    fn parse_drop_function(&mut self) -> Result<Expression> {
24346        self.expect(TokenType::Function)?;
24347
24348        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
24349        let name = self.parse_table_ref()?;
24350
24351        // Optional parameter types for overloaded functions
24352        let parameters = if self.match_token(TokenType::LParen) {
24353            let mut types = Vec::new();
24354            if !self.check(TokenType::RParen) {
24355                loop {
24356                    types.push(self.parse_data_type()?);
24357                    if !self.match_token(TokenType::Comma) {
24358                        break;
24359                    }
24360                }
24361            }
24362            self.expect(TokenType::RParen)?;
24363            Some(types)
24364        } else {
24365            None
24366        };
24367
24368        let cascade = self.match_token(TokenType::Cascade);
24369        if !cascade {
24370            self.match_token(TokenType::Restrict);
24371        }
24372
24373        Ok(Expression::DropFunction(Box::new(DropFunction {
24374            name,
24375            parameters,
24376            if_exists,
24377            cascade,
24378        })))
24379    }
24380
24381    /// Parse CREATE PROCEDURE statement
24382    fn parse_create_procedure(&mut self, or_replace: bool, or_alter: bool) -> Result<Expression> {
24383        // Check if PROC shorthand was used before consuming the token
24384        let use_proc_keyword = self.peek().text.eq_ignore_ascii_case("PROC");
24385        self.expect(TokenType::Procedure)?;
24386
24387        let if_not_exists =
24388            self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
24389        let name = self.parse_table_ref()?;
24390
24391        // Parse parameters (optional parentheses for TSQL)
24392        let (parameters, has_parens) = if self.match_token(TokenType::LParen) {
24393            let params = self.parse_function_parameters()?;
24394            self.expect(TokenType::RParen)?;
24395            (params, true)
24396        } else if self.check(TokenType::Var) && !self.check(TokenType::As) {
24397            // TSQL: CREATE PROCEDURE foo @a INTEGER, @b INTEGER AS ...
24398            // Parameters without parentheses
24399            let params = self.parse_tsql_procedure_params()?;
24400            (params, false)
24401        } else {
24402            (Vec::new(), false)
24403        };
24404
24405        let mut language = None;
24406        let mut security = None;
24407        let mut body = None;
24408        let mut return_type = None;
24409        let mut execute_as = None;
24410        let mut with_options: Vec<String> = Vec::new();
24411
24412        // Parse procedure options
24413        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
24414            if self.match_token(TokenType::Returns) {
24415                // RETURNS type (Snowflake)
24416                return_type = Some(self.parse_data_type()?);
24417            } else if self.match_identifier("EXECUTE") || self.match_token(TokenType::Execute) {
24418                // EXECUTE AS CALLER/OWNER (Snowflake)
24419                if self.match_token(TokenType::As) {
24420                    if self.match_identifier("CALLER") {
24421                        execute_as = Some("CALLER".to_string());
24422                    } else if self.match_identifier("OWNER") {
24423                        execute_as = Some("OWNER".to_string());
24424                    } else if self.match_identifier("SELF") {
24425                        execute_as = Some("SELF".to_string());
24426                    }
24427                }
24428            } else if self.match_token(TokenType::Language) {
24429                // Language can be SQL, PLPGSQL, PYTHON, etc.
24430                language = Some(self.expect_identifier_or_keyword()?);
24431            } else if self.match_identifier("SECURITY") {
24432                if self.match_identifier("DEFINER") {
24433                    security = Some(FunctionSecurity::Definer);
24434                } else if self.match_identifier("INVOKER") {
24435                    security = Some(FunctionSecurity::Invoker);
24436                }
24437            } else if self.match_token(TokenType::With) {
24438                // TSQL: WITH option1, option2, ... AS body
24439                // Options: ENCRYPTION, RECOMPILE, SCHEMABINDING, NATIVE_COMPILATION,
24440                //          EXECUTE AS {OWNER|SELF|CALLER|'username'}
24441                loop {
24442                    if self.match_identifier("EXECUTE") || self.match_token(TokenType::Execute) {
24443                        // EXECUTE AS {OWNER|SELF|CALLER|'username'}
24444                        self.expect(TokenType::As)?;
24445                        if self.check(TokenType::String) {
24446                            let tok = self.advance();
24447                            with_options.push(format!("EXECUTE AS '{}'", tok.text));
24448                        } else {
24449                            let ident = self.expect_identifier_or_keyword()?;
24450                            with_options.push(format!("EXECUTE AS {}", ident.to_ascii_uppercase()));
24451                        }
24452                    } else {
24453                        let opt = self.expect_identifier_or_keyword()?;
24454                        with_options.push(opt.to_ascii_uppercase());
24455                    }
24456                    if !self.match_token(TokenType::Comma) {
24457                        break;
24458                    }
24459                }
24460            } else if self.match_token(TokenType::As) {
24461                // Parse procedure body
24462                if self.check(TokenType::String) {
24463                    // TokenType::String means single-quoted - tokenizer strips quotes
24464                    let tok = self.advance();
24465                    body = Some(FunctionBody::StringLiteral(tok.text.clone()));
24466                } else if self.match_token(TokenType::Begin) {
24467                    // Parse BEGIN ... END block as a list of statements
24468                    let mut statements = Vec::new();
24469                    while !self.check(TokenType::End) && !self.is_at_end() {
24470                        // Skip optional semicolons between statements
24471                        while self.match_token(TokenType::Semicolon) {}
24472                        if self.check(TokenType::End) {
24473                            break;
24474                        }
24475                        statements.push(self.parse_statement()?);
24476                        // Skip optional semicolon after statement
24477                        self.match_token(TokenType::Semicolon);
24478                    }
24479                    self.expect(TokenType::End)?;
24480                    body = Some(FunctionBody::Statements(statements));
24481                } else {
24482                    // TSQL: AS <statement> (e.g., AS SELECT 1)
24483                    let stmt = self.parse_statement()?;
24484                    body = Some(FunctionBody::Expression(stmt));
24485                }
24486            } else if self.check(TokenType::Begin) {
24487                // MySQL: BEGIN...END without AS keyword
24488                // Collect entire block as raw text since MySQL procedural
24489                // constructs (IF/SIGNAL/WHILE/etc.) aren't parseable as statements
24490                let start = self.current;
24491                self.skip(); // consume BEGIN
24492                let mut depth = 1;
24493                while !self.is_at_end() && depth > 0 {
24494                    if self.check(TokenType::Begin) {
24495                        depth += 1;
24496                    } else if self.check(TokenType::End) {
24497                        depth -= 1;
24498                        if depth == 0 {
24499                            break;
24500                        }
24501                    }
24502                    self.skip();
24503                }
24504                let raw = self.tokens_to_sql(start, self.current);
24505                self.expect(TokenType::End)?;
24506                // Consume optional label after END (e.g., END myproc)
24507                if self.is_identifier_token() || self.check(TokenType::Var) {
24508                    self.skip();
24509                }
24510                body = Some(FunctionBody::RawBlock(format!("{} END", raw)));
24511                break;
24512            } else {
24513                break;
24514            }
24515        }
24516
24517        Ok(Expression::CreateProcedure(Box::new(CreateProcedure {
24518            name,
24519            parameters,
24520            body,
24521            or_replace,
24522            or_alter,
24523            if_not_exists,
24524            language,
24525            security,
24526            return_type,
24527            execute_as,
24528            with_options,
24529            has_parens,
24530            use_proc_keyword,
24531        })))
24532    }
24533
24534    /// Parse DROP PROCEDURE statement
24535    fn parse_drop_procedure(&mut self) -> Result<Expression> {
24536        self.expect(TokenType::Procedure)?;
24537
24538        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
24539        let name = self.parse_table_ref()?;
24540
24541        let parameters = if self.match_token(TokenType::LParen) {
24542            let mut types = Vec::new();
24543            if !self.check(TokenType::RParen) {
24544                loop {
24545                    types.push(self.parse_data_type()?);
24546                    if !self.match_token(TokenType::Comma) {
24547                        break;
24548                    }
24549                }
24550            }
24551            self.expect(TokenType::RParen)?;
24552            Some(types)
24553        } else {
24554            None
24555        };
24556
24557        let cascade = self.match_token(TokenType::Cascade);
24558        if !cascade {
24559            self.match_token(TokenType::Restrict);
24560        }
24561
24562        Ok(Expression::DropProcedure(Box::new(DropProcedure {
24563            name,
24564            parameters,
24565            if_exists,
24566            cascade,
24567        })))
24568    }
24569
24570    /// Parse CREATE SEQUENCE statement
24571    fn parse_create_sequence(&mut self, temporary: bool, or_replace: bool) -> Result<Expression> {
24572        self.expect(TokenType::Sequence)?;
24573
24574        let if_not_exists =
24575            self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
24576        let name = self.parse_table_ref()?;
24577
24578        let mut seq = CreateSequence {
24579            name,
24580            if_not_exists,
24581            temporary,
24582            or_replace,
24583            as_type: None,
24584            increment: None,
24585            minvalue: None,
24586            maxvalue: None,
24587            start: None,
24588            cache: None,
24589            cycle: false,
24590            owned_by: None,
24591            owned_by_none: false,
24592            order: None,
24593            comment: None,
24594            sharing: None,
24595            scale_modifier: None,
24596            shard_modifier: None,
24597            property_order: Vec::new(),
24598        };
24599
24600        // Parse optional AS <type> clause (e.g., AS SMALLINT, AS BIGINT)
24601        if self.match_token(TokenType::As) {
24602            seq.as_type = Some(self.parse_data_type()?);
24603        }
24604
24605        // Parse sequence options
24606        // Handle optional WITH keyword before options (Snowflake: WITH START = n INCREMENT = n)
24607        self.match_token(TokenType::With);
24608
24609        loop {
24610            // Skip optional commas between options (Snowflake uses comma-separated options)
24611            self.match_token(TokenType::Comma);
24612
24613            if self.is_at_end() || self.check(TokenType::Semicolon) {
24614                break;
24615            }
24616
24617            if self.match_token(TokenType::Increment) || self.match_identifier("INCREMENT") {
24618                self.match_token(TokenType::By);
24619                self.match_token(TokenType::Eq); // Snowflake uses = instead of BY
24620                seq.increment = Some(self.parse_signed_integer()?);
24621                seq.property_order.push(SeqPropKind::Increment);
24622            } else if self.match_token(TokenType::Minvalue) {
24623                seq.minvalue = Some(SequenceBound::Value(self.parse_signed_integer()?));
24624                seq.property_order.push(SeqPropKind::Minvalue);
24625            } else if self.match_keywords(&[TokenType::No, TokenType::Minvalue]) {
24626                seq.minvalue = Some(SequenceBound::None);
24627                seq.property_order.push(SeqPropKind::Minvalue);
24628            } else if self.match_identifier("NOMINVALUE") {
24629                seq.minvalue = Some(SequenceBound::None);
24630                seq.property_order.push(SeqPropKind::NoMinvalueWord);
24631            } else if self.match_token(TokenType::Maxvalue) {
24632                seq.maxvalue = Some(SequenceBound::Value(self.parse_signed_integer()?));
24633                seq.property_order.push(SeqPropKind::Maxvalue);
24634            } else if self.match_keywords(&[TokenType::No, TokenType::Maxvalue]) {
24635                seq.maxvalue = Some(SequenceBound::None);
24636                seq.property_order.push(SeqPropKind::Maxvalue);
24637            } else if self.match_identifier("NOMAXVALUE") {
24638                seq.maxvalue = Some(SequenceBound::None);
24639                seq.property_order.push(SeqPropKind::NoMaxvalueWord);
24640            } else if self.match_token(TokenType::Start) {
24641                self.match_token(TokenType::With);
24642                self.match_token(TokenType::Eq); // Snowflake uses = instead of WITH
24643                seq.start = Some(self.parse_signed_integer()?);
24644                seq.property_order.push(SeqPropKind::Start);
24645            } else if self.match_token(TokenType::Cache) {
24646                seq.cache = Some(self.parse_signed_integer()?);
24647                seq.property_order.push(SeqPropKind::Cache);
24648            } else if self.match_identifier("NOCACHE") {
24649                // Oracle: NOCACHE (single word)
24650                seq.property_order.push(SeqPropKind::NoCacheWord);
24651            } else if self.match_token(TokenType::Cycle) {
24652                seq.cycle = true;
24653                seq.property_order.push(SeqPropKind::Cycle);
24654            } else if self.match_token(TokenType::NoCycle) {
24655                // NOCYCLE keyword token - preserve as single word
24656                seq.cycle = false;
24657                seq.property_order.push(SeqPropKind::NoCycleWord);
24658            } else if self.match_token(TokenType::No) {
24659                // Two-word NO forms
24660                if self.match_token(TokenType::Cycle) {
24661                    seq.cycle = false;
24662                    seq.property_order.push(SeqPropKind::NoCycle);
24663                } else if self.match_token(TokenType::Cache) || self.match_identifier("CACHE") {
24664                    seq.property_order.push(SeqPropKind::NoCache);
24665                } else if self.match_token(TokenType::Minvalue) {
24666                    seq.minvalue = Some(SequenceBound::None);
24667                    seq.property_order.push(SeqPropKind::Minvalue);
24668                } else if self.match_token(TokenType::Maxvalue) {
24669                    seq.maxvalue = Some(SequenceBound::None);
24670                    seq.property_order.push(SeqPropKind::Maxvalue);
24671                } else {
24672                    // Unexpected token after NO
24673                    break;
24674                }
24675            } else if self.match_token(TokenType::Owned) {
24676                self.expect(TokenType::By)?;
24677                if self.match_identifier("NONE") {
24678                    seq.owned_by = None;
24679                    seq.owned_by_none = true;
24680                } else {
24681                    seq.owned_by = Some(self.parse_table_ref()?);
24682                }
24683                seq.property_order.push(SeqPropKind::OwnedBy);
24684            } else if self.match_token(TokenType::Order) {
24685                // Snowflake/Oracle: ORDER option
24686                seq.order = Some(true);
24687                seq.property_order.push(SeqPropKind::Order);
24688            } else if self.match_identifier("NOORDER") {
24689                // Snowflake/Oracle: NOORDER option
24690                seq.order = Some(false);
24691                seq.property_order.push(SeqPropKind::NoOrder);
24692            } else if self.match_token(TokenType::Comment) || self.match_identifier("COMMENT") {
24693                // Snowflake: COMMENT = 'value'
24694                self.expect(TokenType::Eq)?;
24695                let comment_val = self.expect(TokenType::String)?;
24696                seq.comment = Some(comment_val.text.clone());
24697                seq.property_order.push(SeqPropKind::Comment);
24698            } else if self.match_identifier("SHARING") {
24699                // Oracle: SHARING=value
24700                self.expect(TokenType::Eq)?;
24701                let val = self.expect_identifier_or_keyword()?;
24702                seq.sharing = Some(val);
24703                seq.property_order.push(SeqPropKind::Sharing);
24704            } else if self.match_identifier("NOKEEP") {
24705                seq.property_order.push(SeqPropKind::NoKeep);
24706            } else if self.match_token(TokenType::Keep) || self.match_identifier("KEEP") {
24707                seq.property_order.push(SeqPropKind::Keep);
24708            } else if self.match_identifier("SCALE") {
24709                let modifier = if self.match_identifier("EXTEND") {
24710                    "EXTEND".to_string()
24711                } else if self.match_identifier("NOEXTEND") {
24712                    "NOEXTEND".to_string()
24713                } else {
24714                    String::new()
24715                };
24716                seq.scale_modifier = Some(modifier);
24717                seq.property_order.push(SeqPropKind::Scale);
24718            } else if self.match_identifier("NOSCALE") {
24719                seq.property_order.push(SeqPropKind::NoScale);
24720            } else if self.match_identifier("SHARD") {
24721                let modifier = if self.match_identifier("EXTEND") {
24722                    "EXTEND".to_string()
24723                } else if self.match_identifier("NOEXTEND") {
24724                    "NOEXTEND".to_string()
24725                } else {
24726                    String::new()
24727                };
24728                seq.shard_modifier = Some(modifier);
24729                seq.property_order.push(SeqPropKind::Shard);
24730            } else if self.match_identifier("NOSHARD") {
24731                seq.property_order.push(SeqPropKind::NoShard);
24732            } else if self.match_identifier("SESSION") {
24733                seq.property_order.push(SeqPropKind::Session);
24734            } else if self.match_identifier("GLOBAL") {
24735                seq.property_order.push(SeqPropKind::Global);
24736            } else {
24737                break;
24738            }
24739        }
24740
24741        Ok(Expression::CreateSequence(Box::new(seq)))
24742    }
24743
24744    /// Parse a signed integer (positive or negative)
24745    fn parse_signed_integer(&mut self) -> Result<i64> {
24746        let negative = self.match_token(TokenType::Dash);
24747        let tok = self.expect(TokenType::Number)?;
24748        let value: i64 = tok
24749            .text
24750            .parse()
24751            .map_err(|_| self.parse_error(format!("Invalid integer: {}", tok.text)))?;
24752        Ok(if negative { -value } else { value })
24753    }
24754
24755    /// Parse DROP SEQUENCE statement
24756    fn parse_drop_sequence(&mut self) -> Result<Expression> {
24757        self.expect(TokenType::Sequence)?;
24758
24759        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
24760        let name = self.parse_table_ref()?;
24761
24762        let cascade = self.match_token(TokenType::Cascade);
24763        if !cascade {
24764            self.match_token(TokenType::Restrict);
24765        }
24766
24767        Ok(Expression::DropSequence(Box::new(DropSequence {
24768            name,
24769            if_exists,
24770            cascade,
24771        })))
24772    }
24773
24774    /// Parse ALTER SEQUENCE statement
24775    fn parse_alter_sequence(&mut self) -> Result<Expression> {
24776        self.expect(TokenType::Sequence)?;
24777
24778        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
24779        let name = self.parse_table_ref()?;
24780
24781        let mut seq = AlterSequence {
24782            name,
24783            if_exists,
24784            increment: None,
24785            minvalue: None,
24786            maxvalue: None,
24787            start: None,
24788            restart: None,
24789            cache: None,
24790            cycle: None,
24791            owned_by: None,
24792        };
24793
24794        // Parse sequence options
24795        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
24796            if self.match_token(TokenType::Increment) || self.match_identifier("INCREMENT") {
24797                self.match_token(TokenType::By);
24798                seq.increment = Some(self.parse_signed_integer()?);
24799            } else if self.match_token(TokenType::Minvalue) {
24800                seq.minvalue = Some(SequenceBound::Value(self.parse_signed_integer()?));
24801            } else if self.match_keywords(&[TokenType::No, TokenType::Minvalue]) {
24802                seq.minvalue = Some(SequenceBound::None);
24803            } else if self.match_token(TokenType::Maxvalue) {
24804                seq.maxvalue = Some(SequenceBound::Value(self.parse_signed_integer()?));
24805            } else if self.match_keywords(&[TokenType::No, TokenType::Maxvalue]) {
24806                seq.maxvalue = Some(SequenceBound::None);
24807            } else if self.match_token(TokenType::Start) {
24808                self.match_token(TokenType::With);
24809                seq.start = Some(self.parse_signed_integer()?);
24810            } else if self.match_token(TokenType::Restart) {
24811                if self.match_token(TokenType::With)
24812                    || self.check(TokenType::Number)
24813                    || self.check(TokenType::Dash)
24814                {
24815                    seq.restart = Some(Some(self.parse_signed_integer()?));
24816                } else {
24817                    seq.restart = Some(None);
24818                }
24819            } else if self.match_token(TokenType::Cache) {
24820                seq.cache = Some(self.parse_signed_integer()?);
24821            } else if self.match_token(TokenType::Cycle) {
24822                seq.cycle = Some(true);
24823            } else if self.match_token(TokenType::NoCycle) {
24824                seq.cycle = Some(false);
24825            } else if self.match_token(TokenType::Owned) {
24826                self.expect(TokenType::By)?;
24827                if self.match_identifier("NONE") {
24828                    seq.owned_by = Some(None);
24829                } else {
24830                    seq.owned_by = Some(Some(self.parse_table_ref()?));
24831                }
24832            } else {
24833                break;
24834            }
24835        }
24836
24837        Ok(Expression::AlterSequence(Box::new(seq)))
24838    }
24839
24840    /// Parse CREATE TRIGGER statement
24841    fn parse_create_trigger(
24842        &mut self,
24843        or_replace: bool,
24844        or_alter: bool,
24845        constraint: bool,
24846        create_pos: usize,
24847    ) -> Result<Expression> {
24848        self.expect(TokenType::Trigger)?;
24849
24850        let name = self.expect_identifier_with_quoted()?;
24851
24852        // TSQL triggers: CREATE TRIGGER name ON table AFTER INSERT AS BEGIN...END
24853        // These have ON before timing, unlike standard triggers.
24854        // Fall back to Command for these (matches Python sqlglot behavior).
24855        if self.check(TokenType::On) && !constraint {
24856            self.current = create_pos;
24857            return self.fallback_to_command(create_pos);
24858        }
24859
24860        // Parse timing (BEFORE, AFTER, INSTEAD OF)
24861        let timing = if self.match_token(TokenType::Before) {
24862            TriggerTiming::Before
24863        } else if self.match_token(TokenType::After) {
24864            TriggerTiming::After
24865        } else if self.match_token(TokenType::Instead) {
24866            self.expect(TokenType::Of)?;
24867            TriggerTiming::InsteadOf
24868        } else {
24869            // Fall back to Command for unknown trigger syntax
24870            self.current = create_pos;
24871            return self.fallback_to_command(create_pos);
24872        };
24873
24874        // Parse events
24875        let mut events = Vec::new();
24876        loop {
24877            if self.match_token(TokenType::Insert) {
24878                events.push(TriggerEvent::Insert);
24879            } else if self.match_token(TokenType::Update) {
24880                if self.match_token(TokenType::Of) {
24881                    let mut cols = Vec::new();
24882                    loop {
24883                        cols.push(Identifier::new(self.expect_identifier()?));
24884                        if !self.match_token(TokenType::Comma) {
24885                            break;
24886                        }
24887                    }
24888                    events.push(TriggerEvent::Update(Some(cols)));
24889                } else {
24890                    events.push(TriggerEvent::Update(None));
24891                }
24892            } else if self.match_token(TokenType::Delete) {
24893                events.push(TriggerEvent::Delete);
24894            } else if self.match_token(TokenType::Truncate) {
24895                events.push(TriggerEvent::Truncate);
24896            } else {
24897                break;
24898            }
24899
24900            if !self.match_token(TokenType::Or) {
24901                break;
24902            }
24903        }
24904
24905        self.expect(TokenType::On)?;
24906        let table = self.parse_table_ref()?;
24907
24908        // Parse optional REFERENCING clause (for non-constraint triggers)
24909        let referencing = if !constraint && self.match_token(TokenType::Referencing) {
24910            let mut ref_clause = TriggerReferencing {
24911                old_table: None,
24912                new_table: None,
24913                old_row: None,
24914                new_row: None,
24915            };
24916            while self.match_token(TokenType::Old) || self.match_token(TokenType::New) {
24917                let is_old = self.previous().token_type == TokenType::Old;
24918                let is_table = self.match_token(TokenType::Table);
24919                let _is_row = !is_table && self.match_token(TokenType::Row);
24920                self.match_token(TokenType::As);
24921                let alias = Identifier::new(self.expect_identifier()?);
24922
24923                if is_old {
24924                    if is_table {
24925                        ref_clause.old_table = Some(alias);
24926                    } else {
24927                        ref_clause.old_row = Some(alias);
24928                    }
24929                } else {
24930                    if is_table {
24931                        ref_clause.new_table = Some(alias);
24932                    } else {
24933                        ref_clause.new_row = Some(alias);
24934                    }
24935                }
24936            }
24937            Some(ref_clause)
24938        } else {
24939            None
24940        };
24941
24942        // Parse deferrable options for constraint triggers (comes before FOR EACH ROW in PostgreSQL)
24943        let mut deferrable = None;
24944        let mut initially_deferred = None;
24945        if constraint {
24946            if self.match_identifier("DEFERRABLE") {
24947                deferrable = Some(true);
24948            } else if self.match_keywords(&[TokenType::Not, TokenType::Identifier]) {
24949                // NOT DEFERRABLE
24950                deferrable = Some(false);
24951            }
24952            if self.match_identifier("INITIALLY") {
24953                if self.match_identifier("DEFERRED") {
24954                    initially_deferred = Some(true);
24955                } else if self.match_identifier("IMMEDIATE") {
24956                    initially_deferred = Some(false);
24957                }
24958            }
24959        }
24960
24961        // Parse FOR EACH ROW/STATEMENT (optional)
24962        let for_each = if self.match_token(TokenType::For) {
24963            self.match_token(TokenType::Each);
24964            if self.match_token(TokenType::Row) {
24965                Some(TriggerForEach::Row)
24966            } else if self.match_token(TokenType::Statement) {
24967                Some(TriggerForEach::Statement)
24968            } else {
24969                Some(TriggerForEach::Row)
24970            }
24971        } else {
24972            None
24973        };
24974
24975        // Parse optional WHEN clause (parentheses are optional, e.g. SQLite)
24976        let (when, when_paren) = if self.match_token(TokenType::When) {
24977            let has_paren = self.match_token(TokenType::LParen);
24978            let expr = self.parse_expression()?;
24979            if has_paren {
24980                self.expect(TokenType::RParen)?;
24981            }
24982            (Some(expr), has_paren)
24983        } else {
24984            (None, false)
24985        };
24986
24987        // Parse trigger body
24988        let body = if self.match_token(TokenType::Execute) {
24989            self.match_token(TokenType::Function);
24990            self.match_token(TokenType::Procedure);
24991            let func_name = self.parse_table_ref()?;
24992            self.expect(TokenType::LParen)?;
24993            let mut args = Vec::new();
24994            if !self.check(TokenType::RParen) {
24995                loop {
24996                    args.push(self.parse_expression()?);
24997                    if !self.match_token(TokenType::Comma) {
24998                        break;
24999                    }
25000                }
25001            }
25002            self.expect(TokenType::RParen)?;
25003            TriggerBody::Execute {
25004                function: func_name,
25005                args,
25006            }
25007        } else if self.match_token(TokenType::Begin) {
25008            // Record start position (first token after BEGIN)
25009            let body_start = if !self.is_at_end() {
25010                self.tokens[self.current].span.start
25011            } else {
25012                0
25013            };
25014            let mut depth = 1;
25015            while depth > 0 && !self.is_at_end() {
25016                let tok = self.advance();
25017                if tok.token_type == TokenType::Begin {
25018                    depth += 1;
25019                } else if tok.token_type == TokenType::End {
25020                    depth -= 1;
25021                    if depth == 0 {
25022                        break;
25023                    }
25024                }
25025            }
25026            // Extract verbatim text from source if available
25027            let block_content = if let Some(ref source) = self.source {
25028                // End position is the start of the END token
25029                let body_end = if self.current > 0 {
25030                    self.tokens[self.current - 1].span.start
25031                } else {
25032                    body_start
25033                };
25034                source[body_start..body_end].trim().to_string()
25035            } else {
25036                // Fallback: no source available
25037                String::new()
25038            };
25039            TriggerBody::Block(block_content)
25040        } else {
25041            return Err(self.parse_error("Expected EXECUTE or BEGIN in trigger body"));
25042        };
25043
25044        Ok(Expression::CreateTrigger(Box::new(CreateTrigger {
25045            name,
25046            table,
25047            timing,
25048            events,
25049            for_each,
25050            when,
25051            when_paren,
25052            body,
25053            or_replace,
25054            or_alter,
25055            constraint,
25056            deferrable,
25057            initially_deferred,
25058            referencing,
25059        })))
25060    }
25061
25062    /// Parse DROP TRIGGER statement
25063    fn parse_drop_trigger(&mut self) -> Result<Expression> {
25064        self.expect(TokenType::Trigger)?;
25065
25066        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
25067        let name = Identifier::new(self.expect_identifier()?);
25068
25069        let table = if self.match_token(TokenType::On) {
25070            Some(self.parse_table_ref()?)
25071        } else {
25072            None
25073        };
25074
25075        let cascade = self.match_token(TokenType::Cascade);
25076        if !cascade {
25077            self.match_token(TokenType::Restrict);
25078        }
25079
25080        Ok(Expression::DropTrigger(Box::new(DropTrigger {
25081            name,
25082            table,
25083            if_exists,
25084            cascade,
25085        })))
25086    }
25087
25088    /// Parse CREATE TYPE statement
25089    fn parse_create_type(&mut self) -> Result<Expression> {
25090        self.expect(TokenType::Type)?;
25091
25092        let if_not_exists =
25093            self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
25094        let name = self.parse_table_ref()?;
25095
25096        self.expect(TokenType::As)?;
25097
25098        let definition = if self.match_token(TokenType::Enum) {
25099            // ENUM type
25100            self.expect(TokenType::LParen)?;
25101            let mut values = Vec::new();
25102            loop {
25103                let tok = self.expect(TokenType::String)?;
25104                values.push(tok.text.trim_matches('\'').to_string());
25105                if !self.match_token(TokenType::Comma) {
25106                    break;
25107                }
25108            }
25109            self.expect(TokenType::RParen)?;
25110            TypeDefinition::Enum(values)
25111        } else if self.match_token(TokenType::LParen) {
25112            // Composite type
25113            let mut attrs = Vec::new();
25114            loop {
25115                let attr_name = Identifier::new(self.expect_identifier()?);
25116                let data_type = self.parse_data_type()?;
25117                let collate = if self.match_identifier("COLLATE") {
25118                    Some(Identifier::new(self.expect_identifier()?))
25119                } else {
25120                    None
25121                };
25122                attrs.push(TypeAttribute {
25123                    name: attr_name,
25124                    data_type,
25125                    collate,
25126                });
25127                if !self.match_token(TokenType::Comma) {
25128                    break;
25129                }
25130            }
25131            self.expect(TokenType::RParen)?;
25132            TypeDefinition::Composite(attrs)
25133        } else if self.match_token(TokenType::Range) {
25134            // Range type
25135            self.expect(TokenType::LParen)?;
25136            self.match_identifier("SUBTYPE");
25137            self.match_token(TokenType::Eq);
25138            let subtype = self.parse_data_type()?;
25139
25140            let mut subtype_diff = None;
25141            let mut canonical = None;
25142
25143            while self.match_token(TokenType::Comma) {
25144                if self.match_identifier("SUBTYPE_DIFF") {
25145                    self.match_token(TokenType::Eq);
25146                    subtype_diff = Some(self.expect_identifier()?);
25147                } else if self.match_identifier("CANONICAL") {
25148                    self.match_token(TokenType::Eq);
25149                    canonical = Some(self.expect_identifier()?);
25150                }
25151            }
25152            self.expect(TokenType::RParen)?;
25153
25154            TypeDefinition::Range {
25155                subtype,
25156                subtype_diff,
25157                canonical,
25158            }
25159        } else {
25160            return Err(
25161                self.parse_error("Expected ENUM, composite type definition, or RANGE after AS")
25162            );
25163        };
25164
25165        Ok(Expression::CreateType(Box::new(CreateType {
25166            name,
25167            definition,
25168            if_not_exists,
25169        })))
25170    }
25171
25172    /// Parse CREATE DOMAIN statement
25173    fn parse_create_domain(&mut self) -> Result<Expression> {
25174        self.expect(TokenType::Domain)?;
25175
25176        let if_not_exists =
25177            self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
25178        let name = self.parse_table_ref()?;
25179
25180        self.expect(TokenType::As)?;
25181        let base_type = self.parse_data_type()?;
25182
25183        let mut default = None;
25184        let mut constraints = Vec::new();
25185
25186        // Parse domain options
25187        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
25188            if self.match_token(TokenType::Default) {
25189                default = Some(self.parse_expression()?);
25190            } else if self.match_token(TokenType::Constraint) {
25191                let constr_name = Some(Identifier::new(self.expect_identifier()?));
25192                self.expect(TokenType::Check)?;
25193                self.expect(TokenType::LParen)?;
25194                let check_expr = self.parse_expression()?;
25195                self.expect(TokenType::RParen)?;
25196                constraints.push(DomainConstraint {
25197                    name: constr_name,
25198                    check: check_expr,
25199                });
25200            } else if self.match_token(TokenType::Check) {
25201                self.expect(TokenType::LParen)?;
25202                let check_expr = self.parse_expression()?;
25203                self.expect(TokenType::RParen)?;
25204                constraints.push(DomainConstraint {
25205                    name: None,
25206                    check: check_expr,
25207                });
25208            } else if self.match_keywords(&[TokenType::Not, TokenType::Null]) {
25209                // NOT NULL is a constraint - represented as VALUE IS NOT NULL
25210                constraints.push(DomainConstraint {
25211                    name: None,
25212                    check: Expression::IsNull(Box::new(IsNull {
25213                        this: Expression::Identifier(Identifier::new("VALUE")),
25214                        not: true,
25215                        postfix_form: false,
25216                    })),
25217                });
25218            } else {
25219                break;
25220            }
25221        }
25222
25223        Ok(Expression::CreateType(Box::new(CreateType {
25224            name,
25225            definition: TypeDefinition::Domain {
25226                base_type,
25227                default,
25228                constraints,
25229            },
25230            if_not_exists,
25231        })))
25232    }
25233
25234    /// Parse CREATE STAGE statement (Snowflake)
25235    fn parse_create_stage(&mut self, or_replace: bool, temporary: bool) -> Result<Expression> {
25236        self.skip(); // consume STAGE (identifier)
25237                     // Parse remaining tokens, normalizing FILE_FORMAT clause
25238        let start = self.current;
25239        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
25240            self.skip();
25241        }
25242        let sql = self.tokens_to_sql_stage_format(start, self.current);
25243
25244        // Build the CREATE prefix with modifiers
25245        let mut prefix = String::from("CREATE");
25246        if or_replace {
25247            prefix.push_str(" OR REPLACE");
25248        }
25249        if temporary {
25250            prefix.push_str(" TEMPORARY");
25251        }
25252        prefix.push_str(" STAGE");
25253
25254        Ok(Expression::Raw(Raw {
25255            sql: format!("{} {}", prefix, sql),
25256        }))
25257    }
25258
25259    /// Parse CREATE TAG statement (Snowflake)
25260    fn parse_create_tag(&mut self, or_replace: bool) -> Result<Expression> {
25261        self.skip(); // consume TAG
25262                     // Capture remaining tokens as raw SQL
25263        let start = self.current;
25264        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
25265            self.skip();
25266        }
25267        let sql = self.tokens_to_sql(start, self.current);
25268        let prefix = if or_replace {
25269            "CREATE OR REPLACE TAG"
25270        } else {
25271            "CREATE TAG"
25272        };
25273        Ok(Expression::Raw(Raw {
25274            sql: format!("{} {}", prefix, sql),
25275        }))
25276    }
25277
25278    /// Parse CREATE STREAM statement (Snowflake)
25279    fn parse_create_stream(&mut self, _or_replace: bool) -> Result<Expression> {
25280        self.skip(); // consume STREAM
25281                     // Capture remaining tokens as raw SQL
25282        let start = self.current;
25283        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
25284            self.skip();
25285        }
25286        let sql = self.tokens_to_sql(start, self.current);
25287        Ok(Expression::Raw(Raw {
25288            sql: format!("CREATE STREAM {}", sql),
25289        }))
25290    }
25291
25292    /// Parse CREATE TASK statement (Snowflake)
25293    /// CREATE [OR REPLACE] TASK [IF NOT EXISTS] name
25294    ///   [WAREHOUSE = wh] [SCHEDULE = '...'] [AFTER task1, ...] [WHEN expr]
25295    ///   AS sql_statement
25296    fn parse_create_task(&mut self, or_replace: bool) -> Result<Expression> {
25297        self.skip(); // consume TASK
25298
25299        let if_not_exists =
25300            self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
25301
25302        // Parse task name (possibly qualified: db.schema.task)
25303        let mut name = String::new();
25304        if self.check(TokenType::Var) || self.check_keyword() || self.is_identifier_token() {
25305            name.push_str(&self.advance().text);
25306        }
25307        while self.check(TokenType::Dot) {
25308            self.skip();
25309            name.push('.');
25310            if self.check(TokenType::Var) || self.check_keyword() || self.is_identifier_token() {
25311                name.push_str(&self.advance().text);
25312            }
25313        }
25314
25315        // Capture properties as raw text until AS keyword
25316        let props_start = self.current;
25317        while !self.is_at_end() && !self.check(TokenType::Semicolon) && !self.check(TokenType::As) {
25318            self.skip();
25319        }
25320        let properties = self.tokens_to_sql(props_start, self.current);
25321
25322        // Expect AS keyword followed by the SQL body
25323        if !self.match_token(TokenType::As) {
25324            return Err(self.parse_error("Expected AS keyword in CREATE TASK"));
25325        }
25326
25327        let body = self.parse_statement()?;
25328
25329        Ok(Expression::CreateTask(Box::new(
25330            crate::expressions::CreateTask {
25331                or_replace,
25332                if_not_exists,
25333                name,
25334                properties,
25335                body,
25336            },
25337        )))
25338    }
25339
25340    /// Parse CREATE FILE FORMAT statement (Snowflake)
25341    fn parse_create_file_format(
25342        &mut self,
25343        or_replace: bool,
25344        temporary: bool,
25345    ) -> Result<Expression> {
25346        self.skip(); // consume FILE
25347        self.skip(); // consume FORMAT
25348                     // Capture remaining tokens as raw SQL
25349        let start = self.current;
25350        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
25351            self.skip();
25352        }
25353        let sql = self.tokens_to_sql(start, self.current);
25354        let mut prefix = String::from("CREATE");
25355        if or_replace {
25356            prefix.push_str(" OR REPLACE");
25357        }
25358        if temporary {
25359            prefix.push_str(" TEMPORARY");
25360        }
25361        prefix.push_str(" FILE FORMAT ");
25362        prefix.push_str(&sql);
25363        Ok(Expression::Raw(Raw { sql: prefix }))
25364    }
25365
25366    /// Parse DROP TYPE statement
25367    fn parse_drop_type(&mut self) -> Result<Expression> {
25368        self.expect(TokenType::Type)?;
25369
25370        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
25371        let name = self.parse_table_ref()?;
25372
25373        let cascade = self.match_token(TokenType::Cascade);
25374        if !cascade {
25375            self.match_token(TokenType::Restrict);
25376        }
25377
25378        Ok(Expression::DropType(Box::new(DropType {
25379            name,
25380            if_exists,
25381            cascade,
25382        })))
25383    }
25384
25385    fn parse_alter_view_with_modifiers(
25386        &mut self,
25387        algorithm: Option<String>,
25388        definer: Option<String>,
25389        sql_security: Option<String>,
25390    ) -> Result<Expression> {
25391        self.expect(TokenType::View)?;
25392
25393        let name = self.parse_table_ref()?;
25394        let mut actions = Vec::new();
25395
25396        // Hive: Optional column aliases with optional COMMENT: (c1, c2) or (c1 COMMENT 'text', c2)
25397        // Only parse if we see LParen followed by identifier (not SELECT for subquery)
25398        let columns = if self.check(TokenType::LParen) {
25399            // Peek ahead to see if this looks like column aliases
25400            let saved = self.current;
25401            self.skip(); // consume LParen
25402
25403            // Check if this is an identifier (column name) vs SELECT keyword
25404            let is_column_aliases = self.check(TokenType::Identifier)
25405                || self.check(TokenType::Var)
25406                || self.check(TokenType::QuotedIdentifier);
25407
25408            if is_column_aliases {
25409                // Parse column aliases
25410                let mut cols = Vec::new();
25411                loop {
25412                    let col_name = self.expect_identifier()?;
25413                    // Optional COMMENT 'text'
25414                    let comment = if self.match_token(TokenType::Comment) {
25415                        Some(self.expect_string()?)
25416                    } else {
25417                        None
25418                    };
25419                    cols.push(ViewColumn {
25420                        name: Identifier::new(col_name),
25421                        comment,
25422                        options: Vec::new(),
25423                    });
25424                    if !self.match_token(TokenType::Comma) {
25425                        break;
25426                    }
25427                }
25428                self.expect(TokenType::RParen)?;
25429                cols
25430            } else {
25431                self.current = saved; // retreat
25432                Vec::new()
25433            }
25434        } else {
25435            Vec::new()
25436        };
25437
25438        // TSQL: WITH option (SCHEMABINDING, ENCRYPTION, VIEW_METADATA) before AS
25439        let with_option = if self.match_token(TokenType::With) {
25440            let opt = self.expect_identifier_or_keyword()?;
25441            Some(opt.to_ascii_uppercase())
25442        } else {
25443            None
25444        };
25445
25446        // Parse actions
25447        if self.match_token(TokenType::Rename) {
25448            self.expect(TokenType::To)?;
25449            actions.push(AlterViewAction::Rename(self.parse_table_ref()?));
25450        } else if self.match_identifier("OWNER") {
25451            self.expect(TokenType::To)?;
25452            actions.push(AlterViewAction::OwnerTo(Identifier::new(
25453                self.expect_identifier()?,
25454            )));
25455        } else if self.match_token(TokenType::Set) {
25456            // Hive: SET TBLPROPERTIES ('key'='value', ...) or SET SCHEMA name
25457            // Trino: SET AUTHORIZATION [ROLE] user
25458            if self.match_identifier("TBLPROPERTIES") {
25459                let props = self.parse_tblproperties_key_value_list()?;
25460                actions.push(AlterViewAction::SetTblproperties(props));
25461            } else if self.match_token(TokenType::Authorization) {
25462                let mut auth_text = String::new();
25463                if self.match_texts(&["ROLE"]) {
25464                    auth_text.push_str("ROLE ");
25465                }
25466                let user = self.expect_identifier()?;
25467                auth_text.push_str(&user);
25468                actions.push(AlterViewAction::SetAuthorization(auth_text));
25469            } else {
25470                self.expect(TokenType::Schema)?;
25471                actions.push(AlterViewAction::SetSchema(Identifier::new(
25472                    self.expect_identifier()?,
25473                )));
25474            }
25475        } else if self.match_identifier("UNSET") {
25476            // Hive: UNSET TBLPROPERTIES ('key1', 'key2', ...)
25477            if !self.match_identifier("TBLPROPERTIES") {
25478                return Err(self.parse_error("Expected TBLPROPERTIES after UNSET"));
25479            }
25480            let keys = self.parse_tblproperties_key_list()?;
25481            actions.push(AlterViewAction::UnsetTblproperties(keys));
25482        } else if self.match_token(TokenType::Alter) {
25483            self.match_token(TokenType::Column);
25484            let col_name = Identifier::new(self.expect_identifier()?);
25485            let action = self.parse_alter_column_action()?;
25486            actions.push(AlterViewAction::AlterColumn {
25487                name: col_name,
25488                action,
25489            });
25490        } else if self.match_token(TokenType::As) {
25491            // AS SELECT ... or AS SELECT ... UNION ... (redefine view query)
25492            let query = self.parse_statement()?;
25493            actions.push(AlterViewAction::AsSelect(Box::new(query)));
25494        }
25495
25496        Ok(Expression::AlterView(Box::new(AlterView {
25497            name,
25498            actions,
25499            algorithm,
25500            definer,
25501            sql_security,
25502            with_option,
25503            columns,
25504        })))
25505    }
25506
25507    /// Parse TBLPROPERTIES key-value list: ('key1'='value1', 'key2'='value2', ...)
25508    fn parse_tblproperties_key_value_list(&mut self) -> Result<Vec<(String, String)>> {
25509        self.expect(TokenType::LParen)?;
25510        let mut props = Vec::new();
25511        loop {
25512            let key = self.expect_string()?;
25513            self.expect(TokenType::Eq)?;
25514            let value = self.expect_string()?;
25515            props.push((key, value));
25516            if !self.match_token(TokenType::Comma) {
25517                break;
25518            }
25519        }
25520        self.expect(TokenType::RParen)?;
25521        Ok(props)
25522    }
25523
25524    /// Parse TBLPROPERTIES key list (for UNSET): ('key1', 'key2', ...)
25525    fn parse_tblproperties_key_list(&mut self) -> Result<Vec<String>> {
25526        self.expect(TokenType::LParen)?;
25527        let mut keys = Vec::new();
25528        loop {
25529            let key = self.expect_string()?;
25530            keys.push(key);
25531            if !self.match_token(TokenType::Comma) {
25532                break;
25533            }
25534        }
25535        self.expect(TokenType::RParen)?;
25536        Ok(keys)
25537    }
25538
25539    /// Parse ALTER INDEX statement
25540    fn parse_alter_index(&mut self) -> Result<Expression> {
25541        self.expect(TokenType::Index)?;
25542
25543        // Use expect_identifier_or_keyword_with_quoted to preserve quoted flag
25544        let name = self.expect_identifier_or_keyword_with_quoted()?;
25545
25546        let table = if self.match_token(TokenType::On) {
25547            Some(self.parse_table_ref()?)
25548        } else {
25549            None
25550        };
25551
25552        let mut actions = Vec::new();
25553
25554        // Parse actions
25555        if self.match_token(TokenType::Rename) {
25556            self.expect(TokenType::To)?;
25557            // Also preserve quoted flag for the new name
25558            actions.push(AlterIndexAction::Rename(
25559                self.expect_identifier_or_keyword_with_quoted()?,
25560            ));
25561        } else if self.match_token(TokenType::Set) {
25562            self.match_identifier("TABLESPACE");
25563            actions.push(AlterIndexAction::SetTablespace(
25564                self.expect_identifier_or_keyword_with_quoted()?,
25565            ));
25566        } else if self.match_identifier("VISIBLE") {
25567            actions.push(AlterIndexAction::Visible(true));
25568        } else if self.match_identifier("INVISIBLE") {
25569            actions.push(AlterIndexAction::Visible(false));
25570        }
25571
25572        Ok(Expression::AlterIndex(Box::new(AlterIndex {
25573            name,
25574            table,
25575            actions,
25576        })))
25577    }
25578
25579    // ==================== End DDL Parsing ====================
25580
25581    /// Parse an expression (with precedence)
25582    /// Assignment (:=) has lower precedence than OR, matching Python sqlglot's
25583    /// _parse_expression -> _parse_assignment -> _parse_disjunction chain
25584    fn parse_expression(&mut self) -> Result<Expression> {
25585        let mut left = self.parse_or()?;
25586
25587        // Handle := assignment operator (MySQL @var := val, DuckDB named args/settings)
25588        // This has lower precedence than OR
25589        while self.match_token(TokenType::ColonEq) {
25590            let right = self.parse_or()?;
25591            left = Expression::PropertyEQ(Box::new(BinaryOp::new(left, right)));
25592        }
25593
25594        // ClickHouse ternary operator: condition ? true_value : false_value
25595        // Parsed as: CASE WHEN condition THEN true_value ELSE false_value END
25596        if matches!(
25597            self.config.dialect,
25598            Some(crate::dialects::DialectType::ClickHouse)
25599        ) && self.match_token(TokenType::Parameter)
25600        {
25601            if self.check(TokenType::Colon) {
25602                return Err(
25603                    self.parse_error("Expected true expression after ? in ClickHouse ternary")
25604                );
25605            }
25606            let true_value = self.parse_or()?;
25607            let false_value = if self.match_token(TokenType::Colon) {
25608                self.parse_or()?
25609            } else {
25610                Expression::Null(Null)
25611            };
25612            left = Expression::IfFunc(Box::new(IfFunc {
25613                original_name: None,
25614                condition: left,
25615                true_value,
25616                false_value: Some(false_value),
25617                inferred_type: None,
25618            }));
25619        }
25620
25621        // ClickHouse: APPLY(func) column transformer
25622        // e.g., COLUMNS('pattern') APPLY(toString) APPLY(length)
25623        // Also: APPLY func (no parens), APPLY(x -> expr) (lambda)
25624        // Only match APPLY when followed by ( — bare APPLY without ( is treated as an alias
25625        // by the select expression parser (e.g., SELECT col apply -> SELECT col AS apply)
25626        if matches!(
25627            self.config.dialect,
25628            Some(crate::dialects::DialectType::ClickHouse)
25629        ) {
25630            while self.check(TokenType::Apply) && self.check_next(TokenType::LParen) {
25631                self.skip(); // consume APPLY
25632                self.skip(); // consume (
25633                let expr = self.parse_expression()?;
25634                self.expect(TokenType::RParen)?;
25635                left = Expression::Apply(Box::new(crate::expressions::Apply {
25636                    this: Box::new(left),
25637                    expression: Box::new(expr),
25638                }));
25639            }
25640        }
25641
25642        Ok(left)
25643    }
25644
25645    /// Parse OR expressions
25646    fn parse_or(&mut self) -> Result<Expression> {
25647        let mut left = self.parse_xor()?;
25648
25649        while self.check(TokenType::Or)
25650            || (self.dpipe_is_logical_or() && self.check(TokenType::DPipe))
25651        {
25652            let mut all_comments = self.previous_trailing_comments().to_vec();
25653            // Also capture leading comments on the OR token (comments on a separate line before OR)
25654            all_comments.extend_from_slice(self.current_leading_comments());
25655            self.skip(); // consume OR
25656            all_comments.extend_from_slice(self.previous_trailing_comments());
25657            // Clear trailing_comments from left expression to avoid duplication
25658            if !all_comments.is_empty() {
25659                Self::clear_rightmost_trailing_comments(&mut left);
25660            }
25661            // Filter out empty/whitespace-only comments
25662            all_comments.retain(|c| !c.trim().is_empty());
25663            // Split: block comments go before operator, line comments go after
25664            let mut left_comments = Vec::new();
25665            let mut operator_comments = Vec::new();
25666            for comment in all_comments {
25667                if comment.starts_with("/*") {
25668                    left_comments.push(comment);
25669                } else {
25670                    operator_comments.push(comment);
25671                }
25672            }
25673            let mut right = self.parse_xor()?;
25674            // If parse_comparison stored pending leading comments, attach them
25675            if !self.pending_leading_comments.is_empty() {
25676                let pending = std::mem::take(&mut self.pending_leading_comments);
25677                right = Expression::Annotated(Box::new(Annotated {
25678                    this: right,
25679                    trailing_comments: pending,
25680                }));
25681            }
25682            left = Expression::Or(Box::new(BinaryOp {
25683                left,
25684                right,
25685                left_comments,
25686                operator_comments,
25687                trailing_comments: Vec::new(),
25688                inferred_type: None,
25689            }));
25690        }
25691
25692        Ok(Self::maybe_rebalance_boolean_chain(left, false))
25693    }
25694
25695    /// Whether `||` should be parsed as logical OR for the active dialect.
25696    fn dpipe_is_logical_or(&self) -> bool {
25697        matches!(
25698            self.config.dialect,
25699            Some(crate::dialects::DialectType::MySQL | crate::dialects::DialectType::Solr)
25700        )
25701    }
25702
25703    /// Parse XOR expressions (MySQL logical XOR)
25704    fn parse_xor(&mut self) -> Result<Expression> {
25705        let mut left = self.parse_and()?;
25706
25707        while self.match_token(TokenType::Xor) {
25708            let right = self.parse_and()?;
25709            left = Expression::Xor(Box::new(Xor {
25710                this: Some(Box::new(left)),
25711                expression: Some(Box::new(right)),
25712                expressions: Vec::new(),
25713            }));
25714        }
25715
25716        Ok(left)
25717    }
25718
25719    /// Parse AND expressions
25720    fn parse_and(&mut self) -> Result<Expression> {
25721        let mut left = self.parse_not()?;
25722
25723        while self.check(TokenType::And) {
25724            // Capture comments from the token before AND (left operand's last token)
25725            let mut all_comments = self.previous_trailing_comments().to_vec();
25726            // Also capture leading comments on the AND token (comments on a separate line before AND)
25727            all_comments.extend_from_slice(self.current_leading_comments());
25728            self.skip(); // consume AND
25729                         // Also capture any trailing comments on the AND token itself
25730            all_comments.extend_from_slice(self.previous_trailing_comments());
25731            // Clear trailing_comments from left expression to avoid duplication
25732            if !all_comments.is_empty() {
25733                Self::clear_rightmost_trailing_comments(&mut left);
25734            }
25735            // Filter out empty/whitespace-only comments (e.g., bare "--" with no content)
25736            all_comments.retain(|c| !c.trim().is_empty());
25737            // Split comments: block comments (/*...*/) go BEFORE the operator (left_comments),
25738            // line comments (raw text from --) go AFTER the operator (operator_comments).
25739            // This matches Python sqlglot's behavior where inline block comments stay
25740            // in-place and line comments shift to after the operator.
25741            let mut left_comments = Vec::new();
25742            let mut operator_comments = Vec::new();
25743            for comment in all_comments {
25744                if comment.starts_with("/*") {
25745                    left_comments.push(comment);
25746                } else {
25747                    operator_comments.push(comment);
25748                }
25749            }
25750            let mut right = self.parse_not()?;
25751            // If parse_comparison stored pending leading comments (comments before
25752            // the right operand's first token with no comparison following),
25753            // attach them as trailing_comments on the right expression.
25754            if !self.pending_leading_comments.is_empty() {
25755                let pending = std::mem::take(&mut self.pending_leading_comments);
25756                right = Expression::Annotated(Box::new(Annotated {
25757                    this: right,
25758                    trailing_comments: pending,
25759                }));
25760            }
25761            left = Expression::And(Box::new(BinaryOp {
25762                left,
25763                right,
25764                left_comments,
25765                operator_comments,
25766                trailing_comments: Vec::new(),
25767                inferred_type: None,
25768            }));
25769        }
25770
25771        Ok(Self::maybe_rebalance_boolean_chain(left, true))
25772    }
25773
25774    /// Rebalance AND/OR chains into a balanced tree when no connector comments are present.
25775    /// This keeps connector chain depth logarithmic for very large predicates.
25776    fn maybe_rebalance_boolean_chain(expr: Expression, is_and: bool) -> Expression {
25777        if !Self::should_rebalance_boolean_chain(&expr, is_and) {
25778            return expr;
25779        }
25780
25781        let terms = Self::flatten_boolean_terms_owned(expr, is_and);
25782        if terms.len() <= 2 {
25783            return Self::build_balanced_boolean_tree(terms, is_and);
25784        }
25785
25786        Self::build_balanced_boolean_tree(terms, is_and)
25787    }
25788
25789    fn should_rebalance_boolean_chain(expr: &Expression, is_and: bool) -> bool {
25790        let mut leaf_count = 0usize;
25791        let mut stack = vec![expr];
25792
25793        while let Some(node) = stack.pop() {
25794            match (is_and, node) {
25795                (true, Expression::And(op)) => {
25796                    if !op.left_comments.is_empty()
25797                        || !op.operator_comments.is_empty()
25798                        || !op.trailing_comments.is_empty()
25799                    {
25800                        return false;
25801                    }
25802                    stack.push(&op.right);
25803                    stack.push(&op.left);
25804                }
25805                (false, Expression::Or(op)) => {
25806                    if !op.left_comments.is_empty()
25807                        || !op.operator_comments.is_empty()
25808                        || !op.trailing_comments.is_empty()
25809                    {
25810                        return false;
25811                    }
25812                    stack.push(&op.right);
25813                    stack.push(&op.left);
25814                }
25815                _ => leaf_count += 1,
25816            }
25817        }
25818
25819        leaf_count > 2
25820    }
25821
25822    fn flatten_boolean_terms_owned(expr: Expression, is_and: bool) -> Vec<Expression> {
25823        let mut terms = Vec::new();
25824        let mut stack = vec![expr];
25825
25826        while let Some(node) = stack.pop() {
25827            match (is_and, node) {
25828                (true, Expression::And(op)) => {
25829                    stack.push(op.right);
25830                    stack.push(op.left);
25831                }
25832                (false, Expression::Or(op)) => {
25833                    stack.push(op.right);
25834                    stack.push(op.left);
25835                }
25836                (_, other) => terms.push(other),
25837            }
25838        }
25839
25840        terms
25841    }
25842
25843    fn build_balanced_boolean_tree(mut terms: Vec<Expression>, is_and: bool) -> Expression {
25844        if terms.is_empty() {
25845            return Expression::Null(Null);
25846        }
25847
25848        while terms.len() > 1 {
25849            let mut next = Vec::with_capacity((terms.len() + 1) / 2);
25850            let mut iter = terms.into_iter();
25851
25852            while let Some(left) = iter.next() {
25853                if let Some(right) = iter.next() {
25854                    let combined = if is_and {
25855                        Expression::And(Box::new(BinaryOp::new(left, right)))
25856                    } else {
25857                        Expression::Or(Box::new(BinaryOp::new(left, right)))
25858                    };
25859                    next.push(combined);
25860                } else {
25861                    next.push(left);
25862                }
25863            }
25864
25865            terms = next;
25866        }
25867
25868        terms.pop().unwrap_or(Expression::Null(Null))
25869    }
25870
25871    /// Parse NOT expressions
25872    fn parse_not(&mut self) -> Result<Expression> {
25873        if self.match_token(TokenType::Not) {
25874            let expr = self.parse_not()?;
25875            Ok(Expression::Not(Box::new(UnaryOp::new(expr))))
25876        } else {
25877            self.parse_comparison()
25878        }
25879    }
25880
25881    /// Parse comparison expressions
25882    fn parse_comparison(&mut self) -> Result<Expression> {
25883        // Capture leading comments from the first token before parsing the left side.
25884        // If a comparison operator follows, these are placed after the left operand.
25885        let pre_left_comments = self.current_leading_comments().to_vec();
25886        let mut left = self.parse_bitwise_or()?;
25887
25888        // Only attach pre-left comments when a comparison operator follows.
25889        // When no comparison follows (e.g., in SELECT list expressions or AND operands),
25890        // the comments are returned to the caller by being accessible via the
25891        // `comparison_pre_left_comments` field, so they can be placed appropriately
25892        // (e.g., after an alias name, or after the expression in an AND chain).
25893        let has_comparison_op = !self.is_at_end()
25894            && matches!(
25895                self.peek().token_type,
25896                TokenType::Eq
25897                    | TokenType::Neq
25898                    | TokenType::Lt
25899                    | TokenType::Gt
25900                    | TokenType::Lte
25901                    | TokenType::Gte
25902                    | TokenType::Is
25903                    | TokenType::In
25904                    | TokenType::Not
25905                    | TokenType::Between
25906                    | TokenType::Like
25907                    | TokenType::ILike
25908                    | TokenType::RLike
25909                    | TokenType::SimilarTo
25910            );
25911
25912        if !pre_left_comments.is_empty() {
25913            if has_comparison_op {
25914                // Comparison follows: attach comments between left operand and operator
25915                match &mut left {
25916                    Expression::Column(col) => {
25917                        col.trailing_comments.extend(pre_left_comments);
25918                    }
25919                    Expression::Identifier(id) => {
25920                        id.trailing_comments.extend(pre_left_comments);
25921                    }
25922                    _ => {
25923                        left = Expression::Annotated(Box::new(Annotated {
25924                            this: left,
25925                            trailing_comments: pre_left_comments,
25926                        }));
25927                    }
25928                }
25929            } else {
25930                // No comparison operator: store comments for the caller to use.
25931                // Save them as "pending" comments that the caller can retrieve.
25932                self.pending_leading_comments = pre_left_comments;
25933            }
25934        }
25935
25936        loop {
25937            let mut global_in = false;
25938            if matches!(
25939                self.config.dialect,
25940                Some(crate::dialects::DialectType::ClickHouse)
25941            ) && self.check_identifier("GLOBAL")
25942                && (self.check_next(TokenType::Not) || self.check_next(TokenType::In))
25943            {
25944                self.skip();
25945                global_in = true;
25946            }
25947
25948            let expr = if self.match_token(TokenType::Eq) {
25949                // Check for ANY/ALL subquery
25950                if self.match_token(TokenType::Any) || self.match_token(TokenType::Some) {
25951                    let was_any = self.previous_token_type() == Some(TokenType::Any);
25952                    self.expect(TokenType::LParen)?;
25953                    let inner = self.parse_statement()?;
25954                    self.expect(TokenType::RParen)?;
25955                    let subquery = if was_any {
25956                        self.maybe_wrap_in_subquery(inner)
25957                    } else {
25958                        inner
25959                    };
25960                    Expression::Any(Box::new(QuantifiedExpr {
25961                        this: left,
25962                        subquery,
25963                        op: Some(QuantifiedOp::Eq),
25964                    }))
25965                } else if self.match_token(TokenType::All) {
25966                    self.expect(TokenType::LParen)?;
25967                    let inner = self.parse_statement()?;
25968                    self.expect(TokenType::RParen)?;
25969                    let subquery = self.maybe_wrap_in_subquery(inner);
25970                    Expression::All(Box::new(QuantifiedExpr {
25971                        this: left,
25972                        subquery,
25973                        op: Some(QuantifiedOp::Eq),
25974                    }))
25975                } else {
25976                    let right = self.parse_bitwise_or()?;
25977                    let trailing_comments = self.previous_trailing_comments().to_vec();
25978                    Expression::Eq(Box::new(BinaryOp {
25979                        left,
25980                        right,
25981                        left_comments: Vec::new(),
25982                        operator_comments: Vec::new(),
25983                        trailing_comments,
25984                        inferred_type: None,
25985                    }))
25986                }
25987            } else if self.match_token(TokenType::Neq) {
25988                // Check for ANY/ALL subquery
25989                if self.match_token(TokenType::Any) || self.match_token(TokenType::Some) {
25990                    let was_any = self.previous_token_type() == Some(TokenType::Any);
25991                    self.expect(TokenType::LParen)?;
25992                    let inner = self.parse_statement()?;
25993                    self.expect(TokenType::RParen)?;
25994                    let subquery = if was_any {
25995                        self.maybe_wrap_in_subquery(inner)
25996                    } else {
25997                        inner
25998                    };
25999                    Expression::Any(Box::new(QuantifiedExpr {
26000                        this: left,
26001                        subquery,
26002                        op: Some(QuantifiedOp::Neq),
26003                    }))
26004                } else if self.match_token(TokenType::All) {
26005                    self.expect(TokenType::LParen)?;
26006                    let inner = self.parse_statement()?;
26007                    self.expect(TokenType::RParen)?;
26008                    let subquery = self.maybe_wrap_in_subquery(inner);
26009                    Expression::All(Box::new(QuantifiedExpr {
26010                        this: left,
26011                        subquery,
26012                        op: Some(QuantifiedOp::Neq),
26013                    }))
26014                } else {
26015                    let right = self.parse_bitwise_or()?;
26016                    let trailing_comments = self.previous_trailing_comments().to_vec();
26017                    Expression::Neq(Box::new(BinaryOp {
26018                        left,
26019                        right,
26020                        left_comments: Vec::new(),
26021                        operator_comments: Vec::new(),
26022                        trailing_comments,
26023                        inferred_type: None,
26024                    }))
26025                }
26026            } else if self.match_token(TokenType::Lt) {
26027                // Check for ANY/ALL subquery
26028                if self.match_token(TokenType::Any) || self.match_token(TokenType::Some) {
26029                    let was_any = self.previous_token_type() == Some(TokenType::Any);
26030                    self.expect(TokenType::LParen)?;
26031                    let inner = self.parse_statement()?;
26032                    self.expect(TokenType::RParen)?;
26033                    let subquery = if was_any {
26034                        self.maybe_wrap_in_subquery(inner)
26035                    } else {
26036                        inner
26037                    };
26038                    Expression::Any(Box::new(QuantifiedExpr {
26039                        this: left,
26040                        subquery,
26041                        op: Some(QuantifiedOp::Lt),
26042                    }))
26043                } else if self.match_token(TokenType::All) {
26044                    self.expect(TokenType::LParen)?;
26045                    let inner = self.parse_statement()?;
26046                    self.expect(TokenType::RParen)?;
26047                    let subquery = self.maybe_wrap_in_subquery(inner);
26048                    Expression::All(Box::new(QuantifiedExpr {
26049                        this: left,
26050                        subquery,
26051                        op: Some(QuantifiedOp::Lt),
26052                    }))
26053                } else {
26054                    let right = self.parse_bitwise_or()?;
26055                    let trailing_comments = self.previous_trailing_comments().to_vec();
26056                    Expression::Lt(Box::new(BinaryOp {
26057                        left,
26058                        right,
26059                        left_comments: Vec::new(),
26060                        operator_comments: Vec::new(),
26061                        trailing_comments,
26062                        inferred_type: None,
26063                    }))
26064                }
26065            } else if self.match_token(TokenType::Lte) {
26066                // Check for ANY/ALL subquery
26067                if self.match_token(TokenType::Any) || self.match_token(TokenType::Some) {
26068                    let was_any = self.previous_token_type() == Some(TokenType::Any);
26069                    self.expect(TokenType::LParen)?;
26070                    let inner = self.parse_statement()?;
26071                    self.expect(TokenType::RParen)?;
26072                    let subquery = if was_any {
26073                        self.maybe_wrap_in_subquery(inner)
26074                    } else {
26075                        inner
26076                    };
26077                    Expression::Any(Box::new(QuantifiedExpr {
26078                        this: left,
26079                        subquery,
26080                        op: Some(QuantifiedOp::Lte),
26081                    }))
26082                } else if self.match_token(TokenType::All) {
26083                    self.expect(TokenType::LParen)?;
26084                    let inner = self.parse_statement()?;
26085                    self.expect(TokenType::RParen)?;
26086                    let subquery = self.maybe_wrap_in_subquery(inner);
26087                    Expression::All(Box::new(QuantifiedExpr {
26088                        this: left,
26089                        subquery,
26090                        op: Some(QuantifiedOp::Lte),
26091                    }))
26092                } else {
26093                    let right = self.parse_bitwise_or()?;
26094                    let trailing_comments = self.previous_trailing_comments().to_vec();
26095                    Expression::Lte(Box::new(BinaryOp {
26096                        left,
26097                        right,
26098                        left_comments: Vec::new(),
26099                        operator_comments: Vec::new(),
26100                        trailing_comments,
26101                        inferred_type: None,
26102                    }))
26103                }
26104            } else if self.match_token(TokenType::Gt) {
26105                // Check for ANY/ALL subquery
26106                if self.match_token(TokenType::Any) || self.match_token(TokenType::Some) {
26107                    let was_any = self.previous_token_type() == Some(TokenType::Any);
26108                    self.expect(TokenType::LParen)?;
26109                    let inner = self.parse_statement()?;
26110                    self.expect(TokenType::RParen)?;
26111                    let subquery = if was_any {
26112                        self.maybe_wrap_in_subquery(inner)
26113                    } else {
26114                        inner
26115                    };
26116                    Expression::Any(Box::new(QuantifiedExpr {
26117                        this: left,
26118                        subquery,
26119                        op: Some(QuantifiedOp::Gt),
26120                    }))
26121                } else if self.match_token(TokenType::All) {
26122                    self.expect(TokenType::LParen)?;
26123                    let inner = self.parse_statement()?;
26124                    self.expect(TokenType::RParen)?;
26125                    let subquery = self.maybe_wrap_in_subquery(inner);
26126                    Expression::All(Box::new(QuantifiedExpr {
26127                        this: left,
26128                        subquery,
26129                        op: Some(QuantifiedOp::Gt),
26130                    }))
26131                } else {
26132                    let right = self.parse_bitwise_or()?;
26133                    let trailing_comments = self.previous_trailing_comments().to_vec();
26134                    Expression::Gt(Box::new(BinaryOp {
26135                        left,
26136                        right,
26137                        left_comments: Vec::new(),
26138                        operator_comments: Vec::new(),
26139                        trailing_comments,
26140                        inferred_type: None,
26141                    }))
26142                }
26143            } else if self.match_token(TokenType::Gte) {
26144                // Check for ANY/ALL subquery
26145                if self.match_token(TokenType::Any) || self.match_token(TokenType::Some) {
26146                    let was_any = self.previous_token_type() == Some(TokenType::Any);
26147                    self.expect(TokenType::LParen)?;
26148                    let inner = self.parse_statement()?;
26149                    self.expect(TokenType::RParen)?;
26150                    let subquery = if was_any {
26151                        self.maybe_wrap_in_subquery(inner)
26152                    } else {
26153                        inner
26154                    };
26155                    Expression::Any(Box::new(QuantifiedExpr {
26156                        this: left,
26157                        subquery,
26158                        op: Some(QuantifiedOp::Gte),
26159                    }))
26160                } else if self.match_token(TokenType::All) {
26161                    self.expect(TokenType::LParen)?;
26162                    let inner = self.parse_statement()?;
26163                    self.expect(TokenType::RParen)?;
26164                    let subquery = self.maybe_wrap_in_subquery(inner);
26165                    Expression::All(Box::new(QuantifiedExpr {
26166                        this: left,
26167                        subquery,
26168                        op: Some(QuantifiedOp::Gte),
26169                    }))
26170                } else {
26171                    let right = self.parse_bitwise_or()?;
26172                    let trailing_comments = self.previous_trailing_comments().to_vec();
26173                    Expression::Gte(Box::new(BinaryOp {
26174                        left,
26175                        right,
26176                        left_comments: Vec::new(),
26177                        operator_comments: Vec::new(),
26178                        trailing_comments,
26179                        inferred_type: None,
26180                    }))
26181                }
26182            } else if self.match_token(TokenType::NullsafeEq) {
26183                // <=> (MySQL NULL-safe equality)
26184                let right = self.parse_bitwise_or()?;
26185                let trailing_comments = self.previous_trailing_comments().to_vec();
26186                Expression::NullSafeEq(Box::new(BinaryOp {
26187                    left,
26188                    right,
26189                    left_comments: Vec::new(),
26190                    operator_comments: Vec::new(),
26191                    trailing_comments,
26192                    inferred_type: None,
26193                }))
26194            } else if self.check_identifier("SOUNDS") && self.check_next(TokenType::Like) {
26195                // MySQL SOUNDS LIKE: expr SOUNDS LIKE expr -> SOUNDEX(expr) = SOUNDEX(expr)
26196                self.skip(); // consume SOUNDS
26197                self.skip(); // consume LIKE
26198                let right = self.parse_bitwise_or()?;
26199                // Transform: SOUNDEX(left) = SOUNDEX(right)
26200                let soundex_left = Expression::Function(Box::new(Function::new(
26201                    "SOUNDEX".to_string(),
26202                    vec![left],
26203                )));
26204                let soundex_right = Expression::Function(Box::new(Function::new(
26205                    "SOUNDEX".to_string(),
26206                    vec![right],
26207                )));
26208                Expression::Eq(Box::new(BinaryOp::new(soundex_left, soundex_right)))
26209            } else if self.match_token(TokenType::Like) {
26210                // Check for ANY/ALL/SOME quantifier
26211                let quantifier = if self.match_token(TokenType::Any) {
26212                    Some("ANY".to_string())
26213                } else if self.match_token(TokenType::All) {
26214                    Some("ALL".to_string())
26215                } else if self.match_token(TokenType::Some) {
26216                    Some("SOME".to_string())
26217                } else {
26218                    None
26219                };
26220                let right = self.parse_bitwise_or()?;
26221                let escape = if self.match_token(TokenType::Escape) {
26222                    Some(self.parse_primary()?)
26223                } else {
26224                    None
26225                };
26226                Expression::Like(Box::new(LikeOp {
26227                    left,
26228                    right,
26229                    escape,
26230                    quantifier,
26231                    inferred_type: None,
26232                }))
26233            } else if self.match_token(TokenType::ILike) {
26234                // Check for ANY/ALL/SOME quantifier
26235                let quantifier = if self.match_token(TokenType::Any) {
26236                    Some("ANY".to_string())
26237                } else if self.match_token(TokenType::All) {
26238                    Some("ALL".to_string())
26239                } else if self.match_token(TokenType::Some) {
26240                    Some("SOME".to_string())
26241                } else {
26242                    None
26243                };
26244                let right = self.parse_bitwise_or()?;
26245                let escape = if self.match_token(TokenType::Escape) {
26246                    Some(self.parse_primary()?)
26247                } else {
26248                    None
26249                };
26250                Expression::ILike(Box::new(LikeOp {
26251                    left,
26252                    right,
26253                    escape,
26254                    quantifier,
26255                    inferred_type: None,
26256                }))
26257            } else if self.check_identifier("SIMILAR") && self.check_next(TokenType::To) {
26258                // SIMILAR TO operator (PostgreSQL/Redshift regex-like pattern matching)
26259                self.skip(); // consume SIMILAR
26260                self.skip(); // consume TO
26261                let pattern = self.parse_bitwise_or()?;
26262                let escape = if self.match_token(TokenType::Escape) {
26263                    Some(self.parse_primary()?)
26264                } else {
26265                    None
26266                };
26267                Expression::SimilarTo(Box::new(SimilarToExpr {
26268                    this: left,
26269                    pattern,
26270                    escape,
26271                    not: false,
26272                }))
26273            } else if self.match_token(TokenType::Glob) {
26274                let right = self.parse_bitwise_or()?;
26275                Expression::Glob(Box::new(BinaryOp::new(left, right)))
26276            } else if self.match_token(TokenType::Match) {
26277                // SQLite MATCH operator (FTS full-text search)
26278                let right = self.parse_bitwise_or()?;
26279                Expression::Match(Box::new(BinaryOp::new(left, right)))
26280            } else if self.match_token(TokenType::RLike) || self.match_token(TokenType::Tilde) {
26281                // PostgreSQL ~ (regexp match) operator / RLIKE / REGEXP
26282                let right = self.parse_bitwise_or()?;
26283                Expression::RegexpLike(Box::new(RegexpFunc {
26284                    this: left,
26285                    pattern: right,
26286                    flags: None,
26287                }))
26288            } else if matches!(
26289                self.config.dialect,
26290                Some(crate::dialects::DialectType::Exasol)
26291            ) && self.check_identifier("REGEXP_LIKE")
26292            {
26293                // Exasol: REGEXP_LIKE as infix binary operator
26294                self.skip(); // consume REGEXP_LIKE
26295                let right = self.parse_bitwise_or()?;
26296                Expression::RegexpLike(Box::new(RegexpFunc {
26297                    this: left,
26298                    pattern: right,
26299                    flags: None,
26300                }))
26301            } else if self.match_token(TokenType::IRLike) {
26302                // PostgreSQL ~* (case-insensitive regexp match) operator
26303                let right = self.parse_bitwise_or()?;
26304                Expression::RegexpILike(Box::new(RegexpILike {
26305                    this: Box::new(left),
26306                    expression: Box::new(right),
26307                    flag: None,
26308                }))
26309            } else if self.match_token(TokenType::NotLike) {
26310                // PostgreSQL !~~ (NOT LIKE) operator
26311                let right = self.parse_bitwise_or()?;
26312                let escape = if self.match_token(TokenType::Escape) {
26313                    Some(self.parse_primary()?)
26314                } else {
26315                    None
26316                };
26317                let like_expr = Expression::Like(Box::new(LikeOp {
26318                    left,
26319                    right,
26320                    escape,
26321                    quantifier: None,
26322                    inferred_type: None,
26323                }));
26324                Expression::Not(Box::new(UnaryOp::new(like_expr)))
26325            } else if self.match_token(TokenType::NotILike) {
26326                // PostgreSQL !~~* (NOT ILIKE) operator
26327                let right = self.parse_bitwise_or()?;
26328                let escape = if self.match_token(TokenType::Escape) {
26329                    Some(self.parse_primary()?)
26330                } else {
26331                    None
26332                };
26333                let ilike_expr = Expression::ILike(Box::new(LikeOp {
26334                    left,
26335                    right,
26336                    escape,
26337                    quantifier: None,
26338                    inferred_type: None,
26339                }));
26340                Expression::Not(Box::new(UnaryOp::new(ilike_expr)))
26341            } else if self.match_token(TokenType::NotRLike) {
26342                // PostgreSQL !~ (NOT regexp match) operator
26343                let right = self.parse_bitwise_or()?;
26344                let regexp_expr = Expression::RegexpLike(Box::new(RegexpFunc {
26345                    this: left,
26346                    pattern: right,
26347                    flags: None,
26348                }));
26349                Expression::Not(Box::new(UnaryOp::new(regexp_expr)))
26350            } else if self.match_token(TokenType::NotIRLike) {
26351                // PostgreSQL !~* (NOT case-insensitive regexp match) operator
26352                let right = self.parse_bitwise_or()?;
26353                let regexp_expr = Expression::RegexpILike(Box::new(RegexpILike {
26354                    this: Box::new(left),
26355                    expression: Box::new(right),
26356                    flag: None,
26357                }));
26358                Expression::Not(Box::new(UnaryOp::new(regexp_expr)))
26359            } else if self.check(TokenType::Is)
26360                && !self.is_last_expression_token(TokenType::Is)
26361                && self.match_token(TokenType::Is)
26362            {
26363                let not = self.match_token(TokenType::Not);
26364                if self.match_token(TokenType::Null) {
26365                    let expr = Expression::IsNull(Box::new(IsNull {
26366                        this: left,
26367                        not,
26368                        postfix_form: false,
26369                    }));
26370                    // ClickHouse: IS NULL :: Type — handle :: cast after IS NULL
26371                    if matches!(
26372                        self.config.dialect,
26373                        Some(crate::dialects::DialectType::ClickHouse)
26374                    ) && self.check(TokenType::DColon)
26375                    {
26376                        self.skip(); // consume ::
26377                        let data_type = self.parse_data_type_for_cast()?;
26378                        Expression::Cast(Box::new(Cast {
26379                            this: expr,
26380                            to: data_type,
26381                            trailing_comments: Vec::new(),
26382                            double_colon_syntax: true,
26383                            format: None,
26384                            default: None,
26385                            inferred_type: None,
26386                        }))
26387                    } else {
26388                        expr
26389                    }
26390                } else if self.match_token(TokenType::True) {
26391                    // IS TRUE / IS NOT TRUE
26392                    Expression::IsTrue(Box::new(IsTrueFalse { this: left, not }))
26393                } else if self.match_token(TokenType::False) {
26394                    // IS FALSE / IS NOT FALSE
26395                    Expression::IsFalse(Box::new(IsTrueFalse { this: left, not }))
26396                } else if self.match_token(TokenType::Distinct) {
26397                    // IS DISTINCT FROM / IS NOT DISTINCT FROM
26398                    self.expect(TokenType::From)?;
26399                    let right = self.parse_bitwise_or()?;
26400                    if not {
26401                        // IS NOT DISTINCT FROM → null-safe equality
26402                        Expression::NullSafeEq(Box::new(BinaryOp::new(left, right)))
26403                    } else {
26404                        // IS DISTINCT FROM → null-safe inequality
26405                        Expression::NullSafeNeq(Box::new(BinaryOp::new(left, right)))
26406                    }
26407                } else if self.match_identifier("UNKNOWN") {
26408                    // IS UNKNOWN
26409                    Expression::IsNull(Box::new(IsNull {
26410                        this: left,
26411                        not,
26412                        postfix_form: false,
26413                    }))
26414                } else if self.match_texts(&["JSON"]) {
26415                    // IS JSON [VALUE|SCALAR|OBJECT|ARRAY] [WITH UNIQUE KEYS|WITHOUT UNIQUE KEYS|UNIQUE KEYS]
26416                    let json_type = if self.match_texts(&["VALUE"]) {
26417                        Some("VALUE".to_string())
26418                    } else if self.match_texts(&["SCALAR"]) {
26419                        Some("SCALAR".to_string())
26420                    } else if self.match_texts(&["OBJECT"]) {
26421                        Some("OBJECT".to_string())
26422                    } else if self.match_texts(&["ARRAY"]) {
26423                        Some("ARRAY".to_string())
26424                    } else {
26425                        None
26426                    };
26427
26428                    // Parse optional key uniqueness constraint
26429                    let unique_keys = if self.match_text_seq(&["WITH", "UNIQUE", "KEYS"]) {
26430                        Some(JsonUniqueKeys::With)
26431                    } else if self.match_text_seq(&["WITHOUT", "UNIQUE", "KEYS"]) {
26432                        Some(JsonUniqueKeys::Without)
26433                    } else if self.match_text_seq(&["UNIQUE", "KEYS"]) {
26434                        // Shorthand for WITH UNIQUE KEYS
26435                        Some(JsonUniqueKeys::Shorthand)
26436                    } else {
26437                        None
26438                    };
26439
26440                    Expression::IsJson(Box::new(IsJson {
26441                        this: left,
26442                        json_type,
26443                        unique_keys,
26444                        negated: not,
26445                    }))
26446                } else {
26447                    // IS followed by an expression (e.g., IS ?)
26448                    // If we matched NOT, wrap the IS expression in NOT
26449                    let right = self.parse_primary()?;
26450                    let is_expr = Expression::Is(Box::new(BinaryOp::new(left, right)));
26451                    if not {
26452                        Expression::Not(Box::new(UnaryOp::new(is_expr)))
26453                    } else {
26454                        is_expr
26455                    }
26456                }
26457            } else if self.match_token(TokenType::Not) {
26458                // Handle NOT IN, NOT BETWEEN, NOT LIKE, NOT ILIKE, etc.
26459                if self.match_token(TokenType::In) {
26460                    // BigQuery: NOT IN UNNEST(expr)
26461                    if self.check_identifier("UNNEST") {
26462                        self.skip(); // consume UNNEST
26463                        self.expect(TokenType::LParen)?;
26464                        let unnest_expr = self.parse_expression()?;
26465                        self.expect(TokenType::RParen)?;
26466                        Expression::In(Box::new(In {
26467                            this: left,
26468                            expressions: Vec::new(),
26469                            query: None,
26470                            not: true,
26471                            global: global_in,
26472                            unnest: Some(Box::new(unnest_expr)),
26473                            is_field: false,
26474                        }))
26475                    } else if self.match_token(TokenType::LParen) {
26476                        if self.check(TokenType::Select) || self.check(TokenType::With) {
26477                            let subquery = self.parse_statement()?;
26478                            self.expect(TokenType::RParen)?;
26479                            Expression::In(Box::new(In {
26480                                this: left,
26481                                expressions: Vec::new(),
26482                                query: Some(subquery),
26483                                not: true,
26484                                global: global_in,
26485                                unnest: None,
26486                                is_field: false,
26487                            }))
26488                        } else if self.check(TokenType::RParen) {
26489                            // Empty NOT IN set: NOT IN ()
26490                            self.skip();
26491                            Expression::In(Box::new(In {
26492                                this: left,
26493                                expressions: Vec::new(),
26494                                query: None,
26495                                not: true,
26496                                global: global_in,
26497                                unnest: None,
26498                                is_field: false,
26499                            }))
26500                        } else {
26501                            let expressions = self.parse_expression_list()?;
26502                            self.expect(TokenType::RParen)?;
26503                            Expression::In(Box::new(In {
26504                                this: left,
26505                                expressions,
26506                                query: None,
26507                                not: true,
26508                                global: global_in,
26509                                unnest: None,
26510                                is_field: false,
26511                            }))
26512                        }
26513                    } else {
26514                        // ClickHouse/DuckDB: IN without parentheses: expr NOT IN table_name
26515                        let table_expr = self.parse_primary()?;
26516                        Expression::In(Box::new(In {
26517                            this: left,
26518                            expressions: vec![table_expr],
26519                            query: None,
26520                            not: true,
26521                            global: global_in,
26522                            unnest: None,
26523                            is_field: true,
26524                        }))
26525                    }
26526                } else if self.match_token(TokenType::Between) {
26527                    // Check for SYMMETRIC/ASYMMETRIC qualifier
26528                    let symmetric = if self.match_texts(&["SYMMETRIC"]) {
26529                        Some(true)
26530                    } else if self.match_texts(&["ASYMMETRIC"]) {
26531                        Some(false)
26532                    } else {
26533                        None
26534                    };
26535                    let low = self.parse_bitwise_or()?;
26536                    self.expect(TokenType::And)?;
26537                    let high = self.parse_bitwise_or()?;
26538                    Expression::Between(Box::new(Between {
26539                        this: left,
26540                        low,
26541                        high,
26542                        not: true,
26543                        symmetric,
26544                    }))
26545                } else if self.check_identifier("SOUNDS") && self.check_next(TokenType::Like) {
26546                    // MySQL NOT SOUNDS LIKE: expr NOT SOUNDS LIKE expr -> NOT SOUNDEX(expr) = SOUNDEX(expr)
26547                    self.skip(); // consume SOUNDS
26548                    self.skip(); // consume LIKE
26549                    let right = self.parse_bitwise_or()?;
26550                    let soundex_left = Expression::Function(Box::new(Function::new(
26551                        "SOUNDEX".to_string(),
26552                        vec![left],
26553                    )));
26554                    let soundex_right = Expression::Function(Box::new(Function::new(
26555                        "SOUNDEX".to_string(),
26556                        vec![right],
26557                    )));
26558                    let eq_expr =
26559                        Expression::Eq(Box::new(BinaryOp::new(soundex_left, soundex_right)));
26560                    Expression::Not(Box::new(UnaryOp::new(eq_expr)))
26561                } else if self.match_token(TokenType::Like) {
26562                    let right = self.parse_bitwise_or()?;
26563                    let escape = if self.match_token(TokenType::Escape) {
26564                        Some(self.parse_primary()?)
26565                    } else {
26566                        None
26567                    };
26568                    let like_expr = Expression::Like(Box::new(LikeOp {
26569                        left,
26570                        right,
26571                        escape,
26572                        quantifier: None,
26573                        inferred_type: None,
26574                    }));
26575                    Expression::Not(Box::new(UnaryOp::new(like_expr)))
26576                } else if self.match_token(TokenType::ILike) {
26577                    let right = self.parse_bitwise_or()?;
26578                    let escape = if self.match_token(TokenType::Escape) {
26579                        Some(self.parse_primary()?)
26580                    } else {
26581                        None
26582                    };
26583                    let ilike_expr = Expression::ILike(Box::new(LikeOp {
26584                        left,
26585                        right,
26586                        escape,
26587                        quantifier: None,
26588                        inferred_type: None,
26589                    }));
26590                    Expression::Not(Box::new(UnaryOp::new(ilike_expr)))
26591                } else if self.check_identifier("SIMILAR") && self.check_next(TokenType::To) {
26592                    // NOT SIMILAR TO
26593                    self.skip(); // consume SIMILAR
26594                    self.skip(); // consume TO
26595                    let pattern = self.parse_bitwise_or()?;
26596                    let escape = if self.match_token(TokenType::Escape) {
26597                        Some(self.parse_primary()?)
26598                    } else {
26599                        None
26600                    };
26601                    Expression::SimilarTo(Box::new(SimilarToExpr {
26602                        this: left,
26603                        pattern,
26604                        escape,
26605                        not: true,
26606                    }))
26607                } else if self.match_token(TokenType::RLike) {
26608                    let right = self.parse_bitwise_or()?;
26609                    let regexp_expr = Expression::RegexpLike(Box::new(RegexpFunc {
26610                        this: left,
26611                        pattern: right,
26612                        flags: None,
26613                    }));
26614                    Expression::Not(Box::new(UnaryOp::new(regexp_expr)))
26615                } else if self.match_token(TokenType::Null) {
26616                    // SQLite: a NOT NULL (postfix form, two separate tokens)
26617                    // Creates NOT(a IS NULL) which is semantically equivalent
26618                    let is_null =
26619                        Expression::Is(Box::new(BinaryOp::new(left, Expression::Null(Null))));
26620                    Expression::Not(Box::new(UnaryOp::new(is_null)))
26621                } else {
26622                    // NOT followed by something else - revert
26623                    return Ok(left);
26624                }
26625            } else if self.match_token(TokenType::In) {
26626                // BigQuery: IN UNNEST(expr)
26627                if self.check_identifier("UNNEST") {
26628                    self.skip(); // consume UNNEST
26629                    self.expect(TokenType::LParen)?;
26630                    let unnest_expr = self.parse_expression()?;
26631                    self.expect(TokenType::RParen)?;
26632                    Expression::In(Box::new(In {
26633                        this: left,
26634                        expressions: Vec::new(),
26635                        query: None,
26636                        not: false,
26637                        global: global_in,
26638                        unnest: Some(Box::new(unnest_expr)),
26639                        is_field: false,
26640                    }))
26641                } else if self.match_token(TokenType::LParen) {
26642                    // Standard IN (list) or IN (subquery)
26643                    // Check if this is a subquery (IN (SELECT ...) or IN (WITH ... SELECT ...))
26644                    if self.check(TokenType::Select) || self.check(TokenType::With) {
26645                        // Use parse_statement to handle both SELECT and WITH...SELECT
26646                        let subquery = self.parse_statement()?;
26647                        self.expect(TokenType::RParen)?;
26648                        Expression::In(Box::new(In {
26649                            this: left,
26650                            expressions: Vec::new(),
26651                            query: Some(subquery),
26652                            not: false,
26653                            global: global_in,
26654                            unnest: None,
26655                            is_field: false,
26656                        }))
26657                    } else if self.check(TokenType::RParen) {
26658                        // Empty IN set: IN ()
26659                        self.skip();
26660                        Expression::In(Box::new(In {
26661                            this: left,
26662                            expressions: Vec::new(),
26663                            query: None,
26664                            not: false,
26665                            global: global_in,
26666                            unnest: None,
26667                            is_field: false,
26668                        }))
26669                    } else {
26670                        let expressions = self.parse_expression_list()?;
26671                        self.expect(TokenType::RParen)?;
26672                        Expression::In(Box::new(In {
26673                            this: left,
26674                            expressions,
26675                            query: None,
26676                            not: false,
26677                            global: global_in,
26678                            unnest: None,
26679                            is_field: false,
26680                        }))
26681                    }
26682                } else {
26683                    // DuckDB: IN without parentheses for array/list membership: 'red' IN tbl.flags
26684                    let expr = self.parse_bitwise_or()?;
26685                    Expression::In(Box::new(In {
26686                        this: left,
26687                        expressions: vec![expr],
26688                        query: None,
26689                        not: false,
26690                        global: global_in,
26691                        unnest: None,
26692                        is_field: true,
26693                    }))
26694                }
26695            } else if self.match_token(TokenType::Between) {
26696                // Check for SYMMETRIC/ASYMMETRIC qualifier
26697                let symmetric = if self.match_texts(&["SYMMETRIC"]) {
26698                    Some(true)
26699                } else if self.match_texts(&["ASYMMETRIC"]) {
26700                    Some(false)
26701                } else {
26702                    None
26703                };
26704                let low = self.parse_bitwise_or()?;
26705                self.expect(TokenType::And)?;
26706                let high = self.parse_bitwise_or()?;
26707                Expression::Between(Box::new(Between {
26708                    this: left,
26709                    low,
26710                    high,
26711                    not: false,
26712                    symmetric,
26713                }))
26714            } else if self.match_token(TokenType::Adjacent) {
26715                let right = self.parse_bitwise_or()?;
26716                Expression::Adjacent(Box::new(BinaryOp::new(left, right)))
26717            } else if self.check(TokenType::Overlaps)
26718                && self.current + 1 < self.tokens.len()
26719                && !matches!(
26720                    self.tokens[self.current + 1].token_type,
26721                    TokenType::Semicolon
26722                        | TokenType::Comma
26723                        | TokenType::From
26724                        | TokenType::Where
26725                        | TokenType::RParen
26726                        | TokenType::As
26727                        | TokenType::Join
26728                        | TokenType::On
26729                        | TokenType::OrderBy
26730                        | TokenType::GroupBy
26731                        | TokenType::Having
26732                        | TokenType::Limit
26733                        | TokenType::Union
26734                        | TokenType::Except
26735                        | TokenType::Intersect
26736                        | TokenType::Eof
26737                )
26738            {
26739                self.skip(); // consume OVERLAPS
26740                let right = self.parse_bitwise_or()?;
26741                Expression::Overlaps(Box::new(OverlapsExpr {
26742                    this: Some(left),
26743                    expression: Some(right),
26744                    left_start: None,
26745                    left_end: None,
26746                    right_start: None,
26747                    right_end: None,
26748                }))
26749            } else if self.match_token(TokenType::IsNull) {
26750                // ISNULL postfix operator (PostgreSQL/SQLite)
26751                Expression::IsNull(Box::new(IsNull {
26752                    this: left,
26753                    not: false,
26754                    postfix_form: true,
26755                }))
26756            } else if self.match_token(TokenType::NotNull) {
26757                // NOTNULL postfix operator (PostgreSQL/SQLite)
26758                Expression::IsNull(Box::new(IsNull {
26759                    this: left,
26760                    not: true,
26761                    postfix_form: true,
26762                }))
26763            } else if self.match_token(TokenType::AtAt) {
26764                // PostgreSQL text search match operator (@@)
26765                let right = self.parse_bitwise_or()?;
26766                Expression::TsMatch(Box::new(BinaryOp::new(left, right)))
26767            } else if self.match_token(TokenType::AtGt) {
26768                // PostgreSQL array contains all operator (@>)
26769                let right = self.parse_bitwise_or()?;
26770                Expression::ArrayContainsAll(Box::new(BinaryOp::new(left, right)))
26771            } else if self.match_token(TokenType::LtAt) {
26772                // PostgreSQL array contained by operator (<@)
26773                let right = self.parse_bitwise_or()?;
26774                Expression::ArrayContainedBy(Box::new(BinaryOp::new(left, right)))
26775            } else if self.match_token(TokenType::DAmp) {
26776                // PostgreSQL array overlaps operator (&&)
26777                let right = self.parse_bitwise_or()?;
26778                Expression::ArrayOverlaps(Box::new(BinaryOp::new(left, right)))
26779            } else if self.match_token(TokenType::QMarkAmp) {
26780                // PostgreSQL JSONB contains all top keys operator (?&)
26781                let right = self.parse_bitwise_or()?;
26782                Expression::JSONBContainsAllTopKeys(Box::new(BinaryOp::new(left, right)))
26783            } else if self.match_token(TokenType::QMarkPipe) {
26784                // PostgreSQL JSONB contains any top key operator (?|)
26785                let right = self.parse_bitwise_or()?;
26786                Expression::JSONBContainsAnyTopKeys(Box::new(BinaryOp::new(left, right)))
26787            } else if !matches!(
26788                self.config.dialect,
26789                Some(crate::dialects::DialectType::ClickHouse)
26790            ) && self.match_token(TokenType::Parameter)
26791            {
26792                // PostgreSQL JSONB contains key operator (?)
26793                // Note: ? is tokenized as Parameter, but when used between expressions
26794                // it's the JSONB key existence operator
26795                // ClickHouse uses ? as ternary operator instead, handled in parse_assignment()
26796                let right = self.parse_bitwise_or()?;
26797                Expression::JSONBContains(Box::new(BinaryFunc {
26798                    original_name: Some("?".to_string()),
26799                    this: left,
26800                    expression: right,
26801                    inferred_type: None,
26802                }))
26803            } else if self.match_token(TokenType::HashDash) {
26804                // PostgreSQL JSONB delete at path operator (#-)
26805                let right = self.parse_bitwise_or()?;
26806                Expression::JSONBDeleteAtPath(Box::new(BinaryOp::new(left, right)))
26807            } else if self.match_token(TokenType::AmpLt) {
26808                // PostgreSQL range extends left operator (&<)
26809                let right = self.parse_bitwise_or()?;
26810                Expression::ExtendsLeft(Box::new(BinaryOp::new(left, right)))
26811            } else if self.match_token(TokenType::AmpGt) {
26812                // PostgreSQL range extends right operator (&>)
26813                let right = self.parse_bitwise_or()?;
26814                Expression::ExtendsRight(Box::new(BinaryOp::new(left, right)))
26815            } else if self.match_identifier("MEMBER") {
26816                // MySQL MEMBER OF(expr) operator - JSON membership test
26817                self.expect(TokenType::Of)?;
26818                self.expect(TokenType::LParen)?;
26819                let right = self.parse_expression()?;
26820                self.expect(TokenType::RParen)?;
26821                Expression::MemberOf(Box::new(BinaryOp::new(left, right)))
26822            } else if self.match_token(TokenType::CaretAt) {
26823                // DuckDB/PostgreSQL starts-with operator (^@)
26824                let right = self.parse_bitwise_or()?;
26825                Expression::StartsWith(Box::new(BinaryFunc {
26826                    original_name: Some("^@".to_string()),
26827                    this: left,
26828                    expression: right,
26829                    inferred_type: None,
26830                }))
26831            } else if self.match_token(TokenType::LrArrow) {
26832                // PostgreSQL distance operator (<->)
26833                let right = self.parse_bitwise_or()?;
26834                Expression::EuclideanDistance(Box::new(EuclideanDistance {
26835                    this: Box::new(left),
26836                    expression: Box::new(right),
26837                }))
26838            } else if self.match_token(TokenType::Operator) {
26839                // PostgreSQL OPERATOR(schema.op) syntax for schema-qualified operators
26840                // Example: col1 OPERATOR(pg_catalog.~) col2
26841                self.expect(TokenType::LParen)?;
26842
26843                // Collect all tokens between parentheses as the operator text
26844                // This can include schema names, dots, and operator symbols like ~
26845                let mut op_text = String::new();
26846                while !self.check(TokenType::RParen) && !self.is_at_end() {
26847                    op_text.push_str(&self.peek().text);
26848                    self.skip();
26849                }
26850                self.expect(TokenType::RParen)?;
26851
26852                // Collect any inline comments (e.g., /* foo */) between OPERATOR() and the RHS
26853                // Try trailing comments of the RParen (previous token) first,
26854                // then leading comments of the next token
26855                let mut comments = if self.current > 0 {
26856                    std::mem::take(&mut self.tokens[self.current - 1].trailing_comments)
26857                } else {
26858                    Vec::new()
26859                };
26860                if comments.is_empty() && !self.is_at_end() {
26861                    comments = std::mem::take(&mut self.tokens[self.current].comments);
26862                }
26863
26864                // Parse the right-hand side expression
26865                let right = self.parse_bitwise_or()?;
26866
26867                Expression::Operator(Box::new(Operator {
26868                    this: Box::new(left),
26869                    operator: Some(Box::new(Expression::Identifier(Identifier::new(op_text)))),
26870                    expression: Box::new(right),
26871                    comments,
26872                }))
26873            } else {
26874                return Ok(left);
26875            };
26876
26877            left = expr;
26878        }
26879    }
26880
26881    /// Parse bitwise OR expressions (|)
26882    fn parse_bitwise_or(&mut self) -> Result<Expression> {
26883        let mut left = self.parse_bitwise_xor()?;
26884
26885        loop {
26886            if self.match_token(TokenType::Pipe) {
26887                let right = self.parse_bitwise_xor()?;
26888                left = Expression::BitwiseOr(Box::new(BinaryOp::new(left, right)));
26889            } else {
26890                return Ok(left);
26891            }
26892        }
26893    }
26894
26895    /// Parse bitwise operators with an existing left expression
26896    /// Used for DuckDB's @ operator when @col is tokenized as a single Var token
26897    /// We already have the column, now need to continue parsing any binary operators
26898    /// Follows the same precedence chain: bitwise -> shift -> addition -> multiplication
26899    fn parse_bitwise_continuation(&mut self, left: Expression) -> Result<Expression> {
26900        // Start from multiplication level since we have a primary expression (col)
26901        // Then work up through addition, shift, bitwise AND/XOR/OR
26902        let mult_result = self.parse_multiplication_continuation(left)?;
26903        let add_result = self.parse_addition_continuation(mult_result)?;
26904        self.parse_bitwise_or_continuation(add_result)
26905    }
26906
26907    /// Parse bitwise OR with an existing left expression
26908    fn parse_bitwise_or_continuation(&mut self, mut left: Expression) -> Result<Expression> {
26909        loop {
26910            if self.match_token(TokenType::Pipe) {
26911                let right = self.parse_bitwise_xor()?;
26912                left = Expression::BitwiseOr(Box::new(BinaryOp::new(left, right)));
26913            } else {
26914                return Ok(left);
26915            }
26916        }
26917    }
26918
26919    /// Parse multiplication/division with an existing left expression
26920    fn parse_multiplication_continuation(&mut self, mut left: Expression) -> Result<Expression> {
26921        loop {
26922            let expr = if self.match_token(TokenType::Star) {
26923                let right = self.parse_power()?;
26924                Expression::Mul(Box::new(BinaryOp::new(left, right)))
26925            } else if self.match_token(TokenType::Slash) {
26926                let right = self.parse_power()?;
26927                Expression::Div(Box::new(BinaryOp::new(left, right)))
26928            } else if self.match_token(TokenType::Percent) {
26929                let right = self.parse_power()?;
26930                Expression::Mod(Box::new(BinaryOp::new(left, right)))
26931            } else if !self.check(TokenType::QuotedIdentifier)
26932                && (self.match_identifier("DIV") || self.match_token(TokenType::Div))
26933            {
26934                // DIV keyword for integer division (Hive/Spark/MySQL/ClickHouse)
26935                // Don't match QuotedIdentifier — `DIV` is an identifier alias, not an operator
26936                // If DIV was matched as a Var (not keyword Div token), verify it's actually
26937                // an operator by checking that a right operand follows. Otherwise it's an alias.
26938                let matched_as_var = self.previous().token_type == TokenType::Var;
26939                if matched_as_var
26940                    && (self.is_at_end()
26941                        || self.check(TokenType::Semicolon)
26942                        || self.check(TokenType::From)
26943                        || self.check(TokenType::Where)
26944                        || self.check(TokenType::Comma)
26945                        || self.check(TokenType::RParen))
26946                {
26947                    // Backtrack: DIV is being used as an alias, not an operator
26948                    self.current -= 1;
26949                    return Ok(left);
26950                }
26951                let right = self.parse_power()?;
26952                Expression::IntDiv(Box::new(crate::expressions::BinaryFunc {
26953                    this: left,
26954                    expression: right,
26955                    original_name: None,
26956                    inferred_type: None,
26957                }))
26958            } else {
26959                return Ok(left);
26960            };
26961            left = expr;
26962        }
26963    }
26964
26965    /// Parse addition/subtraction with an existing left expression
26966    fn parse_addition_continuation(&mut self, mut left: Expression) -> Result<Expression> {
26967        loop {
26968            let left_comments = self.previous_trailing_comments().to_vec();
26969
26970            let expr = if self.match_token(TokenType::Plus) {
26971                let operator_comments = self.previous_trailing_comments().to_vec();
26972                let right = self.parse_at_time_zone()?;
26973                let trailing_comments = self.previous_trailing_comments().to_vec();
26974                Expression::Add(Box::new(BinaryOp {
26975                    left,
26976                    right,
26977                    left_comments,
26978                    operator_comments,
26979                    trailing_comments,
26980                    inferred_type: None,
26981                }))
26982            } else if self.match_token(TokenType::Dash) {
26983                let operator_comments = self.previous_trailing_comments().to_vec();
26984                let right = self.parse_at_time_zone()?;
26985                let trailing_comments = self.previous_trailing_comments().to_vec();
26986                Expression::Sub(Box::new(BinaryOp {
26987                    left,
26988                    right,
26989                    left_comments,
26990                    operator_comments,
26991                    trailing_comments,
26992                    inferred_type: None,
26993                }))
26994            } else if !self.dpipe_is_logical_or() && self.match_token(TokenType::DPipe) {
26995                let operator_comments = self.previous_trailing_comments().to_vec();
26996                let right = self.parse_at_time_zone()?;
26997                let trailing_comments = self.previous_trailing_comments().to_vec();
26998                Expression::Concat(Box::new(BinaryOp {
26999                    left,
27000                    right,
27001                    left_comments,
27002                    operator_comments,
27003                    trailing_comments,
27004                    inferred_type: None,
27005                }))
27006            } else if self.match_token(TokenType::DQMark) {
27007                let right = self.parse_at_time_zone()?;
27008                Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
27009                    expressions: vec![left, right],
27010                    original_name: None,
27011                    inferred_type: None,
27012                }))
27013            } else {
27014                return Ok(left);
27015            };
27016
27017            left = expr;
27018        }
27019    }
27020
27021    /// Parse bitwise XOR expressions (^)
27022    fn parse_bitwise_xor(&mut self) -> Result<Expression> {
27023        let mut left = self.parse_bitwise_and()?;
27024
27025        loop {
27026            // In PostgreSQL, ^ is POWER (handled at parse_power level), and # is BitwiseXor
27027            if matches!(
27028                self.config.dialect,
27029                Some(crate::dialects::DialectType::PostgreSQL)
27030                    | Some(crate::dialects::DialectType::Redshift)
27031            ) {
27032                if self.match_token(TokenType::Hash) {
27033                    let right = self.parse_bitwise_and()?;
27034                    left = Expression::BitwiseXor(Box::new(BinaryOp::new(left, right)));
27035                } else {
27036                    return Ok(left);
27037                }
27038            } else if self.match_token(TokenType::Caret) {
27039                let right = self.parse_bitwise_and()?;
27040                left = Expression::BitwiseXor(Box::new(BinaryOp::new(left, right)));
27041            } else {
27042                return Ok(left);
27043            }
27044        }
27045    }
27046
27047    /// Parse bitwise AND expressions (&)
27048    fn parse_bitwise_and(&mut self) -> Result<Expression> {
27049        let mut left = self.parse_shift()?;
27050
27051        loop {
27052            if self.match_token(TokenType::Amp) {
27053                let right = self.parse_shift()?;
27054                left = Expression::BitwiseAnd(Box::new(BinaryOp::new(left, right)));
27055            } else {
27056                return Ok(left);
27057            }
27058        }
27059    }
27060
27061    /// Parse shift expressions (<< and >>)
27062    fn parse_shift(&mut self) -> Result<Expression> {
27063        let mut left = self.parse_addition()?;
27064
27065        loop {
27066            if self.match_token(TokenType::LtLt) {
27067                let right = self.parse_addition()?;
27068                left = Expression::BitwiseLeftShift(Box::new(BinaryOp::new(left, right)));
27069            } else if self.match_token(TokenType::GtGt) {
27070                let right = self.parse_addition()?;
27071                left = Expression::BitwiseRightShift(Box::new(BinaryOp::new(left, right)));
27072            } else {
27073                return Ok(left);
27074            }
27075        }
27076    }
27077
27078    /// Parse addition/subtraction
27079    fn parse_addition(&mut self) -> Result<Expression> {
27080        let mut left = self.parse_at_time_zone()?;
27081
27082        loop {
27083            // Capture comments after left operand before consuming operator
27084            let left_comments = self.previous_trailing_comments().to_vec();
27085
27086            let expr = if self.match_token(TokenType::Plus) {
27087                // Capture comments after operator (before right operand)
27088                let operator_comments = self.previous_trailing_comments().to_vec();
27089                let right = self.parse_at_time_zone()?;
27090                let trailing_comments = self.previous_trailing_comments().to_vec();
27091                Expression::Add(Box::new(BinaryOp {
27092                    left,
27093                    right,
27094                    left_comments,
27095                    operator_comments,
27096                    trailing_comments,
27097                    inferred_type: None,
27098                }))
27099            } else if self.match_token(TokenType::Dash) {
27100                let operator_comments = self.previous_trailing_comments().to_vec();
27101                let right = self.parse_at_time_zone()?;
27102                let trailing_comments = self.previous_trailing_comments().to_vec();
27103                Expression::Sub(Box::new(BinaryOp {
27104                    left,
27105                    right,
27106                    left_comments,
27107                    operator_comments,
27108                    trailing_comments,
27109                    inferred_type: None,
27110                }))
27111            } else if !self.dpipe_is_logical_or() && self.match_token(TokenType::DPipe) {
27112                let operator_comments = self.previous_trailing_comments().to_vec();
27113                let right = self.parse_at_time_zone()?;
27114                let trailing_comments = self.previous_trailing_comments().to_vec();
27115                Expression::Concat(Box::new(BinaryOp {
27116                    left,
27117                    right,
27118                    left_comments,
27119                    operator_comments,
27120                    trailing_comments,
27121                    inferred_type: None,
27122                }))
27123            } else if self.match_token(TokenType::DQMark) {
27124                let right = self.parse_at_time_zone()?;
27125                Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
27126                    expressions: vec![left, right],
27127                    original_name: None,
27128                    inferred_type: None,
27129                }))
27130            } else {
27131                return Ok(left);
27132            };
27133
27134            left = expr;
27135        }
27136    }
27137
27138    /// Parse AT TIME ZONE expression
27139    fn parse_at_time_zone(&mut self) -> Result<Expression> {
27140        let mut expr = self.parse_multiplication()?;
27141
27142        // Check for AT TIME ZONE (can be chained)
27143        while self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("AT") {
27144            self.skip(); // consume AT
27145                         // Check for TIME ZONE
27146            if self.check(TokenType::Time) {
27147                self.skip(); // consume TIME
27148                if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("ZONE") {
27149                    self.skip(); // consume ZONE
27150                    let zone = self.parse_unary()?;
27151                    expr = Expression::AtTimeZone(Box::new(AtTimeZone { this: expr, zone }));
27152                } else {
27153                    return Err(self.parse_error("Expected ZONE after AT TIME"));
27154                }
27155            } else {
27156                return Err(self.parse_error("Expected TIME after AT"));
27157            }
27158        }
27159
27160        Ok(expr)
27161    }
27162
27163    /// Parse multiplication/division
27164    fn parse_multiplication(&mut self) -> Result<Expression> {
27165        let mut left = self.parse_power()?;
27166
27167        loop {
27168            let expr = if self.match_token(TokenType::Star) {
27169                let right = self.parse_power()?;
27170                Expression::Mul(Box::new(BinaryOp::new(left, right)))
27171            } else if self.match_token(TokenType::Slash) {
27172                let right = self.parse_power()?;
27173                Expression::Div(Box::new(BinaryOp::new(left, right)))
27174            } else if self.match_token(TokenType::Percent) {
27175                let right = self.parse_power()?;
27176                Expression::Mod(Box::new(BinaryOp::new(left, right)))
27177            } else if !self.check(TokenType::QuotedIdentifier)
27178                && (self.match_identifier("MOD") || self.match_token(TokenType::Mod))
27179            {
27180                // MySQL/Teradata: x MOD y (infix modulo operator)
27181                // Don't match QuotedIdentifier — `MOD` is an identifier alias, not an operator
27182                let right = self.parse_power()?;
27183                Expression::Mod(Box::new(BinaryOp::new(left, right)))
27184            } else if !self.check(TokenType::QuotedIdentifier)
27185                && (self.match_identifier("DIV") || self.match_token(TokenType::Div))
27186            {
27187                // DIV keyword for integer division (Hive/Spark/MySQL/ClickHouse)
27188                // Don't match QuotedIdentifier — `DIV` is an identifier alias, not an operator
27189                // If DIV was matched as a Var (not keyword Div token), verify it's actually
27190                // an operator by checking that a right operand follows. Otherwise it's an alias.
27191                let matched_as_var = self.previous().token_type == TokenType::Var;
27192                if matched_as_var
27193                    && (self.is_at_end()
27194                        || self.check(TokenType::Semicolon)
27195                        || self.check(TokenType::From)
27196                        || self.check(TokenType::Where)
27197                        || self.check(TokenType::Comma)
27198                        || self.check(TokenType::RParen))
27199                {
27200                    // Backtrack: DIV is being used as an alias, not an operator
27201                    self.current -= 1;
27202                    return Ok(left);
27203                }
27204                let right = self.parse_power()?;
27205                Expression::IntDiv(Box::new(crate::expressions::BinaryFunc {
27206                    this: left,
27207                    expression: right,
27208                    original_name: None,
27209                    inferred_type: None,
27210                }))
27211            } else {
27212                return Ok(left);
27213            };
27214
27215            left = expr;
27216        }
27217    }
27218
27219    /// Parse power/exponentiation (**) operator
27220    /// In PostgreSQL/Redshift, ^ (Caret) is POWER, not BitwiseXor
27221    fn parse_power(&mut self) -> Result<Expression> {
27222        let mut left = self.parse_unary()?;
27223
27224        loop {
27225            if self.match_token(TokenType::DStar) {
27226                let right = self.parse_unary()?;
27227                left = Expression::Power(Box::new(BinaryFunc {
27228                    original_name: Some("**".to_string()),
27229                    this: left,
27230                    expression: right,
27231                    inferred_type: None,
27232                }));
27233            } else if matches!(
27234                self.config.dialect,
27235                Some(crate::dialects::DialectType::PostgreSQL)
27236                    | Some(crate::dialects::DialectType::Redshift)
27237                    | Some(crate::dialects::DialectType::DuckDB)
27238            ) && self.match_token(TokenType::Caret)
27239            {
27240                let right = self.parse_unary()?;
27241                left = Expression::Power(Box::new(BinaryFunc {
27242                    original_name: None,
27243                    this: left,
27244                    expression: right,
27245                    inferred_type: None,
27246                }));
27247            } else {
27248                return Ok(left);
27249            }
27250        }
27251    }
27252
27253    /// Try to parse a type literal expression like: point '(4,4)', timestamp '2024-01-01'
27254    /// PostgreSQL allows type name followed by string literal as a cast shorthand.
27255    /// Returns None if not a type literal pattern, so caller can fall through to parse_primary.
27256    fn try_parse_type_literal(&mut self) -> Result<Option<Expression>> {
27257        // Save position for backtracking
27258        let start_pos = self.current;
27259
27260        // Check if we're at an identifier or Var token that could be a type name
27261        if !self.check(TokenType::Identifier) && !self.check(TokenType::Var) {
27262            return Ok(None);
27263        }
27264
27265        // Get the potential type name without consuming
27266        let type_name = self.peek().text.to_ascii_uppercase();
27267
27268        // Check if this looks like a known data type that supports literal syntax
27269        // These are types where PostgreSQL allows TYPE 'value' syntax
27270        // NOTE: DATE, TIME, TIMESTAMP, INTERVAL are NOT here because they have their own
27271        // token types and are handled specially in parse_primary
27272        let is_type_literal_type = matches!(
27273            type_name.as_str(),
27274            // Geometric types (PostgreSQL)
27275            "POINT" | "LINE" | "LSEG" | "BOX" | "PATH" | "POLYGON" | "CIRCLE" |
27276            // Network types (PostgreSQL)
27277            "INET" | "CIDR" | "MACADDR" | "MACADDR8" |
27278            // Other types that support literal syntax
27279            "UUID" | "JSON" | "JSONB" | "XML" | "BIT" | "VARBIT" |
27280            // Range types (PostgreSQL)
27281            "INT4RANGE" | "INT8RANGE" | "NUMRANGE" | "TSRANGE" | "TSTZRANGE" | "DATERANGE"
27282        );
27283
27284        if !is_type_literal_type {
27285            return Ok(None);
27286        }
27287
27288        // Check if the next token (after type name) is a string literal
27289        if self.current + 1 >= self.tokens.len() {
27290            return Ok(None);
27291        }
27292
27293        if self.tokens[self.current + 1].token_type != TokenType::String {
27294            return Ok(None);
27295        }
27296
27297        // This looks like a type literal! Parse it.
27298        // Consume the type name
27299        self.skip();
27300
27301        // Try to parse the data type from the name
27302        let data_type = match self.parse_data_type_from_name(&type_name) {
27303            Ok(dt) => dt,
27304            Err(_) => {
27305                // If we can't parse the type, backtrack
27306                self.current = start_pos;
27307                return Ok(None);
27308            }
27309        };
27310
27311        // Parse the string literal
27312        if !self.check(TokenType::String) {
27313            // Backtrack - something went wrong
27314            self.current = start_pos;
27315            return Ok(None);
27316        }
27317
27318        let string_token = self.advance();
27319        let value = Expression::Literal(Box::new(Literal::String(string_token.text.clone())));
27320
27321        // JSON literal: JSON '"foo"' -> ParseJson expression (matches Python sqlglot)
27322        if matches!(data_type, DataType::Json | DataType::JsonB)
27323            || matches!(type_name.as_str(), "JSON" | "JSONB")
27324        {
27325            return Ok(Some(Expression::ParseJson(Box::new(UnaryFunc {
27326                this: value,
27327                original_name: None,
27328                inferred_type: None,
27329            }))));
27330        }
27331
27332        // Create the Cast expression
27333        Ok(Some(Expression::Cast(Box::new(Cast {
27334            this: value,
27335            to: data_type,
27336            trailing_comments: Vec::new(),
27337            double_colon_syntax: false,
27338            format: None,
27339            default: None,
27340            inferred_type: None,
27341        }))))
27342    }
27343
27344    /// Try to parse type shorthand CAST: INT 1, VARCHAR 'x', STRING 'x', TEXT 'y', etc.
27345    /// In generic mode (no dialect), a type keyword followed by a literal becomes CAST(literal AS type).
27346    /// This matches Python sqlglot's `_parse_types()` behavior.
27347    fn try_parse_type_shorthand_cast(&mut self) -> Result<Option<Expression>> {
27348        // Only apply in generic mode
27349        let is_generic = self.config.dialect.is_none()
27350            || matches!(
27351                self.config.dialect,
27352                Some(crate::dialects::DialectType::Generic)
27353            );
27354        if !is_generic {
27355            return Ok(None);
27356        }
27357
27358        let start_pos = self.current;
27359
27360        // Check if current token is a type keyword
27361        if !self.is_type_keyword() {
27362            return Ok(None);
27363        }
27364
27365        // Don't apply if the type keyword is followed by a left paren (function call)
27366        // or is not followed by a literal
27367        if self.current + 1 >= self.tokens.len() {
27368            return Ok(None);
27369        }
27370
27371        let next_type = self.tokens[self.current + 1].token_type;
27372        // The value after the type keyword must be a literal (number or string)
27373        if !matches!(next_type, TokenType::Number | TokenType::String) {
27374            return Ok(None);
27375        }
27376
27377        // Get the type name
27378        let type_token = self.advance();
27379        let type_name = type_token.text.to_ascii_uppercase();
27380
27381        // Parse the data type
27382        let data_type = match type_name.as_str() {
27383            "INT" | "INTEGER" => DataType::Int {
27384                length: None,
27385                integer_spelling: type_name == "INTEGER",
27386            },
27387            "BIGINT" => DataType::BigInt { length: None },
27388            "SMALLINT" => DataType::SmallInt { length: None },
27389            "TINYINT" => DataType::TinyInt { length: None },
27390            "FLOAT" => DataType::Float {
27391                precision: None,
27392                scale: None,
27393                real_spelling: false,
27394            },
27395            "DOUBLE" => DataType::Double {
27396                precision: None,
27397                scale: None,
27398            },
27399            "DECIMAL" | "NUMERIC" => DataType::Decimal {
27400                precision: None,
27401                scale: None,
27402            },
27403            "REAL" => DataType::Float {
27404                precision: None,
27405                scale: None,
27406                real_spelling: true,
27407            },
27408            "VARCHAR" => DataType::VarChar {
27409                length: None,
27410                parenthesized_length: false,
27411            },
27412            "CHAR" => DataType::Char { length: None },
27413            "TEXT" | "STRING" => DataType::Text,
27414            "BOOLEAN" | "BOOL" => DataType::Boolean,
27415            "BINARY" => DataType::Binary { length: None },
27416            "VARBINARY" => DataType::VarBinary { length: None },
27417            _ => {
27418                // Unknown type, backtrack
27419                self.current = start_pos;
27420                return Ok(None);
27421            }
27422        };
27423
27424        // Parse the literal value
27425        let value = if self.check(TokenType::String) {
27426            let tok = self.advance();
27427            Expression::Literal(Box::new(Literal::String(tok.text.clone())))
27428        } else if self.check(TokenType::Number) {
27429            let tok = self.advance();
27430            Expression::Literal(Box::new(Literal::Number(tok.text.clone())))
27431        } else {
27432            self.current = start_pos;
27433            return Ok(None);
27434        };
27435
27436        // Create the Cast expression
27437        Ok(Some(Expression::Cast(Box::new(Cast {
27438            this: value,
27439            to: data_type,
27440            trailing_comments: Vec::new(),
27441            double_colon_syntax: false,
27442            format: None,
27443            default: None,
27444            inferred_type: None,
27445        }))))
27446    }
27447
27448    /// Parse unary expressions
27449    fn parse_unary(&mut self) -> Result<Expression> {
27450        if self.match_token(TokenType::Plus) {
27451            // Unary plus is a no-op - just parse the inner expression
27452            // This handles +++1 -> 1, +-1 -> -1, etc.
27453            self.parse_unary()
27454        } else if self.match_token(TokenType::Dash) {
27455            let expr = self.parse_unary()?;
27456            Ok(Expression::Neg(Box::new(UnaryOp::new(expr))))
27457        } else if self.match_token(TokenType::Plus) {
27458            // Unary plus: +1, +expr — just return the inner expression (no-op)
27459            self.parse_unary()
27460        } else if self.match_token(TokenType::Tilde) {
27461            let expr = self.parse_unary()?;
27462            Ok(Expression::BitwiseNot(Box::new(UnaryOp::new(expr))))
27463        } else if self.match_token(TokenType::DPipeSlash) {
27464            // ||/ (Cube root - PostgreSQL)
27465            let expr = self.parse_unary()?;
27466            Ok(Expression::Cbrt(Box::new(UnaryFunc::with_name(
27467                expr,
27468                "||/".to_string(),
27469            ))))
27470        } else if self.match_token(TokenType::PipeSlash) {
27471            // |/ (Square root - PostgreSQL)
27472            let expr = self.parse_unary()?;
27473            Ok(Expression::Sqrt(Box::new(UnaryFunc::with_name(
27474                expr,
27475                "|/".to_string(),
27476            ))))
27477        } else if self.check(TokenType::DAt)
27478            && matches!(
27479                self.config.dialect,
27480                Some(crate::dialects::DialectType::DuckDB)
27481            )
27482        {
27483            // DuckDB @ operator: @(-1), @(expr), @-1
27484            // @ is the ABS operator in DuckDB with low precedence
27485            // Python sqlglot: "@": lambda self: exp.Abs(this=self._parse_bitwise())
27486            // This means @col + 1 parses as ABS(col + 1), not ABS(col) + 1
27487            self.skip(); // consume @
27488                         // Parse at bitwise level for correct precedence (matches Python sqlglot)
27489            let expr = self.parse_bitwise_or()?;
27490            Ok(Expression::Abs(Box::new(UnaryFunc::new(expr))))
27491        } else if self.check(TokenType::Var)
27492            && self.peek().text.starts_with('@')
27493            && matches!(
27494                self.config.dialect,
27495                Some(crate::dialects::DialectType::DuckDB)
27496            )
27497        {
27498            // DuckDB @ operator with identifier: @col, @col + 1
27499            // Tokenizer creates "@col" as a single Var token, so we need to handle it here
27500            // Python sqlglot: "@": lambda self: exp.Abs(this=self._parse_bitwise())
27501            let token = self.advance(); // consume @col token
27502            let col_name = &token.text[1..]; // strip leading @
27503
27504            // Create column expression for the identifier part
27505            let col_expr = Expression::boxed_column(Column {
27506                name: Identifier::new(col_name),
27507                table: None,
27508                join_mark: false,
27509                trailing_comments: Vec::new(),
27510                span: None,
27511                inferred_type: None,
27512            });
27513
27514            // Check if followed by operators that should be included in the ABS
27515            // We need to parse any remaining operators at bitwise level
27516            // First, check if there's a binary operator after this column
27517            if self.check(TokenType::Plus)
27518                || self.check(TokenType::Dash)
27519                || self.check(TokenType::Star)
27520                || self.check(TokenType::Slash)
27521                || self.check(TokenType::Percent)
27522                || self.check(TokenType::Amp)
27523                || self.check(TokenType::Pipe)
27524                || self.check(TokenType::Caret)
27525                || self.check(TokenType::LtLt)
27526                || self.check(TokenType::GtGt)
27527            {
27528                // There are more operators - we need to continue parsing at bitwise level
27529                // But parse_bitwise_or expects to start fresh, not continue with existing left
27530                // So we use a helper approach: parse_bitwise_continuation
27531                let full_expr = self.parse_bitwise_continuation(col_expr)?;
27532                Ok(Expression::Abs(Box::new(UnaryFunc::new(full_expr))))
27533            } else {
27534                // Just the column, no more operators
27535                Ok(Expression::Abs(Box::new(UnaryFunc::new(col_expr))))
27536            }
27537        } else if self.check(TokenType::DAt)
27538            && (self.check_next(TokenType::LParen) || self.check_next(TokenType::Dash))
27539        {
27540            // Non-DuckDB dialects: only handle @(expr) and @-expr as ABS
27541            self.skip(); // consume @
27542            let expr = self.parse_bitwise_or()?;
27543            Ok(Expression::Abs(Box::new(UnaryFunc::new(expr))))
27544        } else if self.check(TokenType::Prior)
27545            && !self.check_next(TokenType::As)
27546            && !self.check_next(TokenType::Comma)
27547            && !self.check_next(TokenType::RParen)
27548            && !self.check_next(TokenType::Semicolon)
27549            && self.current + 1 < self.tokens.len()
27550        {
27551            // Oracle PRIOR expression - references parent row's value in hierarchical queries
27552            // Can appear in SELECT list, CONNECT BY, or other expression contexts
27553            // Python sqlglot: "PRIOR": lambda self: self.expression(exp.Prior, this=self._parse_bitwise())
27554            // When followed by AS/comma/rparen/end, treat PRIOR as an identifier (column name)
27555            self.skip(); // consume PRIOR
27556            let expr = self.parse_bitwise_or()?;
27557            Ok(Expression::Prior(Box::new(Prior { this: expr })))
27558        } else {
27559            // Try to parse type literals like: point '(4,4)', timestamp '2024-01-01', interval '1 day'
27560            // PostgreSQL allows type name followed by string literal as a cast shorthand
27561            if let Some(type_literal) = self.try_parse_type_literal()? {
27562                return self.parse_postfix_operators(type_literal);
27563            }
27564            // Try to parse type shorthand CAST: INT 1, VARCHAR 'x', STRING 'x', TEXT 'y', etc.
27565            // In generic mode, type keyword followed by literal -> CAST(literal AS type)
27566            if let Some(type_cast) = self.try_parse_type_shorthand_cast()? {
27567                return self.parse_postfix_operators(type_cast);
27568            }
27569            let expr = self.parse_primary()?;
27570            // Handle postfix exclamation mark for Snowflake model attribute syntax: model!PREDICT(...)
27571            self.parse_postfix_operators(expr)
27572        }
27573    }
27574
27575    /// Parse postfix operators like ! (model attribute in Snowflake) and : (JSON path in Snowflake)
27576    fn parse_postfix_operators(&mut self, mut expr: Expression) -> Result<Expression> {
27577        // Handle Oracle/Redshift outer join marker (+) after column reference
27578        // Syntax: column_ref (+) indicates optional side of join
27579        if self.check(TokenType::LParen) && self.check_next(TokenType::Plus) {
27580            // Look ahead to verify it's ( + )
27581            let saved_pos = self.current;
27582            if self.match_token(TokenType::LParen)
27583                && self.match_token(TokenType::Plus)
27584                && self.match_token(TokenType::RParen)
27585            {
27586                // Set join_mark on the column expression
27587                if let Expression::Column(ref mut col) = expr {
27588                    col.join_mark = true;
27589                }
27590            } else {
27591                self.current = saved_pos;
27592            }
27593        }
27594
27595        // Handle EXCLAMATION for Snowflake model attribute syntax: model!PREDICT(...)
27596        while self.match_token(TokenType::Exclamation) {
27597            // Parse the attribute/function after the exclamation mark
27598            // This can be either a simple identifier (model!admin) or a function call (model!PREDICT(1))
27599            let attr = self.parse_primary()?;
27600            expr = Expression::ModelAttribute(Box::new(ModelAttribute {
27601                this: Box::new(expr),
27602                expression: Box::new(attr),
27603            }));
27604        }
27605
27606        // Handle COLON for Snowflake JSON path extraction: a:field or a:field.subfield
27607        // This creates JSONExtract expressions that transform to GET_PATH(a, 'field') in Snowflake
27608        expr = self.parse_colon_json_path(expr)?;
27609
27610        // Handle DCOLON (::) - in SingleStore it's JSON extraction, in other dialects it's cast
27611        // SingleStore JSON path syntax:
27612        //   a::b -> JSON_EXTRACT_JSON(a, 'b')
27613        //   a::$b -> JSON_EXTRACT_STRING(a, 'b')
27614        //   a::%b -> JSON_EXTRACT_DOUBLE(a, 'b')
27615        //   a::?names -> JSON match syntax
27616        if matches!(
27617            self.config.dialect,
27618            Some(crate::dialects::DialectType::SingleStore)
27619        ) {
27620            expr = self.parse_singlestore_json_path(expr)?;
27621        } else {
27622            // For other dialects, :: is cast syntax
27623            // IMPORTANT: Use parse_data_type_for_cast to avoid consuming subscripts as array dimensions
27624            // e.g., ::VARIANT[0] should be cast to VARIANT followed by subscript [0]
27625            while self.match_token(TokenType::DColon) {
27626                let data_type = self.parse_data_type_for_cast()?;
27627                expr = Expression::Cast(Box::new(Cast {
27628                    this: expr,
27629                    to: data_type,
27630                    trailing_comments: Vec::new(),
27631                    double_colon_syntax: true,
27632                    format: None,
27633                    default: None,
27634                    inferred_type: None,
27635                }));
27636            }
27637        }
27638
27639        // Teradata: (FORMAT '...') phrase after an expression
27640        if matches!(
27641            self.config.dialect,
27642            Some(crate::dialects::DialectType::Teradata)
27643        ) && self.check(TokenType::LParen)
27644            && self.check_next(TokenType::Format)
27645        {
27646            self.skip(); // consume (
27647            self.skip(); // consume FORMAT
27648            let format = self.expect_string()?;
27649            self.expect(TokenType::RParen)?;
27650            expr = Expression::FormatPhrase(Box::new(FormatPhrase {
27651                this: Box::new(expr),
27652                format,
27653            }));
27654        }
27655
27656        Ok(expr)
27657    }
27658
27659    /// Parse SingleStore JSON path extraction syntax
27660    /// Examples:
27661    ///   a::b -> JSON_EXTRACT_JSON(a, 'b')
27662    ///   a::$b -> JSON_EXTRACT_STRING(a, 'b')
27663    ///   a::%b -> JSON_EXTRACT_DOUBLE(a, 'b')
27664    ///   a::`b`::`2` -> nested JSON extraction
27665    fn parse_singlestore_json_path(&mut self, mut expr: Expression) -> Result<Expression> {
27666        loop {
27667            if self.match_token(TokenType::DColon) {
27668                // :: followed by identifier -> JSON_EXTRACT_JSON
27669                // Check if next is a backtick-quoted identifier or regular identifier
27670                let path_key = if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
27671                    self.advance().text
27672                } else if self.check(TokenType::Number) {
27673                    // a::2 -> JSON_EXTRACT_JSON(a, '2')
27674                    self.advance().text
27675                } else {
27676                    return Err(self.parse_error("Expected identifier after ::"));
27677                };
27678
27679                expr = Expression::Function(Box::new(Function::new(
27680                    "JSON_EXTRACT_JSON".to_string(),
27681                    vec![expr, Expression::string(&path_key)],
27682                )));
27683            } else if self.match_token(TokenType::DColonDollar) {
27684                // ::$ followed by identifier -> JSON_EXTRACT_STRING
27685                let path_key = if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
27686                    self.advance().text
27687                } else {
27688                    return Err(self.parse_error("Expected identifier after ::$"));
27689                };
27690
27691                expr = Expression::Function(Box::new(Function::new(
27692                    "JSON_EXTRACT_STRING".to_string(),
27693                    vec![expr, Expression::string(&path_key)],
27694                )));
27695            } else if self.match_token(TokenType::DColonPercent) {
27696                // ::% followed by identifier -> JSON_EXTRACT_DOUBLE
27697                let path_key = if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
27698                    self.advance().text
27699                } else {
27700                    return Err(self.parse_error("Expected identifier after ::%"));
27701                };
27702
27703                expr = Expression::Function(Box::new(Function::new(
27704                    "JSON_EXTRACT_DOUBLE".to_string(),
27705                    vec![expr, Expression::string(&path_key)],
27706                )));
27707            } else if self.match_token(TokenType::DColonQMark) {
27708                // ::? followed by identifier -> Keep as JSONMatchAny expression for now
27709                let path_key = if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
27710                    self.advance().text
27711                } else {
27712                    return Err(self.parse_error("Expected identifier after ::?"));
27713                };
27714
27715                // For now, create a function that will be handled specially
27716                expr = Expression::Function(Box::new(Function::new(
27717                    "JSON_EXTRACT_JSON".to_string(), // placeholder
27718                    vec![expr, Expression::string(&format!("?{}", path_key))],
27719                )));
27720            } else {
27721                break;
27722            }
27723        }
27724        Ok(expr)
27725    }
27726
27727    /// Parse colon-separated JSON path syntax (Snowflake variant extraction)
27728    /// Examples:
27729    ///   a:from -> GET_PATH(a, 'from')
27730    ///   a:b.c.d -> GET_PATH(a, 'b.c.d')
27731    ///   a:from::STRING -> CAST(GET_PATH(a, 'from') AS VARCHAR)
27732    ///   a:b:c.d -> GET_PATH(a, 'b.c.d') (multiple colons joined into single path)
27733    fn parse_colon_json_path(&mut self, mut this: Expression) -> Result<Expression> {
27734        // DuckDB uses colon for prefix alias syntax (e.g., "alias: expr" means "expr AS alias")
27735        // Skip JSON path extraction for DuckDB - it's handled separately in parse_select_expressions
27736        if matches!(
27737            self.config.dialect,
27738            Some(crate::dialects::DialectType::DuckDB)
27739        ) {
27740            return Ok(this);
27741        }
27742
27743        // ClickHouse uses : as part of the ternary operator (condition ? true : false)
27744        // Skip JSON path extraction for ClickHouse to avoid consuming the ternary separator
27745        if matches!(
27746            self.config.dialect,
27747            Some(crate::dialects::DialectType::ClickHouse)
27748        ) {
27749            return Ok(this);
27750        }
27751
27752        // Only apply colon JSON path parsing to identifiers, columns, and function results
27753        // This prevents {'key': 'value'} object literals from being misinterpreted
27754        let is_valid_json_path_base = matches!(
27755            &this,
27756            Expression::Column(_) |
27757            Expression::Identifier(_) |
27758            Expression::Dot(_) |
27759            Expression::JSONExtract(_) |  // Allow chained paths like a:b:c
27760            Expression::Function(_) |     // Allow function results like PARSE_JSON(...):x
27761            Expression::ParseJson(_) |    // Allow PARSE_JSON specifically
27762            Expression::Parameter(_) // Allow positional params like $1:name
27763        );
27764
27765        if !is_valid_json_path_base {
27766            return Ok(this);
27767        }
27768
27769        // Check if we have a colon (but NOT double-colon which is cast syntax)
27770        if !self.check(TokenType::Colon) {
27771            return Ok(this);
27772        }
27773
27774        // Make sure this is not a double-colon (::) which is cast syntax
27775        if self.check_next(TokenType::Colon) {
27776            // This is :: (DColon should have been tokenized, but just in case)
27777            return Ok(this);
27778        }
27779
27780        // Collect ALL the JSON path parts across multiple colons
27781        // a:b.c:d.e -> GET_PATH(a, 'b.c.d.e')
27782        // a:b[0].c -> GET_PATH(a, 'b[0].c')
27783        let mut path_string = String::new();
27784
27785        // Parse all colon-separated path segments
27786        while self.check(TokenType::Colon) && !self.check_next(TokenType::Colon) {
27787            // Save position before consuming colon so we can backtrack
27788            // if what follows isn't a valid JSON path component (e.g., DuckDB's "foo: 1" label syntax)
27789            let saved_pos = self.current;
27790            let saved_path_len = path_string.len();
27791
27792            // Consume the colon
27793            self.skip();
27794
27795            // Parse first path component (required) - can be any identifier including keywords
27796            // Also handle backtick-quoted identifiers like `zip code` or `fb:testid`
27797            // Also handle bracket notation directly after colon: c1:['price'] or c1:["foo bar"]
27798            // IMPORTANT: Check QuotedIdentifier FIRST since is_identifier_token() includes QuotedIdentifier
27799            let mut had_initial_component = false;
27800            if self.check(TokenType::QuotedIdentifier) {
27801                // Quoted field name in variant access
27802                // Snowflake: v:"fruit" → double-quoted key → stored as plain text 'fruit'
27803                // Databricks: raw:`zip code` → backtick-quoted key → stored as bracket notation '["zip code"]'
27804                let quoted_name = self.advance().text.clone();
27805                let is_snowflake = matches!(
27806                    self.config.dialect,
27807                    Some(crate::dialects::DialectType::Snowflake)
27808                );
27809                let needs_bracket = quoted_name.contains(' ') || quoted_name.contains('\'');
27810                if is_snowflake && !needs_bracket {
27811                    // Snowflake double-quoted keys without special chars are stored as plain text
27812                    // Add dot separator for plain segments
27813                    if !path_string.is_empty() {
27814                        path_string.push('.');
27815                    }
27816                    path_string.push_str(&quoted_name);
27817                } else if is_snowflake && needs_bracket {
27818                    // Snowflake keys with spaces/apostrophes use bracket notation: ["key with spaces"]
27819                    // No dot before bracket notation
27820                    path_string.push_str("[\"");
27821                    // Don't escape single quotes here - the generator will handle escaping
27822                    // when outputting the string literal
27823                    path_string.push_str(&quoted_name);
27824                    path_string.push_str("\"]");
27825                } else {
27826                    // Other dialects (Databricks): wrap in bracket notation
27827                    // No dot before bracket notation
27828                    path_string.push_str("[\"");
27829                    for c in quoted_name.chars() {
27830                        if c == '"' {
27831                            path_string.push_str("\\\"");
27832                        } else {
27833                            path_string.push(c);
27834                        }
27835                    }
27836                    path_string.push_str("\"]");
27837                }
27838                had_initial_component = true;
27839            } else if self.is_identifier_token()
27840                || self.is_safe_keyword_as_identifier()
27841                || self.is_reserved_keyword_as_identifier()
27842            {
27843                // Add a dot separator for plain identifier segments
27844                if !path_string.is_empty() {
27845                    path_string.push('.');
27846                }
27847                let first_part = self.advance().text;
27848                path_string.push_str(&first_part);
27849                had_initial_component = true;
27850            } else if self.check(TokenType::LBracket) {
27851                // Bracket notation directly after colon: c1:['price'] or c1:["foo bar"]
27852                // Mark that we have a valid path start - the bracket will be parsed in the loop below
27853                had_initial_component = true;
27854            }
27855
27856            if !had_initial_component {
27857                // Not a valid JSON path component - backtrack and stop
27858                // This handles cases like DuckDB's "foo: 1" label/alias syntax
27859                // where the colon is followed by a non-identifier (e.g., a number)
27860                self.current = saved_pos;
27861                path_string.truncate(saved_path_len);
27862                break;
27863            }
27864
27865            // Parse optional array indices and additional path components
27866            loop {
27867                // Handle array index: [0], [1], [*], ['key'], ["key"], etc.
27868                if self.match_token(TokenType::LBracket) {
27869                    // Parse the index expression (typically a number, identifier, * for wildcard, or string key)
27870                    if self.check(TokenType::Number) {
27871                        path_string.push('[');
27872                        let idx = self.advance().text;
27873                        path_string.push_str(&idx);
27874                        self.expect(TokenType::RBracket)?;
27875                        path_string.push(']');
27876                    } else if self.check(TokenType::Star) {
27877                        // Wildcard array access: [*] matches all array elements
27878                        path_string.push('[');
27879                        self.skip();
27880                        path_string.push('*');
27881                        self.expect(TokenType::RBracket)?;
27882                        path_string.push(']');
27883                    } else if self.check(TokenType::String) {
27884                        // Single-quoted string key access: ['bicycle']
27885                        // Convert to dot notation for simple keys, keep bracket notation for keys with spaces
27886                        let key = self.advance().text;
27887                        self.expect(TokenType::RBracket)?;
27888                        // Check if the key contains spaces or special characters that require bracket notation
27889                        let needs_brackets =
27890                            key.contains(' ') || key.contains('"') || key.contains('\'');
27891                        if needs_brackets {
27892                            // Keep bracket notation with double quotes: ["zip code"]
27893                            path_string.push_str("[\"");
27894                            for c in key.chars() {
27895                                if c == '"' {
27896                                    path_string.push_str("\\\"");
27897                                } else {
27898                                    path_string.push(c);
27899                                }
27900                            }
27901                            path_string.push_str("\"]");
27902                        } else {
27903                            // Convert to dot notation: store['bicycle'] -> store.bicycle
27904                            // But only add dot if path_string is not empty (handles c1:['price'] -> c1:price)
27905                            if !path_string.is_empty() {
27906                                path_string.push('.');
27907                            }
27908                            path_string.push_str(&key);
27909                        }
27910                    } else if self.check(TokenType::QuotedIdentifier) {
27911                        // Double-quoted string key access: ["zip code"]
27912                        // These are tokenized as QuotedIdentifier, not String
27913                        // Must be checked BEFORE is_identifier_token() since it includes QuotedIdentifier
27914                        let key = self.advance().text;
27915                        self.expect(TokenType::RBracket)?;
27916                        // Always use bracket notation with double quotes for quoted identifiers
27917                        path_string.push_str("[\"");
27918                        for c in key.chars() {
27919                            if c == '"' {
27920                                path_string.push_str("\\\"");
27921                            } else {
27922                                path_string.push(c);
27923                            }
27924                        }
27925                        path_string.push_str("\"]");
27926                    } else if self.is_identifier_token() {
27927                        // Check if this is a "dynamic bracket" — a column reference like s.x
27928                        // inside brackets. We detect this by checking if the identifier is
27929                        // followed by a dot (making it a qualified column reference).
27930                        let saved_bracket_pos = self.current;
27931                        let ident_text = self.advance().text.clone();
27932                        if self.check(TokenType::Dot) {
27933                            // Dynamic bracket: [s.x] where s.x is a column reference
27934                            // Backtrack to before the identifier so we can parse the full expression
27935                            self.current = saved_bracket_pos;
27936                            // Parse the full expression inside the brackets
27937                            let index_expr = self.parse_expression()?;
27938                            self.expect(TokenType::RBracket)?;
27939
27940                            // Build JSONExtract for the path accumulated so far
27941                            let path_expr =
27942                                Expression::Literal(Box::new(Literal::String(path_string)));
27943                            let json_extract = Expression::JSONExtract(Box::new(JSONExtract {
27944                                this: Box::new(this),
27945                                expression: Box::new(path_expr),
27946                                only_json_types: None,
27947                                expressions: Vec::new(),
27948                                variant_extract: Some(Box::new(Expression::Boolean(
27949                                    BooleanLiteral { value: true },
27950                                ))),
27951                                json_query: None,
27952                                option: None,
27953                                quote: None,
27954                                on_condition: None,
27955                                requires_json: None,
27956                            }));
27957
27958                            // Wrap in Subscript
27959                            let subscript = Expression::Subscript(Box::new(Subscript {
27960                                this: json_extract,
27961                                index: index_expr,
27962                            }));
27963
27964                            // Now continue parsing any remaining path after the dynamic bracket.
27965                            // This handles patterns like [s.x].r.d or [s.x]:r or [s.x].r.d[s.y]
27966                            // We parse dots into a new path string, and if we encounter another
27967                            // dynamic bracket, we recurse.
27968                            let mut suffix_path = String::new();
27969                            loop {
27970                                if self.match_token(TokenType::Dot) {
27971                                    // Dot access after dynamic bracket: [s.x].r.d
27972                                    if !suffix_path.is_empty() {
27973                                        suffix_path.push('.');
27974                                    }
27975                                    if self.is_identifier_token()
27976                                        || self.is_safe_keyword_as_identifier()
27977                                        || self.is_reserved_keyword_as_identifier()
27978                                    {
27979                                        let part = self.advance().text;
27980                                        suffix_path.push_str(&part);
27981                                    } else {
27982                                        return Err(self.parse_error(
27983                                            "Expected identifier after . in JSON path",
27984                                        ));
27985                                    }
27986                                } else if self.check(TokenType::LBracket) {
27987                                    // Another bracket after dot path: [s.x].r.d[s.y]
27988                                    // We need to check if this bracket contains a dynamic expression
27989                                    break;
27990                                } else {
27991                                    break;
27992                                }
27993                            }
27994
27995                            // Build the result depending on whether there are suffix dot paths
27996                            let result_base = if suffix_path.is_empty() {
27997                                subscript
27998                            } else {
27999                                // Create another JSONExtract for the suffix path
28000                                Expression::JSONExtract(Box::new(JSONExtract {
28001                                    this: Box::new(subscript),
28002                                    expression: Box::new(Expression::Literal(Box::new(
28003                                        Literal::String(suffix_path),
28004                                    ))),
28005                                    only_json_types: None,
28006                                    expressions: Vec::new(),
28007                                    variant_extract: Some(Box::new(Expression::Boolean(
28008                                        BooleanLiteral { value: true },
28009                                    ))),
28010                                    json_query: None,
28011                                    option: None,
28012                                    quote: None,
28013                                    on_condition: None,
28014                                    requires_json: None,
28015                                }))
28016                            };
28017
28018                            // Check for another bracket (e.g., [s.y] after .r.d)
28019                            if self.match_token(TokenType::LBracket) {
28020                                // Parse the index expression
28021                                let index_expr2 = self.parse_expression()?;
28022                                self.expect(TokenType::RBracket)?;
28023                                let subscript2 = Expression::Subscript(Box::new(Subscript {
28024                                    this: result_base,
28025                                    index: index_expr2,
28026                                }));
28027                                // Update `this` and `path_string` so we properly continue the outer loop
28028                                this = subscript2;
28029                                path_string = String::new();
28030                            } else {
28031                                this = result_base;
28032                                path_string = String::new();
28033                            }
28034
28035                            // Continue parsing more colon segments or break
28036                            // Need to break out of the inner loop to let the outer while loop
28037                            // check for more colon segments
28038                            break;
28039                        } else {
28040                            // Simple identifier index: [idx]
28041                            path_string.push('[');
28042                            path_string.push_str(&ident_text);
28043                            self.expect(TokenType::RBracket)?;
28044                            path_string.push(']');
28045                        }
28046                    } else {
28047                        // Empty brackets or unexpected token - just close the bracket
28048                        path_string.push('[');
28049                        self.expect(TokenType::RBracket)?;
28050                        path_string.push(']');
28051                    }
28052                } else if self.match_token(TokenType::Dot) {
28053                    // Handle dot access
28054                    path_string.push('.');
28055                    if self.is_identifier_token()
28056                        || self.is_safe_keyword_as_identifier()
28057                        || self.is_reserved_keyword_as_identifier()
28058                    {
28059                        let part = self.advance().text;
28060                        path_string.push_str(&part);
28061                    } else {
28062                        return Err(self.parse_error("Expected identifier after . in JSON path"));
28063                    }
28064                } else {
28065                    break;
28066                }
28067            }
28068        }
28069
28070        // If no path was parsed (e.g., backtracked on first colon), return the original expression
28071        if path_string.is_empty() {
28072            return Ok(this);
28073        }
28074
28075        // Create the JSONExtract expression with variant_extract marker
28076        let path_expr = Expression::Literal(Box::new(Literal::String(path_string)));
28077        let json_extract = Expression::JSONExtract(Box::new(JSONExtract {
28078            this: Box::new(this),
28079            expression: Box::new(path_expr),
28080            only_json_types: None,
28081            expressions: Vec::new(),
28082            variant_extract: Some(Box::new(Expression::Boolean(BooleanLiteral {
28083                value: true,
28084            }))),
28085            json_query: None,
28086            option: None,
28087            quote: None,
28088            on_condition: None,
28089            requires_json: None,
28090        }));
28091
28092        Ok(json_extract)
28093    }
28094
28095    /// Check if the current token is a reserved keyword that can be used as identifier in JSON path
28096    fn is_reserved_keyword_as_identifier(&self) -> bool {
28097        if self.is_at_end() {
28098            return false;
28099        }
28100        let token = self.peek();
28101        // Allow reserved keywords like FROM, SELECT, etc. as JSON path components
28102        matches!(
28103            token.token_type,
28104            TokenType::From
28105                | TokenType::Select
28106                | TokenType::Where
28107                | TokenType::And
28108                | TokenType::Or
28109                | TokenType::Not
28110                | TokenType::In
28111                | TokenType::As
28112                | TokenType::On
28113                | TokenType::Join
28114                | TokenType::Left
28115                | TokenType::Right
28116                | TokenType::Inner
28117                | TokenType::Outer
28118                | TokenType::Cross
28119                | TokenType::Full
28120                | TokenType::Group
28121                | TokenType::Order
28122                | TokenType::By
28123                | TokenType::Having
28124                | TokenType::Limit
28125                | TokenType::Offset
28126                | TokenType::Union
28127                | TokenType::Except
28128                | TokenType::Intersect
28129                | TokenType::All
28130                | TokenType::Distinct
28131                | TokenType::Case
28132                | TokenType::When
28133                | TokenType::Then
28134                | TokenType::Else
28135                | TokenType::End
28136                | TokenType::Null
28137                | TokenType::True
28138                | TokenType::False
28139                | TokenType::Between
28140                | TokenType::Like
28141                | TokenType::Is
28142                | TokenType::Exists
28143                | TokenType::Insert
28144                | TokenType::Update
28145                | TokenType::Delete
28146                | TokenType::Create
28147                | TokenType::Alter
28148                | TokenType::Drop
28149                | TokenType::Table
28150                | TokenType::View
28151                | TokenType::Index
28152                | TokenType::Set
28153                | TokenType::Values
28154                | TokenType::Into
28155                | TokenType::Default
28156                | TokenType::Key
28157                | TokenType::Unique
28158                | TokenType::Check
28159                | TokenType::Constraint
28160                | TokenType::References
28161        )
28162    }
28163
28164    /// Parse primary expressions
28165    fn parse_primary(&mut self) -> Result<Expression> {
28166        // Handle APPROXIMATE COUNT(DISTINCT expr) - Redshift syntax
28167        // Parses as ApproxDistinct expression
28168        if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("APPROXIMATE") {
28169            let saved_pos = self.current;
28170            self.skip(); // consume APPROXIMATE
28171                         // Parse the COUNT(DISTINCT ...) that follows
28172            let func = self.parse_primary()?;
28173            // Check if it's COUNT with DISTINCT
28174            if let Expression::Count(ref count_expr) = func {
28175                if count_expr.distinct {
28176                    let this_expr = count_expr.this.clone().unwrap_or_else(|| {
28177                        Expression::Star(crate::expressions::Star {
28178                            table: None,
28179                            except: None,
28180                            replace: None,
28181                            rename: None,
28182                            trailing_comments: Vec::new(),
28183                            span: None,
28184                        })
28185                    });
28186                    return Ok(Expression::ApproxDistinct(Box::new(
28187                        crate::expressions::AggFunc {
28188                            this: this_expr,
28189                            distinct: false,
28190                            filter: None,
28191                            order_by: Vec::new(),
28192                            name: Some("APPROX_DISTINCT".to_string()),
28193                            ignore_nulls: None,
28194                            having_max: None,
28195                            limit: None,
28196                            inferred_type: None,
28197                        },
28198                    )));
28199                }
28200            }
28201            // Not COUNT(DISTINCT ...) - backtrack
28202            self.current = saved_pos;
28203        }
28204
28205        if let Some(connect_by_root) = self.try_parse_connect_by_root_expression()? {
28206            return Ok(connect_by_root);
28207        }
28208
28209        // PostgreSQL VARIADIC prefix in function call arguments
28210        // e.g., SELECT MLEAST(VARIADIC ARRAY[10, -1, 5, 4.4])
28211        if matches!(
28212            self.config.dialect,
28213            Some(crate::dialects::DialectType::PostgreSQL)
28214                | Some(crate::dialects::DialectType::Redshift)
28215        ) {
28216            if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("VARIADIC") {
28217                self.skip(); // consume VARIADIC
28218                let expr = self.parse_bitwise_or()?;
28219                return Ok(Expression::Variadic(Box::new(
28220                    crate::expressions::Variadic {
28221                        this: Box::new(expr),
28222                    },
28223                )));
28224            }
28225        }
28226
28227        // MySQL charset introducer: _utf8mb4 'string', _latin1 x'hex', etc.
28228        if matches!(
28229            self.config.dialect,
28230            Some(crate::dialects::DialectType::MySQL)
28231                | Some(crate::dialects::DialectType::SingleStore)
28232                | Some(crate::dialects::DialectType::Doris)
28233                | Some(crate::dialects::DialectType::StarRocks)
28234        ) {
28235            if self.check(TokenType::Var) || self.check(TokenType::Identifier) {
28236                if self.peek().text.starts_with('_')
28237                    && Self::is_mysql_charset_introducer(&self.peek().text.to_ascii_uppercase())
28238                {
28239                    // Check if next token is a string literal or hex string
28240                    if self.current + 1 < self.tokens.len() {
28241                        let next_tt = self.tokens[self.current + 1].token_type;
28242                        if matches!(
28243                            next_tt,
28244                            TokenType::String | TokenType::HexString | TokenType::BitString
28245                        ) {
28246                            let charset_token = self.advance(); // consume charset name
28247                            let charset_name = charset_token.text.clone();
28248                            let literal = self.parse_primary()?; // parse the string/hex literal
28249                            return Ok(Expression::Introducer(Box::new(
28250                                crate::expressions::Introducer {
28251                                    this: Box::new(Expression::Column(Box::new(
28252                                        crate::expressions::Column {
28253                                            name: crate::expressions::Identifier {
28254                                                name: charset_name,
28255                                                quoted: false,
28256                                                trailing_comments: Vec::new(),
28257                                                span: None,
28258                                            },
28259                                            table: None,
28260                                            join_mark: false,
28261                                            trailing_comments: Vec::new(),
28262                                            span: None,
28263                                            inferred_type: None,
28264                                        },
28265                                    ))),
28266                                    expression: Box::new(literal),
28267                                },
28268                            )));
28269                        }
28270                    }
28271                }
28272            }
28273        }
28274
28275        // Array literal: [1, 2, 3] or comprehension: [expr FOR var IN iterator]
28276        if self.match_token(TokenType::LBracket) {
28277            // Parse empty array: []
28278            if self.match_token(TokenType::RBracket) {
28279                return Ok(Expression::ArrayFunc(Box::new(ArrayConstructor {
28280                    expressions: Vec::new(),
28281                    bracket_notation: true,
28282                    use_list_keyword: false,
28283                })));
28284            }
28285
28286            // Parse first expression
28287            let first_expr = self.parse_expression()?;
28288
28289            // Check for comprehension syntax: [expr FOR var IN iterator [IF condition]]
28290            if self.match_token(TokenType::For) {
28291                // Parse loop variable - typically a simple identifier like 'x'
28292                let loop_var = self.parse_primary()?;
28293
28294                // Parse optional position (second variable after comma)
28295                let position = if self.match_token(TokenType::Comma) {
28296                    Some(self.parse_primary()?)
28297                } else {
28298                    None
28299                };
28300
28301                // Expect IN keyword
28302                if !self.match_token(TokenType::In) {
28303                    return Err(self.parse_error("Expected IN in comprehension"));
28304                }
28305
28306                // Parse iterator expression
28307                let iterator = self.parse_expression()?;
28308
28309                // Parse optional condition after IF
28310                let condition = if self.match_token(TokenType::If) {
28311                    Some(self.parse_expression()?)
28312                } else {
28313                    None
28314                };
28315
28316                // Expect closing bracket
28317                self.expect(TokenType::RBracket)?;
28318
28319                // Return Comprehension
28320                return Ok(Expression::Comprehension(Box::new(Comprehension {
28321                    this: Box::new(first_expr),
28322                    expression: Box::new(loop_var),
28323                    position: position.map(Box::new),
28324                    iterator: Some(Box::new(iterator)),
28325                    condition: condition.map(Box::new),
28326                })));
28327            }
28328
28329            // Regular array - continue parsing elements
28330            // ClickHouse allows AS aliases in array: [1 AS a, 2 AS b]
28331            let first_expr = if matches!(
28332                self.config.dialect,
28333                Some(crate::dialects::DialectType::ClickHouse)
28334            ) && self.check(TokenType::As)
28335                && !self.check_next(TokenType::RBracket)
28336            {
28337                self.skip(); // consume AS
28338                let alias = self.expect_identifier()?;
28339                Expression::Alias(Box::new(Alias::new(first_expr, Identifier::new(alias))))
28340            } else {
28341                first_expr
28342            };
28343            let mut expressions = vec![first_expr];
28344            while self.match_token(TokenType::Comma) {
28345                // Handle trailing comma
28346                if self.check(TokenType::RBracket) {
28347                    break;
28348                }
28349                let expr = self.parse_expression()?;
28350                // ClickHouse: handle AS alias on array elements
28351                let expr = if matches!(
28352                    self.config.dialect,
28353                    Some(crate::dialects::DialectType::ClickHouse)
28354                ) && self.check(TokenType::As)
28355                    && !self.check_next(TokenType::RBracket)
28356                {
28357                    self.skip(); // consume AS
28358                    let alias = self.expect_identifier()?;
28359                    Expression::Alias(Box::new(Alias::new(expr, Identifier::new(alias))))
28360                } else {
28361                    expr
28362                };
28363                expressions.push(expr);
28364            }
28365            self.expect(TokenType::RBracket)?;
28366            return self.maybe_parse_subscript(Expression::ArrayFunc(Box::new(ArrayConstructor {
28367                expressions,
28368                bracket_notation: true,
28369                use_list_keyword: false,
28370            })));
28371        }
28372
28373        // Map/Struct literal with curly braces: {'a': 1, 'b': 2}
28374        // Or Snowflake wildcard syntax: {*}, {tbl.*}, {* EXCLUDE (...)}, {* ILIKE '...'}
28375        if self.match_token(TokenType::LBrace) {
28376            // ClickHouse query parameter: {name: Type}
28377            // We consumed `{` above, so rewind and let the dedicated parser consume it.
28378            if matches!(
28379                self.config.dialect,
28380                Some(crate::dialects::DialectType::ClickHouse)
28381            ) {
28382                self.current -= 1;
28383                if let Some(param) = self.parse_clickhouse_braced_parameter()? {
28384                    return self.maybe_parse_subscript(param);
28385                }
28386                // Not a ClickHouse query parameter, restore position after `{` for map/wildcard parsing.
28387                self.current += 1;
28388            }
28389
28390            // Parse empty map: {}
28391            if self.match_token(TokenType::RBrace) {
28392                return self.maybe_parse_subscript(Expression::MapFunc(Box::new(MapConstructor {
28393                    keys: Vec::new(),
28394                    values: Vec::new(),
28395                    curly_brace_syntax: true,
28396                    with_map_keyword: false,
28397                })));
28398            }
28399
28400            // Check for ODBC escape syntax: {fn function_name(args)}
28401            // This must be checked before wildcards and map literals
28402            if self.check_identifier("fn") {
28403                self.skip(); // consume 'fn'
28404                             // Parse function call
28405                let func_name = self.expect_identifier_or_keyword_with_quoted()?;
28406                self.expect(TokenType::LParen)?;
28407
28408                // Parse function arguments
28409                let mut args = Vec::new();
28410                if !self.check(TokenType::RParen) {
28411                    loop {
28412                        args.push(self.parse_expression()?);
28413                        if !self.match_token(TokenType::Comma) {
28414                            break;
28415                        }
28416                    }
28417                }
28418                self.expect(TokenType::RParen)?;
28419                self.expect(TokenType::RBrace)?;
28420
28421                // Return as a regular function call (the ODBC escape is just syntax sugar)
28422                return Ok(Expression::Function(Box::new(Function::new(
28423                    func_name.name,
28424                    args,
28425                ))));
28426            }
28427
28428            // Check for ODBC datetime literals: {d'2024-01-01'}, {t'12:00:00'}, {ts'2024-01-01 12:00:00'}
28429            if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
28430                let type_text = self.peek().text.to_lowercase();
28431                if (type_text == "d" || type_text == "t" || type_text == "ts")
28432                    && self.check_next(TokenType::String)
28433                {
28434                    self.skip(); // consume type indicator (d, t, or ts)
28435                    let value = self.expect_string()?;
28436                    self.expect(TokenType::RBrace)?;
28437
28438                    // Return appropriate expression based on type
28439                    return match type_text.as_str() {
28440                        "d" => Ok(Expression::Date(Box::new(
28441                            crate::expressions::UnaryFunc::new(Expression::Literal(Box::new(
28442                                crate::expressions::Literal::String(value),
28443                            ))),
28444                        ))),
28445                        "t" => Ok(Expression::Time(Box::new(
28446                            crate::expressions::UnaryFunc::new(Expression::Literal(Box::new(
28447                                crate::expressions::Literal::String(value),
28448                            ))),
28449                        ))),
28450                        "ts" => Ok(Expression::Timestamp(Box::new(
28451                            crate::expressions::TimestampFunc {
28452                                this: Some(Box::new(Expression::Literal(Box::new(
28453                                    crate::expressions::Literal::String(value),
28454                                )))),
28455                                zone: None,
28456                                with_tz: None,
28457                                safe: None,
28458                            },
28459                        ))),
28460                        _ => {
28461                            Err(self
28462                                .parse_error(format!("Unknown ODBC datetime type: {}", type_text)))
28463                        }
28464                    };
28465                }
28466            }
28467
28468            // Check for Snowflake wildcard syntax: {*}, {tbl.*}, {* EXCLUDE (...)}, {* ILIKE '...'}
28469            // Pattern: either {*...} or {identifier/var followed by .*}
28470            // Note: Identifiers may be tokenized as Var or Identifier
28471            let is_table_star = (self.check(TokenType::Identifier) || self.check(TokenType::Var))
28472                && self.check_next(TokenType::Dot)
28473                && self
28474                    .tokens
28475                    .get(self.current + 2)
28476                    .map(|t| t.token_type == TokenType::Star)
28477                    .unwrap_or(false);
28478            let is_wildcard = self.check(TokenType::Star) || is_table_star;
28479
28480            if is_wildcard {
28481                // Parse the wildcard expression
28482                let wildcard_expr = if self.match_token(TokenType::Star) {
28483                    // {*} or {* EXCLUDE ...} or {* ILIKE ...}
28484                    // Check for ILIKE first since it's different from standard star modifiers
28485                    if self.check_keyword_text("ILIKE") {
28486                        self.skip();
28487                        let pattern = self.parse_expression()?;
28488                        // Create an ILike expression with Star as left side
28489                        Expression::ILike(Box::new(LikeOp {
28490                            left: Expression::Star(Star {
28491                                table: None,
28492                                except: None,
28493                                replace: None,
28494                                rename: None,
28495                                trailing_comments: Vec::new(),
28496                                span: None,
28497                            }),
28498                            right: pattern,
28499                            escape: None,
28500                            quantifier: None,
28501                            inferred_type: None,
28502                        }))
28503                    } else {
28504                        // {*} or {* EXCLUDE ...}
28505                        let star = self.parse_star_modifiers(None)?;
28506                        Expression::Star(star)
28507                    }
28508                } else {
28509                    // {tbl.*} - table qualified wildcard
28510                    let table_name = self.expect_identifier_or_keyword_with_quoted()?;
28511                    self.expect(TokenType::Dot)?;
28512                    self.expect(TokenType::Star)?;
28513                    let star = self.parse_star_modifiers(Some(table_name))?;
28514                    Expression::Star(star)
28515                };
28516
28517                self.expect(TokenType::RBrace)?;
28518
28519                // Wrap in BracedWildcard for generation
28520                return Ok(Expression::BracedWildcard(Box::new(wildcard_expr)));
28521            }
28522
28523            // Parse key-value pairs: key: value, ...
28524            let mut keys = Vec::new();
28525            let mut values = Vec::new();
28526            loop {
28527                let key = self.parse_expression()?;
28528                self.expect(TokenType::Colon)?;
28529                let value = self.parse_expression()?;
28530                keys.push(key);
28531                values.push(value);
28532                if !self.match_token(TokenType::Comma) {
28533                    break;
28534                }
28535                // Handle trailing comma
28536                if self.check(TokenType::RBrace) {
28537                    break;
28538                }
28539            }
28540            self.expect(TokenType::RBrace)?;
28541            return self.maybe_parse_subscript(Expression::MapFunc(Box::new(MapConstructor {
28542                keys,
28543                values,
28544                curly_brace_syntax: true,
28545                with_map_keyword: false,
28546            })));
28547        }
28548
28549        // Parenthesized expression or subquery
28550        if self.match_token(TokenType::LParen) {
28551            // Capture comments from the ( token (e.g., "(/* comment */ 1)")
28552            let lparen_comments = self.previous_trailing_comments().to_vec();
28553
28554            // Empty parens () — could be empty tuple or zero-param lambda () -> body
28555            if self.check(TokenType::RParen) {
28556                self.skip(); // consume )
28557                             // Check for lambda: () -> body
28558                if self.match_token(TokenType::Arrow) || self.match_token(TokenType::FArrow) {
28559                    let body = self.parse_expression()?;
28560                    return Ok(Expression::Lambda(Box::new(LambdaExpr {
28561                        parameters: Vec::new(),
28562                        body,
28563                        colon: false,
28564                        parameter_types: Vec::new(),
28565                    })));
28566                }
28567                // Otherwise empty tuple
28568                return self.maybe_parse_subscript(Expression::Tuple(Box::new(Tuple {
28569                    expressions: Vec::new(),
28570                })));
28571            }
28572
28573            // Check if this is a VALUES expression inside parens: (VALUES ...)
28574            if self.check(TokenType::Values) {
28575                let values = self.parse_values()?;
28576                self.expect(TokenType::RParen)?;
28577                return Ok(Expression::Subquery(Box::new(Subquery {
28578                    this: values,
28579                    alias: None,
28580                    column_aliases: Vec::new(),
28581                    order_by: None,
28582                    limit: None,
28583                    offset: None,
28584                    distribute_by: None,
28585                    sort_by: None,
28586                    cluster_by: None,
28587                    lateral: false,
28588                    modifiers_inside: false,
28589                    trailing_comments: self.previous_trailing_comments().to_vec(),
28590                    inferred_type: None,
28591                })));
28592            }
28593
28594            // Check if this is a subquery (SELECT, WITH, DuckDB FROM-first, or ClickHouse EXPLAIN)
28595            let is_explain_subquery = self.check(TokenType::Var)
28596                && self.peek().text.eq_ignore_ascii_case("EXPLAIN")
28597                && self.peek_nth(1).map_or(false, |t| {
28598                    // EXPLAIN followed by statement/style keywords is a subquery
28599                    matches!(
28600                        t.token_type,
28601                        TokenType::Select
28602                            | TokenType::Insert
28603                            | TokenType::Create
28604                            | TokenType::Alter
28605                            | TokenType::Drop
28606                            | TokenType::Set
28607                            | TokenType::System
28608                            | TokenType::Table
28609                    ) || matches!(
28610                        t.text.to_ascii_uppercase().as_str(),
28611                        "SYNTAX" | "AST" | "PLAN" | "PIPELINE" | "ESTIMATE" | "CURRENT" | "QUERY"
28612                    ) || (t.token_type == TokenType::Var
28613                        && self
28614                            .peek_nth(2)
28615                            .map_or(false, |t2| t2.token_type == TokenType::Eq))
28616                });
28617            // ClickHouse: (from, to, ...) -> body is a tuple-lambda with keyword params
28618            // Detect pattern: (keyword/ident, keyword/ident, ...) ->
28619            if matches!(
28620                self.config.dialect,
28621                Some(crate::dialects::DialectType::ClickHouse)
28622            ) {
28623                let mut look = self.current;
28624                let mut is_tuple_lambda = true;
28625                let mut param_count = 0;
28626                loop {
28627                    if look >= self.tokens.len() {
28628                        is_tuple_lambda = false;
28629                        break;
28630                    }
28631                    let tt = self.tokens[look].token_type;
28632                    if tt == TokenType::Identifier
28633                        || tt == TokenType::Var
28634                        || tt == TokenType::QuotedIdentifier
28635                        || tt.is_keyword()
28636                    {
28637                        param_count += 1;
28638                        look += 1;
28639                    } else {
28640                        is_tuple_lambda = false;
28641                        break;
28642                    }
28643                    if look >= self.tokens.len() {
28644                        is_tuple_lambda = false;
28645                        break;
28646                    }
28647                    if self.tokens[look].token_type == TokenType::Comma {
28648                        look += 1;
28649                    } else if self.tokens[look].token_type == TokenType::RParen {
28650                        look += 1;
28651                        break;
28652                    } else {
28653                        is_tuple_lambda = false;
28654                        break;
28655                    }
28656                }
28657                if is_tuple_lambda
28658                    && param_count >= 1
28659                    && look < self.tokens.len()
28660                    && self.tokens[look].token_type == TokenType::Arrow
28661                {
28662                    // Parse as lambda: consume params
28663                    let mut params = Vec::new();
28664                    loop {
28665                        let tok = self.advance();
28666                        params.push(Identifier::new(tok.text));
28667                        if self.match_token(TokenType::Comma) {
28668                            continue;
28669                        }
28670                        break;
28671                    }
28672                    self.expect(TokenType::RParen)?;
28673                    self.expect(TokenType::Arrow)?;
28674                    let body = self.parse_expression()?;
28675                    return Ok(Expression::Lambda(Box::new(LambdaExpr {
28676                        parameters: params,
28677                        body,
28678                        colon: false,
28679                        parameter_types: Vec::new(),
28680                    })));
28681                }
28682            }
28683            if self.check(TokenType::Select)
28684                || self.check(TokenType::With)
28685                || self.check(TokenType::From)
28686                || is_explain_subquery
28687            {
28688                let query = self.parse_statement()?;
28689
28690                // Parse LIMIT/OFFSET that may appear after set operations INSIDE the parentheses
28691                // e.g., (SELECT 1 EXCEPT (SELECT 2) LIMIT 1)
28692                let limit = if self.match_token(TokenType::Limit) {
28693                    Some(Limit {
28694                        this: self.parse_expression()?,
28695                        percent: false,
28696                        comments: Vec::new(),
28697                    })
28698                } else {
28699                    None
28700                };
28701                let offset = if self.match_token(TokenType::Offset) {
28702                    Some(Offset {
28703                        this: self.parse_expression()?,
28704                        rows: None,
28705                    })
28706                } else {
28707                    None
28708                };
28709
28710                self.expect(TokenType::RParen)?;
28711
28712                // Wrap in Subquery to preserve parentheses in set operations
28713                let subquery = if limit.is_some() || offset.is_some() {
28714                    // If we have limit/offset INSIDE the parens, set modifiers_inside = true
28715                    Expression::Subquery(Box::new(Subquery {
28716                        this: query,
28717                        alias: None,
28718                        column_aliases: Vec::new(),
28719                        order_by: None,
28720                        limit,
28721                        offset,
28722                        distribute_by: None,
28723                        sort_by: None,
28724                        cluster_by: None,
28725                        lateral: false,
28726                        modifiers_inside: true,
28727                        trailing_comments: self.previous_trailing_comments().to_vec(),
28728                        inferred_type: None,
28729                    }))
28730                } else {
28731                    Expression::Subquery(Box::new(Subquery {
28732                        this: query,
28733                        alias: None,
28734                        column_aliases: Vec::new(),
28735                        order_by: None,
28736                        limit: None,
28737                        offset: None,
28738                        distribute_by: None,
28739                        sort_by: None,
28740                        cluster_by: None,
28741                        lateral: false,
28742                        modifiers_inside: false,
28743                        trailing_comments: self.previous_trailing_comments().to_vec(),
28744                        inferred_type: None,
28745                    }))
28746                };
28747
28748                // Check for set operations after the subquery (e.g., (SELECT 1) UNION (SELECT 2))
28749                let set_result = self.parse_set_operation(subquery)?;
28750
28751                // Only parse ORDER BY/LIMIT/OFFSET after set operations if there WAS a set operation
28752                // (for cases like ((SELECT 0) UNION (SELECT 1) ORDER BY 1 OFFSET 1))
28753                // If there's no set operation, we should NOT consume these - they belong to outer context
28754                let had_set_operation = matches!(
28755                    &set_result,
28756                    Expression::Union(_) | Expression::Intersect(_) | Expression::Except(_)
28757                );
28758
28759                let result = if had_set_operation {
28760                    let order_by = if self.check(TokenType::Order) {
28761                        self.expect(TokenType::Order)?;
28762                        self.expect(TokenType::By)?;
28763                        Some(self.parse_order_by()?)
28764                    } else {
28765                        None
28766                    };
28767                    let limit_after = if self.match_token(TokenType::Limit) {
28768                        Some(Limit {
28769                            this: self.parse_expression()?,
28770                            percent: false,
28771                            comments: Vec::new(),
28772                        })
28773                    } else {
28774                        None
28775                    };
28776                    let offset_after = if self.match_token(TokenType::Offset) {
28777                        Some(Offset {
28778                            this: self.parse_expression()?,
28779                            rows: None,
28780                        })
28781                    } else {
28782                        None
28783                    };
28784
28785                    // If we have any modifiers, wrap in a Subquery with the modifiers OUTSIDE the paren
28786                    if order_by.is_some() || limit_after.is_some() || offset_after.is_some() {
28787                        Expression::Subquery(Box::new(Subquery {
28788                            this: set_result,
28789                            alias: None,
28790                            column_aliases: Vec::new(),
28791                            order_by,
28792                            limit: limit_after,
28793                            offset: offset_after,
28794                            lateral: false,
28795                            modifiers_inside: false,
28796                            trailing_comments: Vec::new(),
28797                            distribute_by: None,
28798                            sort_by: None,
28799                            cluster_by: None,
28800                            inferred_type: None,
28801                        }))
28802                    } else {
28803                        set_result
28804                    }
28805                } else {
28806                    set_result
28807                };
28808                // Allow postfix operators on subquery expressions (e.g., (SELECT 1, 2).1 for tuple element access)
28809                return self.maybe_parse_subscript(result);
28810            }
28811
28812            // Check if this starts with another paren that might be a subquery
28813            // e.g., ((SELECT 1))
28814            if self.check(TokenType::LParen) {
28815                let expr = self.parse_expression()?;
28816
28817                // Handle aliasing of expression inside outer parens (e.g., ((a, b) AS c))
28818                let first_expr = if self.match_token(TokenType::As) {
28819                    let alias = self.expect_identifier_or_alias_keyword_with_quoted()?;
28820                    Expression::Alias(Box::new(Alias::new(expr, alias)))
28821                } else {
28822                    expr
28823                };
28824
28825                // Check for tuple of tuples: ((1, 2), (3, 4))
28826                // Also handles ClickHouse: ((SELECT 1) AS x, (SELECT 2) AS y)
28827                if self.match_token(TokenType::Comma) {
28828                    let mut expressions = vec![first_expr];
28829                    loop {
28830                        if self.check(TokenType::RParen) {
28831                            break;
28832                        } // trailing comma
28833                        let elem = self.parse_expression()?;
28834                        // Handle AS alias after each element (ClickHouse tuple CTE pattern)
28835                        let elem = if self.match_token(TokenType::As) {
28836                            let alias = self.expect_identifier_or_keyword()?;
28837                            Expression::Alias(Box::new(Alias::new(elem, Identifier::new(alias))))
28838                        } else {
28839                            elem
28840                        };
28841                        expressions.push(elem);
28842                        if !self.match_token(TokenType::Comma) {
28843                            break;
28844                        }
28845                    }
28846                    self.expect(TokenType::RParen)?;
28847                    let tuple_expr = Expression::Tuple(Box::new(Tuple { expressions }));
28848                    return self.maybe_parse_subscript(tuple_expr);
28849                }
28850
28851                let result = first_expr;
28852
28853                self.expect(TokenType::RParen)?;
28854                let mut nested_paren_comments = lparen_comments.clone();
28855                nested_paren_comments.extend_from_slice(self.previous_trailing_comments());
28856                // Check for set operations after parenthesized expression
28857                if self.check(TokenType::Union)
28858                    || self.check(TokenType::Intersect)
28859                    || self.check(TokenType::Except)
28860                {
28861                    // This is a set operation - need to handle specially
28862                    if let Expression::Subquery(subq) = &result {
28863                        let set_result = self.parse_set_operation(subq.this.clone())?;
28864
28865                        // Parse ORDER BY/LIMIT/OFFSET after set operations
28866                        let order_by = if self.check(TokenType::Order) {
28867                            self.expect(TokenType::Order)?;
28868                            self.expect(TokenType::By)?;
28869                            Some(self.parse_order_by()?)
28870                        } else {
28871                            None
28872                        };
28873                        let limit = if self.match_token(TokenType::Limit) {
28874                            Some(Limit {
28875                                this: self.parse_expression()?,
28876                                percent: false,
28877                                comments: Vec::new(),
28878                            })
28879                        } else {
28880                            None
28881                        };
28882                        let offset = if self.match_token(TokenType::Offset) {
28883                            Some(Offset {
28884                                this: self.parse_expression()?,
28885                                rows: None,
28886                            })
28887                        } else {
28888                            None
28889                        };
28890
28891                        return Ok(Expression::Subquery(Box::new(Subquery {
28892                            this: set_result,
28893                            alias: None,
28894                            column_aliases: Vec::new(),
28895                            order_by,
28896                            limit,
28897                            offset,
28898                            lateral: false,
28899                            modifiers_inside: false,
28900                            trailing_comments: Vec::new(),
28901                            distribute_by: None,
28902                            sort_by: None,
28903                            cluster_by: None,
28904                            inferred_type: None,
28905                        })));
28906                    }
28907                }
28908                return self.maybe_parse_over(Expression::Paren(Box::new(Paren {
28909                    this: result,
28910                    trailing_comments: nested_paren_comments,
28911                })));
28912            }
28913
28914            let expr = self.parse_expression()?;
28915
28916            // Check for AS alias on the first element (e.g., (x AS y, ...))
28917            let first_expr = if self.match_token(TokenType::As) {
28918                let alias = self.expect_identifier_or_keyword_with_quoted()?;
28919                Expression::Alias(Box::new(Alias::new(expr, alias)))
28920            } else {
28921                expr
28922            };
28923
28924            // Check for tuple (multiple expressions separated by commas)
28925            if self.match_token(TokenType::Comma) {
28926                let mut expressions = vec![first_expr];
28927                // ClickHouse: trailing comma creates single-element tuple, e.g., (1,)
28928                if self.check(TokenType::RParen) {
28929                    self.skip(); // consume )
28930                    let tuple_expr = Expression::Tuple(Box::new(Tuple { expressions }));
28931                    return self.maybe_parse_subscript(tuple_expr);
28932                }
28933                // Parse remaining tuple elements, each can have AS alias
28934                loop {
28935                    let elem = self.parse_expression()?;
28936                    let elem_with_alias = if self.match_token(TokenType::As) {
28937                        let alias = self.expect_identifier_or_keyword_with_quoted()?;
28938                        Expression::Alias(Box::new(Alias::new(elem, alias)))
28939                    } else {
28940                        elem
28941                    };
28942                    expressions.push(elem_with_alias);
28943                    if !self.match_token(TokenType::Comma) {
28944                        break;
28945                    }
28946                    // ClickHouse: trailing comma in multi-element tuple, e.g., (1, 2,)
28947                    if self.check(TokenType::RParen) {
28948                        break;
28949                    }
28950                }
28951
28952                self.expect(TokenType::RParen)?;
28953
28954                // Check for lambda expression: (a, b) -> body
28955                if self.match_token(TokenType::Arrow) {
28956                    let parameters = expressions
28957                        .into_iter()
28958                        .filter_map(|e| {
28959                            if let Expression::Column(c) = e {
28960                                Some(c.name)
28961                            } else if let Expression::Identifier(id) = e {
28962                                Some(id)
28963                            } else {
28964                                None
28965                            }
28966                        })
28967                        .collect();
28968                    let body = self.parse_expression()?;
28969                    return Ok(Expression::Lambda(Box::new(LambdaExpr {
28970                        parameters,
28971                        body,
28972                        colon: false,
28973                        parameter_types: Vec::new(),
28974                    })));
28975                }
28976
28977                // Check for optional alias on the whole tuple
28978                // But NOT when AS is followed by a type constructor like Tuple(a Int8, ...)
28979                // or STRUCT<a TINYINT, ...> which would be part of a CAST expression: CAST((1, 2) AS Tuple(a Int8, b Int16))
28980                // Also NOT when AS is followed by a type name then ) like: CAST((1, 2) AS String)
28981                let tuple_expr = Expression::Tuple(Box::new(Tuple { expressions }));
28982                let result = if self.check(TokenType::As) {
28983                    // Look ahead: AS + type_keyword + ( or < → likely a type, not an alias
28984                    let after_as = self.current + 1;
28985                    let after_ident = self.current + 2;
28986                    let is_type_constructor = after_ident < self.tokens.len()
28987                        && (self.tokens[after_as].token_type == TokenType::Identifier
28988                            || self.tokens[after_as].token_type == TokenType::Var
28989                            || self.tokens[after_as].token_type == TokenType::Nullable
28990                            || self.tokens[after_as].token_type == TokenType::Struct
28991                            || self.tokens[after_as].token_type == TokenType::Array)
28992                        && (self.tokens[after_ident].token_type == TokenType::LParen
28993                            || self.tokens[after_ident].token_type == TokenType::Lt);
28994                    // Check if AS is followed by identifier/keyword then ), indicating CAST(tuple AS Type)
28995                    let is_cast_type = after_ident < self.tokens.len()
28996                        && (self.tokens[after_as].token_type == TokenType::Identifier
28997                            || self.tokens[after_as].token_type == TokenType::Var
28998                            || self.tokens[after_as].token_type.is_keyword())
28999                        && self.tokens[after_ident].token_type == TokenType::RParen;
29000                    if is_type_constructor || is_cast_type {
29001                        tuple_expr
29002                    } else {
29003                        self.skip(); // consume AS
29004                        let alias = self.expect_identifier()?;
29005                        Expression::Alias(Box::new(Alias::new(tuple_expr, Identifier::new(alias))))
29006                    }
29007                } else {
29008                    tuple_expr
29009                };
29010
29011                // Allow postfix operators on tuple expressions (e.g., ('a', 'b').1 for tuple element access)
29012                return self.maybe_parse_subscript(result);
29013            }
29014
29015            // ClickHouse: (x -> body) — lambda inside parentheses
29016            if matches!(
29017                self.config.dialect,
29018                Some(crate::dialects::DialectType::ClickHouse)
29019            ) && self.match_token(TokenType::Arrow)
29020            {
29021                let parameters = if let Expression::Column(c) = first_expr {
29022                    vec![c.name]
29023                } else if let Expression::Identifier(id) = first_expr {
29024                    vec![id]
29025                } else {
29026                    return Err(self.parse_error("Expected identifier as lambda parameter"));
29027                };
29028                let body = self.parse_expression()?;
29029                self.expect(TokenType::RParen)?;
29030                return Ok(Expression::Paren(Box::new(Paren {
29031                    this: Expression::Lambda(Box::new(LambdaExpr {
29032                        parameters,
29033                        body,
29034                        colon: false,
29035                        parameter_types: Vec::new(),
29036                    })),
29037                    trailing_comments: Vec::new(),
29038                })));
29039            }
29040
29041            self.expect(TokenType::RParen)?;
29042            // Combine comments from ( and ) tokens
29043            let mut paren_comments = lparen_comments.clone();
29044            paren_comments.extend_from_slice(self.previous_trailing_comments());
29045
29046            // Check for lambda expression: (x) -> body or single identifier case
29047            if self.match_token(TokenType::Arrow) {
29048                // first_expr should be a single identifier for the parameter
29049                let parameters = if let Expression::Column(c) = first_expr {
29050                    vec![c.name]
29051                } else if let Expression::Identifier(id) = first_expr {
29052                    vec![id]
29053                } else {
29054                    return Err(self.parse_error("Expected identifier as lambda parameter"));
29055                };
29056                let body = self.parse_expression()?;
29057                return Ok(Expression::Lambda(Box::new(LambdaExpr {
29058                    parameters,
29059                    body,
29060                    colon: false,
29061                    parameter_types: Vec::new(),
29062                })));
29063            }
29064
29065            return self.maybe_parse_over(Expression::Paren(Box::new(Paren {
29066                this: first_expr,
29067                trailing_comments: paren_comments,
29068            })));
29069        }
29070
29071        // NULL
29072        if self.match_token(TokenType::Null) {
29073            return Ok(Expression::Null(Null));
29074        }
29075
29076        // TRUE
29077        if self.match_token(TokenType::True) {
29078            return Ok(Expression::Boolean(BooleanLiteral { value: true }));
29079        }
29080
29081        // FALSE
29082        if self.match_token(TokenType::False) {
29083            return Ok(Expression::Boolean(BooleanLiteral { value: false }));
29084        }
29085
29086        // LAMBDA expression (DuckDB syntax: LAMBDA x : expr)
29087        if self.check(TokenType::Lambda) {
29088            if let Some(lambda) = self.parse_lambda()? {
29089                return Ok(lambda);
29090            }
29091        }
29092
29093        // CASE expression - but not if followed by DOT (then it's an identifier like case.column)
29094        if self.check(TokenType::Case) && !self.check_next(TokenType::Dot) {
29095            let case_expr = self.parse_case()?;
29096            return self.maybe_parse_over(case_expr);
29097        }
29098
29099        // CAST expression
29100        if self.check(TokenType::Cast) {
29101            let cast_expr = self.parse_cast()?;
29102            return self.maybe_parse_subscript(cast_expr);
29103        }
29104
29105        // TRY_CAST expression
29106        if self.check(TokenType::TryCast) {
29107            let cast_expr = self.parse_try_cast()?;
29108            return self.maybe_parse_subscript(cast_expr);
29109        }
29110
29111        // SAFE_CAST expression (BigQuery)
29112        if self.check(TokenType::SafeCast) {
29113            let cast_expr = self.parse_safe_cast()?;
29114            return self.maybe_parse_subscript(cast_expr);
29115        }
29116
29117        // EXISTS - either subquery predicate EXISTS(SELECT ...) or Hive array function EXISTS(array, lambda)
29118        // ClickHouse: EXISTS without ( is a column name/identifier
29119        if self.check(TokenType::Exists)
29120            && matches!(
29121                self.config.dialect,
29122                Some(crate::dialects::DialectType::ClickHouse)
29123            )
29124            && !self.check_next(TokenType::LParen)
29125        {
29126            let tok = self.advance();
29127            return Ok(Expression::Identifier(Identifier::new(tok.text)));
29128        }
29129        if self.match_token(TokenType::Exists) {
29130            self.expect(TokenType::LParen)?;
29131
29132            // Check if this is a subquery EXISTS (SELECT, WITH, or FROM for DuckDB)
29133            // ClickHouse: also handle EXISTS((SELECT ...)) with double parens
29134            if self.check(TokenType::Select)
29135                || self.check(TokenType::With)
29136                || self.check(TokenType::From)
29137                || (self.check(TokenType::LParen)
29138                    && self
29139                        .peek_nth(1)
29140                        .map(|t| {
29141                            matches!(
29142                                t.token_type,
29143                                TokenType::Select | TokenType::With | TokenType::From
29144                            )
29145                        })
29146                        .unwrap_or(false))
29147            {
29148                let query = self.parse_statement()?;
29149                self.expect(TokenType::RParen)?;
29150                return Ok(Expression::Exists(Box::new(Exists {
29151                    this: query,
29152                    not: false,
29153                })));
29154            }
29155
29156            // Otherwise it's Hive's array EXISTS function: EXISTS(array, lambda_predicate)
29157            // This function checks if any element in the array matches the predicate
29158            let array_expr = self.parse_expression()?;
29159            self.expect(TokenType::Comma)?;
29160            let predicate = self.parse_expression()?;
29161            self.expect(TokenType::RParen)?;
29162            return Ok(Expression::Function(Box::new(Function {
29163                name: "EXISTS".to_string(),
29164                args: vec![array_expr, predicate],
29165                distinct: false,
29166                trailing_comments: Vec::new(),
29167                use_bracket_syntax: false,
29168                no_parens: false,
29169                quoted: false,
29170                span: None,
29171                inferred_type: None,
29172            })));
29173        }
29174
29175        // INTERVAL expression or identifier
29176        if self.check(TokenType::Interval) {
29177            if let Some(interval_expr) = self.try_parse_interval()? {
29178                return Ok(interval_expr);
29179            }
29180            // INTERVAL is used as an identifier
29181            let token = self.advance();
29182            return Ok(Expression::Identifier(Identifier::new(token.text)));
29183        }
29184
29185        // DATE literal: DATE '2024-01-15' or DATE function: DATE(expr)
29186        if self.check(TokenType::Date) {
29187            let token = self.advance();
29188            let original_text = token.text.clone();
29189            if self.check(TokenType::String) {
29190                let str_token = self.advance();
29191                if self.config.dialect.is_none() {
29192                    // Generic (no dialect): DATE 'literal' -> CAST('literal' AS DATE)
29193                    return Ok(Expression::Cast(Box::new(Cast {
29194                        this: Expression::Literal(Box::new(Literal::String(str_token.text))),
29195                        to: DataType::Date,
29196                        trailing_comments: Vec::new(),
29197                        double_colon_syntax: false,
29198                        format: None,
29199                        default: None,
29200                        inferred_type: None,
29201                    })));
29202                }
29203                return Ok(Expression::Literal(Box::new(Literal::Date(str_token.text))));
29204            }
29205            // Check for DATE() function call
29206            if self.match_token(TokenType::LParen) {
29207                let func_expr = self.parse_typed_function(&original_text, "DATE", false)?;
29208                return self.maybe_parse_over(func_expr);
29209            }
29210            // Fallback to DATE as column reference - preserve original case
29211            return Ok(Expression::boxed_column(Column {
29212                name: Identifier::new(original_text),
29213                table: None,
29214                join_mark: false,
29215                trailing_comments: Vec::new(),
29216                span: None,
29217                inferred_type: None,
29218            }));
29219        }
29220
29221        // TIME literal: TIME '10:30:00' or TIME function: TIME(expr)
29222        if self.check(TokenType::Time) {
29223            let token = self.advance();
29224            let original_text = token.text.clone();
29225            if self.check(TokenType::String) {
29226                let str_token = self.advance();
29227                return Ok(Expression::Literal(Box::new(Literal::Time(str_token.text))));
29228            }
29229            // Check for TIME() function call
29230            if self.match_token(TokenType::LParen) {
29231                let func_expr = self.parse_typed_function(&original_text, "TIME", false)?;
29232                return self.maybe_parse_over(func_expr);
29233            }
29234            // Fallback to TIME as column reference - preserve original case
29235            return self.maybe_parse_subscript(Expression::boxed_column(Column {
29236                name: Identifier::new(original_text),
29237                table: None,
29238                join_mark: false,
29239                trailing_comments: Vec::new(),
29240                span: None,
29241                inferred_type: None,
29242            }));
29243        }
29244
29245        // TIMESTAMP literal: TIMESTAMP '2024-01-15 10:30:00' or TIMESTAMP function: TIMESTAMP(expr)
29246        // Also handles TIMESTAMP(n) WITH TIME ZONE as a data type expression
29247        if self.check(TokenType::Timestamp) {
29248            let token = self.advance();
29249            let original_text = token.text.clone();
29250            if self.check(TokenType::String) {
29251                let str_token = self.advance();
29252                if self.config.dialect.is_none() {
29253                    // Generic (no dialect): TIMESTAMP 'literal' -> CAST('literal' AS TIMESTAMP)
29254                    return Ok(Expression::Cast(Box::new(Cast {
29255                        this: Expression::Literal(Box::new(Literal::String(str_token.text))),
29256                        to: DataType::Timestamp {
29257                            precision: None,
29258                            timezone: false,
29259                        },
29260                        trailing_comments: Vec::new(),
29261                        double_colon_syntax: false,
29262                        format: None,
29263                        default: None,
29264                        inferred_type: None,
29265                    })));
29266                }
29267                // Dialect-specific: keep as Literal::Timestamp for dialect transforms
29268                return Ok(Expression::Literal(Box::new(Literal::Timestamp(
29269                    str_token.text,
29270                ))));
29271            }
29272            // Check for TIMESTAMP(n) WITH/WITHOUT TIME ZONE or TIMESTAMP(n) 'literal' as data type
29273            // This is a data type, not a function call
29274            if self.check(TokenType::LParen) {
29275                // Look ahead to see if this is TIMESTAMP(number) WITH/WITHOUT/String (data type)
29276                // vs TIMESTAMP(expr) (function call)
29277                let is_data_type = self.check_next(TokenType::Number) && {
29278                    // Check if after (number) there's WITH, WITHOUT, or String literal
29279                    let mut lookahead = self.current + 2;
29280                    // The number sits at current + 1; step past the closing paren if it follows
29281                    while lookahead < self.tokens.len()
29282                        && self.tokens[lookahead].token_type == TokenType::RParen
29283                    {
29284                        lookahead += 1;
29285                        break;
29286                    }
29287                    // Check for WITH, WITHOUT, or String after the closing paren
29288                    lookahead < self.tokens.len()
29289                        && (self.tokens[lookahead].token_type == TokenType::With
29290                            || self.tokens[lookahead].text.eq_ignore_ascii_case("WITHOUT")
29291                            || self.tokens[lookahead].token_type == TokenType::String)
29292                };
29293
29294                if is_data_type {
29295                    // Parse as data type: TIMESTAMP(precision) [WITH/WITHOUT TIME ZONE] ['literal']
29296                    self.skip(); // consume (
29297                    let precision = Some(self.expect_number()? as u32);
29298                    self.expect(TokenType::RParen)?;
29299
29300                    let data_type = if self.match_token(TokenType::With) {
29301                        if self.match_token(TokenType::Local) {
29302                            // WITH LOCAL TIME ZONE -> TIMESTAMPLTZ
29303                            self.match_keyword("TIME");
29304                            self.match_keyword("ZONE");
29305                            DataType::Custom {
29306                                name: format!("TIMESTAMPLTZ({})", precision.unwrap()),
29307                            }
29308                        } else {
29309                            self.match_keyword("TIME");
29310                            self.match_keyword("ZONE");
29311                            DataType::Timestamp {
29312                                precision,
29313                                timezone: true,
29314                            }
29315                        }
29316                    } else if self.match_keyword("WITHOUT") {
29317                        self.match_keyword("TIME");
29318                        self.match_keyword("ZONE");
29319                        DataType::Timestamp {
29320                            precision,
29321                            timezone: false,
29322                        }
29323                    } else {
29324                        DataType::Timestamp {
29325                            precision,
29326                            timezone: false,
29327                        }
29328                    };
29329
29330                    // Check for following string literal -> wrap in CAST
29331                    if self.check(TokenType::String) {
29332                        let str_token = self.advance();
29333                        return Ok(Expression::Cast(Box::new(Cast {
29334                            this: Expression::Literal(Box::new(Literal::String(str_token.text))),
29335                            to: data_type,
29336                            trailing_comments: Vec::new(),
29337                            double_colon_syntax: false,
29338                            format: None,
29339                            default: None,
29340                            inferred_type: None,
29341                        })));
29342                    }
29343
29344                    return Ok(Expression::DataType(data_type));
29345                }
29346
29347                // Otherwise parse as function call
29348                self.skip(); // consume (
29349                let func_expr = self.parse_typed_function(&original_text, "TIMESTAMP", false)?;
29350                return self.maybe_parse_over(func_expr);
29351            }
29352            // Check for TIMESTAMP WITH/WITHOUT TIME ZONE (no precision) as data type
29353            // Use lookahead to verify WITH is followed by TIME (not WITH FILL, WITH TOTALS, etc.)
29354            if (self.check(TokenType::With)
29355                && self.peek_nth(1).map_or(false, |t| {
29356                    t.text.eq_ignore_ascii_case("TIME") || t.text.eq_ignore_ascii_case("LOCAL")
29357                }))
29358                || self.check_keyword_text("WITHOUT")
29359            {
29360                let data_type = if self.match_token(TokenType::With) {
29361                    if self.match_token(TokenType::Local) {
29362                        // WITH LOCAL TIME ZONE -> TIMESTAMPLTZ
29363                        self.match_keyword("TIME");
29364                        self.match_keyword("ZONE");
29365                        DataType::Custom {
29366                            name: "TIMESTAMPLTZ".to_string(),
29367                        }
29368                    } else {
29369                        self.match_keyword("TIME");
29370                        self.match_keyword("ZONE");
29371                        DataType::Timestamp {
29372                            precision: None,
29373                            timezone: true,
29374                        }
29375                    }
29376                } else if self.match_keyword("WITHOUT") {
29377                    self.match_keyword("TIME");
29378                    self.match_keyword("ZONE");
29379                    DataType::Timestamp {
29380                        precision: None,
29381                        timezone: false,
29382                    }
29383                } else {
29384                    DataType::Timestamp {
29385                        precision: None,
29386                        timezone: false,
29387                    }
29388                };
29389
29390                // Check for following string literal -> wrap in CAST
29391                if self.check(TokenType::String) {
29392                    let str_token = self.advance();
29393                    return Ok(Expression::Cast(Box::new(Cast {
29394                        this: Expression::Literal(Box::new(Literal::String(str_token.text))),
29395                        to: data_type,
29396                        trailing_comments: Vec::new(),
29397                        double_colon_syntax: false,
29398                        format: None,
29399                        default: None,
29400                        inferred_type: None,
29401                    })));
29402                }
29403
29404                return Ok(Expression::DataType(data_type));
29405            }
29406            // Fallback to TIMESTAMP as column reference - preserve original case
29407            return Ok(Expression::boxed_column(Column {
29408                name: Identifier::new(original_text),
29409                table: None,
29410                join_mark: false,
29411                trailing_comments: Vec::new(),
29412                span: None,
29413                inferred_type: None,
29414            }));
29415        }
29416
29417        // DATETIME literal: DATETIME '2024-01-15 10:30:00' or DATETIME function: DATETIME(expr)
29418        if self.check(TokenType::DateTime) {
29419            let token = self.advance();
29420            let original_text = token.text.clone();
29421            if self.check(TokenType::String) {
29422                let str_token = self.advance();
29423                return Ok(Expression::Literal(Box::new(Literal::Datetime(
29424                    str_token.text,
29425                ))));
29426            }
29427            // Check for DATETIME() function call
29428            if self.match_token(TokenType::LParen) {
29429                let func_expr = self.parse_typed_function(&original_text, "DATETIME", false)?;
29430                return self.maybe_parse_over(func_expr);
29431            }
29432            // Fallback to DATETIME as column reference - preserve original case
29433            return Ok(Expression::boxed_column(Column {
29434                name: Identifier::new(original_text),
29435                table: None,
29436                join_mark: false,
29437                trailing_comments: Vec::new(),
29438                span: None,
29439                inferred_type: None,
29440            }));
29441        }
29442
29443        // ROW() function (window function for row number)
29444        if self.check(TokenType::Row) && self.check_next(TokenType::LParen) {
29445            self.skip(); // consume ROW
29446            self.expect(TokenType::LParen)?;
29447            // ROW() typically takes no arguments
29448            let args = if !self.check(TokenType::RParen) {
29449                self.parse_expression_list()?
29450            } else {
29451                Vec::new()
29452            };
29453            self.expect(TokenType::RParen)?;
29454            let func_expr = Expression::Function(Box::new(Function {
29455                name: "ROW".to_string(),
29456                args,
29457                distinct: false,
29458                trailing_comments: Vec::new(),
29459                use_bracket_syntax: false,
29460                no_parens: false,
29461                quoted: false,
29462                span: None,
29463                inferred_type: None,
29464            }));
29465            return self.maybe_parse_over(func_expr);
29466        }
29467
29468        // Number - support postfix operators like ::type
29469        if self.check(TokenType::Number) {
29470            let token = self.advance();
29471            if matches!(
29472                self.config.dialect,
29473                Some(crate::dialects::DialectType::MySQL)
29474            ) {
29475                let text = token.text.as_str();
29476                if text.len() > 2
29477                    && (text.starts_with("0x") || text.starts_with("0X"))
29478                    && !text[2..].chars().all(|c| c.is_ascii_hexdigit())
29479                {
29480                    let ident = Expression::Identifier(Identifier {
29481                        name: token.text,
29482                        quoted: true,
29483                        trailing_comments: Vec::new(),
29484                        span: None,
29485                    });
29486                    return self.maybe_parse_subscript(ident);
29487                }
29488            }
29489            if matches!(
29490                self.config.dialect,
29491                Some(crate::dialects::DialectType::Teradata)
29492            ) && token.text == "0"
29493            {
29494                if let Some(next) = self.tokens.get(self.current) {
29495                    let is_adjacent = token.span.end == next.span.start;
29496                    let next_text = next.text.as_str();
29497                    let is_hex_prefix = next_text.starts_with('x') || next_text.starts_with('X');
29498                    if is_adjacent
29499                        && matches!(next.token_type, TokenType::Identifier | TokenType::Var)
29500                        && is_hex_prefix
29501                        && next_text.len() > 1
29502                        && next_text[1..].chars().all(|c| c.is_ascii_hexdigit())
29503                    {
29504                        // Consume the hex suffix token and emit a HexString literal
29505                        let hex_token = self.advance();
29506                        let hex = hex_token.text[1..].to_string();
29507                        let literal = Expression::Literal(Box::new(Literal::HexString(hex)));
29508                        return self.maybe_parse_subscript(literal);
29509                    }
29510                }
29511            }
29512            if matches!(
29513                self.config.dialect,
29514                Some(crate::dialects::DialectType::ClickHouse)
29515            ) {
29516                if let Some(next) = self.tokens.get(self.current) {
29517                    let is_adjacent = token.span.end == next.span.start;
29518                    if is_adjacent
29519                        && matches!(next.token_type, TokenType::Identifier | TokenType::Var)
29520                        && next.text.starts_with('_')
29521                    {
29522                        let suffix = next.text.clone();
29523                        self.skip(); // consume suffix token
29524                        let combined = format!("{}{}", token.text, suffix);
29525                        let literal = Expression::Literal(Box::new(Literal::Number(combined)));
29526                        return self.maybe_parse_subscript(literal);
29527                    }
29528                }
29529            }
29530            // Check for numeric literal suffix encoded as "number::TYPE" by tokenizer
29531            let literal = if let Some(sep_pos) = token.text.find("::") {
29532                let num_part = &token.text[..sep_pos];
29533                let type_name = &token.text[sep_pos + 2..];
29534                let num_expr = Expression::Literal(Box::new(Literal::Number(num_part.to_string())));
29535                let data_type = match type_name {
29536                    "BIGINT" => crate::expressions::DataType::BigInt { length: None },
29537                    "SMALLINT" => crate::expressions::DataType::SmallInt { length: None },
29538                    "TINYINT" => crate::expressions::DataType::TinyInt { length: None },
29539                    "DOUBLE" => crate::expressions::DataType::Double {
29540                        precision: None,
29541                        scale: None,
29542                    },
29543                    "FLOAT" => crate::expressions::DataType::Float {
29544                        precision: None,
29545                        scale: None,
29546                        real_spelling: false,
29547                    },
29548                    "DECIMAL" => crate::expressions::DataType::Decimal {
29549                        precision: None,
29550                        scale: None,
29551                    },
29552                    _ => crate::expressions::DataType::Custom {
29553                        name: type_name.to_string(),
29554                    },
29555                };
29556                Expression::Cast(Box::new(crate::expressions::Cast {
29557                    this: num_expr,
29558                    to: data_type,
29559                    trailing_comments: Vec::new(),
29560                    double_colon_syntax: false,
29561                    format: None,
29562                    default: None,
29563                    inferred_type: None,
29564                }))
29565            } else {
29566                Expression::Literal(Box::new(Literal::Number(token.text)))
29567            };
29568            return self.maybe_parse_subscript(literal);
29569        }
29570
29571        // String - support postfix operators like ::type, ->, ->>
29572        // Also handle adjacent string literals (SQL standard) which concatenate: 'x' 'y' 'z' -> CONCAT('x', 'y', 'z')
29573        if self.check(TokenType::String) {
29574            let token = self.advance();
29575            let first_literal = Expression::Literal(Box::new(Literal::String(token.text)));
29576
29577            // Check for adjacent string literals (PostgreSQL and SQL standard feature)
29578            // 'x' 'y' 'z' should be treated as string concatenation
29579            if self.check(TokenType::String) {
29580                let mut expressions = vec![first_literal];
29581                while self.check(TokenType::String) {
29582                    let next_token = self.advance();
29583                    expressions.push(Expression::Literal(Box::new(Literal::String(
29584                        next_token.text,
29585                    ))));
29586                }
29587                // Create CONCAT function call with all adjacent strings
29588                let concat_func =
29589                    Expression::Function(Box::new(Function::new("CONCAT", expressions)));
29590                return self.maybe_parse_subscript(concat_func);
29591            }
29592
29593            return self.maybe_parse_subscript(first_literal);
29594        }
29595
29596        // Dollar-quoted string: $$...$$ or $tag$...$tag$ -- preserve as DollarString
29597        // so the generator can handle dialect-specific conversion
29598        if self.check(TokenType::DollarString) {
29599            let token = self.advance();
29600            let literal = Expression::Literal(Box::new(Literal::DollarString(token.text)));
29601            return self.maybe_parse_subscript(literal);
29602        }
29603
29604        // Triple-quoted string with double quotes: """..."""
29605        if self.check(TokenType::TripleDoubleQuotedString) {
29606            let token = self.advance();
29607            let literal =
29608                Expression::Literal(Box::new(Literal::TripleQuotedString(token.text, '"')));
29609            return self.maybe_parse_subscript(literal);
29610        }
29611
29612        // Triple-quoted string with single quotes: '''...'''
29613        if self.check(TokenType::TripleSingleQuotedString) {
29614            let token = self.advance();
29615            let literal =
29616                Expression::Literal(Box::new(Literal::TripleQuotedString(token.text, '\'')));
29617            return self.maybe_parse_subscript(literal);
29618        }
29619
29620        // National String (N'...')
29621        if self.check(TokenType::NationalString) {
29622            let token = self.advance();
29623            let literal = Expression::Literal(Box::new(Literal::NationalString(token.text)));
29624            return self.maybe_parse_subscript(literal);
29625        }
29626
29627        // Hex String (X'...')
29628        if self.check(TokenType::HexString) {
29629            let token = self.advance();
29630            let literal = Expression::Literal(Box::new(Literal::HexString(token.text)));
29631            return self.maybe_parse_subscript(literal);
29632        }
29633
29634        // Hex Number (0xA from BigQuery/SQLite) - integer in hex notation
29635        if self.check(TokenType::HexNumber) {
29636            let token = self.advance();
29637            if matches!(
29638                self.config.dialect,
29639                Some(crate::dialects::DialectType::MySQL)
29640            ) {
29641                let text = token.text.as_str();
29642                if text.len() > 2
29643                    && (text.starts_with("0x") || text.starts_with("0X"))
29644                    && !text[2..].chars().all(|c| c.is_ascii_hexdigit())
29645                {
29646                    let ident = Expression::Identifier(Identifier {
29647                        name: token.text,
29648                        quoted: true,
29649                        trailing_comments: Vec::new(),
29650                        span: None,
29651                    });
29652                    return self.maybe_parse_subscript(ident);
29653                }
29654            }
29655            let literal = Expression::Literal(Box::new(Literal::HexNumber(token.text)));
29656            return self.maybe_parse_subscript(literal);
29657        }
29658
29659        // Bit String (B'...')
29660        if self.check(TokenType::BitString) {
29661            let token = self.advance();
29662            let literal = Expression::Literal(Box::new(Literal::BitString(token.text)));
29663            return self.maybe_parse_subscript(literal);
29664        }
29665
29666        // Byte String (b"..." - BigQuery style)
29667        if self.check(TokenType::ByteString) {
29668            let token = self.advance();
29669            let literal = Expression::Literal(Box::new(Literal::ByteString(token.text)));
29670            return self.maybe_parse_subscript(literal);
29671        }
29672
29673        // Raw String (r"..." - BigQuery style, backslashes are literal)
29674        if self.check(TokenType::RawString) {
29675            let token = self.advance();
29676            // Raw strings preserve backslashes as literal characters.
29677            // The generator will handle escaping when converting to a regular string.
29678            let literal = Expression::Literal(Box::new(Literal::RawString(token.text)));
29679            return self.maybe_parse_subscript(literal);
29680        }
29681
29682        // Escape String (E'...' - PostgreSQL)
29683        if self.check(TokenType::EscapeString) {
29684            let token = self.advance();
29685            // EscapeString is stored as "E'content'" - extract just the content
29686            let literal = Expression::Literal(Box::new(Literal::EscapeString(token.text)));
29687            return self.maybe_parse_subscript(literal);
29688        }
29689
29690        // Star - check for DuckDB *COLUMNS(...) syntax first
29691        if self.check(TokenType::Star) {
29692            // DuckDB *COLUMNS(...) syntax: *COLUMNS(*), *COLUMNS('regex'), *COLUMNS(['col1', 'col2'])
29693            // Check if * is followed by COLUMNS and (
29694            if self.check_next_identifier("COLUMNS") {
29695                // Check if there's a ( after COLUMNS
29696                if self
29697                    .tokens
29698                    .get(self.current + 2)
29699                    .map(|t| t.token_type == TokenType::LParen)
29700                    .unwrap_or(false)
29701                {
29702                    self.skip(); // consume *
29703                    self.skip(); // consume COLUMNS
29704                    self.skip(); // consume (
29705
29706                    // Parse the argument: can be *, a regex string, or an array of column names
29707                    let arg = if self.check(TokenType::Star) {
29708                        self.skip(); // consume *
29709                        Expression::Star(Star {
29710                            table: None,
29711                            except: None,
29712                            replace: None,
29713                            rename: None,
29714                            trailing_comments: Vec::new(),
29715                            span: None,
29716                        })
29717                    } else {
29718                        self.parse_expression()?
29719                    };
29720
29721                    self.expect(TokenType::RParen)?;
29722
29723                    // Create Columns expression with unpack=true
29724                    return Ok(Expression::Columns(Box::new(Columns {
29725                        this: Box::new(arg),
29726                        unpack: Some(Box::new(Expression::Boolean(BooleanLiteral {
29727                            value: true,
29728                        }))),
29729                    })));
29730                }
29731            }
29732
29733            // Regular star
29734            self.skip(); // consume *
29735            let star = self.parse_star_modifiers(None)?;
29736            return Ok(Expression::Star(star));
29737        }
29738
29739        // Generic type expressions: ARRAY<T>, MAP<K,V>, STRUCT<...>
29740        // These are standalone type expressions (not in CAST context)
29741        // But also handle STRUCT<TYPE>(args) which becomes CAST(STRUCT(args) AS STRUCT<TYPE>)
29742        if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
29743            let name_upper = self.peek().text.to_ascii_uppercase();
29744            if (name_upper == "ARRAY" || name_upper == "MAP" || name_upper == "STRUCT")
29745                && self.check_next(TokenType::Lt)
29746            {
29747                self.skip(); // consume ARRAY/MAP/STRUCT
29748                let data_type = self.parse_data_type_from_name(&name_upper)?;
29749
29750                // Check for typed constructor: STRUCT<TYPE>(args) or ARRAY<TYPE>(args)
29751                // These become CAST(STRUCT(args) AS TYPE) or CAST(ARRAY(args) AS TYPE)
29752                if self.match_token(TokenType::LParen) {
29753                    if name_upper == "STRUCT" {
29754                        // Parse struct constructor arguments
29755                        let args = if self.check(TokenType::RParen) {
29756                            Vec::new()
29757                        } else {
29758                            self.parse_struct_args()?
29759                        };
29760                        self.expect(TokenType::RParen)?;
29761
29762                        // Convert args to Struct fields (all unnamed)
29763                        let fields: Vec<(Option<String>, Expression)> =
29764                            args.into_iter().map(|e| (None, e)).collect();
29765
29766                        // Create CAST(STRUCT(args) AS STRUCT<TYPE>)
29767                        let struct_expr = Expression::Struct(Box::new(Struct { fields }));
29768                        let cast_expr = Expression::Cast(Box::new(Cast {
29769                            this: struct_expr,
29770                            to: data_type,
29771                            trailing_comments: Vec::new(),
29772                            double_colon_syntax: false,
29773                            format: None,
29774                            default: None,
29775                            inferred_type: None,
29776                        }));
29777                        return self.maybe_parse_subscript(cast_expr);
29778                    } else if name_upper == "ARRAY" {
29779                        // Parse array constructor arguments
29780                        let mut expressions = Vec::new();
29781                        if !self.check(TokenType::RParen) {
29782                            loop {
29783                                expressions.push(self.parse_expression()?);
29784                                if !self.match_token(TokenType::Comma) {
29785                                    break;
29786                                }
29787                            }
29788                        }
29789                        self.expect(TokenType::RParen)?;
29790
29791                        // Create CAST(ARRAY[args] AS ARRAY<TYPE>)
29792                        let array_expr = Expression::Array(Box::new(Array { expressions }));
29793                        let cast_expr = Expression::Cast(Box::new(Cast {
29794                            this: array_expr,
29795                            to: data_type,
29796                            trailing_comments: Vec::new(),
29797                            double_colon_syntax: false,
29798                            format: None,
29799                            default: None,
29800                            inferred_type: None,
29801                        }));
29802                        return self.maybe_parse_subscript(cast_expr);
29803                    }
29804                } else if self.match_token(TokenType::LBracket) {
29805                    // ARRAY<TYPE>[values] or ARRAY<TYPE>[] - bracket-style array constructor
29806                    let expressions = if self.check(TokenType::RBracket) {
29807                        Vec::new()
29808                    } else {
29809                        self.parse_expression_list()?
29810                    };
29811                    self.expect(TokenType::RBracket)?;
29812                    // Create CAST(Array(values) AS DataType)
29813                    let array_expr = Expression::Array(Box::new(Array { expressions }));
29814                    let cast_expr = Expression::Cast(Box::new(Cast {
29815                        this: array_expr,
29816                        to: data_type,
29817                        trailing_comments: Vec::new(),
29818                        double_colon_syntax: false,
29819                        format: None,
29820                        default: None,
29821                        inferred_type: None,
29822                    }));
29823                    return self.maybe_parse_subscript(cast_expr);
29824                }
29825
29826                return Ok(Expression::DataType(data_type));
29827            }
29828            // DuckDB-style MAP with curly brace literals: MAP {'key': value}
29829            if name_upper == "MAP" && self.check_next(TokenType::LBrace) {
29830                self.skip(); // consume MAP
29831                self.expect(TokenType::LBrace)?;
29832
29833                // Handle empty: MAP {}
29834                if self.match_token(TokenType::RBrace) {
29835                    return self.maybe_parse_subscript(Expression::MapFunc(Box::new(
29836                        MapConstructor {
29837                            keys: Vec::new(),
29838                            values: Vec::new(),
29839                            curly_brace_syntax: true,
29840                            with_map_keyword: true,
29841                        },
29842                    )));
29843                }
29844
29845                // Parse key-value pairs
29846                let mut keys = Vec::new();
29847                let mut values = Vec::new();
29848                loop {
29849                    let key = self.parse_primary()?;
29850                    self.expect(TokenType::Colon)?;
29851                    let value = self.parse_expression()?;
29852                    keys.push(key);
29853                    values.push(value);
29854                    if !self.match_token(TokenType::Comma) {
29855                        break;
29856                    }
29857                    // Handle trailing comma
29858                    if self.check(TokenType::RBrace) {
29859                        break;
29860                    }
29861                }
29862                self.expect(TokenType::RBrace)?;
29863
29864                return self.maybe_parse_subscript(Expression::MapFunc(Box::new(MapConstructor {
29865                    keys,
29866                    values,
29867                    curly_brace_syntax: true,
29868                    with_map_keyword: true,
29869                })));
29870            }
29871        }
29872
29873        // Keywords as identifiers when followed by DOT (e.g., case.x, top.y)
29874        // These keywords can be table/column names when used with dot notation
29875        if (self.check(TokenType::Case) || self.check(TokenType::Top))
29876            && self.check_next(TokenType::Dot)
29877        {
29878            let token = self.advance();
29879            let ident = Identifier::new(token.text);
29880            self.expect(TokenType::Dot)?;
29881            if self.match_token(TokenType::Star) {
29882                // case.* or top.*
29883                let star = self.parse_star_modifiers(Some(ident))?;
29884                return Ok(Expression::Star(star));
29885            }
29886            // case.column or top.column
29887            let col_ident = self.expect_identifier_or_keyword_with_quoted()?;
29888            // Capture trailing comments from the column name token
29889            let trailing_comments = self.previous_trailing_comments().to_vec();
29890            let mut col = Expression::boxed_column(Column {
29891                name: col_ident,
29892                table: Some(ident),
29893                join_mark: false,
29894                trailing_comments,
29895                span: None,
29896                inferred_type: None,
29897            });
29898            // Handle Oracle/Redshift outer join marker (+) after column reference
29899            if self.check(TokenType::LParen) && self.check_next(TokenType::Plus) {
29900                let saved_pos = self.current;
29901                if self.match_token(TokenType::LParen)
29902                    && self.match_token(TokenType::Plus)
29903                    && self.match_token(TokenType::RParen)
29904                {
29905                    if let Expression::Column(ref mut c) = col {
29906                        c.join_mark = true;
29907                    }
29908                } else {
29909                    self.current = saved_pos;
29910                }
29911            }
29912            return self.maybe_parse_subscript(col);
29913        }
29914
29915        // MySQL BINARY prefix operator: BINARY expr -> CAST(expr AS BINARY)
29916        // Only treat as prefix operator when followed by an expression (not ( which would be BINARY() function,
29917        // and not when it would be a data type like BINARY in column definitions)
29918        if self.check(TokenType::Var)
29919            && self.peek().text.eq_ignore_ascii_case("BINARY")
29920            && !self.check_next(TokenType::LParen)
29921            && !self.check_next(TokenType::Dot)
29922            && !self.check_next(TokenType::RParen)
29923            && !self.check_next(TokenType::Comma)
29924            && !self.is_at_end()
29925        {
29926            // Check if this is actually followed by an expression token (not end of statement)
29927            let next_idx = self.current + 1;
29928            let has_expr = next_idx < self.tokens.len()
29929                && !matches!(
29930                    self.tokens[next_idx].token_type,
29931                    TokenType::Semicolon | TokenType::Eof | TokenType::RParen | TokenType::Comma
29932                );
29933            if has_expr {
29934                self.skip(); // consume BINARY
29935                let expr = self.parse_unary()?;
29936                return Ok(Expression::Cast(Box::new(Cast {
29937                    this: expr,
29938                    to: DataType::Binary { length: None },
29939                    trailing_comments: Vec::new(),
29940                    double_colon_syntax: false,
29941                    format: None,
29942                    default: None,
29943                    inferred_type: None,
29944                })));
29945            }
29946        }
29947
29948        // RLIKE/REGEXP as function call: RLIKE(expr, pattern, flags)
29949        // Normally RLIKE is an operator, but Snowflake allows function syntax
29950        if self.check(TokenType::RLike) && self.check_next(TokenType::LParen) {
29951            let token = self.advance(); // consume RLIKE
29952            self.skip(); // consume LParen
29953            let args = if self.check(TokenType::RParen) {
29954                Vec::new()
29955            } else {
29956                self.parse_function_arguments()?
29957            };
29958            self.expect(TokenType::RParen)?;
29959            let func = Expression::Function(Box::new(Function {
29960                name: token.text.clone(), // Preserve original case; generator handles normalization
29961                args,
29962                distinct: false,
29963                trailing_comments: Vec::new(),
29964                use_bracket_syntax: false,
29965                no_parens: false,
29966                quoted: false,
29967                span: None,
29968                inferred_type: None,
29969            }));
29970            return self.maybe_parse_over(func);
29971        }
29972
29973        // INSERT as function call: INSERT(str, pos, len, newstr)
29974        // Snowflake/MySQL have INSERT as a string function, but INSERT is also a DML keyword.
29975        // When followed by ( in expression context, treat as function call.
29976        if self.check(TokenType::Insert) && self.check_next(TokenType::LParen) {
29977            let token = self.advance(); // consume INSERT
29978            self.skip(); // consume LParen
29979            let args = if self.check(TokenType::RParen) {
29980                Vec::new()
29981            } else {
29982                self.parse_function_arguments()?
29983            };
29984            self.expect(TokenType::RParen)?;
29985            let func = Expression::Function(Box::new(Function {
29986                name: token.text.clone(),
29987                args,
29988                distinct: false,
29989                trailing_comments: Vec::new(),
29990                use_bracket_syntax: false,
29991                no_parens: false,
29992                quoted: false,
29993                span: None,
29994                inferred_type: None,
29995            }));
29996            return self.maybe_parse_over(func);
29997        }
29998
29999        // ClickHouse: MINUS/EXCEPT/INTERSECT/REGEXP as function names (e.g., minus(a, b), REGEXP('^db'))
30000        // MINUS is tokenized as TokenType::Except (Oracle alias), REGEXP as TokenType::RLike
30001        if matches!(
30002            self.config.dialect,
30003            Some(crate::dialects::DialectType::ClickHouse)
30004        ) && (self.check(TokenType::Except)
30005            || self.check(TokenType::Intersect)
30006            || self.check(TokenType::RLike))
30007            && self.check_next(TokenType::LParen)
30008        {
30009            let token = self.advance(); // consume keyword
30010            self.skip(); // consume LParen
30011            let args = if self.check(TokenType::RParen) {
30012                Vec::new()
30013            } else {
30014                self.parse_function_arguments()?
30015            };
30016            self.expect(TokenType::RParen)?;
30017            let func = Expression::Function(Box::new(Function {
30018                name: token.text.clone(),
30019                args,
30020                distinct: false,
30021                trailing_comments: Vec::new(),
30022                use_bracket_syntax: false,
30023                no_parens: false,
30024                quoted: false,
30025                span: None,
30026                inferred_type: None,
30027            }));
30028            return self.maybe_parse_over(func);
30029        }
30030
30031        // Handle CURRENT_DATE/CURRENT_TIMESTAMP/CURRENT_TIME/CURRENT_DATETIME with parentheses
30032        // These have special token types but BigQuery and others use them as function calls with args
30033        if matches!(
30034            self.peek().token_type,
30035            TokenType::CurrentDate
30036                | TokenType::CurrentTimestamp
30037                | TokenType::CurrentTime
30038                | TokenType::CurrentDateTime
30039        ) {
30040            // Snowflake: CURRENT_TIME / CURRENT_TIME(n) -> Localtime (so DuckDB can output LOCALTIME)
30041            if matches!(
30042                self.config.dialect,
30043                Some(crate::dialects::DialectType::Snowflake)
30044            ) && self.peek().token_type == TokenType::CurrentTime
30045            {
30046                self.skip(); // consume CURRENT_TIME
30047                if self.match_token(TokenType::LParen) {
30048                    // CURRENT_TIME(n) - consume args but ignore precision
30049                    if !self.check(TokenType::RParen) {
30050                        let _ = self.parse_function_arguments()?;
30051                    }
30052                    self.expect(TokenType::RParen)?;
30053                }
30054                return self.maybe_parse_subscript(Expression::Localtime(Box::new(
30055                    crate::expressions::Localtime { this: None },
30056                )));
30057            }
30058            if self.check_next(TokenType::LParen) {
30059                // Parse as function call: CURRENT_DATE('UTC'), CURRENT_TIMESTAMP(), etc.
30060                let token = self.advance(); // consume CURRENT_DATE etc.
30061                self.skip(); // consume LParen
30062                let args = if self.check(TokenType::RParen) {
30063                    Vec::new()
30064                } else {
30065                    self.parse_function_arguments()?
30066                };
30067                self.expect(TokenType::RParen)?;
30068                let func = Expression::Function(Box::new(Function {
30069                    name: token.text.clone(),
30070                    args,
30071                    distinct: false,
30072                    trailing_comments: Vec::new(),
30073                    use_bracket_syntax: false,
30074                    no_parens: false,
30075                    quoted: false,
30076                    span: None,
30077                    inferred_type: None,
30078                }));
30079                return self.maybe_parse_subscript(func);
30080            } else {
30081                // No parens - parse as no-paren function
30082                let token = self.advance();
30083                let func = Expression::Function(Box::new(Function {
30084                    name: token.text.clone(),
30085                    args: Vec::new(),
30086                    distinct: false,
30087                    trailing_comments: Vec::new(),
30088                    use_bracket_syntax: false,
30089                    no_parens: true,
30090                    quoted: false,
30091                    span: None,
30092                    inferred_type: None,
30093                }));
30094                return self.maybe_parse_subscript(func);
30095            }
30096        }
30097
30098        // Type keyword followed by string literal -> CAST('value' AS TYPE)
30099        // E.g., NUMERIC '2.25' -> CAST('2.25' AS NUMERIC)
30100        if self.is_identifier_token() && self.check_next(TokenType::String) {
30101            let upper_name = self.peek().text.to_ascii_uppercase();
30102            if matches!(
30103                upper_name.as_str(),
30104                "NUMERIC" | "DECIMAL" | "BIGNUMERIC" | "BIGDECIMAL"
30105            ) {
30106                self.skip(); // consume the type keyword
30107                let str_token = self.advance(); // consume the string literal
30108                let data_type = match upper_name.as_str() {
30109                    "NUMERIC" | "DECIMAL" | "BIGNUMERIC" | "BIGDECIMAL" => {
30110                        crate::expressions::DataType::Decimal {
30111                            precision: None,
30112                            scale: None,
30113                        }
30114                    }
30115                    _ => unreachable!("type keyword already matched in outer if-condition"),
30116                };
30117                return Ok(Expression::Cast(Box::new(crate::expressions::Cast {
30118                    this: Expression::Literal(Box::new(Literal::String(str_token.text))),
30119                    to: data_type,
30120                    trailing_comments: Vec::new(),
30121                    double_colon_syntax: false,
30122                    format: None,
30123                    default: None,
30124                    inferred_type: None,
30125                })));
30126            }
30127        }
30128
30129        // Identifier, Column, or Function
30130        if self.is_identifier_token() {
30131            // Check for no-paren functions like CURRENT_TIMESTAMP, CURRENT_DATE, etc.
30132            // These should be parsed as functions even without parentheses
30133            let upper_name = self.peek().text.to_ascii_uppercase();
30134            if !self.check_next(TokenType::LParen)
30135                && !self.check_next(TokenType::Dot)
30136                && crate::function_registry::is_no_paren_function_name_upper(upper_name.as_str())
30137                && !(matches!(
30138                    self.config.dialect,
30139                    Some(crate::dialects::DialectType::ClickHouse)
30140                ) && upper_name.as_str() == "CURRENT_TIMESTAMP")
30141            {
30142                let token = self.advance();
30143                let func = Expression::Function(Box::new(Function {
30144                    name: token.text.clone(), // Preserve original case; generator handles normalization
30145                    args: Vec::new(),
30146                    distinct: false,
30147                    trailing_comments: Vec::new(),
30148                    use_bracket_syntax: false,
30149                    no_parens: true, // These functions were called without parentheses
30150                    quoted: false,
30151                    span: None,
30152                    inferred_type: None,
30153                }));
30154                return self.maybe_parse_subscript(func);
30155            }
30156
30157            let ident = self.expect_identifier_with_quoted()?;
30158            let name = ident.name.clone();
30159            let quoted = ident.quoted;
30160
30161            // Check for function call (skip Teradata FORMAT phrase)
30162            let is_teradata_format_phrase = matches!(
30163                self.config.dialect,
30164                Some(crate::dialects::DialectType::Teradata)
30165            ) && self.check(TokenType::LParen)
30166                && self.check_next(TokenType::Format);
30167            if !is_teradata_format_phrase && self.match_token(TokenType::LParen) {
30168                let upper_name = name.to_ascii_uppercase();
30169                let func_expr = self.parse_typed_function(&name, &upper_name, quoted)?;
30170                let func_expr = self.maybe_parse_clickhouse_parameterized_agg(func_expr)?;
30171                // Check for OVER clause (window function)
30172                return self.maybe_parse_over(func_expr);
30173            }
30174
30175            // Check for qualified name (table.column or table.method())
30176            if self.match_token(TokenType::Dot) {
30177                if self.match_token(TokenType::Star) {
30178                    // table.* with potential modifiers
30179                    let star = self.parse_star_modifiers(Some(ident))?;
30180                    let mut star_expr = Expression::Star(star);
30181                    // ClickHouse: a.* APPLY(func) EXCEPT(col) REPLACE(expr AS col) in any order
30182                    if matches!(
30183                        self.config.dialect,
30184                        Some(crate::dialects::DialectType::ClickHouse)
30185                    ) {
30186                        loop {
30187                            if self.check(TokenType::Apply) {
30188                                self.skip();
30189                                let apply_expr = if self.match_token(TokenType::LParen) {
30190                                    let e = self.parse_expression()?;
30191                                    self.expect(TokenType::RParen)?;
30192                                    e
30193                                } else {
30194                                    self.parse_expression()?
30195                                };
30196                                star_expr =
30197                                    Expression::Apply(Box::new(crate::expressions::Apply {
30198                                        this: Box::new(star_expr),
30199                                        expression: Box::new(apply_expr),
30200                                    }));
30201                            } else if self.check(TokenType::Except)
30202                                || self.check(TokenType::Exclude)
30203                            {
30204                                self.skip();
30205                                self.match_identifier("STRICT");
30206                                if self.match_token(TokenType::LParen) {
30207                                    loop {
30208                                        if self.check(TokenType::RParen) {
30209                                            break;
30210                                        }
30211                                        let _ = self.parse_expression()?;
30212                                        if !self.match_token(TokenType::Comma) {
30213                                            break;
30214                                        }
30215                                    }
30216                                    self.expect(TokenType::RParen)?;
30217                                } else if self.is_identifier_token()
30218                                    || self.is_safe_keyword_as_identifier()
30219                                {
30220                                    let _ = self.parse_expression()?;
30221                                }
30222                            } else if self.check(TokenType::Replace) {
30223                                self.skip();
30224                                self.match_identifier("STRICT");
30225                                if self.match_token(TokenType::LParen) {
30226                                    loop {
30227                                        if self.check(TokenType::RParen) {
30228                                            break;
30229                                        }
30230                                        let _ = self.parse_expression()?;
30231                                        if self.match_token(TokenType::As) {
30232                                            if self.is_identifier_token()
30233                                                || self.is_safe_keyword_as_identifier()
30234                                            {
30235                                                self.skip();
30236                                            }
30237                                        }
30238                                        if !self.match_token(TokenType::Comma) {
30239                                            break;
30240                                        }
30241                                    }
30242                                    self.expect(TokenType::RParen)?;
30243                                } else {
30244                                    let _ = self.parse_expression()?;
30245                                    if self.match_token(TokenType::As) {
30246                                        if self.is_identifier_token()
30247                                            || self.is_safe_keyword_as_identifier()
30248                                        {
30249                                            self.skip();
30250                                        }
30251                                    }
30252                                }
30253                            } else {
30254                                break;
30255                            }
30256                        }
30257                    }
30258                    return Ok(star_expr);
30259                }
30260                // Handle numeric field access: a.1, t.2 (ClickHouse tuple field access)
30261                // Also handle negative: a.-1 (ClickHouse negative tuple index)
30262                if self.check(TokenType::Number) {
30263                    let field_name = self.advance().text;
30264                    let col_expr = Expression::Dot(Box::new(DotAccess {
30265                        this: Expression::boxed_column(Column {
30266                            name: ident,
30267                            table: None,
30268                            join_mark: false,
30269                            trailing_comments: Vec::new(),
30270                            span: None,
30271                            inferred_type: None,
30272                        }),
30273                        field: Identifier::new(field_name),
30274                    }));
30275                    return self.maybe_parse_subscript(col_expr);
30276                }
30277                if matches!(
30278                    self.config.dialect,
30279                    Some(crate::dialects::DialectType::ClickHouse)
30280                ) && self.check(TokenType::Dash)
30281                    && self.current + 1 < self.tokens.len()
30282                    && self.tokens[self.current + 1].token_type == TokenType::Number
30283                {
30284                    self.skip(); // consume -
30285                    let num = self.advance().text;
30286                    let field_name = format!("-{}", num);
30287                    let col_expr = Expression::Dot(Box::new(DotAccess {
30288                        this: Expression::boxed_column(Column {
30289                            name: ident,
30290                            table: None,
30291                            join_mark: false,
30292                            trailing_comments: Vec::new(),
30293                            span: None,
30294                            inferred_type: None,
30295                        }),
30296                        field: Identifier::new(field_name),
30297                    }));
30298                    return self.maybe_parse_subscript(col_expr);
30299                }
30300                // ClickHouse: json.^path — the ^ prefix means "get all nested subcolumns"
30301                if matches!(
30302                    self.config.dialect,
30303                    Some(crate::dialects::DialectType::ClickHouse)
30304                ) && self.check(TokenType::Caret)
30305                {
30306                    self.skip(); // consume ^
30307                    let mut field_name = "^".to_string();
30308                    if self.check(TokenType::Identifier)
30309                        || self.check(TokenType::Var)
30310                        || self.check_keyword()
30311                    {
30312                        field_name.push_str(&self.advance().text);
30313                    }
30314                    let col_expr = Expression::Dot(Box::new(DotAccess {
30315                        this: Expression::boxed_column(Column {
30316                            name: ident,
30317                            table: None,
30318                            join_mark: false,
30319                            trailing_comments: Vec::new(),
30320                            span: None,
30321                            inferred_type: None,
30322                        }),
30323                        field: Identifier::new(field_name),
30324                    }));
30325                    return self.maybe_parse_subscript(col_expr);
30326                }
30327                // Allow keywords as column names (e.g., a.filter, x.update)
30328                let col_ident = self.expect_identifier_or_keyword_with_quoted()?;
30329
30330                // Handle Oracle/Redshift outer join marker (+) BEFORE checking for method call
30331                // This is critical: (+) looks like a method call but is actually a join marker
30332                if self.check(TokenType::LParen) && self.check_next(TokenType::Plus) {
30333                    let saved_pos = self.current;
30334                    if self.match_token(TokenType::LParen)
30335                        && self.match_token(TokenType::Plus)
30336                        && self.match_token(TokenType::RParen)
30337                    {
30338                        let trailing_comments = self.previous_trailing_comments().to_vec();
30339                        let col = Expression::boxed_column(Column {
30340                            name: col_ident,
30341                            table: Some(ident),
30342                            join_mark: true,
30343                            trailing_comments,
30344                            span: None,
30345                            inferred_type: None,
30346                        });
30347                        return self.maybe_parse_subscript(col);
30348                    } else {
30349                        self.current = saved_pos;
30350                    }
30351                }
30352
30353                // Check if this is a method call (column followed by parentheses)
30354                if self.check(TokenType::LParen) {
30355                    // This is a method call like table.EXTRACT() or obj.INT()
30356                    self.skip(); // consume (
30357                    let args = if self.check(TokenType::RParen) {
30358                        Vec::new()
30359                    } else {
30360                        self.parse_expression_list()?
30361                    };
30362                    self.expect(TokenType::RParen)?;
30363                    let method_call = Expression::MethodCall(Box::new(MethodCall {
30364                        this: Expression::boxed_column(Column {
30365                            name: ident.clone(),
30366                            table: None,
30367                            join_mark: false,
30368                            trailing_comments: Vec::new(),
30369                            span: None,
30370                            inferred_type: None,
30371                        }),
30372                        method: col_ident,
30373                        args,
30374                    }));
30375                    return self.maybe_parse_subscript(method_call);
30376                }
30377
30378                // Capture trailing comments from the column name token
30379                let trailing_comments = self.previous_trailing_comments().to_vec();
30380                let col = Expression::boxed_column(Column {
30381                    name: col_ident,
30382                    table: Some(ident),
30383                    join_mark: false,
30384                    trailing_comments,
30385                    span: None,
30386                    inferred_type: None,
30387                });
30388                return self.maybe_parse_subscript(col);
30389            }
30390
30391            // Check for Oracle pseudocolumns (ROWNUM, ROWID, LEVEL, SYSDATE, etc.).
30392            // Quoted identifiers are never treated as pseudocolumns.
30393            // Only recognize in Oracle and generic dialect — other dialects treat these as regular identifiers
30394            if !quoted
30395                && matches!(
30396                    self.config.dialect,
30397                    Some(crate::dialects::DialectType::Oracle) | None
30398                )
30399            {
30400                if let Some(pseudocolumn_type) = PseudocolumnType::from_str(&name) {
30401                    return Ok(Expression::Pseudocolumn(Pseudocolumn {
30402                        kind: pseudocolumn_type,
30403                    }));
30404                }
30405            }
30406
30407            // Check for lambda expression: x -> body
30408            // But NOT if followed by a string literal (that's JSON extract: col -> '$.path')
30409            if self.check(TokenType::Arrow)
30410                && !self
30411                    .peek_nth(1)
30412                    .map_or(false, |t| t.token_type == TokenType::String)
30413            {
30414                self.skip(); // consume the Arrow token
30415                let body = self.parse_expression()?;
30416                return Ok(Expression::Lambda(Box::new(LambdaExpr {
30417                    parameters: vec![ident],
30418                    body,
30419                    colon: false,
30420                    parameter_types: Vec::new(),
30421                })));
30422            }
30423
30424            // Capture trailing comments from the identifier token
30425            let trailing_comments = self.previous_trailing_comments().to_vec();
30426            let col = Expression::boxed_column(Column {
30427                name: ident,
30428                table: None,
30429                join_mark: false,
30430                trailing_comments,
30431                span: None,
30432                inferred_type: None,
30433            });
30434            return self.maybe_parse_subscript(col);
30435        }
30436
30437        // Exasol-style IF expression: IF condition THEN true_value ELSE false_value ENDIF
30438        // Check for IF not followed by ( (which would be IF function call handled elsewhere)
30439        // This handles: IF age < 18 THEN 'minor' ELSE 'adult' ENDIF
30440        // IMPORTANT: This must be checked BEFORE is_safe_keyword_as_identifier() which would
30441        // treat IF as a column name when not followed by ( or .
30442        // For TSQL/Fabric: IF (cond) BEGIN ... END is an IF statement, not function
30443        if self.check(TokenType::If)
30444            && !self.check_next(TokenType::Dot)
30445            && (!self.check_next(TokenType::LParen)
30446                || matches!(
30447                    self.config.dialect,
30448                    Some(crate::dialects::DialectType::TSQL)
30449                        | Some(crate::dialects::DialectType::Fabric)
30450                ))
30451        {
30452            let saved_pos = self.current;
30453            self.skip(); // consume IF
30454            if let Some(if_expr) = self.parse_if()? {
30455                return Ok(if_expr);
30456            }
30457            // parse_if() returned None — IF is not an IF expression here,
30458            // restore position so it can be treated as an identifier
30459            self.current = saved_pos;
30460        }
30461
30462        // NEXT VALUE FOR sequence_name [OVER (ORDER BY ...)]
30463        // Must check before treating NEXT as a standalone identifier via is_safe_keyword_as_identifier
30464        if self.check(TokenType::Next)
30465            && self.current + 2 < self.tokens.len()
30466            && self.tokens[self.current + 1]
30467                .text
30468                .eq_ignore_ascii_case("VALUE")
30469            && self.tokens[self.current + 2]
30470                .text
30471                .eq_ignore_ascii_case("FOR")
30472        {
30473            self.skip(); // consume NEXT
30474            if let Some(expr) = self.parse_next_value_for()? {
30475                return Ok(expr);
30476            }
30477        }
30478
30479        // ClickHouse: `from` can be a column name when followed by comma or dot
30480        if matches!(
30481            self.config.dialect,
30482            Some(crate::dialects::DialectType::ClickHouse)
30483        ) && self.check(TokenType::From)
30484            && (self.check_next(TokenType::Comma) || self.check_next(TokenType::Dot))
30485        {
30486            let token = self.advance();
30487            let name = token.text.clone();
30488            if self.match_token(TokenType::Dot) {
30489                // from.col qualified reference
30490                let col_name = self.expect_identifier_or_keyword()?;
30491                return Ok(Expression::Column(Box::new(crate::expressions::Column {
30492                    name: Identifier::new(col_name),
30493                    table: Some(Identifier::new(name)),
30494                    join_mark: false,
30495                    trailing_comments: Vec::new(),
30496                    span: None,
30497                    inferred_type: None,
30498                })));
30499            }
30500            return Ok(Expression::Column(Box::new(crate::expressions::Column {
30501                name: Identifier::new(name),
30502                table: None,
30503                join_mark: false,
30504                trailing_comments: Vec::new(),
30505                span: None,
30506                inferred_type: None,
30507            })));
30508        }
30509
30510        // ClickHouse: `except` as identifier in expression context (set operations are handled at statement level)
30511        // except(args) is already handled above in the MINUS/EXCEPT/INTERSECT function block
30512        if matches!(
30513            self.config.dialect,
30514            Some(crate::dialects::DialectType::ClickHouse)
30515        ) && self.check(TokenType::Except)
30516            && !self.check_next(TokenType::LParen)
30517        {
30518            let token = self.advance();
30519            let name = token.text.clone();
30520            if self.match_token(TokenType::Dot) {
30521                let col_name = self.expect_identifier_or_keyword()?;
30522                return Ok(Expression::Column(Box::new(crate::expressions::Column {
30523                    name: Identifier::new(col_name),
30524                    table: Some(Identifier::new(name)),
30525                    join_mark: false,
30526                    trailing_comments: Vec::new(),
30527                    span: None,
30528                    inferred_type: None,
30529                })));
30530            }
30531            return Ok(Expression::Column(Box::new(crate::expressions::Column {
30532                name: Identifier::new(name),
30533                table: None,
30534                join_mark: false,
30535                trailing_comments: Vec::new(),
30536                span: None,
30537                inferred_type: None,
30538            })));
30539        }
30540
30541        // ClickHouse: structural keywords like FROM, ON, JOIN can be used as identifiers
30542        // in expression context when followed by an operator (e.g., from + 1, on.col)
30543        if matches!(
30544            self.config.dialect,
30545            Some(crate::dialects::DialectType::ClickHouse)
30546        ) && self.peek().token_type.is_keyword()
30547            && !self.is_safe_keyword_as_identifier()
30548        {
30549            let next_tt = self
30550                .peek_nth(1)
30551                .map(|t| t.token_type)
30552                .unwrap_or(TokenType::Semicolon);
30553            // A structural keyword can be used as an identifier when it appears
30554            // in expression context. We detect this by checking what follows.
30555            // Essentially: it's NOT an identifier only if the keyword itself starts
30556            // a clause (e.g., FROM followed by a table name). But when it's followed
30557            // by an operator, comma, close-paren, or even another clause keyword
30558            // (meaning it's the last token in an expression), it's an identifier.
30559            let is_expr_context = !matches!(
30560                next_tt,
30561                TokenType::Identifier
30562                    | TokenType::Var
30563                    | TokenType::QuotedIdentifier
30564                    | TokenType::LParen
30565                    | TokenType::Number
30566                    | TokenType::String
30567            );
30568            if is_expr_context {
30569                let token = self.advance();
30570                return Ok(Expression::boxed_column(Column {
30571                    name: Identifier::new(token.text),
30572                    table: None,
30573                    join_mark: false,
30574                    trailing_comments: Vec::new(),
30575                    span: None,
30576                    inferred_type: None,
30577                }));
30578            }
30579        }
30580        // %s or %(name)s percent parameter (PostgreSQL psycopg2 style)
30581        // Must be checked BEFORE the keyword-as-identifier handler below, since
30582        // Percent is in is_keyword() and is_safe_keyword_as_identifier() returns true for it.
30583        if self.check(TokenType::Percent)
30584            && (
30585                self.check_next(TokenType::Var)  // %s
30586            || self.check_next(TokenType::LParen)
30587                // %(name)s
30588            )
30589        {
30590            self.skip(); // consume %
30591                         // Check for %(name)s - named parameter
30592            if self.match_token(TokenType::LParen) {
30593                // Get the parameter name
30594                if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
30595                    let name = self.advance().text;
30596                    self.expect(TokenType::RParen)?;
30597                    // Expect 's' after the closing paren
30598                    if self.check(TokenType::Var) && self.peek().text == "s" {
30599                        self.skip(); // consume 's'
30600                    }
30601                    return Ok(Expression::Parameter(Box::new(Parameter {
30602                        name: Some(name),
30603                        index: None,
30604                        style: ParameterStyle::Percent,
30605                        quoted: false,
30606                        string_quoted: false,
30607                        expression: None,
30608                    })));
30609                } else {
30610                    return Err(self.parse_error("Expected parameter name after %("));
30611                }
30612            }
30613            // Check for %s - anonymous parameter
30614            if self.check(TokenType::Var) && self.peek().text == "s" {
30615                self.skip(); // consume 's'
30616                return Ok(Expression::Parameter(Box::new(Parameter {
30617                    name: None,
30618                    index: None,
30619                    style: ParameterStyle::Percent,
30620                    quoted: false,
30621                    string_quoted: false,
30622                    expression: None,
30623                })));
30624            }
30625            // Not a parameter - backtrack
30626            self.current -= 1;
30627        }
30628
30629        // Some keywords can be used as identifiers (column names, table names, etc.)
30630        // when they are "safe" keywords that don't affect query structure.
30631        // Structural keywords like FROM, WHERE, JOIN should NOT be usable as identifiers.
30632        if self.is_safe_keyword_as_identifier() {
30633            let token = self.advance();
30634            let name = token.text.clone();
30635
30636            // Check for function call (keyword followed by paren) - skip Teradata FORMAT phrase
30637            let is_teradata_format_phrase = matches!(
30638                self.config.dialect,
30639                Some(crate::dialects::DialectType::Teradata)
30640            ) && self.check(TokenType::LParen)
30641                && self.check_next(TokenType::Format);
30642            if !is_teradata_format_phrase && self.match_token(TokenType::LParen) {
30643                let upper_name = name.to_ascii_uppercase();
30644                let func_expr = self.parse_typed_function(&name, &upper_name, false)?;
30645                let func_expr = self.maybe_parse_clickhouse_parameterized_agg(func_expr)?;
30646                return self.maybe_parse_over(func_expr);
30647            }
30648
30649            // Check for qualified name (keyword.column or keyword.method())
30650            if self.match_token(TokenType::Dot) {
30651                if self.match_token(TokenType::Star) {
30652                    // keyword.* with potential modifiers
30653                    let ident = Identifier::new(name);
30654                    let star = self.parse_star_modifiers(Some(ident))?;
30655                    return Ok(Expression::Star(star));
30656                }
30657                // ClickHouse: json.^path — the ^ prefix means "get all nested subcolumns"
30658                if matches!(
30659                    self.config.dialect,
30660                    Some(crate::dialects::DialectType::ClickHouse)
30661                ) && self.check(TokenType::Caret)
30662                {
30663                    self.skip(); // consume ^
30664                    let mut field_name = "^".to_string();
30665                    if self.check(TokenType::Identifier)
30666                        || self.check(TokenType::Var)
30667                        || self.check_keyword()
30668                    {
30669                        field_name.push_str(&self.advance().text);
30670                    }
30671                    let col = Expression::Dot(Box::new(DotAccess {
30672                        this: Expression::boxed_column(Column {
30673                            name: Identifier::new(name),
30674                            table: None,
30675                            join_mark: false,
30676                            trailing_comments: Vec::new(),
30677                            span: None,
30678                            inferred_type: None,
30679                        }),
30680                        field: Identifier::new(field_name),
30681                    }));
30682                    return self.maybe_parse_subscript(col);
30683                }
30684
30685                // Handle numeric field access: keyword.1, keyword.2 (ClickHouse tuple field access)
30686                if self.check(TokenType::Number) {
30687                    let field_name = self.advance().text;
30688                    let col_expr = Expression::Dot(Box::new(DotAccess {
30689                        this: Expression::boxed_column(Column {
30690                            name: Identifier::new(name),
30691                            table: None,
30692                            join_mark: false,
30693                            trailing_comments: Vec::new(),
30694                            span: None,
30695                            inferred_type: None,
30696                        }),
30697                        field: Identifier::new(field_name),
30698                    }));
30699                    return self.maybe_parse_subscript(col_expr);
30700                }
30701
30702                // Allow keywords as column names
30703                let col_ident = self.expect_identifier_or_keyword_with_quoted()?;
30704
30705                // Check if this is a method call
30706                if self.check(TokenType::LParen) {
30707                    self.skip(); // consume (
30708                    let args = if self.check(TokenType::RParen) {
30709                        Vec::new()
30710                    } else {
30711                        self.parse_expression_list()?
30712                    };
30713                    self.expect(TokenType::RParen)?;
30714                    let method_call = Expression::MethodCall(Box::new(MethodCall {
30715                        this: Expression::Identifier(Identifier::new(name)),
30716                        method: col_ident,
30717                        args,
30718                    }));
30719                    return self.maybe_parse_subscript(method_call);
30720                }
30721
30722                // Capture trailing comments from the column name token
30723                let trailing_comments = self.previous_trailing_comments().to_vec();
30724                let mut col = Expression::boxed_column(Column {
30725                    name: col_ident,
30726                    table: Some(Identifier::new(name)),
30727                    join_mark: false,
30728                    trailing_comments,
30729                    span: None,
30730                    inferred_type: None,
30731                });
30732                // Handle Oracle/Redshift outer join marker (+) after column reference
30733                if self.check(TokenType::LParen) && self.check_next(TokenType::Plus) {
30734                    let saved_pos = self.current;
30735                    if self.match_token(TokenType::LParen)
30736                        && self.match_token(TokenType::Plus)
30737                        && self.match_token(TokenType::RParen)
30738                    {
30739                        if let Expression::Column(ref mut c) = col {
30740                            c.join_mark = true;
30741                        }
30742                    } else {
30743                        self.current = saved_pos;
30744                    }
30745                }
30746                return self.maybe_parse_subscript(col);
30747            }
30748
30749            // Simple identifier (keyword used as column name)
30750            // Capture trailing comments from the keyword token
30751            let trailing_comments = self.previous_trailing_comments().to_vec();
30752            let ident = Identifier::new(name);
30753            let col = Expression::boxed_column(Column {
30754                name: ident,
30755                table: None,
30756                join_mark: false,
30757                trailing_comments,
30758                span: None,
30759                inferred_type: None,
30760            });
30761            return self.maybe_parse_subscript(col);
30762        }
30763
30764        // @@ system variable (MySQL/SQL Server): @@version, @@IDENTITY, @@GLOBAL.var
30765        if self.match_token(TokenType::AtAt) {
30766            // Get the variable name
30767            let name = if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
30768                let mut n = self.advance().text;
30769                // Handle @@scope.variable (e.g., @@GLOBAL.max_connections, @@SESSION.sql_mode)
30770                if self.match_token(TokenType::Dot) {
30771                    if self.check(TokenType::Identifier)
30772                        || self.check(TokenType::Var)
30773                        || self.is_safe_keyword_as_identifier()
30774                    {
30775                        n.push('.');
30776                        n.push_str(&self.advance().text);
30777                    }
30778                }
30779                n
30780            } else if self.check_keyword() {
30781                // Handle @@keyword (e.g., @@sql_mode when sql_mode is a keyword)
30782                self.advance().text
30783            } else {
30784                return Err(self.parse_error("Expected variable name after @@"));
30785            };
30786            return Ok(Expression::Parameter(Box::new(Parameter {
30787                name: Some(name),
30788                index: None,
30789                style: ParameterStyle::DoubleAt,
30790                quoted: false,
30791                string_quoted: false,
30792                expression: None,
30793            })));
30794        }
30795
30796        // @ user variable/parameter: @x, @"x", @JOIN, @'foo'
30797        if self.match_token(TokenType::DAt) {
30798            // Get the variable name - can be identifier, quoted identifier, keyword, or string
30799            let (name, quoted, string_quoted) =
30800                if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
30801                    (self.advance().text, false, false)
30802                } else if self.check(TokenType::QuotedIdentifier) {
30803                    // Quoted identifier like @"x"
30804                    let token = self.advance();
30805                    (token.text, true, false)
30806                } else if self.check(TokenType::String) {
30807                    // String-quoted like @'foo'
30808                    let token = self.advance();
30809                    (token.text, false, true)
30810                } else if self.check(TokenType::Number) {
30811                    // Numeric like @1
30812                    let token = self.advance();
30813                    (token.text, false, false)
30814                } else if self.peek().token_type.is_keyword() {
30815                    // Keyword used as variable name like @JOIN
30816                    let token = self.advance();
30817                    (token.text, false, false)
30818                } else {
30819                    return Err(self.parse_error("Expected variable name after @"));
30820                };
30821            return Ok(Expression::Parameter(Box::new(Parameter {
30822                name: Some(name),
30823                index: None,
30824                style: ParameterStyle::At,
30825                quoted,
30826                string_quoted,
30827                expression: None,
30828            })));
30829        }
30830
30831        // Parameter: ? placeholder or $n positional parameter
30832        if self.check(TokenType::Parameter) {
30833            let token = self.advance();
30834            // Check if this is a positional parameter ($1, $2, etc.) or a plain ? placeholder
30835            if let Ok(index) = token.text.parse::<u32>() {
30836                // Positional parameter like $1, $2 (token text is just the number)
30837                let param = Expression::Parameter(Box::new(Parameter {
30838                    name: None,
30839                    index: Some(index),
30840                    style: ParameterStyle::Dollar,
30841                    quoted: false,
30842                    string_quoted: false,
30843                    expression: None,
30844                }));
30845                // Check for JSON path access: $1:name or dot access: $1.c1
30846                let result = self.parse_colon_json_path(param)?;
30847                return self.maybe_parse_subscript(result);
30848            } else {
30849                // Plain ? placeholder
30850                return Ok(Expression::Placeholder(Placeholder { index: None }));
30851            }
30852        }
30853
30854        // :name or :1 colon parameter
30855        if self.match_token(TokenType::Colon) {
30856            // Check for numeric parameter :1, :2, etc.
30857            if self.check(TokenType::Number) {
30858                let num_token = self.advance();
30859                if let Ok(index) = num_token.text.parse::<u32>() {
30860                    return Ok(Expression::Parameter(Box::new(Parameter {
30861                        name: None,
30862                        index: Some(index),
30863                        style: ParameterStyle::Colon,
30864                        quoted: false,
30865                        string_quoted: false,
30866                        expression: None,
30867                    })));
30868                }
30869                return Err(
30870                    self.parse_error(format!("Invalid colon parameter: :{}", num_token.text))
30871                );
30872            }
30873            // Get the parameter name
30874            if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
30875                let name = self.advance().text;
30876                return Ok(Expression::Parameter(Box::new(Parameter {
30877                    name: Some(name),
30878                    index: None,
30879                    style: ParameterStyle::Colon,
30880                    quoted: false,
30881                    string_quoted: false,
30882                    expression: None,
30883                })));
30884            } else {
30885                return Err(self.parse_error("Expected parameter name after :"));
30886            }
30887        }
30888
30889        // $n dollar parameter: $1, $2, etc.
30890        if self.match_token(TokenType::Dollar) {
30891            // Check for ${identifier} or ${kind:name} template variable syntax (Databricks, Hive)
30892            // Hive supports ${hiveconf:variable_name} syntax
30893            if self.match_token(TokenType::LBrace) {
30894                // Parse the variable name - can be identifier or keyword
30895                if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
30896                    let name_token = self.advance();
30897                    // Check for ${kind:name} syntax (e.g., ${hiveconf:some_var})
30898                    let expression = if self.match_token(TokenType::Colon) {
30899                        if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
30900                            let expr_token = self.advance();
30901                            Some(expr_token.text.clone())
30902                        } else {
30903                            return Err(self.parse_error("Expected identifier after : in ${...}"));
30904                        }
30905                    } else {
30906                        None
30907                    };
30908                    self.expect(TokenType::RBrace)?;
30909                    return Ok(Expression::Parameter(Box::new(Parameter {
30910                        name: Some(name_token.text.clone()),
30911                        index: None,
30912                        style: ParameterStyle::DollarBrace,
30913                        quoted: false,
30914                        string_quoted: false,
30915                        expression,
30916                    })));
30917                } else {
30918                    return Err(self.parse_error("Expected identifier after ${"));
30919                }
30920            }
30921            // Check for number following the dollar sign → positional parameter ($1, $2, etc.)
30922            if self.check(TokenType::Number) {
30923                let num_token = self.advance();
30924                // Parse the number as an index
30925                if let Ok(index) = num_token.text.parse::<u32>() {
30926                    let param_expr = Expression::Parameter(Box::new(Parameter {
30927                        name: None,
30928                        index: Some(index),
30929                        style: ParameterStyle::Dollar,
30930                        quoted: false,
30931                        string_quoted: false,
30932                        expression: None,
30933                    }));
30934                    // Check for JSON path access: $1:name or $1:name:subname
30935                    let result = self.parse_colon_json_path(param_expr)?;
30936                    // Also check for dot access: $1.c1 or $1:name.field
30937                    return self.maybe_parse_subscript(result);
30938                }
30939                // If it's not a valid integer, treat as error
30940                return Err(
30941                    self.parse_error(format!("Invalid dollar parameter: ${}", num_token.text))
30942                );
30943            }
30944            // Check for identifier following the dollar sign → session variable ($x, $query_id, etc.)
30945            if self.check(TokenType::Identifier)
30946                || self.check(TokenType::Var)
30947                || self.is_safe_keyword_as_identifier()
30948            {
30949                let name_token = self.advance();
30950                return Ok(Expression::Parameter(Box::new(Parameter {
30951                    name: Some(name_token.text.clone()),
30952                    index: None,
30953                    style: ParameterStyle::Dollar,
30954                    quoted: false,
30955                    string_quoted: false,
30956                    expression: None,
30957                })));
30958            }
30959            // Just a $ by itself - treat as error
30960            return Err(self.parse_error("Expected number or identifier after $"));
30961        }
30962
30963        // %s or %(name)s percent parameter (PostgreSQL psycopg2 style)
30964        if self.match_token(TokenType::Percent) {
30965            // Check for %(name)s - named parameter
30966            if self.match_token(TokenType::LParen) {
30967                // Get the parameter name
30968                if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
30969                    let name = self.advance().text;
30970                    self.expect(TokenType::RParen)?;
30971                    // Expect 's' after the closing paren
30972                    if self.check(TokenType::Var) && self.peek().text == "s" {
30973                        self.skip(); // consume 's'
30974                    }
30975                    return Ok(Expression::Parameter(Box::new(Parameter {
30976                        name: Some(name),
30977                        index: None,
30978                        style: ParameterStyle::Percent,
30979                        quoted: false,
30980                        string_quoted: false,
30981                        expression: None,
30982                    })));
30983                } else {
30984                    return Err(self.parse_error("Expected parameter name after %("));
30985                }
30986            }
30987            // Check for %s - anonymous parameter
30988            if self.check(TokenType::Var) && self.peek().text == "s" {
30989                self.skip(); // consume 's'
30990                return Ok(Expression::Parameter(Box::new(Parameter {
30991                    name: None,
30992                    index: None,
30993                    style: ParameterStyle::Percent,
30994                    quoted: false,
30995                    string_quoted: false,
30996                    expression: None,
30997                })));
30998            }
30999            // If not followed by 's' or '(', it's not a parameter - error
31000            return Err(self.parse_error("Expected 's' or '(' after % for parameter"));
31001        }
31002
31003        // LEFT, RIGHT, OUTER, FULL, ALL etc. keywords as identifiers when followed by DOT
31004        // e.g., SELECT LEFT.FOO FROM ... or SELECT all.count FROM ...
31005        if (self.check(TokenType::Left)
31006            || self.check(TokenType::Right)
31007            || self.check(TokenType::Outer)
31008            || self.check(TokenType::Full)
31009            || self.check(TokenType::All)
31010            || self.check(TokenType::Only)
31011            || self.check(TokenType::Next)
31012            || self.check(TokenType::If))
31013            && self.check_next(TokenType::Dot)
31014        {
31015            let token = self.advance();
31016            let ident = Identifier::new(token.text);
31017            self.expect(TokenType::Dot)?;
31018            if self.match_token(TokenType::Star) {
31019                let star = self.parse_star_modifiers(Some(ident))?;
31020                return Ok(Expression::Star(star));
31021            }
31022            let col_ident = self.expect_identifier_or_keyword_with_quoted()?;
31023            let trailing_comments = self.previous_trailing_comments().to_vec();
31024            let mut col = Expression::boxed_column(Column {
31025                name: col_ident,
31026                table: Some(ident),
31027                join_mark: false,
31028                trailing_comments,
31029                span: None,
31030                inferred_type: None,
31031            });
31032            // Handle Oracle/Redshift outer join marker (+) after column reference
31033            if self.check(TokenType::LParen) && self.check_next(TokenType::Plus) {
31034                let saved_pos = self.current;
31035                if self.match_token(TokenType::LParen)
31036                    && self.match_token(TokenType::Plus)
31037                    && self.match_token(TokenType::RParen)
31038                {
31039                    if let Expression::Column(ref mut c) = col {
31040                        c.join_mark = true;
31041                    }
31042                } else {
31043                    self.current = saved_pos;
31044                }
31045            }
31046            return self.maybe_parse_subscript(col);
31047        }
31048
31049        // NEXT VALUE FOR sequence_name [OVER (ORDER BY ...)]
31050        // Must check before treating NEXT as a standalone identifier
31051        if self.check(TokenType::Next) {
31052            // NEXT(arg) - pattern navigation function in MATCH_RECOGNIZE
31053            if self.check_next(TokenType::LParen) {
31054                let token = self.advance();
31055                self.skip(); // consume LParen
31056                let args = self.parse_function_args_list()?;
31057                self.expect(TokenType::RParen)?;
31058                return Ok(Expression::Function(Box::new(Function {
31059                    name: token.text,
31060                    args,
31061                    distinct: false,
31062                    trailing_comments: Vec::new(),
31063                    use_bracket_syntax: false,
31064                    no_parens: false,
31065                    quoted: false,
31066                    span: None,
31067                    inferred_type: None,
31068                })));
31069            }
31070        }
31071
31072        // LEFT, RIGHT, OUTER, FULL, ONLY, NEXT as standalone identifiers (not followed by JOIN or LParen)
31073        // e.g., SELECT LEFT FROM ... or SELECT only FROM ...
31074        // If followed by LParen, it's a function call (e.g., NEXT(bar) in MATCH_RECOGNIZE)
31075        if self.can_be_alias_keyword()
31076            && !self.check_next(TokenType::Join)
31077            && !self.check_next(TokenType::LParen)
31078        {
31079            let token = self.advance();
31080            let trailing_comments = self.previous_trailing_comments().to_vec();
31081            let col = Expression::boxed_column(Column {
31082                name: Identifier::new(token.text),
31083                table: None,
31084                join_mark: false,
31085                trailing_comments,
31086                span: None,
31087                inferred_type: None,
31088            });
31089            return self.maybe_parse_subscript(col);
31090        }
31091
31092        Err(self.parse_error(format!("Unexpected token: {:?}", self.peek().token_type)))
31093    }
31094
31095    /// Check if function name is a known aggregate function
31096    fn is_aggregate_function(name: &str) -> bool {
31097        crate::function_registry::is_aggregate_function_name(name)
31098    }
31099
31100    /// Whether the source dialect uses LOG(base, value) order (base first).
31101    /// Default is true. BigQuery, TSQL, Tableau, Fabric use LOG(value, base).
31102    fn log_base_first(&self) -> bool {
31103        !matches!(
31104            self.config.dialect,
31105            Some(crate::dialects::DialectType::BigQuery)
31106                | Some(crate::dialects::DialectType::TSQL)
31107                | Some(crate::dialects::DialectType::Tableau)
31108                | Some(crate::dialects::DialectType::Fabric)
31109        )
31110    }
31111
31112    /// Whether the source dialect treats single-arg LOG(x) as LN(x).
31113    /// These dialects have LOG_DEFAULTS_TO_LN = True in Python sqlglot.
31114    fn log_defaults_to_ln(&self) -> bool {
31115        matches!(
31116            self.config.dialect,
31117            Some(crate::dialects::DialectType::MySQL)
31118                | Some(crate::dialects::DialectType::BigQuery)
31119                | Some(crate::dialects::DialectType::TSQL)
31120                | Some(crate::dialects::DialectType::ClickHouse)
31121                | Some(crate::dialects::DialectType::Hive)
31122                | Some(crate::dialects::DialectType::Spark)
31123                | Some(crate::dialects::DialectType::Databricks)
31124                | Some(crate::dialects::DialectType::Drill)
31125                | Some(crate::dialects::DialectType::Dremio)
31126        )
31127    }
31128
31129    /// Parse the subset of typed functions that are handled via function-registry metadata.
31130    fn try_parse_registry_typed_function(
31131        &mut self,
31132        name: &str,
31133        upper_name: &str,
31134        canonical_upper_name: &str,
31135        quoted: bool,
31136    ) -> Result<Option<Expression>> {
31137        let Some(spec) =
31138            crate::function_registry::typed_function_spec_by_canonical_upper(canonical_upper_name)
31139        else {
31140            return Ok(None);
31141        };
31142
31143        match (spec.parse_kind, spec.canonical_name) {
31144            (crate::function_registry::TypedParseKind::AggregateLike, "COUNT_IF") => {
31145                let distinct = self.match_token(TokenType::Distinct);
31146                let this = self.parse_expression()?;
31147                // ClickHouse: handle AS alias inside countIf args: countIf(expr AS d, pred)
31148                let this = if matches!(
31149                    self.config.dialect,
31150                    Some(crate::dialects::DialectType::ClickHouse)
31151                ) && self.check(TokenType::As)
31152                {
31153                    let next_idx = self.current + 1;
31154                    let after_alias_idx = self.current + 2;
31155                    let is_alias = next_idx < self.tokens.len()
31156                        && (matches!(
31157                            self.tokens[next_idx].token_type,
31158                            TokenType::Identifier | TokenType::Var | TokenType::QuotedIdentifier
31159                        ) || self.tokens[next_idx].token_type.is_keyword())
31160                        && after_alias_idx < self.tokens.len()
31161                        && matches!(
31162                            self.tokens[after_alias_idx].token_type,
31163                            TokenType::RParen | TokenType::Comma
31164                        );
31165                    if is_alias {
31166                        self.skip(); // consume AS
31167                        let alias_token = self.advance();
31168                        Expression::Alias(Box::new(crate::expressions::Alias {
31169                            this,
31170                            alias: Identifier::new(alias_token.text.clone()),
31171                            column_aliases: Vec::new(),
31172                            pre_alias_comments: Vec::new(),
31173                            trailing_comments: Vec::new(),
31174                            inferred_type: None,
31175                        }))
31176                    } else {
31177                        this
31178                    }
31179                } else {
31180                    this
31181                };
31182                if matches!(
31183                    self.config.dialect,
31184                    Some(crate::dialects::DialectType::ClickHouse)
31185                ) && self.match_token(TokenType::Comma)
31186                {
31187                    let mut args = vec![this];
31188                    let arg = self.parse_expression()?;
31189                    // Handle AS alias on subsequent args too
31190                    let arg = if self.check(TokenType::As) {
31191                        let next_idx = self.current + 1;
31192                        let after_alias_idx = self.current + 2;
31193                        let is_alias = next_idx < self.tokens.len()
31194                            && (matches!(
31195                                self.tokens[next_idx].token_type,
31196                                TokenType::Identifier
31197                                    | TokenType::Var
31198                                    | TokenType::QuotedIdentifier
31199                            ) || self.tokens[next_idx].token_type.is_keyword())
31200                            && after_alias_idx < self.tokens.len()
31201                            && matches!(
31202                                self.tokens[after_alias_idx].token_type,
31203                                TokenType::RParen | TokenType::Comma
31204                            );
31205                        if is_alias {
31206                            self.skip(); // consume AS
31207                            let alias_token = self.advance();
31208                            Expression::Alias(Box::new(crate::expressions::Alias {
31209                                this: arg,
31210                                alias: Identifier::new(alias_token.text.clone()),
31211                                column_aliases: Vec::new(),
31212                                pre_alias_comments: Vec::new(),
31213                                trailing_comments: Vec::new(),
31214                                inferred_type: None,
31215                            }))
31216                        } else {
31217                            arg
31218                        }
31219                    } else {
31220                        arg
31221                    };
31222                    args.push(arg);
31223                    while self.match_token(TokenType::Comma) {
31224                        args.push(self.parse_expression()?);
31225                    }
31226                    self.expect(TokenType::RParen)?;
31227                    return Ok(Some(Expression::CombinedAggFunc(Box::new(
31228                        CombinedAggFunc {
31229                            this: Box::new(Expression::Identifier(Identifier::new("countIf"))),
31230                            expressions: args,
31231                        },
31232                    ))));
31233                }
31234                self.expect(TokenType::RParen)?;
31235                let filter = self.parse_filter_clause()?;
31236                Ok(Some(Expression::CountIf(Box::new(AggFunc {
31237                    ignore_nulls: None,
31238                    this,
31239                    distinct,
31240                    filter,
31241                    order_by: Vec::new(),
31242                    having_max: None,
31243                    name: Some(name.to_string()),
31244                    limit: None,
31245                    inferred_type: None,
31246                }))))
31247            }
31248            (crate::function_registry::TypedParseKind::Binary, "STARTS_WITH")
31249            | (crate::function_registry::TypedParseKind::Binary, "ENDS_WITH") => {
31250                let this = self.parse_expression()?;
31251                self.expect(TokenType::Comma)?;
31252                let expression = self.parse_expression()?;
31253                self.expect(TokenType::RParen)?;
31254                let func = BinaryFunc {
31255                    original_name: None,
31256                    this,
31257                    expression,
31258                    inferred_type: None,
31259                };
31260                let expr = match spec.canonical_name {
31261                    "STARTS_WITH" => Expression::StartsWith(Box::new(func)),
31262                    "ENDS_WITH" => Expression::EndsWith(Box::new(func)),
31263                    _ => unreachable!("binary typed parse kind already matched in caller"),
31264                };
31265                Ok(Some(expr))
31266            }
31267            (crate::function_registry::TypedParseKind::Binary, "ATAN2") => {
31268                let this = self.parse_expression()?;
31269                self.expect(TokenType::Comma)?;
31270                let expression = self.parse_expression()?;
31271                self.expect(TokenType::RParen)?;
31272                Ok(Some(Expression::Atan2(Box::new(BinaryFunc {
31273                    original_name: None,
31274                    this,
31275                    expression,
31276                    inferred_type: None,
31277                }))))
31278            }
31279            (crate::function_registry::TypedParseKind::Binary, "MAP_FROM_ARRAYS")
31280            | (crate::function_registry::TypedParseKind::Binary, "MAP_CONTAINS_KEY")
31281            | (crate::function_registry::TypedParseKind::Binary, "ELEMENT_AT") => {
31282                let this = self.parse_expression()?;
31283                self.expect(TokenType::Comma)?;
31284                let expression = self.parse_expression()?;
31285                self.expect(TokenType::RParen)?;
31286                let func = BinaryFunc {
31287                    original_name: None,
31288                    this,
31289                    expression,
31290                    inferred_type: None,
31291                };
31292                let expr = match spec.canonical_name {
31293                    "MAP_FROM_ARRAYS" => Expression::MapFromArrays(Box::new(func)),
31294                    "MAP_CONTAINS_KEY" => Expression::MapContainsKey(Box::new(func)),
31295                    "ELEMENT_AT" => Expression::ElementAt(Box::new(func)),
31296                    _ => unreachable!("binary map parse kind already matched in caller"),
31297                };
31298                Ok(Some(expr))
31299            }
31300            (crate::function_registry::TypedParseKind::Binary, "CONTAINS")
31301            | (crate::function_registry::TypedParseKind::Binary, "MOD")
31302            | (crate::function_registry::TypedParseKind::Binary, "POW") => {
31303                let this = self.parse_expression()?;
31304                self.expect(TokenType::Comma)?;
31305                let expression = self.parse_expression()?;
31306                self.expect(TokenType::RParen)?;
31307                let expr = match spec.canonical_name {
31308                    "CONTAINS" => Expression::Contains(Box::new(BinaryFunc {
31309                        original_name: None,
31310                        this,
31311                        expression,
31312                        inferred_type: None,
31313                    })),
31314                    "MOD" => Expression::ModFunc(Box::new(BinaryFunc {
31315                        original_name: None,
31316                        this,
31317                        expression,
31318                        inferred_type: None,
31319                    })),
31320                    "POW" => Expression::Power(Box::new(BinaryFunc {
31321                        original_name: None,
31322                        this,
31323                        expression,
31324                        inferred_type: None,
31325                    })),
31326                    _ => unreachable!("binary scalar parse kind already matched in caller"),
31327                };
31328                Ok(Some(expr))
31329            }
31330            (crate::function_registry::TypedParseKind::Binary, "ADD_MONTHS")
31331            | (crate::function_registry::TypedParseKind::Binary, "MONTHS_BETWEEN")
31332            | (crate::function_registry::TypedParseKind::Binary, "NEXT_DAY") => {
31333                let this = self.parse_expression()?;
31334                self.expect(TokenType::Comma)?;
31335                let expression = self.parse_expression()?;
31336                if spec.canonical_name == "MONTHS_BETWEEN" && self.match_token(TokenType::Comma) {
31337                    let round_off = self.parse_expression()?;
31338                    self.expect(TokenType::RParen)?;
31339                    return Ok(Some(Expression::Function(Box::new(
31340                        crate::expressions::Function::new(
31341                            "MONTHS_BETWEEN".to_string(),
31342                            vec![this, expression, round_off],
31343                        ),
31344                    ))));
31345                }
31346                self.expect(TokenType::RParen)?;
31347                let func = BinaryFunc {
31348                    original_name: None,
31349                    this,
31350                    expression,
31351                    inferred_type: None,
31352                };
31353                let expr = match spec.canonical_name {
31354                    "ADD_MONTHS" => Expression::AddMonths(Box::new(func)),
31355                    "MONTHS_BETWEEN" => Expression::MonthsBetween(Box::new(func)),
31356                    "NEXT_DAY" => Expression::NextDay(Box::new(func)),
31357                    _ => unreachable!("date binary parse kind already matched in caller"),
31358                };
31359                Ok(Some(expr))
31360            }
31361            (crate::function_registry::TypedParseKind::Binary, "ARRAY_CONTAINS")
31362            | (crate::function_registry::TypedParseKind::Binary, "ARRAY_POSITION")
31363            | (crate::function_registry::TypedParseKind::Binary, "ARRAY_APPEND")
31364            | (crate::function_registry::TypedParseKind::Binary, "ARRAY_PREPEND")
31365            | (crate::function_registry::TypedParseKind::Binary, "ARRAY_UNION")
31366            | (crate::function_registry::TypedParseKind::Binary, "ARRAY_EXCEPT")
31367            | (crate::function_registry::TypedParseKind::Binary, "ARRAY_REMOVE") => {
31368                let this = self.parse_expression()?;
31369                self.expect(TokenType::Comma)?;
31370                let expression = self.parse_expression()?;
31371                self.expect(TokenType::RParen)?;
31372                let func = BinaryFunc {
31373                    original_name: None,
31374                    this,
31375                    expression,
31376                    inferred_type: None,
31377                };
31378                let expr = match spec.canonical_name {
31379                    "ARRAY_CONTAINS" => Expression::ArrayContains(Box::new(func)),
31380                    "ARRAY_POSITION" => Expression::ArrayPosition(Box::new(func)),
31381                    "ARRAY_APPEND" => Expression::ArrayAppend(Box::new(func)),
31382                    "ARRAY_PREPEND" => Expression::ArrayPrepend(Box::new(func)),
31383                    "ARRAY_UNION" => Expression::ArrayUnion(Box::new(func)),
31384                    "ARRAY_EXCEPT" => Expression::ArrayExcept(Box::new(func)),
31385                    "ARRAY_REMOVE" => Expression::ArrayRemove(Box::new(func)),
31386                    _ => unreachable!("array binary parse kind already matched in caller"),
31387                };
31388                Ok(Some(expr))
31389            }
31390            (crate::function_registry::TypedParseKind::Unary, "LENGTH") => {
31391                let this = self.parse_expression()?;
31392                // PostgreSQL: LENGTH(string, encoding) accepts optional second argument
31393                if self.match_token(TokenType::Comma) {
31394                    let encoding = self.parse_expression()?;
31395                    self.expect(TokenType::RParen)?;
31396                    // Store as a regular function to preserve both arguments
31397                    Ok(Some(Expression::Function(Box::new(Function::new(
31398                        upper_name,
31399                        vec![this, encoding],
31400                    )))))
31401                } else {
31402                    self.expect(TokenType::RParen)?;
31403                    Ok(Some(Expression::Length(Box::new(UnaryFunc::new(this)))))
31404                }
31405            }
31406            (crate::function_registry::TypedParseKind::Unary, "LOWER") => {
31407                let this = self.parse_expression_with_clickhouse_alias()?;
31408                self.expect(TokenType::RParen)?;
31409                Ok(Some(Expression::Lower(Box::new(UnaryFunc::new(this)))))
31410            }
31411            (crate::function_registry::TypedParseKind::Unary, "UPPER") => {
31412                let this = self.parse_expression_with_clickhouse_alias()?;
31413                self.expect(TokenType::RParen)?;
31414                Ok(Some(Expression::Upper(Box::new(UnaryFunc::new(this)))))
31415            }
31416            (crate::function_registry::TypedParseKind::Unary, "TYPEOF") => {
31417                let this = self.parse_expression()?;
31418                // ClickHouse: expr AS alias inside function args
31419                let this = self.maybe_clickhouse_alias(this);
31420                if self.match_token(TokenType::Comma) {
31421                    // Preserve additional args via generic function form
31422                    let mut all_args = vec![this];
31423                    let remaining = self.parse_function_arguments()?;
31424                    all_args.extend(remaining);
31425                    self.expect(TokenType::RParen)?;
31426                    Ok(Some(Expression::Function(Box::new(Function {
31427                        name: name.to_string(),
31428                        args: all_args,
31429                        distinct: false,
31430                        trailing_comments: Vec::new(),
31431                        use_bracket_syntax: false,
31432                        no_parens: false,
31433                        quoted: false,
31434                        span: None,
31435                        inferred_type: None,
31436                    }))))
31437                } else {
31438                    self.expect(TokenType::RParen)?;
31439                    Ok(Some(Expression::Typeof(Box::new(UnaryFunc::new(this)))))
31440                }
31441            }
31442            (crate::function_registry::TypedParseKind::Unary, "DAYOFWEEK")
31443            | (crate::function_registry::TypedParseKind::Unary, "DAYOFYEAR")
31444            | (crate::function_registry::TypedParseKind::Unary, "DAYOFMONTH")
31445            | (crate::function_registry::TypedParseKind::Unary, "WEEKOFYEAR") => {
31446                let this = self.parse_expression()?;
31447                self.expect(TokenType::RParen)?;
31448                let func = UnaryFunc::new(this);
31449                let expr = match spec.canonical_name {
31450                    "DAYOFWEEK" => Expression::DayOfWeek(Box::new(func)),
31451                    "DAYOFYEAR" => Expression::DayOfYear(Box::new(func)),
31452                    "DAYOFMONTH" => Expression::DayOfMonth(Box::new(func)),
31453                    "WEEKOFYEAR" => Expression::WeekOfYear(Box::new(func)),
31454                    _ => unreachable!("date-part unary parse kind already matched in caller"),
31455                };
31456                Ok(Some(expr))
31457            }
31458            (crate::function_registry::TypedParseKind::Unary, "SIN")
31459            | (crate::function_registry::TypedParseKind::Unary, "COS")
31460            | (crate::function_registry::TypedParseKind::Unary, "TAN")
31461            | (crate::function_registry::TypedParseKind::Unary, "ASIN")
31462            | (crate::function_registry::TypedParseKind::Unary, "ACOS")
31463            | (crate::function_registry::TypedParseKind::Unary, "ATAN")
31464            | (crate::function_registry::TypedParseKind::Unary, "RADIANS")
31465            | (crate::function_registry::TypedParseKind::Unary, "DEGREES") => {
31466                let this = self.parse_expression()?;
31467                // MySQL: ATAN(y, x) with 2 args is equivalent to ATAN2(y, x)
31468                if spec.canonical_name == "ATAN" && self.match_token(TokenType::Comma) {
31469                    let expression = self.parse_expression()?;
31470                    self.expect(TokenType::RParen)?;
31471                    return Ok(Some(Expression::Atan2(Box::new(BinaryFunc {
31472                        original_name: Some("ATAN".to_string()),
31473                        this,
31474                        expression,
31475                        inferred_type: None,
31476                    }))));
31477                }
31478                self.expect(TokenType::RParen)?;
31479                let func = UnaryFunc::new(this);
31480                let expr = match spec.canonical_name {
31481                    "SIN" => Expression::Sin(Box::new(func)),
31482                    "COS" => Expression::Cos(Box::new(func)),
31483                    "TAN" => Expression::Tan(Box::new(func)),
31484                    "ASIN" => Expression::Asin(Box::new(func)),
31485                    "ACOS" => Expression::Acos(Box::new(func)),
31486                    "ATAN" => Expression::Atan(Box::new(func)),
31487                    "RADIANS" => Expression::Radians(Box::new(func)),
31488                    "DEGREES" => Expression::Degrees(Box::new(func)),
31489                    _ => unreachable!("trig unary parse kind already matched in caller"),
31490                };
31491                Ok(Some(expr))
31492            }
31493            (crate::function_registry::TypedParseKind::Unary, "YEAR")
31494            | (crate::function_registry::TypedParseKind::Unary, "MONTH")
31495            | (crate::function_registry::TypedParseKind::Unary, "DAY")
31496            | (crate::function_registry::TypedParseKind::Unary, "HOUR")
31497            | (crate::function_registry::TypedParseKind::Unary, "MINUTE")
31498            | (crate::function_registry::TypedParseKind::Unary, "SECOND")
31499            | (crate::function_registry::TypedParseKind::Unary, "DAYOFWEEK_ISO")
31500            | (crate::function_registry::TypedParseKind::Unary, "QUARTER")
31501            | (crate::function_registry::TypedParseKind::Unary, "EPOCH")
31502            | (crate::function_registry::TypedParseKind::Unary, "EPOCH_MS") => {
31503                let this = self.parse_expression()?;
31504                self.expect(TokenType::RParen)?;
31505                let func = UnaryFunc::new(this);
31506                let expr = match spec.canonical_name {
31507                    "YEAR" => Expression::Year(Box::new(func)),
31508                    "MONTH" => Expression::Month(Box::new(func)),
31509                    "DAY" => Expression::Day(Box::new(func)),
31510                    "HOUR" => Expression::Hour(Box::new(func)),
31511                    "MINUTE" => Expression::Minute(Box::new(func)),
31512                    "SECOND" => Expression::Second(Box::new(func)),
31513                    "DAYOFWEEK_ISO" => Expression::DayOfWeekIso(Box::new(func)),
31514                    "QUARTER" => Expression::Quarter(Box::new(func)),
31515                    "EPOCH" => Expression::Epoch(Box::new(func)),
31516                    "EPOCH_MS" => Expression::EpochMs(Box::new(func)),
31517                    _ => unreachable!("date unary parse kind already matched in caller"),
31518                };
31519                Ok(Some(expr))
31520            }
31521            (crate::function_registry::TypedParseKind::Unary, "ARRAY_LENGTH")
31522            | (crate::function_registry::TypedParseKind::Unary, "ARRAY_SIZE")
31523            | (crate::function_registry::TypedParseKind::Unary, "CARDINALITY")
31524            | (crate::function_registry::TypedParseKind::Unary, "ARRAY_REVERSE")
31525            | (crate::function_registry::TypedParseKind::Unary, "ARRAY_DISTINCT")
31526            | (crate::function_registry::TypedParseKind::Unary, "ARRAY_COMPACT")
31527            | (crate::function_registry::TypedParseKind::Unary, "EXPLODE")
31528            | (crate::function_registry::TypedParseKind::Unary, "EXPLODE_OUTER") => {
31529                let this = self.parse_expression()?;
31530                // PostgreSQL ARRAY_LENGTH and ARRAY_SIZE can take a second dimension arg.
31531                // Preserve that by falling back to generic function form for 2-arg usage.
31532                if (spec.canonical_name == "ARRAY_LENGTH" || spec.canonical_name == "ARRAY_SIZE")
31533                    && self.match_token(TokenType::Comma)
31534                {
31535                    let dimension = self.parse_expression()?;
31536                    self.expect(TokenType::RParen)?;
31537                    return Ok(Some(Expression::Function(Box::new(Function {
31538                        name: name.to_string(),
31539                        args: vec![this, dimension],
31540                        distinct: false,
31541                        trailing_comments: Vec::new(),
31542                        use_bracket_syntax: false,
31543                        no_parens: false,
31544                        quoted: false,
31545                        span: None,
31546                        inferred_type: None,
31547                    }))));
31548                }
31549                self.expect(TokenType::RParen)?;
31550                let func = UnaryFunc::new(this);
31551                let expr = match spec.canonical_name {
31552                    "ARRAY_LENGTH" => Expression::ArrayLength(Box::new(func)),
31553                    "ARRAY_SIZE" => Expression::ArraySize(Box::new(func)),
31554                    "CARDINALITY" => Expression::Cardinality(Box::new(func)),
31555                    "ARRAY_REVERSE" => Expression::ArrayReverse(Box::new(func)),
31556                    "ARRAY_DISTINCT" => Expression::ArrayDistinct(Box::new(func)),
31557                    "ARRAY_COMPACT" => Expression::ArrayCompact(Box::new(func)),
31558                    "EXPLODE" => Expression::Explode(Box::new(func)),
31559                    "EXPLODE_OUTER" => Expression::ExplodeOuter(Box::new(func)),
31560                    _ => unreachable!("array unary parse kind already matched in caller"),
31561                };
31562                Ok(Some(expr))
31563            }
31564            (crate::function_registry::TypedParseKind::Unary, "MAP_FROM_ENTRIES")
31565            | (crate::function_registry::TypedParseKind::Unary, "MAP_KEYS")
31566            | (crate::function_registry::TypedParseKind::Unary, "MAP_VALUES") => {
31567                let this = self.parse_expression()?;
31568                self.expect(TokenType::RParen)?;
31569                let func = UnaryFunc::new(this);
31570                let expr = match spec.canonical_name {
31571                    "MAP_FROM_ENTRIES" => Expression::MapFromEntries(Box::new(func)),
31572                    "MAP_KEYS" => Expression::MapKeys(Box::new(func)),
31573                    "MAP_VALUES" => Expression::MapValues(Box::new(func)),
31574                    _ => unreachable!("map unary parse kind already matched in caller"),
31575                };
31576                Ok(Some(expr))
31577            }
31578            (crate::function_registry::TypedParseKind::Unary, "ABS") => {
31579                let this = self.parse_expression_with_clickhouse_alias()?;
31580                self.expect(TokenType::RParen)?;
31581                Ok(Some(Expression::Abs(Box::new(UnaryFunc::new(this)))))
31582            }
31583            (crate::function_registry::TypedParseKind::Unary, "SQRT")
31584            | (crate::function_registry::TypedParseKind::Unary, "EXP")
31585            | (crate::function_registry::TypedParseKind::Unary, "LN") => {
31586                let this = self.parse_expression()?;
31587                self.expect(TokenType::RParen)?;
31588                let expr = match spec.canonical_name {
31589                    "SQRT" => Expression::Sqrt(Box::new(UnaryFunc::new(this))),
31590                    "EXP" => Expression::Exp(Box::new(UnaryFunc::new(this))),
31591                    "LN" => Expression::Ln(Box::new(UnaryFunc::new(this))),
31592                    _ => unreachable!("math unary parse kind already matched in caller"),
31593                };
31594                Ok(Some(expr))
31595            }
31596            (crate::function_registry::TypedParseKind::Variadic, "TO_NUMBER")
31597            | (crate::function_registry::TypedParseKind::Variadic, "TRY_TO_NUMBER") => {
31598                let args = self.parse_expression_list()?;
31599                self.expect(TokenType::RParen)?;
31600                let this = args.get(0).cloned().unwrap_or(Expression::Null(Null {}));
31601                let format = args.get(1).cloned().map(Box::new);
31602                let precision = args.get(2).cloned().map(Box::new);
31603                let scale = args.get(3).cloned().map(Box::new);
31604                let safe = if spec.canonical_name == "TRY_TO_NUMBER" {
31605                    Some(Box::new(Expression::Boolean(BooleanLiteral {
31606                        value: true,
31607                    })))
31608                } else {
31609                    None
31610                };
31611                Ok(Some(Expression::ToNumber(Box::new(ToNumber {
31612                    this: Box::new(this),
31613                    format,
31614                    nlsparam: None,
31615                    precision,
31616                    scale,
31617                    safe,
31618                    safe_name: None,
31619                }))))
31620            }
31621            (crate::function_registry::TypedParseKind::Variadic, "SUBSTRING") => {
31622                let this = self.parse_expression()?;
31623                // ClickHouse: implicit/explicit alias: substring('1234' lhs FROM 2) or substring('1234' AS lhs FROM 2)
31624                let this = self.try_clickhouse_func_arg_alias(this);
31625
31626                // Check for SQL standard FROM syntax: SUBSTRING(str FROM pos [FOR len])
31627                if self.match_token(TokenType::From) {
31628                    let start = self.parse_expression()?;
31629                    let start = self.try_clickhouse_func_arg_alias(start);
31630                    let length = if self.match_token(TokenType::For) {
31631                        let len = self.parse_expression()?;
31632                        Some(self.try_clickhouse_func_arg_alias(len))
31633                    } else {
31634                        None
31635                    };
31636                    self.expect(TokenType::RParen)?;
31637                    Ok(Some(Expression::Substring(Box::new(SubstringFunc {
31638                        this,
31639                        start,
31640                        length,
31641                        from_for_syntax: true,
31642                    }))))
31643                } else if self.match_token(TokenType::For) {
31644                    // PostgreSQL: SUBSTRING(str FOR len) or SUBSTRING(str FOR len FROM pos)
31645                    let length_expr = self.parse_expression()?;
31646                    let length_expr = self.try_clickhouse_func_arg_alias(length_expr);
31647                    let start = if self.match_token(TokenType::From) {
31648                        let s = self.parse_expression()?;
31649                        self.try_clickhouse_func_arg_alias(s)
31650                    } else {
31651                        // No FROM, use 1 as default start position
31652                        Expression::Literal(Box::new(Literal::Number("1".to_string())))
31653                    };
31654                    self.expect(TokenType::RParen)?;
31655                    Ok(Some(Expression::Substring(Box::new(SubstringFunc {
31656                        this,
31657                        start,
31658                        length: Some(length_expr),
31659                        from_for_syntax: true,
31660                    }))))
31661                } else if self.match_token(TokenType::Comma) {
31662                    // Comma-separated syntax: SUBSTRING(str, pos) or SUBSTRING(str, pos, len)
31663                    let start = self.parse_expression()?;
31664                    let start = self.try_clickhouse_func_arg_alias(start);
31665                    let length = if self.match_token(TokenType::Comma) {
31666                        let len = self.parse_expression()?;
31667                        Some(self.try_clickhouse_func_arg_alias(len))
31668                    } else {
31669                        None
31670                    };
31671                    self.expect(TokenType::RParen)?;
31672                    Ok(Some(Expression::Substring(Box::new(SubstringFunc {
31673                        this,
31674                        start,
31675                        length,
31676                        from_for_syntax: false,
31677                    }))))
31678                } else {
31679                    // Just SUBSTRING(str) with no other args - unusual but handle it
31680                    self.expect(TokenType::RParen)?;
31681                    // Treat as function call
31682                    Ok(Some(Expression::Function(Box::new(Function {
31683                        name: name.to_string(),
31684                        args: vec![this],
31685                        distinct: false,
31686                        trailing_comments: Vec::new(),
31687                        use_bracket_syntax: false,
31688                        no_parens: false,
31689                        quoted: false,
31690                        span: None,
31691                        inferred_type: None,
31692                    }))))
31693                }
31694            }
31695            (crate::function_registry::TypedParseKind::Variadic, "DATE_PART") => {
31696                let part = self.parse_expression()?;
31697                // For TSQL/Fabric, normalize date part aliases (e.g., "dd" -> DAY)
31698                let mut part = if matches!(
31699                    self.config.dialect,
31700                    Some(crate::dialects::DialectType::TSQL)
31701                        | Some(crate::dialects::DialectType::Fabric)
31702                ) {
31703                    self.normalize_tsql_date_part(part)
31704                } else {
31705                    part
31706                };
31707                // Accept both FROM and comma as separator (Snowflake supports both syntaxes)
31708                if !self.match_token(TokenType::From) && !self.match_token(TokenType::Comma) {
31709                    return Err(self.parse_error("Expected FROM or comma in DATE_PART"));
31710                }
31711                let from_expr = self.parse_expression()?;
31712                self.expect(TokenType::RParen)?;
31713                if matches!(
31714                    self.config.dialect,
31715                    Some(crate::dialects::DialectType::Snowflake)
31716                ) {
31717                    if self
31718                        .try_parse_date_part_field_identifier_expr(&part)
31719                        .is_some()
31720                    {
31721                        part = self.convert_date_part_identifier_expr_to_var(part);
31722                    }
31723                }
31724                let mut args = vec![part, from_expr];
31725                self.normalize_date_part_arg("DATE_PART", &mut args);
31726                Ok(Some(Expression::Function(Box::new(Function {
31727                    name: "DATE_PART".to_string(),
31728                    args,
31729                    distinct: false,
31730                    trailing_comments: Vec::new(),
31731                    use_bracket_syntax: false,
31732                    no_parens: false,
31733                    quoted: false,
31734                    span: None,
31735                    inferred_type: None,
31736                }))))
31737            }
31738            (crate::function_registry::TypedParseKind::Variadic, "DATEADD") => {
31739                let mut first_arg = self.parse_expression()?;
31740                first_arg = self.try_clickhouse_func_arg_alias(first_arg);
31741                self.expect(TokenType::Comma)?;
31742                let second_arg = self.parse_expression()?;
31743                let second_arg = self.try_clickhouse_func_arg_alias(second_arg);
31744
31745                // Check if there's a third argument (traditional 3-arg syntax)
31746                if self.match_token(TokenType::Comma) {
31747                    let third_arg = self.parse_expression()?;
31748                    let third_arg = self.try_clickhouse_func_arg_alias(third_arg);
31749                    self.expect(TokenType::RParen)?;
31750                    if matches!(
31751                        self.config.dialect,
31752                        Some(crate::dialects::DialectType::Snowflake)
31753                    ) {
31754                        if self
31755                            .try_parse_date_part_unit_identifier_expr(&first_arg)
31756                            .is_some()
31757                        {
31758                            first_arg = self.convert_date_part_identifier_expr_to_var(first_arg);
31759                        }
31760                    }
31761                    let mut args = vec![first_arg, second_arg, third_arg];
31762                    self.normalize_date_part_arg(name, &mut args);
31763                    Ok(Some(Expression::Function(Box::new(Function {
31764                        name: name.to_string(),
31765                        args,
31766                        distinct: false,
31767                        trailing_comments: Vec::new(),
31768                        use_bracket_syntax: false,
31769                        no_parens: false,
31770                        quoted: false,
31771                        span: None,
31772                        inferred_type: None,
31773                    }))))
31774                } else {
31775                    // BigQuery 2-arg syntax: DATE_ADD(date, interval)
31776                    self.expect(TokenType::RParen)?;
31777                    Ok(Some(Expression::Function(Box::new(Function {
31778                        name: name.to_string(),
31779                        args: vec![first_arg, second_arg],
31780                        distinct: false,
31781                        trailing_comments: Vec::new(),
31782                        use_bracket_syntax: false,
31783                        no_parens: false,
31784                        quoted: false,
31785                        span: None,
31786                        inferred_type: None,
31787                    }))))
31788                }
31789            }
31790            (crate::function_registry::TypedParseKind::Variadic, "DATEDIFF") => {
31791                // First argument (can be unit for DATEDIFF/TIMESTAMPDIFF or datetime for TIMEDIFF)
31792                let first_arg = self.parse_expression()?;
31793                let first_arg = self.try_clickhouse_func_arg_alias(first_arg);
31794                self.expect(TokenType::Comma)?;
31795                let second_arg = self.parse_expression()?;
31796                let second_arg = self.try_clickhouse_func_arg_alias(second_arg);
31797                // Third argument is optional (SQLite TIMEDIFF only takes 2 args)
31798                let mut args = if self.match_token(TokenType::Comma) {
31799                    let third_arg = self.parse_expression()?;
31800                    let third_arg = self.try_clickhouse_func_arg_alias(third_arg);
31801                    vec![first_arg, second_arg, third_arg]
31802                } else {
31803                    vec![first_arg, second_arg]
31804                };
31805                // ClickHouse: optional 4th timezone argument for dateDiff
31806                while self.match_token(TokenType::Comma) {
31807                    let arg = self.parse_expression()?;
31808                    args.push(self.try_clickhouse_func_arg_alias(arg));
31809                }
31810                self.expect(TokenType::RParen)?;
31811                if matches!(
31812                    self.config.dialect,
31813                    Some(crate::dialects::DialectType::Snowflake)
31814                ) && args.len() == 3
31815                {
31816                    if let Some(unit) = self.try_parse_date_part_unit_expr(&args[0]) {
31817                        return Ok(Some(Expression::DateDiff(Box::new(DateDiffFunc {
31818                            this: args[2].clone(),
31819                            expression: args[1].clone(),
31820                            unit: Some(unit),
31821                        }))));
31822                    }
31823                }
31824                self.normalize_date_part_arg(name, &mut args);
31825                Ok(Some(Expression::Function(Box::new(Function {
31826                    name: name.to_string(),
31827                    args,
31828                    distinct: false,
31829                    trailing_comments: Vec::new(),
31830                    use_bracket_syntax: false,
31831                    no_parens: false,
31832                    quoted: false,
31833                    span: None,
31834                    inferred_type: None,
31835                }))))
31836            }
31837            (crate::function_registry::TypedParseKind::Variadic, "RANDOM") => {
31838                // RANDOM() - no args, RANDOM(seed) - Snowflake, RANDOM(lower, upper) - Teradata
31839                if self.check(TokenType::RParen) {
31840                    self.expect(TokenType::RParen)?;
31841                    Ok(Some(Expression::Random(Random)))
31842                } else {
31843                    let first = self.parse_expression()?;
31844                    if self.match_token(TokenType::Comma) {
31845                        let second = self.parse_expression()?;
31846                        self.expect(TokenType::RParen)?;
31847                        Ok(Some(Expression::Rand(Box::new(Rand {
31848                            seed: None,
31849                            lower: Some(Box::new(first)),
31850                            upper: Some(Box::new(second)),
31851                        }))))
31852                    } else {
31853                        self.expect(TokenType::RParen)?;
31854                        Ok(Some(Expression::Rand(Box::new(Rand {
31855                            seed: Some(Box::new(first)),
31856                            lower: None,
31857                            upper: None,
31858                        }))))
31859                    }
31860                }
31861            }
31862            (crate::function_registry::TypedParseKind::Variadic, "RAND") => {
31863                let seed = if self.check(TokenType::RParen) {
31864                    None
31865                } else {
31866                    Some(Box::new(self.parse_expression()?))
31867                };
31868                self.expect(TokenType::RParen)?;
31869                Ok(Some(Expression::Rand(Box::new(Rand {
31870                    seed,
31871                    lower: None,
31872                    upper: None,
31873                }))))
31874            }
31875            (crate::function_registry::TypedParseKind::Variadic, "PI") => {
31876                self.expect(TokenType::RParen)?;
31877                Ok(Some(Expression::Pi(Pi)))
31878            }
31879            (crate::function_registry::TypedParseKind::Variadic, "LAST_DAY") => {
31880                let this = self.parse_expression()?;
31881                let unit = if self.match_token(TokenType::Comma) {
31882                    Some(self.parse_datetime_field()?)
31883                } else {
31884                    None
31885                };
31886                self.expect(TokenType::RParen)?;
31887                Ok(Some(Expression::LastDay(Box::new(LastDayFunc {
31888                    this,
31889                    unit,
31890                }))))
31891            }
31892            (crate::function_registry::TypedParseKind::Variadic, "POSITION") => {
31893                let expr = self
31894                    .parse_position()?
31895                    .ok_or_else(|| self.parse_error("Expected expression in POSITION"))?;
31896                self.expect(TokenType::RParen)?;
31897                Ok(Some(expr))
31898            }
31899            (crate::function_registry::TypedParseKind::Variadic, "STRPOS") => {
31900                let this = self.parse_expression()?;
31901                self.expect(TokenType::Comma)?;
31902                let substr = self.parse_expression()?;
31903                let occurrence = if self.match_token(TokenType::Comma) {
31904                    Some(Box::new(self.parse_expression()?))
31905                } else {
31906                    None
31907                };
31908                self.expect(TokenType::RParen)?;
31909                Ok(Some(Expression::StrPosition(Box::new(StrPosition {
31910                    this: Box::new(this),
31911                    substr: Some(Box::new(substr)),
31912                    position: None,
31913                    occurrence,
31914                }))))
31915            }
31916            (crate::function_registry::TypedParseKind::Variadic, "LOCATE") => {
31917                if self.check(TokenType::RParen) {
31918                    self.skip();
31919                    return Ok(Some(Expression::Function(Box::new(Function {
31920                        name: name.to_string(),
31921                        args: vec![],
31922                        distinct: false,
31923                        trailing_comments: Vec::new(),
31924                        use_bracket_syntax: false,
31925                        no_parens: false,
31926                        quoted: false,
31927                        span: None,
31928                        inferred_type: None,
31929                    }))));
31930                }
31931                let first = self.parse_expression()?;
31932                if !self.check(TokenType::Comma) && self.check(TokenType::RParen) {
31933                    self.skip();
31934                    return Ok(Some(Expression::Function(Box::new(Function {
31935                        name: name.to_string(),
31936                        args: vec![first],
31937                        distinct: false,
31938                        trailing_comments: Vec::new(),
31939                        use_bracket_syntax: false,
31940                        no_parens: false,
31941                        quoted: false,
31942                        span: None,
31943                        inferred_type: None,
31944                    }))));
31945                }
31946                self.expect(TokenType::Comma)?;
31947                let second = self.parse_expression()?;
31948                let position = if self.match_token(TokenType::Comma) {
31949                    Some(Box::new(self.parse_expression()?))
31950                } else {
31951                    None
31952                };
31953                self.expect(TokenType::RParen)?;
31954                Ok(Some(Expression::StrPosition(Box::new(StrPosition {
31955                    this: Box::new(second),
31956                    substr: Some(Box::new(first)),
31957                    position,
31958                    occurrence: None,
31959                }))))
31960            }
31961            (crate::function_registry::TypedParseKind::Variadic, "INSTR") => {
31962                let first = self.parse_expression()?;
31963                self.expect(TokenType::Comma)?;
31964                let second = self.parse_expression()?;
31965                let position = if self.match_token(TokenType::Comma) {
31966                    Some(Box::new(self.parse_expression()?))
31967                } else {
31968                    None
31969                };
31970                self.expect(TokenType::RParen)?;
31971                Ok(Some(Expression::StrPosition(Box::new(StrPosition {
31972                    this: Box::new(first),
31973                    substr: Some(Box::new(second)),
31974                    position,
31975                    occurrence: None,
31976                }))))
31977            }
31978            (crate::function_registry::TypedParseKind::Variadic, "NORMALIZE") => {
31979                let this = self.parse_expression()?;
31980                let form = if self.match_token(TokenType::Comma) {
31981                    Some(Box::new(self.parse_expression()?))
31982                } else {
31983                    None
31984                };
31985                self.expect(TokenType::RParen)?;
31986                Ok(Some(Expression::Normalize(Box::new(Normalize {
31987                    this: Box::new(this),
31988                    form,
31989                    is_casefold: None,
31990                }))))
31991            }
31992            (crate::function_registry::TypedParseKind::Variadic, "INITCAP") => {
31993                let this = self.parse_expression()?;
31994                let delimiter = if self.match_token(TokenType::Comma) {
31995                    Some(Box::new(self.parse_expression()?))
31996                } else {
31997                    None
31998                };
31999                self.expect(TokenType::RParen)?;
32000                if let Some(delim) = delimiter {
32001                    Ok(Some(Expression::Function(Box::new(Function::new(
32002                        "INITCAP".to_string(),
32003                        vec![this, *delim],
32004                    )))))
32005                } else {
32006                    Ok(Some(Expression::Initcap(Box::new(UnaryFunc::new(this)))))
32007                }
32008            }
32009            (crate::function_registry::TypedParseKind::Variadic, "FLOOR") => {
32010                let this = self.parse_expression()?;
32011                let to = if self.match_token(TokenType::To) {
32012                    self.parse_var()?
32013                } else {
32014                    None
32015                };
32016                let scale = if to.is_none() && self.match_token(TokenType::Comma) {
32017                    Some(self.parse_expression()?)
32018                } else {
32019                    None
32020                };
32021                if self.check(TokenType::Comma) {
32022                    let mut args = vec![this];
32023                    if let Some(s) = scale {
32024                        args.push(s);
32025                    }
32026                    while self.match_token(TokenType::Comma) {
32027                        args.push(self.parse_expression()?);
32028                    }
32029                    self.expect(TokenType::RParen)?;
32030                    return Ok(Some(Expression::Function(Box::new(Function {
32031                        name: name.to_string(),
32032                        args,
32033                        distinct: false,
32034                        trailing_comments: Vec::new(),
32035                        use_bracket_syntax: false,
32036                        no_parens: false,
32037                        quoted: false,
32038                        span: None,
32039                        inferred_type: None,
32040                    }))));
32041                }
32042                self.expect(TokenType::RParen)?;
32043                Ok(Some(Expression::Floor(Box::new(FloorFunc {
32044                    this,
32045                    scale,
32046                    to,
32047                }))))
32048            }
32049            (crate::function_registry::TypedParseKind::Variadic, "LOG") => {
32050                let first = self.parse_expression()?;
32051                if self.match_token(TokenType::Comma) {
32052                    let second = self.parse_expression()?;
32053                    self.expect(TokenType::RParen)?;
32054                    let (value, base) = if self.log_base_first() {
32055                        (second, first)
32056                    } else {
32057                        (first, second)
32058                    };
32059                    Ok(Some(Expression::Log(Box::new(LogFunc {
32060                        this: value,
32061                        base: Some(base),
32062                    }))))
32063                } else {
32064                    self.expect(TokenType::RParen)?;
32065                    if self.log_defaults_to_ln() {
32066                        Ok(Some(Expression::Ln(Box::new(UnaryFunc::new(first)))))
32067                    } else {
32068                        Ok(Some(Expression::Log(Box::new(LogFunc {
32069                            this: first,
32070                            base: None,
32071                        }))))
32072                    }
32073                }
32074            }
32075            (crate::function_registry::TypedParseKind::Variadic, "FLATTEN") => {
32076                let args = self.parse_function_arguments()?;
32077                self.expect(TokenType::RParen)?;
32078                Ok(Some(Expression::Function(Box::new(Function {
32079                    name: name.to_string(),
32080                    args,
32081                    distinct: false,
32082                    trailing_comments: Vec::new(),
32083                    use_bracket_syntax: false,
32084                    no_parens: false,
32085                    quoted: false,
32086                    span: None,
32087                    inferred_type: None,
32088                }))))
32089            }
32090            (crate::function_registry::TypedParseKind::Variadic, "ARRAY_INTERSECT") => {
32091                let mut expressions = vec![self.parse_expression()?];
32092                while self.match_token(TokenType::Comma) {
32093                    expressions.push(self.parse_expression()?);
32094                }
32095                self.expect(TokenType::RParen)?;
32096                Ok(Some(Expression::ArrayIntersect(Box::new(VarArgFunc {
32097                    expressions,
32098                    original_name: Some(name.to_string()),
32099                    inferred_type: None,
32100                }))))
32101            }
32102            (crate::function_registry::TypedParseKind::Variadic, "CURRENT_SCHEMAS") => {
32103                let args = if self.check(TokenType::RParen) {
32104                    Vec::new()
32105                } else {
32106                    vec![self.parse_expression()?]
32107                };
32108                self.expect(TokenType::RParen)?;
32109                Ok(Some(Expression::CurrentSchemas(Box::new(CurrentSchemas {
32110                    this: args.into_iter().next().map(Box::new),
32111                }))))
32112            }
32113            (crate::function_registry::TypedParseKind::Variadic, "COALESCE") => {
32114                let args = if self.check(TokenType::RParen) {
32115                    Vec::new()
32116                } else {
32117                    self.parse_expression_list()?
32118                };
32119                self.expect(TokenType::RParen)?;
32120                Ok(Some(Expression::Coalesce(Box::new(
32121                    crate::expressions::VarArgFunc {
32122                        original_name: None,
32123                        expressions: args,
32124                        inferred_type: None,
32125                    },
32126                ))))
32127            }
32128            (crate::function_registry::TypedParseKind::Variadic, "IFNULL") => {
32129                let args = self.parse_expression_list()?;
32130                self.expect(TokenType::RParen)?;
32131                if args.len() >= 2 {
32132                    Ok(Some(Expression::Coalesce(Box::new(
32133                        crate::expressions::VarArgFunc {
32134                            original_name: Some("IFNULL".to_string()),
32135                            expressions: args,
32136                            inferred_type: None,
32137                        },
32138                    ))))
32139                } else {
32140                    Ok(Some(Expression::Function(Box::new(Function {
32141                        name: name.to_string(),
32142                        args,
32143                        distinct: false,
32144                        trailing_comments: Vec::new(),
32145                        use_bracket_syntax: false,
32146                        no_parens: false,
32147                        quoted: false,
32148                        span: None,
32149                        inferred_type: None,
32150                    }))))
32151                }
32152            }
32153            (crate::function_registry::TypedParseKind::Variadic, "NVL") => {
32154                let args = self.parse_expression_list()?;
32155                self.expect(TokenType::RParen)?;
32156                if args.len() > 2 {
32157                    Ok(Some(Expression::Function(Box::new(Function {
32158                        name: "COALESCE".to_string(),
32159                        args,
32160                        distinct: false,
32161                        trailing_comments: Vec::new(),
32162                        use_bracket_syntax: false,
32163                        no_parens: false,
32164                        quoted: false,
32165                        span: None,
32166                        inferred_type: None,
32167                    }))))
32168                } else if args.len() == 2 {
32169                    Ok(Some(Expression::Nvl(Box::new(
32170                        crate::expressions::BinaryFunc {
32171                            original_name: Some("NVL".to_string()),
32172                            this: args[0].clone(),
32173                            expression: args[1].clone(),
32174                            inferred_type: None,
32175                        },
32176                    ))))
32177                } else {
32178                    Ok(Some(Expression::Function(Box::new(Function {
32179                        name: name.to_string(),
32180                        args,
32181                        distinct: false,
32182                        trailing_comments: Vec::new(),
32183                        use_bracket_syntax: false,
32184                        no_parens: false,
32185                        quoted: false,
32186                        span: None,
32187                        inferred_type: None,
32188                    }))))
32189                }
32190            }
32191            (crate::function_registry::TypedParseKind::Variadic, "NVL2") => {
32192                let args = self.parse_expression_list()?;
32193                self.expect(TokenType::RParen)?;
32194                if args.len() >= 3 {
32195                    Ok(Some(Expression::Nvl2(Box::new(
32196                        crate::expressions::Nvl2Func {
32197                            this: args[0].clone(),
32198                            true_value: args[1].clone(),
32199                            false_value: args[2].clone(),
32200                            inferred_type: None,
32201                        },
32202                    ))))
32203                } else {
32204                    Ok(Some(Expression::Function(Box::new(Function {
32205                        name: name.to_string(),
32206                        args,
32207                        distinct: false,
32208                        trailing_comments: Vec::new(),
32209                        use_bracket_syntax: false,
32210                        no_parens: false,
32211                        quoted: false,
32212                        span: None,
32213                        inferred_type: None,
32214                    }))))
32215                }
32216            }
32217            (crate::function_registry::TypedParseKind::Variadic, "EXTRACT") => {
32218                if matches!(
32219                    self.config.dialect,
32220                    Some(crate::dialects::DialectType::ClickHouse)
32221                ) && (self.check(TokenType::Identifier)
32222                    || self.check(TokenType::Var)
32223                    || self.peek().token_type.is_keyword()
32224                    || self.check(TokenType::String)
32225                    || self.check(TokenType::Number))
32226                    && (self.check_next(TokenType::Comma)
32227                        || self.check_next(TokenType::LParen)
32228                        || self.check_next(TokenType::Var)
32229                        || self.check_next(TokenType::Identifier))
32230                {
32231                    let args = self.parse_function_arguments()?;
32232                    self.expect(TokenType::RParen)?;
32233                    return Ok(Some(Expression::Function(Box::new(Function {
32234                        name: name.to_string(),
32235                        args,
32236                        distinct: false,
32237                        trailing_comments: Vec::new(),
32238                        use_bracket_syntax: false,
32239                        no_parens: false,
32240                        quoted: false,
32241                        span: None,
32242                        inferred_type: None,
32243                    }))));
32244                }
32245
32246                if self.check(TokenType::String) {
32247                    let args = self.parse_expression_list()?;
32248                    self.expect(TokenType::RParen)?;
32249                    return Ok(Some(Expression::Function(Box::new(Function {
32250                        name: name.to_string(),
32251                        args,
32252                        distinct: false,
32253                        trailing_comments: Vec::new(),
32254                        use_bracket_syntax: false,
32255                        no_parens: false,
32256                        quoted: false,
32257                        span: None,
32258                        inferred_type: None,
32259                    }))));
32260                }
32261
32262                let field = self.parse_datetime_field()?;
32263                if !self.match_token(TokenType::From) && !self.match_token(TokenType::Comma) {
32264                    return Err(self.parse_error("Expected FROM or comma after EXTRACT field"));
32265                }
32266                let this = self.parse_expression()?;
32267                let this = self.try_clickhouse_func_arg_alias(this);
32268                self.expect(TokenType::RParen)?;
32269                Ok(Some(Expression::Extract(Box::new(ExtractFunc {
32270                    this,
32271                    field,
32272                }))))
32273            }
32274            (crate::function_registry::TypedParseKind::Variadic, "STRUCT") => {
32275                let args = if self.check(TokenType::RParen) {
32276                    Vec::new()
32277                } else {
32278                    self.parse_struct_args()?
32279                };
32280                self.expect(TokenType::RParen)?;
32281                Ok(Some(Expression::Function(Box::new(Function {
32282                    name: name.to_string(),
32283                    args,
32284                    distinct: false,
32285                    trailing_comments: Vec::new(),
32286                    use_bracket_syntax: false,
32287                    no_parens: false,
32288                    quoted: false,
32289                    span: None,
32290                    inferred_type: None,
32291                }))))
32292            }
32293            (crate::function_registry::TypedParseKind::Variadic, "CHAR") => {
32294                let args = self.parse_expression_list()?;
32295                let charset = if self.match_token(TokenType::Using) {
32296                    if !self.is_at_end() {
32297                        let charset_token = self.advance();
32298                        Some(charset_token.text.clone())
32299                    } else {
32300                        None
32301                    }
32302                } else {
32303                    None
32304                };
32305                self.expect(TokenType::RParen)?;
32306                if charset.is_some() {
32307                    Ok(Some(Expression::CharFunc(Box::new(
32308                        crate::expressions::CharFunc {
32309                            args,
32310                            charset,
32311                            name: None,
32312                        },
32313                    ))))
32314                } else {
32315                    Ok(Some(Expression::Function(Box::new(Function {
32316                        name: name.to_string(),
32317                        args,
32318                        distinct: false,
32319                        trailing_comments: Vec::new(),
32320                        use_bracket_syntax: false,
32321                        no_parens: false,
32322                        quoted: false,
32323                        span: None,
32324                        inferred_type: None,
32325                    }))))
32326                }
32327            }
32328            (crate::function_registry::TypedParseKind::Variadic, "CHR") => {
32329                let args = self.parse_expression_list()?;
32330                let charset = if self.match_token(TokenType::Using) {
32331                    if !self.is_at_end() {
32332                        let charset_token = self.advance();
32333                        Some(charset_token.text.clone())
32334                    } else {
32335                        None
32336                    }
32337                } else {
32338                    None
32339                };
32340                self.expect(TokenType::RParen)?;
32341                if charset.is_some() {
32342                    Ok(Some(Expression::CharFunc(Box::new(
32343                        crate::expressions::CharFunc {
32344                            args,
32345                            charset,
32346                            name: Some("CHR".to_string()),
32347                        },
32348                    ))))
32349                } else {
32350                    Ok(Some(Expression::Function(Box::new(Function {
32351                        name: name.to_string(),
32352                        args,
32353                        distinct: false,
32354                        trailing_comments: Vec::new(),
32355                        use_bracket_syntax: false,
32356                        no_parens: false,
32357                        quoted: false,
32358                        span: None,
32359                        inferred_type: None,
32360                    }))))
32361                }
32362            }
32363            (crate::function_registry::TypedParseKind::Variadic, "RANGE_N") => {
32364                let this = self.parse_bitwise_or()?;
32365                self.expect(TokenType::Between)?;
32366                let mut expressions = Vec::new();
32367                while !self.check(TokenType::Each) && !self.check(TokenType::RParen) {
32368                    expressions.push(self.parse_expression()?);
32369                    if !self.match_token(TokenType::Comma) {
32370                        break;
32371                    }
32372                }
32373                let each = if self.match_token(TokenType::Each) {
32374                    Some(Box::new(self.parse_expression()?))
32375                } else {
32376                    None
32377                };
32378                self.expect(TokenType::RParen)?;
32379                Ok(Some(Expression::RangeN(Box::new(RangeN {
32380                    this: Box::new(this),
32381                    expressions,
32382                    each,
32383                }))))
32384            }
32385            (crate::function_registry::TypedParseKind::Variadic, "XMLTABLE") => {
32386                if let Some(xml_table) = self.parse_xml_table()? {
32387                    self.expect(TokenType::RParen)?;
32388                    Ok(Some(xml_table))
32389                } else {
32390                    Err(self.parse_error("Failed to parse XMLTABLE"))
32391                }
32392            }
32393            (crate::function_registry::TypedParseKind::Variadic, "XMLELEMENT") => {
32394                if let Some(elem) = self.parse_xml_element()? {
32395                    self.expect(TokenType::RParen)?;
32396                    Ok(Some(elem))
32397                } else {
32398                    self.expect(TokenType::RParen)?;
32399                    Ok(Some(Expression::Function(Box::new(Function {
32400                        name: name.to_string(),
32401                        args: Vec::new(),
32402                        distinct: false,
32403                        trailing_comments: Vec::new(),
32404                        use_bracket_syntax: false,
32405                        no_parens: false,
32406                        quoted: false,
32407                        span: None,
32408                        inferred_type: None,
32409                    }))))
32410                }
32411            }
32412            (crate::function_registry::TypedParseKind::Variadic, "XMLATTRIBUTES") => {
32413                let mut attrs = Vec::new();
32414                if !self.check(TokenType::RParen) {
32415                    loop {
32416                        let expr = self.parse_expression()?;
32417                        if self.match_token(TokenType::As) {
32418                            let alias_ident = self.expect_identifier_or_keyword_with_quoted()?;
32419                            attrs.push(Expression::Alias(Box::new(Alias {
32420                                this: expr,
32421                                alias: alias_ident,
32422                                column_aliases: Vec::new(),
32423                                pre_alias_comments: Vec::new(),
32424                                trailing_comments: Vec::new(),
32425                                inferred_type: None,
32426                            })));
32427                        } else {
32428                            attrs.push(expr);
32429                        }
32430                        if !self.match_token(TokenType::Comma) {
32431                            break;
32432                        }
32433                    }
32434                }
32435                self.expect(TokenType::RParen)?;
32436                Ok(Some(Expression::Function(Box::new(Function {
32437                    name: "XMLATTRIBUTES".to_string(),
32438                    args: attrs,
32439                    distinct: false,
32440                    trailing_comments: Vec::new(),
32441                    use_bracket_syntax: false,
32442                    no_parens: false,
32443                    quoted: false,
32444                    span: None,
32445                    inferred_type: None,
32446                }))))
32447            }
32448            (crate::function_registry::TypedParseKind::Variadic, "XMLCOMMENT") => {
32449                let args = if self.check(TokenType::RParen) {
32450                    Vec::new()
32451                } else {
32452                    self.parse_expression_list()?
32453                };
32454                self.expect(TokenType::RParen)?;
32455                Ok(Some(Expression::Function(Box::new(Function {
32456                    name: "XMLCOMMENT".to_string(),
32457                    args,
32458                    distinct: false,
32459                    trailing_comments: Vec::new(),
32460                    use_bracket_syntax: false,
32461                    no_parens: false,
32462                    quoted: false,
32463                    span: None,
32464                    inferred_type: None,
32465                }))))
32466            }
32467            (crate::function_registry::TypedParseKind::Variadic, "MATCH") => {
32468                let expressions = if self.check(TokenType::Table)
32469                    && !matches!(
32470                        self.config.dialect,
32471                        Some(crate::dialects::DialectType::ClickHouse)
32472                    ) {
32473                    self.skip();
32474                    let table_name = self.expect_identifier_or_keyword()?;
32475                    vec![Expression::Var(Box::new(Var {
32476                        this: format!("TABLE {}", table_name),
32477                    }))]
32478                } else {
32479                    self.parse_expression_list()?
32480                };
32481
32482                self.expect(TokenType::RParen)?;
32483
32484                if !self.check_keyword_text("AGAINST") {
32485                    return Ok(Some(Expression::Function(Box::new(Function {
32486                        name: "MATCH".to_string(),
32487                        args: expressions,
32488                        distinct: false,
32489                        trailing_comments: Vec::new(),
32490                        use_bracket_syntax: false,
32491                        no_parens: false,
32492                        quoted: false,
32493                        span: None,
32494                        inferred_type: None,
32495                    }))));
32496                }
32497
32498                self.skip();
32499                self.expect(TokenType::LParen)?;
32500                let search_expr = self.parse_primary()?;
32501
32502                let modifier = if self.match_text_seq(&["IN", "NATURAL", "LANGUAGE", "MODE"]) {
32503                    if self.match_text_seq(&["WITH", "QUERY", "EXPANSION"]) {
32504                        Some(Box::new(Expression::Var(Box::new(Var {
32505                            this: "IN NATURAL LANGUAGE MODE WITH QUERY EXPANSION".to_string(),
32506                        }))))
32507                    } else {
32508                        Some(Box::new(Expression::Var(Box::new(Var {
32509                            this: "IN NATURAL LANGUAGE MODE".to_string(),
32510                        }))))
32511                    }
32512                } else if self.match_text_seq(&["IN", "BOOLEAN", "MODE"]) {
32513                    Some(Box::new(Expression::Var(Box::new(Var {
32514                        this: "IN BOOLEAN MODE".to_string(),
32515                    }))))
32516                } else if self.match_text_seq(&["WITH", "QUERY", "EXPANSION"]) {
32517                    Some(Box::new(Expression::Var(Box::new(Var {
32518                        this: "WITH QUERY EXPANSION".to_string(),
32519                    }))))
32520                } else {
32521                    None
32522                };
32523
32524                self.expect(TokenType::RParen)?;
32525
32526                Ok(Some(Expression::MatchAgainst(Box::new(MatchAgainst {
32527                    this: Box::new(search_expr),
32528                    expressions,
32529                    modifier,
32530                }))))
32531            }
32532            (crate::function_registry::TypedParseKind::Variadic, "TRANSFORM") => {
32533                let expressions = if self.check(TokenType::RParen) {
32534                    Vec::new()
32535                } else {
32536                    self.parse_function_args_with_lambda()?
32537                };
32538                self.expect(TokenType::RParen)?;
32539
32540                let row_format_before = if self.match_token(TokenType::Row) {
32541                    self.parse_row()?
32542                } else {
32543                    None
32544                };
32545
32546                let record_writer = if self.match_text_seq(&["RECORDWRITER"]) {
32547                    Some(Box::new(self.parse_expression()?))
32548                } else {
32549                    None
32550                };
32551
32552                if self.match_token(TokenType::Using) {
32553                    let command_script = Some(Box::new(self.parse_expression()?));
32554                    let schema = if self.match_token(TokenType::As) {
32555                        self.parse_schema()?
32556                    } else {
32557                        None
32558                    };
32559
32560                    let row_format_after = if self.match_token(TokenType::Row) {
32561                        self.parse_row()?
32562                    } else {
32563                        None
32564                    };
32565
32566                    let record_reader = if self.match_text_seq(&["RECORDREADER"]) {
32567                        Some(Box::new(self.parse_expression()?))
32568                    } else {
32569                        None
32570                    };
32571
32572                    Ok(Some(Expression::QueryTransform(Box::new(QueryTransform {
32573                        expressions,
32574                        command_script,
32575                        schema: schema.map(Box::new),
32576                        row_format_before: row_format_before.map(Box::new),
32577                        record_writer,
32578                        row_format_after: row_format_after.map(Box::new),
32579                        record_reader,
32580                    }))))
32581                } else {
32582                    Ok(Some(Expression::Function(Box::new(Function {
32583                        name: name.to_string(),
32584                        args: expressions,
32585                        distinct: false,
32586                        trailing_comments: Vec::new(),
32587                        use_bracket_syntax: false,
32588                        no_parens: false,
32589                        quoted,
32590                        span: None,
32591                        inferred_type: None,
32592                    }))))
32593                }
32594            }
32595            (crate::function_registry::TypedParseKind::Variadic, "CONVERT") => {
32596                let is_try = upper_name == "TRY_CONVERT";
32597                let is_tsql = matches!(
32598                    self.config.dialect,
32599                    Some(crate::dialects::DialectType::TSQL)
32600                        | Some(crate::dialects::DialectType::Fabric)
32601                );
32602
32603                if is_tsql {
32604                    let saved = self.current;
32605                    let orig_type_text = if self.current < self.tokens.len() {
32606                        self.tokens[self.current].text.to_ascii_uppercase()
32607                    } else {
32608                        String::new()
32609                    };
32610                    let dt = self.parse_data_type();
32611                    if let Ok(mut dt) = dt {
32612                        if self.match_token(TokenType::Comma) {
32613                            if orig_type_text == "NVARCHAR" || orig_type_text == "NCHAR" {
32614                                dt = match dt {
32615                                    crate::expressions::DataType::VarChar { length, .. } => {
32616                                        if let Some(len) = length {
32617                                            crate::expressions::DataType::Custom {
32618                                                name: format!("{}({})", orig_type_text, len),
32619                                            }
32620                                        } else {
32621                                            crate::expressions::DataType::Custom {
32622                                                name: orig_type_text.clone(),
32623                                            }
32624                                        }
32625                                    }
32626                                    crate::expressions::DataType::Char { length } => {
32627                                        if let Some(len) = length {
32628                                            crate::expressions::DataType::Custom {
32629                                                name: format!("{}({})", orig_type_text, len),
32630                                            }
32631                                        } else {
32632                                            crate::expressions::DataType::Custom {
32633                                                name: orig_type_text.clone(),
32634                                            }
32635                                        }
32636                                    }
32637                                    other => other,
32638                                };
32639                            }
32640                            let value = self.parse_expression()?;
32641                            let style = if self.match_token(TokenType::Comma) {
32642                                Some(self.parse_expression()?)
32643                            } else {
32644                                None
32645                            };
32646                            self.expect(TokenType::RParen)?;
32647                            let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
32648                            let mut args = vec![Expression::DataType(dt), value];
32649                            if let Some(s) = style {
32650                                args.push(s);
32651                            }
32652                            return Ok(Some(Expression::Function(Box::new(Function {
32653                                name: func_name.to_string(),
32654                                args,
32655                                distinct: false,
32656                                trailing_comments: Vec::new(),
32657                                use_bracket_syntax: false,
32658                                no_parens: false,
32659                                quoted: false,
32660                                span: None,
32661                                inferred_type: None,
32662                            }))));
32663                        }
32664                        self.current = saved;
32665                    } else {
32666                        self.current = saved;
32667                    }
32668                }
32669
32670                let this = self.parse_expression()?;
32671                if self.match_token(TokenType::Using) {
32672                    let charset = self.expect_identifier()?;
32673                    self.expect(TokenType::RParen)?;
32674                    Ok(Some(Expression::Cast(Box::new(Cast {
32675                        this,
32676                        to: DataType::CharacterSet { name: charset },
32677                        trailing_comments: Vec::new(),
32678                        double_colon_syntax: false,
32679                        format: None,
32680                        default: None,
32681                        inferred_type: None,
32682                    }))))
32683                } else if self.match_token(TokenType::Comma) {
32684                    let mut args = vec![this];
32685                    args.push(self.parse_expression()?);
32686                    while self.match_token(TokenType::Comma) {
32687                        args.push(self.parse_expression()?);
32688                    }
32689                    self.expect(TokenType::RParen)?;
32690                    let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
32691                    Ok(Some(Expression::Function(Box::new(Function {
32692                        name: func_name.to_string(),
32693                        args,
32694                        distinct: false,
32695                        trailing_comments: Vec::new(),
32696                        use_bracket_syntax: false,
32697                        no_parens: false,
32698                        quoted: false,
32699                        span: None,
32700                        inferred_type: None,
32701                    }))))
32702                } else {
32703                    self.expect(TokenType::RParen)?;
32704                    let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
32705                    Ok(Some(Expression::Function(Box::new(Function {
32706                        name: func_name.to_string(),
32707                        args: vec![this],
32708                        distinct: false,
32709                        trailing_comments: Vec::new(),
32710                        use_bracket_syntax: false,
32711                        no_parens: false,
32712                        quoted: false,
32713                        span: None,
32714                        inferred_type: None,
32715                    }))))
32716                }
32717            }
32718            (crate::function_registry::TypedParseKind::Variadic, "TRIM") => {
32719                let (position, position_explicit) = if self.match_token(TokenType::Leading) {
32720                    (TrimPosition::Leading, true)
32721                } else if self.match_token(TokenType::Trailing) {
32722                    (TrimPosition::Trailing, true)
32723                } else if self.match_token(TokenType::Both) {
32724                    (TrimPosition::Both, true)
32725                } else {
32726                    (TrimPosition::Both, false)
32727                };
32728
32729                if position_explicit || self.check(TokenType::From) {
32730                    if self.match_token(TokenType::From) {
32731                        let this = self.parse_expression()?;
32732                        self.expect(TokenType::RParen)?;
32733                        Ok(Some(Expression::Trim(Box::new(TrimFunc {
32734                            this,
32735                            characters: None,
32736                            position,
32737                            sql_standard_syntax: true,
32738                            position_explicit,
32739                        }))))
32740                    } else {
32741                        let first_expr = self.parse_bitwise_or()?;
32742                        let first_expr = self.try_clickhouse_func_arg_alias(first_expr);
32743                        if self.match_token(TokenType::From) {
32744                            let this = self.parse_bitwise_or()?;
32745                            let this = self.try_clickhouse_func_arg_alias(this);
32746                            self.expect(TokenType::RParen)?;
32747                            Ok(Some(Expression::Trim(Box::new(TrimFunc {
32748                                this,
32749                                characters: Some(first_expr),
32750                                position,
32751                                sql_standard_syntax: true,
32752                                position_explicit,
32753                            }))))
32754                        } else {
32755                            self.expect(TokenType::RParen)?;
32756                            Ok(Some(Expression::Trim(Box::new(TrimFunc {
32757                                this: first_expr,
32758                                characters: None,
32759                                position,
32760                                sql_standard_syntax: true,
32761                                position_explicit,
32762                            }))))
32763                        }
32764                    }
32765                } else {
32766                    let first_expr = self.parse_expression()?;
32767                    let first_expr = self.try_clickhouse_func_arg_alias(first_expr);
32768                    if self.match_token(TokenType::From) {
32769                        let this = self.parse_expression()?;
32770                        let this = self.try_clickhouse_func_arg_alias(this);
32771                        self.expect(TokenType::RParen)?;
32772                        Ok(Some(Expression::Trim(Box::new(TrimFunc {
32773                            this,
32774                            characters: Some(first_expr),
32775                            position: TrimPosition::Both,
32776                            sql_standard_syntax: true,
32777                            position_explicit: false,
32778                        }))))
32779                    } else if self.match_token(TokenType::Comma) {
32780                        let second_expr = self.parse_expression()?;
32781                        self.expect(TokenType::RParen)?;
32782                        let trim_pattern_first = matches!(
32783                            self.config.dialect,
32784                            Some(crate::dialects::DialectType::Spark)
32785                        );
32786                        let (this, characters) = if trim_pattern_first {
32787                            (second_expr, first_expr)
32788                        } else {
32789                            (first_expr, second_expr)
32790                        };
32791                        Ok(Some(Expression::Trim(Box::new(TrimFunc {
32792                            this,
32793                            characters: Some(characters),
32794                            position: TrimPosition::Both,
32795                            sql_standard_syntax: false,
32796                            position_explicit: false,
32797                        }))))
32798                    } else {
32799                        self.expect(TokenType::RParen)?;
32800                        Ok(Some(Expression::Trim(Box::new(TrimFunc {
32801                            this: first_expr,
32802                            characters: None,
32803                            position: TrimPosition::Both,
32804                            sql_standard_syntax: false,
32805                            position_explicit: false,
32806                        }))))
32807                    }
32808                }
32809            }
32810            (crate::function_registry::TypedParseKind::Variadic, "OVERLAY") => {
32811                if matches!(
32812                    self.config.dialect,
32813                    Some(crate::dialects::DialectType::ClickHouse)
32814                ) {
32815                    let args = self.parse_function_arguments()?;
32816                    self.expect(TokenType::RParen)?;
32817                    return Ok(Some(Expression::Function(Box::new(Function {
32818                        name: name.to_string(),
32819                        args,
32820                        distinct: false,
32821                        trailing_comments: Vec::new(),
32822                        use_bracket_syntax: false,
32823                        no_parens: false,
32824                        quoted: false,
32825                        span: None,
32826                        inferred_type: None,
32827                    }))));
32828                }
32829
32830                let this = self.parse_expression()?;
32831                if self.match_token(TokenType::Placing) {
32832                    let replacement = self.parse_expression()?;
32833                    self.expect(TokenType::From)?;
32834                    let from = self.parse_expression()?;
32835                    let length = if self.match_token(TokenType::For) {
32836                        Some(self.parse_expression()?)
32837                    } else {
32838                        None
32839                    };
32840                    self.expect(TokenType::RParen)?;
32841                    Ok(Some(Expression::Overlay(Box::new(OverlayFunc {
32842                        this,
32843                        replacement,
32844                        from,
32845                        length,
32846                    }))))
32847                } else if self.match_token(TokenType::Comma) {
32848                    let replacement = self.parse_expression()?;
32849                    if self.match_token(TokenType::Comma) {
32850                        let from = self.parse_expression()?;
32851                        let length = if self.match_token(TokenType::Comma) {
32852                            Some(self.parse_expression()?)
32853                        } else {
32854                            None
32855                        };
32856                        self.expect(TokenType::RParen)?;
32857                        Ok(Some(Expression::Overlay(Box::new(OverlayFunc {
32858                            this,
32859                            replacement,
32860                            from,
32861                            length,
32862                        }))))
32863                    } else {
32864                        self.expect(TokenType::RParen)?;
32865                        Ok(Some(Expression::Function(Box::new(Function {
32866                            name: name.to_string(),
32867                            args: vec![this, replacement],
32868                            distinct: false,
32869                            trailing_comments: Vec::new(),
32870                            use_bracket_syntax: false,
32871                            no_parens: false,
32872                            quoted: false,
32873                            span: None,
32874                            inferred_type: None,
32875                        }))))
32876                    }
32877                } else {
32878                    self.expect(TokenType::RParen)?;
32879                    Ok(Some(Expression::Function(Box::new(Function {
32880                        name: name.to_string(),
32881                        args: vec![this],
32882                        distinct: false,
32883                        trailing_comments: Vec::new(),
32884                        use_bracket_syntax: false,
32885                        no_parens: false,
32886                        quoted: false,
32887                        span: None,
32888                        inferred_type: None,
32889                    }))))
32890                }
32891            }
32892            (crate::function_registry::TypedParseKind::Variadic, "CEIL") => {
32893                let this = self.parse_expression()?;
32894                // Check for TO unit syntax (Druid: CEIL(__time TO WEEK))
32895                let to = if self.match_token(TokenType::To) {
32896                    // Parse the time unit as a variable/identifier
32897                    self.parse_var()?
32898                } else {
32899                    None
32900                };
32901                let decimals = if to.is_none() && self.match_token(TokenType::Comma) {
32902                    Some(self.parse_expression()?)
32903                } else {
32904                    None
32905                };
32906                self.expect(TokenType::RParen)?;
32907                Ok(Some(Expression::Ceil(Box::new(CeilFunc {
32908                    this,
32909                    decimals,
32910                    to,
32911                }))))
32912            }
32913            (crate::function_registry::TypedParseKind::Variadic, "TIMESTAMP_FROM_PARTS")
32914            | (crate::function_registry::TypedParseKind::Variadic, "TIMESTAMP_NTZ_FROM_PARTS")
32915            | (crate::function_registry::TypedParseKind::Variadic, "TIMESTAMP_LTZ_FROM_PARTS")
32916            | (crate::function_registry::TypedParseKind::Variadic, "TIMESTAMP_TZ_FROM_PARTS")
32917            | (crate::function_registry::TypedParseKind::Variadic, "DATE_FROM_PARTS")
32918            | (crate::function_registry::TypedParseKind::Variadic, "TIME_FROM_PARTS") => {
32919                let args = self.parse_expression_list()?;
32920                self.expect(TokenType::RParen)?;
32921                Ok(Some(Expression::Function(Box::new(Function {
32922                    name: name.to_string(),
32923                    args,
32924                    distinct: false,
32925                    trailing_comments: Vec::new(),
32926                    use_bracket_syntax: false,
32927                    no_parens: false,
32928                    quoted: false,
32929                    span: None,
32930                    inferred_type: None,
32931                }))))
32932            }
32933            (crate::function_registry::TypedParseKind::CastLike, "TRY_CAST") => {
32934                let this = self.parse_expression()?;
32935                self.expect(TokenType::As)?;
32936                let to = self.parse_data_type()?;
32937                self.expect(TokenType::RParen)?;
32938                Ok(Some(Expression::TryCast(Box::new(Cast {
32939                    this,
32940                    to,
32941                    trailing_comments: Vec::new(),
32942                    double_colon_syntax: false,
32943                    format: None,
32944                    default: None,
32945                    inferred_type: None,
32946                }))))
32947            }
32948            (crate::function_registry::TypedParseKind::Conditional, "IF") => {
32949                // ClickHouse: if() with zero args is valid in test queries
32950                if self.check(TokenType::RParen) {
32951                    self.skip();
32952                    return Ok(Some(Expression::Function(Box::new(Function {
32953                        name: name.to_string(),
32954                        args: vec![],
32955                        distinct: false,
32956                        trailing_comments: Vec::new(),
32957                        use_bracket_syntax: false,
32958                        no_parens: false,
32959                        quoted: false,
32960                        span: None,
32961                        inferred_type: None,
32962                    }))));
32963                }
32964                let args = self.parse_expression_list()?;
32965                self.expect(TokenType::RParen)?;
32966                let expr = if args.len() == 3 {
32967                    Expression::IfFunc(Box::new(crate::expressions::IfFunc {
32968                        original_name: Some(upper_name.to_string()),
32969                        condition: args[0].clone(),
32970                        true_value: args[1].clone(),
32971                        false_value: Some(args[2].clone()),
32972                        inferred_type: None,
32973                    }))
32974                } else if args.len() == 2 {
32975                    // IF with 2 args: condition, true_value (no false_value)
32976                    Expression::IfFunc(Box::new(crate::expressions::IfFunc {
32977                        original_name: Some(upper_name.to_string()),
32978                        condition: args[0].clone(),
32979                        true_value: args[1].clone(),
32980                        false_value: None,
32981                        inferred_type: None,
32982                    }))
32983                } else {
32984                    return Err(self.parse_error("IF function requires 2 or 3 arguments"));
32985                };
32986                Ok(Some(expr))
32987            }
32988            _ => {
32989                self.try_parse_registry_grouped_typed_family(name, upper_name, canonical_upper_name)
32990            }
32991        }
32992    }
32993
32994    /// Route heavy typed-function families via registry metadata groups.
32995    fn try_parse_registry_grouped_typed_family(
32996        &mut self,
32997        name: &str,
32998        upper_name: &str,
32999        canonical_upper_name: &str,
33000    ) -> Result<Option<Expression>> {
33001        use crate::function_registry::TypedDispatchGroup;
33002
33003        match crate::function_registry::typed_dispatch_group_by_name_upper(canonical_upper_name) {
33004            Some(TypedDispatchGroup::AggregateFamily) => self
33005                .parse_typed_aggregate_family(name, upper_name, canonical_upper_name)
33006                .map(Some),
33007            Some(TypedDispatchGroup::WindowFamily) => self
33008                .parse_typed_window_family(name, upper_name, canonical_upper_name)
33009                .map(Some),
33010            Some(TypedDispatchGroup::JsonFamily) => self
33011                .parse_typed_json_family(name, upper_name, canonical_upper_name)
33012                .map(Some),
33013            Some(TypedDispatchGroup::TranslateTeradataFamily) => {
33014                if matches!(
33015                    self.config.dialect,
33016                    Some(crate::dialects::DialectType::Teradata)
33017                ) {
33018                    self.parse_typed_translate_teradata_family(
33019                        name,
33020                        upper_name,
33021                        canonical_upper_name,
33022                    )
33023                    .map(Some)
33024                } else {
33025                    Ok(None)
33026                }
33027            }
33028            None => Ok(None),
33029        }
33030    }
33031
33032    fn make_unquoted_function(name: &str, args: Vec<Expression>) -> Expression {
33033        Expression::Function(Box::new(Function {
33034            name: name.to_string(),
33035            args,
33036            distinct: false,
33037            trailing_comments: Vec::new(),
33038            use_bracket_syntax: false,
33039            no_parens: false,
33040            quoted: false,
33041            span: None,
33042            inferred_type: None,
33043        }))
33044    }
33045
33046    fn make_simple_aggregate(
33047        name: &str,
33048        args: Vec<Expression>,
33049        distinct: bool,
33050        filter: Option<Expression>,
33051    ) -> Expression {
33052        Expression::AggregateFunction(Box::new(AggregateFunction {
33053            name: name.to_string(),
33054            args,
33055            distinct,
33056            filter,
33057            order_by: Vec::new(),
33058            limit: None,
33059            ignore_nulls: None,
33060            inferred_type: None,
33061        }))
33062    }
33063
33064    /// Parse phase-3 typed-function slices that are straightforward pass-throughs.
33065    fn try_parse_phase3_typed_function(
33066        &mut self,
33067        name: &str,
33068        _upper_name: &str,
33069        canonical_upper_name: &str,
33070    ) -> Result<Option<Expression>> {
33071        let Some(behavior) =
33072            crate::function_registry::parser_dispatch_behavior_by_name_upper(canonical_upper_name)
33073        else {
33074            return Ok(None);
33075        };
33076
33077        match behavior {
33078            crate::function_registry::ParserDispatchBehavior::ExprListFunction => {
33079                let args = self.parse_expression_list()?;
33080                self.expect(TokenType::RParen)?;
33081                Ok(Some(Self::make_unquoted_function(name, args)))
33082            }
33083            crate::function_registry::ParserDispatchBehavior::OptionalExprListFunction => {
33084                let args = if self.check(TokenType::RParen) {
33085                    Vec::new()
33086                } else {
33087                    self.parse_expression_list()?
33088                };
33089                self.expect(TokenType::RParen)?;
33090                Ok(Some(Self::make_unquoted_function(name, args)))
33091            }
33092            crate::function_registry::ParserDispatchBehavior::FunctionArgumentsFunction => {
33093                let args = self.parse_function_arguments()?;
33094                self.expect(TokenType::RParen)?;
33095                Ok(Some(Self::make_unquoted_function(name, args)))
33096            }
33097            crate::function_registry::ParserDispatchBehavior::ZeroArgFunction => {
33098                self.expect(TokenType::RParen)?;
33099                Ok(Some(Self::make_unquoted_function(name, Vec::new())))
33100            }
33101            crate::function_registry::ParserDispatchBehavior::ExprListMaybeAggregateByFilter => {
33102                let args = if self.check(TokenType::RParen) {
33103                    Vec::new()
33104                } else {
33105                    self.parse_expression_list()?
33106                };
33107                self.expect(TokenType::RParen)?;
33108                let filter = self.parse_filter_clause()?;
33109                if filter.is_some() {
33110                    Ok(Some(Self::make_simple_aggregate(name, args, false, filter)))
33111                } else {
33112                    Ok(Some(Self::make_unquoted_function(name, args)))
33113                }
33114            }
33115            crate::function_registry::ParserDispatchBehavior::ExprListMaybeAggregateByAggSuffix => {
33116                let args = self.parse_expression_list()?;
33117                self.expect(TokenType::RParen)?;
33118                let filter = self.parse_filter_clause()?;
33119                if canonical_upper_name.ends_with("_AGG") || filter.is_some() {
33120                    Ok(Some(Self::make_simple_aggregate(name, args, false, filter)))
33121                } else {
33122                    Ok(Some(Self::make_unquoted_function(name, args)))
33123                }
33124            }
33125            crate::function_registry::ParserDispatchBehavior::HashLike => {
33126                let args = self.parse_expression_list()?;
33127                self.expect(TokenType::RParen)?;
33128                let filter = self.parse_filter_clause()?;
33129                if canonical_upper_name == "HASH_AGG" || filter.is_some() {
33130                    Ok(Some(Self::make_simple_aggregate(name, args, false, filter)))
33131                } else {
33132                    Ok(Some(Self::make_unquoted_function(name, args)))
33133                }
33134            }
33135            crate::function_registry::ParserDispatchBehavior::HllAggregate => {
33136                let distinct = self.match_token(TokenType::Distinct);
33137                let args = if self.match_token(TokenType::Star) {
33138                    vec![Expression::Star(Star {
33139                        table: None,
33140                        except: None,
33141                        replace: None,
33142                        rename: None,
33143                        trailing_comments: Vec::new(),
33144                        span: None,
33145                    })]
33146                } else if self.check(TokenType::RParen) {
33147                    Vec::new()
33148                } else {
33149                    self.parse_expression_list()?
33150                };
33151                self.expect(TokenType::RParen)?;
33152                let filter = self.parse_filter_clause()?;
33153                Ok(Some(Self::make_simple_aggregate(
33154                    name, args, distinct, filter,
33155                )))
33156            }
33157            crate::function_registry::ParserDispatchBehavior::PercentileAggregate => {
33158                let distinct = self.match_token(TokenType::Distinct);
33159                if !distinct {
33160                    self.match_token(TokenType::All);
33161                }
33162                let args = self.parse_expression_list()?;
33163                self.expect(TokenType::RParen)?;
33164                let filter = self.parse_filter_clause()?;
33165                Ok(Some(Self::make_simple_aggregate(
33166                    name, args, distinct, filter,
33167                )))
33168            }
33169            crate::function_registry::ParserDispatchBehavior::ExprListAggregate => {
33170                let args = self.parse_expression_list()?;
33171                self.expect(TokenType::RParen)?;
33172                let filter = self.parse_filter_clause()?;
33173                Ok(Some(Self::make_simple_aggregate(name, args, false, filter)))
33174            }
33175            crate::function_registry::ParserDispatchBehavior::UnaryAggregate => {
33176                let this = self.parse_expression()?;
33177                self.expect(TokenType::RParen)?;
33178                let filter = self.parse_filter_clause()?;
33179                Ok(Some(Self::make_simple_aggregate(
33180                    name,
33181                    vec![this],
33182                    false,
33183                    filter,
33184                )))
33185            }
33186            crate::function_registry::ParserDispatchBehavior::TranslateNonTeradata => {
33187                if matches!(
33188                    self.config.dialect,
33189                    Some(crate::dialects::DialectType::Teradata)
33190                ) {
33191                    return Ok(None);
33192                }
33193                let args = self.parse_expression_list()?;
33194                self.expect(TokenType::RParen)?;
33195                Ok(Some(Self::make_unquoted_function(name, args)))
33196            }
33197        }
33198    }
33199
33200    /// Parse a typed function call (after the opening paren)
33201    /// Following Python SQLGlot pattern: match all function aliases to typed expressions
33202    fn parse_typed_function(
33203        &mut self,
33204        name: &str,
33205        upper_name: &str,
33206        quoted: bool,
33207    ) -> Result<Expression> {
33208        let canonical_upper_name =
33209            crate::function_registry::canonical_typed_function_name_upper(upper_name);
33210
33211        // Handle internal function rewrites (sqlglot internal functions that map to CAST)
33212        if canonical_upper_name == "TIME_TO_TIME_STR" {
33213            let arg = self.parse_expression()?;
33214            self.expect(TokenType::RParen)?;
33215            return Ok(Expression::Cast(Box::new(Cast {
33216                this: arg,
33217                to: DataType::Text,
33218                trailing_comments: Vec::new(),
33219                double_colon_syntax: false,
33220                format: None,
33221                default: None,
33222                inferred_type: None,
33223            })));
33224        }
33225
33226        if let Some(expr) =
33227            self.try_parse_registry_typed_function(name, upper_name, canonical_upper_name, quoted)?
33228        {
33229            return Ok(expr);
33230        }
33231        if let Some(expr) =
33232            self.try_parse_phase3_typed_function(name, upper_name, canonical_upper_name)?
33233        {
33234            return Ok(expr);
33235        }
33236
33237        self.parse_generic_function(name, quoted)
33238    }
33239
33240    fn parse_typed_aggregate_family(
33241        &mut self,
33242        name: &str,
33243        upper_name: &str,
33244        canonical_upper_name: &str,
33245    ) -> Result<Expression> {
33246        match canonical_upper_name {
33247            // COUNT function
33248            "COUNT" => {
33249                let (this, star, distinct) = if self.check(TokenType::RParen) {
33250                    (None, false, false)
33251                } else if self.match_token(TokenType::Star) {
33252                    (None, true, false)
33253                } else if self.match_token(TokenType::All) {
33254                    // COUNT(ALL expr) - ALL is the default, just consume it
33255                    (Some(self.parse_expression()?), false, false)
33256                } else if self.match_token(TokenType::Distinct) {
33257                    let first_expr = self.parse_expression()?;
33258                    // Check for multiple columns: COUNT(DISTINCT a, b, c)
33259                    if self.match_token(TokenType::Comma) {
33260                        let mut args = vec![first_expr];
33261                        loop {
33262                            args.push(self.parse_expression()?);
33263                            if !self.match_token(TokenType::Comma) {
33264                                break;
33265                            }
33266                        }
33267                        // Return as a tuple expression for COUNT DISTINCT over multiple columns
33268                        (
33269                            Some(Expression::Tuple(Box::new(Tuple { expressions: args }))),
33270                            false,
33271                            true,
33272                        )
33273                    } else {
33274                        (Some(first_expr), false, true)
33275                    }
33276                } else {
33277                    let first_expr = self.parse_expression()?;
33278                    // ClickHouse: consume optional AS alias inside function args (e.g., count(NULL AS a))
33279                    let first_expr = if matches!(
33280                        self.config.dialect,
33281                        Some(crate::dialects::DialectType::ClickHouse)
33282                    ) && self.check(TokenType::As)
33283                    {
33284                        self.skip(); // consume AS
33285                        let alias = self.expect_identifier_or_keyword_with_quoted()?;
33286                        Expression::Alias(Box::new(Alias {
33287                            this: first_expr,
33288                            alias,
33289                            column_aliases: Vec::new(),
33290                            pre_alias_comments: Vec::new(),
33291                            trailing_comments: Vec::new(),
33292                            inferred_type: None,
33293                        }))
33294                    } else {
33295                        first_expr
33296                    };
33297                    // Check for multiple arguments (rare but possible)
33298                    if self.match_token(TokenType::Comma) {
33299                        let mut args = vec![first_expr];
33300                        loop {
33301                            args.push(self.parse_expression()?);
33302                            if !self.match_token(TokenType::Comma) {
33303                                break;
33304                            }
33305                        }
33306                        self.expect(TokenType::RParen)?;
33307                        // Multiple args without DISTINCT - treat as generic function
33308                        return Ok(Expression::Function(Box::new(Function {
33309                            name: name.to_string(),
33310                            args,
33311                            distinct: false,
33312                            trailing_comments: Vec::new(),
33313                            use_bracket_syntax: false,
33314                            no_parens: false,
33315                            quoted: false,
33316                            span: None,
33317                            inferred_type: None,
33318                        })));
33319                    }
33320                    (Some(first_expr), false, false)
33321                };
33322                // BigQuery: RESPECT NULLS / IGNORE NULLS inside COUNT
33323                let ignore_nulls = if self.match_token(TokenType::Ignore)
33324                    && self.match_token(TokenType::Nulls)
33325                {
33326                    Some(true)
33327                } else if self.match_token(TokenType::Respect) && self.match_token(TokenType::Nulls)
33328                {
33329                    Some(false)
33330                } else {
33331                    None
33332                };
33333                self.expect(TokenType::RParen)?;
33334                let filter = self.parse_filter_clause()?;
33335                // Also check for IGNORE NULLS / RESPECT NULLS after the closing paren
33336                let ignore_nulls = if ignore_nulls.is_some() {
33337                    ignore_nulls
33338                } else if self.match_keywords(&[TokenType::Ignore, TokenType::Nulls]) {
33339                    Some(true)
33340                } else if self.match_keywords(&[TokenType::Respect, TokenType::Nulls]) {
33341                    Some(false)
33342                } else {
33343                    None
33344                };
33345                Ok(Expression::Count(Box::new(CountFunc {
33346                    this,
33347                    star,
33348                    distinct,
33349                    filter,
33350                    ignore_nulls,
33351                    original_name: Some(name.to_string()),
33352                    inferred_type: None,
33353                })))
33354            }
33355
33356            // LIST function: LIST(SELECT ...) in Materialize - list constructor with subquery
33357            "LIST" => {
33358                let is_materialize = matches!(
33359                    self.config.dialect,
33360                    Some(crate::dialects::DialectType::Materialize)
33361                );
33362                if is_materialize && self.check(TokenType::Select) {
33363                    let query = self.parse_select()?;
33364                    self.expect(TokenType::RParen)?;
33365                    return Ok(Expression::List(Box::new(List {
33366                        expressions: vec![query],
33367                    })));
33368                }
33369                // For non-Materialize or non-subquery, parse as either generic function or aggregate.
33370                let distinct = self.match_token(TokenType::Distinct);
33371                let args = if self.check(TokenType::RParen) {
33372                    Vec::new()
33373                } else {
33374                    self.parse_function_arguments()?
33375                };
33376                let order_by = if self.match_token(TokenType::Order) {
33377                    self.expect(TokenType::By)?;
33378                    self.parse_order_by_list()?
33379                } else {
33380                    Vec::new()
33381                };
33382                let limit = if self.match_token(TokenType::Limit) {
33383                    Some(Box::new(self.parse_expression()?))
33384                } else {
33385                    None
33386                };
33387                self.expect(TokenType::RParen)?;
33388                let filter = self.parse_filter_clause()?;
33389
33390                if distinct || !order_by.is_empty() || limit.is_some() || filter.is_some() {
33391                    Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
33392                        name: name.to_string(),
33393                        args,
33394                        distinct,
33395                        filter,
33396                        order_by,
33397                        limit,
33398                        ignore_nulls: None,
33399                        inferred_type: None,
33400                    })))
33401                } else {
33402                    Ok(Expression::Function(Box::new(Function {
33403                        name: name.to_string(),
33404                        args,
33405                        distinct: false,
33406                        trailing_comments: Vec::new(),
33407                        use_bracket_syntax: false,
33408                        no_parens: false,
33409                        quoted: false,
33410                        span: None,
33411                        inferred_type: None,
33412                    })))
33413                }
33414            }
33415
33416            // MAP function: MAP(SELECT ...) in Materialize - map constructor with subquery
33417            "MAP" => {
33418                let is_materialize = matches!(
33419                    self.config.dialect,
33420                    Some(crate::dialects::DialectType::Materialize)
33421                );
33422                if is_materialize && self.check(TokenType::Select) {
33423                    let query = self.parse_select()?;
33424                    self.expect(TokenType::RParen)?;
33425                    return Ok(Expression::ToMap(Box::new(ToMap {
33426                        this: Box::new(query),
33427                    })));
33428                }
33429                // For non-Materialize or non-subquery, fall through to generic handling
33430                let args = if self.check(TokenType::RParen) {
33431                    Vec::new()
33432                } else {
33433                    self.parse_function_arguments()?
33434                };
33435                self.expect(TokenType::RParen)?;
33436                Ok(Expression::Function(Box::new(Function {
33437                    name: name.to_string(),
33438                    args,
33439                    distinct: false,
33440                    trailing_comments: Vec::new(),
33441                    use_bracket_syntax: false,
33442                    no_parens: false,
33443                    quoted: false,
33444                    span: None,
33445                    inferred_type: None,
33446                })))
33447            }
33448
33449            // ARRAY function: ARRAY(SELECT ...) or ARRAY((SELECT ...) LIMIT n) is an array constructor with subquery
33450            // Different from ARRAY<type> which is a data type
33451            "ARRAY" => {
33452                // Check if this is ARRAY(SELECT ...) - array subquery constructor
33453                if self.check(TokenType::Select) {
33454                    let query = self.parse_select()?;
33455                    self.expect(TokenType::RParen)?;
33456                    // Pass the query directly as an argument to ARRAY function
33457                    // The generator will handle it correctly
33458                    return Ok(Expression::Function(Box::new(Function {
33459                        name: name.to_string(),
33460                        args: vec![query],
33461                        distinct: false,
33462                        trailing_comments: Vec::new(),
33463                        use_bracket_syntax: false,
33464                        no_parens: false,
33465                        quoted: false,
33466                        span: None,
33467                        inferred_type: None,
33468                    })));
33469                }
33470                // Check if this is ARRAY((SELECT ...) LIMIT n) - BigQuery allows LIMIT outside the subquery parens
33471                // This is common for constructs like ARRAY((SELECT AS STRUCT ...) LIMIT 10)
33472                if self.check(TokenType::LParen) {
33473                    // This could be a parenthesized subquery with modifiers after it
33474                    // Save position in case we need to backtrack
33475                    let saved_pos = self.current;
33476                    self.skip(); // consume opening paren
33477
33478                    // Check if there's a SELECT or WITH inside
33479                    if self.check(TokenType::Select) || self.check(TokenType::With) {
33480                        let inner_query = self.parse_statement()?;
33481                        self.expect(TokenType::RParen)?; // close inner parens
33482
33483                        // Now check for LIMIT/OFFSET modifiers outside the inner parens
33484                        let limit = if self.match_token(TokenType::Limit) {
33485                            let expr = self.parse_expression()?;
33486                            Some(Limit {
33487                                this: expr,
33488                                percent: false,
33489                                comments: Vec::new(),
33490                            })
33491                        } else {
33492                            None
33493                        };
33494
33495                        let offset = if self.match_token(TokenType::Offset) {
33496                            let expr = self.parse_expression()?;
33497                            let rows = if self.match_token(TokenType::Row)
33498                                || self.match_token(TokenType::Rows)
33499                            {
33500                                Some(true)
33501                            } else {
33502                                None
33503                            };
33504                            Some(Offset { this: expr, rows })
33505                        } else {
33506                            None
33507                        };
33508
33509                        self.expect(TokenType::RParen)?; // close ARRAY parens
33510
33511                        // Wrap the inner query in a Subquery with the modifiers
33512                        let subquery = Expression::Subquery(Box::new(Subquery {
33513                            this: inner_query,
33514                            alias: None,
33515                            column_aliases: Vec::new(),
33516                            order_by: None,
33517                            limit,
33518                            offset,
33519                            lateral: false,
33520                            modifiers_inside: false,
33521                            trailing_comments: Vec::new(),
33522                            distribute_by: None,
33523                            sort_by: None,
33524                            cluster_by: None,
33525                            inferred_type: None,
33526                        }));
33527
33528                        return Ok(Expression::Function(Box::new(Function {
33529                            name: name.to_string(),
33530                            args: vec![subquery],
33531                            distinct: false,
33532                            trailing_comments: Vec::new(),
33533                            use_bracket_syntax: false,
33534                            no_parens: false,
33535                            quoted: false,
33536                            span: None,
33537                            inferred_type: None,
33538                        })));
33539                    } else {
33540                        // Not a subquery, backtrack and parse as regular arguments
33541                        self.current = saved_pos;
33542                    }
33543                }
33544                // Otherwise fall through to parse as generic function or error
33545                // This could be ARRAY(...values...) or invalid syntax
33546                let args = if self.check(TokenType::RParen) {
33547                    Vec::new()
33548                } else {
33549                    self.parse_function_arguments()?
33550                };
33551                self.expect(TokenType::RParen)?;
33552                Ok(Expression::Function(Box::new(Function {
33553                    name: name.to_string(),
33554                    args,
33555                    distinct: false,
33556                    trailing_comments: Vec::new(),
33557                    use_bracket_syntax: false,
33558                    no_parens: false,
33559                    quoted: false,
33560                    span: None,
33561                    inferred_type: None,
33562                })))
33563            }
33564
33565            // Simple aggregate functions (SUM, AVG, MIN, MAX, etc.)
33566            // These can have multiple arguments in some contexts (e.g., MAX(a, b) is a scalar function)
33567            "SUM"
33568            | "AVG"
33569            | "MIN"
33570            | "MAX"
33571            | "ARRAY_AGG"
33572            | "ARRAY_CONCAT_AGG"
33573            | "STDDEV"
33574            | "STDDEV_POP"
33575            | "STDDEV_SAMP"
33576            | "VARIANCE"
33577            | "VAR_POP"
33578            | "VAR_SAMP"
33579            | "MEDIAN"
33580            | "MODE"
33581            | "FIRST"
33582            | "LAST"
33583            | "ANY_VALUE"
33584            | "APPROX_DISTINCT"
33585            | "APPROX_COUNT_DISTINCT"
33586            | "BIT_AND"
33587            | "BIT_OR"
33588            | "BIT_XOR" => {
33589                let distinct = if self.match_token(TokenType::Distinct) {
33590                    true
33591                } else {
33592                    self.match_token(TokenType::All); // ALL is the default, just consume it
33593                    false
33594                };
33595
33596                // MODE() can have zero arguments when used with WITHIN GROUP
33597                // e.g., MODE() WITHIN GROUP (ORDER BY col)
33598                if self.check(TokenType::RParen) {
33599                    // Empty args - will likely be followed by WITHIN GROUP
33600                    self.expect(TokenType::RParen)?;
33601                    let filter = self.parse_filter_clause()?;
33602                    let agg = AggFunc {
33603                        ignore_nulls: None,
33604                        this: Expression::Null(Null {}), // Placeholder for 0-arg aggregate
33605                        distinct: false,
33606                        filter,
33607                        order_by: Vec::new(),
33608                        having_max: None,
33609                        name: Some(name.to_string()),
33610                        limit: None,
33611                        inferred_type: None,
33612                    };
33613                    return Ok(match upper_name {
33614                        "MODE" => Expression::Mode(Box::new(agg)),
33615                        _ => {
33616                            // ClickHouse: allow zero-arg aggregates (server will validate)
33617                            if matches!(
33618                                self.config.dialect,
33619                                Some(crate::dialects::DialectType::ClickHouse)
33620                            ) {
33621                                Expression::Function(Box::new(Function {
33622                                    name: name.to_string(),
33623                                    args: Vec::new(),
33624                                    distinct: false,
33625                                    trailing_comments: Vec::new(),
33626                                    use_bracket_syntax: false,
33627                                    no_parens: false,
33628                                    quoted: false,
33629                                    span: None,
33630                                    inferred_type: None,
33631                                }))
33632                            } else {
33633                                return Err(self.parse_error(format!(
33634                                    "{} cannot have zero arguments",
33635                                    upper_name
33636                                )));
33637                            }
33638                        }
33639                    });
33640                }
33641
33642                let first_arg = self.parse_expression_with_clickhouse_alias()?;
33643
33644                // Check if there are more arguments (multi-arg scalar function like MAX(a, b))
33645                if self.match_token(TokenType::Comma) {
33646                    // Special handling for FIRST, LAST, ANY_VALUE with boolean second arg
33647                    // In Spark/Hive: first(col, true) means FIRST(col) IGNORE NULLS
33648                    let is_ignore_nulls_func = matches!(upper_name, "FIRST" | "LAST" | "ANY_VALUE");
33649
33650                    let second_arg = self.parse_expression()?;
33651
33652                    // Check if this is the IGNORE NULLS pattern: func(col, true)
33653                    if is_ignore_nulls_func && self.check(TokenType::RParen) {
33654                        if let Expression::Boolean(BooleanLiteral { value: true }) = &second_arg {
33655                            // This is func(col, true) -> FUNC(col) IGNORE NULLS
33656                            self.expect(TokenType::RParen)?;
33657                            let filter = self.parse_filter_clause()?;
33658                            let agg = AggFunc {
33659                                ignore_nulls: Some(true),
33660                                this: first_arg,
33661                                distinct,
33662                                filter,
33663                                order_by: Vec::new(),
33664                                having_max: None,
33665                                name: Some(name.to_string()),
33666                                limit: None,
33667                                inferred_type: None,
33668                            };
33669                            return Ok(match upper_name {
33670                                "FIRST" => Expression::First(Box::new(agg)),
33671                                "LAST" => Expression::Last(Box::new(agg)),
33672                                "ANY_VALUE" => Expression::AnyValue(Box::new(agg)),
33673                                _ => unreachable!(
33674                                    "function name already matched by is_ignore_nulls_func guard"
33675                                ),
33676                            });
33677                        }
33678                    }
33679
33680                    // Multiple arguments - treat as generic function call
33681                    let mut args = vec![first_arg, second_arg];
33682                    while self.match_token(TokenType::Comma) {
33683                        args.push(self.parse_expression()?);
33684                    }
33685                    self.expect(TokenType::RParen)?;
33686                    Ok(Expression::Function(Box::new(Function {
33687                        name: name.to_string(),
33688                        args,
33689                        distinct: false,
33690                        trailing_comments: Vec::new(),
33691                        use_bracket_syntax: false,
33692                        no_parens: false,
33693                        quoted: false,
33694                        span: None,
33695                        inferred_type: None,
33696                    })))
33697                } else {
33698                    // Check for IGNORE NULLS / RESPECT NULLS (BigQuery style)
33699                    let ignore_nulls = if self.match_token(TokenType::Ignore)
33700                        && self.match_token(TokenType::Nulls)
33701                    {
33702                        Some(true)
33703                    } else if self.match_token(TokenType::Respect)
33704                        && self.match_token(TokenType::Nulls)
33705                    {
33706                        Some(false)
33707                    } else {
33708                        None
33709                    };
33710
33711                    // Check for HAVING MAX/MIN inside aggregate (BigQuery syntax)
33712                    // e.g., ANY_VALUE(fruit HAVING MAX sold)
33713                    let having_max = if self.match_token(TokenType::Having) {
33714                        let is_max = if self.check_keyword_text("MAX") {
33715                            self.skip();
33716                            true
33717                        } else if self.check_keyword_text("MIN") {
33718                            self.skip();
33719                            false
33720                        } else {
33721                            return Err(
33722                                self.parse_error("Expected MAX or MIN after HAVING in aggregate")
33723                            );
33724                        };
33725                        let expr = self.parse_expression()?;
33726                        Some((Box::new(expr), is_max))
33727                    } else {
33728                        None
33729                    };
33730
33731                    // Check for ORDER BY inside aggregate (e.g., ARRAY_AGG(x ORDER BY y))
33732                    let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
33733                        self.parse_order_by_list()?
33734                    } else {
33735                        Vec::new()
33736                    };
33737                    // Check for LIMIT inside aggregate (e.g., ARRAY_AGG(x ORDER BY y LIMIT 2))
33738                    // Also supports LIMIT offset, count (e.g., ARRAY_AGG(x ORDER BY y LIMIT 1, 10))
33739                    let limit = if self.match_token(TokenType::Limit) {
33740                        let first = self.parse_expression()?;
33741                        if self.match_token(TokenType::Comma) {
33742                            let second = self.parse_expression()?;
33743                            // Store as Tuple(offset, count)
33744                            Some(Box::new(Expression::Tuple(Box::new(Tuple {
33745                                expressions: vec![first, second],
33746                            }))))
33747                        } else {
33748                            Some(Box::new(first))
33749                        }
33750                    } else {
33751                        None
33752                    };
33753                    // Single argument - treat as aggregate function
33754                    self.expect(TokenType::RParen)?;
33755                    let filter = self.parse_filter_clause()?;
33756                    // Also check for IGNORE NULLS / RESPECT NULLS after the closing paren
33757                    // e.g., FIRST(col) IGNORE NULLS (Hive/Spark/generic SQL syntax)
33758                    let ignore_nulls = if ignore_nulls.is_some() {
33759                        ignore_nulls
33760                    } else if self.match_keywords(&[TokenType::Ignore, TokenType::Nulls]) {
33761                        Some(true)
33762                    } else if self.match_keywords(&[TokenType::Respect, TokenType::Nulls]) {
33763                        Some(false)
33764                    } else {
33765                        None
33766                    };
33767                    let agg = AggFunc {
33768                        ignore_nulls,
33769                        this: first_arg,
33770                        distinct,
33771                        filter,
33772                        order_by,
33773                        having_max,
33774                        name: Some(name.to_string()),
33775                        limit,
33776                        inferred_type: None,
33777                    };
33778                    Ok(match upper_name {
33779                        "SUM" => Expression::Sum(Box::new(agg)),
33780                        "AVG" => Expression::Avg(Box::new(agg)),
33781                        "MIN" => Expression::Min(Box::new(agg)),
33782                        "MAX" => Expression::Max(Box::new(agg)),
33783                        "ARRAY_AGG" => Expression::ArrayAgg(Box::new(agg)),
33784                        "ARRAY_CONCAT_AGG" => Expression::ArrayConcatAgg(Box::new(agg)),
33785                        "STDDEV" => Expression::Stddev(Box::new(agg)),
33786                        "STDDEV_POP" => Expression::StddevPop(Box::new(agg)),
33787                        "STDDEV_SAMP" => Expression::StddevSamp(Box::new(agg)),
33788                        "VARIANCE" => Expression::Variance(Box::new(agg)),
33789                        "VAR_POP" => Expression::VarPop(Box::new(agg)),
33790                        "VAR_SAMP" => Expression::VarSamp(Box::new(agg)),
33791                        "MEDIAN" => Expression::Median(Box::new(agg)),
33792                        "MODE" => Expression::Mode(Box::new(agg)),
33793                        "FIRST" => Expression::First(Box::new(agg)),
33794                        "LAST" => Expression::Last(Box::new(agg)),
33795                        "ANY_VALUE" => Expression::AnyValue(Box::new(agg)),
33796                        "APPROX_DISTINCT" => Expression::ApproxDistinct(Box::new(agg)),
33797                        "APPROX_COUNT_DISTINCT" => Expression::ApproxCountDistinct(Box::new(agg)),
33798                        "BIT_AND" => Expression::BitwiseAndAgg(Box::new(agg)),
33799                        "BIT_OR" => Expression::BitwiseOrAgg(Box::new(agg)),
33800                        "BIT_XOR" => Expression::BitwiseXorAgg(Box::new(agg)),
33801                        _ => unreachable!("aggregate function name already matched in caller"),
33802                    })
33803                }
33804            }
33805
33806            // STRING_AGG - STRING_AGG([DISTINCT] expr [, separator] [ORDER BY order_list])
33807            "STRING_AGG" => {
33808                let distinct = self.match_token(TokenType::Distinct);
33809                let this = self.parse_expression()?;
33810                // Separator is optional
33811                let separator = if self.match_token(TokenType::Comma) {
33812                    Some(self.parse_expression()?)
33813                } else {
33814                    None
33815                };
33816                let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
33817                    Some(self.parse_order_by_list()?)
33818                } else {
33819                    None
33820                };
33821                // BigQuery: LIMIT inside STRING_AGG
33822                let limit = if self.match_token(TokenType::Limit) {
33823                    Some(Box::new(self.parse_expression()?))
33824                } else {
33825                    None
33826                };
33827                self.expect(TokenType::RParen)?;
33828                let filter = self.parse_filter_clause()?;
33829                Ok(Expression::StringAgg(Box::new(StringAggFunc {
33830                    this,
33831                    separator,
33832                    order_by,
33833                    distinct,
33834                    filter,
33835                    limit,
33836                    inferred_type: None,
33837                })))
33838            }
33839
33840            // GROUP_CONCAT - GROUP_CONCAT([DISTINCT] expr [, expr...] [ORDER BY order_list] [SEPARATOR 'sep'])
33841            // MySQL allows multiple args which get wrapped in CONCAT:
33842            // GROUP_CONCAT(a, b, c SEPARATOR ',') -> GroupConcat(CONCAT(a, b, c), SEPARATOR=',')
33843            "GROUP_CONCAT" => {
33844                let distinct = self.match_token(TokenType::Distinct);
33845                let first = self.parse_expression()?;
33846                // Check for additional comma-separated expressions (before ORDER BY or SEPARATOR)
33847                let mut exprs = vec![first];
33848                while self.match_token(TokenType::Comma) {
33849                    // Check if the next tokens are ORDER BY or SEPARATOR
33850                    // If so, the comma was part of the separator syntax (not more args)
33851                    if self.check(TokenType::Order) || self.check(TokenType::Separator) {
33852                        // This shouldn't happen normally in valid SQL, backtrack
33853                        break;
33854                    }
33855                    exprs.push(self.parse_expression()?);
33856                }
33857                // If multiple expressions, wrap in CONCAT (matches Python sqlglot behavior)
33858                let this = if exprs.len() == 1 {
33859                    exprs.pop().unwrap()
33860                } else {
33861                    Expression::Function(Box::new(Function::new("CONCAT".to_string(), exprs)))
33862                };
33863                // Parse optional ORDER BY
33864                let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
33865                    Some(self.parse_order_by_list()?)
33866                } else {
33867                    None
33868                };
33869                // Parse optional SEPARATOR - can be a string literal or expression (e.g., variable)
33870                let separator = if self.match_token(TokenType::Separator) {
33871                    Some(self.parse_expression()?)
33872                } else {
33873                    None
33874                };
33875                // Parse optional LIMIT (MySQL 8.0.19+)
33876                let limit = if self.match_token(TokenType::Limit) {
33877                    Some(Box::new(self.parse_expression()?))
33878                } else {
33879                    None
33880                };
33881                self.expect(TokenType::RParen)?;
33882                let filter = self.parse_filter_clause()?;
33883                Ok(Expression::GroupConcat(Box::new(GroupConcatFunc {
33884                    this,
33885                    separator,
33886                    order_by,
33887                    distinct,
33888                    filter,
33889                    limit,
33890                    inferred_type: None,
33891                })))
33892            }
33893
33894            // LISTAGG - LISTAGG([DISTINCT] expr [, separator [ON OVERFLOW ...]]) WITHIN GROUP (ORDER BY ...)
33895            "LISTAGG" => {
33896                // Check for optional DISTINCT
33897                let distinct = self.match_token(TokenType::Distinct);
33898                let this = self.parse_expression()?;
33899                let separator = if self.match_token(TokenType::Comma) {
33900                    Some(self.parse_expression()?)
33901                } else {
33902                    None
33903                };
33904                // Parse optional ON OVERFLOW clause
33905                let on_overflow = if self.match_token(TokenType::On) {
33906                    if self.match_identifier("OVERFLOW") {
33907                        if self.match_identifier("ERROR") {
33908                            Some(ListAggOverflow::Error)
33909                        } else if self.match_token(TokenType::Truncate) {
33910                            // Optional filler string
33911                            let filler = if self.check(TokenType::String) {
33912                                Some(self.parse_expression()?)
33913                            } else {
33914                                None
33915                            };
33916                            // WITH COUNT or WITHOUT COUNT
33917                            let with_count = if self.match_token(TokenType::With) {
33918                                self.match_identifier("COUNT");
33919                                true
33920                            } else if self.match_identifier("WITHOUT") {
33921                                self.match_identifier("COUNT");
33922                                false
33923                            } else {
33924                                true // default is WITH COUNT
33925                            };
33926                            Some(ListAggOverflow::Truncate { filler, with_count })
33927                        } else {
33928                            None
33929                        }
33930                    } else {
33931                        None
33932                    }
33933                } else {
33934                    None
33935                };
33936                self.expect(TokenType::RParen)?;
33937                // WITHIN GROUP (ORDER BY ...) is handled by maybe_parse_over
33938                Ok(Expression::ListAgg(Box::new(ListAggFunc {
33939                    this,
33940                    separator,
33941                    on_overflow,
33942                    order_by: None,
33943                    distinct,
33944                    filter: None,
33945                    inferred_type: None,
33946                })))
33947            }
33948            _ => unreachable!(
33949                "phase-6 aggregate parser called with non-aggregate family name '{}'",
33950                canonical_upper_name
33951            ),
33952        }
33953    }
33954
33955    fn parse_typed_window_family(
33956        &mut self,
33957        name: &str,
33958        upper_name: &str,
33959        canonical_upper_name: &str,
33960    ) -> Result<Expression> {
33961        match canonical_upper_name {
33962            // Window functions with no arguments (ClickHouse allows args in row_number)
33963            "ROW_NUMBER" => {
33964                if self.check(TokenType::RParen) {
33965                    self.skip();
33966                    Ok(Expression::RowNumber(RowNumber))
33967                } else {
33968                    // ClickHouse: row_number(column1) — parse as regular function
33969                    let args = self.parse_function_args_list()?;
33970                    self.expect(TokenType::RParen)?;
33971                    let trailing_comments = self.previous_trailing_comments().to_vec();
33972                    Ok(Expression::Function(Box::new(Function {
33973                        name: name.to_string(),
33974                        args,
33975                        distinct: false,
33976                        trailing_comments,
33977                        use_bracket_syntax: false,
33978                        no_parens: false,
33979                        quoted: false,
33980                        span: None,
33981                        inferred_type: None,
33982                    })))
33983                }
33984            }
33985            "RANK" => {
33986                // DuckDB allows: RANK(ORDER BY col) OVER (...)
33987                // Oracle allows: RANK(val1, val2, ...) WITHIN GROUP (ORDER BY ...)
33988                let (order_by, args) = if self.check(TokenType::RParen) {
33989                    // RANK() - no arguments
33990                    (None, Vec::new())
33991                } else if self.match_token(TokenType::Order) {
33992                    // DuckDB: RANK(ORDER BY col)
33993                    self.expect(TokenType::By)?;
33994                    (Some(self.parse_order_by()?.expressions), Vec::new())
33995                } else {
33996                    // Oracle hypothetical: RANK(val1, val2, ...)
33997                    let mut args = vec![self.parse_expression()?];
33998                    while self.match_token(TokenType::Comma) {
33999                        args.push(self.parse_expression()?);
34000                    }
34001                    (None, args)
34002                };
34003                self.expect(TokenType::RParen)?;
34004                Ok(Expression::Rank(Rank { order_by, args }))
34005            }
34006            "DENSE_RANK" => {
34007                // Oracle allows: DENSE_RANK(val1, val2, ...) WITHIN GROUP (ORDER BY ...)
34008                let args = if self.check(TokenType::RParen) {
34009                    Vec::new()
34010                } else {
34011                    let mut args = vec![self.parse_expression()?];
34012                    while self.match_token(TokenType::Comma) {
34013                        args.push(self.parse_expression()?);
34014                    }
34015                    args
34016                };
34017                self.expect(TokenType::RParen)?;
34018                Ok(Expression::DenseRank(DenseRank { args }))
34019            }
34020            "PERCENT_RANK" => {
34021                // DuckDB allows: PERCENT_RANK(ORDER BY col) OVER (...)
34022                // Oracle allows: PERCENT_RANK(val1, val2, ...) WITHIN GROUP (ORDER BY ...)
34023                let (order_by, args) = if self.check(TokenType::RParen) {
34024                    // PERCENT_RANK() - no arguments
34025                    (None, Vec::new())
34026                } else if self.match_token(TokenType::Order) {
34027                    // DuckDB: PERCENT_RANK(ORDER BY col)
34028                    self.expect(TokenType::By)?;
34029                    (Some(self.parse_order_by()?.expressions), Vec::new())
34030                } else {
34031                    // Oracle hypothetical: PERCENT_RANK(val1, val2, ...)
34032                    let mut args = vec![self.parse_expression()?];
34033                    while self.match_token(TokenType::Comma) {
34034                        args.push(self.parse_expression()?);
34035                    }
34036                    (None, args)
34037                };
34038                self.expect(TokenType::RParen)?;
34039                Ok(Expression::PercentRank(PercentRank { order_by, args }))
34040            }
34041            "CUME_DIST" => {
34042                // DuckDB allows: CUME_DIST(ORDER BY col) OVER (...)
34043                // Oracle allows: CUME_DIST(val1, val2, ...) WITHIN GROUP (ORDER BY ...)
34044                let (order_by, args) = if self.check(TokenType::RParen) {
34045                    // CUME_DIST() - no arguments
34046                    (None, Vec::new())
34047                } else if self.match_token(TokenType::Order) {
34048                    // DuckDB: CUME_DIST(ORDER BY col)
34049                    self.expect(TokenType::By)?;
34050                    (Some(self.parse_order_by()?.expressions), Vec::new())
34051                } else {
34052                    // Oracle hypothetical: CUME_DIST(val1, val2, ...)
34053                    let mut args = vec![self.parse_expression()?];
34054                    while self.match_token(TokenType::Comma) {
34055                        args.push(self.parse_expression()?);
34056                    }
34057                    (None, args)
34058                };
34059                self.expect(TokenType::RParen)?;
34060                Ok(Expression::CumeDist(CumeDist { order_by, args }))
34061            }
34062
34063            // NTILE
34064            "NTILE" => {
34065                // num_buckets is optional (Databricks allows NTILE() with no args)
34066                let num_buckets = if self.check(TokenType::RParen) {
34067                    None
34068                } else {
34069                    Some(self.parse_expression()?)
34070                };
34071
34072                // ClickHouse: NTILE can have extra args (e.g., ntile(3, 2)) — skip them
34073                while matches!(
34074                    self.config.dialect,
34075                    Some(crate::dialects::DialectType::ClickHouse)
34076                ) && self.match_token(TokenType::Comma)
34077                {
34078                    let _ = self.parse_expression()?;
34079                }
34080
34081                // DuckDB allows: NTILE(n ORDER BY col) OVER (...)
34082                let order_by = if self.match_token(TokenType::Order) {
34083                    self.expect(TokenType::By)?;
34084                    Some(self.parse_order_by()?.expressions)
34085                } else {
34086                    None
34087                };
34088                self.expect(TokenType::RParen)?;
34089                Ok(Expression::NTile(Box::new(NTileFunc {
34090                    num_buckets,
34091                    order_by,
34092                })))
34093            }
34094
34095            // LEAD / LAG
34096            "LEAD" | "LAG" => {
34097                let this = self.parse_expression()?;
34098                let (offset, default) = if self.match_token(TokenType::Comma) {
34099                    let off = self.parse_expression()?;
34100                    let def = if self.match_token(TokenType::Comma) {
34101                        Some(self.parse_expression()?)
34102                    } else {
34103                        None
34104                    };
34105                    (Some(off), def)
34106                } else {
34107                    (None, None)
34108                };
34109                // Check for IGNORE NULLS / RESPECT NULLS inside parens (e.g., Redshift: LAG(x IGNORE NULLS))
34110                let ignore_nulls_inside = if self.match_token(TokenType::Ignore)
34111                    && self.match_token(TokenType::Nulls)
34112                {
34113                    Some(true)
34114                } else if self.match_token(TokenType::Respect) && self.match_token(TokenType::Nulls)
34115                {
34116                    Some(false)
34117                } else {
34118                    None
34119                };
34120                self.expect(TokenType::RParen)?;
34121                // Also check for IGNORE NULLS / RESPECT NULLS after parens
34122                let ignore_nulls = if ignore_nulls_inside.is_some() {
34123                    ignore_nulls_inside
34124                } else if self.match_keywords(&[TokenType::Ignore, TokenType::Nulls]) {
34125                    Some(true)
34126                } else if self.match_keywords(&[TokenType::Respect, TokenType::Nulls]) {
34127                    Some(false)
34128                } else {
34129                    None
34130                };
34131                let func = LeadLagFunc {
34132                    this,
34133                    offset,
34134                    default,
34135                    ignore_nulls,
34136                };
34137                Ok(if upper_name == "LEAD" {
34138                    Expression::Lead(Box::new(func))
34139                } else {
34140                    Expression::Lag(Box::new(func))
34141                })
34142            }
34143
34144            // FIRST_VALUE / LAST_VALUE
34145            "FIRST_VALUE" | "LAST_VALUE" => {
34146                let this = self.parse_expression()?;
34147                // Parse ORDER BY inside parens (e.g., DuckDB: LAST_VALUE(x ORDER BY x))
34148                let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
34149                    self.parse_order_by_list()?
34150                } else {
34151                    Vec::new()
34152                };
34153                // Check for IGNORE NULLS / RESPECT NULLS inside the parens
34154                let mut ignore_nulls_inside = if self.match_token(TokenType::Ignore)
34155                    && self.match_token(TokenType::Nulls)
34156                {
34157                    Some(true)
34158                } else if self.match_token(TokenType::Respect) && self.match_token(TokenType::Nulls)
34159                {
34160                    Some(false) // RESPECT NULLS explicitly sets to false
34161                } else {
34162                    None
34163                };
34164                // Spark/Hive: first_value(col, true) means FIRST_VALUE(col) IGNORE NULLS
34165                if ignore_nulls_inside.is_none() && self.match_token(TokenType::Comma) {
34166                    let second_arg = self.parse_expression()?;
34167                    if let Expression::Boolean(BooleanLiteral { value: true }) = &second_arg {
34168                        ignore_nulls_inside = Some(true);
34169                    }
34170                    // If second arg is not true, just ignore it (not standard)
34171                }
34172                self.expect(TokenType::RParen)?;
34173                // Also check for IGNORE NULLS / RESPECT NULLS after the parens (some dialects use this syntax)
34174                let ignore_nulls: Option<bool> = if ignore_nulls_inside.is_some() {
34175                    ignore_nulls_inside
34176                } else if self.match_keywords(&[TokenType::Ignore, TokenType::Nulls]) {
34177                    Some(true)
34178                } else if self.match_keywords(&[TokenType::Respect, TokenType::Nulls]) {
34179                    Some(false)
34180                } else {
34181                    None
34182                };
34183                let func = ValueFunc {
34184                    this,
34185                    ignore_nulls,
34186                    order_by,
34187                };
34188                Ok(if upper_name == "FIRST_VALUE" {
34189                    Expression::FirstValue(Box::new(func))
34190                } else {
34191                    Expression::LastValue(Box::new(func))
34192                })
34193            }
34194
34195            // NTH_VALUE
34196            "NTH_VALUE" => {
34197                let this = self.parse_expression()?;
34198                self.expect(TokenType::Comma)?;
34199                let offset = self.parse_expression()?;
34200                // Check for IGNORE NULLS / RESPECT NULLS inside the parens
34201                let ignore_nulls_inside = if self.match_token(TokenType::Ignore)
34202                    && self.match_token(TokenType::Nulls)
34203                {
34204                    Some(true)
34205                } else if self.match_token(TokenType::Respect) && self.match_token(TokenType::Nulls)
34206                {
34207                    Some(false)
34208                } else {
34209                    None
34210                };
34211                self.expect(TokenType::RParen)?;
34212                // Check for Snowflake FROM FIRST / FROM LAST after the parens
34213                let from_first = if self.match_keywords(&[TokenType::From, TokenType::First]) {
34214                    Some(true)
34215                } else if self.match_keywords(&[TokenType::From, TokenType::Last]) {
34216                    Some(false)
34217                } else {
34218                    None
34219                };
34220                // Also check for IGNORE NULLS / RESPECT NULLS after the parens (and after FROM FIRST/LAST)
34221                let ignore_nulls: Option<bool> = if ignore_nulls_inside.is_some() {
34222                    ignore_nulls_inside
34223                } else if self.match_keywords(&[TokenType::Ignore, TokenType::Nulls]) {
34224                    Some(true)
34225                } else if self.match_keywords(&[TokenType::Respect, TokenType::Nulls]) {
34226                    Some(false)
34227                } else {
34228                    None
34229                };
34230                Ok(Expression::NthValue(Box::new(NthValueFunc {
34231                    this,
34232                    offset,
34233                    ignore_nulls,
34234                    from_first,
34235                })))
34236            }
34237            _ => unreachable!(
34238                "phase-6 window parser called with non-window family name '{}'",
34239                canonical_upper_name
34240            ),
34241        }
34242    }
34243
34244    fn parse_typed_json_family(
34245        &mut self,
34246        name: &str,
34247        upper_name: &str,
34248        canonical_upper_name: &str,
34249    ) -> Result<Expression> {
34250        match canonical_upper_name {
34251            // JSON functions
34252            "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" | "JSON_QUERY" | "JSON_VALUE" => {
34253                let this = self.parse_expression()?;
34254                // Path is optional for some dialects (e.g., TSQL JSON_QUERY with 1 arg defaults to '$')
34255                let path = if self.match_token(TokenType::Comma) {
34256                    self.parse_expression()?
34257                } else {
34258                    // Default path is '$' when not provided
34259                    Expression::Literal(Box::new(Literal::String("$".to_string())))
34260                };
34261
34262                // SQLite JSON_EXTRACT supports multiple paths - check for additional paths
34263                // If multiple paths, use generic Function instead of typed expression
34264                if self.check(TokenType::Comma)
34265                    && !self.check_identifier("WITH")
34266                    && !self.check_identifier("WITHOUT")
34267                    && !self.check_identifier("KEEP")
34268                    && !self.check_identifier("OMIT")
34269                    && !self.check_identifier("NULL")
34270                    && !self.check_identifier("ERROR")
34271                    && !self.check_identifier("EMPTY")
34272                    && !self.check(TokenType::Returning)
34273                {
34274                    let mut args = vec![this, path];
34275                    while self.match_token(TokenType::Comma) {
34276                        args.push(self.parse_expression()?);
34277                    }
34278                    self.expect(TokenType::RParen)?;
34279                    let func_expr = Expression::Function(Box::new(Function {
34280                        name: name.to_string(),
34281                        args,
34282                        distinct: false,
34283                        trailing_comments: Vec::new(),
34284                        use_bracket_syntax: false,
34285                        no_parens: false,
34286                        quoted: false,
34287                        span: None,
34288                        inferred_type: None,
34289                    }));
34290                    // Exasol: JSON_EXTRACT(...) EMITS (col1 TYPE1, col2 TYPE2)
34291                    if matches!(
34292                        self.config.dialect,
34293                        Some(crate::dialects::DialectType::Exasol)
34294                    ) && self.check_identifier("EMITS")
34295                    {
34296                        self.skip(); // consume EMITS
34297                        if let Some(schema) = self.parse_schema()? {
34298                            return Ok(Expression::FunctionEmits(Box::new(FunctionEmits {
34299                                this: func_expr,
34300                                emits: schema,
34301                            })));
34302                        }
34303                    }
34304                    return Ok(func_expr);
34305                }
34306
34307                // Parse JSON_QUERY/JSON_VALUE options (Trino/Presto style)
34308                // Options: WITH/WITHOUT [CONDITIONAL|UNCONDITIONAL] [ARRAY] WRAPPER
34309                //          KEEP QUOTES / OMIT QUOTES [ON SCALAR STRING]
34310                //          NULL ON ERROR / ERROR ON ERROR / EMPTY ON ERROR
34311                //          RETURNING type
34312                let mut wrapper_option: Option<String> = None;
34313                let mut quotes_option: Option<String> = None;
34314                let mut on_scalar_string = false;
34315                let mut on_error: Option<String> = None;
34316                let mut returning: Option<DataType> = None;
34317
34318                // Keep parsing options until we see RParen
34319                while !self.check(TokenType::RParen) {
34320                    // WITH [CONDITIONAL|UNCONDITIONAL] [ARRAY] WRAPPER - match in order of specificity
34321                    if self.match_text_seq(&["WITH", "UNCONDITIONAL", "ARRAY", "WRAPPER"]) {
34322                        wrapper_option = Some("WITH UNCONDITIONAL ARRAY WRAPPER".to_string());
34323                    } else if self.match_text_seq(&["WITH", "CONDITIONAL", "ARRAY", "WRAPPER"]) {
34324                        wrapper_option = Some("WITH CONDITIONAL ARRAY WRAPPER".to_string());
34325                    } else if self.match_text_seq(&["WITH", "UNCONDITIONAL", "WRAPPER"]) {
34326                        wrapper_option = Some("WITH UNCONDITIONAL WRAPPER".to_string());
34327                    } else if self.match_text_seq(&["WITH", "CONDITIONAL", "WRAPPER"]) {
34328                        wrapper_option = Some("WITH CONDITIONAL WRAPPER".to_string());
34329                    } else if self.match_text_seq(&["WITH", "ARRAY", "WRAPPER"]) {
34330                        wrapper_option = Some("WITH ARRAY WRAPPER".to_string());
34331                    } else if self.match_text_seq(&["WITH", "WRAPPER"]) {
34332                        wrapper_option = Some("WITH WRAPPER".to_string());
34333                    // WITHOUT [CONDITIONAL] [ARRAY] WRAPPER
34334                    } else if self.match_text_seq(&["WITHOUT", "CONDITIONAL", "ARRAY", "WRAPPER"]) {
34335                        wrapper_option = Some("WITHOUT CONDITIONAL ARRAY WRAPPER".to_string());
34336                    } else if self.match_text_seq(&["WITHOUT", "CONDITIONAL", "WRAPPER"]) {
34337                        wrapper_option = Some("WITHOUT CONDITIONAL WRAPPER".to_string());
34338                    } else if self.match_text_seq(&["WITHOUT", "ARRAY", "WRAPPER"]) {
34339                        wrapper_option = Some("WITHOUT ARRAY WRAPPER".to_string());
34340                    } else if self.match_text_seq(&["WITHOUT", "WRAPPER"]) {
34341                        wrapper_option = Some("WITHOUT WRAPPER".to_string());
34342                    } else if self.match_text_seq(&["KEEP", "QUOTES"]) {
34343                        // KEEP QUOTES
34344                        quotes_option = Some("KEEP QUOTES".to_string());
34345                    } else if self.match_text_seq(&["OMIT", "QUOTES", "ON", "SCALAR", "STRING"]) {
34346                        // OMIT QUOTES ON SCALAR STRING
34347                        quotes_option = Some("OMIT QUOTES".to_string());
34348                        on_scalar_string = true;
34349                    } else if self.match_text_seq(&["OMIT", "QUOTES"]) {
34350                        // OMIT QUOTES
34351                        quotes_option = Some("OMIT QUOTES".to_string());
34352                    } else if self.match_text_seq(&["NULL", "ON", "ERROR"]) {
34353                        on_error = Some("NULL ON ERROR".to_string());
34354                    } else if self.match_text_seq(&["ERROR", "ON", "ERROR"]) {
34355                        on_error = Some("ERROR ON ERROR".to_string());
34356                    } else if self.match_text_seq(&["EMPTY", "ON", "ERROR"]) {
34357                        on_error = Some("EMPTY ON ERROR".to_string());
34358                    } else if self.match_token(TokenType::Returning) {
34359                        // RETURNING type
34360                        returning = Some(self.parse_data_type()?);
34361                    } else {
34362                        // No more options recognized, break
34363                        break;
34364                    }
34365                }
34366
34367                self.expect(TokenType::RParen)?;
34368                let func = JsonExtractFunc {
34369                    this,
34370                    path,
34371                    returning,
34372                    arrow_syntax: false,
34373                    hash_arrow_syntax: false,
34374                    wrapper_option,
34375                    quotes_option,
34376                    on_scalar_string,
34377                    on_error,
34378                };
34379                Ok(match upper_name {
34380                    "JSON_EXTRACT" => Expression::JsonExtract(Box::new(func)),
34381                    "JSON_EXTRACT_SCALAR" => Expression::JsonExtractScalar(Box::new(func)),
34382                    "JSON_QUERY" => Expression::JsonQuery(Box::new(func)),
34383                    "JSON_VALUE" => Expression::JsonValue(Box::new(func)),
34384                    _ => unreachable!("JSON function name already matched in caller"),
34385                })
34386            }
34387            // JSON_KEYS, TO_JSON, PARSE_JSON etc. support additional args including named args (BigQuery)
34388            // e.g., JSON_KEYS(expr, depth, mode => 'lax'), TO_JSON(expr, stringify_wide_numbers => FALSE)
34389            // e.g., PARSE_JSON('{}', wide_number_mode => 'exact')
34390            "JSON_ARRAY_LENGTH" | "JSON_KEYS" | "JSON_TYPE" | "TO_JSON" | "PARSE_JSON" => {
34391                let this = self.parse_expression()?;
34392                // ClickHouse: expr AS alias inside function args
34393                let this = self.maybe_clickhouse_alias(this);
34394
34395                // Check for additional arguments (comma-separated, possibly named)
34396                if self.match_token(TokenType::Comma) {
34397                    // Has additional arguments - parse as generic Function to preserve all args
34398                    let mut all_args = vec![this];
34399                    let remaining = self.parse_function_arguments()?;
34400                    all_args.extend(remaining);
34401                    self.expect(TokenType::RParen)?;
34402                    Ok(Expression::Function(Box::new(Function {
34403                        name: name.to_string(),
34404                        args: all_args,
34405                        distinct: false,
34406                        trailing_comments: Vec::new(),
34407                        use_bracket_syntax: false,
34408                        no_parens: false,
34409                        quoted: false,
34410                        span: None,
34411                        inferred_type: None,
34412                    })))
34413                } else {
34414                    // Single argument - use typed expression
34415                    self.expect(TokenType::RParen)?;
34416                    let func = UnaryFunc::new(this);
34417                    Ok(match canonical_upper_name {
34418                        "JSON_ARRAY_LENGTH" => Expression::JsonArrayLength(Box::new(func)),
34419                        "JSON_KEYS" => Expression::JsonKeys(Box::new(func)),
34420                        "JSON_TYPE" => Expression::JsonType(Box::new(func)),
34421                        "TO_JSON" => Expression::ToJson(Box::new(func)),
34422                        "PARSE_JSON" => Expression::ParseJson(Box::new(func)),
34423                        _ => unreachable!("JSON function name already matched in caller"),
34424                    })
34425                }
34426            }
34427
34428            // JSON_OBJECT with SQL standard syntax: JSON_OBJECT('key': value, ...) or JSON_OBJECT(*)
34429            "JSON_OBJECT" => {
34430                let mut pairs = Vec::new();
34431                let mut star = false;
34432                if !self.check(TokenType::RParen) {
34433                    // Check for JSON_OBJECT(*) syntax
34434                    if self.check(TokenType::Star) && self.check_next(TokenType::RParen) {
34435                        self.skip(); // consume *
34436                        star = true;
34437                    } else {
34438                        loop {
34439                            // Check for KEY keyword for KEY 'key' IS value syntax (KEY is a keyword token)
34440                            let has_key_keyword = self.match_token(TokenType::Key);
34441                            // Parse key: try string first (for 'key' syntax), then column
34442                            let key = if let Some(s) = self.parse_string()? {
34443                                s
34444                            } else {
34445                                // Use parse_primary to handle function calls (ARRAY_AGG, CAST,
34446                                // f(x)) as well as simple columns. parse_primary does NOT call
34447                                // parse_postfix_operators, so a trailing ':' remains as a
34448                                // key/value separator and is not consumed as JSON path.
34449                                self.parse_primary()?
34450                            };
34451
34452                            // Support colon, VALUE keyword (identifier), and IS keyword (for KEY...IS syntax)
34453                            let has_separator = self.match_token(TokenType::Colon)
34454                                || self.match_identifier("VALUE")
34455                                || (has_key_keyword && self.match_token(TokenType::Is));
34456
34457                            if has_separator {
34458                                let value = self.parse_bitwise()?.ok_or_else(|| {
34459                                    self.parse_error("Expected value expression in JSON_OBJECT")
34460                                })?;
34461                                // Check for FORMAT JSON after value
34462                                let value_with_format = if self.match_text_seq(&["FORMAT", "JSON"])
34463                                {
34464                                    Expression::JSONFormat(Box::new(JSONFormat {
34465                                        this: Some(Box::new(value)),
34466                                        options: Vec::new(),
34467                                        is_json: None,
34468                                        to_json: None,
34469                                    }))
34470                                } else {
34471                                    value
34472                                };
34473                                pairs.push((key, value_with_format));
34474                            } else {
34475                                // Just key/value pairs without separator
34476                                if self.match_token(TokenType::Comma) {
34477                                    let value = self.parse_bitwise()?.ok_or_else(|| {
34478                                        self.parse_error("Expected value expression in JSON_OBJECT")
34479                                    })?;
34480                                    pairs.push((key, value));
34481                                } else {
34482                                    return Err(self
34483                                        .parse_error("Expected value expression in JSON_OBJECT"));
34484                                }
34485                            }
34486                            if !self.match_token(TokenType::Comma) {
34487                                break;
34488                            }
34489                        }
34490                    }
34491                }
34492                // Parse optional modifiers: NULL ON NULL, ABSENT ON NULL, WITH UNIQUE KEYS
34493                let null_handling = if self.match_token(TokenType::Null) {
34494                    self.match_token(TokenType::On);
34495                    self.match_token(TokenType::Null);
34496                    Some(JsonNullHandling::NullOnNull)
34497                } else if self.match_identifier("ABSENT") {
34498                    self.match_token(TokenType::On);
34499                    self.match_token(TokenType::Null);
34500                    Some(JsonNullHandling::AbsentOnNull)
34501                } else {
34502                    None
34503                };
34504                let with_unique_keys = if self.match_token(TokenType::With) {
34505                    self.match_token(TokenType::Unique);
34506                    self.match_identifier("KEYS");
34507                    true
34508                } else {
34509                    false
34510                };
34511                // Parse optional RETURNING clause: RETURNING type [FORMAT JSON] [ENCODING encoding]
34512                let (returning_type, format_json, encoding) = if self
34513                    .match_token(TokenType::Returning)
34514                {
34515                    let return_type = self.parse_data_type()?;
34516                    // Optional FORMAT JSON
34517                    let has_format_json = if self.match_token(TokenType::Format) {
34518                        // JSON might be a keyword or identifier
34519                        let _ = self.match_token(TokenType::Json) || self.match_identifier("JSON");
34520                        true
34521                    } else {
34522                        false
34523                    };
34524                    // Optional ENCODING encoding
34525                    let enc = if self.match_identifier("ENCODING") {
34526                        Some(self.expect_identifier_or_keyword()?)
34527                    } else {
34528                        None
34529                    };
34530                    (Some(return_type), has_format_json, enc)
34531                } else {
34532                    (None, false, None)
34533                };
34534                self.expect(TokenType::RParen)?;
34535                Ok(Expression::JsonObject(Box::new(JsonObjectFunc {
34536                    pairs,
34537                    null_handling,
34538                    with_unique_keys,
34539                    returning_type,
34540                    format_json,
34541                    encoding,
34542                    star,
34543                })))
34544            }
34545
34546            // JSON_ARRAY function with Oracle-specific options
34547            // JSON_ARRAY(expr [FORMAT JSON], ... [NULL ON NULL | ABSENT ON NULL] [RETURNING type] [STRICT])
34548            "JSON_ARRAY" => {
34549                let mut expressions = Vec::new();
34550                if !self.check(TokenType::RParen) {
34551                    loop {
34552                        let expr = self.parse_bitwise()?.unwrap_or(Expression::Null(Null));
34553                        // Check for FORMAT JSON after each expression
34554                        let expr_with_format = if self.match_text_seq(&["FORMAT", "JSON"]) {
34555                            Expression::JSONFormat(Box::new(JSONFormat {
34556                                this: Some(Box::new(expr)),
34557                                options: Vec::new(),
34558                                is_json: None,
34559                                to_json: None,
34560                            }))
34561                        } else {
34562                            expr
34563                        };
34564                        expressions.push(expr_with_format);
34565                        if !self.match_token(TokenType::Comma) {
34566                            break;
34567                        }
34568                    }
34569                }
34570                // Parse NULL ON NULL or ABSENT ON NULL
34571                let null_handling = if self.match_text_seq(&["NULL", "ON", "NULL"]) {
34572                    Some(Box::new(Expression::Var(Box::new(Var {
34573                        this: "NULL ON NULL".to_string(),
34574                    }))))
34575                } else if self.match_text_seq(&["ABSENT", "ON", "NULL"]) {
34576                    Some(Box::new(Expression::Var(Box::new(Var {
34577                        this: "ABSENT ON NULL".to_string(),
34578                    }))))
34579                } else {
34580                    None
34581                };
34582                // Parse RETURNING type
34583                let return_type = if self.match_token(TokenType::Returning) {
34584                    let dt = self.parse_data_type()?;
34585                    Some(Box::new(Expression::DataType(dt)))
34586                } else {
34587                    None
34588                };
34589                // Parse STRICT
34590                let strict = if self.match_identifier("STRICT") {
34591                    Some(Box::new(Expression::Boolean(BooleanLiteral {
34592                        value: true,
34593                    })))
34594                } else {
34595                    None
34596                };
34597                self.expect(TokenType::RParen)?;
34598                Ok(Expression::JSONArray(Box::new(JSONArray {
34599                    expressions,
34600                    null_handling,
34601                    return_type,
34602                    strict,
34603                })))
34604            }
34605
34606            // JSON_ARRAYAGG function with Oracle-specific options
34607            // JSON_ARRAYAGG(expr [FORMAT JSON] [ORDER BY ...] [NULL ON NULL | ABSENT ON NULL] [RETURNING type] [STRICT])
34608            "JSON_ARRAYAGG" => {
34609                let this = self.parse_bitwise()?.unwrap_or(Expression::Null(Null));
34610                // Check for FORMAT JSON after the expression
34611                let this_with_format = if self.match_text_seq(&["FORMAT", "JSON"]) {
34612                    Expression::JSONFormat(Box::new(JSONFormat {
34613                        this: Some(Box::new(this)),
34614                        options: Vec::new(),
34615                        is_json: None,
34616                        to_json: None,
34617                    }))
34618                } else {
34619                    this
34620                };
34621                // Parse ORDER BY clause
34622                let order = if self.match_token(TokenType::Order) {
34623                    self.match_token(TokenType::By);
34624                    // Parse comma-separated ordered expressions
34625                    let mut order_exprs = Vec::new();
34626                    loop {
34627                        if let Some(ordered) = self.parse_ordered_item()? {
34628                            order_exprs.push(ordered);
34629                        } else {
34630                            break;
34631                        }
34632                        if !self.match_token(TokenType::Comma) {
34633                            break;
34634                        }
34635                    }
34636                    if !order_exprs.is_empty() {
34637                        Some(Box::new(Expression::OrderBy(Box::new(OrderBy {
34638                            expressions: order_exprs,
34639                            siblings: false,
34640                            comments: Vec::new(),
34641                        }))))
34642                    } else {
34643                        None
34644                    }
34645                } else {
34646                    None
34647                };
34648                // Parse NULL ON NULL or ABSENT ON NULL
34649                let null_handling = if self.match_text_seq(&["NULL", "ON", "NULL"]) {
34650                    Some(Box::new(Expression::Var(Box::new(Var {
34651                        this: "NULL ON NULL".to_string(),
34652                    }))))
34653                } else if self.match_text_seq(&["ABSENT", "ON", "NULL"]) {
34654                    Some(Box::new(Expression::Var(Box::new(Var {
34655                        this: "ABSENT ON NULL".to_string(),
34656                    }))))
34657                } else {
34658                    None
34659                };
34660                // Parse RETURNING type
34661                let return_type = if self.match_token(TokenType::Returning) {
34662                    let dt = self.parse_data_type()?;
34663                    Some(Box::new(Expression::DataType(dt)))
34664                } else {
34665                    None
34666                };
34667                // Parse STRICT
34668                let strict = if self.match_identifier("STRICT") {
34669                    Some(Box::new(Expression::Boolean(BooleanLiteral {
34670                        value: true,
34671                    })))
34672                } else {
34673                    None
34674                };
34675                self.expect(TokenType::RParen)?;
34676                Ok(Expression::JSONArrayAgg(Box::new(JSONArrayAgg {
34677                    this: Box::new(this_with_format),
34678                    order,
34679                    null_handling,
34680                    return_type,
34681                    strict,
34682                })))
34683            }
34684
34685            // JSON_OBJECTAGG with KEY...VALUE syntax
34686            // JSON_OBJECTAGG(KEY key VALUE value) or JSON_OBJECTAGG(key: value)
34687            "JSON_OBJECTAGG" => {
34688                // Check for KEY keyword (KEY is a keyword token, not an identifier)
34689                let _has_key_keyword = self.match_token(TokenType::Key);
34690                // Parse key: use column parsing to avoid colon being interpreted as JSON path
34691                let key = self.parse_column()?.unwrap_or(Expression::Null(Null));
34692
34693                // Support colon, comma (MySQL), or VALUE keyword
34694                let _ = self.match_token(TokenType::Colon)
34695                    || self.match_token(TokenType::Comma)
34696                    || self.match_identifier("VALUE");
34697
34698                let value = self.parse_bitwise()?.unwrap_or(Expression::Null(Null));
34699                // Check for FORMAT JSON after value
34700                let value_with_format = if self.match_text_seq(&["FORMAT", "JSON"]) {
34701                    Expression::JSONFormat(Box::new(JSONFormat {
34702                        this: Some(Box::new(value)),
34703                        options: Vec::new(),
34704                        is_json: None,
34705                        to_json: None,
34706                    }))
34707                } else {
34708                    value
34709                };
34710                // Parse NULL ON NULL or ABSENT ON NULL
34711                let null_handling = if self.match_text_seq(&["NULL", "ON", "NULL"]) {
34712                    Some(Box::new(Expression::Var(Box::new(Var {
34713                        this: "NULL ON NULL".to_string(),
34714                    }))))
34715                } else if self.match_text_seq(&["ABSENT", "ON", "NULL"]) {
34716                    Some(Box::new(Expression::Var(Box::new(Var {
34717                        this: "ABSENT ON NULL".to_string(),
34718                    }))))
34719                } else {
34720                    None
34721                };
34722                // Parse WITH/WITHOUT UNIQUE KEYS
34723                let unique_keys = if self.match_text_seq(&["WITH", "UNIQUE"]) {
34724                    self.match_identifier("KEYS");
34725                    Some(Box::new(Expression::Boolean(BooleanLiteral {
34726                        value: true,
34727                    })))
34728                } else if self.match_text_seq(&["WITHOUT", "UNIQUE"]) {
34729                    self.match_identifier("KEYS");
34730                    Some(Box::new(Expression::Boolean(BooleanLiteral {
34731                        value: false,
34732                    })))
34733                } else {
34734                    None
34735                };
34736                // Parse RETURNING type
34737                let return_type = if self.match_token(TokenType::Returning) {
34738                    let dt = self.parse_data_type()?;
34739                    Some(Box::new(Expression::DataType(dt)))
34740                } else {
34741                    None
34742                };
34743                self.expect(TokenType::RParen)?;
34744                Ok(Expression::JSONObjectAgg(Box::new(JSONObjectAgg {
34745                    expressions: vec![Expression::JSONKeyValue(Box::new(JSONKeyValue {
34746                        this: Box::new(key),
34747                        expression: Box::new(value_with_format),
34748                    }))],
34749                    null_handling,
34750                    unique_keys,
34751                    return_type,
34752                    encoding: None,
34753                })))
34754            }
34755
34756            // JSON_TABLE function - MySQL/Oracle table function for JSON data
34757            // JSON_TABLE(json_doc [FORMAT JSON], path COLUMNS (column_list)) [AS alias]
34758            "JSON_TABLE" => {
34759                // Parse the JSON expression
34760                let this = self.parse_bitwise()?.unwrap_or(Expression::Null(Null));
34761                // Check for FORMAT JSON after the expression
34762                let this_with_format = if self.match_text_seq(&["FORMAT", "JSON"]) {
34763                    Expression::JSONFormat(Box::new(JSONFormat {
34764                        this: Some(Box::new(this)),
34765                        options: Vec::new(),
34766                        is_json: None,
34767                        to_json: None,
34768                    }))
34769                } else {
34770                    this
34771                };
34772
34773                // Parse path (after comma)
34774                let path = if self.match_token(TokenType::Comma) {
34775                    if let Some(s) = self.parse_string()? {
34776                        Some(Box::new(s))
34777                    } else {
34778                        None
34779                    }
34780                } else {
34781                    None
34782                };
34783
34784                // Oracle uses "ERROR ON ERROR" (value then behavior) instead of "ON ERROR ERROR"
34785                // Parse error handling: ERROR ON ERROR or NULL ON ERROR
34786                let error_handling =
34787                    if self.match_identifier("ERROR") && self.match_text_seq(&["ON", "ERROR"]) {
34788                        Some(Box::new(Expression::Var(Box::new(Var {
34789                            this: "ERROR ON ERROR".to_string(),
34790                        }))))
34791                    } else if self.match_text_seq(&["NULL", "ON", "ERROR"]) {
34792                        Some(Box::new(Expression::Var(Box::new(Var {
34793                            this: "NULL ON ERROR".to_string(),
34794                        }))))
34795                    } else {
34796                        None
34797                    };
34798
34799                // Parse empty handling: ERROR ON EMPTY or NULL ON EMPTY
34800                let empty_handling =
34801                    if self.match_identifier("ERROR") && self.match_text_seq(&["ON", "EMPTY"]) {
34802                        Some(Box::new(Expression::Var(Box::new(Var {
34803                            this: "ERROR ON EMPTY".to_string(),
34804                        }))))
34805                    } else if self.match_text_seq(&["NULL", "ON", "EMPTY"]) {
34806                        Some(Box::new(Expression::Var(Box::new(Var {
34807                            this: "NULL ON EMPTY".to_string(),
34808                        }))))
34809                    } else {
34810                        None
34811                    };
34812
34813                // Parse COLUMNS clause
34814                let schema = self.parse_json_table_columns()?;
34815
34816                self.expect(TokenType::RParen)?;
34817
34818                Ok(Expression::JSONTable(Box::new(JSONTable {
34819                    this: Box::new(this_with_format),
34820                    schema: schema.map(Box::new),
34821                    path,
34822                    error_handling,
34823                    empty_handling,
34824                })))
34825            }
34826            _ => unreachable!(
34827                "phase-6 json parser called with non-json family name '{}'",
34828                canonical_upper_name
34829            ),
34830        }
34831    }
34832
34833    fn parse_typed_translate_teradata_family(
34834        &mut self,
34835        name: &str,
34836        _upper_name: &str,
34837        canonical_upper_name: &str,
34838    ) -> Result<Expression> {
34839        match canonical_upper_name {
34840            // Teradata: TRANSLATE(x USING charset [WITH ERROR])
34841            "TRANSLATE"
34842                if matches!(
34843                    self.config.dialect,
34844                    Some(crate::dialects::DialectType::Teradata)
34845                ) =>
34846            {
34847                let this = self.parse_expression()?;
34848                if self.match_token(TokenType::Using) {
34849                    let expression = self.parse_expression()?;
34850                    let with_error = if self.match_text_seq(&["WITH", "ERROR"]) {
34851                        Some(Box::new(Expression::Boolean(BooleanLiteral {
34852                            value: true,
34853                        })))
34854                    } else {
34855                        None
34856                    };
34857                    self.expect(TokenType::RParen)?;
34858                    Ok(Expression::TranslateCharacters(Box::new(
34859                        TranslateCharacters {
34860                            this: Box::new(this),
34861                            expression: Box::new(expression),
34862                            with_error,
34863                        },
34864                    )))
34865                } else {
34866                    let mut args = vec![this];
34867                    if self.match_token(TokenType::Comma) {
34868                        let mut rest = self.parse_expression_list()?;
34869                        args.append(&mut rest);
34870                    }
34871                    self.expect(TokenType::RParen)?;
34872                    Ok(Expression::Function(Box::new(Function {
34873                        name: name.to_string(),
34874                        args,
34875                        distinct: false,
34876                        trailing_comments: Vec::new(),
34877                        use_bracket_syntax: false,
34878                        no_parens: false,
34879                        quoted: false,
34880                        span: None,
34881                        inferred_type: None,
34882                    })))
34883                }
34884            }
34885
34886            _ => unreachable!(
34887                "phase-6 translate parser called with non-translate family name '{}'",
34888                canonical_upper_name
34889            ),
34890        }
34891    }
34892
34893    /// Parse a generic function call (fallback for unrecognized functions)
34894    fn parse_generic_function(&mut self, name: &str, quoted: bool) -> Result<Expression> {
34895        let is_known_agg = Self::is_aggregate_function(name);
34896
34897        let (mut args, distinct) = if self.check(TokenType::RParen) {
34898            (Vec::new(), false)
34899        } else if self.check(TokenType::Star) {
34900            // Check for DuckDB *COLUMNS(...) syntax first
34901            if self.check_next_identifier("COLUMNS")
34902                && self
34903                    .tokens
34904                    .get(self.current + 2)
34905                    .map(|t| t.token_type == TokenType::LParen)
34906                    .unwrap_or(false)
34907            {
34908                // Parse *COLUMNS(...) as a function argument
34909                (self.parse_function_arguments()?, false)
34910            } else {
34911                // Regular star: parse star modifiers like EXCLUDE/EXCEPT/REPLACE/RENAME
34912                // e.g., COLUMNS(* EXCLUDE (empid, dept))
34913                self.skip(); // consume *
34914                let star = self.parse_star_modifiers(None)?;
34915                let mut args = vec![Expression::Star(star)];
34916                // ClickHouse: func(*, col1, col2) — star followed by more args
34917                if self.match_token(TokenType::Comma) {
34918                    let rest = self.parse_function_arguments()?;
34919                    args.extend(rest);
34920                }
34921                (args, false)
34922            }
34923        } else if self.check(TokenType::Distinct)
34924            && !self.check_next(TokenType::Comma)
34925            && !self.check_next(TokenType::RParen)
34926        {
34927            // DISTINCT as aggregate modifier: func(DISTINCT expr)
34928            // Not when followed by comma or rparen — then DISTINCT is used as an identifier value
34929            self.skip(); // consume DISTINCT
34930            (self.parse_function_arguments()?, true)
34931        } else if is_known_agg && self.match_token(TokenType::All) {
34932            // ALL is the default quantifier, just consume it
34933            (self.parse_function_arguments()?, false)
34934        } else {
34935            (self.parse_function_arguments()?, false)
34936        };
34937
34938        // For known aggregate functions, check for IGNORE NULLS, ORDER BY, LIMIT inside parens
34939        let (ignore_nulls, order_by, agg_limit) = if is_known_agg {
34940            let ignore_nulls = if self.match_token(TokenType::Ignore)
34941                && self.match_token(TokenType::Nulls)
34942            {
34943                Some(true)
34944            } else if self.match_token(TokenType::Respect) && self.match_token(TokenType::Nulls) {
34945                Some(false)
34946            } else {
34947                None
34948            };
34949
34950            let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
34951                self.parse_order_by_list()?
34952            } else {
34953                Vec::new()
34954            };
34955            let limit = if self.match_token(TokenType::Limit) {
34956                Some(Box::new(self.parse_expression()?))
34957            } else {
34958                None
34959            };
34960            (ignore_nulls, order_by, limit)
34961        } else {
34962            (None, Vec::new(), None)
34963        };
34964
34965        // ClickHouse: SETTINGS key=value, ... before closing paren in function calls
34966        if matches!(
34967            self.config.dialect,
34968            Some(crate::dialects::DialectType::ClickHouse)
34969        ) && self.check(TokenType::Settings)
34970            && self.current + 2 < self.tokens.len()
34971            && (self.tokens[self.current + 1].token_type == TokenType::Var
34972                || self.tokens[self.current + 1].token_type == TokenType::Identifier)
34973            && self.tokens[self.current + 2].token_type == TokenType::Eq
34974        {
34975            self.skip(); // consume SETTINGS
34976            loop {
34977                let _key = if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
34978                    self.advance().text
34979                } else {
34980                    break;
34981                };
34982                if self.match_token(TokenType::Eq) {
34983                    let _value = self.parse_primary()?;
34984                }
34985                if !self.match_token(TokenType::Comma) {
34986                    break;
34987                }
34988            }
34989        }
34990
34991        self.expect(TokenType::RParen)?;
34992        let trailing_comments = self.previous_trailing_comments().to_vec();
34993
34994        // Check for WITHIN GROUP (ORDER BY ...)
34995        if self.match_identifier("WITHIN") {
34996            if self.match_identifier("GROUP") {
34997                self.expect(TokenType::LParen)?;
34998                self.expect(TokenType::Order)?;
34999                self.expect(TokenType::By)?;
35000                let within_order = self.parse_order_by_list()?;
35001                self.expect(TokenType::RParen)?;
35002
35003                let func_expr = Expression::AggregateFunction(Box::new(AggregateFunction {
35004                    name: name.to_string(),
35005                    args,
35006                    distinct,
35007                    filter: None,
35008                    order_by: Vec::new(),
35009                    limit: None,
35010                    ignore_nulls: None,
35011                    inferred_type: None,
35012                }));
35013
35014                let within = Expression::WithinGroup(Box::new(WithinGroup {
35015                    this: func_expr,
35016                    order_by: within_order,
35017                }));
35018
35019                // Check for FILTER after WITHIN GROUP
35020                let filter = self.parse_filter_clause()?;
35021                if let Some(filter_expr) = filter {
35022                    return Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
35023                        name: format!("__WITHIN_GROUP_{}", name),
35024                        args: vec![within, filter_expr],
35025                        distinct: false,
35026                        filter: None,
35027                        order_by: Vec::new(),
35028                        limit: None,
35029                        ignore_nulls: None,
35030                        inferred_type: None,
35031                    })));
35032                }
35033
35034                return Ok(within);
35035            }
35036        }
35037
35038        let filter = self.parse_filter_clause()?;
35039
35040        // Check for postfix IGNORE NULLS / RESPECT NULLS after RParen
35041        let ignore_nulls = if ignore_nulls.is_some() {
35042            ignore_nulls
35043        } else if self.match_keywords(&[TokenType::Ignore, TokenType::Nulls]) {
35044            Some(true)
35045        } else if self.match_keywords(&[TokenType::Respect, TokenType::Nulls]) {
35046            Some(false)
35047        } else {
35048            None
35049        };
35050
35051        if filter.is_some() || is_known_agg || ignore_nulls.is_some() {
35052            Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
35053                name: name.to_string(),
35054                args,
35055                distinct,
35056                filter,
35057                order_by,
35058                limit: agg_limit,
35059                ignore_nulls,
35060                inferred_type: None,
35061            })))
35062        } else {
35063            self.normalize_date_part_arg(name, &mut args);
35064            let mut func = Function::new(name.to_string(), args);
35065            func.distinct = distinct;
35066            func.trailing_comments = trailing_comments;
35067            func.quoted = quoted;
35068            Ok(Expression::Function(Box::new(func)))
35069        }
35070    }
35071
35072    /// Check for an AS alias after an expression in ClickHouse function arg context.
35073    fn maybe_clickhouse_alias(&mut self, expr: Expression) -> Expression {
35074        if matches!(
35075            self.config.dialect,
35076            Some(crate::dialects::DialectType::ClickHouse)
35077        ) && self.check(TokenType::As)
35078            && !self.check_next(TokenType::RParen)
35079            && !self.check_next(TokenType::Comma)
35080        {
35081            let next_idx = self.current + 1;
35082            let is_alias = next_idx < self.tokens.len()
35083                && matches!(
35084                    self.tokens[next_idx].token_type,
35085                    TokenType::Identifier | TokenType::Var | TokenType::QuotedIdentifier
35086                );
35087            if is_alias {
35088                self.skip(); // consume AS
35089                let alias_token = self.advance();
35090                let alias_name = Identifier {
35091                    name: alias_token.text.clone(),
35092                    quoted: alias_token.token_type == TokenType::QuotedIdentifier,
35093                    trailing_comments: Vec::new(),
35094                    span: None,
35095                };
35096                return Expression::Alias(Box::new(crate::expressions::Alias {
35097                    this: expr,
35098                    alias: alias_name,
35099                    column_aliases: Vec::new(),
35100                    pre_alias_comments: Vec::new(),
35101                    trailing_comments: Vec::new(),
35102                    inferred_type: None,
35103                }));
35104            }
35105        }
35106        expr
35107    }
35108
35109    /// Parse an expression, then check for AS alias in ClickHouse function arg context.
35110    /// ClickHouse allows: func(expr AS alias, ...) where AS creates a named alias inside function args.
35111    fn parse_expression_with_clickhouse_alias(&mut self) -> Result<Expression> {
35112        let expr = self.parse_expression()?;
35113        Ok(self.maybe_clickhouse_alias(expr))
35114    }
35115
35116    /// Parse function arguments, handling named arguments (name => value, name := value)
35117    /// and TABLE/MODEL prefixed arguments (BigQuery)
35118    fn parse_function_arguments(&mut self) -> Result<Vec<Expression>> {
35119        let mut args = Vec::new();
35120
35121        loop {
35122            // ClickHouse: SETTINGS key=value, ... terminates function args
35123            // Only break if SETTINGS is followed by identifier = value pattern
35124            if matches!(
35125                self.config.dialect,
35126                Some(crate::dialects::DialectType::ClickHouse)
35127            ) && self.check(TokenType::Settings)
35128                && self.current + 2 < self.tokens.len()
35129                && (self.tokens[self.current + 1].token_type == TokenType::Var
35130                    || self.tokens[self.current + 1].token_type == TokenType::Identifier)
35131                && self.tokens[self.current + 2].token_type == TokenType::Eq
35132            {
35133                break; // will be consumed by SETTINGS handler after loop
35134            }
35135
35136            // ClickHouse: bare SELECT/WITH as function argument (e.g., view(SELECT 1), remote(..., view(SELECT ...)))
35137            if matches!(
35138                self.config.dialect,
35139                Some(crate::dialects::DialectType::ClickHouse)
35140            ) && (self.check(TokenType::Select) || self.check(TokenType::With))
35141            {
35142                let query = self.parse_statement()?;
35143                args.push(query);
35144                if !self.match_token(TokenType::Comma) {
35145                    break;
35146                }
35147                continue;
35148            }
35149
35150            // Check for TABLE ref or MODEL ref as function argument (BigQuery)
35151            // e.g., GAP_FILL(TABLE device_data, ...) or ML.PREDICT(MODEL mydataset.mymodel, ...)
35152            let is_table_or_model_arg = if !self.is_at_end() {
35153                self.check(TokenType::Table) || self.peek().text.eq_ignore_ascii_case("MODEL")
35154            } else {
35155                false
35156            };
35157            let arg = if is_table_or_model_arg {
35158                let prefix = self.peek().text.to_ascii_uppercase();
35159                let saved_pos = self.current;
35160                self.skip(); // consume TABLE or MODEL
35161
35162                // Only treat as TABLE/MODEL argument if followed by an identifier (table name),
35163                // not by => (which would be a named arg like "table => value")
35164                if !self.is_at_end()
35165                    && !self.check(TokenType::FArrow)
35166                    && !self.check(TokenType::ColonEq)
35167                {
35168                    // Parse the table/model reference (supports dotted names like dataset.table)
35169                    if let Some(table_expr) = self.parse_table_parts()? {
35170                        Expression::TableArgument(Box::new(TableArgument {
35171                            prefix,
35172                            this: table_expr,
35173                        }))
35174                    } else {
35175                        // Failed to parse table parts, backtrack and treat as regular expression
35176                        self.current = saved_pos;
35177                        self.parse_expression()?
35178                    }
35179                } else {
35180                    // TABLE/MODEL followed by => or :=, backtrack and handle as named arg
35181                    self.current = saved_pos;
35182                    if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
35183                        let ident_token = self.advance();
35184                        let ident_name = ident_token.text.clone();
35185                        if self.match_token(TokenType::FArrow) {
35186                            let value = self.parse_expression()?;
35187                            Expression::NamedArgument(Box::new(NamedArgument {
35188                                name: Identifier::new(ident_name),
35189                                value,
35190                                separator: NamedArgSeparator::DArrow,
35191                            }))
35192                        } else if self.match_token(TokenType::ColonEq) {
35193                            let value = self.parse_expression()?;
35194                            Expression::NamedArgument(Box::new(NamedArgument {
35195                                name: Identifier::new(ident_name),
35196                                value,
35197                                separator: NamedArgSeparator::ColonEq,
35198                            }))
35199                        } else {
35200                            self.current = saved_pos;
35201                            self.parse_expression()?
35202                        }
35203                    } else {
35204                        self.parse_expression()?
35205                    }
35206                }
35207            } else if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
35208                // Try to parse:
35209                // 1. Named argument: identifier => value or identifier := value
35210                // 2. Snowflake lambda with type: identifier type -> body (e.g., a int -> a + 1)
35211                // Save position to backtrack if not a named argument
35212                let saved_pos = self.current;
35213
35214                // Try to get identifier
35215                let ident_token = self.advance();
35216                let ident_name = ident_token.text.clone();
35217
35218                // PostgreSQL/Redshift VARIADIC keyword: backtrack and let parse_expression handle it
35219                // VARIADIC ARRAY[...] must not be misinterpreted as a lambda with type annotation
35220                if ident_name.eq_ignore_ascii_case("VARIADIC")
35221                    && matches!(
35222                        self.config.dialect,
35223                        Some(crate::dialects::DialectType::PostgreSQL)
35224                            | Some(crate::dialects::DialectType::Redshift)
35225                    )
35226                {
35227                    self.current = saved_pos;
35228                    self.parse_expression()?
35229                }
35230                // Check for Snowflake lambda with type annotation: a int -> body
35231                // Look ahead to see if we have a type token followed by ->
35232                else if !self.is_at_end()
35233                    && self.is_type_keyword()
35234                    && !self.check(TokenType::FArrow)
35235                    && !self.check(TokenType::ColonEq)
35236                {
35237                    // Parse type annotation
35238                    let type_annotation = self.parse_data_type()?;
35239
35240                    // Check for arrow
35241                    if self.match_token(TokenType::Arrow) {
35242                        // This is a Snowflake lambda: param type -> body
35243                        let body = self.parse_expression()?;
35244                        Expression::Lambda(Box::new(LambdaExpr {
35245                            parameters: vec![Identifier::new(ident_name)],
35246                            body,
35247                            colon: false,
35248                            parameter_types: vec![Some(type_annotation)],
35249                        }))
35250                    } else {
35251                        // Not a lambda, backtrack and parse as regular expression
35252                        self.current = saved_pos;
35253                        self.parse_expression()?
35254                    }
35255                }
35256                // ClickHouse: simple lambda without type annotation: ident -> body
35257                else if self.match_token(TokenType::Arrow) {
35258                    let body = self.parse_expression()?;
35259                    Expression::Lambda(Box::new(LambdaExpr {
35260                        parameters: vec![Identifier::new(ident_name)],
35261                        body,
35262                        colon: false,
35263                        parameter_types: Vec::new(),
35264                    }))
35265                }
35266                // Check for named argument separator (=> is FArrow)
35267                else if self.match_token(TokenType::FArrow) {
35268                    // name => value
35269                    let value = self.parse_expression()?;
35270                    Expression::NamedArgument(Box::new(NamedArgument {
35271                        name: Identifier::new(ident_name),
35272                        value,
35273                        separator: NamedArgSeparator::DArrow,
35274                    }))
35275                } else if self.match_token(TokenType::ColonEq) {
35276                    // name := value
35277                    let value = self.parse_expression()?;
35278                    Expression::NamedArgument(Box::new(NamedArgument {
35279                        name: Identifier::new(ident_name),
35280                        value,
35281                        separator: NamedArgSeparator::ColonEq,
35282                    }))
35283                } else {
35284                    // Not a named argument, backtrack and parse as regular expression
35285                    self.current = saved_pos;
35286                    self.parse_expression()?
35287                }
35288            } else {
35289                // Regular expression
35290                self.parse_expression()?
35291            };
35292
35293            // Handle AS alias inside function arguments (e.g. ClickHouse: arrayJoin([1,2,3] AS src))
35294            let arg = if matches!(
35295                self.config.dialect,
35296                Some(crate::dialects::DialectType::ClickHouse)
35297            ) && self.check(TokenType::As)
35298                && !self.check_next(TokenType::RParen)
35299                && !self.check_next(TokenType::Comma)
35300            {
35301                // Look ahead: AS followed by identifier/keyword, then ) or , means it's an alias
35302                let next_idx = self.current + 1;
35303                let after_alias_idx = self.current + 2;
35304                let is_alias_token = next_idx < self.tokens.len()
35305                    && (matches!(
35306                        self.tokens[next_idx].token_type,
35307                        TokenType::Identifier | TokenType::Var | TokenType::QuotedIdentifier
35308                    ) || self.tokens[next_idx].token_type.is_keyword());
35309                // Ensure the token AFTER the alias is ) or , (function arg boundary)
35310                let is_alias = is_alias_token
35311                    && after_alias_idx < self.tokens.len()
35312                    && matches!(
35313                        self.tokens[after_alias_idx].token_type,
35314                        TokenType::RParen | TokenType::Comma
35315                    );
35316                if is_alias {
35317                    self.skip(); // consume AS
35318                    let alias_token = self.advance();
35319                    let alias_name = if alias_token.token_type == TokenType::QuotedIdentifier {
35320                        let mut ident = Identifier::new(alias_token.text.clone());
35321                        ident.quoted = true;
35322                        ident
35323                    } else {
35324                        Identifier::new(alias_token.text.clone())
35325                    };
35326                    Expression::Alias(Box::new(crate::expressions::Alias {
35327                        this: arg,
35328                        alias: alias_name,
35329                        column_aliases: Vec::new(),
35330                        pre_alias_comments: Vec::new(),
35331                        trailing_comments: Vec::new(),
35332                        inferred_type: None,
35333                    }))
35334                } else {
35335                    arg
35336                }
35337            } else {
35338                arg
35339            };
35340
35341            // ClickHouse: implicit alias without AS keyword: func(expr identifier, ...)
35342            let arg = self.try_clickhouse_implicit_alias(arg);
35343
35344            // Handle trailing comments
35345            let trailing_comments = self.previous_trailing_comments().to_vec();
35346            let arg = if trailing_comments.is_empty() {
35347                arg
35348            } else {
35349                match &arg {
35350                    Expression::Literal(_) | Expression::Boolean(_) | Expression::Null(_) => {
35351                        Expression::Annotated(Box::new(Annotated {
35352                            this: arg,
35353                            trailing_comments,
35354                        }))
35355                    }
35356                    _ => arg,
35357                }
35358            };
35359
35360            args.push(arg);
35361
35362            if !self.match_token(TokenType::Comma) {
35363                break;
35364            }
35365            // Skip consecutive commas (Snowflake allows skipping optional named args)
35366            // e.g., ROUND(SCALE => 1, EXPR => 2.25, , ROUNDING_MODE => 'HALF_TO_EVEN')
35367            while self.check(TokenType::Comma) {
35368                self.skip();
35369            }
35370        }
35371
35372        // ClickHouse: SETTINGS key=value, ... at end of function args before RParen
35373        if matches!(
35374            self.config.dialect,
35375            Some(crate::dialects::DialectType::ClickHouse)
35376        ) && self.check(TokenType::Settings)
35377            && self.current + 2 < self.tokens.len()
35378            && (self.tokens[self.current + 1].token_type == TokenType::Var
35379                || self.tokens[self.current + 1].token_type == TokenType::Identifier)
35380            && self.tokens[self.current + 2].token_type == TokenType::Eq
35381        {
35382            self.skip(); // consume SETTINGS
35383            loop {
35384                let _key = if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
35385                    self.advance().text
35386                } else {
35387                    break;
35388                };
35389                if self.match_token(TokenType::Eq) {
35390                    let _value = self.parse_primary()?;
35391                }
35392                if !self.match_token(TokenType::Comma) {
35393                    break;
35394                }
35395            }
35396        }
35397
35398        Ok(args)
35399    }
35400
35401    /// Parse optional FILTER clause
35402    fn parse_filter_clause(&mut self) -> Result<Option<Expression>> {
35403        if self.match_token(TokenType::Filter) {
35404            self.expect(TokenType::LParen)?;
35405            // WHERE is optional (DuckDB allows FILTER(condition) without WHERE)
35406            self.match_token(TokenType::Where);
35407            let filter_expr = self.parse_expression()?;
35408            self.expect(TokenType::RParen)?;
35409            Ok(Some(filter_expr))
35410        } else {
35411            Ok(None)
35412        }
35413    }
35414
35415    /// Parse STRUCT arguments with optional AS aliases: STRUCT(x, y AS name, ...)
35416    fn parse_struct_args(&mut self) -> Result<Vec<Expression>> {
35417        let mut args = Vec::new();
35418
35419        loop {
35420            let expr = self.parse_expression()?;
35421
35422            // Check for AS alias
35423            if self.match_token(TokenType::As) {
35424                let alias = self.expect_identifier_or_keyword()?;
35425                args.push(Expression::Alias(Box::new(Alias {
35426                    this: expr,
35427                    alias: Identifier::new(alias),
35428                    column_aliases: Vec::new(),
35429                    pre_alias_comments: Vec::new(),
35430                    trailing_comments: Vec::new(),
35431                    inferred_type: None,
35432                })));
35433            } else {
35434                args.push(expr);
35435            }
35436
35437            if !self.match_token(TokenType::Comma) {
35438                break;
35439            }
35440        }
35441
35442        Ok(args)
35443    }
35444
35445    /// Maybe parse OVER clause for window functions or WITHIN GROUP for ordered-set aggregates
35446    fn maybe_parse_over(&mut self, expr: Expression) -> Result<Expression> {
35447        let expr = self.maybe_parse_subscript(expr)?;
35448
35449        // For Oracle: Check for interval span after expression (e.g., (expr) DAY(9) TO SECOND(3))
35450        // https://docs.oracle.com/en/database/oracle/oracle-database/26/sqlrf/Interval-Expressions.html
35451        let expr = if matches!(
35452            self.config.dialect,
35453            Some(crate::dialects::DialectType::Oracle)
35454        ) {
35455            self.try_parse_oracle_interval_span(expr)?
35456        } else {
35457            expr
35458        };
35459
35460        // Check for WITHIN GROUP (for ordered-set aggregate functions like LISTAGG, PERCENTILE_CONT)
35461        let expr = if self.check(TokenType::Within) && self.check_next(TokenType::Group) {
35462            self.skip(); // consume WITHIN
35463            self.skip(); // consume GROUP
35464            self.expect(TokenType::LParen)?;
35465            self.expect(TokenType::Order)?;
35466            self.expect(TokenType::By)?;
35467            let order_by = self.parse_order_by_list()?;
35468            self.expect(TokenType::RParen)?;
35469            Expression::WithinGroup(Box::new(WithinGroup {
35470                this: expr,
35471                order_by,
35472            }))
35473        } else {
35474            expr
35475        };
35476
35477        // Check for FILTER clause (can follow WITHIN GROUP or standalone aggregate)
35478        // SQL:2003 syntax: aggregate_function(...) FILTER (WHERE condition)
35479        let expr = if self.match_token(TokenType::Filter) {
35480            self.expect(TokenType::LParen)?;
35481            // WHERE is required in standard SQL FILTER clause
35482            self.expect(TokenType::Where)?;
35483            let filter_expr = self.parse_expression()?;
35484            self.expect(TokenType::RParen)?;
35485            Expression::Filter(Box::new(Filter {
35486                this: Box::new(expr),
35487                expression: Box::new(filter_expr),
35488            }))
35489        } else {
35490            expr
35491        };
35492
35493        // ClickHouse: IGNORE NULLS / RESPECT NULLS modifier after function call (before OVER)
35494        // This handles cases like: func(args) IGNORE NULLS OVER w
35495        // and parametric aggregates: func(params)(args) IGNORE NULLS
35496        let expr = if matches!(
35497            self.config.dialect,
35498            Some(crate::dialects::DialectType::ClickHouse)
35499        ) && (self.match_keywords(&[TokenType::Ignore, TokenType::Nulls])
35500            || self.match_keywords(&[TokenType::Respect, TokenType::Nulls]))
35501        {
35502            // Consume the modifier — we don't need to store it for transpilation
35503            expr
35504        } else {
35505            expr
35506        };
35507
35508        // Check for KEEP clause (Oracle: aggregate KEEP (DENSE_RANK FIRST|LAST ORDER BY ...))
35509        // Only if KEEP is followed by LPAREN - otherwise KEEP is used as an alias
35510        let keep = if self.check(TokenType::Keep) && self.check_next(TokenType::LParen) {
35511            self.skip(); // consume KEEP
35512            Some(self.parse_keep_clause()?)
35513        } else {
35514            None
35515        };
35516
35517        // Check for OVER clause (can follow KEEP, FILTER, WITHIN GROUP, or standalone aggregate)
35518        if self.match_token(TokenType::Over) {
35519            let over = self.parse_over_clause()?;
35520            Ok(Expression::WindowFunction(Box::new(WindowFunction {
35521                this: expr,
35522                over,
35523                keep,
35524                inferred_type: None,
35525            })))
35526        } else if keep.is_some() {
35527            // KEEP without OVER - still a window-like construct
35528            // Create a WindowFunction with empty Over
35529            Ok(Expression::WindowFunction(Box::new(WindowFunction {
35530                this: expr,
35531                over: Over {
35532                    window_name: None,
35533                    partition_by: Vec::new(),
35534                    order_by: Vec::new(),
35535                    frame: None,
35536                    alias: None,
35537                },
35538                keep,
35539                inferred_type: None,
35540            })))
35541        } else {
35542            Ok(expr)
35543        }
35544    }
35545
35546    /// ClickHouse: parse parameterized aggregate functions like func(params)(args)
35547    fn maybe_parse_clickhouse_parameterized_agg(&mut self, expr: Expression) -> Result<Expression> {
35548        if !matches!(
35549            self.config.dialect,
35550            Some(crate::dialects::DialectType::ClickHouse)
35551        ) {
35552            return Ok(expr);
35553        }
35554        if !self.check(TokenType::LParen) {
35555            return Ok(expr);
35556        }
35557
35558        let (name, quoted, params) = match expr {
35559            Expression::Function(func) => (func.name, func.quoted, func.args),
35560            Expression::AggregateFunction(agg) => {
35561                if agg.distinct
35562                    || agg.filter.is_some()
35563                    || !agg.order_by.is_empty()
35564                    || agg.limit.is_some()
35565                    || agg.ignore_nulls.is_some()
35566                {
35567                    return Ok(Expression::AggregateFunction(agg));
35568                }
35569                (agg.name, false, agg.args)
35570            }
35571            _ => return Ok(expr),
35572        };
35573
35574        self.skip(); // consume (
35575                     // Handle DISTINCT in second arg list: func(params)(DISTINCT args)
35576        let distinct = self.match_token(TokenType::Distinct);
35577        let expressions = if self.check(TokenType::RParen) {
35578            Vec::new()
35579        } else {
35580            self.parse_function_arguments()?
35581        };
35582        self.expect(TokenType::RParen)?;
35583
35584        let ident = Identifier {
35585            name,
35586            quoted,
35587            trailing_comments: Vec::new(),
35588            span: None,
35589        };
35590
35591        // If DISTINCT was used, wrap the result to indicate it
35592        // For now, we just include it in the CombinedParameterizedAgg
35593        let _ = distinct; // DISTINCT is consumed but not separately tracked in this AST node
35594        Ok(Expression::CombinedParameterizedAgg(Box::new(
35595            CombinedParameterizedAgg {
35596                this: Box::new(Expression::Identifier(ident)),
35597                params,
35598                expressions,
35599            },
35600        )))
35601    }
35602
35603    /// Parse Oracle KEEP clause: KEEP (DENSE_RANK FIRST|LAST ORDER BY ...)
35604    fn parse_keep_clause(&mut self) -> Result<Keep> {
35605        self.expect(TokenType::LParen)?;
35606
35607        // Expect DENSE_RANK
35608        if !self.match_identifier("DENSE_RANK") {
35609            return Err(self.parse_error("Expected DENSE_RANK in KEEP clause"));
35610        }
35611
35612        // Expect FIRST or LAST
35613        let first = if self.match_token(TokenType::First) {
35614            true
35615        } else if self.match_token(TokenType::Last) {
35616            false
35617        } else {
35618            return Err(self.parse_error("Expected FIRST or LAST in KEEP clause"));
35619        };
35620
35621        // Expect ORDER BY
35622        self.expect(TokenType::Order)?;
35623        self.expect(TokenType::By)?;
35624
35625        let order_by = self.parse_order_by_list()?;
35626
35627        self.expect(TokenType::RParen)?;
35628
35629        Ok(Keep { first, order_by })
35630    }
35631
35632    /// Parse a JSON path operand - just the immediate literal/identifier without any subscript processing
35633    /// This is used for JSON arrow operators (->, ->>) to get proper left-to-right associativity
35634    fn parse_json_path_operand(&mut self) -> Result<Expression> {
35635        // Negative number literal (e.g., -1)
35636        if self.check(TokenType::Dash) {
35637            let dash_pos = self.current;
35638            self.skip(); // consume the dash
35639            if self.check(TokenType::Number) {
35640                let token = self.advance();
35641                return Ok(Expression::Neg(Box::new(UnaryOp {
35642                    this: Expression::Literal(Box::new(Literal::Number(token.text))),
35643                    inferred_type: None,
35644                })));
35645            }
35646            // Not a negative number, backtrack
35647            self.current = dash_pos;
35648        }
35649
35650        // Number literal
35651        if self.check(TokenType::Number) {
35652            let token = self.advance();
35653            // Check for numeric literal suffix encoded as "number::TYPE" by tokenizer
35654            if let Some(sep_pos) = token.text.find("::") {
35655                let num_part = &token.text[..sep_pos];
35656                let type_name = &token.text[sep_pos + 2..];
35657                let num_expr = Expression::Literal(Box::new(Literal::Number(num_part.to_string())));
35658                let data_type = match type_name {
35659                    "BIGINT" => crate::expressions::DataType::BigInt { length: None },
35660                    "SMALLINT" => crate::expressions::DataType::SmallInt { length: None },
35661                    "TINYINT" => crate::expressions::DataType::TinyInt { length: None },
35662                    "DOUBLE" => crate::expressions::DataType::Double {
35663                        precision: None,
35664                        scale: None,
35665                    },
35666                    "FLOAT" => crate::expressions::DataType::Float {
35667                        precision: None,
35668                        scale: None,
35669                        real_spelling: false,
35670                    },
35671                    "DECIMAL" => crate::expressions::DataType::Decimal {
35672                        precision: None,
35673                        scale: None,
35674                    },
35675                    _ => crate::expressions::DataType::Custom {
35676                        name: type_name.to_string(),
35677                    },
35678                };
35679                return Ok(Expression::TryCast(Box::new(crate::expressions::Cast {
35680                    this: num_expr,
35681                    to: data_type,
35682                    trailing_comments: Vec::new(),
35683                    double_colon_syntax: false,
35684                    format: None,
35685                    default: None,
35686                    inferred_type: None,
35687                })));
35688            }
35689            return Ok(Expression::Literal(Box::new(Literal::Number(token.text))));
35690        }
35691
35692        // String literal
35693        if self.check(TokenType::String) {
35694            let token = self.advance();
35695            return Ok(Expression::Literal(Box::new(Literal::String(token.text))));
35696        }
35697
35698        // Parenthesized expression (for complex paths)
35699        if self.match_token(TokenType::LParen) {
35700            let expr = self.parse_expression()?;
35701            self.expect(TokenType::RParen)?;
35702            return Ok(Expression::Paren(Box::new(Paren {
35703                this: expr,
35704                trailing_comments: Vec::new(),
35705            })));
35706        }
35707
35708        // Array literal: ['$.family', '$.species']
35709        // Used in DuckDB for multi-path JSON extraction
35710        if self.match_token(TokenType::LBracket) {
35711            // Empty array: []
35712            if self.match_token(TokenType::RBracket) {
35713                return Ok(Expression::ArrayFunc(Box::new(ArrayConstructor {
35714                    expressions: Vec::new(),
35715                    bracket_notation: true,
35716                    use_list_keyword: false,
35717                })));
35718            }
35719
35720            // Parse array elements
35721            let mut expressions = vec![self.parse_expression()?];
35722            while self.match_token(TokenType::Comma) {
35723                if self.check(TokenType::RBracket) {
35724                    break;
35725                }
35726                expressions.push(self.parse_expression()?);
35727            }
35728            self.expect(TokenType::RBracket)?;
35729
35730            return Ok(Expression::ArrayFunc(Box::new(ArrayConstructor {
35731                expressions,
35732                bracket_notation: true,
35733                use_list_keyword: false,
35734            })));
35735        }
35736
35737        // Identifier (possibly qualified like table.column)
35738        if self.is_identifier_token() {
35739            let first_ident = self.expect_identifier_with_quoted()?;
35740
35741            // Check for qualified name: identifier.identifier
35742            if self.match_token(TokenType::Dot) {
35743                if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
35744                    let second_ident = if self.is_identifier_token() {
35745                        self.expect_identifier_with_quoted()?
35746                    } else {
35747                        let token = self.advance();
35748                        Identifier::new(token.text)
35749                    };
35750                    return Ok(Expression::boxed_column(Column {
35751                        name: second_ident,
35752                        table: Some(first_ident),
35753                        join_mark: false,
35754                        trailing_comments: Vec::new(),
35755                        span: None,
35756                        inferred_type: None,
35757                    }));
35758                }
35759            }
35760
35761            return Ok(Expression::boxed_column(Column {
35762                name: first_ident,
35763                table: None,
35764                join_mark: false,
35765                trailing_comments: Vec::new(),
35766                span: None,
35767                inferred_type: None,
35768            }));
35769        }
35770
35771        // Keywords as identifiers (possibly qualified)
35772        if self.is_safe_keyword_as_identifier() {
35773            let token = self.advance();
35774            let first_ident = Identifier::new(token.text);
35775
35776            // Check for qualified name: identifier.identifier
35777            if self.match_token(TokenType::Dot) {
35778                if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
35779                    let second_ident = if self.is_identifier_token() {
35780                        self.expect_identifier_with_quoted()?
35781                    } else {
35782                        let token = self.advance();
35783                        Identifier::new(token.text)
35784                    };
35785                    return Ok(Expression::boxed_column(Column {
35786                        name: second_ident,
35787                        table: Some(first_ident),
35788                        join_mark: false,
35789                        trailing_comments: Vec::new(),
35790                        span: None,
35791                        inferred_type: None,
35792                    }));
35793                }
35794            }
35795
35796            return Ok(Expression::boxed_column(Column {
35797                name: first_ident,
35798                table: None,
35799                join_mark: false,
35800                trailing_comments: Vec::new(),
35801                span: None,
35802                inferred_type: None,
35803            }));
35804        }
35805
35806        Err(self.parse_error(format!(
35807            "Unexpected token in JSON path: {:?}",
35808            self.peek().token_type
35809        )))
35810    }
35811
35812    /// Maybe parse subscript access (array[index], struct.field)
35813    fn maybe_parse_subscript(&mut self, mut expr: Expression) -> Result<Expression> {
35814        loop {
35815            // ClickHouse: empty brackets [] in JSON paths represent Array(JSON) type access.
35816            // json.a.b[] -> json.a.b.:"Array(JSON)"
35817            // json.a.b[][] -> json.a.b.:"Array(Array(JSON))"
35818            // Check for consecutive empty bracket pairs before normal bracket handling.
35819            if matches!(
35820                self.config.dialect,
35821                Some(crate::dialects::DialectType::ClickHouse)
35822            ) && self.check(TokenType::LBracket)
35823            {
35824                let is_empty_bracket = self
35825                    .peek_nth(1)
35826                    .map_or(false, |t| t.token_type == TokenType::RBracket);
35827                if is_empty_bracket {
35828                    let mut bracket_json_type: Option<DataType> = None;
35829                    while self.check(TokenType::LBracket) {
35830                        let is_empty = self
35831                            .peek_nth(1)
35832                            .map_or(false, |t| t.token_type == TokenType::RBracket);
35833                        if is_empty {
35834                            self.skip(); // consume [
35835                            self.skip(); // consume ]
35836                            bracket_json_type = Some(DataType::Array {
35837                                element_type: Box::new(bracket_json_type.unwrap_or(DataType::Json)),
35838                                dimension: None,
35839                            });
35840                        } else {
35841                            break;
35842                        }
35843                    }
35844                    if let Some(json_type) = bracket_json_type {
35845                        expr = Expression::JSONCast(Box::new(crate::expressions::JSONCast {
35846                            this: Box::new(expr),
35847                            to: json_type,
35848                        }));
35849                        continue;
35850                    }
35851                }
35852            }
35853
35854            if self.match_token(TokenType::LBracket) {
35855                // Check if expr is an array/list constructor keyword (ARRAY[...] or LIST[...])
35856                let array_constructor_type = match &expr {
35857                    Expression::Column(col) if col.table.is_none() => {
35858                        let upper = col.name.name.to_ascii_uppercase();
35859                        if upper == "ARRAY" || upper == "LIST" {
35860                            Some(upper)
35861                        } else {
35862                            None
35863                        }
35864                    }
35865                    Expression::Identifier(id) => {
35866                        let upper = id.name.to_ascii_uppercase();
35867                        if upper == "ARRAY" || upper == "LIST" {
35868                            Some(upper)
35869                        } else {
35870                            None
35871                        }
35872                    }
35873                    _ => None,
35874                };
35875
35876                if let Some(constructor_type) = array_constructor_type {
35877                    // Parse ARRAY[expr, expr, ...] or LIST[expr, expr, ...]
35878                    // bracket_notation=false means we have the ARRAY/LIST keyword prefix
35879                    let use_list_keyword = constructor_type == "LIST";
35880                    if self.check(TokenType::RBracket) {
35881                        // Empty array: ARRAY[]
35882                        self.skip();
35883                        expr = Expression::ArrayFunc(Box::new(ArrayConstructor {
35884                            expressions: Vec::new(),
35885                            bracket_notation: false, // Has ARRAY/LIST keyword
35886                            use_list_keyword,
35887                        }));
35888                    } else {
35889                        let expressions = self.parse_expression_list()?;
35890                        self.expect(TokenType::RBracket)?;
35891                        expr = Expression::ArrayFunc(Box::new(ArrayConstructor {
35892                            expressions,
35893                            bracket_notation: false, // Has ARRAY/LIST keyword
35894                            use_list_keyword,
35895                        }));
35896                    }
35897                    continue;
35898                }
35899
35900                // Special case: MAP[...] constructor syntax
35901                // Check if expr is a MAP identifier
35902                // ClickHouse: map[key] is always subscript access, not a MAP constructor
35903                let is_map_constructor = !matches!(
35904                    self.config.dialect,
35905                    Some(crate::dialects::DialectType::ClickHouse)
35906                ) && match &expr {
35907                    Expression::Column(col) => {
35908                        col.name.name.eq_ignore_ascii_case("MAP") && col.table.is_none()
35909                    }
35910                    Expression::Identifier(id) => id.name.eq_ignore_ascii_case("MAP"),
35911                    _ => false,
35912                };
35913
35914                if is_map_constructor {
35915                    let is_materialize = matches!(
35916                        self.config.dialect,
35917                        Some(crate::dialects::DialectType::Materialize)
35918                    );
35919
35920                    // Materialize: MAP[] empty map or MAP['a' => 1, ...] with fat arrow
35921                    if is_materialize {
35922                        if self.check(TokenType::RBracket) {
35923                            // Empty map: MAP[]
35924                            self.skip();
35925                            expr = Expression::ToMap(Box::new(ToMap {
35926                                this: Box::new(Expression::Struct(Box::new(Struct {
35927                                    fields: Vec::new(),
35928                                }))),
35929                            }));
35930                            continue;
35931                        }
35932
35933                        // Parse MAP['a' => 1, 'b' => 2, ...] with fat arrow entries
35934                        // Store entries as PropertyEQ expressions (key => value)
35935                        let mut entries = Vec::new();
35936                        loop {
35937                            let key = self.parse_expression()?;
35938                            self.expect(TokenType::FArrow)?;
35939                            let value = self.parse_expression()?;
35940                            // Store as PropertyEQ which will be output as key => value
35941                            entries.push((
35942                                None,
35943                                Expression::PropertyEQ(Box::new(BinaryOp::new(key, value))),
35944                            ));
35945
35946                            if !self.match_token(TokenType::Comma) {
35947                                break;
35948                            }
35949                        }
35950                        self.expect(TokenType::RBracket)?;
35951
35952                        expr = Expression::ToMap(Box::new(ToMap {
35953                            this: Box::new(Expression::Struct(Box::new(Struct {
35954                                fields: entries,
35955                            }))),
35956                        }));
35957                        continue;
35958                    }
35959
35960                    // DuckDB/BigQuery: MAP[keys, values] syntax
35961                    let keys = self.parse_expression()?;
35962                    self.expect(TokenType::Comma)?;
35963                    let values = self.parse_expression()?;
35964                    self.expect(TokenType::RBracket)?;
35965                    expr = Expression::Function(Box::new(Function {
35966                        name: "MAP".to_string(),
35967                        args: vec![keys, values],
35968                        distinct: false,
35969                        trailing_comments: Vec::new(),
35970                        use_bracket_syntax: true,
35971                        no_parens: false,
35972                        quoted: false,
35973                        span: None,
35974                        inferred_type: None,
35975                    }));
35976                    continue;
35977                }
35978
35979                // Check for slice syntax: [start:end:step]
35980                // Handle [:...] case where start is omitted
35981                if self.check(TokenType::Colon) {
35982                    self.skip(); // consume first :
35983                                 // Parse end - use parse_slice_element to avoid : being interpreted as parameter
35984                    let end = self.parse_slice_element()?;
35985                    // Check for step (second colon)
35986                    let step = if self.match_token(TokenType::Colon) {
35987                        self.parse_slice_element()?
35988                    } else {
35989                        None
35990                    };
35991                    self.expect(TokenType::RBracket)?;
35992                    if step.is_some() {
35993                        // Three-part slice with step: Subscript with Slice index
35994                        let slice = Expression::Slice(Box::new(Slice {
35995                            this: None, // start is omitted
35996                            expression: end.map(Box::new),
35997                            step: step.map(Box::new),
35998                        }));
35999                        expr = Expression::Subscript(Box::new(Subscript {
36000                            this: expr,
36001                            index: slice,
36002                        }));
36003                    } else {
36004                        expr = Expression::ArraySlice(Box::new(ArraySlice {
36005                            this: expr,
36006                            start: None,
36007                            end,
36008                        }));
36009                    }
36010                } else {
36011                    let start = self.parse_slice_element()?;
36012                    // Check if this is a slice
36013                    if self.match_token(TokenType::Colon) {
36014                        let end = self.parse_slice_element()?;
36015                        // Check for step (second colon)
36016                        let step = if self.match_token(TokenType::Colon) {
36017                            self.parse_slice_element()?
36018                        } else {
36019                            None
36020                        };
36021                        self.expect(TokenType::RBracket)?;
36022                        if step.is_some() {
36023                            // Three-part slice with step: Subscript with Slice index
36024                            let slice = Expression::Slice(Box::new(Slice {
36025                                this: start.map(Box::new),
36026                                expression: end.map(Box::new),
36027                                step: step.map(Box::new),
36028                            }));
36029                            expr = Expression::Subscript(Box::new(Subscript {
36030                                this: expr,
36031                                index: slice,
36032                            }));
36033                        } else {
36034                            expr = Expression::ArraySlice(Box::new(ArraySlice {
36035                                this: expr,
36036                                start,
36037                                end,
36038                            }));
36039                        }
36040                    } else {
36041                        self.expect(TokenType::RBracket)?;
36042                        // Simple subscript access - start must be Some
36043                        let index =
36044                            start.unwrap_or_else(|| Expression::Null(crate::expressions::Null));
36045                        expr = Expression::Subscript(Box::new(Subscript { this: expr, index }));
36046                    }
36047                }
36048            } else if self.match_token(TokenType::DotColon) {
36049                // In ClickHouse, the type after .: may be a quoted identifier like "Array(JSON)"
36050                // which needs to be re-parsed as a proper data type.
36051                let data_type = if matches!(
36052                    self.config.dialect,
36053                    Some(crate::dialects::DialectType::ClickHouse)
36054                ) && self.check(TokenType::QuotedIdentifier)
36055                {
36056                    let type_text = self.advance().text.clone();
36057                    // Re-parse the quoted identifier text as a data type
36058                    self.parse_data_type_from_text(&type_text)?
36059                } else {
36060                    self.parse_data_type()?
36061                };
36062                expr = Expression::JSONCast(Box::new(JSONCast {
36063                    this: Box::new(expr),
36064                    to: data_type,
36065                }));
36066            } else if self.match_token(TokenType::Dot) {
36067                // Handle chained dot access (a.b.c.d)
36068                if self.match_token(TokenType::Star) {
36069                    // expr.* - struct field expansion with potential modifiers (EXCEPT, REPLACE, etc.)
36070                    let table_name = match &expr {
36071                        Expression::Column(col) => {
36072                            if let Some(ref table) = col.table {
36073                                Some(Identifier::new(format!("{}.{}", table.name, col.name.name)))
36074                            } else {
36075                                Some(col.name.clone())
36076                            }
36077                        }
36078                        Expression::Dot(d) => {
36079                            fn dot_to_name_inner(expr: &Expression) -> String {
36080                                match expr {
36081                                    Expression::Column(col) => {
36082                                        if let Some(ref table) = col.table {
36083                                            format!("{}.{}", table.name, col.name.name)
36084                                        } else {
36085                                            col.name.name.clone()
36086                                        }
36087                                    }
36088                                    Expression::Dot(d) => {
36089                                        format!("{}.{}", dot_to_name_inner(&d.this), d.field.name)
36090                                    }
36091                                    _ => String::new(),
36092                                }
36093                            }
36094                            Some(Identifier::new(dot_to_name_inner(&Expression::Dot(
36095                                d.clone(),
36096                            ))))
36097                        }
36098                        _ => None,
36099                    };
36100                    if table_name.is_some() {
36101                        let star = self.parse_star_modifiers(table_name)?;
36102                        expr = Expression::Star(star);
36103                        // ClickHouse: a.* APPLY(func) EXCEPT(col) REPLACE(expr AS col) in any order
36104                        if matches!(
36105                            self.config.dialect,
36106                            Some(crate::dialects::DialectType::ClickHouse)
36107                        ) {
36108                            loop {
36109                                if self.check(TokenType::Apply) {
36110                                    self.skip();
36111                                    let apply_expr = if self.match_token(TokenType::LParen) {
36112                                        let e = self.parse_expression()?;
36113                                        self.expect(TokenType::RParen)?;
36114                                        e
36115                                    } else {
36116                                        self.parse_expression()?
36117                                    };
36118                                    expr = Expression::Apply(Box::new(crate::expressions::Apply {
36119                                        this: Box::new(expr),
36120                                        expression: Box::new(apply_expr),
36121                                    }));
36122                                } else if self.check(TokenType::Except)
36123                                    || self.check(TokenType::Exclude)
36124                                {
36125                                    self.skip();
36126                                    self.match_identifier("STRICT");
36127                                    if self.match_token(TokenType::LParen) {
36128                                        loop {
36129                                            if self.check(TokenType::RParen) {
36130                                                break;
36131                                            }
36132                                            let _ = self.parse_expression()?;
36133                                            if !self.match_token(TokenType::Comma) {
36134                                                break;
36135                                            }
36136                                        }
36137                                        self.expect(TokenType::RParen)?;
36138                                    } else if self.is_identifier_token()
36139                                        || self.is_safe_keyword_as_identifier()
36140                                    {
36141                                        let _ = self.parse_expression()?;
36142                                    }
36143                                } else if self.check(TokenType::Replace) {
36144                                    self.skip();
36145                                    self.match_identifier("STRICT");
36146                                    if self.match_token(TokenType::LParen) {
36147                                        loop {
36148                                            if self.check(TokenType::RParen) {
36149                                                break;
36150                                            }
36151                                            let _ = self.parse_expression()?;
36152                                            if self.match_token(TokenType::As) {
36153                                                if self.is_identifier_token()
36154                                                    || self.is_safe_keyword_as_identifier()
36155                                                {
36156                                                    self.skip();
36157                                                }
36158                                            }
36159                                            if !self.match_token(TokenType::Comma) {
36160                                                break;
36161                                            }
36162                                        }
36163                                        self.expect(TokenType::RParen)?;
36164                                    } else {
36165                                        let _ = self.parse_expression()?;
36166                                        if self.match_token(TokenType::As) {
36167                                            if self.is_identifier_token()
36168                                                || self.is_safe_keyword_as_identifier()
36169                                            {
36170                                                self.skip();
36171                                            }
36172                                        }
36173                                    }
36174                                } else {
36175                                    break;
36176                                }
36177                            }
36178                        }
36179                    } else {
36180                        // For complex expressions (like CAST, function calls), use Dot with * as field
36181                        expr = Expression::Dot(Box::new(DotAccess {
36182                            this: expr,
36183                            field: Identifier::new("*"),
36184                        }));
36185                    }
36186                } else if self.check(TokenType::Identifier)
36187                    || self.check(TokenType::Var)
36188                    || self.check(TokenType::QuotedIdentifier)
36189                    || self.check_keyword()
36190                {
36191                    let is_quoted = self.check(TokenType::QuotedIdentifier);
36192                    let field_name = self.advance().text;
36193                    // Check if this is a method call (field followed by parentheses)
36194                    if self.check(TokenType::LParen) && !is_quoted {
36195                        // This is a method call like a.b.C() or x.EXTRACT()
36196                        self.skip(); // consume (
36197                        let args = if self.check(TokenType::RParen) {
36198                            Vec::new()
36199                        } else {
36200                            self.parse_expression_list()?
36201                        };
36202                        self.expect(TokenType::RParen)?;
36203                        // Create a method call expression (DotAccess with function call)
36204                        expr = Expression::MethodCall(Box::new(MethodCall {
36205                            this: expr,
36206                            method: Identifier::new(field_name),
36207                            args,
36208                        }));
36209                    } else {
36210                        let mut ident = Identifier::new(field_name);
36211                        if is_quoted {
36212                            ident.quoted = true;
36213                        }
36214                        expr = Expression::Dot(Box::new(DotAccess {
36215                            this: expr,
36216                            field: ident,
36217                        }));
36218                    }
36219                } else if self.check(TokenType::Number) {
36220                    // Handle numeric field access like a.0 or x.1
36221                    let field_name = self.advance().text;
36222                    expr = Expression::Dot(Box::new(DotAccess {
36223                        this: expr,
36224                        field: Identifier::new(field_name),
36225                    }));
36226                } else if matches!(
36227                    self.config.dialect,
36228                    Some(crate::dialects::DialectType::ClickHouse)
36229                ) && self.check(TokenType::Caret)
36230                {
36231                    // ClickHouse: json.^path — the ^ prefix means "get all nested subcolumns"
36232                    self.skip(); // consume ^
36233                                 // What follows should be an identifier path
36234                    let mut field_name = "^".to_string();
36235                    if self.check(TokenType::Identifier)
36236                        || self.check(TokenType::Var)
36237                        || self.check_keyword()
36238                    {
36239                        field_name.push_str(&self.advance().text);
36240                    }
36241                    expr = Expression::Dot(Box::new(DotAccess {
36242                        this: expr,
36243                        field: Identifier::new(field_name),
36244                    }));
36245                } else if matches!(
36246                    self.config.dialect,
36247                    Some(crate::dialects::DialectType::ClickHouse)
36248                ) && self.check(TokenType::Colon)
36249                {
36250                    // ClickHouse: json.path.:Type — the : prefix means type cast on JSON path
36251                    self.skip(); // consume :
36252                                 // Consume the type name
36253                    let mut type_name = ":".to_string();
36254                    if self.check(TokenType::Identifier)
36255                        || self.check(TokenType::Var)
36256                        || self.check_keyword()
36257                    {
36258                        type_name.push_str(&self.advance().text);
36259                    }
36260                    expr = Expression::Dot(Box::new(DotAccess {
36261                        this: expr,
36262                        field: Identifier::new(type_name),
36263                    }));
36264                } else if matches!(
36265                    self.config.dialect,
36266                    Some(crate::dialects::DialectType::ClickHouse)
36267                ) && self.check(TokenType::Dash)
36268                    && self
36269                        .peek_nth(1)
36270                        .is_some_and(|t| t.token_type == TokenType::Number)
36271                {
36272                    // ClickHouse: tuple.-1 — negative tuple index
36273                    self.skip(); // consume -
36274                    let num = self.advance().text;
36275                    expr = Expression::Dot(Box::new(DotAccess {
36276                        this: expr,
36277                        field: Identifier::new(format!("-{}", num)),
36278                    }));
36279                } else {
36280                    return Err(self.parse_error("Expected field name after dot"));
36281                }
36282            } else if self.match_token(TokenType::Collate) {
36283                // Parse COLLATE 'collation_name' or COLLATE "collation_name" or COLLATE collation_name
36284                let (collation, quoted, double_quoted) = if self.check(TokenType::String) {
36285                    // Single-quoted string: COLLATE 'de_DE'
36286                    (self.advance().text, true, false)
36287                } else if self.check(TokenType::QuotedIdentifier) {
36288                    // Double-quoted identifier: COLLATE "de_DE"
36289                    (self.advance().text, false, true)
36290                } else {
36291                    // Unquoted identifier: COLLATE de_DE
36292                    (self.expect_identifier_or_keyword()?, false, false)
36293                };
36294                expr = Expression::Collation(Box::new(CollationExpr {
36295                    this: expr,
36296                    collation,
36297                    quoted,
36298                    double_quoted,
36299                }));
36300            } else if self.check(TokenType::DColon)
36301                || self.check(TokenType::DColonDollar)
36302                || self.check(TokenType::DColonPercent)
36303                || self.check(TokenType::DColonQMark)
36304            {
36305                // For SingleStore, :: variants are JSON path extraction
36306                // For other dialects, :: is cast syntax (PostgreSQL-style)
36307                if matches!(
36308                    self.config.dialect,
36309                    Some(crate::dialects::DialectType::SingleStore)
36310                ) {
36311                    // SingleStore JSON path extraction: expr::key, expr::$key, expr::%key, expr::?key
36312                    if self.match_token(TokenType::DColon) {
36313                        // ::key -> JSON_EXTRACT_JSON(expr, 'key')
36314                        let path_key =
36315                            if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
36316                                self.advance().text
36317                            } else if self.check(TokenType::Number) {
36318                                self.advance().text
36319                            } else if self.check(TokenType::QuotedIdentifier) {
36320                                self.advance().text
36321                            } else {
36322                                return Err(self.parse_error(
36323                                    "Expected identifier or number after :: in JSON path",
36324                                ));
36325                            };
36326                        expr = Expression::Function(Box::new(Function::new(
36327                            "JSON_EXTRACT_JSON".to_string(),
36328                            vec![expr, Expression::string(&path_key)],
36329                        )));
36330                    } else if self.match_token(TokenType::DColonDollar) {
36331                        // ::$key -> JSON_EXTRACT_STRING(expr, 'key')
36332                        let path_key =
36333                            if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
36334                                self.advance().text
36335                            } else if self.check(TokenType::Number) {
36336                                self.advance().text
36337                            } else {
36338                                return Err(self.parse_error(
36339                                    "Expected identifier or number after ::$ in JSON path",
36340                                ));
36341                            };
36342                        expr = Expression::Function(Box::new(Function::new(
36343                            "JSON_EXTRACT_STRING".to_string(),
36344                            vec![expr, Expression::string(&path_key)],
36345                        )));
36346                    } else if self.match_token(TokenType::DColonPercent) {
36347                        // ::%key -> JSON_EXTRACT_DOUBLE(expr, 'key')
36348                        let path_key =
36349                            if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
36350                                self.advance().text
36351                            } else if self.check(TokenType::Number) {
36352                                self.advance().text
36353                            } else {
36354                                return Err(self.parse_error(
36355                                    "Expected identifier or number after ::% in JSON path",
36356                                ));
36357                            };
36358                        expr = Expression::Function(Box::new(Function::new(
36359                            "JSON_EXTRACT_DOUBLE".to_string(),
36360                            vec![expr, Expression::string(&path_key)],
36361                        )));
36362                    } else if self.match_token(TokenType::DColonQMark) {
36363                        // ::?key -> SingleStoreJsonPathQMark function (for JSON_MATCH_ANY patterns)
36364                        let path_key =
36365                            if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
36366                                self.advance().text
36367                            } else if self.check(TokenType::Number) {
36368                                self.advance().text
36369                            } else {
36370                                return Err(self.parse_error(
36371                                    "Expected identifier or number after ::? in JSON path",
36372                                ));
36373                            };
36374                        // Use a special function name that SingleStore generator will recognize
36375                        expr = Expression::Function(Box::new(Function::new(
36376                            "__SS_JSON_PATH_QMARK__".to_string(),
36377                            vec![expr, Expression::string(&path_key)],
36378                        )));
36379                    }
36380                } else {
36381                    // PostgreSQL :: cast operator: expr::type
36382                    self.skip(); // consume DColon
36383                                 // Use parse_data_type_for_cast to avoid consuming subscripts as array dimensions
36384                    let data_type = self.parse_data_type_for_cast()?;
36385                    expr = Expression::Cast(Box::new(Cast {
36386                        this: expr,
36387                        to: data_type,
36388                        trailing_comments: Vec::new(),
36389                        double_colon_syntax: true,
36390                        format: None,
36391                        default: None,
36392                        inferred_type: None,
36393                    }));
36394                }
36395            } else if self.match_token(TokenType::ColonGt) {
36396                // SingleStore :> cast operator: expr :> type
36397                let data_type = self.parse_data_type_for_cast()?;
36398                expr = Expression::Cast(Box::new(Cast {
36399                    this: expr,
36400                    to: data_type,
36401                    trailing_comments: Vec::new(),
36402                    double_colon_syntax: false, // Use :> syntax in generator
36403                    format: None,
36404                    default: None,
36405                    inferred_type: None,
36406                }));
36407            } else if self.match_token(TokenType::NColonGt) {
36408                // SingleStore !:> try cast operator: expr !:> type
36409                let data_type = self.parse_data_type_for_cast()?;
36410                expr = Expression::TryCast(Box::new(Cast {
36411                    this: expr,
36412                    to: data_type,
36413                    trailing_comments: Vec::new(),
36414                    double_colon_syntax: false,
36415                    format: None,
36416                    default: None,
36417                    inferred_type: None,
36418                }));
36419            } else if self.match_token(TokenType::QDColon) {
36420                // Databricks ?:: try cast operator: expr?::type
36421                let data_type = self.parse_data_type_for_cast()?;
36422                expr = Expression::TryCast(Box::new(Cast {
36423                    this: expr,
36424                    to: data_type,
36425                    trailing_comments: Vec::new(),
36426                    double_colon_syntax: true, // Uses :: style syntax
36427                    format: None,
36428                    default: None,
36429                    inferred_type: None,
36430                }));
36431            } else if self.check(TokenType::Arrow)
36432                && !matches!(
36433                    self.config.dialect,
36434                    Some(crate::dialects::DialectType::ClickHouse)
36435                )
36436            {
36437                self.skip(); // consume ->
36438                             // JSON extract operator: expr -> path (PostgreSQL, MySQL, DuckDB)
36439                             // Use parse_json_path_operand to get only the immediate operand for proper left-to-right associativity
36440                let path = self.parse_json_path_operand()?;
36441                expr = Expression::JsonExtract(Box::new(JsonExtractFunc {
36442                    this: expr,
36443                    path,
36444                    returning: None,
36445                    arrow_syntax: true,
36446                    hash_arrow_syntax: false,
36447                    wrapper_option: None,
36448                    quotes_option: None,
36449                    on_scalar_string: false,
36450                    on_error: None,
36451                }));
36452            } else if self.match_token(TokenType::DArrow) {
36453                // JSON extract text operator: expr ->> path (PostgreSQL, MySQL, DuckDB)
36454                // Use parse_json_path_operand to get only the immediate operand for proper left-to-right associativity
36455                let path = self.parse_json_path_operand()?;
36456                expr = Expression::JsonExtractScalar(Box::new(JsonExtractFunc {
36457                    this: expr,
36458                    path,
36459                    returning: None,
36460                    arrow_syntax: true,
36461                    hash_arrow_syntax: false,
36462                    wrapper_option: None,
36463                    quotes_option: None,
36464                    on_scalar_string: false,
36465                    on_error: None,
36466                }));
36467            } else if self.match_token(TokenType::HashArrow) {
36468                // JSONB path extract: expr #> path (PostgreSQL)
36469                // Use parse_json_path_operand to get only the immediate operand for proper left-to-right associativity
36470                let path = self.parse_json_path_operand()?;
36471                expr = Expression::JsonExtractPath(Box::new(JsonPathFunc {
36472                    this: expr,
36473                    paths: vec![path],
36474                }));
36475            } else if self.match_token(TokenType::DHashArrow) {
36476                // JSONB path extract text: expr #>> path (PostgreSQL)
36477                // For now, use JsonExtractScalar since the result is text
36478                // Use parse_json_path_operand to get only the immediate operand for proper left-to-right associativity
36479                let path = self.parse_json_path_operand()?;
36480                expr = Expression::JsonExtractScalar(Box::new(JsonExtractFunc {
36481                    this: expr,
36482                    path,
36483                    returning: None,
36484                    arrow_syntax: false,     // This is #>> not ->>
36485                    hash_arrow_syntax: true, // Mark as #>> operator
36486                    wrapper_option: None,
36487                    quotes_option: None,
36488                    on_scalar_string: false,
36489                    on_error: None,
36490                }));
36491            } else if self.check_join_marker() {
36492                // Oracle/Redshift-style outer join marker: column (+)
36493                // Only applies to Column expressions
36494                if let Expression::Column(col) = &mut expr {
36495                    self.skip(); // consume (
36496                    self.skip(); // consume +
36497                    self.skip(); // consume )
36498                    col.join_mark = true;
36499                    // Don't continue - join marker is terminal (no more postfix ops after it)
36500                    break;
36501                }
36502                // If not a Column, just break - the marker is invalid in this context
36503                else {
36504                    break;
36505                }
36506            } else {
36507                break;
36508            }
36509        }
36510        Ok(expr)
36511    }
36512
36513    /// Check if the next tokens are the Oracle-style join marker (+)
36514    fn check_join_marker(&self) -> bool {
36515        self.check(TokenType::LParen)
36516            && self
36517                .peek_nth(1)
36518                .map_or(false, |t| t.token_type == TokenType::Plus)
36519            && self
36520                .peek_nth(2)
36521                .map_or(false, |t| t.token_type == TokenType::RParen)
36522    }
36523
36524    /// Parse OVER clause
36525    fn parse_over_clause(&mut self) -> Result<Over> {
36526        // Handle OVER window_name (without parentheses)
36527        if !self.check(TokenType::LParen) {
36528            // OVER window_name - just a named window reference
36529            let window_name = self.expect_identifier_or_keyword()?;
36530            return Ok(Over {
36531                window_name: Some(Identifier::new(window_name)),
36532                partition_by: Vec::new(),
36533                order_by: Vec::new(),
36534                frame: None,
36535                alias: None,
36536            });
36537        }
36538
36539        self.expect(TokenType::LParen)?;
36540
36541        // Check for named window reference at start of OVER clause
36542        // e.g., OVER (w ORDER BY y) - w is a window name that can be extended
36543        let window_name = if (self.check(TokenType::Identifier)
36544            || self.check(TokenType::Var)
36545            || self.check_keyword())
36546            && !self.check(TokenType::Partition)
36547            && !self.check(TokenType::Order)
36548            && !self.check(TokenType::Rows)
36549            && !self.check(TokenType::Range)
36550            && !self.check(TokenType::Groups)
36551            && !self.check(TokenType::Distribute)
36552            && !self.check(TokenType::Sort)
36553        {
36554            // Look ahead to see if next token indicates this is a window name
36555            let pos = self.current;
36556            let name = self.advance().text;
36557            // If next token is a keyword that can follow a window name, this is a named reference
36558            if self.check(TokenType::Order)
36559                || self.check(TokenType::Partition)
36560                || self.check(TokenType::Rows)
36561                || self.check(TokenType::Range)
36562                || self.check(TokenType::Groups)
36563                || self.check(TokenType::RParen)
36564                || self.check(TokenType::Distribute)
36565                || self.check(TokenType::Sort)
36566            {
36567                Some(Identifier::new(name))
36568            } else {
36569                // Not a named window, restore position
36570                self.current = pos;
36571                None
36572            }
36573        } else {
36574            None
36575        };
36576
36577        // Parse PARTITION BY or DISTRIBUTE BY (Hive uses DISTRIBUTE BY in window specs)
36578        let partition_by = if self.match_keywords(&[TokenType::Partition, TokenType::By]) {
36579            self.parse_expression_list()?
36580        } else if self.match_keywords(&[TokenType::Distribute, TokenType::By]) {
36581            // Hive: DISTRIBUTE BY is equivalent to PARTITION BY in window specs
36582            self.parse_expression_list()?
36583        } else {
36584            Vec::new()
36585        };
36586
36587        // Parse ORDER BY or SORT BY (Hive uses SORT BY in window specs)
36588        let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By])
36589            || self.match_keywords(&[TokenType::Sort, TokenType::By])
36590        {
36591            let mut exprs = Vec::new();
36592            loop {
36593                let expr = self.parse_expression()?;
36594                let (desc, explicit_asc) = if self.match_token(TokenType::Desc) {
36595                    (true, false)
36596                } else if self.match_token(TokenType::Asc) {
36597                    (false, true)
36598                } else {
36599                    (false, false)
36600                };
36601                // ClickHouse/SQL: COLLATE 'collation' in window ORDER BY
36602                if self.match_token(TokenType::Collate) {
36603                    // Consume collation name (string or identifier)
36604                    if self.check(TokenType::String) {
36605                        self.skip();
36606                    } else if self.check(TokenType::QuotedIdentifier) {
36607                        self.skip();
36608                    } else {
36609                        let _ = self.expect_identifier_or_keyword();
36610                    }
36611                }
36612                let nulls_first = if self.match_token(TokenType::Nulls) {
36613                    if self.match_token(TokenType::First) {
36614                        Some(true)
36615                    } else if self.match_token(TokenType::Last) {
36616                        Some(false)
36617                    } else {
36618                        return Err(self.parse_error("Expected FIRST or LAST after NULLS"));
36619                    }
36620                } else {
36621                    None
36622                };
36623                // ClickHouse: WITH FILL in window ORDER BY
36624                let with_fill = if matches!(
36625                    self.config.dialect,
36626                    Some(crate::dialects::DialectType::ClickHouse)
36627                ) && self.check(TokenType::With)
36628                    && self.current + 1 < self.tokens.len()
36629                    && self.tokens[self.current + 1]
36630                        .text
36631                        .eq_ignore_ascii_case("FILL")
36632                {
36633                    self.skip(); // consume WITH
36634                    self.skip(); // consume FILL
36635                    let from_ = if self.match_token(TokenType::From) {
36636                        Some(Box::new(self.parse_or()?))
36637                    } else {
36638                        None
36639                    };
36640                    let to = if self.match_text_seq(&["TO"]) {
36641                        Some(Box::new(self.parse_or()?))
36642                    } else {
36643                        None
36644                    };
36645                    let step = if self.match_text_seq(&["STEP"]) {
36646                        Some(Box::new(self.parse_or()?))
36647                    } else {
36648                        None
36649                    };
36650                    let staleness = if self.match_text_seq(&["STALENESS"]) {
36651                        Some(Box::new(self.parse_or()?))
36652                    } else {
36653                        None
36654                    };
36655                    let interpolate = if self.match_text_seq(&["INTERPOLATE"]) {
36656                        if self.match_token(TokenType::LParen) {
36657                            let items = self.parse_expression_list()?;
36658                            self.expect(TokenType::RParen)?;
36659                            if items.len() == 1 {
36660                                Some(Box::new(items.into_iter().next().unwrap()))
36661                            } else {
36662                                Some(Box::new(Expression::Tuple(Box::new(
36663                                    crate::expressions::Tuple { expressions: items },
36664                                ))))
36665                            }
36666                        } else {
36667                            None
36668                        }
36669                    } else {
36670                        None
36671                    };
36672                    Some(Box::new(WithFill {
36673                        from_,
36674                        to,
36675                        step,
36676                        staleness,
36677                        interpolate,
36678                    }))
36679                } else {
36680                    None
36681                };
36682                exprs.push(Ordered {
36683                    this: expr,
36684                    desc,
36685                    nulls_first,
36686                    explicit_asc,
36687                    with_fill,
36688                });
36689                if !self.match_token(TokenType::Comma) {
36690                    break;
36691                }
36692            }
36693            exprs
36694        } else {
36695            Vec::new()
36696        };
36697
36698        // Parse window frame
36699        let frame = self.parse_window_frame()?;
36700
36701        self.expect(TokenType::RParen)?;
36702
36703        Ok(Over {
36704            window_name,
36705            partition_by,
36706            order_by,
36707            frame,
36708            alias: None,
36709        })
36710    }
36711
36712    /// Parse window frame specification (ROWS/RANGE/GROUPS BETWEEN ...)
36713    fn parse_window_frame(&mut self) -> Result<Option<WindowFrame>> {
36714        let (kind, kind_text) = if self.match_token(TokenType::Rows) {
36715            (
36716                WindowFrameKind::Rows,
36717                self.tokens[self.current - 1].text.clone(),
36718            )
36719        } else if self.match_token(TokenType::Range) {
36720            (
36721                WindowFrameKind::Range,
36722                self.tokens[self.current - 1].text.clone(),
36723            )
36724        } else if self.match_token(TokenType::Groups) {
36725            (
36726                WindowFrameKind::Groups,
36727                self.tokens[self.current - 1].text.clone(),
36728            )
36729        } else {
36730            return Ok(None);
36731        };
36732
36733        // Parse BETWEEN or single bound
36734        let (start, start_side_text, end, end_side_text) = if self.match_token(TokenType::Between) {
36735            let (start, st) = self.parse_window_frame_bound()?;
36736            self.expect(TokenType::And)?;
36737            let (end, et) = self.parse_window_frame_bound()?;
36738            (start, st, Some(end), et)
36739        } else {
36740            let (start, st) = self.parse_window_frame_bound()?;
36741            (start, st, None, None)
36742        };
36743
36744        // Parse optional EXCLUDE clause
36745        let exclude = if self.match_token(TokenType::Exclude) {
36746            if self.match_token(TokenType::Current) {
36747                self.expect(TokenType::Row)?;
36748                Some(WindowFrameExclude::CurrentRow)
36749            } else if self.match_token(TokenType::Group) {
36750                Some(WindowFrameExclude::Group)
36751            } else if self.match_token(TokenType::Ties) {
36752                Some(WindowFrameExclude::Ties)
36753            } else if self.match_token(TokenType::No) {
36754                self.expect(TokenType::Others)?;
36755                Some(WindowFrameExclude::NoOthers)
36756            } else {
36757                return Err(self
36758                    .parse_error("Expected CURRENT ROW, GROUP, TIES, or NO OTHERS after EXCLUDE"));
36759            }
36760        } else {
36761            None
36762        };
36763
36764        Ok(Some(WindowFrame {
36765            kind,
36766            start,
36767            end,
36768            exclude,
36769            kind_text: Some(kind_text),
36770            start_side_text,
36771            end_side_text,
36772        }))
36773    }
36774
36775    /// Parse a window frame bound, returning the bound and the original text of the side keyword
36776    fn parse_window_frame_bound(&mut self) -> Result<(WindowFrameBound, Option<String>)> {
36777        if self.match_token(TokenType::Current) {
36778            self.expect(TokenType::Row)?;
36779            Ok((WindowFrameBound::CurrentRow, None))
36780        } else if self.match_token(TokenType::Unbounded) {
36781            if self.match_token(TokenType::Preceding) {
36782                let text = self.tokens[self.current - 1].text.clone();
36783                Ok((WindowFrameBound::UnboundedPreceding, Some(text)))
36784            } else if self.match_token(TokenType::Following) {
36785                let text = self.tokens[self.current - 1].text.clone();
36786                Ok((WindowFrameBound::UnboundedFollowing, Some(text)))
36787            } else {
36788                Err(self.parse_error("Expected PRECEDING or FOLLOWING after UNBOUNDED"))
36789            }
36790        } else if self.match_token(TokenType::Preceding) {
36791            let text = self.tokens[self.current - 1].text.clone();
36792            // PRECEDING [value] (inverted syntax for some dialects)
36793            // If no value follows (e.g., just "PRECEDING" or "PRECEDING)"), use BarePreceding
36794            if self.check(TokenType::RParen) || self.check(TokenType::Comma) {
36795                Ok((WindowFrameBound::BarePreceding, Some(text)))
36796            } else {
36797                let expr = self.parse_primary()?;
36798                Ok((WindowFrameBound::Preceding(Box::new(expr)), Some(text)))
36799            }
36800        } else if self.match_token(TokenType::Following) {
36801            let text = self.tokens[self.current - 1].text.clone();
36802            // FOLLOWING [value] (inverted syntax for some dialects)
36803            // If no value follows (e.g., just "FOLLOWING" or "FOLLOWING)"), use BareFollowing
36804            if self.check(TokenType::RParen) || self.check(TokenType::Comma) {
36805                Ok((WindowFrameBound::BareFollowing, Some(text)))
36806            } else {
36807                let expr = self.parse_primary()?;
36808                Ok((WindowFrameBound::Following(Box::new(expr)), Some(text)))
36809            }
36810        } else {
36811            // <expr> PRECEDING | FOLLOWING (standard syntax)
36812            // Use parse_addition to handle expressions like 1 + 1 PRECEDING
36813            let expr = self.parse_addition()?;
36814            if self.match_token(TokenType::Preceding) {
36815                let text = self.tokens[self.current - 1].text.clone();
36816                Ok((WindowFrameBound::Preceding(Box::new(expr)), Some(text)))
36817            } else if self.match_token(TokenType::Following) {
36818                let text = self.tokens[self.current - 1].text.clone();
36819                Ok((WindowFrameBound::Following(Box::new(expr)), Some(text)))
36820            } else {
36821                // Bare numeric bounds without PRECEDING/FOLLOWING
36822                // (e.g., RANGE BETWEEN 1 AND 3)
36823                Ok((WindowFrameBound::Value(Box::new(expr)), None))
36824            }
36825        }
36826    }
36827
36828    /// Try to parse INTERVAL expression. Returns None if INTERVAL should be treated as identifier.
36829    fn try_parse_interval(&mut self) -> Result<Option<Expression>> {
36830        self.try_parse_interval_internal(true)
36831    }
36832
36833    /// Internal interval parsing that optionally matches the INTERVAL keyword.
36834    /// When match_interval is false, it parses a chained interval value-unit pair
36835    /// without requiring the INTERVAL keyword.
36836    fn try_parse_interval_internal(&mut self, match_interval: bool) -> Result<Option<Expression>> {
36837        let start_pos = self.current;
36838
36839        // Consume the INTERVAL keyword if required
36840        if match_interval {
36841            if !self.check(TokenType::Interval) {
36842                return Ok(None);
36843            }
36844            self.expect(TokenType::Interval)?;
36845
36846            // Check if next token is an operator - if so, INTERVAL is used as identifier
36847            if self.check(TokenType::Eq)
36848                || self.check(TokenType::Neq)
36849                || self.check(TokenType::Lt)
36850                || self.check(TokenType::Gt)
36851                || self.check(TokenType::Lte)
36852                || self.check(TokenType::Gte)
36853                || self.check(TokenType::And)
36854                || self.check(TokenType::Or)
36855                || self.check(TokenType::Is)
36856                || self.check(TokenType::In)
36857                || self.check(TokenType::Like)
36858                || self.check(TokenType::ILike)
36859                || self.check(TokenType::Between)
36860                || self.check(TokenType::Then)
36861                || self.check(TokenType::Else)
36862                || self.check(TokenType::When)
36863                || self.check(TokenType::End)
36864                || self.check(TokenType::Comma)
36865                || self.check(TokenType::RParen)
36866                || self.check(TokenType::DColon)
36867            {
36868                // INTERVAL is used as identifier
36869                self.current = start_pos;
36870                return Ok(None);
36871            }
36872        }
36873
36874        // Parse the value after INTERVAL
36875        // IMPORTANT: For string literals, don't use parse_primary() because it calls
36876        // maybe_parse_subscript() which would consume postfix operators like ::TYPE.
36877        // Those should be applied to the full INTERVAL expression, not just the value inside.
36878        // e.g., INTERVAL '1 hour'::VARCHAR should be CAST(INTERVAL '1 hour' AS VARCHAR)
36879        //       not INTERVAL CAST('1 hour' AS VARCHAR)
36880        // For non-string values, use parse_addition() to handle expressions like
36881        // INTERVAL 2 * 2 MONTH or INTERVAL DAYOFMONTH(dt) - 1 DAY (MySQL syntax)
36882        // This matches Python sqlglot's _parse_term() behavior which handles +, -, *, /, %
36883        let value = if self.check(TokenType::String) {
36884            let token = self.advance();
36885            Some(Expression::Literal(Box::new(Literal::String(token.text))))
36886        } else if !self.is_at_end() && !self.is_statement_terminator() {
36887            Some(self.parse_addition()?)
36888        } else {
36889            None
36890        };
36891
36892        // Check if we should treat INTERVAL as an identifier instead
36893        // This happens when:
36894        // - No value was parsed, OR
36895        // - Value is an unqualified, unquoted column reference AND
36896        //   what follows is NOT a valid interval unit
36897        if let Some(ref val) = value {
36898            if let Expression::Column(col) = val {
36899                // Column without table qualifier
36900                if col.table.is_none() {
36901                    // Check if identifier is quoted
36902                    let is_quoted = col.name.quoted;
36903                    if !is_quoted {
36904                        // Check if next token is a valid interval unit
36905                        if !self.is_valid_interval_unit() && !self.check(TokenType::As) {
36906                            // Backtrack - INTERVAL is used as identifier
36907                            self.current = start_pos;
36908                            return Ok(None);
36909                        }
36910                    }
36911                }
36912            } else if let Expression::Identifier(id) = val {
36913                // Bare identifier without table qualifier
36914                let is_quoted = id.quoted;
36915                if !is_quoted {
36916                    // Check if next token is a valid interval unit
36917                    if !self.is_valid_interval_unit() && !self.check(TokenType::As) {
36918                        // Backtrack - INTERVAL is used as identifier
36919                        self.current = start_pos;
36920                        return Ok(None);
36921                    }
36922                }
36923            }
36924        } else if self.is_at_end() || self.is_statement_terminator() {
36925            // No value, and at end/terminator - INTERVAL is an identifier
36926            self.current = start_pos;
36927            return Ok(None);
36928        }
36929
36930        // Now parse the optional unit
36931        let mut unit = self.try_parse_interval_unit()?;
36932
36933        // Split compound interval strings like '1 day' into value '1' and unit DAY
36934        // This matches Python sqlglot's INTERVAL_STRING_RE behavior
36935        // Only apply in generic mode -- dialects like PostgreSQL preserve compound strings
36936        let is_generic = self.config.dialect.is_none()
36937            || matches!(
36938                self.config.dialect,
36939                Some(crate::dialects::DialectType::Generic)
36940            );
36941        let value = if unit.is_none() && is_generic {
36942            if let Some(Expression::Literal(ref lit)) = value {
36943                if let Literal::String(ref s) = lit.as_ref() {
36944                    let trimmed = s.trim();
36945                    // Match pattern: optional negative sign, digits (optional decimal), space(s), alpha unit
36946                    let mut split_pos = None;
36947                    let mut found_space = false;
36948                    let bytes = trimmed.as_bytes();
36949                    let mut i = 0;
36950                    // Skip optional negative sign
36951                    if i < bytes.len() && bytes[i] == b'-' {
36952                        i += 1;
36953                    }
36954                    // Expect digits
36955                    let digit_start = i;
36956                    while i < bytes.len() && bytes[i].is_ascii_digit() {
36957                        i += 1;
36958                    }
36959                    if i > digit_start {
36960                        // Optional decimal part
36961                        if i < bytes.len() && bytes[i] == b'.' {
36962                            i += 1;
36963                            while i < bytes.len() && bytes[i].is_ascii_digit() {
36964                                i += 1;
36965                            }
36966                        }
36967                        // Expect whitespace
36968                        let space_start = i;
36969                        while i < bytes.len() && bytes[i].is_ascii_whitespace() {
36970                            i += 1;
36971                        }
36972                        if i > space_start {
36973                            found_space = true;
36974                            split_pos = Some(i);
36975                        }
36976                    }
36977                    if found_space {
36978                        if let Some(pos) = split_pos {
36979                            let unit_text = &trimmed[pos..];
36980                            // Verify it's all alpha
36981                            if !unit_text.is_empty()
36982                                && unit_text.chars().all(|c| c.is_ascii_alphabetic())
36983                            {
36984                                let num_part = trimmed[..pos].trim_end().to_string();
36985                                let unit_upper = unit_text.to_ascii_uppercase();
36986                                // Try to parse as interval unit
36987                                if let Some(parsed_unit) =
36988                                    Self::parse_interval_unit_from_string(&unit_upper)
36989                                {
36990                                    // Check if the original text had an 'S' suffix (plural)
36991                                    let is_plural = unit_upper.ends_with('S');
36992                                    unit = Some(IntervalUnitSpec::Simple {
36993                                        unit: parsed_unit,
36994                                        use_plural: is_plural,
36995                                    });
36996                                    Some(Expression::Literal(Box::new(Literal::String(num_part))))
36997                                } else {
36998                                    value
36999                                }
37000                            } else {
37001                                value
37002                            }
37003                        } else {
37004                            value
37005                        }
37006                    } else {
37007                        value
37008                    }
37009                } else {
37010                    None
37011                }
37012            } else {
37013                value
37014            }
37015        } else {
37016            value
37017        };
37018
37019        // Convert number literals to string literals in intervals (canonical form).
37020        // Most dialects support INTERVAL '5' DAY, so we normalize to this form
37021        // for easier transpilation. This matches Python sqlglot's behavior in
37022        // _parse_interval_span: "if this and this.is_number: this = exp.Literal.string(this.to_py())"
37023        let value = match value {
37024            Some(Expression::Literal(lit))
37025                if unit.is_some() && matches!(lit.as_ref(), Literal::Number(_)) =>
37026            {
37027                let Literal::Number(n) = lit.as_ref() else {
37028                    unreachable!()
37029                };
37030                Some(Expression::Literal(Box::new(Literal::String(n.clone()))))
37031            }
37032            other => other,
37033        };
37034
37035        let interval = Expression::Interval(Box::new(Interval { this: value, unit }));
37036
37037        // Support for chained multi-unit interval syntax (Spark/Hive):
37038        // INTERVAL '5' HOURS '30' MINUTES -> INTERVAL '5' HOURS + INTERVAL '30' MINUTES
37039        // This is done by optionally matching a PLUS sign, and if followed by
37040        // another string or number (without INTERVAL keyword), recursively parsing
37041        // and creating an Add expression.
37042        let before_plus = self.current;
37043        let has_plus = self.match_token(TokenType::Plus);
37044
37045        // Check if followed by a STRING or NUMBER (potential chained interval)
37046        if self.check(TokenType::String) || self.check(TokenType::Number) {
37047            // Recursively parse the chained interval without the INTERVAL keyword
37048            if let Some(next_interval) = self.try_parse_interval_internal(false)? {
37049                return Ok(Some(Expression::Add(Box::new(BinaryOp::new(
37050                    interval,
37051                    next_interval,
37052                )))));
37053            }
37054        }
37055
37056        // If we consumed a PLUS but didn't find a chained interval, backtrack
37057        if has_plus {
37058            self.current = before_plus;
37059        }
37060
37061        Ok(Some(interval))
37062    }
37063
37064    /// Check if current token is a valid interval unit
37065    fn is_valid_interval_unit(&self) -> bool {
37066        if self.is_at_end() {
37067            return false;
37068        }
37069        let text = self.peek().text.to_ascii_uppercase();
37070        matches!(
37071            text.as_str(),
37072            "YEAR"
37073                | "YEARS"
37074                | "MONTH"
37075                | "MONTHS"
37076                | "DAY"
37077                | "DAYS"
37078                | "HOUR"
37079                | "HOURS"
37080                | "MINUTE"
37081                | "MINUTES"
37082                | "SECOND"
37083                | "SECONDS"
37084                | "MILLISECOND"
37085                | "MILLISECONDS"
37086                | "MICROSECOND"
37087                | "MICROSECONDS"
37088                | "NANOSECOND"
37089                | "NANOSECONDS"
37090                | "WEEK"
37091                | "WEEKS"
37092                | "QUARTER"
37093                | "QUARTERS"
37094        )
37095    }
37096
37097    /// Check if current token terminates a statement/expression context
37098    fn is_statement_terminator(&self) -> bool {
37099        if self.is_at_end() {
37100            return true;
37101        }
37102        matches!(
37103            self.peek().token_type,
37104            TokenType::Semicolon
37105                | TokenType::RParen
37106                | TokenType::RBracket
37107                | TokenType::Comma
37108                | TokenType::From
37109                | TokenType::Where
37110                | TokenType::GroupBy
37111                | TokenType::Having
37112                | TokenType::OrderBy
37113                | TokenType::Limit
37114                | TokenType::Union
37115                | TokenType::Intersect
37116                | TokenType::Except
37117                | TokenType::End
37118                | TokenType::Then
37119                | TokenType::Else
37120                | TokenType::When
37121        )
37122    }
37123
37124    /// Try to parse interval unit - returns None if no unit present
37125    fn try_parse_interval_unit(&mut self) -> Result<Option<IntervalUnitSpec>> {
37126        // First, check if there's a function (like CURRENT_DATE, CAST(...))
37127        if self.is_function_start() {
37128            let func = self.parse_primary()?;
37129            return Ok(Some(IntervalUnitSpec::Expr(Box::new(func))));
37130        }
37131
37132        // Try to parse a simple unit or span
37133        if let Some((unit, use_plural)) = self.try_parse_simple_interval_unit()? {
37134            // Check for "TO" to make it a span (e.g., YEAR TO MONTH)
37135            // Use lookahead to avoid consuming TO when it's part of WITH FILL
37136            if self.check_keyword_text("TO") {
37137                let saved = self.current;
37138                self.skip(); // consume TO
37139                if let Some((end_unit, _)) = self.try_parse_simple_interval_unit()? {
37140                    return Ok(Some(IntervalUnitSpec::Span(IntervalSpan {
37141                        this: unit,
37142                        expression: end_unit,
37143                    })));
37144                } else {
37145                    // Not followed by a valid interval unit — backtrack
37146                    self.current = saved;
37147                }
37148            }
37149            return Ok(Some(IntervalUnitSpec::Simple { unit, use_plural }));
37150        }
37151
37152        // No unit found
37153        Ok(None)
37154    }
37155
37156    /// Parse an interval unit from a string (used for splitting compound interval strings)
37157    fn parse_interval_unit_from_string(s: &str) -> Option<IntervalUnit> {
37158        // Strip trailing 'S' for plural forms
37159        let base = if s.ends_with('S') && s.len() > 1 {
37160            &s[..s.len() - 1]
37161        } else {
37162            s
37163        };
37164        match base {
37165            "YEAR" => Some(IntervalUnit::Year),
37166            "MONTH" => Some(IntervalUnit::Month),
37167            "DAY" => Some(IntervalUnit::Day),
37168            "HOUR" => Some(IntervalUnit::Hour),
37169            "MINUTE" => Some(IntervalUnit::Minute),
37170            "SECOND" => Some(IntervalUnit::Second),
37171            "MILLISECOND" => Some(IntervalUnit::Millisecond),
37172            "MICROSECOND" => Some(IntervalUnit::Microsecond),
37173            "QUARTER" => Some(IntervalUnit::Quarter),
37174            "WEEK" => Some(IntervalUnit::Week),
37175            _ => None,
37176        }
37177    }
37178
37179    /// Try to parse a simple interval unit (YEAR, MONTH, etc.) - returns (unit, is_plural)
37180    fn try_parse_simple_interval_unit(&mut self) -> Result<Option<(IntervalUnit, bool)>> {
37181        if self.is_at_end() {
37182            return Ok(None);
37183        }
37184
37185        let text_upper = self.peek().text.to_ascii_uppercase();
37186        let result = match text_upper.as_str() {
37187            "YEAR" => Some((IntervalUnit::Year, false)),
37188            "YEARS" => Some((IntervalUnit::Year, true)),
37189            "MONTH" => Some((IntervalUnit::Month, false)),
37190            "MONTHS" => Some((IntervalUnit::Month, true)),
37191            "DAY" => Some((IntervalUnit::Day, false)),
37192            "DAYS" => Some((IntervalUnit::Day, true)),
37193            "HOUR" => Some((IntervalUnit::Hour, false)),
37194            "HOURS" => Some((IntervalUnit::Hour, true)),
37195            "MINUTE" => Some((IntervalUnit::Minute, false)),
37196            "MINUTES" => Some((IntervalUnit::Minute, true)),
37197            "SECOND" => Some((IntervalUnit::Second, false)),
37198            "SECONDS" => Some((IntervalUnit::Second, true)),
37199            "MILLISECOND" => Some((IntervalUnit::Millisecond, false)),
37200            "MILLISECONDS" => Some((IntervalUnit::Millisecond, true)),
37201            "MICROSECOND" => Some((IntervalUnit::Microsecond, false)),
37202            "MICROSECONDS" => Some((IntervalUnit::Microsecond, true)),
37203            "NANOSECOND" => Some((IntervalUnit::Nanosecond, false)),
37204            "NANOSECONDS" => Some((IntervalUnit::Nanosecond, true)),
37205            "QUARTER" => Some((IntervalUnit::Quarter, false)),
37206            "QUARTERS" => Some((IntervalUnit::Quarter, true)),
37207            "WEEK" => Some((IntervalUnit::Week, false)),
37208            "WEEKS" => Some((IntervalUnit::Week, true)),
37209            _ => None,
37210        };
37211
37212        if result.is_some() {
37213            self.skip(); // consume the unit token
37214        }
37215
37216        Ok(result)
37217    }
37218
37219    /// Check if current position starts a function call or no-paren function
37220    fn is_function_start(&self) -> bool {
37221        if self.is_at_end() {
37222            return false;
37223        }
37224        let token_type = self.peek().token_type;
37225
37226        // Check NO_PAREN_FUNCTIONS configuration map
37227        if NO_PAREN_FUNCTIONS.contains(&token_type) {
37228            if !matches!(
37229                self.config.dialect,
37230                Some(crate::dialects::DialectType::ClickHouse)
37231            ) || token_type != TokenType::CurrentTimestamp
37232            {
37233                return true;
37234            }
37235        }
37236
37237        // Cast functions are always functions
37238        if matches!(
37239            token_type,
37240            TokenType::Cast | TokenType::TryCast | TokenType::SafeCast
37241        ) {
37242            return true;
37243        }
37244
37245        // Check NO_PAREN_FUNCTION_NAMES for string-based lookup
37246        // (handles cases where functions are tokenized as Var/Identifier)
37247        let text_upper = self.peek().text.to_ascii_uppercase();
37248        if crate::function_registry::is_no_paren_function_name_upper(text_upper.as_str()) {
37249            if !matches!(
37250                self.config.dialect,
37251                Some(crate::dialects::DialectType::ClickHouse)
37252            ) || text_upper.as_str() != "CURRENT_TIMESTAMP"
37253            {
37254                return true;
37255            }
37256        }
37257
37258        // Identifier followed by left paren (function call)
37259        if self.is_identifier_token() && self.check_next(TokenType::LParen) {
37260            return true;
37261        }
37262
37263        false
37264    }
37265
37266    /// Try to parse Oracle interval span after an expression.
37267    /// Syntax: (expr) DAY[(precision)] TO SECOND[(fractional_precision)]
37268    /// This is used in Oracle for interval expressions like:
37269    /// (SYSTIMESTAMP - order_date) DAY(9) TO SECOND(3)
37270    fn try_parse_oracle_interval_span(&mut self, expr: Expression) -> Result<Expression> {
37271        let start_pos = self.current;
37272
37273        // Check if current token is an interval unit keyword (DAY, HOUR, MINUTE, SECOND, YEAR, MONTH)
37274        let start_unit_name = if !self.is_at_end() {
37275            let text = self.peek().text.to_ascii_uppercase();
37276            if matches!(
37277                text.as_str(),
37278                "DAY" | "HOUR" | "MINUTE" | "SECOND" | "YEAR" | "MONTH"
37279            ) {
37280                Some(text)
37281            } else {
37282                None
37283            }
37284        } else {
37285            None
37286        };
37287
37288        if start_unit_name.is_none() {
37289            return Ok(expr);
37290        }
37291
37292        let start_unit_name = start_unit_name.unwrap();
37293        self.skip(); // consume the unit keyword
37294
37295        // Parse optional precision: DAY(9) or just DAY
37296        let start_unit = if self.match_token(TokenType::LParen) {
37297            // Parse precision
37298            let precision = self.parse_expression()?;
37299            self.expect(TokenType::RParen)?;
37300            // Create a function-like expression for the unit with precision
37301            Expression::Anonymous(Box::new(Anonymous {
37302                this: Box::new(Expression::Identifier(Identifier {
37303                    name: start_unit_name.clone(),
37304                    quoted: false,
37305                    trailing_comments: Vec::new(),
37306                    span: None,
37307                })),
37308                expressions: vec![precision],
37309            }))
37310        } else {
37311            // Simple unit without precision
37312            Expression::Var(Box::new(Var {
37313                this: start_unit_name,
37314            }))
37315        };
37316
37317        // Check for TO keyword
37318        if !self.match_keyword("TO") {
37319            // Not an interval span, backtrack
37320            self.current = start_pos;
37321            return Ok(expr);
37322        }
37323
37324        // Parse end unit
37325        let end_unit_name = if !self.is_at_end() {
37326            let text = self.peek().text.to_ascii_uppercase();
37327            if matches!(
37328                text.as_str(),
37329                "DAY" | "HOUR" | "MINUTE" | "SECOND" | "YEAR" | "MONTH"
37330            ) {
37331                Some(text)
37332            } else {
37333                None
37334            }
37335        } else {
37336            None
37337        };
37338
37339        let end_unit_name = match end_unit_name {
37340            Some(name) => name,
37341            None => {
37342                // No valid end unit, backtrack
37343                self.current = start_pos;
37344                return Ok(expr);
37345            }
37346        };
37347
37348        self.skip(); // consume the end unit keyword
37349
37350        // Parse optional precision for end unit: SECOND(3) or just SECOND
37351        let end_unit = if self.match_token(TokenType::LParen) {
37352            // Parse fractional precision
37353            let precision = self.parse_expression()?;
37354            self.expect(TokenType::RParen)?;
37355            // Create a function-like expression for the unit with precision
37356            Expression::Anonymous(Box::new(Anonymous {
37357                this: Box::new(Expression::Identifier(Identifier {
37358                    name: end_unit_name.clone(),
37359                    quoted: false,
37360                    trailing_comments: Vec::new(),
37361                    span: None,
37362                })),
37363                expressions: vec![precision],
37364            }))
37365        } else {
37366            // Simple unit without precision
37367            Expression::Var(Box::new(Var {
37368                this: end_unit_name,
37369            }))
37370        };
37371
37372        // Create an Interval expression with ExprSpan unit
37373        Ok(Expression::Interval(Box::new(Interval {
37374            this: Some(expr),
37375            unit: Some(IntervalUnitSpec::ExprSpan(IntervalSpanExpr {
37376                this: Box::new(start_unit),
37377                expression: Box::new(end_unit),
37378            })),
37379        })))
37380    }
37381
37382    /// Check if the current position starts a typed column list (for table function aliases)
37383    /// like: (col1 type1, col2 type2)
37384    /// This peeks ahead to see if the first column name is followed by a type token,
37385    /// rather than a comma or closing paren (which would indicate simple column aliases).
37386    /// Used for PostgreSQL functions like JSON_TO_RECORDSET that have typed column definitions.
37387    fn check_typed_column_list(&self) -> bool {
37388        // We're positioned after '(' - check pattern: identifier type
37389        // If we see identifier followed by something that's not ',' or ')', it's typed
37390        if self.is_at_end() {
37391            return false;
37392        }
37393
37394        // Check if current is an identifier (column name)
37395        let has_identifier = self.check(TokenType::Identifier)
37396            || self.check(TokenType::QuotedIdentifier)
37397            || self.check(TokenType::Var);
37398
37399        if !has_identifier {
37400            return false;
37401        }
37402
37403        // Look at next token (after the identifier)
37404        let next_pos = self.current + 1;
37405        if next_pos >= self.tokens.len() {
37406            return false;
37407        }
37408
37409        let next_token = &self.tokens[next_pos];
37410
37411        // If next token is comma or rparen, it's simple column aliases
37412        if next_token.token_type == TokenType::Comma || next_token.token_type == TokenType::RParen {
37413            return false;
37414        }
37415
37416        // If next token could be a type name (identifier, var, or type keyword), it's typed columns
37417        // Check for type tokens or identifiers that could be type names
37418        TYPE_TOKENS.contains(&next_token.token_type)
37419            || next_token.token_type == TokenType::Identifier
37420            || next_token.token_type == TokenType::Var
37421    }
37422
37423    /// Check if current token is a no-paren function
37424    fn is_no_paren_function(&self) -> bool {
37425        if self.is_at_end() {
37426            return false;
37427        }
37428        let token_type = self.peek().token_type;
37429        if NO_PAREN_FUNCTIONS.contains(&token_type) {
37430            if !matches!(
37431                self.config.dialect,
37432                Some(crate::dialects::DialectType::ClickHouse)
37433            ) || token_type != TokenType::CurrentTimestamp
37434            {
37435                return true;
37436            }
37437        }
37438        let text_upper = self.peek().text.to_ascii_uppercase();
37439        if crate::function_registry::is_no_paren_function_name_upper(text_upper.as_str()) {
37440            if !matches!(
37441                self.config.dialect,
37442                Some(crate::dialects::DialectType::ClickHouse)
37443            ) || text_upper.as_str() != "CURRENT_TIMESTAMP"
37444            {
37445                return true;
37446            }
37447        }
37448        false
37449    }
37450
37451    /// Match a keyword by text (case-insensitive)
37452    fn match_keyword(&mut self, keyword: &str) -> bool {
37453        if self.is_at_end() {
37454            return false;
37455        }
37456        if self.peek().text.eq_ignore_ascii_case(keyword) {
37457            self.skip();
37458            true
37459        } else {
37460            false
37461        }
37462    }
37463
37464    /// Match a sequence of keywords by text (case-insensitive)
37465    fn match_text_seq(&mut self, keywords: &[&str]) -> bool {
37466        for (i, &kw) in keywords.iter().enumerate() {
37467            if self.current + i >= self.tokens.len() {
37468                return false;
37469            }
37470            if !self.tokens[self.current + i].text.eq_ignore_ascii_case(kw) {
37471                return false;
37472            }
37473        }
37474        self.current += keywords.len();
37475        true
37476    }
37477
37478    /// Check (without consuming) if the next tokens match a sequence of keywords by text (case-insensitive)
37479    fn check_text_seq(&self, keywords: &[&str]) -> bool {
37480        for (i, &kw) in keywords.iter().enumerate() {
37481            if self.current + i >= self.tokens.len() {
37482                return false;
37483            }
37484            if !self.tokens[self.current + i].text.eq_ignore_ascii_case(kw) {
37485                return false;
37486            }
37487        }
37488        true
37489    }
37490
37491    /// Match any of the given texts (case-insensitive)
37492    fn match_texts(&mut self, texts: &[&str]) -> bool {
37493        if self.is_at_end() {
37494            return false;
37495        }
37496        for text in texts {
37497            if self.peek().text.eq_ignore_ascii_case(text) {
37498                self.skip();
37499                return true;
37500            }
37501        }
37502        false
37503    }
37504
37505    /// Parse CASE expression
37506    fn parse_case(&mut self) -> Result<Expression> {
37507        self.expect(TokenType::Case)?;
37508        // Capture trailing comments from the CASE keyword (e.g., CASE /* test */ WHEN ...)
37509        let case_comments = self.previous_trailing_comments().to_vec();
37510
37511        // Check for simple CASE (CASE expr WHEN ...)
37512        let operand = if !self.check(TokenType::When) {
37513            Some(self.parse_expression()?)
37514        } else {
37515            None
37516        };
37517
37518        let mut whens = Vec::new();
37519        while self.match_token(TokenType::When) {
37520            let condition = self.parse_expression()?;
37521            self.expect(TokenType::Then)?;
37522            let mut result = self.parse_expression()?;
37523            // ClickHouse: CASE WHEN x THEN 1 as alias WHEN y THEN alias / 2 END
37524            // Aliases can appear in CASE THEN expressions
37525            if matches!(
37526                self.config.dialect,
37527                Some(crate::dialects::DialectType::ClickHouse)
37528            ) && self.match_token(TokenType::As)
37529            {
37530                let alias = self.expect_identifier_or_keyword()?;
37531                result = Expression::Alias(Box::new(Alias {
37532                    this: result,
37533                    alias: Identifier::new(alias),
37534                    column_aliases: Vec::new(),
37535                    pre_alias_comments: Vec::new(),
37536                    trailing_comments: Vec::new(),
37537                    inferred_type: None,
37538                }));
37539            }
37540            whens.push((condition, result));
37541        }
37542
37543        let else_ = if self.match_token(TokenType::Else) {
37544            Some(self.parse_expression()?)
37545        } else {
37546            None
37547        };
37548
37549        self.expect(TokenType::End)?;
37550
37551        Ok(Expression::Case(Box::new(Case {
37552            operand,
37553            whens,
37554            else_,
37555            comments: case_comments,
37556            inferred_type: None,
37557        })))
37558    }
37559
37560    /// Parse CAST expression
37561    fn parse_cast(&mut self) -> Result<Expression> {
37562        self.expect(TokenType::Cast)?;
37563        self.expect(TokenType::LParen)?;
37564        // Use parse_or() instead of parse_expression() to avoid consuming AS
37565        // as an alias (e.g. CAST((1, 2) AS Tuple(a Int8, b Int16)))
37566        // Python sqlglot uses _parse_disjunction() here, which is equivalent.
37567        let expr = self.parse_or()?;
37568
37569        // ClickHouse: ternary operator inside CAST: CAST(cond ? true_val : false_val AS Type)
37570        let expr = if matches!(
37571            self.config.dialect,
37572            Some(crate::dialects::DialectType::ClickHouse)
37573        ) && self.match_token(TokenType::Parameter)
37574        {
37575            if self.check(TokenType::Colon) {
37576                return Err(
37577                    self.parse_error("Expected true expression after ? in ClickHouse ternary")
37578                );
37579            }
37580            let true_value = self.parse_or()?;
37581            let false_value = if self.match_token(TokenType::Colon) {
37582                self.parse_or()?
37583            } else {
37584                Expression::Null(Null)
37585            };
37586            Expression::IfFunc(Box::new(IfFunc {
37587                original_name: None,
37588                condition: expr,
37589                true_value,
37590                false_value: Some(false_value),
37591                inferred_type: None,
37592            }))
37593        } else {
37594            expr
37595        };
37596
37597        // ClickHouse: implicit alias in CAST: cast('1234' lhs AS UInt32) or cast('1234' lhs, 'UInt32')
37598        let expr = self.try_clickhouse_implicit_alias(expr);
37599
37600        // ClickHouse: CAST(expr, 'type_string') or CAST(expr, expression) syntax with comma instead of AS
37601        if matches!(
37602            self.config.dialect,
37603            Some(crate::dialects::DialectType::ClickHouse)
37604        ) && self.match_token(TokenType::Comma)
37605        {
37606            // Parse as expression to handle concat and other operations: CAST(x, 'Str' || 'ing')
37607            let type_expr = self.parse_expression()?;
37608            // ClickHouse: alias on type expr: cast('1234' lhs, 'UInt32' rhs) or cast('1234', 'UInt32' AS rhs)
37609            let type_expr = self.try_clickhouse_func_arg_alias(type_expr);
37610            self.expect(TokenType::RParen)?;
37611            let _trailing_comments = self.previous_trailing_comments().to_vec();
37612            return Ok(Expression::CastToStrType(Box::new(CastToStrType {
37613                this: Box::new(expr),
37614                to: Some(Box::new(type_expr)),
37615            })));
37616        }
37617
37618        self.expect(TokenType::As)?;
37619
37620        // ClickHouse: CAST(expr AS alias AS Type) — inner alias before type
37621        // If the next token is an identifier followed by AS, treat it as an alias
37622        let expr = if matches!(
37623            self.config.dialect,
37624            Some(crate::dialects::DialectType::ClickHouse)
37625        ) && (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
37626            && self
37627                .peek_nth(1)
37628                .map_or(false, |t| t.token_type == TokenType::As)
37629        {
37630            let alias = self.expect_identifier_or_keyword_with_quoted()?;
37631            self.expect(TokenType::As)?;
37632            Expression::Alias(Box::new(Alias::new(expr, alias)))
37633        } else if matches!(
37634            self.config.dialect,
37635            Some(crate::dialects::DialectType::ClickHouse)
37636        ) && (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
37637            && self
37638                .peek_nth(1)
37639                .map_or(false, |t| t.token_type == TokenType::Comma)
37640        {
37641            // ClickHouse: CAST(expr AS alias, type_string) — alias before comma syntax
37642            let alias = self.expect_identifier_or_keyword_with_quoted()?;
37643            let expr = Expression::Alias(Box::new(Alias::new(expr, alias)));
37644            self.expect(TokenType::Comma)?;
37645            let type_expr = self.parse_expression()?;
37646            let type_expr = self.try_clickhouse_func_arg_alias(type_expr);
37647            self.expect(TokenType::RParen)?;
37648            let _trailing_comments = self.previous_trailing_comments().to_vec();
37649            return Ok(Expression::CastToStrType(Box::new(CastToStrType {
37650                this: Box::new(expr),
37651                to: Some(Box::new(type_expr)),
37652            })));
37653        } else {
37654            expr
37655        };
37656
37657        // Teradata: CAST(x AS FORMAT 'fmt') (no explicit type)
37658        if matches!(
37659            self.config.dialect,
37660            Some(crate::dialects::DialectType::Teradata)
37661        ) && self.match_token(TokenType::Format)
37662        {
37663            let format = Some(Box::new(self.parse_expression()?));
37664            self.expect(TokenType::RParen)?;
37665            let trailing_comments = self.previous_trailing_comments().to_vec();
37666            return Ok(Expression::Cast(Box::new(Cast {
37667                this: expr,
37668                to: DataType::Unknown,
37669                trailing_comments,
37670                double_colon_syntax: false,
37671                format,
37672                default: None,
37673                inferred_type: None,
37674            })));
37675        }
37676
37677        let data_type = self.parse_data_type()?;
37678
37679        // Parse optional DEFAULT ... ON CONVERSION ERROR (Oracle)
37680        // CAST(x AS type DEFAULT val ON CONVERSION ERROR)
37681        let default = if self.match_token(TokenType::Default) {
37682            let default_val = self.parse_primary()?;
37683            // Expect "ON CONVERSION ERROR"
37684            if !self.match_text_seq(&["ON", "CONVERSION", "ERROR"]) {
37685                return Err(self.parse_error("Expected ON CONVERSION ERROR"));
37686            }
37687            Some(Box::new(default_val))
37688        } else {
37689            None
37690        };
37691
37692        // Parse optional FORMAT clause for BigQuery: CAST(x AS STRING FORMAT 'format_string')
37693        // Or for Oracle with comma: CAST(x AS DATE DEFAULT NULL ON CONVERSION ERROR, 'format')
37694        // FORMAT string may be optionally wrapped in parentheses: FORMAT ('YYYY') -> FORMAT 'YYYY'
37695        let format = if self.match_token(TokenType::Format) {
37696            let wrapped = self.match_token(TokenType::LParen);
37697            let fmt_expr = self.parse_primary()?;
37698            if wrapped {
37699                self.expect(TokenType::RParen)?;
37700            }
37701            // Check for AT TIME ZONE after format string
37702            let fmt_with_tz = if self.match_text_seq(&["AT", "TIME", "ZONE"]) {
37703                let zone = self.parse_primary()?;
37704                Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
37705                    this: fmt_expr,
37706                    zone,
37707                }))
37708            } else {
37709                fmt_expr
37710            };
37711            Some(Box::new(fmt_with_tz))
37712        } else if self.match_token(TokenType::Comma) {
37713            // Oracle date format: CAST(x AS DATE, 'format')
37714            Some(Box::new(self.parse_expression()?))
37715        } else {
37716            None
37717        };
37718
37719        self.expect(TokenType::RParen)?;
37720        let trailing_comments = self.previous_trailing_comments().to_vec();
37721
37722        Ok(Expression::Cast(Box::new(Cast {
37723            this: expr,
37724            to: data_type,
37725            trailing_comments,
37726            double_colon_syntax: false,
37727            format,
37728            default,
37729            inferred_type: None,
37730        })))
37731    }
37732
37733    /// Parse TRY_CAST expression
37734    fn parse_try_cast(&mut self) -> Result<Expression> {
37735        self.expect(TokenType::TryCast)?;
37736        self.expect(TokenType::LParen)?;
37737        let expr = self.parse_or()?;
37738        self.expect(TokenType::As)?;
37739        let data_type = self.parse_data_type()?;
37740
37741        // Parse optional FORMAT clause
37742        let format = if self.match_token(TokenType::Format) {
37743            Some(Box::new(self.parse_expression()?))
37744        } else {
37745            None
37746        };
37747
37748        self.expect(TokenType::RParen)?;
37749        let trailing_comments = self.previous_trailing_comments().to_vec();
37750
37751        Ok(Expression::TryCast(Box::new(Cast {
37752            this: expr,
37753            to: data_type,
37754            trailing_comments,
37755            double_colon_syntax: false,
37756            format,
37757            default: None,
37758            inferred_type: None,
37759        })))
37760    }
37761
37762    /// Parse SAFE_CAST expression (BigQuery)
37763    fn parse_safe_cast(&mut self) -> Result<Expression> {
37764        self.expect(TokenType::SafeCast)?;
37765        self.expect(TokenType::LParen)?;
37766        let expr = self.parse_or()?;
37767        self.expect(TokenType::As)?;
37768        let data_type = self.parse_data_type()?;
37769
37770        // Parse optional FORMAT clause
37771        let format = if self.match_token(TokenType::Format) {
37772            Some(Box::new(self.parse_expression()?))
37773        } else {
37774            None
37775        };
37776
37777        self.expect(TokenType::RParen)?;
37778        let trailing_comments = self.previous_trailing_comments().to_vec();
37779
37780        Ok(Expression::SafeCast(Box::new(Cast {
37781            this: expr,
37782            to: data_type,
37783            trailing_comments,
37784            double_colon_syntax: false,
37785            format,
37786            default: None,
37787            inferred_type: None,
37788        })))
37789    }
37790
37791    /// Parse a data type
37792    fn parse_data_type(&mut self) -> Result<DataType> {
37793        // Handle special token types that represent data type keywords
37794        // Teradata tokenizes ST_GEOMETRY as TokenType::Geometry
37795        if self.check(TokenType::Geometry) {
37796            let _token = self.advance();
37797            let (subtype, srid) = self.parse_spatial_type_args()?;
37798            return Ok(DataType::Geometry { subtype, srid });
37799        }
37800        // Data types can be keywords (DATE, TIMESTAMP, etc.) or identifiers
37801        let mut raw_name = self.expect_identifier_or_keyword()?;
37802        // Allow dotted custom types like SYSUDTLIB.INT
37803        while self.match_token(TokenType::Dot) {
37804            let part = self.expect_identifier_or_keyword()?;
37805            raw_name.push('.');
37806            raw_name.push_str(&part);
37807        }
37808        let mut name = raw_name.to_ascii_uppercase();
37809
37810        // SQL standard: NATIONAL CHAR/CHARACTER → NCHAR
37811        if name == "NATIONAL" {
37812            let next_upper = if !self.is_at_end() {
37813                self.peek().text.to_ascii_uppercase()
37814            } else {
37815                String::new()
37816            };
37817            if next_upper == "CHAR" || next_upper == "CHARACTER" {
37818                self.skip(); // consume CHAR/CHARACTER
37819                name = "NCHAR".to_string();
37820                // NATIONAL CHARACTER VARYING → NVARCHAR equivalent
37821                if next_upper == "CHARACTER" && self.check_identifier("VARYING") {
37822                    self.skip(); // consume VARYING
37823                    let length = if self.match_token(TokenType::LParen) {
37824                        if self.check(TokenType::RParen) {
37825                            self.skip();
37826                            None
37827                        } else {
37828                            let n = self.expect_number()? as u32;
37829                            self.expect(TokenType::RParen)?;
37830                            Some(n)
37831                        }
37832                    } else {
37833                        None
37834                    };
37835                    return Ok(DataType::VarChar {
37836                        length,
37837                        parenthesized_length: false,
37838                    });
37839                }
37840            }
37841        }
37842
37843        let base_type = match name.as_str() {
37844            "INT" | "INTEGER" => {
37845                // MySQL allows INT(N) for display width; ClickHouse allows INT()
37846                let length = if self.match_token(TokenType::LParen) {
37847                    if self.check(TokenType::RParen) {
37848                        self.skip();
37849                        None
37850                    } else {
37851                        let n = self.expect_number()? as u32;
37852                        self.expect(TokenType::RParen)?;
37853                        Some(n)
37854                    }
37855                } else {
37856                    None
37857                };
37858                let integer_spelling = name == "INTEGER";
37859                Ok(DataType::Int {
37860                    length,
37861                    integer_spelling,
37862                })
37863            }
37864            "BIGINT" => {
37865                // MySQL allows BIGINT(N) for display width; ClickHouse allows BIGINT()
37866                let length = if self.match_token(TokenType::LParen) {
37867                    if self.check(TokenType::RParen) {
37868                        self.skip();
37869                        None
37870                    } else {
37871                        let n = self.expect_number()? as u32;
37872                        self.expect(TokenType::RParen)?;
37873                        Some(n)
37874                    }
37875                } else {
37876                    None
37877                };
37878                Ok(DataType::BigInt { length })
37879            }
37880            "SMALLINT" => {
37881                let length = if self.match_token(TokenType::LParen) {
37882                    if self.check(TokenType::RParen) {
37883                        self.skip();
37884                        None
37885                    } else {
37886                        let n = self.expect_number()? as u32;
37887                        self.expect(TokenType::RParen)?;
37888                        Some(n)
37889                    }
37890                } else {
37891                    None
37892                };
37893                Ok(DataType::SmallInt { length })
37894            }
37895            "TINYINT" => {
37896                let length = if self.match_token(TokenType::LParen) {
37897                    if self.check(TokenType::RParen) {
37898                        self.skip();
37899                        None
37900                    } else {
37901                        let n = self.expect_number()? as u32;
37902                        self.expect(TokenType::RParen)?;
37903                        Some(n)
37904                    }
37905                } else {
37906                    None
37907                };
37908                Ok(DataType::TinyInt { length })
37909            }
37910            "FLOAT" | "REAL" => {
37911                let real_spelling = name == "REAL";
37912                // MySQL allows FLOAT(precision) or FLOAT(precision, scale)
37913                let (precision, scale) = if self.match_token(TokenType::LParen) {
37914                    let p = self.expect_number()? as u32;
37915                    let s = if self.match_token(TokenType::Comma) {
37916                        Some(self.expect_number()? as u32)
37917                    } else {
37918                        None
37919                    };
37920                    self.expect(TokenType::RParen)?;
37921                    (Some(p), s)
37922                } else {
37923                    (None, None)
37924                };
37925                Ok(DataType::Float {
37926                    precision,
37927                    scale,
37928                    real_spelling,
37929                })
37930            }
37931            "BINARY_FLOAT" => {
37932                // Oracle's BINARY_FLOAT -> DataType::Float
37933                Ok(DataType::Float {
37934                    precision: None,
37935                    scale: None,
37936                    real_spelling: false,
37937                })
37938            }
37939            "BINARY_DOUBLE" => {
37940                // Oracle's BINARY_DOUBLE -> DataType::Double
37941                Ok(DataType::Double {
37942                    precision: None,
37943                    scale: None,
37944                })
37945            }
37946            "DOUBLE" => {
37947                // Handle DOUBLE PRECISION (PostgreSQL standard SQL)
37948                let _ = self.match_identifier("PRECISION");
37949                // MySQL allows DOUBLE(precision, scale)
37950                let (precision, scale) = if self.match_token(TokenType::LParen) {
37951                    let p = self.expect_number()? as u32;
37952                    let s = if self.match_token(TokenType::Comma) {
37953                        Some(self.expect_number()? as u32)
37954                    } else {
37955                        None
37956                    };
37957                    self.expect(TokenType::RParen)?;
37958                    (Some(p), s)
37959                } else {
37960                    (None, None)
37961                };
37962                Ok(DataType::Double { precision, scale })
37963            }
37964            "DECIMAL" | "NUMERIC" => {
37965                let (precision, scale) = if self.match_token(TokenType::LParen) {
37966                    let p = self.expect_number()? as u32;
37967                    let s = if self.match_token(TokenType::Comma) {
37968                        Some(self.expect_number()? as u32)
37969                    } else {
37970                        None
37971                    };
37972                    self.expect(TokenType::RParen)?;
37973                    (Some(p), s)
37974                } else {
37975                    (None, None)
37976                };
37977                Ok(DataType::Decimal { precision, scale })
37978            }
37979            "BOOLEAN" | "BOOL" => Ok(DataType::Boolean),
37980            "CHAR" | "CHARACTER" | "NCHAR" => {
37981                let is_nchar = name == "NCHAR";
37982                // SQL standard: CHARACTER LARGE OBJECT → CLOB/TEXT
37983                if self.match_identifier("LARGE") && self.match_identifier("OBJECT") {
37984                    return Ok(DataType::Text);
37985                }
37986                // Check for VARYING to convert to VARCHAR (SQL standard: CHAR VARYING, CHARACTER VARYING)
37987                if self.match_identifier("VARYING") {
37988                    let length = if self.match_token(TokenType::LParen) {
37989                        if self.check(TokenType::RParen) {
37990                            self.skip();
37991                            None
37992                        } else {
37993                            let n = self.expect_number()? as u32;
37994                            self.expect(TokenType::RParen)?;
37995                            Some(n)
37996                        }
37997                    } else {
37998                        None
37999                    };
38000                    Ok(DataType::VarChar {
38001                        length,
38002                        parenthesized_length: false,
38003                    })
38004                } else {
38005                    let length = if self.match_token(TokenType::LParen) {
38006                        // Allow empty parens like NCHAR() - treat as no length specified
38007                        if self.check(TokenType::RParen) {
38008                            self.skip(); // consume RParen
38009                            None
38010                        } else {
38011                            let n = self.expect_number()? as u32;
38012                            self.expect(TokenType::RParen)?;
38013                            Some(n)
38014                        }
38015                    } else {
38016                        None
38017                    };
38018                    // CHAR CHARACTER SET charset (MySQL CAST context, no length)
38019                    // When length is specified (e.g., CHAR(4) CHARACTER SET LATIN),
38020                    // CHARACTER SET is a column attribute handled at the column def level
38021                    if length.is_none()
38022                        && self.match_identifier("CHARACTER")
38023                        && self.match_token(TokenType::Set)
38024                    {
38025                        let charset = self.expect_identifier_or_keyword()?;
38026                        return Ok(DataType::CharacterSet { name: charset });
38027                    }
38028                    // Preserve NCHAR as Custom DataType so target dialects can map it properly
38029                    // (Oracle keeps NCHAR, TSQL keeps NCHAR, others map to CHAR)
38030                    if is_nchar {
38031                        let name = if let Some(len) = length {
38032                            format!("NCHAR({})", len)
38033                        } else {
38034                            "NCHAR".to_string()
38035                        };
38036                        return Ok(DataType::Custom { name });
38037                    }
38038                    Ok(DataType::Char { length })
38039                }
38040            }
38041            "VARCHAR" | "NVARCHAR" => {
38042                let is_nvarchar = name == "NVARCHAR";
38043                if self.match_token(TokenType::LParen) {
38044                    // Allow empty parens like NVARCHAR() - treat as no length specified
38045                    if self.check(TokenType::RParen) {
38046                        self.skip(); // consume RParen
38047                        if is_nvarchar {
38048                            return Ok(DataType::Custom {
38049                                name: "NVARCHAR".to_string(),
38050                            });
38051                        }
38052                        Ok(DataType::VarChar {
38053                            length: None,
38054                            parenthesized_length: false,
38055                        })
38056                    } else if self.check_identifier("MAX") {
38057                        // TSQL: VARCHAR(MAX) / NVARCHAR(MAX)
38058                        self.skip(); // consume MAX
38059                        self.expect(TokenType::RParen)?;
38060                        let type_name = if is_nvarchar {
38061                            "NVARCHAR(MAX)"
38062                        } else {
38063                            "VARCHAR(MAX)"
38064                        };
38065                        Ok(DataType::Custom {
38066                            name: type_name.to_string(),
38067                        })
38068                    } else {
38069                        // Hive allows VARCHAR((50)) - extra parentheses around the length
38070                        let parenthesized_length = self.match_token(TokenType::LParen);
38071                        let n = self.expect_number()? as u32;
38072                        if parenthesized_length {
38073                            self.expect(TokenType::RParen)?;
38074                        }
38075                        self.expect(TokenType::RParen)?;
38076                        // Preserve NVARCHAR as Custom DataType so target dialects can map properly
38077                        if is_nvarchar {
38078                            return Ok(DataType::Custom {
38079                                name: format!("NVARCHAR({})", n),
38080                            });
38081                        }
38082                        Ok(DataType::VarChar {
38083                            length: Some(n),
38084                            parenthesized_length,
38085                        })
38086                    }
38087                } else {
38088                    if is_nvarchar {
38089                        return Ok(DataType::Custom {
38090                            name: "NVARCHAR".to_string(),
38091                        });
38092                    }
38093                    Ok(DataType::VarChar {
38094                        length: None,
38095                        parenthesized_length: false,
38096                    })
38097                }
38098            }
38099            "TEXT" | "NTEXT" => {
38100                // TEXT(n) - optional length parameter
38101                if self.match_token(TokenType::LParen) {
38102                    let n = self.expect_number()? as u32;
38103                    self.expect(TokenType::RParen)?;
38104                    Ok(DataType::TextWithLength { length: n })
38105                } else {
38106                    Ok(DataType::Text)
38107                }
38108            }
38109            "STRING" => {
38110                // BigQuery STRING(n) - parameterized string with max length
38111                let length = if self.match_token(TokenType::LParen) {
38112                    let n = self.expect_number()? as u32;
38113                    self.expect(TokenType::RParen)?;
38114                    Some(n)
38115                } else {
38116                    None
38117                };
38118                Ok(DataType::String { length })
38119            }
38120            "DATE" => Ok(DataType::Date),
38121            "TIME" => {
38122                // ClickHouse: Time('timezone') is a custom type with string arg
38123                if matches!(
38124                    self.config.dialect,
38125                    Some(crate::dialects::DialectType::ClickHouse)
38126                ) && self.check(TokenType::LParen)
38127                    && self.current + 1 < self.tokens.len()
38128                    && self.tokens[self.current + 1].token_type == TokenType::String
38129                {
38130                    self.skip(); // consume LParen
38131                    let args = self.parse_custom_type_args_balanced()?;
38132                    self.expect(TokenType::RParen)?;
38133                    return Ok(DataType::Custom {
38134                        name: format!("Time({})", args),
38135                    });
38136                }
38137                let precision = if self.match_token(TokenType::LParen) {
38138                    if self.check(TokenType::RParen) {
38139                        self.skip();
38140                        None
38141                    } else {
38142                        let p = self.expect_number()? as u32;
38143                        self.expect(TokenType::RParen)?;
38144                        Some(p)
38145                    }
38146                } else {
38147                    None
38148                };
38149                // Handle TIME WITH/WITHOUT TIME ZONE
38150                let timezone = if self.match_token(TokenType::With) {
38151                    self.match_keyword("TIME");
38152                    self.match_keyword("ZONE");
38153                    true
38154                } else if self.match_keyword("WITHOUT") {
38155                    self.match_keyword("TIME");
38156                    self.match_keyword("ZONE");
38157                    false
38158                } else {
38159                    false
38160                };
38161                Ok(DataType::Time {
38162                    precision,
38163                    timezone,
38164                })
38165            }
38166            "TIMETZ" => {
38167                let precision = if self.match_token(TokenType::LParen) {
38168                    let p = self.expect_number()? as u32;
38169                    self.expect(TokenType::RParen)?;
38170                    Some(p)
38171                } else {
38172                    None
38173                };
38174                Ok(DataType::Time {
38175                    precision,
38176                    timezone: true,
38177                })
38178            }
38179            "TIMESTAMP" => {
38180                // Parse optional precision: TIMESTAMP(p)
38181                let precision = if self.match_token(TokenType::LParen) {
38182                    let p = self.expect_number()? as u32;
38183                    self.expect(TokenType::RParen)?;
38184                    Some(p)
38185                } else {
38186                    None
38187                };
38188                // Parse optional WITH/WITHOUT TIME ZONE or WITH LOCAL TIME ZONE
38189                // Note: TIME is a keyword (TokenType::Time) and LOCAL is a keyword (TokenType::Local)
38190                if self.match_token(TokenType::With) {
38191                    // Check for LOCAL TIME ZONE (Exasol) vs TIME ZONE
38192                    // LOCAL is tokenized as TokenType::Local, not as Identifier
38193                    if self.match_token(TokenType::Local) {
38194                        self.match_keyword("TIME");
38195                        self.match_keyword("ZONE");
38196                        // TIMESTAMP WITH LOCAL TIME ZONE - return as custom type for Exasol handling
38197                        Ok(DataType::Custom {
38198                            name: "TIMESTAMPLTZ".to_string(),
38199                        })
38200                    } else {
38201                        self.match_keyword("TIME");
38202                        self.match_keyword("ZONE");
38203                        Ok(DataType::Timestamp {
38204                            precision,
38205                            timezone: true,
38206                        })
38207                    }
38208                } else if self.match_keyword("WITHOUT") {
38209                    self.match_keyword("TIME");
38210                    self.match_keyword("ZONE");
38211                    Ok(DataType::Timestamp {
38212                        precision,
38213                        timezone: false,
38214                    })
38215                } else {
38216                    Ok(DataType::Timestamp {
38217                        precision,
38218                        timezone: false,
38219                    })
38220                }
38221            }
38222            "TIMESTAMPTZ" => {
38223                let precision = if self.match_token(TokenType::LParen) {
38224                    let p = self.expect_number()? as u32;
38225                    self.expect(TokenType::RParen)?;
38226                    Some(p)
38227                } else {
38228                    None
38229                };
38230                Ok(DataType::Timestamp {
38231                    precision,
38232                    timezone: true,
38233                })
38234            }
38235            "TIMESTAMPLTZ" | "TIMESTAMP_LTZ" => {
38236                let precision = if self.match_token(TokenType::LParen) {
38237                    let p = self.expect_number()? as u32;
38238                    self.expect(TokenType::RParen)?;
38239                    Some(p)
38240                } else {
38241                    None
38242                };
38243                let name = if let Some(p) = precision {
38244                    format!("TIMESTAMPLTZ({})", p)
38245                } else {
38246                    "TIMESTAMPLTZ".to_string()
38247                };
38248                Ok(DataType::Custom { name })
38249            }
38250            "INTERVAL" => {
38251                // Parse optional unit (DAYS, DAY, HOUR, etc.)
38252                // Don't consume GENERATED, AS, NOT, NULL, etc. which are column constraints
38253                let unit = if (self.check(TokenType::Identifier)
38254                    || self.check(TokenType::Var)
38255                    || self.check_keyword())
38256                    && !self.check(TokenType::Generated)
38257                    && !self.check(TokenType::As)
38258                    && !self.check(TokenType::Not)
38259                    && !self.check(TokenType::Null)
38260                    && !self.check(TokenType::Default)
38261                    && !self.check(TokenType::PrimaryKey)
38262                    && !self.check(TokenType::Unique)
38263                    && !self.check(TokenType::Check)
38264                    && !self.check(TokenType::Constraint)
38265                    && !self.check(TokenType::References)
38266                    && !self.check(TokenType::Collate)
38267                    && !self.check(TokenType::Comment)
38268                    && !self.check(TokenType::RParen)
38269                    && !self.check(TokenType::Comma)
38270                {
38271                    Some(self.advance().text.to_ascii_uppercase())
38272                } else {
38273                    None
38274                };
38275                // Parse optional TO unit for range intervals like DAY TO HOUR
38276                let to = if self.match_token(TokenType::To) {
38277                    if self.check(TokenType::Identifier)
38278                        || self.check(TokenType::Var)
38279                        || self.check_keyword()
38280                    {
38281                        Some(self.advance().text.to_ascii_uppercase())
38282                    } else {
38283                        None
38284                    }
38285                } else {
38286                    None
38287                };
38288                Ok(DataType::Interval { unit, to })
38289            }
38290            "JSON" => {
38291                if matches!(
38292                    self.config.dialect,
38293                    Some(crate::dialects::DialectType::ClickHouse)
38294                ) && self.match_token(TokenType::LParen)
38295                {
38296                    // ClickHouse: JSON(subcolumn_specs) e.g. JSON(a String, b UInt32) or JSON(max_dynamic_paths=8)
38297                    let args = self.parse_custom_type_args_balanced()?;
38298                    self.expect(TokenType::RParen)?;
38299                    // Uppercase the SKIP keyword in JSON type declarations
38300                    // e.g., "col1 String, skip col2" -> "col1 String, SKIP col2"
38301                    let args = Self::uppercase_json_type_skip_keyword(&args);
38302                    Ok(DataType::Custom {
38303                        name: format!("JSON({})", args),
38304                    })
38305                } else {
38306                    Ok(DataType::Json)
38307                }
38308            }
38309            "JSONB" => Ok(DataType::JsonB),
38310            "UUID" => Ok(DataType::Uuid),
38311            "BLOB" => Ok(DataType::Blob),
38312            "BYTEA" => Ok(DataType::VarBinary { length: None }),
38313            "BIT" => {
38314                let length = if self.match_token(TokenType::LParen) {
38315                    let n = self.expect_number()? as u32;
38316                    self.expect(TokenType::RParen)?;
38317                    Some(n)
38318                } else {
38319                    None
38320                };
38321                Ok(DataType::Bit { length })
38322            }
38323            "VARBIT" | "BIT VARYING" => {
38324                let length = if self.match_token(TokenType::LParen) {
38325                    let n = self.expect_number()? as u32;
38326                    self.expect(TokenType::RParen)?;
38327                    Some(n)
38328                } else {
38329                    None
38330                };
38331                Ok(DataType::VarBit { length })
38332            }
38333            "BINARY" => {
38334                // SQL standard: BINARY LARGE OBJECT → BLOB
38335                if self.match_identifier("LARGE") && self.match_identifier("OBJECT") {
38336                    return Ok(DataType::Blob);
38337                }
38338                // Handle BINARY VARYING (SQL standard for VARBINARY)
38339                if self.match_identifier("VARYING") {
38340                    let length = if self.match_token(TokenType::LParen) {
38341                        let len = self.expect_number()? as u32;
38342                        self.expect(TokenType::RParen)?;
38343                        Some(len)
38344                    } else {
38345                        None
38346                    };
38347                    Ok(DataType::VarBinary { length })
38348                } else {
38349                    let length = if self.match_token(TokenType::LParen) {
38350                        let len = self.expect_number()? as u32;
38351                        self.expect(TokenType::RParen)?;
38352                        Some(len)
38353                    } else {
38354                        None
38355                    };
38356                    Ok(DataType::Binary { length })
38357                }
38358            }
38359            "VARBINARY" => {
38360                let length = if self.match_token(TokenType::LParen) {
38361                    let len = self.expect_number()? as u32;
38362                    self.expect(TokenType::RParen)?;
38363                    Some(len)
38364                } else {
38365                    None
38366                };
38367                Ok(DataType::VarBinary { length })
38368            }
38369            // Generic types with angle bracket or parentheses syntax: ARRAY<T>, ARRAY(T), MAP<K,V>, MAP(K,V)
38370            "ARRAY" => {
38371                if self.match_token(TokenType::Lt) {
38372                    // ARRAY<element_type> - angle bracket style
38373                    let element_type = self.parse_data_type()?;
38374                    self.expect_gt()?;
38375                    Ok(DataType::Array {
38376                        element_type: Box::new(element_type),
38377                        dimension: None,
38378                    })
38379                } else if self.match_token(TokenType::LParen) {
38380                    // ARRAY(element_type) - Snowflake parentheses style
38381                    let element_type = self.parse_data_type()?;
38382                    self.expect(TokenType::RParen)?;
38383                    Ok(DataType::Array {
38384                        element_type: Box::new(element_type),
38385                        dimension: None,
38386                    })
38387                } else {
38388                    // Just ARRAY without type parameter
38389                    Ok(DataType::Custom {
38390                        name: "ARRAY".to_string(),
38391                    })
38392                }
38393            }
38394            "MAP" => {
38395                if self.match_token(TokenType::Lt) {
38396                    // MAP<key_type, value_type> - angle bracket style
38397                    let key_type = self.parse_data_type()?;
38398                    self.expect(TokenType::Comma)?;
38399                    let value_type = self.parse_data_type()?;
38400                    self.expect_gt()?;
38401                    Ok(DataType::Map {
38402                        key_type: Box::new(key_type),
38403                        value_type: Box::new(value_type),
38404                    })
38405                } else if self.match_token(TokenType::LBracket) {
38406                    // Materialize: MAP[TEXT => INT] type syntax
38407                    let key_type = self.parse_data_type()?;
38408                    self.expect(TokenType::FArrow)?;
38409                    let value_type = self.parse_data_type()?;
38410                    self.expect(TokenType::RBracket)?;
38411                    Ok(DataType::Map {
38412                        key_type: Box::new(key_type),
38413                        value_type: Box::new(value_type),
38414                    })
38415                } else if self.match_token(TokenType::LParen) {
38416                    // MAP(key_type, value_type) - Snowflake parentheses style
38417                    let key_type = self.parse_data_type()?;
38418                    self.expect(TokenType::Comma)?;
38419                    let value_type = self.parse_data_type()?;
38420                    self.expect(TokenType::RParen)?;
38421                    Ok(DataType::Map {
38422                        key_type: Box::new(key_type),
38423                        value_type: Box::new(value_type),
38424                    })
38425                } else {
38426                    // Just MAP without type parameters
38427                    Ok(DataType::Custom {
38428                        name: "MAP".to_string(),
38429                    })
38430                }
38431            }
38432            // VECTOR(type, dimension) - Snowflake vector type
38433            // VECTOR(dimension, element_type_alias) or VECTOR(dimension) - SingleStore vector type
38434            "VECTOR" => {
38435                if self.match_token(TokenType::LParen) {
38436                    if self.check(TokenType::Number) {
38437                        // SingleStore format: VECTOR(dimension) or VECTOR(dimension, type_alias)
38438                        let dimension = self.expect_number()? as u32;
38439                        let element_type = if self.match_token(TokenType::Comma) {
38440                            // Parse the type alias (I8, I16, I32, I64, F32, F64)
38441                            let type_alias = self.expect_identifier_or_keyword()?;
38442                            let mapped_type = match type_alias.to_ascii_uppercase().as_str() {
38443                                "I8" => DataType::TinyInt { length: None },
38444                                "I16" => DataType::SmallInt { length: None },
38445                                "I32" => DataType::Int {
38446                                    length: None,
38447                                    integer_spelling: false,
38448                                },
38449                                "I64" => DataType::BigInt { length: None },
38450                                "F32" => DataType::Float {
38451                                    precision: None,
38452                                    scale: None,
38453                                    real_spelling: false,
38454                                },
38455                                "F64" => DataType::Double {
38456                                    precision: None,
38457                                    scale: None,
38458                                },
38459                                _ => DataType::Custom {
38460                                    name: type_alias.to_string(),
38461                                },
38462                            };
38463                            Some(Box::new(mapped_type))
38464                        } else {
38465                            // Just dimension, no type
38466                            None
38467                        };
38468                        self.expect(TokenType::RParen)?;
38469                        Ok(DataType::Vector {
38470                            element_type,
38471                            dimension: Some(dimension),
38472                        })
38473                    } else {
38474                        // Snowflake format: VECTOR(type, dimension)
38475                        let element_type = self.parse_data_type()?;
38476                        self.expect(TokenType::Comma)?;
38477                        let dimension = self.expect_number()? as u32;
38478                        self.expect(TokenType::RParen)?;
38479                        Ok(DataType::Vector {
38480                            element_type: Some(Box::new(element_type)),
38481                            dimension: Some(dimension),
38482                        })
38483                    }
38484                } else {
38485                    Ok(DataType::Custom {
38486                        name: "VECTOR".to_string(),
38487                    })
38488                }
38489            }
38490            // OBJECT(field1 type1, field2 type2, ...) - Snowflake structured object type
38491            "OBJECT" => {
38492                if self.match_token(TokenType::LParen) {
38493                    // ClickHouse: Object('json') — string literal argument
38494                    if matches!(
38495                        self.config.dialect,
38496                        Some(crate::dialects::DialectType::ClickHouse)
38497                    ) && self.check(TokenType::String)
38498                    {
38499                        let arg = self.advance().text;
38500                        self.expect(TokenType::RParen)?;
38501                        return Ok(DataType::Custom {
38502                            name: format!("Object('{}')", arg),
38503                        });
38504                    }
38505                    let mut fields = Vec::new();
38506                    if !self.check(TokenType::RParen) {
38507                        loop {
38508                            let field_name = self.expect_identifier_or_keyword()?;
38509                            let field_type = self.parse_data_type()?;
38510                            // Optional NOT NULL constraint
38511                            let not_null = if self.match_keyword("NOT") {
38512                                // Consume NULL if present
38513                                self.match_keyword("NULL");
38514                                true
38515                            } else {
38516                                false
38517                            };
38518                            fields.push((field_name, field_type, not_null));
38519                            if !self.match_token(TokenType::Comma) {
38520                                break;
38521                            }
38522                        }
38523                    }
38524                    self.expect(TokenType::RParen)?;
38525                    // Check for RENAME FIELDS or ADD FIELDS modifier
38526                    let modifier = if self.match_keyword("RENAME") {
38527                        if self.match_keyword("FIELDS") {
38528                            Some("RENAME FIELDS".to_string())
38529                        } else {
38530                            Some("RENAME".to_string())
38531                        }
38532                    } else if self.match_keyword("ADD") {
38533                        if self.match_keyword("FIELDS") {
38534                            Some("ADD FIELDS".to_string())
38535                        } else {
38536                            Some("ADD".to_string())
38537                        }
38538                    } else {
38539                        None
38540                    };
38541                    Ok(DataType::Object { fields, modifier })
38542                } else {
38543                    Ok(DataType::Custom {
38544                        name: "OBJECT".to_string(),
38545                    })
38546                }
38547            }
38548            "STRUCT" => {
38549                if self.match_token(TokenType::Lt) {
38550                    // STRUCT<field1 type1, field2 type2, ...> - BigQuery angle-bracket syntax
38551                    let fields = self.parse_struct_type_fields(false)?;
38552                    self.expect_gt()?;
38553                    Ok(DataType::Struct {
38554                        fields,
38555                        nested: false,
38556                    })
38557                } else if self.match_token(TokenType::LParen) {
38558                    // STRUCT(field1 type1, field2 type2, ...) - DuckDB parenthesized syntax
38559                    let fields = self.parse_struct_type_fields(true)?;
38560                    self.expect(TokenType::RParen)?;
38561                    Ok(DataType::Struct {
38562                        fields,
38563                        nested: true,
38564                    })
38565                } else {
38566                    // Just STRUCT without type parameters
38567                    Ok(DataType::Custom {
38568                        name: "STRUCT".to_string(),
38569                    })
38570                }
38571            }
38572            "ROW" => {
38573                // ROW(field1 type1, field2 type2, ...) - same as STRUCT with parens
38574                if self.match_token(TokenType::LParen) {
38575                    let fields = self.parse_struct_type_fields(true)?;
38576                    self.expect(TokenType::RParen)?;
38577                    Ok(DataType::Struct {
38578                        fields,
38579                        nested: true,
38580                    })
38581                } else {
38582                    Ok(DataType::Custom {
38583                        name: "ROW".to_string(),
38584                    })
38585                }
38586            }
38587            "RECORD" => {
38588                // RECORD(field1 type1, field2 type2, ...) - SingleStore record type (like ROW/STRUCT)
38589                if self.match_token(TokenType::LParen) {
38590                    let fields = self.parse_struct_type_fields(true)?;
38591                    self.expect(TokenType::RParen)?;
38592                    // Use Struct with nested=true, generator will output RECORD for SingleStore
38593                    Ok(DataType::Struct {
38594                        fields,
38595                        nested: true,
38596                    })
38597                } else {
38598                    Ok(DataType::Custom {
38599                        name: "RECORD".to_string(),
38600                    })
38601                }
38602            }
38603            "ENUM" => {
38604                // ENUM('RED', 'GREEN', 'BLUE') - DuckDB enum type
38605                // ClickHouse: Enum('hello' = 1, 'world' = 2)
38606                // ClickHouse also allows NULL in enum: Enum('a', 'b', NULL)
38607                if self.match_token(TokenType::LParen) {
38608                    let mut values = Vec::new();
38609                    let mut assignments = Vec::new();
38610                    if !self.check(TokenType::RParen) {
38611                        loop {
38612                            let val = if matches!(
38613                                self.config.dialect,
38614                                Some(crate::dialects::DialectType::ClickHouse)
38615                            ) && self.check(TokenType::Null)
38616                            {
38617                                self.skip();
38618                                "NULL".to_string()
38619                            } else {
38620                                self.expect_string()?
38621                            };
38622                            values.push(val);
38623                            // ClickHouse: optional = value assignment (including negative numbers)
38624                            if self.match_token(TokenType::Eq) {
38625                                let negative = self.match_token(TokenType::Dash);
38626                                let num_token = self.advance();
38627                                let val = if negative {
38628                                    format!("-{}", num_token.text)
38629                                } else {
38630                                    num_token.text.clone()
38631                                };
38632                                assignments.push(Some(val));
38633                            } else {
38634                                assignments.push(None);
38635                            }
38636                            if !self.match_token(TokenType::Comma) {
38637                                break;
38638                            }
38639                        }
38640                    }
38641                    self.expect(TokenType::RParen)?;
38642                    Ok(DataType::Enum {
38643                        values,
38644                        assignments,
38645                    })
38646                } else {
38647                    Ok(DataType::Custom {
38648                        name: "ENUM".to_string(),
38649                    })
38650                }
38651            }
38652            "SET" => {
38653                // MySQL SET('a', 'b', 'c') type
38654                if self.match_token(TokenType::LParen) {
38655                    let mut values = Vec::new();
38656                    if !self.check(TokenType::RParen) {
38657                        loop {
38658                            let val = self.expect_string()?;
38659                            values.push(val);
38660                            if !self.match_token(TokenType::Comma) {
38661                                break;
38662                            }
38663                        }
38664                    }
38665                    self.expect(TokenType::RParen)?;
38666                    Ok(DataType::Set { values })
38667                } else {
38668                    Ok(DataType::Custom {
38669                        name: "SET".to_string(),
38670                    })
38671                }
38672            }
38673            "UNION" if self.check(TokenType::LParen) => {
38674                // UNION(num INT, str TEXT) - DuckDB union type (only when followed by paren)
38675                self.skip(); // consume LParen
38676                let struct_fields = self.parse_struct_type_fields(true)?;
38677                self.expect(TokenType::RParen)?;
38678                // Convert StructField to (String, DataType) for Union
38679                let fields: Vec<(String, DataType)> = struct_fields
38680                    .into_iter()
38681                    .map(|f| (f.name, f.data_type))
38682                    .collect();
38683                Ok(DataType::Union { fields })
38684            }
38685            // Spatial types
38686            "GEOMETRY" => {
38687                let (subtype, srid) = self.parse_spatial_type_args()?;
38688                Ok(DataType::Geometry { subtype, srid })
38689            }
38690            "GEOGRAPHY" => {
38691                let (subtype, srid) = self.parse_spatial_type_args()?;
38692                Ok(DataType::Geography { subtype, srid })
38693            }
38694            // MySQL spatial subtypes without wrapper
38695            "POINT" | "LINESTRING" | "POLYGON" | "MULTIPOINT" | "MULTILINESTRING"
38696            | "MULTIPOLYGON" | "GEOMETRYCOLLECTION" => {
38697                // Check for optional SRID clause (MySQL syntax)
38698                let srid = if self.match_identifier("SRID") {
38699                    Some(self.expect_number()? as u32)
38700                } else {
38701                    None
38702                };
38703                Ok(DataType::Geometry {
38704                    subtype: Some(name),
38705                    srid,
38706                })
38707            }
38708            // BigQuery ANY TYPE - templated parameter type for UDFs
38709            "ANY" => {
38710                if self.match_token(TokenType::Type) {
38711                    Ok(DataType::Custom {
38712                        name: "ANY TYPE".to_string(),
38713                    })
38714                } else {
38715                    Ok(DataType::Custom {
38716                        name: "ANY".to_string(),
38717                    })
38718                }
38719            }
38720            // LONG VARCHAR (Exasol) - same as TEXT
38721            "LONG" => {
38722                if self.match_identifier("VARCHAR") {
38723                    Ok(DataType::Text)
38724                } else {
38725                    Ok(DataType::Custom {
38726                        name: "LONG".to_string(),
38727                    })
38728                }
38729            }
38730            // MySQL SIGNED [INTEGER] / UNSIGNED [INTEGER] in CAST context
38731            // CAST(x AS SIGNED INTEGER) -> CAST(x AS SIGNED)
38732            "SIGNED" | "UNSIGNED" => {
38733                // Consume optional INTEGER keyword after SIGNED/UNSIGNED
38734                if self.check_identifier("INTEGER")
38735                    || self.check_keyword_text("INTEGER")
38736                    || self.check_keyword_text("INT")
38737                {
38738                    self.skip();
38739                }
38740                Ok(DataType::Custom { name })
38741            }
38742            // ClickHouse Nullable(T) wrapper type
38743            "NULLABLE" => {
38744                self.expect(TokenType::LParen)?;
38745                let inner = self.parse_data_type()?;
38746                self.expect(TokenType::RParen)?;
38747                Ok(DataType::Nullable {
38748                    inner: Box::new(inner),
38749                })
38750            }
38751            _ => {
38752                // Handle custom types with optional parenthesized precision/args
38753                // e.g., DATETIME2(2), DATETIMEOFFSET(7), NVARCHAR2(100)
38754                // Use uppercase name for known SQL custom types, but preserve original case
38755                // for user-defined type names (e.g., UserDefinedTableType)
38756                let is_known = convert_name_is_known_custom(&name);
38757                let custom_name = if is_known {
38758                    name.clone()
38759                } else {
38760                    raw_name.clone()
38761                };
38762                if self.match_token(TokenType::LParen) {
38763                    if matches!(
38764                        self.config.dialect,
38765                        Some(crate::dialects::DialectType::ClickHouse)
38766                    ) {
38767                        let args = self.parse_custom_type_args_balanced()?;
38768                        self.expect(TokenType::RParen)?;
38769                        Ok(DataType::Custom {
38770                            name: format!("{}({})", custom_name, args),
38771                        })
38772                    } else {
38773                        let mut args = Vec::new();
38774                        let mut after_comma = true; // treat first token as start of new arg
38775                        loop {
38776                            if self.check(TokenType::RParen) {
38777                                break;
38778                            }
38779                            let token = self.advance();
38780                            // If the previous token was space-separated (not comma-separated),
38781                            // append to the last arg. E.g., VARCHAR2(2328 CHAR) -> "2328 CHAR"
38782                            if !after_comma && !args.is_empty() {
38783                                if let Some(last) = args.last_mut() {
38784                                    *last = format!("{} {}", last, token.text);
38785                                }
38786                            } else {
38787                                args.push(token.text.clone());
38788                            }
38789                            after_comma = self.match_token(TokenType::Comma);
38790                        }
38791                        self.expect(TokenType::RParen)?;
38792                        // Include args in the name: DATETIME2(2), VARCHAR2(2328 CHAR)
38793                        Ok(DataType::Custom {
38794                            name: format!("{}({})", custom_name, args.join(", ")),
38795                        })
38796                    }
38797                } else {
38798                    Ok(DataType::Custom { name: custom_name })
38799                }
38800            }
38801        }?;
38802
38803        // UNSIGNED/SIGNED modifiers for integer types (MySQL) are handled
38804        // by the column definition parser which sets col.unsigned = true.
38805        // Do NOT consume them here; the column parser needs to see them.
38806        let mut result_type = base_type;
38807
38808        // Materialize: handle postfix LIST syntax (INT LIST, INT LIST LIST LIST)
38809        let is_materialize = matches!(
38810            self.config.dialect,
38811            Some(crate::dialects::DialectType::Materialize)
38812        );
38813        if is_materialize {
38814            while self.check_identifier("LIST") || self.check(TokenType::List) {
38815                self.skip(); // consume LIST
38816                result_type = DataType::List {
38817                    element_type: Box::new(result_type),
38818                };
38819            }
38820        }
38821
38822        // PostgreSQL array syntax: TYPE[], TYPE[N], TYPE[N][M], etc.
38823        let result_type = self.maybe_parse_array_dimensions(result_type)?;
38824
38825        // ClickHouse: mark string-like standard types as non-nullable by converting to Custom
38826        // This prevents the generator from wrapping them in Nullable() during identity transforms.
38827        // Types parsed from other dialects remain standard and will get Nullable wrapping when
38828        // transpiling to ClickHouse.
38829        if matches!(
38830            self.config.dialect,
38831            Some(crate::dialects::DialectType::ClickHouse)
38832        ) {
38833            return Ok(Self::clickhouse_mark_non_nullable(result_type));
38834        }
38835
38836        Ok(result_type)
38837    }
38838
38839    /// Convert standard types to Custom equivalents for ClickHouse to prevent Nullable wrapping.
38840    /// This mirrors Python sqlglot's behavior of marking ClickHouse-parsed types as non-nullable.
38841    fn clickhouse_mark_non_nullable(dt: DataType) -> DataType {
38842        match dt {
38843            DataType::Text => DataType::Custom {
38844                name: "String".to_string(),
38845            },
38846            DataType::VarChar { .. } => DataType::Custom {
38847                name: "String".to_string(),
38848            },
38849            DataType::Char { .. } => DataType::Custom {
38850                name: "String".to_string(),
38851            },
38852            DataType::String { .. } => DataType::Custom {
38853                name: "String".to_string(),
38854            },
38855            _ => dt,
38856        }
38857    }
38858
38859    /// Parse a data type for cast syntax (::TYPE)
38860    /// For dialects that support fixed-size arrays (like DuckDB), brackets like [3] are
38861    /// parsed as array dimensions (e.g., x::INT[3] means cast to INT[3] array type).
38862    /// For other dialects (like Snowflake), brackets are subscript operations
38863    /// (e.g., x::VARIANT[0] means cast to VARIANT, then subscript with [0]).
38864    fn parse_data_type_for_cast(&mut self) -> Result<DataType> {
38865        // Check if dialect supports array type suffixes (e.g., INT[], VARCHAR[3])
38866        // PostgreSQL: INT[], TEXT[] (no fixed size)
38867        // DuckDB: INT[3] (fixed size arrays)
38868        let supports_array_type_suffix = matches!(
38869            self.config.dialect,
38870            Some(crate::dialects::DialectType::DuckDB)
38871                | Some(crate::dialects::DialectType::PostgreSQL)
38872                | Some(crate::dialects::DialectType::Redshift)
38873        );
38874
38875        // Check if it's a quoted identifier (e.g., "udt") — preserve case and quoting
38876        let is_quoted = self.check(TokenType::QuotedIdentifier);
38877        let raw_name = self.expect_identifier_or_keyword()?;
38878        if is_quoted {
38879            // Check if the quoted name matches a known type — if so, normalize it
38880            let known_type = self.convert_name_to_type(&raw_name);
38881            if let Ok(ref dt) = known_type {
38882                if !matches!(dt, DataType::Custom { .. }) {
38883                    return known_type;
38884                }
38885            }
38886            // Truly custom type — preserve original case with quotes
38887            return Ok(DataType::Custom {
38888                name: format!("\"{}\"", raw_name),
38889            });
38890        }
38891        let name = raw_name.to_ascii_uppercase();
38892
38893        // Handle parametric types like ARRAY<T>, MAP<K,V>
38894        let base_type = match name.as_str() {
38895            "ARRAY" => {
38896                if self.match_token(TokenType::Lt) {
38897                    let element_type = self.parse_data_type()?;
38898                    self.expect_gt()?;
38899                    DataType::Array {
38900                        element_type: Box::new(element_type),
38901                        dimension: None,
38902                    }
38903                } else if self.match_token(TokenType::LParen) {
38904                    // ClickHouse: Array(Type) syntax with parentheses
38905                    let element_type = self.parse_data_type_for_cast()?;
38906                    self.expect(TokenType::RParen)?;
38907                    DataType::Array {
38908                        element_type: Box::new(element_type),
38909                        dimension: None,
38910                    }
38911                } else {
38912                    DataType::Custom { name }
38913                }
38914            }
38915            "MAP" => {
38916                if self.match_token(TokenType::Lt) {
38917                    let key_type = self.parse_data_type()?;
38918                    self.expect(TokenType::Comma)?;
38919                    let value_type = self.parse_data_type()?;
38920                    self.expect_gt()?;
38921                    DataType::Map {
38922                        key_type: Box::new(key_type),
38923                        value_type: Box::new(value_type),
38924                    }
38925                } else if self.match_token(TokenType::LParen) {
38926                    // Snowflake: MAP(key_type, value_type) syntax
38927                    let key_type = self.parse_data_type_for_cast()?;
38928                    self.expect(TokenType::Comma)?;
38929                    let value_type = self.parse_data_type_for_cast()?;
38930                    self.expect(TokenType::RParen)?;
38931                    DataType::Map {
38932                        key_type: Box::new(key_type),
38933                        value_type: Box::new(value_type),
38934                    }
38935                } else if self.match_token(TokenType::LBracket) {
38936                    // Materialize: MAP[TEXT => INT] type syntax
38937                    let key_type = self.parse_data_type_for_cast()?;
38938                    self.expect(TokenType::FArrow)?;
38939                    let value_type = self.parse_data_type_for_cast()?;
38940                    self.expect(TokenType::RBracket)?;
38941                    DataType::Map {
38942                        key_type: Box::new(key_type),
38943                        value_type: Box::new(value_type),
38944                    }
38945                } else {
38946                    DataType::Custom { name }
38947                }
38948            }
38949            "STRUCT" => {
38950                if self.match_token(TokenType::Lt) {
38951                    let fields = self.parse_struct_type_fields(false)?;
38952                    self.expect_gt()?;
38953                    DataType::Struct {
38954                        fields,
38955                        nested: false,
38956                    }
38957                } else if self.match_token(TokenType::LParen) {
38958                    let fields = self.parse_struct_type_fields(true)?;
38959                    self.expect(TokenType::RParen)?;
38960                    DataType::Struct {
38961                        fields,
38962                        nested: true,
38963                    }
38964                } else {
38965                    DataType::Custom { name }
38966                }
38967            }
38968            "ROW" => {
38969                if self.match_token(TokenType::LParen) {
38970                    let fields = self.parse_struct_type_fields(true)?;
38971                    self.expect(TokenType::RParen)?;
38972                    DataType::Struct {
38973                        fields,
38974                        nested: true,
38975                    }
38976                } else {
38977                    DataType::Custom { name }
38978                }
38979            }
38980            "RECORD" => {
38981                // SingleStore RECORD type (like ROW/STRUCT)
38982                if self.match_token(TokenType::LParen) {
38983                    let fields = self.parse_struct_type_fields(true)?;
38984                    self.expect(TokenType::RParen)?;
38985                    DataType::Struct {
38986                        fields,
38987                        nested: true,
38988                    }
38989                } else {
38990                    DataType::Custom { name }
38991                }
38992            }
38993            // Multi-word types that need special handling in cast context
38994            "DOUBLE" => {
38995                // Handle DOUBLE PRECISION
38996                let _ = self.match_identifier("PRECISION");
38997                // ClickHouse/SQL: DOUBLE(precision) or DOUBLE(precision, scale)
38998                let (precision, scale) = if self.match_token(TokenType::LParen) {
38999                    let p = Some(self.expect_number()? as u32);
39000                    let s = if self.match_token(TokenType::Comma) {
39001                        Some(self.expect_number()? as u32)
39002                    } else {
39003                        None
39004                    };
39005                    self.expect(TokenType::RParen)?;
39006                    (p, s)
39007                } else {
39008                    (None, None)
39009                };
39010                DataType::Double { precision, scale }
39011            }
39012            "CHARACTER" | "CHAR" | "NCHAR" => {
39013                // Handle CHARACTER VARYING / CHAR VARYING
39014                if self.match_identifier("VARYING") {
39015                    let length = if self.match_token(TokenType::LParen) {
39016                        let len = Some(self.expect_number()? as u32);
39017                        self.expect(TokenType::RParen)?;
39018                        len
39019                    } else {
39020                        None
39021                    };
39022                    DataType::VarChar {
39023                        length,
39024                        parenthesized_length: false,
39025                    }
39026                } else {
39027                    let length = if self.match_token(TokenType::LParen) {
39028                        let len = Some(self.expect_number()? as u32);
39029                        self.expect(TokenType::RParen)?;
39030                        len
39031                    } else {
39032                        None
39033                    };
39034                    // CHAR CHARACTER SET charset (MySQL CAST context, no length)
39035                    if length.is_none()
39036                        && self.match_identifier("CHARACTER")
39037                        && self.match_token(TokenType::Set)
39038                    {
39039                        let charset = self.expect_identifier_or_keyword()?;
39040                        return Ok(DataType::CharacterSet { name: charset });
39041                    }
39042                    DataType::Char { length }
39043                }
39044            }
39045            "TIME" => {
39046                // Handle TIME(precision) WITH/WITHOUT TIME ZONE
39047                let precision = if self.match_token(TokenType::LParen) {
39048                    let p = Some(self.expect_number()? as u32);
39049                    self.expect(TokenType::RParen)?;
39050                    p
39051                } else {
39052                    None
39053                };
39054                let timezone = if self.match_token(TokenType::With) {
39055                    self.match_keyword("TIME");
39056                    self.match_keyword("ZONE");
39057                    true
39058                } else if self.match_keyword("WITHOUT") {
39059                    self.match_keyword("TIME");
39060                    self.match_keyword("ZONE");
39061                    false
39062                } else {
39063                    false
39064                };
39065                DataType::Time {
39066                    precision,
39067                    timezone,
39068                }
39069            }
39070            "TIMETZ" => {
39071                let precision = if self.match_token(TokenType::LParen) {
39072                    let p = Some(self.expect_number()? as u32);
39073                    self.expect(TokenType::RParen)?;
39074                    p
39075                } else {
39076                    None
39077                };
39078                DataType::Time {
39079                    precision,
39080                    timezone: true,
39081                }
39082            }
39083            "TIMESTAMP" => {
39084                // Handle TIMESTAMP(precision) WITH/WITHOUT TIME ZONE or WITH LOCAL TIME ZONE
39085                let precision = if self.match_token(TokenType::LParen) {
39086                    let p = Some(self.expect_number()? as u32);
39087                    self.expect(TokenType::RParen)?;
39088                    p
39089                } else {
39090                    None
39091                };
39092                // Note: TIME is a keyword (TokenType::Time), so use match_keyword instead of match_identifier
39093                if self.match_token(TokenType::With) {
39094                    // Check for LOCAL TIME ZONE vs TIME ZONE
39095                    if self.match_token(TokenType::Local) {
39096                        self.match_keyword("TIME");
39097                        self.match_keyword("ZONE");
39098                        // TIMESTAMP WITH LOCAL TIME ZONE -> TIMESTAMPLTZ
39099                        DataType::Custom {
39100                            name: "TIMESTAMPLTZ".to_string(),
39101                        }
39102                    } else {
39103                        self.match_keyword("TIME");
39104                        self.match_keyword("ZONE");
39105                        DataType::Timestamp {
39106                            precision,
39107                            timezone: true,
39108                        }
39109                    }
39110                } else if self.match_keyword("WITHOUT") {
39111                    self.match_keyword("TIME");
39112                    self.match_keyword("ZONE");
39113                    DataType::Timestamp {
39114                        precision,
39115                        timezone: false,
39116                    }
39117                } else {
39118                    DataType::Timestamp {
39119                        precision,
39120                        timezone: false,
39121                    }
39122                }
39123            }
39124            "TIMESTAMPTZ" => {
39125                let precision = if self.match_token(TokenType::LParen) {
39126                    let p = self.expect_number()? as u32;
39127                    self.expect(TokenType::RParen)?;
39128                    Some(p)
39129                } else {
39130                    None
39131                };
39132                DataType::Timestamp {
39133                    precision,
39134                    timezone: true,
39135                }
39136            }
39137            "TIMESTAMPLTZ" | "TIMESTAMP_LTZ" => {
39138                let precision = if self.match_token(TokenType::LParen) {
39139                    let p = self.expect_number()? as u32;
39140                    self.expect(TokenType::RParen)?;
39141                    Some(p)
39142                } else {
39143                    None
39144                };
39145                let dt_name = if let Some(p) = precision {
39146                    format!("TIMESTAMPLTZ({})", p)
39147                } else {
39148                    "TIMESTAMPLTZ".to_string()
39149                };
39150                DataType::Custom { name: dt_name }
39151            }
39152            "INTERVAL" => {
39153                // Parse optional unit (DAY, HOUR, etc.) after INTERVAL in cast context
39154                let unit = if (self.check(TokenType::Identifier)
39155                    || self.check(TokenType::Var)
39156                    || self.check_keyword())
39157                    && !self.check(TokenType::RParen)
39158                    && !self.check(TokenType::Comma)
39159                    && !self.check(TokenType::As)
39160                    && !self.check(TokenType::Not)
39161                    && !self.check(TokenType::Null)
39162                {
39163                    Some(self.advance().text.to_ascii_uppercase())
39164                } else {
39165                    None
39166                };
39167                // Parse optional TO unit for range intervals like DAY TO HOUR
39168                let to = if self.match_token(TokenType::To) {
39169                    if self.check(TokenType::Identifier)
39170                        || self.check(TokenType::Var)
39171                        || self.check_keyword()
39172                    {
39173                        Some(self.advance().text.to_ascii_uppercase())
39174                    } else {
39175                        None
39176                    }
39177                } else {
39178                    None
39179                };
39180                DataType::Interval { unit, to }
39181            }
39182            // VARCHAR/NVARCHAR with optional (N) or (MAX) parameter
39183            "VARCHAR" | "NVARCHAR" => {
39184                let is_nvarchar = name == "NVARCHAR";
39185                if self.match_token(TokenType::LParen) {
39186                    if self.check(TokenType::RParen) {
39187                        self.skip();
39188                        DataType::VarChar {
39189                            length: None,
39190                            parenthesized_length: false,
39191                        }
39192                    } else if self.check_identifier("MAX") {
39193                        self.skip();
39194                        self.expect(TokenType::RParen)?;
39195                        let type_name = if is_nvarchar {
39196                            "NVARCHAR(MAX)"
39197                        } else {
39198                            "VARCHAR(MAX)"
39199                        };
39200                        DataType::Custom {
39201                            name: type_name.to_string(),
39202                        }
39203                    } else {
39204                        let n = self.expect_number()? as u32;
39205                        self.expect(TokenType::RParen)?;
39206                        DataType::VarChar {
39207                            length: Some(n),
39208                            parenthesized_length: false,
39209                        }
39210                    }
39211                } else {
39212                    DataType::VarChar {
39213                        length: None,
39214                        parenthesized_length: false,
39215                    }
39216                }
39217            }
39218            // VARBINARY with optional (N) or (MAX) parameter
39219            "VARBINARY" => {
39220                if self.match_token(TokenType::LParen) {
39221                    if self.check(TokenType::RParen) {
39222                        self.skip();
39223                        DataType::VarBinary { length: None }
39224                    } else if self.check_identifier("MAX") {
39225                        self.skip();
39226                        self.expect(TokenType::RParen)?;
39227                        DataType::Custom {
39228                            name: "VARBINARY(MAX)".to_string(),
39229                        }
39230                    } else {
39231                        let n = self.expect_number()? as u32;
39232                        self.expect(TokenType::RParen)?;
39233                        DataType::VarBinary { length: Some(n) }
39234                    }
39235                } else {
39236                    DataType::VarBinary { length: None }
39237                }
39238            }
39239            // DECIMAL/NUMERIC with optional (precision, scale)
39240            "DECIMAL" | "NUMERIC" | "NUMBER" => {
39241                if self.match_token(TokenType::LParen) {
39242                    let precision = Some(self.expect_number()? as u32);
39243                    let scale = if self.match_token(TokenType::Comma) {
39244                        Some(self.expect_number()? as u32)
39245                    } else {
39246                        None
39247                    };
39248                    self.expect(TokenType::RParen)?;
39249                    DataType::Decimal { precision, scale }
39250                } else {
39251                    DataType::Decimal {
39252                        precision: None,
39253                        scale: None,
39254                    }
39255                }
39256            }
39257            // INT/INTEGER/BIGINT/SMALLINT/TINYINT with optional (N) display width
39258            "INT" | "INTEGER" => {
39259                let length = if self.match_token(TokenType::LParen) {
39260                    let n = Some(self.expect_number()? as u32);
39261                    self.expect(TokenType::RParen)?;
39262                    n
39263                } else {
39264                    None
39265                };
39266                DataType::Int {
39267                    length,
39268                    integer_spelling: name == "INTEGER",
39269                }
39270            }
39271            "BIGINT" => {
39272                let length = if self.match_token(TokenType::LParen) {
39273                    let n = Some(self.expect_number()? as u32);
39274                    self.expect(TokenType::RParen)?;
39275                    n
39276                } else {
39277                    None
39278                };
39279                DataType::BigInt { length }
39280            }
39281            "SMALLINT" => {
39282                let length = if self.match_token(TokenType::LParen) {
39283                    let n = Some(self.expect_number()? as u32);
39284                    self.expect(TokenType::RParen)?;
39285                    n
39286                } else {
39287                    None
39288                };
39289                DataType::SmallInt { length }
39290            }
39291            "TINYINT" => {
39292                let length = if self.match_token(TokenType::LParen) {
39293                    let n = Some(self.expect_number()? as u32);
39294                    self.expect(TokenType::RParen)?;
39295                    n
39296                } else {
39297                    None
39298                };
39299                DataType::TinyInt { length }
39300            }
39301            // FLOAT with optional (precision)
39302            "FLOAT" | "REAL" | "BINARY_FLOAT" => {
39303                let (precision, scale) = if self.match_token(TokenType::LParen) {
39304                    let n = Some(self.expect_number()? as u32);
39305                    let s = if self.match_token(TokenType::Comma) {
39306                        Some(self.expect_number()? as u32)
39307                    } else {
39308                        None
39309                    };
39310                    self.expect(TokenType::RParen)?;
39311                    (n, s)
39312                } else {
39313                    (None, None)
39314                };
39315                DataType::Float {
39316                    precision,
39317                    scale,
39318                    real_spelling: name == "REAL",
39319                }
39320            }
39321            "BINARY_DOUBLE" => DataType::Double {
39322                precision: None,
39323                scale: None,
39324            },
39325            // BINARY with optional (length)
39326            "BINARY" => {
39327                let length = if self.match_token(TokenType::LParen) {
39328                    let n = Some(self.expect_number()? as u32);
39329                    self.expect(TokenType::RParen)?;
39330                    n
39331                } else {
39332                    None
39333                };
39334                DataType::Binary { length }
39335            }
39336            // MySQL SIGNED [INTEGER] / UNSIGNED [INTEGER] in CAST context
39337            // CAST(x AS SIGNED INTEGER) -> CAST(x AS SIGNED)
39338            // CAST(x AS UNSIGNED INTEGER) -> CAST(x AS UNSIGNED)
39339            "SIGNED" | "UNSIGNED" => {
39340                // Consume optional INTEGER keyword after SIGNED/UNSIGNED
39341                if self.check_identifier("INTEGER")
39342                    || self.check_keyword_text("INTEGER")
39343                    || self.check_keyword_text("INT")
39344                {
39345                    self.skip();
39346                }
39347                DataType::Custom { name }
39348            }
39349            // ClickHouse Nullable(T) wrapper type
39350            "NULLABLE" => {
39351                self.expect(TokenType::LParen)?;
39352                let inner = self.parse_data_type_for_cast()?;
39353                self.expect(TokenType::RParen)?;
39354                DataType::Nullable {
39355                    inner: Box::new(inner),
39356                }
39357            }
39358            // VECTOR(type, dimension) - Snowflake vector type
39359            // VECTOR(dimension, element_type_alias) or VECTOR(dimension) - SingleStore vector type
39360            "VECTOR" => {
39361                if self.match_token(TokenType::LParen) {
39362                    if self.check(TokenType::Number) {
39363                        // SingleStore format: VECTOR(dimension) or VECTOR(dimension, type_alias)
39364                        let dimension = self.expect_number()? as u32;
39365                        let element_type = if self.match_token(TokenType::Comma) {
39366                            let type_alias = self.expect_identifier_or_keyword()?;
39367                            let mapped_type = match type_alias.to_ascii_uppercase().as_str() {
39368                                "I8" => DataType::TinyInt { length: None },
39369                                "I16" => DataType::SmallInt { length: None },
39370                                "I32" => DataType::Int {
39371                                    length: None,
39372                                    integer_spelling: false,
39373                                },
39374                                "I64" => DataType::BigInt { length: None },
39375                                "F32" => DataType::Float {
39376                                    precision: None,
39377                                    scale: None,
39378                                    real_spelling: false,
39379                                },
39380                                "F64" => DataType::Double {
39381                                    precision: None,
39382                                    scale: None,
39383                                },
39384                                _ => DataType::Custom {
39385                                    name: type_alias.to_string(),
39386                                },
39387                            };
39388                            Some(Box::new(mapped_type))
39389                        } else {
39390                            None
39391                        };
39392                        self.expect(TokenType::RParen)?;
39393                        DataType::Vector {
39394                            element_type,
39395                            dimension: Some(dimension),
39396                        }
39397                    } else {
39398                        // Snowflake format: VECTOR(type, dimension)
39399                        let element_type = self.parse_data_type()?;
39400                        self.expect(TokenType::Comma)?;
39401                        let dimension = self.expect_number()? as u32;
39402                        self.expect(TokenType::RParen)?;
39403                        DataType::Vector {
39404                            element_type: Some(Box::new(element_type)),
39405                            dimension: Some(dimension),
39406                        }
39407                    }
39408                } else {
39409                    DataType::Custom {
39410                        name: "VECTOR".to_string(),
39411                    }
39412                }
39413            }
39414            // For simple types, use convert_name_to_type to get proper DataType variants
39415            // This ensures VARCHAR becomes DataType::VarChar, not DataType::Custom
39416            // For user-defined types in generic mode, preserve original case from raw_name
39417            _ => {
39418                let base = self.convert_name_to_type(&name)?;
39419                // ClickHouse: consume parenthesized args for custom types like DateTime('UTC'),
39420                // LowCardinality(String), Variant(String, UInt64), JSON(max_dynamic_paths=8)
39421                if matches!(
39422                    self.config.dialect,
39423                    Some(crate::dialects::DialectType::ClickHouse)
39424                ) && self.check(TokenType::LParen)
39425                    && (matches!(
39426                        base,
39427                        DataType::Custom { .. } | DataType::Json | DataType::JsonB
39428                    ))
39429                {
39430                    self.skip(); // consume (
39431                    let args = self.parse_custom_type_args_balanced()?;
39432                    self.expect(TokenType::RParen)?;
39433                    let base_name = match &base {
39434                        DataType::Json => "JSON".to_string(),
39435                        DataType::JsonB => "JSONB".to_string(),
39436                        DataType::Custom { name } => name.clone(),
39437                        _ => unreachable!(),
39438                    };
39439                    DataType::Custom {
39440                        name: format!("{}({})", base_name, args),
39441                    }
39442                } else if matches!(base, DataType::Custom { .. }) && self.check(TokenType::Dot) {
39443                    // Handle schema-qualified user-defined types (e.g., app.status_enum)
39444                    // by consuming dot-separated identifiers like Python sqlglot's
39445                    // _parse_user_defined_type()
39446                    // Use raw_name to preserve original case for schema-qualified types
39447                    let mut type_name = raw_name.to_string();
39448                    while self.match_token(TokenType::Dot) {
39449                        let tok = self.advance();
39450                        type_name = format!("{}.{}", type_name, tok.text);
39451                    }
39452                    DataType::Custom { name: type_name }
39453                } else if matches!(base, DataType::Custom { .. }) && self.config.dialect.is_none() {
39454                    // Preserve original case for user-defined types in generic mode
39455                    DataType::Custom {
39456                        name: raw_name.to_string(),
39457                    }
39458                } else {
39459                    base
39460                }
39461            }
39462        };
39463
39464        // Materialize: handle postfix LIST syntax (INT LIST, INT LIST LIST LIST)
39465        let is_materialize = matches!(
39466            self.config.dialect,
39467            Some(crate::dialects::DialectType::Materialize)
39468        );
39469        let mut result_type = base_type;
39470        if is_materialize {
39471            while self.check_identifier("LIST") || self.check(TokenType::List) {
39472                self.skip(); // consume LIST
39473                result_type = DataType::List {
39474                    element_type: Box::new(result_type),
39475                };
39476            }
39477        }
39478
39479        // For dialects that support array type suffixes (DuckDB, PostgreSQL, Redshift),
39480        // parse array dimensions. For other dialects, brackets after a cast are subscript operations.
39481        if supports_array_type_suffix {
39482            self.maybe_parse_array_dimensions(result_type)
39483        } else {
39484            Ok(result_type)
39485        }
39486    }
39487
39488    /// Parse custom type arguments with balanced parentheses, preserving nested types
39489    fn parse_custom_type_args_balanced(&mut self) -> Result<String> {
39490        let mut depth = 0usize;
39491        let mut out = String::new();
39492        let mut prev_wordish = false;
39493
39494        while !self.is_at_end() {
39495            if self.check(TokenType::RParen) && depth == 0 {
39496                break;
39497            }
39498
39499            let token = self.advance();
39500            match token.token_type {
39501                TokenType::LParen => {
39502                    out.push('(');
39503                    depth += 1;
39504                    prev_wordish = false;
39505                }
39506                TokenType::RParen => {
39507                    if depth == 0 {
39508                        break;
39509                    }
39510                    depth -= 1;
39511                    out.push(')');
39512                    prev_wordish = true;
39513                }
39514                TokenType::Comma => {
39515                    out.push_str(", ");
39516                    prev_wordish = false;
39517                }
39518                TokenType::Eq => {
39519                    out.push_str(" = ");
39520                    prev_wordish = false;
39521                }
39522                TokenType::Plus => {
39523                    out.push_str(" + ");
39524                    prev_wordish = false;
39525                }
39526                TokenType::Dash => {
39527                    out.push('-');
39528                    prev_wordish = false;
39529                }
39530                TokenType::Dot => {
39531                    out.push('.');
39532                    prev_wordish = false;
39533                }
39534                TokenType::String | TokenType::DollarString => {
39535                    if prev_wordish {
39536                        out.push(' ');
39537                    }
39538                    let escaped = token.text.replace('\'', "''");
39539                    out.push('\'');
39540                    out.push_str(&escaped);
39541                    out.push('\'');
39542                    prev_wordish = true;
39543                }
39544                TokenType::Number | TokenType::Parameter => {
39545                    if prev_wordish {
39546                        out.push(' ');
39547                    }
39548                    out.push_str(&token.text);
39549                    prev_wordish = true;
39550                }
39551                TokenType::QuotedIdentifier => {
39552                    if prev_wordish {
39553                        out.push(' ');
39554                    }
39555                    out.push('"');
39556                    out.push_str(&token.text);
39557                    out.push('"');
39558                    prev_wordish = true;
39559                }
39560                _ => {
39561                    if prev_wordish {
39562                        out.push(' ');
39563                    }
39564                    out.push_str(&token.text);
39565                    prev_wordish = true;
39566                }
39567            }
39568        }
39569
39570        Ok(out)
39571    }
39572
39573    /// Uppercase the `skip` keyword in ClickHouse JSON type declarations.
39574    /// In ClickHouse, `SKIP col` within JSON(...) type specs must use uppercase SKIP.
39575    fn uppercase_json_type_skip_keyword(args: &str) -> String {
39576        // Replace "skip " at the start of the string or after ", " with "SKIP "
39577        let mut result = String::with_capacity(args.len());
39578        let mut rest = args;
39579        let mut at_start = true;
39580        while !rest.is_empty() {
39581            if at_start
39582                && rest.len() >= 5
39583                && rest[..4].eq_ignore_ascii_case("skip")
39584                && rest.as_bytes()[4] == b' '
39585            {
39586                result.push_str("SKIP");
39587                rest = &rest[4..];
39588                at_start = false;
39589            } else if rest.starts_with(", ") {
39590                result.push_str(", ");
39591                rest = &rest[2..];
39592                at_start = true;
39593            } else {
39594                result.push(rest.as_bytes()[0] as char);
39595                rest = &rest[1..];
39596                at_start = false;
39597            }
39598        }
39599        result
39600    }
39601
39602    /// Parse a data type from a text string by tokenizing and sub-parsing it.
39603    /// Used for ClickHouse JSON path types where a quoted identifier like "Array(JSON)"
39604    /// needs to be parsed as a proper structured DataType.
39605    fn parse_data_type_from_text(&mut self, text: &str) -> Result<DataType> {
39606        use crate::tokens::Tokenizer;
39607        let tokenizer = Tokenizer::default();
39608        let tokens = tokenizer.tokenize(text)?;
39609        if tokens.is_empty() {
39610            return Ok(DataType::Custom {
39611                name: text.to_string(),
39612            });
39613        }
39614        // Save parser state and temporarily swap in the sub-tokens
39615        let saved_tokens = std::mem::replace(&mut self.tokens, tokens);
39616        let saved_current = std::mem::replace(&mut self.current, 0);
39617        let result = self.parse_data_type();
39618        // Restore original parser state
39619        self.tokens = saved_tokens;
39620        self.current = saved_current;
39621        result
39622    }
39623
39624    /// Try to parse a data type optionally - returns None if no valid type found
39625    /// Used for JSON_TABLE column definitions where type may or may not be present
39626    fn parse_data_type_optional(&mut self) -> Result<Option<DataType>> {
39627        // Check if current token looks like a type name
39628        if !self.check(TokenType::Identifier)
39629            && !self.check(TokenType::Var)
39630            && !self.check_keyword()
39631        {
39632            return Ok(None);
39633        }
39634
39635        // Don't try to parse PATH as a type
39636        if self.check_identifier("PATH") {
39637            return Ok(None);
39638        }
39639
39640        // ClickHouse: ALIAS, EPHEMERAL, MATERIALIZED are column modifiers, not types
39641        if matches!(
39642            self.config.dialect,
39643            Some(crate::dialects::DialectType::ClickHouse)
39644        ) && (self.check_identifier("ALIAS")
39645            || self.check_identifier("EPHEMERAL")
39646            || self.check(TokenType::Materialized))
39647        {
39648            return Ok(None);
39649        }
39650
39651        let saved_pos = self.current;
39652        match self.parse_data_type() {
39653            Ok(dt) => Ok(Some(dt)),
39654            Err(_) => {
39655                self.current = saved_pos;
39656                Ok(None)
39657            }
39658        }
39659    }
39660
39661    /// Convert a DataType to a string representation for JSONColumnDef.kind
39662    fn data_type_to_string(&self, dt: &DataType) -> String {
39663        match dt {
39664            DataType::Int {
39665                length: Some(n),
39666                integer_spelling: true,
39667            } => format!("INTEGER({})", n),
39668            DataType::Int {
39669                length: Some(n), ..
39670            } => format!("INT({})", n),
39671            DataType::Int {
39672                length: None,
39673                integer_spelling: true,
39674            } => "INTEGER".to_string(),
39675            DataType::Int { length: None, .. } => "INT".to_string(),
39676            DataType::BigInt { length: Some(n) } => format!("BIGINT({})", n),
39677            DataType::BigInt { length: None } => "BIGINT".to_string(),
39678            DataType::SmallInt { length: Some(n) } => format!("SMALLINT({})", n),
39679            DataType::SmallInt { length: None } => "SMALLINT".to_string(),
39680            DataType::TinyInt { length: Some(n) } => format!("TINYINT({})", n),
39681            DataType::TinyInt { length: None } => "TINYINT".to_string(),
39682            DataType::Float {
39683                precision: Some(p),
39684                scale: Some(s),
39685                ..
39686            } => format!("FLOAT({}, {})", p, s),
39687            DataType::Float {
39688                precision: Some(p),
39689                scale: None,
39690                ..
39691            } => format!("FLOAT({})", p),
39692            DataType::Float {
39693                precision: None, ..
39694            } => "FLOAT".to_string(),
39695            DataType::Double {
39696                precision: Some(p),
39697                scale: Some(s),
39698            } => format!("DOUBLE({}, {})", p, s),
39699            DataType::Double {
39700                precision: Some(p),
39701                scale: None,
39702            } => format!("DOUBLE({})", p),
39703            DataType::Double {
39704                precision: None, ..
39705            } => "DOUBLE".to_string(),
39706            DataType::Decimal {
39707                precision: Some(p),
39708                scale: Some(s),
39709            } => format!("DECIMAL({}, {})", p, s),
39710            DataType::Decimal {
39711                precision: Some(p),
39712                scale: None,
39713            } => format!("DECIMAL({})", p),
39714            DataType::Decimal {
39715                precision: None, ..
39716            } => "DECIMAL".to_string(),
39717            DataType::VarChar {
39718                length: Some(n), ..
39719            } => format!("VARCHAR({})", n),
39720            DataType::VarChar { length: None, .. } => "VARCHAR".to_string(),
39721            DataType::Char { length: Some(n) } => format!("CHAR({})", n),
39722            DataType::Char { length: None } => "CHAR".to_string(),
39723            DataType::Text => "TEXT".to_string(),
39724            DataType::Boolean => "BOOLEAN".to_string(),
39725            DataType::Date => "DATE".to_string(),
39726            DataType::Time {
39727                precision: Some(p), ..
39728            } => format!("TIME({})", p),
39729            DataType::Time {
39730                precision: None, ..
39731            } => "TIME".to_string(),
39732            DataType::Timestamp {
39733                precision: Some(p),
39734                timezone: true,
39735            } => format!("TIMESTAMPTZ({})", p),
39736            DataType::Timestamp {
39737                precision: Some(p),
39738                timezone: false,
39739            } => format!("TIMESTAMP({})", p),
39740            DataType::Timestamp {
39741                precision: None,
39742                timezone: true,
39743            } => "TIMESTAMPTZ".to_string(),
39744            DataType::Timestamp {
39745                precision: None,
39746                timezone: false,
39747            } => "TIMESTAMP".to_string(),
39748            DataType::Json => "JSON".to_string(),
39749            DataType::JsonB => "JSONB".to_string(),
39750            DataType::Binary { length: Some(n) } => format!("BINARY({})", n),
39751            DataType::Binary { length: None } => "BINARY".to_string(),
39752            DataType::VarBinary { length: Some(n) } => format!("VARBINARY({})", n),
39753            DataType::VarBinary { length: None } => "VARBINARY".to_string(),
39754            DataType::String { length: Some(n) } => format!("STRING({})", n),
39755            DataType::String { length: None } => "STRING".to_string(),
39756            DataType::Array { element_type, .. } => {
39757                format!("ARRAY({})", self.data_type_to_string(element_type))
39758            }
39759            DataType::Nullable { inner } => {
39760                format!("Nullable({})", self.data_type_to_string(inner))
39761            }
39762            DataType::Custom { name } => name.clone(),
39763            _ => format!("{:?}", dt),
39764        }
39765    }
39766
39767    /// Parse optional array dimensions after a type: [], [N], [N][M], ARRAY, ARRAY[N], etc.
39768    fn maybe_parse_array_dimensions(&mut self, base_type: DataType) -> Result<DataType> {
39769        let mut current_type = base_type;
39770
39771        // Handle PostgreSQL ARRAY keyword suffix: type ARRAY or type ARRAY[3]
39772        if self.check_identifier("ARRAY") {
39773            self.skip(); // consume ARRAY
39774                         // Check for optional dimension: ARRAY[N]
39775            let dimension = if self.match_token(TokenType::LBracket) {
39776                let dim = if self.check(TokenType::Number) {
39777                    let n = self.expect_number()? as u32;
39778                    Some(n)
39779                } else {
39780                    None
39781                };
39782                self.expect(TokenType::RBracket)?;
39783                dim
39784            } else {
39785                None
39786            };
39787            current_type = DataType::Array {
39788                element_type: Box::new(current_type),
39789                dimension,
39790            };
39791        }
39792
39793        // Handle bracket-based array dimensions: TYPE[], TYPE[N], TYPE[][N], etc.
39794        while self.match_token(TokenType::LBracket) {
39795            // Check for optional dimension: [N] or just []
39796            let dimension = if self.check(TokenType::Number) {
39797                let n = self.expect_number()? as u32;
39798                Some(n)
39799            } else {
39800                None
39801            };
39802            self.expect(TokenType::RBracket)?;
39803
39804            current_type = DataType::Array {
39805                element_type: Box::new(current_type),
39806                dimension,
39807            };
39808        }
39809
39810        Ok(current_type)
39811    }
39812
39813    /// Parse spatial type arguments like GEOMETRY(Point, 4326) or GEOGRAPHY
39814    fn parse_spatial_type_args(&mut self) -> Result<(Option<String>, Option<u32>)> {
39815        if self.match_token(TokenType::LParen) {
39816            // First arg can be a subtype name (POINT, LINESTRING, etc.) or a numeric dimension
39817            if self.check(TokenType::Number) {
39818                // Numeric argument (e.g., ST_GEOMETRY(1) in Teradata)
39819                let n = self.expect_number()? as u32;
39820                self.expect(TokenType::RParen)?;
39821                return Ok((None, Some(n)));
39822            }
39823            // Parse subtype
39824            let subtype = Some(self.expect_identifier()?.to_ascii_uppercase());
39825
39826            // Parse optional SRID
39827            let srid = if self.match_token(TokenType::Comma) {
39828                Some(self.expect_number()? as u32)
39829            } else {
39830                None
39831            };
39832
39833            self.expect(TokenType::RParen)?;
39834            Ok((subtype, srid))
39835        } else {
39836            Ok((None, None))
39837        }
39838    }
39839
39840    /// Parse struct/row/union type fields: name TYPE, name TYPE, ...
39841    /// `paren_style` indicates whether we're parsing parenthesized syntax (terminates at RParen)
39842    /// or angle-bracket syntax (terminates at Gt/GtGt).
39843    fn parse_struct_type_fields(&mut self, paren_style: bool) -> Result<Vec<StructField>> {
39844        let mut fields = Vec::new();
39845        // Check for empty field list
39846        if (paren_style && self.check(TokenType::RParen))
39847            || (!paren_style && (self.check(TokenType::Gt) || self.check(TokenType::GtGt)))
39848        {
39849            return Ok(fields);
39850        }
39851        loop {
39852            // Parse field name or just type (for anonymous struct fields)
39853            // Track whether it was a quoted identifier to preserve quoting
39854            let is_quoted = self.check(TokenType::QuotedIdentifier);
39855            let first = self.expect_identifier_or_keyword()?;
39856            let first_upper = first.to_ascii_uppercase();
39857
39858            // Check if this is a parametric type (ARRAY<T>, MAP<K,V>, STRUCT<...>, STRUCT(...))
39859            let is_parametric_type = (first_upper == "ARRAY"
39860                || first_upper == "MAP"
39861                || first_upper == "STRUCT"
39862                || first_upper == "ROW")
39863                && (self.check(TokenType::Lt) || self.check(TokenType::LParen));
39864
39865            let (field_name, field_type) = if is_parametric_type {
39866                // This is a parametric type as an anonymous field
39867                let field_type = self.parse_data_type_from_name(&first_upper)?;
39868                (String::new(), field_type)
39869            } else if self.check(TokenType::Comma)
39870                || self.match_identifier("OPTIONS")  // Check for OPTIONS (but don't consume yet)
39871                || (paren_style && self.check(TokenType::RParen))
39872                || (!paren_style && (self.check(TokenType::Gt) || self.check(TokenType::GtGt)))
39873            {
39874                // Check if we just matched OPTIONS - if so, retreat
39875                if self.previous().text.eq_ignore_ascii_case("OPTIONS") {
39876                    self.current -= 1;
39877                }
39878                // Anonymous field: just a type name
39879                let field_type = self.convert_name_to_type(&first)?;
39880                (String::new(), field_type)
39881            } else if self.is_identifier_token()
39882                || self.is_safe_keyword_as_identifier()
39883                || self.check(TokenType::Lt)
39884                || self.check(TokenType::LParen)
39885                || self.check(TokenType::Colon)
39886            {
39887                // Named field: fieldname TYPE (or fieldname: TYPE for Hive)
39888                // Consume optional colon separator (Hive-style: `STRUCT<field_name: TYPE>`)
39889                self.match_token(TokenType::Colon);
39890                let field_type = self.parse_data_type()?;
39891                // Preserve quoting for field names
39892                let field_name = if is_quoted {
39893                    format!("\"{}\"", first)
39894                } else {
39895                    first
39896                };
39897                (field_name, field_type)
39898            } else {
39899                // Just a type name
39900                let field_type = self.convert_name_to_type(&first)?;
39901                (String::new(), field_type)
39902            };
39903
39904            // Spark/Databricks: Check for COMMENT clause on struct field
39905            let comment = if self.match_token(TokenType::Comment) {
39906                Some(self.expect_string()?)
39907            } else {
39908                None
39909            };
39910
39911            // BigQuery: Check for OPTIONS clause on struct field
39912            let options = if self.match_identifier("OPTIONS") {
39913                self.parse_options_list()?
39914            } else {
39915                Vec::new()
39916            };
39917
39918            fields.push(StructField::with_options_and_comment(
39919                field_name, field_type, options, comment,
39920            ));
39921
39922            if !self.match_token(TokenType::Comma) {
39923                break;
39924            }
39925        }
39926        Ok(fields)
39927    }
39928
39929    /// Parse a data type given a name that was already consumed
39930    /// This is used for standalone type expressions like ARRAY<T>
39931    fn parse_data_type_from_name(&mut self, name: &str) -> Result<DataType> {
39932        match name {
39933            "ARRAY" => {
39934                if self.match_token(TokenType::Lt) {
39935                    let element_type = self.parse_data_type()?;
39936                    self.expect_gt()?;
39937                    Ok(DataType::Array {
39938                        element_type: Box::new(element_type),
39939                        dimension: None,
39940                    })
39941                } else {
39942                    Ok(DataType::Custom {
39943                        name: "ARRAY".to_string(),
39944                    })
39945                }
39946            }
39947            "MAP" => {
39948                if self.match_token(TokenType::Lt) {
39949                    let key_type = self.parse_data_type()?;
39950                    self.expect(TokenType::Comma)?;
39951                    let value_type = self.parse_data_type()?;
39952                    self.expect_gt()?;
39953                    Ok(DataType::Map {
39954                        key_type: Box::new(key_type),
39955                        value_type: Box::new(value_type),
39956                    })
39957                } else {
39958                    Ok(DataType::Custom {
39959                        name: "MAP".to_string(),
39960                    })
39961                }
39962            }
39963            "STRUCT" => {
39964                if self.match_token(TokenType::Lt) {
39965                    let fields = self.parse_struct_type_fields(false)?;
39966                    self.expect_gt()?;
39967                    Ok(DataType::Struct {
39968                        fields,
39969                        nested: false,
39970                    })
39971                } else if self.match_token(TokenType::LParen) {
39972                    let fields = self.parse_struct_type_fields(true)?;
39973                    self.expect(TokenType::RParen)?;
39974                    Ok(DataType::Struct {
39975                        fields,
39976                        nested: true,
39977                    })
39978                } else {
39979                    Ok(DataType::Custom {
39980                        name: "STRUCT".to_string(),
39981                    })
39982                }
39983            }
39984            "ROW" => {
39985                if self.match_token(TokenType::LParen) {
39986                    let fields = self.parse_struct_type_fields(true)?;
39987                    self.expect(TokenType::RParen)?;
39988                    Ok(DataType::Struct {
39989                        fields,
39990                        nested: true,
39991                    })
39992                } else {
39993                    Ok(DataType::Custom {
39994                        name: "ROW".to_string(),
39995                    })
39996                }
39997            }
39998            _ => Ok(DataType::Custom {
39999                name: name.to_string(),
40000            }),
40001        }
40002    }
40003
40004    /// Convert a type name string to a DataType
40005    /// Used for anonymous struct fields where we have just a type name
40006    fn convert_name_to_type(&self, name: &str) -> Result<DataType> {
40007        let upper = name.to_ascii_uppercase();
40008        Ok(match upper.as_str() {
40009            "INT" => DataType::Int {
40010                length: None,
40011                integer_spelling: false,
40012            },
40013            "INTEGER" => DataType::Int {
40014                length: None,
40015                integer_spelling: true,
40016            },
40017            "BIGINT" => DataType::BigInt { length: None },
40018            "SMALLINT" => DataType::SmallInt { length: None },
40019            "TINYINT" => DataType::TinyInt { length: None },
40020            "FLOAT" | "BINARY_FLOAT" => DataType::Float {
40021                precision: None,
40022                scale: None,
40023                real_spelling: false,
40024            },
40025            "REAL" => DataType::Float {
40026                precision: None,
40027                scale: None,
40028                real_spelling: true,
40029            },
40030            "DOUBLE" | "BINARY_DOUBLE" => DataType::Double {
40031                precision: None,
40032                scale: None,
40033            },
40034            "DECIMAL" | "NUMERIC" => DataType::Decimal {
40035                precision: None,
40036                scale: None,
40037            },
40038            "BOOLEAN" | "BOOL" => DataType::Boolean,
40039            "CHAR" | "CHARACTER" | "NCHAR" => DataType::Char { length: None },
40040            "VARCHAR" | "NVARCHAR" => DataType::VarChar {
40041                length: None,
40042                parenthesized_length: false,
40043            },
40044            "TEXT" | "STRING" | "NTEXT" => DataType::Text,
40045            "DATE" => DataType::Date,
40046            "TIME" => DataType::Time {
40047                precision: None,
40048                timezone: false,
40049            },
40050            "TIMETZ" => DataType::Time {
40051                precision: None,
40052                timezone: true,
40053            },
40054            "TIMESTAMP" => DataType::Timestamp {
40055                precision: None,
40056                timezone: false,
40057            },
40058            "INTERVAL" => DataType::Interval {
40059                unit: None,
40060                to: None,
40061            },
40062            "JSON" => DataType::Json,
40063            "JSONB" => DataType::JsonB,
40064            "UUID" => DataType::Uuid,
40065            "BLOB" => DataType::Blob,
40066            "BYTEA" => DataType::VarBinary { length: None },
40067            "BINARY" => DataType::Binary { length: None },
40068            "VARBINARY" => DataType::VarBinary { length: None },
40069            "BIT" => DataType::Bit { length: None },
40070            "VARBIT" => DataType::VarBit { length: None },
40071            _ => DataType::Custom {
40072                name: name.to_string(),
40073            },
40074        })
40075    }
40076
40077    /// Parse star modifiers: EXCLUDE/EXCEPT, REPLACE, RENAME
40078    /// Syntax varies by dialect:
40079    /// - DuckDB: * EXCLUDE (col1, col2)
40080    /// - BigQuery: * EXCEPT (col1, col2), * REPLACE (expr AS col)
40081    /// - Snowflake: * EXCLUDE col, * RENAME (old AS new)
40082    fn parse_star_modifiers(&mut self, table: Option<Identifier>) -> Result<Star> {
40083        self.parse_star_modifiers_with_comments(table, Vec::new())
40084    }
40085
40086    /// Parse star modifiers with explicit trailing comments from the star token
40087    fn parse_star_modifiers_with_comments(
40088        &mut self,
40089        table: Option<Identifier>,
40090        star_trailing_comments: Vec<String>,
40091    ) -> Result<Star> {
40092        let mut except = None;
40093        let mut replace = None;
40094        let mut rename = None;
40095
40096        // Parse EXCLUDE / EXCEPT clause
40097        if self.match_token(TokenType::Exclude) || self.match_token(TokenType::Except) {
40098            // ClickHouse: EXCEPT STRICT col1, col2 (STRICT is optional modifier)
40099            let _ = self.match_text_seq(&["STRICT"]);
40100            let mut columns = Vec::new();
40101            if self.match_token(TokenType::LParen) {
40102                // EXCLUDE (col1, col2) or EXCEPT (A.COL_1, B.COL_2)
40103                loop {
40104                    // ClickHouse: allow string literals in EXCEPT ('col_regex')
40105                    // and keywords like 'key', 'index' as column names
40106                    let col = if self.check(TokenType::String) {
40107                        self.advance().text
40108                    } else if self.is_safe_keyword_as_identifier() {
40109                        self.advance().text
40110                    } else {
40111                        self.expect_identifier()?
40112                    };
40113                    // Handle qualified column names like A.COL_1
40114                    if self.match_token(TokenType::Dot) {
40115                        let subcol = if self.is_safe_keyword_as_identifier() {
40116                            self.advance().text
40117                        } else {
40118                            self.expect_identifier()?
40119                        };
40120                        columns.push(Identifier::new(format!("{}.{}", col, subcol)));
40121                    } else {
40122                        columns.push(Identifier::new(col));
40123                    }
40124                    if !self.match_token(TokenType::Comma) {
40125                        break;
40126                    }
40127                }
40128                self.expect(TokenType::RParen)?;
40129            } else {
40130                // EXCLUDE col (single column, Snowflake) or EXCEPT col1, col2 (ClickHouse)
40131                // or EXCEPT 'regex' (ClickHouse)
40132                loop {
40133                    let col = if self.check(TokenType::String) {
40134                        self.advance().text
40135                    } else if self.is_safe_keyword_as_identifier() {
40136                        self.advance().text
40137                    } else {
40138                        self.expect_identifier()?
40139                    };
40140                    columns.push(Identifier::new(col));
40141                    // ClickHouse allows comma-separated columns without parens: EXCEPT col1, col2
40142                    // But only if the next token after comma looks like a column name
40143                    if !matches!(
40144                        self.config.dialect,
40145                        Some(crate::dialects::DialectType::ClickHouse)
40146                    ) || !self.check(TokenType::Comma)
40147                        || !matches!(
40148                            self.peek_nth(1).map(|t| t.token_type),
40149                            Some(TokenType::Identifier)
40150                                | Some(TokenType::QuotedIdentifier)
40151                                | Some(TokenType::Var)
40152                                | Some(TokenType::String)
40153                        )
40154                    {
40155                        break;
40156                    }
40157                    self.skip(); // consume comma
40158                }
40159            }
40160            except = Some(columns);
40161        }
40162
40163        // Parse REPLACE clause
40164        if self.match_token(TokenType::Replace) {
40165            // ClickHouse: REPLACE STRICT is optional modifier
40166            let _ = self.match_text_seq(&["STRICT"]);
40167            let mut replacements = Vec::new();
40168            if self.match_token(TokenType::LParen) {
40169                loop {
40170                    let expr = self.parse_expression()?;
40171                    self.expect(TokenType::As)?;
40172                    let alias = self.expect_identifier_or_keyword()?;
40173                    replacements.push(Alias::new(expr, Identifier::new(alias)));
40174                    if !self.match_token(TokenType::Comma) {
40175                        break;
40176                    }
40177                }
40178                self.expect(TokenType::RParen)?;
40179            } else if matches!(
40180                self.config.dialect,
40181                Some(crate::dialects::DialectType::ClickHouse)
40182            ) {
40183                // ClickHouse: REPLACE [STRICT] expr AS name (single entry without parens)
40184                // Multiple entries require parens: REPLACE(expr1 AS name1, expr2 AS name2)
40185                let expr = self.parse_expression()?;
40186                self.expect(TokenType::As)?;
40187                let alias = self.expect_identifier_or_keyword()?;
40188                replacements.push(Alias::new(expr, Identifier::new(alias)));
40189            } else {
40190                return Err(self.parse_error("Expected LParen after REPLACE"));
40191            }
40192            replace = Some(replacements);
40193        }
40194
40195        // Parse RENAME clause (Snowflake)
40196        if self.match_token(TokenType::Rename) {
40197            let mut renames = Vec::new();
40198            if self.match_token(TokenType::LParen) {
40199                loop {
40200                    let old_name = self.expect_identifier()?;
40201                    self.expect(TokenType::As)?;
40202                    let new_name = self.expect_identifier()?;
40203                    renames.push((Identifier::new(old_name), Identifier::new(new_name)));
40204                    if !self.match_token(TokenType::Comma) {
40205                        break;
40206                    }
40207                }
40208                self.expect(TokenType::RParen)?;
40209            } else {
40210                // Single rename without parens
40211                let old_name = self.expect_identifier()?;
40212                self.expect(TokenType::As)?;
40213                let new_name = self.expect_identifier()?;
40214                renames.push((Identifier::new(old_name), Identifier::new(new_name)));
40215            }
40216            rename = Some(renames);
40217        }
40218
40219        Ok(Star {
40220            table,
40221            except,
40222            replace,
40223            rename,
40224            trailing_comments: star_trailing_comments,
40225            span: None,
40226        })
40227    }
40228
40229    // === Helper methods ===
40230
40231    /// Check if at end of tokens
40232    #[inline]
40233    fn is_at_end(&self) -> bool {
40234        self.current >= self.tokens.len()
40235    }
40236
40237    /// Check if current token is a query modifier keyword or end of input.
40238    /// Used after GROUP BY ALL/DISTINCT to decide whether to parse expression lists.
40239    fn is_at_query_modifier_or_end(&self) -> bool {
40240        if self.is_at_end() {
40241            return true;
40242        }
40243        matches!(
40244            self.peek().token_type,
40245            TokenType::Having
40246                | TokenType::Qualify
40247                | TokenType::Window
40248                | TokenType::Order
40249                | TokenType::Limit
40250                | TokenType::Fetch
40251                | TokenType::Offset
40252                | TokenType::For
40253                | TokenType::Lock
40254                | TokenType::Union
40255                | TokenType::Except
40256                | TokenType::Intersect
40257                | TokenType::RParen
40258                | TokenType::Semicolon
40259                | TokenType::Where
40260        )
40261    }
40262
40263    /// Create a parse error with position from the current token
40264    fn parse_error(&self, message: impl Into<String>) -> Error {
40265        let span = self.peek().span;
40266        Error::parse(message, span.line, span.column, span.start, span.end)
40267    }
40268
40269    /// Peek at current token
40270    /// Returns reference to current token, or last token if at end
40271    #[inline]
40272    fn peek(&self) -> &Token {
40273        if self.current >= self.tokens.len() {
40274            // Return last token as fallback when at end
40275            // In practice, callers should check is_at_end() before calling peek()
40276            // but this prevents panic
40277            self.tokens.last().expect("Token list should not be empty")
40278        } else {
40279            &self.tokens[self.current]
40280        }
40281    }
40282
40283    /// Look ahead by n positions (0 = current token)
40284    fn peek_nth(&self, n: usize) -> Option<&Token> {
40285        let idx = self.current + n;
40286        if idx < self.tokens.len() {
40287            Some(&self.tokens[idx])
40288        } else {
40289            None
40290        }
40291    }
40292
40293    /// Advance to next token
40294    #[inline]
40295    fn advance(&mut self) -> Token {
40296        if self.current >= self.tokens.len() {
40297            // Return last token as fallback if we're past the end
40298            // In practice, callers should check is_at_end() before calling advance()
40299            return self
40300                .tokens
40301                .last()
40302                .cloned()
40303                .expect("Token list should not be empty");
40304        }
40305        let token = self.tokens[self.current].clone();
40306        self.current += 1;
40307        token
40308    }
40309
40310    /// Advance to next token without returning it (when result is unused)
40311    #[inline]
40312    fn skip(&mut self) {
40313        if self.current < self.tokens.len() {
40314            self.current += 1;
40315        }
40316    }
40317
40318    /// Get the previous token (last consumed)
40319    fn previous(&self) -> &Token {
40320        &self.tokens[self.current - 1]
40321    }
40322
40323    /// Get trailing comments from the previous token
40324    fn previous_trailing_comments(&self) -> &[String] {
40325        if self.current > 0 {
40326            &self.tokens[self.current - 1].trailing_comments
40327        } else {
40328            &[]
40329        }
40330    }
40331
40332    /// Get the token type of the previous token (the one before current).
40333    fn previous_token_type(&self) -> Option<TokenType> {
40334        if self.current > 0 {
40335            Some(self.tokens[self.current - 1].token_type.clone())
40336        } else {
40337            None
40338        }
40339    }
40340
40341    /// Wrap a query expression in a Subquery node.
40342    /// Only wraps if the expression is a query statement (Select, Union, etc.),
40343    /// not for simple expressions like column references.
40344    fn maybe_wrap_in_subquery(&self, inner: Expression) -> Expression {
40345        if matches!(
40346            &inner,
40347            Expression::Select(_)
40348                | Expression::Union(_)
40349                | Expression::Intersect(_)
40350                | Expression::Except(_)
40351        ) {
40352            Expression::Subquery(Box::new(Subquery {
40353                this: inner,
40354                alias: None,
40355                column_aliases: Vec::new(),
40356                order_by: None,
40357                limit: None,
40358                offset: None,
40359                distribute_by: None,
40360                sort_by: None,
40361                cluster_by: None,
40362                lateral: false,
40363                modifiers_inside: false,
40364                trailing_comments: Vec::new(),
40365                inferred_type: None,
40366            }))
40367        } else {
40368            inner
40369        }
40370    }
40371
40372    /// Clear trailing_comments from the rightmost leaf of an expression tree.
40373    /// Used by parse_and/parse_or to avoid comment duplication: when the same comment
40374    /// is captured both in an expression's trailing_comments (during parse_primary) and
40375    /// in a BinaryOp's operator_comments (during parse_and/parse_or), we clear the
40376    /// expression's copy since the operator_comments position (after AND/OR) is correct.
40377    fn clear_rightmost_trailing_comments(expr: &mut Expression) {
40378        match expr {
40379            Expression::Column(col) => col.trailing_comments.clear(),
40380            Expression::And(op) | Expression::Or(op) => {
40381                Self::clear_rightmost_trailing_comments(&mut op.right);
40382            }
40383            Expression::Not(op) => {
40384                Self::clear_rightmost_trailing_comments(&mut op.this);
40385            }
40386            // For comparison ops, the rightmost is the right operand
40387            Expression::Eq(op)
40388            | Expression::Neq(op)
40389            | Expression::Lt(op)
40390            | Expression::Lte(op)
40391            | Expression::Gt(op)
40392            | Expression::Gte(op)
40393            | Expression::Add(op)
40394            | Expression::Sub(op)
40395            | Expression::Mul(op)
40396            | Expression::Div(op) => {
40397                Self::clear_rightmost_trailing_comments(&mut op.right);
40398            }
40399            // For other expressions, trailing_comments might be stored differently
40400            // We don't need to handle all variants, just the common ones that appear
40401            // as operands in AND/OR expressions
40402            _ => {}
40403        }
40404    }
40405
40406    /// Get leading comments from the current token (comments that appeared before it)
40407    fn current_leading_comments(&self) -> &[String] {
40408        if !self.is_at_end() {
40409            &self.tokens[self.current].comments
40410        } else {
40411            &[]
40412        }
40413    }
40414
40415    /// Convert a slice of tokens to SQL string with proper quoting for strings
40416    fn tokens_to_sql(&self, start: usize, end: usize) -> String {
40417        let mut result = String::new();
40418        let mut prev_line: Option<usize> = None;
40419        let mut prev_end_offset: Option<usize> = None;
40420
40421        for t in &self.tokens[start..end] {
40422            // Check if we moved to a new line (preserve original line structure)
40423            let is_new_line = prev_line.is_some() && t.span.line > prev_line.unwrap();
40424
40425            // Use byte offsets to determine original spacing between tokens.
40426            // This preserves the exact spacing from the source (e.g., TRANSFORM( vs OPTIONS ())
40427            if is_new_line {
40428                result.push('\n');
40429                // Preserve original indentation
40430                // span.column is the column AFTER the last character (1-based),
40431                // so start column = span.column - text.chars().count()
40432                let text_len = t.text.chars().count();
40433                let start_col = t.span.column.saturating_sub(text_len);
40434                // For string tokens, add 2 for the quotes that were stripped
40435                let start_col = if t.token_type == TokenType::String {
40436                    start_col.saturating_sub(2)
40437                } else {
40438                    start_col
40439                };
40440                let indent = if start_col > 1 { start_col - 1 } else { 0 };
40441                for _ in 0..indent {
40442                    result.push(' ');
40443                }
40444            } else if !result.is_empty() {
40445                // Same line: use byte offsets to detect if there was whitespace
40446                let had_space = prev_end_offset.map_or(false, |prev_end| t.span.start > prev_end);
40447                if had_space {
40448                    result.push(' ');
40449                }
40450            }
40451
40452            if t.token_type == TokenType::String {
40453                // Re-add quotes around string literals
40454                result.push('\'');
40455                result.push_str(&t.text.replace('\'', "''"));
40456                result.push('\'');
40457            } else {
40458                result.push_str(&t.text);
40459            }
40460
40461            prev_line = Some(t.span.line);
40462            prev_end_offset = Some(t.span.end);
40463        }
40464        result
40465    }
40466
40467    /// Convert tokens to SQL for CREATE STAGE, normalizing FILE_FORMAT clause
40468    /// Transforms FILE_FORMAT='value' to FILE_FORMAT=(FORMAT_NAME='value')
40469    /// and FILE_FORMAT=schema.format to FILE_FORMAT=(FORMAT_NAME=schema.format)
40470    fn tokens_to_sql_stage_format(&self, start: usize, end: usize) -> String {
40471        let mut result = String::new();
40472        let mut prev_token_type: Option<TokenType> = None;
40473        let mut i = start;
40474
40475        while i < end {
40476            let t = &self.tokens[i];
40477
40478            // Check for FILE_FORMAT= pattern that needs normalization
40479            // FILE_FORMAT must be followed by = and then NOT by (
40480            if (t.token_type == TokenType::Var || t.token_type == TokenType::Identifier)
40481                && t.text.eq_ignore_ascii_case("FILE_FORMAT")
40482                && i + 1 < end
40483                && self.tokens[i + 1].token_type == TokenType::Eq
40484                && (i + 2 >= end || self.tokens[i + 2].token_type != TokenType::LParen)
40485            {
40486                // Need to normalize: FILE_FORMAT=value -> FILE_FORMAT=(FORMAT_NAME=value)
40487                if !result.is_empty() && prev_token_type != Some(TokenType::LParen) {
40488                    result.push(' ');
40489                }
40490                result.push_str("FILE_FORMAT=(FORMAT_NAME=");
40491
40492                // Skip FILE_FORMAT and =
40493                i += 2;
40494
40495                // Collect the value (string literal or qualified identifier like schema.format)
40496                while i < end {
40497                    let val = &self.tokens[i];
40498                    if val.token_type == TokenType::String {
40499                        // String literal: 'format1'
40500                        result.push('\'');
40501                        result.push_str(&val.text.replace('\'', "''"));
40502                        result.push('\'');
40503                        i += 1;
40504                        break;
40505                    } else if val.token_type == TokenType::Var
40506                        || val.token_type == TokenType::Identifier
40507                    {
40508                        // Identifier: schema1 or format1
40509                        result.push_str(&val.text);
40510                        i += 1;
40511                        // Check for dot (qualified name)
40512                        if i < end && self.tokens[i].token_type == TokenType::Dot {
40513                            result.push('.');
40514                            i += 1;
40515                            // Expect identifier after dot
40516                            if i < end {
40517                                result.push_str(&self.tokens[i].text);
40518                                i += 1;
40519                            }
40520                        }
40521                        break;
40522                    } else {
40523                        break;
40524                    }
40525                }
40526                result.push(')');
40527                prev_token_type = Some(TokenType::RParen);
40528                continue;
40529            }
40530
40531            // Normal token handling (same as tokens_to_sql)
40532            let needs_space = !result.is_empty()
40533                && prev_token_type != Some(TokenType::LParen)
40534                && prev_token_type != Some(TokenType::Eq)
40535                && prev_token_type != Some(TokenType::Dot)
40536                && t.token_type != TokenType::Comma
40537                && t.token_type != TokenType::RParen
40538                && t.token_type != TokenType::LParen
40539                && t.token_type != TokenType::Eq
40540                && t.token_type != TokenType::Dot;
40541
40542            if needs_space {
40543                result.push(' ');
40544            }
40545
40546            if t.token_type == TokenType::String {
40547                result.push('\'');
40548                result.push_str(&t.text.replace('\'', "''"));
40549                result.push('\'');
40550            } else {
40551                result.push_str(&t.text);
40552            }
40553
40554            prev_token_type = Some(t.token_type);
40555            i += 1;
40556        }
40557        result
40558    }
40559
40560    /// Like tokens_to_sql but also uppercases keyword tokens and adds space after commas
40561    fn tokens_to_sql_uppercased(&self, start: usize, end: usize) -> String {
40562        let mut result = String::new();
40563        let mut prev_token_type: Option<TokenType> = None;
40564        let mut prev_token_text: Option<String> = None;
40565
40566        for t in &self.tokens[start..end] {
40567            // Smart spacing: no space before comma, ), . or after (, .
40568            // Add space before ( only when preceded by a structural keyword or identifier
40569            // (e.g., "PRIMARY KEY (Id)", "CLUSTERED (EmpID)")
40570            // but NOT after data type keywords (e.g., "VARCHAR(100)", "INT(11)")
40571            let is_lparen_after_keyword = t.token_type == TokenType::LParen
40572                && prev_token_type.map_or(false, |p: TokenType| {
40573                    // Only add space for structural SQL keywords, not data type keywords
40574                    match p {
40575                        TokenType::PrimaryKey | TokenType::ForeignKey | TokenType::Unique
40576                        | TokenType::Check | TokenType::Index | TokenType::Key
40577                        | TokenType::Constraint | TokenType::References
40578                        | TokenType::Not | TokenType::Null
40579                        | TokenType::Default | TokenType::Values | TokenType::In
40580                        | TokenType::Exists | TokenType::Select | TokenType::From
40581                        | TokenType::Where | TokenType::Having | TokenType::Using
40582                        | TokenType::On | TokenType::Set | TokenType::Into
40583                        | TokenType::Table | TokenType::View | TokenType::Create
40584                        | TokenType::Insert | TokenType::Update | TokenType::Delete
40585                        | TokenType::Join | TokenType::Left | TokenType::Right
40586                        | TokenType::Inner | TokenType::Outer | TokenType::Full
40587                        | TokenType::Cross | TokenType::Case | TokenType::When
40588                        | TokenType::Then | TokenType::Else | TokenType::End
40589                        | TokenType::If | TokenType::Partition | TokenType::Over
40590                        | TokenType::Between | TokenType::Like | TokenType::Replace
40591                        | TokenType::Grant | TokenType::Revoke
40592                        => true,
40593                        _ => false,
40594                    }
40595                })
40596                // For Var/Identifier tokens, add space before ( only for structural tokens
40597                // (CLUSTERED, NONCLUSTERED, INDEX) but not data types (VARCHAR, INT, etc.)
40598                || (t.token_type == TokenType::LParen
40599                    && prev_token_text.as_ref().map_or(false, |text| {
40600                        let upper = text.to_ascii_uppercase();
40601                        matches!(upper.as_str(),
40602                            "CLUSTERED" | "NONCLUSTERED" | "HASH" | "RANGE"
40603                            | "INCLUDE" | "FILLFACTOR" | "PAD_INDEX"
40604                        )
40605                    }));
40606            let needs_space = !result.is_empty()
40607                && prev_token_type != Some(TokenType::LParen)
40608                && prev_token_type != Some(TokenType::Dot)
40609                && t.token_type != TokenType::Comma
40610                && t.token_type != TokenType::RParen
40611                && t.token_type != TokenType::Dot
40612                && (t.token_type != TokenType::LParen || is_lparen_after_keyword);
40613
40614            // Add space after comma
40615            if prev_token_type == Some(TokenType::Comma) {
40616                result.push(' ');
40617            } else if needs_space {
40618                result.push(' ');
40619            }
40620
40621            if t.token_type == TokenType::String {
40622                // Re-add quotes around string literals
40623                result.push('\'');
40624                result.push_str(&t.text.replace('\'', "''"));
40625                result.push('\'');
40626            } else if t.token_type.is_keyword() {
40627                // Uppercase keyword tokens
40628                result.push_str(&t.text.to_ascii_uppercase());
40629            } else {
40630                // For non-keyword tokens, preserve original text
40631                result.push_str(&t.text);
40632            }
40633
40634            prev_token_type = Some(t.token_type);
40635            prev_token_text = Some(t.text.clone());
40636        }
40637        result
40638    }
40639
40640    /// Check if current token matches type
40641    #[inline]
40642    fn check(&self, token_type: TokenType) -> bool {
40643        if self.is_at_end() {
40644            false
40645        } else {
40646            self.peek().token_type == token_type
40647        }
40648    }
40649
40650    /// Check if current token is a keyword
40651    fn check_keyword(&self) -> bool {
40652        if self.is_at_end() {
40653            false
40654        } else {
40655            self.peek().token_type.is_keyword()
40656        }
40657    }
40658
40659    /// Check if current UNPIVOT token starts an UNPIVOT clause (vs being an alias).
40660    /// UNPIVOT clause starts with: UNPIVOT(, UNPIVOT INCLUDE, or UNPIVOT EXCLUDE
40661    fn is_unpivot_clause_start(&self) -> bool {
40662        if !self.check(TokenType::Unpivot) {
40663            return false;
40664        }
40665        let next_idx = self.current + 1;
40666        if next_idx >= self.tokens.len() {
40667            return false;
40668        }
40669        let next = &self.tokens[next_idx];
40670        if next.token_type == TokenType::LParen {
40671            return true;
40672        }
40673        // UNPIVOT INCLUDE NULLS (...) or UNPIVOT EXCLUDE NULLS (...)
40674        let next_text = next.text.to_ascii_uppercase();
40675        next_text == "INCLUDE" || next_text == "EXCLUDE"
40676    }
40677
40678    /// Check if current token text matches (case-insensitive), does not advance
40679    fn check_keyword_text(&self, keyword: &str) -> bool {
40680        if self.is_at_end() {
40681            false
40682        } else {
40683            self.peek().text.eq_ignore_ascii_case(keyword)
40684        }
40685    }
40686
40687    /// Check if current token is FROM keyword
40688    fn check_from_keyword(&self) -> bool {
40689        self.check(TokenType::From)
40690    }
40691
40692    /// Check if next token matches type
40693    fn check_next(&self, token_type: TokenType) -> bool {
40694        if self.current + 1 >= self.tokens.len() {
40695            false
40696        } else {
40697            self.tokens[self.current + 1].token_type == token_type
40698        }
40699    }
40700
40701    /// Check if next token is an identifier with specific name (case-insensitive)
40702    fn check_next_identifier(&self, name: &str) -> bool {
40703        if self.current + 1 >= self.tokens.len() {
40704            false
40705        } else {
40706            let token = &self.tokens[self.current + 1];
40707            (token.token_type == TokenType::Var || token.token_type == TokenType::Identifier)
40708                && token.text.eq_ignore_ascii_case(name)
40709        }
40710    }
40711
40712    /// Match an identifier with specific text (case insensitive)
40713    /// Checks for Identifier, Var, and QuotedIdentifier tokens
40714    fn match_identifier(&mut self, text: &str) -> bool {
40715        if (self.check(TokenType::Identifier)
40716            || self.check(TokenType::Var)
40717            || self.check(TokenType::QuotedIdentifier))
40718            && self.peek().text.eq_ignore_ascii_case(text)
40719        {
40720            self.skip();
40721            true
40722        } else {
40723            false
40724        }
40725    }
40726
40727    /// Check if current token is an identifier with specific text (case insensitive)
40728    /// Does NOT advance the parser
40729    fn check_identifier(&self, text: &str) -> bool {
40730        if self.is_at_end() {
40731            return false;
40732        }
40733        (self.check(TokenType::Identifier)
40734            || self.check(TokenType::Var)
40735            || self.check(TokenType::QuotedIdentifier))
40736            && self.peek().text.eq_ignore_ascii_case(text)
40737    }
40738
40739    /// Check if current token is a "safe" keyword that can be used as an identifier.
40740    /// Check if the current Percent token is a PERCENT modifier (not a modulo operator).
40741    /// "PERCENT" spelled out is always a modifier. "%" is a modifier when followed by
40742    /// a clause boundary (OFFSET, end of input, semicolon, RParen, comma, etc.)
40743    fn is_percent_modifier(&self) -> bool {
40744        if self.is_at_end() {
40745            return false;
40746        }
40747        if self.peek().text.eq_ignore_ascii_case("PERCENT") {
40748            return true;
40749        }
40750        // "%" symbol — only treat as PERCENT modifier if followed by a boundary
40751        if self.peek().text == "%" {
40752            let next_idx = self.current + 1;
40753            if next_idx >= self.tokens.len() {
40754                return true; // at end — it's PERCENT
40755            }
40756            let next_type = self.tokens[next_idx].token_type;
40757            return matches!(
40758                next_type,
40759                TokenType::Offset
40760                    | TokenType::Semicolon
40761                    | TokenType::RParen
40762                    | TokenType::From
40763                    | TokenType::Where
40764                    | TokenType::GroupBy
40765                    | TokenType::OrderBy
40766                    | TokenType::Having
40767                    | TokenType::Union
40768                    | TokenType::Intersect
40769                    | TokenType::Except
40770                    | TokenType::Comma
40771                    | TokenType::With // WITH TIES
40772            ) || next_idx >= self.tokens.len();
40773        }
40774        false
40775    }
40776
40777    /// Structural keywords like FROM, WHERE, JOIN, SELECT are NOT safe.
40778    /// Non-structural keywords like FILTER, UPDATE, END, VALUES can be used as identifiers.
40779    fn is_safe_keyword_as_identifier(&self) -> bool {
40780        if self.is_at_end() {
40781            return false;
40782        }
40783        let token_type = self.peek().token_type;
40784        // Structural keywords that should NOT be used as identifiers
40785        let is_structural = matches!(
40786            token_type,
40787            TokenType::From
40788                | TokenType::Where
40789                | TokenType::Select
40790                | TokenType::Insert
40791                | TokenType::Delete
40792                | TokenType::Create
40793                | TokenType::Drop
40794                | TokenType::Alter
40795                | TokenType::Join
40796                | TokenType::Inner
40797                | TokenType::Cross
40798                | TokenType::On
40799                | TokenType::GroupBy
40800                | TokenType::OrderBy
40801                | TokenType::Having
40802                | TokenType::With
40803                | TokenType::Union
40804                | TokenType::Intersect
40805                | TokenType::Except
40806                | TokenType::Qualify
40807                | TokenType::Into
40808                | TokenType::Set
40809                | TokenType::Using
40810                | TokenType::Lateral
40811                | TokenType::Natural
40812        );
40813        // ClickHouse allows many SQL keywords as identifiers (table names, column aliases, etc.)
40814        if matches!(
40815            self.config.dialect,
40816            Some(crate::dialects::DialectType::ClickHouse)
40817        ) {
40818            let is_ch_structural = matches!(
40819                token_type,
40820                TokenType::From
40821                    | TokenType::Where
40822                    | TokenType::Select
40823                    | TokenType::Create
40824                    | TokenType::Drop
40825                    | TokenType::Alter
40826                    | TokenType::On
40827                    | TokenType::GroupBy
40828                    | TokenType::OrderBy
40829                    | TokenType::Having
40830                    | TokenType::With
40831                    | TokenType::Union
40832                    | TokenType::Intersect
40833                    | TokenType::Except
40834                    | TokenType::Into
40835                    | TokenType::Using
40836                    | TokenType::Lateral
40837                    | TokenType::Natural
40838            );
40839            // Also allow certain operator tokens and non-keyword tokens as identifiers
40840            if matches!(token_type, TokenType::RLike | TokenType::Values) {
40841                return true;
40842            }
40843            return self.peek().token_type.is_keyword() && !is_ch_structural;
40844        }
40845        // If it's a keyword but NOT structural, it's safe to use as identifier
40846        self.peek().token_type.is_keyword() && !is_structural
40847    }
40848
40849    /// Check if a token at current position is the last meaningful token in an expression context.
40850    /// This is used to detect when a keyword like IS or KEEP should be treated as an alias
40851    /// instead of an operator keyword.
40852    fn is_last_expression_token(&self, _token_type: TokenType) -> bool {
40853        // Check if the token after the current one is end-of-input or a clause boundary
40854        let next_idx = self.current + 1;
40855        if next_idx >= self.tokens.len() {
40856            return true; // at end of input
40857        }
40858        let next_type = self.tokens[next_idx].token_type;
40859        // Clause boundaries that indicate the current token is the last in the expression
40860        matches!(
40861            next_type,
40862            TokenType::From
40863                | TokenType::Where
40864                | TokenType::GroupBy
40865                | TokenType::OrderBy
40866                | TokenType::Having
40867                | TokenType::Limit
40868                | TokenType::Union
40869                | TokenType::Intersect
40870                | TokenType::Except
40871                | TokenType::Semicolon
40872                | TokenType::RParen
40873                | TokenType::Comma
40874        )
40875    }
40876
40877    /// Check if current token is a type keyword (for lambda type annotations)
40878    fn is_type_keyword(&self) -> bool {
40879        if self.is_at_end() {
40880            return false;
40881        }
40882        let token = self.peek();
40883        // Check for common type keywords that might appear in lambda annotations
40884        // Use text comparison to avoid depending on specific TokenType variants
40885        let text_upper = token.text.to_ascii_uppercase();
40886        matches!(
40887            text_upper.as_str(),
40888            "INT"
40889                | "INTEGER"
40890                | "BIGINT"
40891                | "SMALLINT"
40892                | "TINYINT"
40893                | "DOUBLE"
40894                | "FLOAT"
40895                | "DECIMAL"
40896                | "NUMERIC"
40897                | "REAL"
40898                | "VARCHAR"
40899                | "CHAR"
40900                | "TEXT"
40901                | "STRING"
40902                | "NVARCHAR"
40903                | "NCHAR"
40904                | "BOOLEAN"
40905                | "BOOL"
40906                | "DATE"
40907                | "TIME"
40908                | "TIMESTAMP"
40909                | "DATETIME"
40910                | "INTERVAL"
40911                | "BINARY"
40912                | "VARBINARY"
40913                | "BLOB"
40914                | "ARRAY"
40915                | "MAP"
40916                | "STRUCT"
40917                | "OBJECT"
40918                | "VARIANT"
40919                | "JSON"
40920                | "NUMBER"
40921                | "VARCHAR2"
40922        )
40923    }
40924
40925    /// Check if current token is a command keyword that can safely be used as an implicit alias.
40926    /// This is a narrow set of command-like keywords (GET, PUT, COPY, SHOW, etc.) that are
40927    /// unlikely to conflict with SQL clause keywords when used as implicit aliases.
40928    fn is_command_keyword_as_alias(&self) -> bool {
40929        if self.is_at_end() {
40930            return false;
40931        }
40932        let token_type = self.peek().token_type;
40933        // FORMAT is a query modifier in ClickHouse, so don't treat it as an alias there
40934        if matches!(token_type, TokenType::Format) {
40935            return !matches!(
40936                self.config.dialect,
40937                Some(crate::dialects::DialectType::ClickHouse)
40938            );
40939        }
40940        // Base keywords that can be aliases in all dialects
40941        if matches!(
40942            token_type,
40943            TokenType::Get
40944                | TokenType::Put
40945                | TokenType::Copy
40946                | TokenType::Show
40947                | TokenType::Rename
40948                | TokenType::Enum
40949                | TokenType::Sample
40950                | TokenType::Collate
40951                | TokenType::Add
40952        ) {
40953            return true;
40954        }
40955        // Spark/Hive allow LIMIT and OFFSET as aliases (without quoting),
40956        // but only when NOT followed by a number/expression (which means it's the actual clause)
40957        if matches!(
40958            self.config.dialect,
40959            Some(crate::dialects::DialectType::Spark)
40960                | Some(crate::dialects::DialectType::Hive)
40961                | Some(crate::dialects::DialectType::Databricks)
40962        ) && matches!(token_type, TokenType::Limit | TokenType::Offset)
40963        {
40964            let next = self.current + 1;
40965            let next_is_value = next < self.tokens.len()
40966                && matches!(
40967                    self.tokens[next].token_type,
40968                    TokenType::Number
40969                        | TokenType::LParen
40970                        | TokenType::Var
40971                        | TokenType::Parameter
40972                        | TokenType::All
40973                );
40974            if !next_is_value {
40975                return true;
40976            }
40977        }
40978        false
40979    }
40980
40981    /// Check if current token is a keyword that can be used as a table alias.
40982    /// This is more permissive than is_safe_keyword_as_identifier - it allows
40983    /// LEFT, RIGHT, OUTER, FULL which are JOIN keywords but can also be aliases.
40984    fn can_be_alias_keyword(&self) -> bool {
40985        if self.is_at_end() {
40986            return false;
40987        }
40988        let token_type = self.peek().token_type;
40989        // Keywords that can be used as aliases (similar to is_safe_keyword but more permissive)
40990        matches!(
40991            token_type,
40992            TokenType::Left
40993                | TokenType::Right
40994                | TokenType::Outer
40995                | TokenType::Full
40996                | TokenType::Only
40997                | TokenType::Next
40998                | TokenType::All
40999                | TokenType::If
41000        ) || self.is_safe_keyword_as_identifier()
41001    }
41002
41003    /// Match and consume a token type
41004    fn match_token(&mut self, token_type: TokenType) -> bool {
41005        if self.check(token_type) {
41006            self.skip();
41007            true
41008        } else {
41009            false
41010        }
41011    }
41012
41013    /// Match a sequence of keywords
41014    fn match_keywords(&mut self, keywords: &[TokenType]) -> bool {
41015        // Check if all keywords match
41016        for (i, &kw) in keywords.iter().enumerate() {
41017            if self.current + i >= self.tokens.len() {
41018                return false;
41019            }
41020            if self.tokens[self.current + i].token_type != kw {
41021                return false;
41022            }
41023        }
41024
41025        // Consume all matched keywords
41026        self.current += keywords.len();
41027        true
41028    }
41029
41030    /// Expect a specific token type
41031    fn expect(&mut self, token_type: TokenType) -> Result<Token> {
41032        if self.check(token_type) {
41033            Ok(self.advance())
41034        } else {
41035            let got = if self.is_at_end() {
41036                "end of input".to_string()
41037            } else {
41038                format!("{:?}", self.peek().token_type)
41039            };
41040            let got_text = if self.is_at_end() {
41041                "".to_string()
41042            } else {
41043                self.peek().text.clone()
41044            };
41045            let start = self.current.saturating_sub(3);
41046            let end = (self.current + 4).min(self.tokens.len());
41047            let context = self.tokens_to_sql(start, end).replace('\n', " ");
41048            Err(self.parse_error(format!(
41049                "Expected {:?}, got {} ('{}') near [{}]",
41050                token_type, got, got_text, context
41051            )))
41052        }
41053    }
41054
41055    /// Expect a `>` token, handling the case where `>>` was tokenized as GtGt
41056    /// This is needed for parsing nested generic types like `ARRAY<ARRAY<INT>>`
41057    fn expect_gt(&mut self) -> Result<Token> {
41058        if self.check(TokenType::Gt) {
41059            Ok(self.advance())
41060        } else if self.check(TokenType::GtGt) {
41061            // Split >> into two > tokens
41062            // Replace the GtGt with Gt and return a synthetic Gt token
41063            let token = self.peek().clone();
41064            self.tokens[self.current] = Token {
41065                token_type: TokenType::Gt,
41066                text: ">".to_string(),
41067                span: Span {
41068                    start: token.span.start + 1,
41069                    end: token.span.end,
41070                    line: token.span.line,
41071                    column: token.span.column + 1,
41072                },
41073                comments: Vec::new(),
41074                trailing_comments: Vec::new(),
41075            };
41076            Ok(Token {
41077                token_type: TokenType::Gt,
41078                text: ">".to_string(),
41079                span: Span {
41080                    start: token.span.start,
41081                    end: token.span.start + 1,
41082                    line: token.span.line,
41083                    column: token.span.column,
41084                },
41085                comments: token.comments,
41086                trailing_comments: Vec::new(),
41087            })
41088        } else {
41089            Err(self.parse_error(format!(
41090                "Expected Gt, got {:?}",
41091                if self.is_at_end() {
41092                    "end of input".to_string()
41093                } else {
41094                    format!("{:?}", self.peek().token_type)
41095                }
41096            )))
41097        }
41098    }
41099
41100    /// Expect a string literal and return its value
41101    fn expect_string(&mut self) -> Result<String> {
41102        if self.check(TokenType::String) || self.check(TokenType::DollarString) {
41103            Ok(self.advance().text)
41104        } else {
41105            Err(self.parse_error(format!(
41106                "Expected string, got {:?}",
41107                if self.is_at_end() {
41108                    "end of input".to_string()
41109                } else {
41110                    format!("{:?}", self.peek().token_type)
41111                }
41112            )))
41113        }
41114    }
41115
41116    /// Check if the current token is any kind of identifier (regular, quoted, or var)
41117    fn is_identifier_token(&self) -> bool {
41118        self.check(TokenType::Var)
41119            || self.check(TokenType::Identifier)
41120            || self.check(TokenType::QuotedIdentifier)
41121    }
41122
41123    /// Check if current token is a stage reference (starts with @)
41124    /// This handles both DAt token and Var tokens that start with @
41125    fn is_stage_reference(&self) -> bool {
41126        self.check(TokenType::DAt)
41127            || (self.check(TokenType::Var) && self.peek().text.starts_with('@'))
41128    }
41129
41130    /// Check if the current token could be a MySQL numeric-starting identifier (e.g., 00f, 1d)
41131    /// This checks that the Number token is followed by a connected Var/Identifier token
41132    fn is_mysql_numeric_identifier(&self) -> bool {
41133        if !self.check(TokenType::Number)
41134            || !matches!(
41135                self.config.dialect,
41136                Some(crate::dialects::DialectType::MySQL)
41137            )
41138        {
41139            return false;
41140        }
41141        // Check if the next token is connected (no space) and is a var/identifier
41142        if self.current + 1 < self.tokens.len() {
41143            let curr = &self.tokens[self.current];
41144            let next = &self.tokens[self.current + 1];
41145            // Tokens are connected if they are immediately adjacent (no whitespace between)
41146            // span.end is exclusive, so if curr.end == next.start, they are adjacent
41147            let connected = curr.span.end == next.span.start;
41148            connected
41149                && (next.token_type == TokenType::Var || next.token_type == TokenType::Identifier)
41150        } else {
41151            false
41152        }
41153    }
41154
41155    /// Parse a MySQL numeric-starting identifier (e.g., 00f, 1d)
41156    /// Merges the number token with connected identifier tokens
41157    fn parse_mysql_numeric_identifier(&mut self) -> Identifier {
41158        let num_token = self.advance();
41159        let mut name = num_token.text.clone();
41160        // Merge with connected identifier/var tokens
41161        while !self.is_at_end()
41162            && self.is_connected()
41163            && (self.check(TokenType::Var) || self.check(TokenType::Identifier))
41164        {
41165            let tok = self.advance();
41166            name.push_str(&tok.text);
41167        }
41168        Identifier {
41169            name,
41170            // sqlglot treats this as an identifier token and re-emits it quoted.
41171            quoted: true,
41172            trailing_comments: Vec::new(),
41173            span: None,
41174        }
41175    }
41176
41177    /// Check if an uppercase string starting with '_' is a MySQL charset introducer
41178    fn is_mysql_charset_introducer(text: &str) -> bool {
41179        matches!(
41180            text,
41181            "_ARMSCII8"
41182                | "_ASCII"
41183                | "_BIG5"
41184                | "_BINARY"
41185                | "_CP1250"
41186                | "_CP1251"
41187                | "_CP1256"
41188                | "_CP1257"
41189                | "_CP850"
41190                | "_CP852"
41191                | "_CP866"
41192                | "_CP932"
41193                | "_DEC8"
41194                | "_EUCJPMS"
41195                | "_EUCKR"
41196                | "_GB18030"
41197                | "_GB2312"
41198                | "_GBK"
41199                | "_GEOSTD8"
41200                | "_GREEK"
41201                | "_HEBREW"
41202                | "_HP8"
41203                | "_KEYBCS2"
41204                | "_KOI8R"
41205                | "_KOI8U"
41206                | "_LATIN1"
41207                | "_LATIN2"
41208                | "_LATIN5"
41209                | "_LATIN7"
41210                | "_MACCE"
41211                | "_MACROMAN"
41212                | "_SJIS"
41213                | "_SWE7"
41214                | "_TIS620"
41215                | "_UCS2"
41216                | "_UJIS"
41217                | "_UTF8"
41218                | "_UTF16"
41219                | "_UTF16LE"
41220                | "_UTF32"
41221                | "_UTF8MB3"
41222                | "_UTF8MB4"
41223        )
41224    }
41225
41226    /// Check if the current token can be used as an identifier (includes keywords)
41227    fn is_identifier_or_keyword_token(&self) -> bool {
41228        self.is_identifier_token() || self.check_keyword()
41229    }
41230
41231    /// Expect an identifier and return an Identifier struct with quoted flag
41232    fn expect_identifier_with_quoted(&mut self) -> Result<Identifier> {
41233        if self.is_mysql_numeric_identifier() {
41234            return Ok(self.parse_mysql_numeric_identifier());
41235        }
41236        if self.is_identifier_token() {
41237            let token = self.advance();
41238            let quoted = token.token_type == TokenType::QuotedIdentifier;
41239            Ok(Identifier {
41240                name: token.text,
41241                quoted,
41242                trailing_comments: Vec::new(),
41243                span: None,
41244            })
41245        } else if self.check(TokenType::LBrace)
41246            && matches!(
41247                self.config.dialect,
41248                Some(crate::dialects::DialectType::ClickHouse)
41249            )
41250        {
41251            if let Some(param_expr) = self.parse_clickhouse_braced_parameter()? {
41252                if let Expression::Parameter(param) = &param_expr {
41253                    let name = format!(
41254                        "{{{}: {}}}",
41255                        param.name.as_deref().unwrap_or(""),
41256                        param.expression.as_deref().unwrap_or("")
41257                    );
41258                    return Ok(Identifier {
41259                        name,
41260                        quoted: false,
41261                        trailing_comments: Vec::new(),
41262                        span: None,
41263                    });
41264                }
41265            }
41266            Err(self.parse_error("Expected identifier, got LBrace"))
41267        } else {
41268            Err(self.parse_error(format!(
41269                "Expected identifier, got {:?}",
41270                if self.is_at_end() {
41271                    "end of input".to_string()
41272                } else {
41273                    format!("{:?}", self.peek().token_type)
41274                }
41275            )))
41276        }
41277    }
41278
41279    /// Parse a possibly dot-qualified identifier into parts (e.g. "mydb.hr" → [mydb, hr]).
41280    fn parse_identifier_parts(&mut self) -> Result<Vec<Identifier>> {
41281        let first = self.expect_identifier_with_quoted()?;
41282        let mut parts = vec![first];
41283        while self.match_token(TokenType::Dot) {
41284            parts.push(self.expect_identifier_with_quoted()?);
41285        }
41286        Ok(parts)
41287    }
41288
41289    /// Expect an identifier or keyword (for column names, field names, etc.)
41290    fn expect_identifier_or_keyword_with_quoted(&mut self) -> Result<Identifier> {
41291        // MySQL numeric-starting identifiers (e.g., 00f, 1d)
41292        if self.is_mysql_numeric_identifier() {
41293            return Ok(self.parse_mysql_numeric_identifier());
41294        }
41295        // Also accept ? (Parameter) as an identifier placeholder
41296        // For positional parameters like $23, the token text is "23" (without $)
41297        if self.check(TokenType::Parameter) {
41298            let token = self.advance();
41299            // If the text is a number, it's a positional parameter like $1, $2, $23
41300            // Construct $N as the identifier name
41301            let name = if token.text.chars().all(|c| c.is_ascii_digit()) && !token.text.is_empty() {
41302                format!("${}", token.text)
41303            } else {
41304                // Plain ? placeholder or other parameter
41305                "?".to_string()
41306            };
41307            return Ok(Identifier {
41308                name,
41309                quoted: false,
41310                trailing_comments: Vec::new(),
41311                span: None,
41312            });
41313        }
41314        if self.is_identifier_or_keyword_token() {
41315            let token = self.advance();
41316            let quoted = token.token_type == TokenType::QuotedIdentifier;
41317            Ok(Identifier {
41318                name: token.text,
41319                quoted,
41320                trailing_comments: Vec::new(),
41321                span: None,
41322            })
41323        } else if self.check(TokenType::LBrace)
41324            && matches!(
41325                self.config.dialect,
41326                Some(crate::dialects::DialectType::ClickHouse)
41327            )
41328        {
41329            // ClickHouse query parameter: {name:Type}
41330            if let Some(param_expr) = self.parse_clickhouse_braced_parameter()? {
41331                // Extract the parameter name to use as the identifier
41332                if let Expression::Parameter(param) = &param_expr {
41333                    let name = format!(
41334                        "{{{}: {}}}",
41335                        param.name.as_deref().unwrap_or(""),
41336                        param.expression.as_deref().unwrap_or("")
41337                    );
41338                    return Ok(Identifier {
41339                        name,
41340                        quoted: false,
41341                        trailing_comments: Vec::new(),
41342                        span: None,
41343                    });
41344                }
41345            }
41346            Err(self.parse_error("Expected identifier, got LBrace"))
41347        } else {
41348            Err(self.parse_error(format!(
41349                "Expected identifier, got {:?}",
41350                if self.is_at_end() {
41351                    "end of input".to_string()
41352                } else {
41353                    format!("{:?}", self.peek().token_type)
41354                }
41355            )))
41356        }
41357    }
41358
41359    /// Expect an identifier
41360    fn expect_identifier(&mut self) -> Result<String> {
41361        if self.is_identifier_token() {
41362            Ok(self.advance().text)
41363        } else if self.check(TokenType::LBrace)
41364            && matches!(
41365                self.config.dialect,
41366                Some(crate::dialects::DialectType::ClickHouse)
41367            )
41368        {
41369            if let Some(param_expr) = self.parse_clickhouse_braced_parameter()? {
41370                if let Expression::Parameter(param) = &param_expr {
41371                    return Ok(format!(
41372                        "{{{}: {}}}",
41373                        param.name.as_deref().unwrap_or(""),
41374                        param.expression.as_deref().unwrap_or("")
41375                    ));
41376                }
41377            }
41378            Err(self.parse_error("Expected identifier, got LBrace"))
41379        } else {
41380            Err(self.parse_error(format!(
41381                "Expected identifier, got {:?}",
41382                if self.is_at_end() {
41383                    "end of input".to_string()
41384                } else {
41385                    format!("{:?}", self.peek().token_type)
41386                }
41387            )))
41388        }
41389    }
41390
41391    /// Expect an identifier or keyword (for aliases, column names, etc.)
41392    fn expect_identifier_or_keyword(&mut self) -> Result<String> {
41393        if self.is_identifier_or_keyword_token() {
41394            Ok(self.advance().text)
41395        } else if self.check(TokenType::LBrace)
41396            && matches!(
41397                self.config.dialect,
41398                Some(crate::dialects::DialectType::ClickHouse)
41399            )
41400        {
41401            if let Some(param_expr) = self.parse_clickhouse_braced_parameter()? {
41402                if let Expression::Parameter(param) = &param_expr {
41403                    return Ok(format!(
41404                        "{{{}: {}}}",
41405                        param.name.as_deref().unwrap_or(""),
41406                        param.expression.as_deref().unwrap_or("")
41407                    ));
41408                }
41409            }
41410            Err(self.parse_error("Expected identifier, got LBrace"))
41411        } else {
41412            Err(self.parse_error(format!(
41413                "Expected identifier, got {:?}",
41414                if self.is_at_end() {
41415                    "end of input".to_string()
41416                } else {
41417                    format!("{:?}", self.peek().token_type)
41418                }
41419            )))
41420        }
41421    }
41422
41423    /// Expect an identifier or safe keyword (for CTE names, column names in CREATE TABLE, etc.)
41424    /// This is more permissive than expect_identifier but excludes structural keywords
41425    fn expect_identifier_or_safe_keyword(&mut self) -> Result<String> {
41426        if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
41427            Ok(self.advance().text)
41428        } else if self.check(TokenType::LBrace)
41429            && matches!(
41430                self.config.dialect,
41431                Some(crate::dialects::DialectType::ClickHouse)
41432            )
41433        {
41434            if let Some(param_expr) = self.parse_clickhouse_braced_parameter()? {
41435                if let Expression::Parameter(param) = &param_expr {
41436                    return Ok(format!(
41437                        "{{{}: {}}}",
41438                        param.name.as_deref().unwrap_or(""),
41439                        param.expression.as_deref().unwrap_or("")
41440                    ));
41441                }
41442            }
41443            Err(self.parse_error("Expected identifier, got LBrace"))
41444        } else {
41445            Err(self.parse_error(format!(
41446                "Expected identifier, got {:?}",
41447                if self.is_at_end() {
41448                    "end of input".to_string()
41449                } else {
41450                    format!("{:?}", self.peek().token_type)
41451                }
41452            )))
41453        }
41454    }
41455
41456    /// Expect an identifier or safe keyword, preserving quoted flag
41457    fn expect_identifier_or_safe_keyword_with_quoted(&mut self) -> Result<Identifier> {
41458        if self.is_mysql_numeric_identifier() {
41459            return Ok(self.parse_mysql_numeric_identifier());
41460        }
41461        if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
41462            let token = self.advance();
41463            let quoted = token.token_type == TokenType::QuotedIdentifier;
41464            Ok(Identifier {
41465                name: token.text,
41466                quoted,
41467                trailing_comments: Vec::new(),
41468                span: None,
41469            })
41470        } else {
41471            Err(self.parse_error(format!(
41472                "Expected identifier, got {:?}",
41473                if self.is_at_end() {
41474                    "end of input".to_string()
41475                } else {
41476                    format!("{:?}", self.peek().token_type)
41477                }
41478            )))
41479        }
41480    }
41481
41482    fn expect_identifier_or_alias_keyword_with_quoted(&mut self) -> Result<Identifier> {
41483        // ClickHouse: any keyword can be used as a table alias after explicit AS
41484        let ch_keyword = matches!(
41485            self.config.dialect,
41486            Some(crate::dialects::DialectType::ClickHouse)
41487        ) && self.peek().token_type.is_keyword();
41488        if self.is_identifier_token()
41489            || self.can_be_alias_keyword()
41490            || self.is_safe_keyword_as_identifier()
41491            || ch_keyword
41492        {
41493            let token = self.advance();
41494            let quoted = token.token_type == TokenType::QuotedIdentifier;
41495            Ok(Identifier {
41496                name: token.text,
41497                quoted,
41498                trailing_comments: Vec::new(),
41499                span: None,
41500            })
41501        } else if self.check(TokenType::String)
41502            && matches!(
41503                self.config.dialect,
41504                Some(crate::dialects::DialectType::DuckDB)
41505            )
41506        {
41507            // DuckDB allows string literals as identifiers (e.g., WITH 'x' AS (...))
41508            let token = self.advance();
41509            Ok(Identifier {
41510                name: token.text,
41511                quoted: true,
41512                trailing_comments: Vec::new(),
41513                span: None,
41514            })
41515        } else {
41516            Err(self.parse_error(format!(
41517                "Expected identifier, got {:?}",
41518                if self.is_at_end() {
41519                    "end of input".to_string()
41520                } else {
41521                    format!("{:?}", self.peek().token_type)
41522                }
41523            )))
41524        }
41525    }
41526
41527    /// Expect a number
41528    fn expect_number(&mut self) -> Result<i64> {
41529        let negative = self.match_token(TokenType::Dash);
41530        if self.check(TokenType::Number) {
41531            let text = self.advance().text;
41532            let val = text
41533                .parse::<i64>()
41534                .map_err(|_| self.parse_error(format!("Invalid number: {}", text)))?;
41535            Ok(if negative { -val } else { val })
41536        } else {
41537            Err(self.parse_error("Expected number"))
41538        }
41539    }
41540
41541    /// Parse a comma-separated list of expressions.
41542    /// Supports named arguments with => or := syntax.
41543    fn parse_expression_list_with_capacity(
41544        &mut self,
41545        capacity_hint: usize,
41546    ) -> Result<Vec<Expression>> {
41547        let mut expressions = Vec::with_capacity(capacity_hint);
41548
41549        loop {
41550            // Check if this is a named argument: identifier => value or identifier := value
41551            // Also check for safe keywords (like TYPE, FORMAT, etc.) that can be used as named arg names
41552            let expr = if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
41553                let start_pos = self.current;
41554                let name = self.expect_identifier_or_keyword_with_quoted()?;
41555
41556                if self.match_token(TokenType::FArrow) {
41557                    // name => value
41558                    let value = self.parse_expression()?;
41559                    Expression::NamedArgument(Box::new(NamedArgument {
41560                        name,
41561                        value,
41562                        separator: NamedArgSeparator::DArrow,
41563                    }))
41564                } else if self.match_token(TokenType::ColonEq) {
41565                    // name := value
41566                    let value = self.parse_expression()?;
41567                    Expression::NamedArgument(Box::new(NamedArgument {
41568                        name,
41569                        value,
41570                        separator: NamedArgSeparator::ColonEq,
41571                    }))
41572                } else {
41573                    // Not a named argument, backtrack and parse as regular expression
41574                    self.current = start_pos;
41575                    self.parse_expression()?
41576                }
41577            } else {
41578                self.parse_expression()?
41579            };
41580
41581            // Check for AS alias on this expression (Spark/Hive: IF(cond, val AS name, ...))
41582            let expr = if self.check(TokenType::As) {
41583                let as_pos = self.current;
41584                self.skip(); // consume AS
41585                             // Check if what follows looks like an alias name
41586                if self.is_identifier_token()
41587                    || self.is_safe_keyword_as_identifier()
41588                    || (matches!(
41589                        self.config.dialect,
41590                        Some(crate::dialects::DialectType::ClickHouse)
41591                    ) && self.peek().token_type.is_keyword())
41592                {
41593                    let alias = self.expect_identifier_or_keyword_with_quoted()?;
41594                    let alias_expr = Expression::Alias(Box::new(Alias {
41595                        this: expr,
41596                        alias,
41597                        column_aliases: Vec::new(),
41598                        pre_alias_comments: Vec::new(),
41599                        trailing_comments: Vec::new(),
41600                        inferred_type: None,
41601                    }));
41602                    // ClickHouse: if followed by an operator, the alias is part of a bigger expression
41603                    // e.g., blockSize() AS bs < 1000 means (blockSize() AS bs) < 1000
41604                    if matches!(
41605                        self.config.dialect,
41606                        Some(crate::dialects::DialectType::ClickHouse)
41607                    ) && matches!(
41608                        self.peek().token_type,
41609                        TokenType::Lt
41610                            | TokenType::Gt
41611                            | TokenType::Lte
41612                            | TokenType::Gte
41613                            | TokenType::Eq
41614                            | TokenType::Neq
41615                            | TokenType::Plus
41616                            | TokenType::Dash
41617                            | TokenType::Star
41618                            | TokenType::Slash
41619                            | TokenType::Percent
41620                            | TokenType::And
41621                            | TokenType::Or
41622                            | TokenType::Like
41623                            | TokenType::Not
41624                            | TokenType::In
41625                            | TokenType::Is
41626                            | TokenType::Between
41627                    ) {
41628                        // Parse the operator and right-hand side
41629                        let op_token = self.advance();
41630                        let right = self.parse_expression()?;
41631                        match op_token.token_type {
41632                            TokenType::Lt => {
41633                                Expression::Lt(Box::new(BinaryOp::new(alias_expr, right)))
41634                            }
41635                            TokenType::Gt => {
41636                                Expression::Gt(Box::new(BinaryOp::new(alias_expr, right)))
41637                            }
41638                            TokenType::Lte => {
41639                                Expression::Lte(Box::new(BinaryOp::new(alias_expr, right)))
41640                            }
41641                            TokenType::Gte => {
41642                                Expression::Gte(Box::new(BinaryOp::new(alias_expr, right)))
41643                            }
41644                            TokenType::Eq => {
41645                                Expression::Eq(Box::new(BinaryOp::new(alias_expr, right)))
41646                            }
41647                            TokenType::Neq => {
41648                                Expression::Neq(Box::new(BinaryOp::new(alias_expr, right)))
41649                            }
41650                            TokenType::Plus => {
41651                                Expression::Add(Box::new(BinaryOp::new(alias_expr, right)))
41652                            }
41653                            TokenType::Dash => {
41654                                Expression::Sub(Box::new(BinaryOp::new(alias_expr, right)))
41655                            }
41656                            TokenType::Star => {
41657                                Expression::Mul(Box::new(BinaryOp::new(alias_expr, right)))
41658                            }
41659                            TokenType::Slash => {
41660                                Expression::Div(Box::new(BinaryOp::new(alias_expr, right)))
41661                            }
41662                            TokenType::Percent => {
41663                                Expression::Mod(Box::new(BinaryOp::new(alias_expr, right)))
41664                            }
41665                            TokenType::And => {
41666                                Expression::And(Box::new(BinaryOp::new(alias_expr, right)))
41667                            }
41668                            TokenType::Or => {
41669                                Expression::Or(Box::new(BinaryOp::new(alias_expr, right)))
41670                            }
41671                            _ => alias_expr, // fallback, shouldn't happen
41672                        }
41673                    } else {
41674                        alias_expr
41675                    }
41676                } else {
41677                    // Not an alias name, backtrack
41678                    self.current = as_pos;
41679                    expr
41680                }
41681            } else {
41682                expr
41683            };
41684
41685            // Check for trailing comments on this expression
41686            // Only wrap in Annotated for expression types that don't have their own trailing_comments field
41687            let trailing_comments = self.previous_trailing_comments().to_vec();
41688            let expr = if trailing_comments.is_empty() {
41689                expr
41690            } else {
41691                // Only annotate Literals and other types that don't capture trailing comments
41692                match &expr {
41693                    Expression::Literal(_) | Expression::Boolean(_) | Expression::Null(_) => {
41694                        Expression::Annotated(Box::new(Annotated {
41695                            this: expr,
41696                            trailing_comments,
41697                        }))
41698                    }
41699                    // For expressions that already capture trailing_comments, don't double-wrap
41700                    _ => expr,
41701                }
41702            };
41703            expressions.push(expr);
41704
41705            if !self.match_token(TokenType::Comma) {
41706                break;
41707            }
41708            // ClickHouse: allow trailing comma before RParen in expression lists
41709            if matches!(
41710                self.config.dialect,
41711                Some(crate::dialects::DialectType::ClickHouse)
41712            ) && self.check(TokenType::RParen)
41713            {
41714                break;
41715            }
41716        }
41717
41718        Ok(expressions)
41719    }
41720
41721    /// Parse a comma-separated list of expressions.
41722    /// Supports named arguments with => or := syntax.
41723    fn parse_expression_list(&mut self) -> Result<Vec<Expression>> {
41724        self.parse_expression_list_with_capacity(0)
41725    }
41726
41727    /// Estimate top-level expression count until the next unmatched `)`.
41728    ///
41729    /// This is used for pre-allocating comma-separated lists like `IN (...)`
41730    /// to reduce `Vec` growth churn on very large lists.
41731    fn estimate_expression_list_capacity_until_rparen(&self) -> usize {
41732        if self.current >= self.tokens.len() || self.check(TokenType::RParen) {
41733            return 0;
41734        }
41735
41736        let mut idx = self.current;
41737        let mut paren_depth = 0usize;
41738        let mut bracket_depth = 0usize;
41739        let mut brace_depth = 0usize;
41740        let mut commas = 0usize;
41741        let mut has_any_token = false;
41742
41743        while idx < self.tokens.len() {
41744            let token_type = self.tokens[idx].token_type;
41745            match token_type {
41746                TokenType::LParen => paren_depth += 1,
41747                TokenType::RParen => {
41748                    if paren_depth == 0 && bracket_depth == 0 && brace_depth == 0 {
41749                        break;
41750                    }
41751                    paren_depth = paren_depth.saturating_sub(1);
41752                }
41753                TokenType::LBracket => bracket_depth += 1,
41754                TokenType::RBracket => bracket_depth = bracket_depth.saturating_sub(1),
41755                TokenType::LBrace => brace_depth += 1,
41756                TokenType::RBrace => brace_depth = brace_depth.saturating_sub(1),
41757                TokenType::Comma if paren_depth == 0 && bracket_depth == 0 && brace_depth == 0 => {
41758                    commas += 1;
41759                }
41760                _ => {}
41761            }
41762            has_any_token = true;
41763            idx += 1;
41764        }
41765
41766        if has_any_token {
41767            commas + 1
41768        } else {
41769            0
41770        }
41771    }
41772
41773    /// Parse function arguments with lambda support (for TRANSFORM and similar functions).
41774    /// Handles Snowflake typed lambda syntax: `a int -> a + 1`
41775    fn parse_function_args_with_lambda(&mut self) -> Result<Vec<Expression>> {
41776        let mut expressions = Vec::new();
41777
41778        loop {
41779            // Try to detect typed lambda: identifier type -> body
41780            let expr = if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
41781                let saved_pos = self.current;
41782                let ident_token = self.advance();
41783                let ident_name = ident_token.text.clone();
41784
41785                // Check for arrow (simple lambda: a -> body)
41786                if self.match_token(TokenType::Arrow) {
41787                    let body = self.parse_expression()?;
41788                    Expression::Lambda(Box::new(LambdaExpr {
41789                        parameters: vec![Identifier::new(ident_name)],
41790                        body,
41791                        colon: false,
41792                        parameter_types: Vec::new(),
41793                    }))
41794                }
41795                // Check for type annotation followed by arrow: a int -> body
41796                else if !self.is_at_end()
41797                    && self.is_type_keyword()
41798                    && !self.check(TokenType::FArrow)
41799                    && !self.check(TokenType::ColonEq)
41800                {
41801                    let type_annotation = self.parse_data_type()?;
41802                    if self.match_token(TokenType::Arrow) {
41803                        let body = self.parse_expression()?;
41804                        Expression::Lambda(Box::new(LambdaExpr {
41805                            parameters: vec![Identifier::new(ident_name)],
41806                            body,
41807                            colon: false,
41808                            parameter_types: vec![Some(type_annotation)],
41809                        }))
41810                    } else {
41811                        self.current = saved_pos;
41812                        self.parse_expression()?
41813                    }
41814                } else {
41815                    // Not a lambda, backtrack and parse as regular expression
41816                    self.current = saved_pos;
41817                    self.parse_expression()?
41818                }
41819            } else {
41820                self.parse_expression()?
41821            };
41822
41823            expressions.push(expr);
41824            if !self.match_token(TokenType::Comma) {
41825                break;
41826            }
41827        }
41828
41829        Ok(expressions)
41830    }
41831
41832    /// Parse a comma-separated list of expressions for VALUES tuples
41833    /// This variant supports AS aliases on each element (Hive syntax): VALUES (1 AS a, 2 AS b, 3)
41834    fn parse_values_expression_list(&mut self) -> Result<Vec<Expression>> {
41835        let mut expressions = Vec::new();
41836
41837        loop {
41838            // Handle DEFAULT keyword in VALUES - output as unquoted Var (like Python sqlglot's exp.var("DEFAULT"))
41839            let expr = if self.match_token(TokenType::Default) {
41840                Expression::Var(Box::new(crate::expressions::Var {
41841                    this: "DEFAULT".to_string(),
41842                }))
41843            } else {
41844                self.parse_expression()?
41845            };
41846
41847            // Capture trailing comments on the expression (e.g., `1 /* c4 */`)
41848            let trailing_comments = self.previous_trailing_comments().to_vec();
41849            let expr = if !trailing_comments.is_empty() {
41850                match &expr {
41851                    Expression::Literal(_) | Expression::Boolean(_) | Expression::Null(_) => {
41852                        Expression::Annotated(Box::new(crate::expressions::Annotated {
41853                            this: expr,
41854                            trailing_comments,
41855                        }))
41856                    }
41857                    _ => expr,
41858                }
41859            } else {
41860                expr
41861            };
41862
41863            // Check for AS alias on this value element (Hive syntax)
41864            let expr_with_alias = if self.match_token(TokenType::As) {
41865                let alias = self.expect_identifier_or_keyword_with_quoted()?;
41866                Expression::Alias(Box::new(Alias::new(expr, alias)))
41867            } else {
41868                expr
41869            };
41870
41871            expressions.push(expr_with_alias);
41872
41873            if !self.match_token(TokenType::Comma) {
41874                break;
41875            }
41876            // ClickHouse: trailing comma in VALUES, e.g., (1, 2, 3,)
41877            if self.check(TokenType::RParen) {
41878                break;
41879            }
41880        }
41881
41882        Ok(expressions)
41883    }
41884
41885    /// Parse a comma-separated list of identifiers
41886    fn parse_identifier_list(&mut self) -> Result<Vec<Identifier>> {
41887        let mut identifiers = Vec::new();
41888
41889        loop {
41890            // Allow keywords as identifiers in identifier lists (e.g., CTE column aliases)
41891            // Check if it's a quoted identifier before consuming
41892            let quoted = self.check(TokenType::QuotedIdentifier);
41893            let mut name = self.expect_identifier_or_safe_keyword()?;
41894            // ClickHouse: handle dotted names in identifier lists (e.g., INSERT INTO t (n.a, n.b))
41895            // Use keyword_with_quoted to allow any keyword after dot (e.g., replace.from)
41896            if matches!(
41897                self.config.dialect,
41898                Some(crate::dialects::DialectType::ClickHouse)
41899            ) {
41900                while self.match_token(TokenType::Dot) {
41901                    let sub_id = self.expect_identifier_or_keyword_with_quoted()?;
41902                    name = format!("{}.{}", name, sub_id.name);
41903                }
41904            }
41905            let trailing_comments = self.previous_trailing_comments().to_vec();
41906            identifiers.push(Identifier {
41907                name,
41908                quoted,
41909                trailing_comments,
41910                span: None,
41911            });
41912
41913            if !self.match_token(TokenType::Comma) {
41914                break;
41915            }
41916            // ClickHouse: allow trailing comma before RParen in identifier lists
41917            if matches!(
41918                self.config.dialect,
41919                Some(crate::dialects::DialectType::ClickHouse)
41920            ) && self.check(TokenType::RParen)
41921            {
41922                break;
41923            }
41924        }
41925
41926        Ok(identifiers)
41927    }
41928
41929    /// Parse a comma-separated list of column references for USING clause
41930    /// Supports qualified names like table.col but extracts only the column part
41931    fn parse_using_column_list(&mut self) -> Result<Vec<Identifier>> {
41932        let mut identifiers = Vec::new();
41933
41934        loop {
41935            // ClickHouse: USING * — wildcard in USING clause
41936            if matches!(
41937                self.config.dialect,
41938                Some(crate::dialects::DialectType::ClickHouse)
41939            ) && self.match_token(TokenType::Star)
41940            {
41941                identifiers.push(Identifier::new("*".to_string()));
41942                if !self.match_token(TokenType::Comma) {
41943                    break;
41944                }
41945                continue;
41946            }
41947            // Check if it's a quoted identifier before consuming
41948            let quoted = self.check(TokenType::QuotedIdentifier);
41949            let mut name = self.expect_identifier_or_safe_keyword()?;
41950            let mut final_quoted = quoted;
41951
41952            // Handle qualified names: table.column or schema.table.column
41953            // Keep only the final column name
41954            while self.match_token(TokenType::Dot) {
41955                final_quoted = self.check(TokenType::QuotedIdentifier);
41956                name = self.expect_identifier_or_safe_keyword()?;
41957            }
41958
41959            // ClickHouse: USING (col AS alias) — consume optional AS alias
41960            if matches!(
41961                self.config.dialect,
41962                Some(crate::dialects::DialectType::ClickHouse)
41963            ) && self.match_token(TokenType::As)
41964            {
41965                // Use the alias name instead
41966                final_quoted = self.check(TokenType::QuotedIdentifier);
41967                name = self.expect_identifier_or_safe_keyword()?;
41968            }
41969
41970            let trailing_comments = self.previous_trailing_comments().to_vec();
41971            identifiers.push(Identifier {
41972                name,
41973                quoted: final_quoted,
41974                trailing_comments,
41975                span: None,
41976            });
41977
41978            if !self.match_token(TokenType::Comma) {
41979                break;
41980            }
41981        }
41982
41983        Ok(identifiers)
41984    }
41985
41986    /// Parse a comma-separated list of identifiers for index columns.
41987    /// Supports MySQL prefix lengths: col(16) and sort order: col DESC
41988    fn parse_index_identifier_list(&mut self) -> Result<Vec<Identifier>> {
41989        let mut identifiers = Vec::new();
41990
41991        loop {
41992            let quoted = self.check(TokenType::QuotedIdentifier);
41993            let name = self.expect_identifier_or_safe_keyword()?;
41994            let trailing_comments = self.previous_trailing_comments().to_vec();
41995
41996            // Check for prefix length: col(16)
41997            let mut display_name = name.clone();
41998            if self.match_token(TokenType::LParen) {
41999                if self.check(TokenType::Number) {
42000                    let len = self.advance().text;
42001                    display_name = format!("{}({})", name, len);
42002                }
42003                self.expect(TokenType::RParen)?;
42004            }
42005
42006            // Check for DESC/ASC sort order
42007            if self.match_token(TokenType::Desc) {
42008                display_name = format!("{} DESC", display_name);
42009            } else if self.match_token(TokenType::Asc) {
42010                display_name = format!("{} ASC", display_name);
42011            }
42012
42013            identifiers.push(Identifier {
42014                name: display_name,
42015                quoted,
42016                trailing_comments,
42017                span: None,
42018            });
42019
42020            if !self.match_token(TokenType::Comma) {
42021                break;
42022            }
42023        }
42024
42025        Ok(identifiers)
42026    }
42027    // =============================================================================
42028    // Auto-generated Missing Parser Methods
42029    // Total: 296 methods
42030    // =============================================================================
42031
42032    /// parse_add_column - Implemented from Python _parse_add_column
42033    /// Calls: parse_column, parse_column_def_with_exists
42034    #[allow(unused_variables, unused_mut)]
42035    pub fn parse_add_column(&mut self) -> Result<Option<Expression>> {
42036        if self.match_texts(&["FIRST", "AFTER"]) {
42037            // Matched one of: FIRST, AFTER
42038            return Ok(None);
42039        }
42040        Ok(None)
42041    }
42042
42043    /// parse_alias - Parses alias for an expression
42044    /// This method parses just the alias part (AS name or just name)
42045    /// Python: _parse_alias
42046    pub fn parse_alias(&mut self) -> Result<Option<Expression>> {
42047        // Check for AS keyword (explicit alias)
42048        let _explicit = self.match_token(TokenType::Alias);
42049
42050        // Parse the alias identifier
42051        if let Some(alias_expr) = self.parse_id_var()? {
42052            let alias_ident = match alias_expr {
42053                Expression::Identifier(id) => id,
42054                _ => return Ok(None),
42055            };
42056            // Return just the alias identifier wrapped in an expression
42057            return Ok(Some(Expression::Identifier(alias_ident)));
42058        }
42059
42060        Ok(None)
42061    }
42062
42063    /// parse_alias_with_expr - Wraps an expression with an alias if present
42064    pub fn parse_alias_with_expr(
42065        &mut self,
42066        this: Option<Expression>,
42067    ) -> Result<Option<Expression>> {
42068        if this.is_none() {
42069            return Ok(None);
42070        }
42071        let expr = this.unwrap();
42072
42073        // Check for AS keyword (explicit alias)
42074        // Accept both TokenType::Alias and TokenType::As
42075        let has_as = self.match_token(TokenType::Alias) || self.match_token(TokenType::As);
42076
42077        // Check for column aliases: (col1, col2)
42078        if has_as && self.match_token(TokenType::LParen) {
42079            let mut column_aliases = Vec::new();
42080            loop {
42081                if let Some(col_expr) = self.parse_id_var()? {
42082                    if let Expression::Identifier(id) = col_expr {
42083                        column_aliases.push(id);
42084                    }
42085                } else {
42086                    break;
42087                }
42088                if !self.match_token(TokenType::Comma) {
42089                    break;
42090                }
42091            }
42092            self.match_token(TokenType::RParen);
42093
42094            if !column_aliases.is_empty() {
42095                return Ok(Some(Expression::Alias(Box::new(Alias {
42096                    this: expr,
42097                    alias: Identifier::new(String::new()), // Empty alias when only column aliases
42098                    column_aliases,
42099                    pre_alias_comments: Vec::new(),
42100                    trailing_comments: Vec::new(),
42101                    inferred_type: None,
42102                }))));
42103            }
42104        }
42105
42106        // Parse the alias identifier
42107        if let Some(alias_expr) = self.parse_id_var()? {
42108            let alias_ident = match alias_expr {
42109                Expression::Identifier(id) => id,
42110                _ => return Ok(Some(expr)),
42111            };
42112            return Ok(Some(Expression::Alias(Box::new(Alias {
42113                this: expr,
42114                alias: alias_ident,
42115                column_aliases: Vec::new(),
42116                pre_alias_comments: Vec::new(),
42117                trailing_comments: Vec::new(),
42118                inferred_type: None,
42119            }))));
42120        }
42121
42122        Ok(Some(expr))
42123    }
42124
42125    /// parse_alter_diststyle - Implemented from Python _parse_alter_diststyle
42126    #[allow(unused_variables, unused_mut)]
42127    /// parse_alter_diststyle - Parses ALTER TABLE DISTSTYLE clause (Redshift)
42128    /// Python: parser.py:7797-7802
42129    pub fn parse_alter_diststyle(&mut self) -> Result<Option<Expression>> {
42130        // Check for ALL, EVEN, AUTO
42131        if self.match_texts(&["ALL", "EVEN", "AUTO"]) {
42132            let style = self.previous().text.to_ascii_uppercase();
42133            return Ok(Some(Expression::DistStyleProperty(Box::new(
42134                DistStyleProperty {
42135                    this: Box::new(Expression::Identifier(Identifier::new(style))),
42136                },
42137            ))));
42138        }
42139
42140        // KEY DISTKEY column
42141        if self.match_text_seq(&["KEY", "DISTKEY"]) {
42142            if let Some(column) = self.parse_column()? {
42143                return Ok(Some(Expression::DistStyleProperty(Box::new(
42144                    DistStyleProperty {
42145                        this: Box::new(column),
42146                    },
42147                ))));
42148            }
42149        }
42150
42151        Ok(None)
42152    }
42153
42154    /// parse_alter_session - Parses ALTER SESSION SET/UNSET statements
42155    /// Python: parser.py:7879-7889
42156    pub fn parse_alter_session(&mut self) -> Result<Option<Expression>> {
42157        // ALTER SESSION SET var = value, ...
42158        if self.match_token(TokenType::Set) {
42159            let mut expressions = Vec::new();
42160            loop {
42161                if let Some(item) = self.parse_set_item_assignment()? {
42162                    expressions.push(item);
42163                }
42164                if !self.match_token(TokenType::Comma) {
42165                    break;
42166                }
42167            }
42168            return Ok(Some(Expression::AlterSession(Box::new(AlterSession {
42169                expressions,
42170                unset: None,
42171            }))));
42172        }
42173
42174        // ALTER SESSION UNSET var, ...
42175        if self.match_text_seq(&["UNSET"]) {
42176            let mut expressions = Vec::new();
42177            loop {
42178                if let Some(var) = self.parse_id_var()? {
42179                    // For UNSET, we just use the identifier directly
42180                    expressions.push(var);
42181                }
42182                if !self.match_token(TokenType::Comma) {
42183                    break;
42184                }
42185            }
42186            return Ok(Some(Expression::AlterSession(Box::new(AlterSession {
42187                expressions,
42188                unset: Some(Box::new(Expression::Boolean(BooleanLiteral {
42189                    value: true,
42190                }))),
42191            }))));
42192        }
42193
42194        Ok(None)
42195    }
42196
42197    /// parse_alter_sortkey - Parses ALTER TABLE SORTKEY clause (Redshift)
42198    /// Python: parser.py:7804-7816
42199    pub fn parse_alter_sortkey(&mut self) -> Result<Option<Expression>> {
42200        self.parse_alter_sortkey_impl(None)
42201    }
42202
42203    /// Implementation of parse_alter_sortkey with compound option
42204    pub fn parse_alter_sortkey_impl(
42205        &mut self,
42206        compound: Option<bool>,
42207    ) -> Result<Option<Expression>> {
42208        // For compound sortkey, match SORTKEY keyword
42209        if compound == Some(true) {
42210            self.match_text_seq(&["SORTKEY"]);
42211        }
42212
42213        // Check for (column_list) syntax
42214        if self.check(TokenType::LParen) {
42215            let wrapped = self.parse_wrapped_id_vars()?;
42216            // Extract expressions from Tuple
42217            let expressions = if let Some(Expression::Tuple(t)) = wrapped {
42218                t.expressions
42219            } else {
42220                Vec::new()
42221            };
42222            return Ok(Some(Expression::AlterSortKey(Box::new(AlterSortKey {
42223                this: None,
42224                expressions,
42225                compound: compound
42226                    .map(|c| Box::new(Expression::Boolean(BooleanLiteral { value: c }))),
42227            }))));
42228        }
42229
42230        // Check for AUTO or NONE
42231        if self.match_texts(&["AUTO", "NONE"]) {
42232            let style = self.previous().text.to_ascii_uppercase();
42233            return Ok(Some(Expression::AlterSortKey(Box::new(AlterSortKey {
42234                this: Some(Box::new(Expression::Identifier(Identifier::new(style)))),
42235                expressions: Vec::new(),
42236                compound: compound
42237                    .map(|c| Box::new(Expression::Boolean(BooleanLiteral { value: c }))),
42238            }))));
42239        }
42240
42241        Ok(None)
42242    }
42243
42244    /// parse_alter_table_add - Parses ALTER TABLE ADD clause
42245    /// Python: parser.py:7715-7751
42246    pub fn parse_alter_table_add(&mut self) -> Result<Option<Expression>> {
42247        // Check for ADD keyword (optional in some contexts)
42248        self.match_text_seq(&["ADD"]);
42249
42250        // Check for INDEX/KEY with optional FULLTEXT/SPATIAL prefix (MySQL)
42251        // Syntax: ADD [FULLTEXT|SPATIAL] {INDEX|KEY} [name] (columns) [USING {BTREE|HASH}]
42252        let kind = if self.match_identifier("FULLTEXT") {
42253            Some("FULLTEXT".to_string())
42254        } else if self.match_identifier("SPATIAL") {
42255            Some("SPATIAL".to_string())
42256        } else {
42257            None
42258        };
42259
42260        if self.check(TokenType::Index) || self.check(TokenType::Key) || kind.is_some() {
42261            // Consume INDEX or KEY keyword, track which was used
42262            let use_key_keyword = if self.match_token(TokenType::Key) {
42263                true
42264            } else {
42265                self.match_token(TokenType::Index);
42266                false
42267            };
42268
42269            // Optional index name (before the columns)
42270            let name = if !self.check(TokenType::LParen) && !self.check(TokenType::Using) {
42271                Some(self.expect_identifier_with_quoted()?)
42272            } else {
42273                None
42274            };
42275
42276            // Parse columns (with optional prefix length and DESC)
42277            self.expect(TokenType::LParen)?;
42278            let columns = self.parse_index_identifier_list()?;
42279            self.expect(TokenType::RParen)?;
42280
42281            // Parse optional USING BTREE|HASH
42282            let modifiers = self.parse_constraint_modifiers();
42283
42284            return Ok(Some(Expression::AlterTable(Box::new(AlterTable {
42285                name: TableRef::new(""),
42286                actions: vec![AlterTableAction::AddConstraint(TableConstraint::Index {
42287                    name,
42288                    columns,
42289                    kind,
42290                    modifiers,
42291                    use_key_keyword,
42292                    expression: None,
42293                    index_type: None,
42294                    granularity: None,
42295                })],
42296                if_exists: false,
42297                algorithm: None,
42298                lock: None,
42299                with_check: None,
42300                partition: None,
42301                on_cluster: None,
42302                table_modifier: None,
42303            }))));
42304        }
42305
42306        // Check for constraint keywords (PRIMARY KEY, FOREIGN KEY, UNIQUE, CHECK, CONSTRAINT)
42307        if self.check(TokenType::PrimaryKey)
42308            || self.check(TokenType::ForeignKey)
42309            || self.check(TokenType::Unique)
42310            || self.check(TokenType::Check)
42311            || self.check(TokenType::Constraint)
42312        {
42313            // Parse a single constraint and return it wrapped in Constraint
42314            if let Some(constraint) = self.parse_constraint()? {
42315                return Ok(Some(Expression::Constraint(Box::new(Constraint {
42316                    this: Box::new(constraint),
42317                    expressions: Vec::new(),
42318                }))));
42319            }
42320        }
42321
42322        // Check for COLUMNS keyword (batch column addition)
42323        if self.match_text_seq(&["COLUMNS"]) {
42324            // Parse schema or column definitions
42325            if let Some(schema) = self.parse_schema()? {
42326                return Ok(Some(schema));
42327            }
42328        }
42329
42330        // Check for IF NOT EXISTS PARTITION (must check before parse_add_column)
42331        let exists = self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
42332        if self.match_token(TokenType::Partition) {
42333            // Parse PARTITION(key = value, ...)
42334            self.expect(TokenType::LParen)?;
42335            let mut partition_exprs = Vec::new();
42336            loop {
42337                if let Some(expr) = self.parse_conjunction()? {
42338                    partition_exprs.push(expr);
42339                }
42340                if !self.match_token(TokenType::Comma) {
42341                    break;
42342                }
42343            }
42344            self.expect(TokenType::RParen)?;
42345
42346            let partition = Expression::Partition(Box::new(crate::expressions::Partition {
42347                expressions: partition_exprs,
42348                subpartition: false,
42349            }));
42350
42351            let location = if self.match_text_seq(&["LOCATION"]) {
42352                self.parse_property()?
42353            } else {
42354                None
42355            };
42356            return Ok(Some(Expression::AddPartition(Box::new(AddPartition {
42357                this: Box::new(partition),
42358                exists,
42359                location: location.map(Box::new),
42360            }))));
42361        }
42362
42363        // Try to parse column definition (after checking for PARTITION)
42364        if let Some(column) = self.parse_add_column()? {
42365            return Ok(Some(column));
42366        }
42367
42368        Ok(None)
42369    }
42370
42371    /// parse_alter_table_alter - Parses ALTER TABLE ALTER COLUMN clause
42372    /// Python: parser.py:7753-7795
42373    pub fn parse_alter_table_alter(&mut self) -> Result<Option<Expression>> {
42374        // Match optional COLUMN keyword
42375        self.match_token(TokenType::Column);
42376
42377        // Parse the column name - required for ALTER COLUMN
42378        let column = match self.parse_field()? {
42379            Some(c) => c,
42380            None => return Ok(None),
42381        };
42382
42383        // DROP DEFAULT
42384        if self.match_keywords(&[TokenType::Drop, TokenType::Default]) {
42385            return Ok(Some(Expression::AlterColumn(Box::new(AlterColumn {
42386                this: Box::new(column),
42387                dtype: None,
42388                collate: None,
42389                using: None,
42390                default: None,
42391                drop: Some(Box::new(Expression::Boolean(BooleanLiteral {
42392                    value: true,
42393                }))),
42394                allow_null: None,
42395                comment: None,
42396                visible: None,
42397                rename_to: None,
42398            }))));
42399        }
42400
42401        // SET DEFAULT expr
42402        if self.match_keywords(&[TokenType::Set, TokenType::Default]) {
42403            let default_val = self.parse_disjunction()?;
42404            return Ok(Some(Expression::AlterColumn(Box::new(AlterColumn {
42405                this: Box::new(column),
42406                dtype: None,
42407                collate: None,
42408                using: None,
42409                default: default_val.map(Box::new),
42410                drop: None,
42411                allow_null: None,
42412                comment: None,
42413                visible: None,
42414                rename_to: None,
42415            }))));
42416        }
42417
42418        // COMMENT 'string'
42419        if self.match_token(TokenType::Comment) {
42420            let comment_val = self.parse_string()?;
42421            return Ok(Some(Expression::AlterColumn(Box::new(AlterColumn {
42422                this: Box::new(column),
42423                dtype: None,
42424                collate: None,
42425                using: None,
42426                default: None,
42427                drop: None,
42428                allow_null: None,
42429                comment: comment_val.map(Box::new),
42430                visible: None,
42431                rename_to: None,
42432            }))));
42433        }
42434
42435        // DROP NOT NULL
42436        if self.match_text_seq(&["DROP", "NOT", "NULL"]) {
42437            return Ok(Some(Expression::AlterColumn(Box::new(AlterColumn {
42438                this: Box::new(column),
42439                dtype: None,
42440                collate: None,
42441                using: None,
42442                default: None,
42443                drop: Some(Box::new(Expression::Boolean(BooleanLiteral {
42444                    value: true,
42445                }))),
42446                allow_null: Some(Box::new(Expression::Boolean(BooleanLiteral {
42447                    value: true,
42448                }))),
42449                comment: None,
42450                visible: None,
42451                rename_to: None,
42452            }))));
42453        }
42454
42455        // SET NOT NULL
42456        if self.match_text_seq(&["SET", "NOT", "NULL"]) {
42457            return Ok(Some(Expression::AlterColumn(Box::new(AlterColumn {
42458                this: Box::new(column),
42459                dtype: None,
42460                collate: None,
42461                using: None,
42462                default: None,
42463                drop: None,
42464                allow_null: Some(Box::new(Expression::Boolean(BooleanLiteral {
42465                    value: false,
42466                }))),
42467                comment: None,
42468                visible: None,
42469                rename_to: None,
42470            }))));
42471        }
42472
42473        // SET VISIBLE
42474        if self.match_text_seq(&["SET", "VISIBLE"]) {
42475            return Ok(Some(Expression::AlterColumn(Box::new(AlterColumn {
42476                this: Box::new(column),
42477                dtype: None,
42478                collate: None,
42479                using: None,
42480                default: None,
42481                drop: None,
42482                allow_null: None,
42483                comment: None,
42484                visible: Some(Box::new(Expression::Identifier(Identifier::new(
42485                    "VISIBLE".to_string(),
42486                )))),
42487                rename_to: None,
42488            }))));
42489        }
42490
42491        // SET INVISIBLE
42492        if self.match_text_seq(&["SET", "INVISIBLE"]) {
42493            return Ok(Some(Expression::AlterColumn(Box::new(AlterColumn {
42494                this: Box::new(column),
42495                dtype: None,
42496                collate: None,
42497                using: None,
42498                default: None,
42499                drop: None,
42500                allow_null: None,
42501                comment: None,
42502                visible: Some(Box::new(Expression::Identifier(Identifier::new(
42503                    "INVISIBLE".to_string(),
42504                )))),
42505                rename_to: None,
42506            }))));
42507        }
42508
42509        // [SET DATA] TYPE type [COLLATE collation] [USING expr]
42510        self.match_text_seq(&["SET", "DATA"]);
42511        self.match_text_seq(&["TYPE"]);
42512
42513        let dtype = self.parse_types()?;
42514        let collate = if self.match_token(TokenType::Collate) {
42515            self.parse_term()?
42516        } else {
42517            None
42518        };
42519        let using = if self.match_token(TokenType::Using) {
42520            self.parse_disjunction()?
42521        } else {
42522            None
42523        };
42524
42525        Ok(Some(Expression::AlterColumn(Box::new(AlterColumn {
42526            this: Box::new(column),
42527            dtype: dtype.map(Box::new),
42528            collate: collate.map(Box::new),
42529            using: using.map(Box::new),
42530            default: None,
42531            drop: None,
42532            allow_null: None,
42533            comment: None,
42534            visible: None,
42535            rename_to: None,
42536        }))))
42537    }
42538
42539    /// Parse ALTER TABLE DROP action
42540    /// Note: Main ALTER TABLE DROP logic is implemented inline in parse_alter_table
42541    /// This method provides a separate entry point for the same functionality
42542    pub fn parse_alter_table_drop(&mut self) -> Result<Option<Expression>> {
42543        // Check for IF EXISTS before PARTITION
42544        let exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
42545
42546        // Check if this is DROP PARTITION
42547        if self.check(TokenType::Partition) {
42548            return self.parse_drop_partition_with_exists(exists);
42549        }
42550
42551        // Check for DROP FOREIGN KEY (Oracle/MySQL)
42552        if self.match_keywords(&[TokenType::ForeignKey, TokenType::Key]) {
42553            let name = self.expect_identifier_with_quoted()?;
42554            return Ok(Some(Expression::AlterTable(Box::new(AlterTable {
42555                name: TableRef::new(""),
42556                actions: vec![AlterTableAction::DropForeignKey { name }],
42557                if_exists: false,
42558                algorithm: None,
42559                lock: None,
42560                with_check: None,
42561                partition: None,
42562                on_cluster: None,
42563                table_modifier: None,
42564            }))));
42565        }
42566
42567        // Check for DROP COLUMNS (col1, col2, ...) syntax (Spark/Databricks)
42568        if self.check_identifier("COLUMNS") && self.check_next(TokenType::LParen) {
42569            self.skip(); // consume COLUMNS
42570            self.expect(TokenType::LParen)?;
42571            let mut columns = Vec::new();
42572            loop {
42573                if let Some(col) = self.parse_identifier()? {
42574                    columns.push(col);
42575                }
42576                if !self.match_token(TokenType::Comma) {
42577                    break;
42578                }
42579            }
42580            self.expect(TokenType::RParen)?;
42581            if columns.is_empty() {
42582                return Ok(None);
42583            } else if columns.len() == 1 {
42584                return Ok(Some(columns.remove(0)));
42585            } else {
42586                return Ok(Some(Expression::Tuple(Box::new(Tuple {
42587                    expressions: columns,
42588                }))));
42589            }
42590        }
42591
42592        // Otherwise, parse as DROP COLUMN(s)
42593        let mut columns = Vec::new();
42594
42595        // Parse first column
42596        if let Some(col) = self.parse_drop_column()? {
42597            columns.push(col);
42598        }
42599
42600        // Parse additional columns (comma-separated)
42601        while self.match_token(TokenType::Comma) {
42602            // Match optional DROP keyword before next column
42603            self.match_token(TokenType::Drop);
42604            if let Some(col) = self.parse_drop_column()? {
42605                columns.push(col);
42606            }
42607        }
42608
42609        if columns.is_empty() {
42610            Ok(None)
42611        } else if columns.len() == 1 {
42612            Ok(Some(columns.remove(0)))
42613        } else {
42614            // Multiple columns - wrap in a Tuple
42615            Ok(Some(Expression::Tuple(Box::new(Tuple {
42616                expressions: columns,
42617            }))))
42618        }
42619    }
42620
42621    /// parse_alter_table_rename - Parses ALTER TABLE RENAME clause
42622    /// Python: parser.py:7828-7841
42623    pub fn parse_alter_table_rename(&mut self) -> Result<Option<Expression>> {
42624        // RENAME COLUMN old_name TO new_name
42625        if self.match_token(TokenType::Column) {
42626            let exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
42627            let old_column = match self.parse_column()? {
42628                Some(c) => c,
42629                None => return Ok(None),
42630            };
42631
42632            if !self.match_text_seq(&["TO"]) {
42633                return Ok(None);
42634            }
42635
42636            let new_column = self.parse_column()?;
42637
42638            return Ok(Some(Expression::RenameColumn(Box::new(RenameColumn {
42639                this: Box::new(old_column),
42640                to: new_column.map(Box::new),
42641                exists,
42642            }))));
42643        }
42644
42645        // RENAME TO new_table_name
42646        if self.match_text_seq(&["TO"]) {
42647            // Return the table expression directly - the caller will handle it as a rename target
42648            let new_table = self.parse_table()?;
42649            return Ok(new_table);
42650        }
42651
42652        // SQLite allows: RENAME old_name TO new_name (without COLUMN keyword)
42653        // Try to parse as column rename if followed by identifier and TO
42654        if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
42655            let old_column = match self.parse_column()? {
42656                Some(c) => c,
42657                None => return Ok(None),
42658            };
42659
42660            if self.match_text_seq(&["TO"]) {
42661                let new_column = self.parse_column()?;
42662                return Ok(Some(Expression::RenameColumn(Box::new(RenameColumn {
42663                    this: Box::new(old_column),
42664                    to: new_column.map(Box::new),
42665                    exists: false,
42666                }))));
42667            } else {
42668                // Not TO after identifier - put it back and return error
42669                return Err(self.parse_error("Expected COLUMN or TO after RENAME"));
42670            }
42671        }
42672
42673        Ok(None)
42674    }
42675
42676    /// parse_alter_table_set - Parses ALTER TABLE SET clause
42677    /// Python: parser.py:7843-7877
42678    pub fn parse_alter_table_set(&mut self) -> Result<Option<Expression>> {
42679        let mut alter_set = AlterSet {
42680            expressions: Vec::new(),
42681            option: None,
42682            tablespace: None,
42683            access_method: None,
42684            file_format: None,
42685            copy_options: None,
42686            tag: None,
42687            location: None,
42688            serde: None,
42689        };
42690
42691        // SET AUTHORIZATION [ROLE] user
42692        if self.match_token(TokenType::Authorization) {
42693            let mut auth_text = "AUTHORIZATION ".to_string();
42694            if self.match_texts(&["ROLE"]) {
42695                auth_text.push_str("ROLE ");
42696            }
42697            let user = self.expect_identifier()?;
42698            auth_text.push_str(&user);
42699            alter_set.option = Some(Box::new(Expression::Identifier(Identifier::new(auth_text))));
42700            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
42701        }
42702
42703        // SET PROPERTIES prop = value, ...
42704        if self.match_text_seq(&["PROPERTIES"]) {
42705            let mut assignments = Vec::new();
42706            loop {
42707                // Parse property name (could be identifier or string literal)
42708                let key = if self.check(TokenType::String) {
42709                    self.parse_string()?.unwrap_or(Expression::Null(Null))
42710                } else {
42711                    let name = self.expect_identifier()?;
42712                    Expression::Identifier(Identifier::new(name))
42713                };
42714                self.expect(TokenType::Eq)?;
42715                // Parse value (could be DEFAULT or an expression)
42716                let value = if self.match_token(TokenType::Default) {
42717                    Expression::Identifier(Identifier::new("DEFAULT".to_string()))
42718                } else {
42719                    self.parse_expression()?
42720                };
42721                assignments.push(Expression::Eq(Box::new(BinaryOp {
42722                    left: key,
42723                    right: value,
42724                    left_comments: Vec::new(),
42725                    operator_comments: Vec::new(),
42726                    trailing_comments: Vec::new(),
42727                    inferred_type: None,
42728                })));
42729                if !self.match_token(TokenType::Comma) {
42730                    break;
42731                }
42732            }
42733            alter_set.expressions = assignments;
42734            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
42735        }
42736
42737        // SET (properties) or SET TABLE PROPERTIES (properties)
42738        if self.check(TokenType::LParen) || self.match_text_seq(&["TABLE", "PROPERTIES"]) {
42739            let assignments = self.parse_wrapped_csv_assignments()?;
42740            alter_set.expressions = assignments;
42741            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
42742        }
42743
42744        // SET FILESTREAM_ON = value
42745        if self.match_text_seq(&["FILESTREAM_ON"]) {
42746            if let Some(assignment) = self.parse_assignment()? {
42747                alter_set.expressions = vec![assignment];
42748            }
42749            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
42750        }
42751
42752        // SET LOGGED or SET UNLOGGED
42753        if self.match_texts(&["LOGGED", "UNLOGGED"]) {
42754            let option = self.previous().text.to_ascii_uppercase();
42755            alter_set.option = Some(Box::new(Expression::Identifier(Identifier::new(option))));
42756            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
42757        }
42758
42759        // SET WITHOUT CLUSTER or SET WITHOUT OIDS
42760        if self.match_text_seq(&["WITHOUT"]) {
42761            if self.match_texts(&["CLUSTER", "OIDS"]) {
42762                let option = format!("WITHOUT {}", self.previous().text.to_ascii_uppercase());
42763                alter_set.option = Some(Box::new(Expression::Identifier(Identifier::new(option))));
42764                return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
42765            }
42766        }
42767
42768        // SET LOCATION path
42769        if self.match_text_seq(&["LOCATION"]) {
42770            let loc = self.parse_field()?;
42771            alter_set.location = loc.map(Box::new);
42772            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
42773        }
42774
42775        // SET ACCESS METHOD method
42776        if self.match_text_seq(&["ACCESS", "METHOD"]) {
42777            let method = self.parse_field()?;
42778            alter_set.access_method = method.map(Box::new);
42779            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
42780        }
42781
42782        // SET TABLESPACE name
42783        if self.match_text_seq(&["TABLESPACE"]) {
42784            let tablespace = self.parse_field()?;
42785            alter_set.tablespace = tablespace.map(Box::new);
42786            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
42787        }
42788
42789        // SET FILE FORMAT format or SET FILEFORMAT format
42790        if self.match_text_seq(&["FILE", "FORMAT"]) || self.match_text_seq(&["FILEFORMAT"]) {
42791            let format = self.parse_field()?;
42792            alter_set.file_format = format.map(Box::new);
42793            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
42794        }
42795
42796        // SET STAGE_FILE_FORMAT = (options)
42797        if self.match_text_seq(&["STAGE_FILE_FORMAT"]) {
42798            let options = self.parse_wrapped_options()?;
42799            alter_set.file_format = options.map(Box::new);
42800            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
42801        }
42802
42803        // SET STAGE_COPY_OPTIONS = (options)
42804        if self.match_text_seq(&["STAGE_COPY_OPTIONS"]) {
42805            let options = self.parse_wrapped_options()?;
42806            alter_set.copy_options = options.map(Box::new);
42807            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
42808        }
42809
42810        // SET TAG or SET TAGS
42811        if self.match_text_seq(&["TAG"]) || self.match_text_seq(&["TAGS"]) {
42812            let mut tags = Vec::new();
42813            loop {
42814                if let Some(assignment) = self.parse_assignment()? {
42815                    tags.push(assignment);
42816                }
42817                if !self.match_token(TokenType::Comma) {
42818                    break;
42819                }
42820            }
42821            if !tags.is_empty() {
42822                alter_set.tag = Some(Box::new(Expression::Tuple(Box::new(Tuple {
42823                    expressions: tags,
42824                }))));
42825            }
42826            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
42827        }
42828
42829        // SET SERDE 'class' [WITH SERDEPROPERTIES (...)]
42830        if self.match_text_seq(&["SERDE"]) {
42831            let serde = self.parse_field()?;
42832            alter_set.serde = serde.map(Box::new);
42833
42834            // Parse optional properties
42835            let properties = self.parse_wrapped()?;
42836            if let Some(props) = properties {
42837                alter_set.expressions = vec![props];
42838            }
42839            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
42840        }
42841
42842        Ok(None)
42843    }
42844
42845    /// Helper to parse wrapped CSV of assignments
42846    fn parse_wrapped_csv_assignments(&mut self) -> Result<Vec<Expression>> {
42847        if !self.match_token(TokenType::LParen) {
42848            return Ok(Vec::new());
42849        }
42850        let mut assignments = Vec::new();
42851        loop {
42852            if let Some(assignment) = self.parse_assignment()? {
42853                assignments.push(assignment);
42854            }
42855            if !self.match_token(TokenType::Comma) {
42856                break;
42857            }
42858        }
42859        self.expect(TokenType::RParen)?;
42860        Ok(assignments)
42861    }
42862
    /// parse_analyze - Parses an ANALYZE statement into an `Expression::Analyze`.
    /// Implemented from Python `_parse_analyze` (parser.py:7937-7999).
    /// Calls: parse_table_parts, parse_number, parse_partition and the
    /// `parse_analyze_*` sub-parsers.
    ///
    /// This method never matches the `ANALYZE` keyword itself -- presumably the
    /// caller has already consumed it (TODO confirm against the call site).
    ///
    /// Parsing order:
    /// 1. With no tokens left, an `Analyze` with every field empty is returned.
    /// 2. Leading options: `VERBOSE`, `SKIP_LOCKED`, `BUFFER_USAGE_LIMIT <n>`,
    ///    `FULL`, `SAMPLE` (repeated as long as one of them matches).
    /// 3. The target: `TABLE`, `INDEX`, `TABLES [FROM|IN db]`, `DATABASE`,
    ///    `CLUSTER`, `LOCAL`/`NO_WRITE_TO_BINLOG [TABLE]`, one of the inner
    ///    expression keywords, or a bare table name.
    /// 4. An optional parenthesized column list (only when a target was parsed).
    /// 5. An optional partition via `parse_partition`.
    /// 6. `WITH SYNC MODE`, `WITH ASYNC MODE`, or `WITH (key = value, ...)`.
    /// 7. An inner expression keyword, if none was parsed in step 3.
    /// 8. `PROPERTIES (key = value, ...)`, if step 6 collected no properties.
    #[allow(unused_variables, unused_mut)]
    pub fn parse_analyze(&mut self) -> Result<Option<Expression>> {
        // If no more tokens, return empty Analyze
        if self.is_at_end() {
            return Ok(Some(Expression::Analyze(Box::new(Analyze {
                kind: None,
                this: None,
                options: Vec::new(),
                mode: None,
                partition: None,
                expression: None,
                properties: Vec::new(),
                columns: Vec::new(),
            }))));
        }

        // Parse options (VERBOSE, SKIP_LOCKED, etc.)
        // StarRocks uses FULL and SAMPLE as options
        let mut options = Vec::new();
        let analyze_styles = [
            "VERBOSE",
            "SKIP_LOCKED",
            "BUFFER_USAGE_LIMIT",
            "FULL",
            "SAMPLE",
        ];
        while self.match_texts(&analyze_styles) {
            // Options are stored as upper-cased identifiers.
            let style = self.previous().text.to_ascii_uppercase();
            if style == "BUFFER_USAGE_LIMIT" {
                // Parse number after BUFFER_USAGE_LIMIT.
                // If no number follows, the option is not recorded at all.
                if let Some(num) = self.parse_number()? {
                    // The number is folded into the identifier text; anything
                    // other than a numeric literal contributes an empty string.
                    options.push(Expression::Identifier(Identifier::new(format!(
                        "BUFFER_USAGE_LIMIT {}",
                        if let Expression::Literal(lit) = &num {
                            if let Literal::Number(n) = lit.as_ref() {
                                n.clone()
                            } else {
                                String::new()
                            }
                        } else {
                            String::new()
                        }
                    ))));
                }
            } else {
                options.push(Expression::Identifier(Identifier::new(style)));
            }
        }

        // `this` is the analyzed object, `kind` its keyword (TABLE, INDEX, ...),
        // and `inner_expression` the COMPUTE/DELETE/VALIDATE/... sub-clause.
        let mut this: Option<Expression> = None;
        let mut kind: Option<String> = None;
        let mut inner_expression: Option<Expression> = None;

        // Parse TABLE or INDEX
        if self.match_token(TokenType::Table) {
            kind = Some("TABLE".to_string());
            this = self.parse_table_parts()?;
        } else if self.match_token(TokenType::Index) {
            kind = Some("INDEX".to_string());
            this = self.parse_table_parts()?;
        } else if self.match_text_seq(&["TABLES"]) {
            kind = Some("TABLES".to_string());
            // Optional FROM/IN <database>; the direction becomes part of `kind`.
            if self.match_token(TokenType::From) || self.match_token(TokenType::In) {
                let dir = self.previous().text.to_ascii_uppercase();
                kind = Some(format!("TABLES {}", dir));
                // Parse database name as identifier
                let db_name = self.expect_identifier()?;
                this = Some(Expression::Identifier(Identifier::new(db_name)));
            }
        } else if self.match_text_seq(&["DATABASE"]) {
            kind = Some("DATABASE".to_string());
            this = self.parse_table_parts()?;
        } else if self.match_text_seq(&["CLUSTER"]) {
            kind = Some("CLUSTER".to_string());
            this = self.parse_table_parts()?;
        } else if self.match_texts(&["LOCAL", "NO_WRITE_TO_BINLOG"]) {
            // MySQL: ANALYZE LOCAL TABLE tbl / ANALYZE NO_WRITE_TO_BINLOG TABLE tbl
            // The modifier is recorded as an option; TABLE after it is optional.
            let opt_text = self.previous().text.to_ascii_uppercase();
            options.push(Expression::Identifier(Identifier::new(opt_text)));
            if self.match_token(TokenType::Table) {
                kind = Some("TABLE".to_string());
            }
            this = self.parse_table_parts()?;
        } else if self.match_text_seq(&["COMPUTE"]) {
            // Check ANALYZE_EXPRESSION_PARSERS keywords before fallback to parse_table_parts
            // Python: elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS)
            inner_expression = self.parse_analyze_statistics()?;
        } else if self.match_text_seq(&["DELETE"]) {
            inner_expression = self.parse_analyze_delete()?;
        } else if self.match_text_seq(&["VALIDATE"]) {
            inner_expression = self.parse_analyze_validate()?;
        } else if self.match_text_seq(&["LIST"]) {
            inner_expression = self.parse_analyze_list()?;
        } else if self.match_text_seq(&["DROP"]) {
            // DROP and UPDATE share the histogram parser.
            inner_expression = self.parse_analyze_histogram()?;
        } else if self.match_text_seq(&["UPDATE"]) {
            inner_expression = self.parse_analyze_histogram()?;
        } else if self.match_texts(&["ALL", "PREDICATE"]) {
            inner_expression = self.parse_analyze_columns()?;
        } else {
            // Try to parse table directly (empty kind - https://prestodb.io/docs/current/sql/analyze.html)
            this = self.parse_table_parts()?;
        }

        // Parse optional column list: ANALYZE tbl(col1, col2) (PostgreSQL)
        // Only attempted when a target was parsed above.
        let columns = if this.is_some() && self.match_token(TokenType::LParen) {
            let mut cols = Vec::new();
            loop {
                cols.push(self.expect_identifier_or_keyword()?);
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
            self.expect(TokenType::RParen)?;
            cols
        } else {
            Vec::new()
        };

        // Parse optional PARTITION
        let partition = self.parse_partition()?;

        // Parse optional WITH SYNC/ASYNC MODE or WITH (prop=val, ...) for Presto
        let mut mode = None;
        let mut properties = Vec::new();

        if self.match_text_seq(&["WITH", "SYNC", "MODE"]) {
            mode = Some(Box::new(Expression::Identifier(Identifier::new(
                "WITH SYNC MODE".to_string(),
            ))));
        } else if self.match_text_seq(&["WITH", "ASYNC", "MODE"]) {
            mode = Some(Box::new(Expression::Identifier(Identifier::new(
                "WITH ASYNC MODE".to_string(),
            ))));
        } else if self.match_text_seq(&["WITH"]) {
            // Presto syntax: ANALYZE tbl WITH (prop1=val1, prop2=val2)
            // NOTE(review): a WITH that is not followed by `(` is matched here
            // and nothing further is recorded for it.
            if self.match_token(TokenType::LParen) {
                loop {
                    // Parse key=value pairs; stop as soon as no key can be parsed.
                    let key = self.parse_id_var()?;
                    if key.is_none() {
                        break;
                    }

                    // Expect = sign
                    if self.match_token(TokenType::Eq) {
                        // Parse the value
                        let value = self.parse_primary()?;
                        if let Some(k) = key {
                            properties.push(Expression::Property(Box::new(Property {
                                this: Box::new(k),
                                value: Some(Box::new(value)),
                            })));
                        }
                    } else if let Some(k) = key {
                        // Key without value
                        properties.push(Expression::Property(Box::new(Property {
                            this: Box::new(k),
                            value: None,
                        })));
                    }

                    if !self.match_token(TokenType::Comma) {
                        break;
                    }
                }
                self.expect(TokenType::RParen)?;
            }
        }

        // Parse optional inner expressions (COMPUTE, DELETE, etc.)
        // Only if inner_expression wasn't already set (for cases like ANALYZE TABLE tbl VALIDATE...)
        if inner_expression.is_none() {
            if self.match_text_seq(&["COMPUTE"]) {
                inner_expression = self.parse_analyze_statistics()?;
            } else if self.match_text_seq(&["DELETE"]) {
                inner_expression = self.parse_analyze_delete()?;
            } else if self.match_text_seq(&["VALIDATE"]) {
                inner_expression = self.parse_analyze_validate()?;
            } else if self.match_text_seq(&["LIST"]) {
                inner_expression = self.parse_analyze_list()?;
            } else if self.match_text_seq(&["DROP"]) {
                inner_expression = self.parse_analyze_histogram()?;
            } else if self.match_text_seq(&["UPDATE"]) {
                inner_expression = self.parse_analyze_histogram()?;
            } else if self.match_texts(&["ALL", "PREDICATE"]) {
                // Redshift: ANALYZE TBL ALL COLUMNS / ANALYZE TBL PREDICATE COLUMNS
                inner_expression = self.parse_analyze_columns()?;
            }
        }

        // Parse optional properties (if not already parsed from WITH clause)
        // StarRocks syntax: ANALYZE TABLE TBL PROPERTIES ('prop1'=val1, 'prop2'=val2)
        if properties.is_empty() && self.match_text_seq(&["PROPERTIES"]) {
            if self.match_token(TokenType::LParen) {
                loop {
                    // Parse key (can be a string literal or identifier).
                    // Unlike the WITH (...) loop above, a missing key does not
                    // end the loop: it falls back to an empty identifier.
                    let key = if self.check(TokenType::String) {
                        self.skip();
                        let key_str = self.previous().text.clone();
                        Expression::Literal(Box::new(Literal::String(key_str)))
                    } else {
                        self.parse_id_var()?
                            .unwrap_or(Expression::Identifier(Identifier::new(String::new())))
                    };

                    // Expect = sign
                    if self.match_token(TokenType::Eq) {
                        // Parse the value
                        let value = self.parse_primary()?;
                        properties.push(Expression::Property(Box::new(Property {
                            this: Box::new(key),
                            value: Some(Box::new(value)),
                        })));
                    } else {
                        // Key without value
                        properties.push(Expression::Property(Box::new(Property {
                            this: Box::new(key),
                            value: None,
                        })));
                    }

                    if !self.match_token(TokenType::Comma) {
                        break;
                    }
                }
                self.expect(TokenType::RParen)?;
            }
        }

        // Assemble the final node from everything collected above.
        Ok(Some(Expression::Analyze(Box::new(Analyze {
            kind,
            this: this.map(Box::new),
            options,
            mode,
            partition: partition.map(Box::new),
            expression: inner_expression.map(Box::new),
            properties,
            columns,
        }))))
    }
43108
43109    /// parse_analyze_columns - Parses ANALYZE ... COLUMNS
43110    /// Python: parser.py:8055-8059
43111    /// Note: AnalyzeColumns not in expressions.rs, using Identifier instead
43112    pub fn parse_analyze_columns(&mut self) -> Result<Option<Expression>> {
43113        let prev_text = self.previous().text.to_ascii_uppercase();
43114        if self.match_text_seq(&["COLUMNS"]) {
43115            return Ok(Some(Expression::Identifier(Identifier::new(format!(
43116                "{} COLUMNS",
43117                prev_text
43118            )))));
43119        }
43120        Ok(None)
43121    }
43122
43123    /// parse_analyze_delete - Parses ANALYZE DELETE STATISTICS
43124    /// Python: parser.py:8061-8065
43125    pub fn parse_analyze_delete(&mut self) -> Result<Option<Expression>> {
43126        let kind = if self.match_text_seq(&["SYSTEM"]) {
43127            Some("SYSTEM".to_string())
43128        } else {
43129            None
43130        };
43131
43132        if self.match_text_seq(&["STATISTICS"]) {
43133            return Ok(Some(Expression::AnalyzeDelete(Box::new(AnalyzeDelete {
43134                kind,
43135            }))));
43136        }
43137
43138        Ok(None)
43139    }
43140
43141    /// parse_analyze_histogram - Parses ANALYZE ... HISTOGRAM ON
43142    /// Python: parser.py:8073-8108
43143    pub fn parse_analyze_histogram(&mut self) -> Result<Option<Expression>> {
43144        let action = self.previous().text.to_ascii_uppercase(); // DROP or UPDATE
43145        let mut expressions = Vec::new();
43146        let mut update_options: Option<Box<Expression>> = None;
43147        let mut expression: Option<Box<Expression>> = None;
43148
43149        if !self.match_text_seq(&["HISTOGRAM", "ON"]) {
43150            return Ok(None);
43151        }
43152
43153        // Parse column references
43154        loop {
43155            if let Some(col) = self.parse_column_reference()? {
43156                expressions.push(col);
43157            } else {
43158                break;
43159            }
43160            if !self.match_token(TokenType::Comma) {
43161                break;
43162            }
43163        }
43164
43165        // Parse USING DATA 'json_data' (MySQL) - must check before WITH
43166        if self.match_text_seq(&["USING", "DATA"]) {
43167            if self.check(TokenType::String) {
43168                let tok = self.advance();
43169                expression = Some(Box::new(Expression::Identifier(Identifier::new(format!(
43170                    "USING DATA '{}'",
43171                    tok.text
43172                )))));
43173            } else {
43174                expression = Some(Box::new(Expression::Identifier(Identifier::new(
43175                    "USING DATA".to_string(),
43176                ))));
43177            }
43178        }
43179
43180        // Parse WITH options - can have two WITH clauses:
43181        // 1. WITH SYNC/ASYNC MODE (optional)
43182        // 2. WITH n BUCKETS (optional)
43183        // StarRocks syntax: WITH SYNC MODE WITH 5 BUCKETS
43184        let mut mode_str: Option<String> = None;
43185        let mut buckets_str: Option<String> = None;
43186
43187        if self.match_token(TokenType::With) {
43188            if self.match_texts(&["SYNC", "ASYNC"]) {
43189                let mode = self.previous().text.to_ascii_uppercase();
43190                if self.match_text_seq(&["MODE"]) {
43191                    mode_str = Some(format!("WITH {} MODE", mode));
43192                }
43193                // Check for second WITH clause for buckets
43194                if self.match_token(TokenType::With) {
43195                    if let Some(num) = self.parse_number()? {
43196                        if self.match_text_seq(&["BUCKETS"]) {
43197                            let num_str = if let Expression::Literal(lit) = &num {
43198                                if let Literal::Number(n) = lit.as_ref() {
43199                                    n.clone()
43200                                } else {
43201                                    String::new()
43202                                }
43203                            } else {
43204                                String::new()
43205                            };
43206                            buckets_str = Some(format!("WITH {} BUCKETS", num_str));
43207                        }
43208                    }
43209                }
43210            } else if let Some(num) = self.parse_number()? {
43211                if self.match_text_seq(&["BUCKETS"]) {
43212                    let num_str = if let Expression::Literal(lit) = &num {
43213                        if let Literal::Number(n) = lit.as_ref() {
43214                            n.clone()
43215                        } else {
43216                            String::new()
43217                        }
43218                    } else {
43219                        String::new()
43220                    };
43221                    buckets_str = Some(format!("WITH {} BUCKETS", num_str));
43222                }
43223            }
43224        }
43225
43226        // Combine mode and buckets into expression
43227        match (mode_str, buckets_str) {
43228            (Some(m), Some(b)) => {
43229                expression = Some(Box::new(Expression::Identifier(Identifier::new(format!(
43230                    "{} {}",
43231                    m, b
43232                )))));
43233            }
43234            (Some(m), None) => {
43235                expression = Some(Box::new(Expression::Identifier(Identifier::new(m))));
43236            }
43237            (None, Some(b)) => {
43238                expression = Some(Box::new(Expression::Identifier(Identifier::new(b))));
43239            }
43240            (None, None) => {}
43241        }
43242
43243        // Parse AUTO UPDATE or MANUAL UPDATE (MySQL 8.0.27+)
43244        if self.match_texts(&["MANUAL", "AUTO"]) {
43245            let mode = self.previous().text.to_ascii_uppercase();
43246            if self.check(TokenType::Update) {
43247                update_options = Some(Box::new(Expression::Identifier(Identifier::new(mode))));
43248                self.skip(); // consume UPDATE
43249            }
43250        }
43251
43252        Ok(Some(Expression::AnalyzeHistogram(Box::new(
43253            AnalyzeHistogram {
43254                this: Box::new(Expression::Identifier(Identifier::new(action))),
43255                expressions,
43256                expression,
43257                update_options,
43258            },
43259        ))))
43260    }
43261
43262    /// parse_analyze_list - Parses ANALYZE LIST CHAINED ROWS
43263    /// Python: parser.py:8067-8070
43264    pub fn parse_analyze_list(&mut self) -> Result<Option<Expression>> {
43265        if self.match_text_seq(&["CHAINED", "ROWS"]) {
43266            let expression = self.parse_into()?.map(Box::new);
43267            return Ok(Some(Expression::AnalyzeListChainedRows(Box::new(
43268                AnalyzeListChainedRows { expression },
43269            ))));
43270        }
43271        Ok(None)
43272    }
43273
43274    /// parse_analyze_statistics - Parses ANALYZE ... STATISTICS
43275    /// Python: parser.py:8002-8031
43276    pub fn parse_analyze_statistics(&mut self) -> Result<Option<Expression>> {
43277        let kind = self.previous().text.to_ascii_uppercase();
43278        let option = if self.match_text_seq(&["DELTA"]) {
43279            Some(Box::new(Expression::Identifier(Identifier::new(
43280                "DELTA".to_string(),
43281            ))))
43282        } else {
43283            None
43284        };
43285
43286        // Expect STATISTICS keyword
43287        if !self.match_text_seq(&["STATISTICS"]) {
43288            return Ok(None);
43289        }
43290
43291        let mut this: Option<Box<Expression>> = None;
43292        let mut expressions = Vec::new();
43293
43294        if self.match_text_seq(&["NOSCAN"]) {
43295            this = Some(Box::new(Expression::Identifier(Identifier::new(
43296                "NOSCAN".to_string(),
43297            ))));
43298        } else if self.match_token(TokenType::For) {
43299            if self.match_text_seq(&["ALL", "COLUMNS"]) {
43300                this = Some(Box::new(Expression::Identifier(Identifier::new(
43301                    "FOR ALL COLUMNS".to_string(),
43302                ))));
43303            } else if self.match_text_seq(&["COLUMNS"]) {
43304                this = Some(Box::new(Expression::Identifier(Identifier::new(
43305                    "FOR COLUMNS".to_string(),
43306                ))));
43307                // Parse column list
43308                loop {
43309                    if let Some(col) = self.parse_column_reference()? {
43310                        expressions.push(col);
43311                    } else {
43312                        break;
43313                    }
43314                    if !self.match_token(TokenType::Comma) {
43315                        break;
43316                    }
43317                }
43318            }
43319        } else if self.match_text_seq(&["SAMPLE"]) {
43320            // Parse SAMPLE number [PERCENT]
43321            if let Some(sample) = self.parse_number()? {
43322                let sample_kind = if self.match_token(TokenType::Percent) {
43323                    Some("PERCENT".to_string())
43324                } else {
43325                    None
43326                };
43327                expressions.push(Expression::AnalyzeSample(Box::new(AnalyzeSample {
43328                    kind: sample_kind.unwrap_or_default(),
43329                    sample: Some(Box::new(sample)),
43330                })));
43331            }
43332        }
43333
43334        Ok(Some(Expression::AnalyzeStatistics(Box::new(
43335            AnalyzeStatistics {
43336                kind,
43337                option,
43338                this,
43339                expressions,
43340            },
43341        ))))
43342    }
43343
43344    /// parse_analyze_validate - Parses ANALYZE VALIDATE
43345    /// Python: parser.py:8034-8053
43346    pub fn parse_analyze_validate(&mut self) -> Result<Option<Expression>> {
43347        let mut kind = String::new();
43348        let mut this: Option<Box<Expression>> = None;
43349        let mut expression: Option<Box<Expression>> = None;
43350
43351        if self.match_text_seq(&["REF", "UPDATE"]) {
43352            kind = "REF".to_string();
43353            this = Some(Box::new(Expression::Identifier(Identifier::new(
43354                "UPDATE".to_string(),
43355            ))));
43356            if self.match_text_seq(&["SET", "DANGLING", "TO", "NULL"]) {
43357                this = Some(Box::new(Expression::Identifier(Identifier::new(
43358                    "UPDATE SET DANGLING TO NULL".to_string(),
43359                ))));
43360            }
43361        } else if self.match_text_seq(&["STRUCTURE"]) {
43362            kind = "STRUCTURE".to_string();
43363            if self.match_text_seq(&["CASCADE", "FAST"]) {
43364                this = Some(Box::new(Expression::Identifier(Identifier::new(
43365                    "CASCADE FAST".to_string(),
43366                ))));
43367            } else if self.match_text_seq(&["CASCADE", "COMPLETE"]) {
43368                if self.match_texts(&["ONLINE", "OFFLINE"]) {
43369                    let mode = self.previous().text.to_ascii_uppercase();
43370                    this = Some(Box::new(Expression::Identifier(Identifier::new(format!(
43371                        "CASCADE COMPLETE {}",
43372                        mode
43373                    )))));
43374                    expression = self.parse_into()?.map(Box::new);
43375                }
43376            }
43377        }
43378
43379        if kind.is_empty() {
43380            return Ok(None);
43381        }
43382
43383        Ok(Some(Expression::AnalyzeValidate(Box::new(
43384            AnalyzeValidate {
43385                kind,
43386                this,
43387                expression,
43388            },
43389        ))))
43390    }
43391
43392    /// parse_attach_detach - Parses ATTACH/DETACH statements (DuckDB)
43393    /// Python: DuckDB._parse_attach_detach
43394    pub fn parse_attach_detach(&mut self, is_attach: bool) -> Result<Expression> {
43395        // ATTACH [DATABASE] [IF NOT EXISTS] 'path' [AS alias] [(options)]
43396        // DETACH [DATABASE] [IF EXISTS] name
43397        // DATABASE can be tokenized as TokenType::Database (keyword), not just Var
43398        let _ = self.match_identifier("DATABASE") || self.match_token(TokenType::Database);
43399
43400        let exists = if is_attach {
43401            self.match_text_seq(&["IF", "NOT", "EXISTS"])
43402        } else {
43403            self.match_text_seq(&["IF", "EXISTS"])
43404        };
43405
43406        // Parse the expression (can be a path string, identifier, or expression like 'foo' || '.foo2'
43407        // or NOT EXISTS(subquery) for conditional attach)
43408        let this_expr = self.parse_expression()?;
43409
43410        // Check for AS alias
43411        let this = if self.match_token(TokenType::As) {
43412            let alias = self.expect_identifier_or_keyword_with_quoted()?;
43413            Expression::Alias(Box::new(Alias {
43414                this: this_expr,
43415                alias,
43416                column_aliases: Vec::new(),
43417                pre_alias_comments: Vec::new(),
43418                trailing_comments: Vec::new(),
43419                inferred_type: None,
43420            }))
43421        } else {
43422            this_expr
43423        };
43424
43425        if is_attach {
43426            // Parse optional (options)
43427            let expressions = if self.match_token(TokenType::LParen) {
43428                let mut opts = Vec::new();
43429                loop {
43430                    // Parse option: KEY [VALUE]
43431                    let key_name = self.advance().text.to_ascii_uppercase();
43432                    let key = Expression::Identifier(Identifier::new(key_name));
43433                    let value = if !self.check(TokenType::Comma) && !self.check(TokenType::RParen) {
43434                        // The value can be an identifier, string, boolean, etc.
43435                        let val_token = self.advance();
43436                        let val_expr = if val_token.token_type == TokenType::String {
43437                            Expression::Literal(Box::new(Literal::String(val_token.text.clone())))
43438                        } else if val_token.token_type == TokenType::True {
43439                            Expression::Boolean(BooleanLiteral { value: true })
43440                        } else if val_token.token_type == TokenType::False {
43441                            Expression::Boolean(BooleanLiteral { value: false })
43442                        } else {
43443                            Expression::Identifier(Identifier::new(val_token.text.clone()))
43444                        };
43445                        Some(Box::new(val_expr))
43446                    } else {
43447                        None
43448                    };
43449                    opts.push(Expression::AttachOption(Box::new(AttachOption {
43450                        this: Box::new(key),
43451                        expression: value,
43452                    })));
43453                    if !self.match_token(TokenType::Comma) {
43454                        break;
43455                    }
43456                }
43457                self.expect(TokenType::RParen)?;
43458                opts
43459            } else {
43460                Vec::new()
43461            };
43462
43463            Ok(Expression::Attach(Box::new(Attach {
43464                this: Box::new(this),
43465                exists,
43466                expressions,
43467            })))
43468        } else {
43469            Ok(Expression::Detach(Box::new(Detach {
43470                this: Box::new(this),
43471                exists,
43472            })))
43473        }
43474    }
43475
43476    /// parse_install - Parses INSTALL statement (DuckDB)
43477    /// Python: DuckDB._parse_install
43478    pub fn parse_install(&mut self, force: bool) -> Result<Expression> {
43479        // INSTALL extension [FROM source]
43480        let name = self.expect_identifier_or_keyword()?;
43481        let this = Expression::Identifier(Identifier::new(name));
43482
43483        let from_ = if self.match_token(TokenType::From) {
43484            // FROM can be followed by a string or identifier
43485            Some(Box::new(self.parse_primary()?))
43486        } else {
43487            None
43488        };
43489
43490        Ok(Expression::Install(Box::new(Install {
43491            this: Box::new(this),
43492            from_,
43493            force: if force {
43494                Some(Box::new(Expression::Boolean(BooleanLiteral {
43495                    value: true,
43496                })))
43497            } else {
43498                None
43499            },
43500        })))
43501    }
43502
43503    /// parse_force_statement - Parses FORCE INSTALL/CHECKPOINT (DuckDB)
43504    /// Python: DuckDB._parse_force
43505    pub fn parse_force_statement(&mut self) -> Result<Expression> {
43506        if self.match_identifier("INSTALL") {
43507            return self.parse_install(true);
43508        }
43509        // FORCE CHECKPOINT or other: fallback to command
43510        self.parse_as_command()?
43511            .ok_or_else(|| self.parse_error("Failed to parse FORCE statement"))
43512    }
43513
43514    /// parse_summarize_statement - Parses SUMMARIZE statement (DuckDB)
43515    /// Python: DuckDB parser for SUMMARIZE
43516    pub fn parse_summarize_statement(&mut self) -> Result<Expression> {
43517        // SUMMARIZE [TABLE] expression
43518        let is_table = self.match_token(TokenType::Table);
43519
43520        // Try to parse a SELECT statement, string, or table reference
43521        let this = if self.check(TokenType::Select) || self.check(TokenType::With) {
43522            self.parse_select()?
43523        } else if self.check(TokenType::String) {
43524            self.parse_primary()?
43525        } else {
43526            // Parse as table name
43527            self.parse_table_parts()?
43528                .unwrap_or(Expression::Identifier(Identifier::new(String::new())))
43529        };
43530
43531        Ok(Expression::Summarize(Box::new(Summarize {
43532            this: Box::new(this),
43533            table: if is_table {
43534                Some(Box::new(Expression::Boolean(BooleanLiteral {
43535                    value: true,
43536                })))
43537            } else {
43538                None
43539            },
43540        })))
43541    }
43542
43543    /// parse_deallocate_prepare - Parses DEALLOCATE PREPARE <name>
43544    /// Presto/Trino syntax for deallocating prepared statements
43545    pub fn parse_deallocate_prepare(&mut self) -> Result<Expression> {
43546        self.skip(); // consume DEALLOCATE
43547
43548        // Check for PREPARE keyword
43549        if self.match_identifier("PREPARE") {
43550            // Parse the statement name
43551            let name = if !self.is_at_end() && !self.check(TokenType::Semicolon) {
43552                self.advance().text.clone()
43553            } else {
43554                String::new()
43555            };
43556
43557            // Build the command text
43558            let command_text = if name.is_empty() {
43559                "DEALLOCATE PREPARE".to_string()
43560            } else {
43561                format!("DEALLOCATE PREPARE {}", name)
43562            };
43563
43564            Ok(Expression::Command(Box::new(Command {
43565                this: command_text,
43566            })))
43567        } else {
43568            // Just DEALLOCATE without PREPARE - consume rest as command
43569            let mut parts = vec!["DEALLOCATE".to_string()];
43570            while !self.is_at_end() && !self.check(TokenType::Semicolon) {
43571                let token = self.advance();
43572                parts.push(token.text.clone());
43573            }
43574            Ok(Expression::Command(Box::new(Command {
43575                this: parts.join(" "),
43576            })))
43577        }
43578    }
43579
43580    /// parse_as_command - Creates Command expression
43581    #[allow(unused_variables, unused_mut)]
43582    /// parse_as_command - Parses remaining tokens as a raw command
43583    /// Python: _parse_as_command
43584    /// Used as fallback when specific parsing fails
43585    pub fn parse_as_command(&mut self) -> Result<Option<Expression>> {
43586        // Get the starting token text
43587        let start_text = if self.current > 0 {
43588            self.tokens
43589                .get(self.current - 1)
43590                .map(|t| t.text.clone())
43591                .unwrap_or_default()
43592        } else {
43593            String::new()
43594        };
43595
43596        // Consume all remaining tokens, storing both text and type
43597        let mut tokens_info: Vec<(String, TokenType)> = Vec::new();
43598        while !self.is_at_end() {
43599            let token = self.advance();
43600            tokens_info.push((token.text.clone(), token.token_type.clone()));
43601        }
43602
43603        // Join tokens intelligently, avoiding spaces around punctuation
43604        let mut expression = String::new();
43605        for (i, (text, token_type)) in tokens_info.iter().enumerate() {
43606            if i > 0 {
43607                // Check if we should add a space before this token
43608                let prev_type = &tokens_info[i - 1].1;
43609                let needs_space = !Self::is_punctuation_token(prev_type)
43610                    && !Self::is_punctuation_token(token_type);
43611                if needs_space {
43612                    expression.push(' ');
43613                }
43614            }
43615            expression.push_str(text);
43616        }
43617
43618        Ok(Some(Expression::Command(Box::new(Command {
43619            this: if expression.is_empty() {
43620                start_text
43621            } else {
43622                format!("{} {}", start_text, expression)
43623            },
43624        }))))
43625    }
43626
43627    /// Helper to determine if a token type is punctuation that shouldn't have spaces around it
43628    fn is_punctuation_token(token_type: &TokenType) -> bool {
43629        matches!(
43630            token_type,
43631            TokenType::Dot | TokenType::Colon | TokenType::DColon
43632        )
43633    }
43634
43635    /// Fallback to Command expression from a saved position.
43636    /// Extracts verbatim SQL text from source if available, consuming tokens until semicolon/EOF.
43637    fn fallback_to_command(&mut self, start_pos: usize) -> Result<Expression> {
43638        let start_span = self.tokens[start_pos].span.start;
43639        // Consume until semicolon or end
43640        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
43641            self.skip();
43642        }
43643        let command_text = if let Some(ref source) = self.source {
43644            let end_span = if self.current > 0 {
43645                self.tokens[self.current - 1].span.end
43646            } else {
43647                start_span
43648            };
43649            source[start_span..end_span].trim().to_string()
43650        } else {
43651            // Fallback: join token texts
43652            let mut parts = Vec::new();
43653            for i in start_pos..self.current {
43654                if self.tokens[i].token_type == TokenType::String {
43655                    parts.push(format!("'{}'", self.tokens[i].text.replace('\'', "''")));
43656                } else {
43657                    parts.push(self.tokens[i].text.clone());
43658                }
43659            }
43660            parts.join(" ")
43661        };
43662        Ok(Expression::Command(Box::new(Command {
43663            this: command_text,
43664        })))
43665    }
43666
43667    /// parse_assignment - Parses assignment expressions (variable := value)
43668    /// Python: _parse_assignment
43669    pub fn parse_assignment(&mut self) -> Result<Option<Expression>> {
43670        // First parse a disjunction (left side of potential assignment)
43671        let mut this = self.parse_disjunction()?;
43672
43673        // Handle := assignment operator
43674        while self.match_token(TokenType::ColonEq) {
43675            if let Some(left) = this {
43676                let right = self.parse_assignment()?;
43677                if let Some(right_expr) = right {
43678                    this = Some(Expression::PropertyEQ(Box::new(BinaryOp {
43679                        left,
43680                        right: right_expr,
43681                        left_comments: Vec::new(),
43682                        operator_comments: Vec::new(),
43683                        trailing_comments: Vec::new(),
43684                        inferred_type: None,
43685                    })));
43686                } else {
43687                    this = Some(left);
43688                    break;
43689                }
43690            } else {
43691                break;
43692            }
43693        }
43694
43695        // ClickHouse ternary operator: condition ? true_value : false_value
43696        // Parsed as: If(this=condition, true=true_value, false=false_value)
43697        if matches!(
43698            self.config.dialect,
43699            Some(crate::dialects::DialectType::ClickHouse)
43700        ) {
43701            if let Some(condition) = this {
43702                if self.match_token(TokenType::Parameter) {
43703                    if self.check(TokenType::Colon) {
43704                        return Err(self.parse_error(
43705                            "Expected true expression after ? in ClickHouse ternary",
43706                        ));
43707                    }
43708                    let true_value = self.parse_assignment()?.ok_or_else(|| {
43709                        self.parse_error("Expected true expression after ? in ClickHouse ternary")
43710                    })?;
43711                    let false_value = if self.match_token(TokenType::Colon) {
43712                        self.parse_assignment()?.unwrap_or(Expression::Null(Null))
43713                    } else {
43714                        Expression::Null(Null)
43715                    };
43716                    return Ok(Some(Expression::IfFunc(Box::new(IfFunc {
43717                        original_name: None,
43718                        condition,
43719                        true_value,
43720                        false_value: Some(false_value),
43721                        inferred_type: None,
43722                    }))));
43723                }
43724                this = Some(condition);
43725            }
43726        }
43727
43728        Ok(this)
43729    }
43730
43731    /// parse_auto_increment - Implemented from Python _parse_auto_increment
43732    /// Calls: parse_bitwise
43733    #[allow(unused_variables, unused_mut)]
43734    pub fn parse_auto_increment(&mut self) -> Result<Option<Expression>> {
43735        if self.match_text_seq(&["START"]) {
43736            return Ok(Some(Expression::GeneratedAsIdentityColumnConstraint(
43737                Box::new(GeneratedAsIdentityColumnConstraint {
43738                    this: None,
43739                    expression: None,
43740                    on_null: None,
43741                    start: None,
43742                    increment: None,
43743                    minvalue: None,
43744                    maxvalue: None,
43745                    cycle: None,
43746                    order: None,
43747                }),
43748            )));
43749        }
43750        if self.match_text_seq(&["INCREMENT"]) {
43751            // Matched: INCREMENT
43752            return Ok(None);
43753        }
43754        if self.match_text_seq(&["ORDER"]) {
43755            // Matched: ORDER
43756            return Ok(None);
43757        }
43758        Ok(None)
43759    }
43760
43761    /// parse_auto_property - Implemented from Python _parse_auto_property
43762    #[allow(unused_variables, unused_mut)]
43763    pub fn parse_auto_property(&mut self) -> Result<Option<Expression>> {
43764        if self.match_text_seq(&["REFRESH"]) {
43765            // Matched: REFRESH
43766            return Ok(None);
43767        }
43768        Ok(None)
43769    }
43770
43771    /// parse_between - Implemented from Python _parse_between
43772    #[allow(unused_variables, unused_mut)]
43773    pub fn parse_between(&mut self) -> Result<Option<Expression>> {
43774        if self.match_text_seq(&["SYMMETRIC"]) {
43775            // Matched: SYMMETRIC
43776            return Ok(None);
43777        }
43778        if self.match_text_seq(&["ASYMMETRIC"]) {
43779            // Matched: ASYMMETRIC
43780            return Ok(None);
43781        }
43782        Ok(None)
43783    }
43784
43785    /// parse_bitwise - Parses bitwise OR/XOR/AND expressions
43786    /// Python: _parse_bitwise
43787    /// Delegates to the existing parse_bitwise_or in the operator precedence chain
43788    pub fn parse_bitwise(&mut self) -> Result<Option<Expression>> {
43789        let start = self.current;
43790        match self.parse_bitwise_or() {
43791            Ok(expr) => Ok(Some(expr)),
43792            Err(_err) if self.current == start => Ok(None),
43793            Err(err) => Err(err),
43794        }
43795    }
43796
43797    /// parse_blockcompression - Implemented from Python _parse_blockcompression
43798    #[allow(unused_variables, unused_mut)]
43799    pub fn parse_blockcompression(&mut self) -> Result<Option<Expression>> {
43800        if self.match_text_seq(&["ALWAYS"]) {
43801            return Ok(Some(Expression::BlockCompressionProperty(Box::new(
43802                BlockCompressionProperty {
43803                    autotemp: None,
43804                    always: None,
43805                    default: None,
43806                    manual: None,
43807                    never: None,
43808                },
43809            ))));
43810        }
43811        if self.match_text_seq(&["MANUAL"]) {
43812            // Matched: MANUAL
43813            return Ok(None);
43814        }
43815        Ok(None)
43816    }
43817
43818    /// parse_boolean - Parse boolean literal (TRUE/FALSE)
43819    /// Python: if self._match(TokenType.TRUE): return exp.Boolean(this=True)
43820    pub fn parse_boolean(&mut self) -> Result<Option<Expression>> {
43821        if self.match_token(TokenType::True) {
43822            return Ok(Some(Expression::Boolean(BooleanLiteral { value: true })));
43823        }
43824        if self.match_token(TokenType::False) {
43825            return Ok(Some(Expression::Boolean(BooleanLiteral { value: false })));
43826        }
43827        Ok(None)
43828    }
43829
43830    /// parse_bracket - Ported from Python _parse_bracket
43831    /// Parses bracket expressions: array[index], array literal [1,2,3], or struct {key: value}
43832    #[allow(unused_variables, unused_mut)]
43833    pub fn parse_bracket(&mut self) -> Result<Option<Expression>> {
43834        self.parse_bracket_with_expr(None)
43835    }
43836
43837    /// parse_bracket_with_expr - Parses bracket with optional left-side expression
43838    fn parse_bracket_with_expr(&mut self, this: Option<Expression>) -> Result<Option<Expression>> {
43839        // Check for [ or {
43840        let is_bracket = self.match_token(TokenType::LBracket);
43841        let is_brace = if !is_bracket {
43842            self.match_token(TokenType::LBrace)
43843        } else {
43844            false
43845        };
43846
43847        if !is_bracket && !is_brace {
43848            return Ok(this);
43849        }
43850
43851        // Parse comma-separated expressions inside brackets
43852        let mut expressions: Vec<Expression> = Vec::new();
43853
43854        if is_bracket && !self.check(TokenType::RBracket) {
43855            // Check for slice syntax at the start: [:...] or [:-...]
43856            // This needs to be detected before parse_bracket_key_value which calls parse_primary
43857            // and parse_primary would consume : as a parameter prefix
43858            let first_expr = if self.check(TokenType::Colon) {
43859                // This is slice syntax like [:] or [:-1] or [::step]
43860                // Parse it using slice parser with no 'this'
43861                if let Some(slice) = self.parse_slice()? {
43862                    slice
43863                } else {
43864                    self.parse_expression()?
43865                }
43866            } else if let Ok(Some(expr)) = self.parse_bracket_key_value() {
43867                expr
43868            } else {
43869                // Parse regular expression and check for slice
43870                let expr = self.parse_expression()?;
43871                // Check if followed by colon (slice syntax like [start:end])
43872                if self.check(TokenType::Colon) {
43873                    if let Some(slice) = self.parse_slice_with_this(Some(expr))? {
43874                        slice
43875                    } else {
43876                        return Err(self.parse_error("Failed to parse slice"));
43877                    }
43878                } else {
43879                    expr
43880                }
43881            };
43882
43883            // Check for comprehension syntax: [expr FOR var IN iterator [IF condition]]
43884            if self.match_token(TokenType::For) {
43885                // Parse loop variable - typically a simple identifier like 'x'
43886                let loop_var = self.parse_primary()?;
43887
43888                // Parse optional position (second variable after comma)
43889                let position = if self.match_token(TokenType::Comma) {
43890                    Some(self.parse_primary()?)
43891                } else {
43892                    None
43893                };
43894
43895                // Expect IN keyword
43896                if !self.match_token(TokenType::In) {
43897                    return Err(self.parse_error("Expected IN in comprehension"));
43898                }
43899
43900                // Parse iterator expression
43901                let iterator = self.parse_expression()?;
43902
43903                // Parse optional condition after IF
43904                let condition = if self.match_token(TokenType::If) {
43905                    Some(self.parse_expression()?)
43906                } else {
43907                    None
43908                };
43909
43910                // Expect closing bracket
43911                self.expect(TokenType::RBracket)?;
43912
43913                // Return Comprehension wrapped in an expression
43914                return Ok(Some(Expression::Comprehension(Box::new(Comprehension {
43915                    this: Box::new(first_expr),
43916                    expression: Box::new(loop_var),
43917                    position: position.map(Box::new),
43918                    iterator: Some(Box::new(iterator)),
43919                    condition: condition.map(Box::new),
43920                }))));
43921            }
43922
43923            expressions.push(first_expr);
43924
43925            // Continue parsing remaining expressions
43926            while self.match_token(TokenType::Comma) {
43927                if let Ok(Some(expr)) = self.parse_bracket_key_value() {
43928                    expressions.push(expr);
43929                } else {
43930                    match self.parse_expression() {
43931                        Ok(expr) => expressions.push(expr),
43932                        Err(_) => break,
43933                    }
43934                }
43935            }
43936        } else if is_brace && !self.check(TokenType::RBrace) {
43937            loop {
43938                if let Ok(Some(expr)) = self.parse_bracket_key_value() {
43939                    expressions.push(expr);
43940                } else {
43941                    match self.parse_expression() {
43942                        Ok(expr) => expressions.push(expr),
43943                        Err(_) => break,
43944                    }
43945                }
43946                if !self.match_token(TokenType::Comma) {
43947                    break;
43948                }
43949            }
43950        }
43951
43952        // Expect closing bracket
43953        if is_bracket {
43954            self.expect(TokenType::RBracket)?;
43955        } else if is_brace {
43956            self.expect(TokenType::RBrace)?;
43957        }
43958
43959        // Build the result
43960        if is_brace {
43961            // Struct literal: {key: value, ...}
43962            // Convert expressions to (Option<name>, expr) pairs
43963            let fields: Vec<(Option<String>, Expression)> =
43964                expressions.into_iter().map(|e| (None, e)).collect();
43965            Ok(Some(Expression::Struct(Box::new(Struct { fields }))))
43966        } else if let Some(base_expr) = this {
43967            // Subscript access: base[index]
43968            if expressions.len() == 1 {
43969                Ok(Some(Expression::Subscript(Box::new(Subscript {
43970                    this: base_expr,
43971                    index: expressions.remove(0),
43972                }))))
43973            } else {
43974                // Multiple indices - create nested subscripts or array
43975                let mut result = base_expr;
43976                for expr in expressions {
43977                    result = Expression::Subscript(Box::new(Subscript {
43978                        this: result,
43979                        index: expr,
43980                    }));
43981                }
43982                Ok(Some(result))
43983            }
43984        } else {
43985            // Array literal: [1, 2, 3]
43986            Ok(Some(Expression::Array(Box::new(Array { expressions }))))
43987        }
43988    }
43989
43990    /// parse_bracket_key_value - Ported from Python _parse_bracket_key_value
43991    /// Parses key-value pairs in brackets: key: value or key => value
43992    #[allow(unused_variables, unused_mut)]
43993    pub fn parse_bracket_key_value(&mut self) -> Result<Option<Expression>> {
43994        let saved_pos = self.current;
43995
43996        // Try to parse as key: value or key => value
43997        if let Ok(key) = self.parse_primary() {
43998            // Check for : or =>
43999            if self.match_token(TokenType::Colon) || self.match_text_seq(&["=>"]) {
44000                match self.parse_expression() {
44001                    Ok(value) => {
44002                        // Return as NamedArgument for key-value pair
44003                        // Extract the name from the key (identifier or string literal)
44004                        let name = match &key {
44005                            Expression::Identifier(id) => id.clone(),
44006                            Expression::Literal(lit)
44007                                if matches!(
44008                                    lit.as_ref(),
44009                                    crate::expressions::Literal::String(s)
44010                                ) =>
44011                            {
44012                                let crate::expressions::Literal::String(s) = lit.as_ref() else {
44013                                    unreachable!()
44014                                };
44015                                Identifier::new(s.clone())
44016                            }
44017                            _ => Identifier::new("".to_string()),
44018                        };
44019                        return Ok(Some(Expression::NamedArgument(Box::new(NamedArgument {
44020                            name,
44021                            value,
44022                            separator: NamedArgSeparator::DArrow, // Using DArrow for colon-style key: value
44023                        }))));
44024                    }
44025                    Err(_) => {
44026                        self.current = saved_pos;
44027                        return Ok(None);
44028                    }
44029                }
44030            }
44031            self.current = saved_pos;
44032        }
44033
44034        Ok(None)
44035    }
44036
44037    /// parse_ceil_floor - Implemented from Python _parse_ceil_floor
44038    /// Calls: parse_lambda, parse_var
44039    #[allow(unused_variables, unused_mut)]
44040    pub fn parse_ceil_floor(&mut self) -> Result<Option<Expression>> {
44041        if self.match_text_seq(&["TO"]) {
44042            // Matched: TO
44043            return Ok(None);
44044        }
44045        Ok(None)
44046    }
44047
44048    /// parse_changes - Implemented from Python _parse_changes
44049    /// Parses: CHANGES(INFORMATION => var) AT|BEFORE(...) END(...)
44050    pub fn parse_changes(&mut self) -> Result<Option<Expression>> {
44051        // Match: CHANGES(INFORMATION =>
44052        if !self.match_text_seq(&["CHANGES", "(", "INFORMATION", "=>"]) {
44053            return Ok(None);
44054        }
44055
44056        // Parse information (any token as var, matching Python's any_token=True)
44057        let information = if !self.is_at_end() && !self.check(TokenType::RParen) {
44058            let tok = self.advance();
44059            Some(Box::new(Expression::Var(Box::new(crate::expressions::Var {
44060                this: tok.text.clone(),
44061            }))))
44062        } else {
44063            None
44064        };
44065
44066        // Match closing paren
44067        self.match_token(TokenType::RParen);
44068
44069        // Parse at_before (Snowflake AT/BEFORE clause)
44070        let at_before = self.parse_historical_data()?.map(Box::new);
44071
44072        // Parse end (optional second historical data clause)
44073        let end = self.parse_historical_data()?.map(Box::new);
44074
44075        Ok(Some(Expression::Changes(Box::new(Changes {
44076            information,
44077            at_before,
44078            end,
44079        }))))
44080    }
44081
44082    /// parse_char - Parses CHAR/CHR function with optional USING charset
44083    /// Python: CHAR(args...) [USING charset]
44084    /// MySQL: CHAR(n1, n2, ... USING charset)
44085    pub fn parse_char(&mut self) -> Result<Option<Expression>> {
44086        // Parse expressions inside CHAR()
44087        let mut args = Vec::new();
44088        loop {
44089            let expr = self.parse_expression()?;
44090            args.push(expr);
44091            if !self.match_token(TokenType::Comma) {
44092                break;
44093            }
44094        }
44095
44096        // Check for USING charset
44097        let charset = if self.match_token(TokenType::Using) {
44098            self.parse_var()?.map(|v| {
44099                if let Expression::Identifier(id) = v {
44100                    id.name
44101                } else {
44102                    String::new()
44103                }
44104            })
44105        } else {
44106            None
44107        };
44108
44109        if args.is_empty() {
44110            return Ok(None);
44111        }
44112
44113        // If there's a charset or multiple args, use CharFunc (MySQL-style)
44114        // Otherwise use simple Chr for single-arg CHR function
44115        if charset.is_some() || args.len() > 1 {
44116            Ok(Some(Expression::CharFunc(Box::new(
44117                crate::expressions::CharFunc {
44118                    args,
44119                    charset,
44120                    name: None, // defaults to CHAR
44121                },
44122            ))))
44123        } else {
44124            Ok(Some(Expression::Chr(Box::new(UnaryFunc::new(
44125                args.into_iter().next().unwrap(),
44126            )))))
44127        }
44128    }
44129
44130    /// parse_character_set - Ported from Python _parse_character_set
44131    #[allow(unused_variables, unused_mut)]
44132    /// parse_character_set - Parses CHARACTER SET property
44133    /// Example: CHARACTER SET = utf8 or CHARACTER SET utf8mb4
44134    pub fn parse_character_set(&mut self) -> Result<Option<Expression>> {
44135        // Optional = sign
44136        self.match_token(TokenType::Eq);
44137
44138        // Parse the charset name (variable or string)
44139        let charset = self.parse_var_or_string()?;
44140        if charset.is_none() {
44141            return Ok(None);
44142        }
44143
44144        Ok(Some(Expression::CharacterSetProperty(Box::new(
44145            CharacterSetProperty {
44146                this: Box::new(charset.unwrap()),
44147                default: None,
44148            },
44149        ))))
44150    }
44151
44152    /// parse_checksum - Implemented from Python _parse_checksum
44153    #[allow(unused_variables, unused_mut)]
44154    pub fn parse_checksum(&mut self) -> Result<Option<Expression>> {
44155        if self.match_text_seq(&["OFF"]) {
44156            return Ok(Some(Expression::ChecksumProperty(Box::new(
44157                ChecksumProperty {
44158                    on: None,
44159                    default: None,
44160                },
44161            ))));
44162        }
44163        Ok(None)
44164    }
44165
44166    /// parse_cluster - CLUSTER BY clause for Hive/Spark-style queries
44167    /// Parses a list of ordered expressions (columns with optional ASC/DESC)
44168    #[allow(unused_variables, unused_mut)]
44169    pub fn parse_cluster(&mut self) -> Result<Option<Expression>> {
44170        let mut expressions: Vec<Ordered> = Vec::new();
44171
44172        loop {
44173            // Parse an ordered expression (column with optional direction)
44174            if let Some(ordered) = self.parse_ordered_item()? {
44175                expressions.push(ordered);
44176            } else {
44177                break;
44178            }
44179
44180            if !self.match_token(TokenType::Comma) {
44181                break;
44182            }
44183        }
44184
44185        if expressions.is_empty() {
44186            return Ok(None);
44187        }
44188
44189        Ok(Some(Expression::ClusterBy(Box::new(ClusterBy {
44190            expressions,
44191        }))))
44192    }
44193
44194    /// parse_clustered_by - Implemented from Python _parse_clustered_by
44195    #[allow(unused_variables, unused_mut)]
44196    pub fn parse_clustered_by(&mut self) -> Result<Option<Expression>> {
44197        if self.match_text_seq(&["BY"]) {
44198            return Ok(Some(Expression::ClusteredByProperty(Box::new(
44199                ClusteredByProperty {
44200                    expressions: Vec::new(),
44201                    sorted_by: None,
44202                    buckets: None,
44203                },
44204            ))));
44205        }
44206        if self.match_text_seq(&["SORTED", "BY"]) {
44207            // Matched: SORTED BY
44208            return Ok(None);
44209        }
44210        Ok(None)
44211    }
44212
44213    /// Parse Snowflake colon JSON path extraction: data:field or data:field.subfield
44214    /// Python: def _parse_colon_as_variant_extract(self, this)
44215    pub fn parse_colon_as_variant_extract(
44216        &mut self,
44217        this: Expression,
44218    ) -> Result<Option<Expression>> {
44219        // Build a JSON path from colon-separated identifiers
44220        // Track whether each segment was quoted (needs bracket notation for spaces/special chars)
44221        let mut json_path: Vec<(String, bool)> = Vec::new();
44222
44223        while self.match_token(TokenType::Colon) {
44224            // Parse the path segment (field name)
44225            if let Some(field) = self.parse_identifier()? {
44226                if let Expression::Identifier(ident) = field {
44227                    json_path.push((
44228                        ident.name.clone(),
44229                        ident.quoted || ident.name.contains(' ') || ident.name.contains('\''),
44230                    ));
44231                }
44232            }
44233
44234            // Check for dot-separated sub-paths
44235            while self.match_token(TokenType::Dot) {
44236                if let Some(subfield) = self.parse_identifier()? {
44237                    if let Expression::Identifier(ident) = subfield {
44238                        json_path.push((
44239                            ident.name.clone(),
44240                            ident.quoted || ident.name.contains(' ') || ident.name.contains('\''),
44241                        ));
44242                    }
44243                }
44244            }
44245        }
44246
44247        if json_path.is_empty() {
44248            return Ok(Some(this));
44249        }
44250
44251        // Build the JSON path expression string
44252        // Use bracket notation for segments with spaces/special chars: a["b c"]
44253        // Use dot notation for simple segments: a.b.c
44254        let mut path_str = String::new();
44255        for (i, (segment, needs_bracket)) in json_path.iter().enumerate() {
44256            if *needs_bracket {
44257                // Bracket notation: ["key with spaces"]
44258                path_str.push('[');
44259                path_str.push('"');
44260                path_str.push_str(segment);
44261                path_str.push('"');
44262                path_str.push(']');
44263            } else {
44264                if i > 0 {
44265                    path_str.push('.');
44266                }
44267                path_str.push_str(segment);
44268            }
44269        }
44270
44271        Ok(Some(Expression::JSONExtract(Box::new(JSONExtract {
44272            this: Box::new(this),
44273            expression: Box::new(Expression::Literal(Box::new(Literal::String(path_str)))),
44274            only_json_types: None,
44275            expressions: Vec::new(),
44276            variant_extract: Some(Box::new(Expression::Boolean(BooleanLiteral {
44277                value: true,
44278            }))),
44279            json_query: None,
44280            option: None,
44281            quote: None,
44282            on_condition: None,
44283            requires_json: None,
44284        }))))
44285    }
44286
44287    /// parse_column - Parse column expression
44288    /// Python: this = self._parse_column_reference(); return self._parse_column_ops(this)
44289    pub fn parse_column(&mut self) -> Result<Option<Expression>> {
44290        // Parse column reference (field name that becomes a Column expression)
44291        let column_ref = self.parse_column_reference()?;
44292        if column_ref.is_some() {
44293            // Apply column ops (bracket subscript, property access with dots, casts)
44294            return self.parse_column_ops_with_expr(column_ref);
44295        }
44296        // Try parsing bracket directly if no column reference
44297        self.parse_bracket()
44298    }
44299
44300    /// parse_column_constraint - Ported from Python _parse_column_constraint
44301    /// Parses column-level constraints like NOT NULL, PRIMARY KEY, UNIQUE, DEFAULT, CHECK, etc.
44302    #[allow(unused_variables, unused_mut)]
44303    pub fn parse_column_constraint(&mut self) -> Result<Option<Expression>> {
44304        // Check for optional CONSTRAINT keyword and name
44305        let constraint_name = if self.match_token(TokenType::Constraint) {
44306            self.parse_id_var()?.and_then(|e| {
44307                if let Expression::Identifier(id) = e {
44308                    Some(id)
44309                } else {
44310                    None
44311                }
44312            })
44313        } else {
44314            None
44315        };
44316
44317        // NOT NULL
44318        if self.match_text_seq(&["NOT", "NULL"]) {
44319            return Ok(Some(Expression::NotNullColumnConstraint(Box::new(
44320                NotNullColumnConstraint { allow_null: None },
44321            ))));
44322        }
44323
44324        // NOT FOR REPLICATION (SQL Server) - must be before NULL check
44325        if self.match_text_seq(&["NOT", "FOR", "REPLICATION"]) {
44326            return Ok(Some(Expression::Property(Box::new(
44327                crate::expressions::Property {
44328                    this: Box::new(Expression::Identifier(Identifier::new(
44329                        "NOT FOR REPLICATION".to_string(),
44330                    ))),
44331                    value: None,
44332                },
44333            ))));
44334        }
44335
44336        // NULL
44337        if self.match_text_seq(&["NULL"]) {
44338            return Ok(Some(Expression::NotNullColumnConstraint(Box::new(
44339                NotNullColumnConstraint {
44340                    allow_null: Some(Box::new(Expression::Boolean(BooleanLiteral {
44341                        value: true,
44342                    }))),
44343                },
44344            ))));
44345        }
44346
44347        // PRIMARY KEY
44348        if self.match_text_seq(&["PRIMARY", "KEY"]) {
44349            return Ok(Some(Expression::PrimaryKeyColumnConstraint(Box::new(
44350                PrimaryKeyColumnConstraint {
44351                    desc: None,
44352                    options: Vec::new(),
44353                },
44354            ))));
44355        }
44356
44357        // UNIQUE
44358        if self.match_text_seq(&["UNIQUE"]) {
44359            // Check for optional KEY/INDEX
44360            let _ = self.match_texts(&["KEY", "INDEX"]);
44361            // Check for NULLS NOT DISTINCT (PostgreSQL 15+ feature)
44362            let nulls = if self.match_text_seq(&["NULLS", "NOT", "DISTINCT"]) {
44363                Some(Box::new(Expression::Boolean(BooleanLiteral {
44364                    value: true,
44365                })))
44366            } else {
44367                None
44368            };
44369            return Ok(Some(Expression::UniqueColumnConstraint(Box::new(
44370                UniqueColumnConstraint {
44371                    this: None,
44372                    index_type: None,
44373                    on_conflict: None,
44374                    nulls,
44375                    options: Vec::new(),
44376                },
44377            ))));
44378        }
44379
44380        // DEFAULT
44381        if self.match_text_seq(&["DEFAULT"]) {
44382            let default_value = self.parse_select_or_expression()?;
44383            if let Some(val) = default_value {
44384                return Ok(Some(Expression::DefaultColumnConstraint(Box::new(
44385                    DefaultColumnConstraint {
44386                        this: Box::new(val),
44387                        for_column: None,
44388                    },
44389                ))));
44390            }
44391            return Ok(None);
44392        }
44393
44394        // CHECK
44395        if self.match_text_seq(&["CHECK"]) {
44396            if self.match_token(TokenType::LParen) {
44397                let expr = self.parse_select_or_expression()?;
44398                self.match_token(TokenType::RParen);
44399                if let Some(check_expr) = expr {
44400                    return Ok(Some(Expression::CheckColumnConstraint(Box::new(
44401                        CheckColumnConstraint {
44402                            this: Box::new(check_expr),
44403                            enforced: None,
44404                        },
44405                    ))));
44406                }
44407            }
44408            return Ok(None);
44409        }
44410
44411        // REFERENCES (foreign key)
44412        if self.match_text_seq(&["REFERENCES"]) {
44413            let table = self.parse_table_parts()?;
44414            let columns = if self.match_token(TokenType::LParen) {
44415                let mut cols = Vec::new();
44416                loop {
44417                    if let Some(col) = self.parse_id_var()? {
44418                        cols.push(col);
44419                    }
44420                    if !self.match_token(TokenType::Comma) {
44421                        break;
44422                    }
44423                }
44424                self.match_token(TokenType::RParen);
44425                cols
44426            } else {
44427                Vec::new()
44428            };
44429
44430            return Ok(Some(Expression::ForeignKey(Box::new(ForeignKey {
44431                expressions: columns,
44432                reference: table.map(Box::new),
44433                delete: None,
44434                update: None,
44435                options: Vec::new(),
44436            }))));
44437        }
44438
44439        // AUTO_INCREMENT / AUTOINCREMENT / IDENTITY
44440        if self.match_texts(&["AUTO_INCREMENT", "AUTOINCREMENT", "IDENTITY"]) {
44441            // Check for IDENTITY(start, increment) or IDENTITY START x INCREMENT y syntax
44442            let mut start = None;
44443            let mut increment = None;
44444
44445            if self.match_token(TokenType::LParen) {
44446                // Parse (start, increment)
44447                start = self.parse_bitwise()?;
44448                if self.match_token(TokenType::Comma) {
44449                    increment = self.parse_bitwise()?;
44450                }
44451                self.expect(TokenType::RParen)?;
44452            } else if self.match_text_seq(&["START"]) {
44453                // Parse START x INCREMENT y
44454                start = self.parse_bitwise()?;
44455                if self.match_text_seq(&["INCREMENT"]) {
44456                    increment = self.parse_bitwise()?;
44457                }
44458            }
44459
44460            if start.is_some() || increment.is_some() {
44461                return Ok(Some(Expression::GeneratedAsIdentityColumnConstraint(
44462                    Box::new(GeneratedAsIdentityColumnConstraint {
44463                        this: Some(Box::new(Expression::Boolean(BooleanLiteral {
44464                            value: false,
44465                        }))),
44466                        expression: None,
44467                        on_null: None,
44468                        start: start.map(Box::new),
44469                        increment: increment.map(Box::new),
44470                        minvalue: None,
44471                        maxvalue: None,
44472                        cycle: None,
44473                        order: None,
44474                    }),
44475                )));
44476            }
44477            return Ok(Some(Expression::AutoIncrementColumnConstraint(
44478                AutoIncrementColumnConstraint,
44479            )));
44480        }
44481
44482        // COMMENT 'text' - CommentColumnConstraint is a unit struct, use a different expression
44483        if self.match_text_seq(&["COMMENT"]) {
44484            if let Some(comment) = self.parse_string()? {
44485                // Use CommentColumnConstraint unit struct
44486                return Ok(Some(Expression::CommentColumnConstraint(
44487                    CommentColumnConstraint,
44488                )));
44489            }
44490            return Ok(None);
44491        }
44492
44493        // COLLATE collation_name - use CollateProperty instead
44494        if self.match_text_seq(&["COLLATE"]) {
44495            if let Some(collation) = self.parse_id_var()? {
44496                return Ok(Some(Expression::CollateProperty(Box::new(
44497                    CollateProperty {
44498                        this: Box::new(collation),
44499                        default: None,
44500                    },
44501                ))));
44502            }
44503            return Ok(None);
44504        }
44505
44506        // ClickHouse dictionary column attributes: HIERARCHICAL, IS_OBJECT_ID, INJECTIVE
44507        if matches!(
44508            self.config.dialect,
44509            Some(crate::dialects::DialectType::ClickHouse)
44510        ) {
44511            if self.match_texts(&["HIERARCHICAL", "IS_OBJECT_ID", "INJECTIVE"]) {
44512                let attr_name = self.previous().text.to_ascii_uppercase();
44513                return Ok(Some(Expression::Property(Box::new(
44514                    crate::expressions::Property {
44515                        this: Box::new(Expression::Identifier(Identifier::new(attr_name))),
44516                        value: None,
44517                    },
44518                ))));
44519            }
44520            // ClickHouse EXPRESSION expr and ALIAS expr (dictionary column attributes)
44521            if self.match_texts(&["EXPRESSION"]) {
44522                let expr = self.parse_expression()?;
44523                return Ok(Some(Expression::DefaultColumnConstraint(Box::new(
44524                    DefaultColumnConstraint {
44525                        this: Box::new(expr),
44526                        for_column: None,
44527                    },
44528                ))));
44529            }
44530        }
44531
44532        // GENERATED ... AS IDENTITY
44533        if self.match_text_seq(&["GENERATED"]) {
44534            let always = self.match_text_seq(&["ALWAYS"]);
44535            if !always {
44536                self.match_text_seq(&["BY", "DEFAULT"]);
44537            }
44538            let on_null = self.match_text_seq(&["ON", "NULL"]);
44539            if self.match_text_seq(&["AS", "IDENTITY"]) {
44540                return Ok(Some(Expression::GeneratedAsIdentityColumnConstraint(
44541                    Box::new(GeneratedAsIdentityColumnConstraint {
44542                        this: None,
44543                        expression: None,
44544                        on_null: if on_null {
44545                            Some(Box::new(Expression::Boolean(BooleanLiteral {
44546                                value: true,
44547                            })))
44548                        } else {
44549                            None
44550                        },
44551                        start: None,
44552                        increment: None,
44553                        minvalue: None,
44554                        maxvalue: None,
44555                        cycle: None,
44556                        order: None,
44557                    }),
44558                )));
44559            }
44560            return Ok(None);
44561        }
44562
44563        // PATH 'xpath' - for XMLTABLE/JSON_TABLE columns
44564        if self.match_text_seq(&["PATH"]) {
44565            if let Some(path_expr) = self.parse_string()? {
44566                return Ok(Some(Expression::PathColumnConstraint(Box::new(
44567                    PathColumnConstraint {
44568                        this: Box::new(path_expr),
44569                    },
44570                ))));
44571            }
44572            return Ok(None);
44573        }
44574
44575        // Return the constraint name if we matched CONSTRAINT but no actual constraint
44576        if let Some(name) = constraint_name {
44577            return Ok(Some(Expression::Identifier(name)));
44578        }
44579
44580        Ok(None)
44581    }
44582
44583    /// parse_column_def_with_exists - Ported from Python _parse_column_def_with_exists
44584    /// Parses a column definition with optional IF [NOT] EXISTS clause
44585    #[allow(unused_variables, unused_mut)]
44586    pub fn parse_column_def_with_exists(&mut self) -> Result<Option<Expression>> {
44587        let start = self.current;
44588
44589        // Optionally match COLUMN keyword
44590        let _ = self.match_text_seq(&["COLUMN"]);
44591
44592        // Check for IF NOT EXISTS
44593        let not_exists = self.match_text_seq(&["IF", "NOT", "EXISTS"]);
44594        let exists = if !not_exists {
44595            self.match_text_seq(&["IF", "EXISTS"])
44596        } else {
44597            false
44598        };
44599
44600        // Parse the field definition
44601        let expression = self.parse_field_def()?;
44602
44603        if expression.is_none() {
44604            self.current = start;
44605            return Ok(None);
44606        }
44607
44608        // If it's a ColumnDef, we're good
44609        if let Some(Expression::ColumnDef(ref _col_def)) = expression {
44610            // The exists flag would be set on the ColumnDef, but our struct doesn't have that field
44611            // Just return the expression as-is
44612            return Ok(expression);
44613        }
44614
44615        // Not a ColumnDef, backtrack
44616        self.current = start;
44617        Ok(None)
44618    }
44619
44620    /// parse_column_ops - Parses column operations (stub for compatibility)
44621    pub fn parse_column_ops(&mut self) -> Result<Option<Expression>> {
44622        self.parse_column_ops_with_expr(None)
44623    }
44624
    /// parse_column_ops_with_expr - Parses column operations (dot access, brackets, casts)
    /// Python: _parse_column_ops(this)
    ///
    /// Starting from the optional base expression `this`, applies in order:
    ///
    /// 1. at most one bracket subscript `[expr]` (only attempted when `this` is `Some`),
    /// 2. any number of `.field` accesses, or a terminating qualified star `.*`,
    /// 3. an optional `!attr` model attribute,
    /// 4. an optional `::type` cast,
    /// 5. for the Teradata dialect only, an optional `(FORMAT '...')` phrase.
    ///
    /// Returns the (possibly wrapped) expression. When `this` is `None` the
    /// result is `Ok(None)`; note that `.`, `!`, `::` and the Teradata FORMAT
    /// phrase are still consumed from the token stream in that case.
    /// Errors raised by the nested parse calls are propagated.
    pub fn parse_column_ops_with_expr(
        &mut self,
        this: Option<Expression>,
    ) -> Result<Option<Expression>> {
        // First apply a bracket subscript, if one directly follows the base expression
        let mut result = if let Some(expr) = this {
            if self.match_token(TokenType::LBracket) {
                let index = self.parse_disjunction()?;
                // The closing bracket is consumed if present; a missing `]` is not
                // reported as an error here.
                self.match_token(TokenType::RBracket);
                if let Some(idx) = index {
                    Some(Expression::Subscript(Box::new(Subscript {
                        this: expr,
                        index: idx,
                    })))
                } else {
                    // Empty brackets: keep the base expression unchanged
                    Some(expr)
                }
            } else {
                Some(expr)
            }
        } else {
            None
        };

        // Handle DOT for qualified column names: table.column or schema.table.column
        while self.match_token(TokenType::Dot) {
            // Nothing to qualify. The dot has already been consumed at this point.
            if result.is_none() {
                break;
            }
            // Handle .* (qualified star) with modifiers
            if self.match_token(TokenType::Star) {
                // Determine table name from the expression
                let table_name = match &result {
                    // Unqualified column: its own name is the star's qualifier
                    Some(Expression::Column(col)) if col.table.is_none() => Some(col.name.clone()),
                    Some(Expression::Dot(dot)) => {
                        // For deep qualified names like schema.table.*, use the whole expression name
                        // Flattens a Column/Dot chain into a single dot-separated string;
                        // any other expression kind contributes an empty string.
                        fn dot_to_name(expr: &Expression) -> String {
                            match expr {
                                Expression::Column(col) => {
                                    if let Some(ref table) = col.table {
                                        format!("{}.{}", table.name, col.name.name)
                                    } else {
                                        col.name.name.clone()
                                    }
                                }
                                Expression::Dot(d) => {
                                    format!("{}.{}", dot_to_name(&d.this), d.field.name)
                                }
                                _ => String::new(),
                            }
                        }
                        Some(Identifier::new(dot_to_name(&Expression::Dot(dot.clone()))))
                    }
                    // Already-qualified columns and all other expressions: no qualifier
                    _ => None,
                };
                let star = self.parse_star_modifiers(table_name)?;
                // The star replaces the expression built so far and ends dot handling
                result = Some(Expression::Star(star));
                break;
            }
            // Parse the field identifier - use is_identifier_or_keyword_token to allow keywords
            // like "schema" as field names in dot access
            // ClickHouse: also allow numeric tuple index access like expr.1, expr.2
            if self.is_identifier_or_keyword_token()
                || self.check(TokenType::QuotedIdentifier)
                || (matches!(
                    self.config.dialect,
                    Some(crate::dialects::DialectType::ClickHouse)
                ) && self.check(TokenType::Number))
            {
                let token = self.advance();
                let field_ident = Identifier {
                    name: token.text,
                    quoted: token.token_type == TokenType::QuotedIdentifier,
                    trailing_comments: Vec::new(),
                    span: None,
                };
                // `result` is known to be Some here (checked at the top of the loop body)
                result = Some(Expression::Dot(Box::new(DotAccess {
                    this: result.take().unwrap(),
                    field: field_ident,
                })));
            } else {
                // Token after the dot is not a usable field name: stop.
                // The dot itself stays consumed.
                break;
            }
        }

        // Handle EXCLAMATION for Snowflake model attribute syntax: model!PREDICT(...)
        // The `!` token is consumed even when there is no expression to attach it to.
        if self.match_token(TokenType::Exclamation) {
            if let Some(expr) = result.take() {
                // Parse the attribute/function after the exclamation mark
                // This can be either a simple identifier (model!admin) or a function call (model!PREDICT(1))
                let attr = self.parse_unary()?;
                result = Some(Expression::ModelAttribute(Box::new(ModelAttribute {
                    this: Box::new(expr),
                    expression: Box::new(attr),
                })));
            }
        }

        // Handle DCOLON for casts (PostgreSQL syntax: column::type)
        // Only a single `::type` suffix is handled by this function.
        if self.match_token(TokenType::DColon) {
            if let Some(type_expr) = self.parse_types()? {
                if let Some(expr) = result {
                    // Extract DataType from the expression
                    let data_type = match type_expr {
                        Expression::DataType(dt) => dt,
                        _ => {
                            // Parsed type is not a DataType node: return the
                            // expression without wrapping it in a cast.
                            result = Some(expr);
                            return Ok(result);
                        }
                    };
                    result = Some(Expression::Cast(Box::new(Cast {
                        this: expr,
                        to: data_type,
                        trailing_comments: Vec::new(),
                        double_colon_syntax: true,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })));
                }
            }
        }

        // Teradata: (FORMAT '...') phrase after a column/expression
        if matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::Teradata)
        ) && self.check(TokenType::LParen)
            && self.check_next(TokenType::Format)
        {
            self.skip(); // consume (
            self.skip(); // consume FORMAT
            let format = self.expect_string()?;
            self.expect(TokenType::RParen)?;
            // Wrap only when there is an expression; otherwise the phrase is just consumed
            if let Some(expr) = result.take() {
                result = Some(Expression::FormatPhrase(Box::new(FormatPhrase {
                    this: Box::new(expr),
                    format,
                })));
            }
        }

        Ok(result)
    }
44771
44772    /// parse_column_reference - Parse column reference (field -> Column)
44773    /// Python: this = self._parse_field(); if isinstance(this, exp.Identifier): return exp.Column(this=this)
44774    pub fn parse_column_reference(&mut self) -> Result<Option<Expression>> {
44775        // Parse the field (identifier or literal)
44776        if let Some(field) = self.parse_field()? {
44777            // If it's an identifier, wrap it in a Column expression
44778            match &field {
44779                Expression::Identifier(id) => {
44780                    return Ok(Some(Expression::boxed_column(Column {
44781                        name: id.clone(),
44782                        table: None,
44783                        join_mark: false,
44784                        trailing_comments: Vec::new(),
44785                        span: None,
44786                        inferred_type: None,
44787                    })));
44788                }
44789                // If it's already something else (like a literal), return as-is
44790                _ => return Ok(Some(field)),
44791            }
44792        }
44793        Ok(None)
44794    }
44795
44796    /// parse_command - Parses a generic SQL command
44797    /// Python: _parse_command
44798    /// Used for commands that we don't have specific parsing for
44799    pub fn parse_command(&mut self) -> Result<Option<Expression>> {
44800        // Get the command keyword from the previous token
44801        let command_text = self.previous().text.to_ascii_uppercase();
44802
44803        // Collect remaining tokens as the command expression (until statement end)
44804        // Use (text, token_type) tuples for smart spacing with join_command_tokens
44805        let mut tokens: Vec<(String, TokenType)> = vec![(command_text, TokenType::Var)];
44806        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
44807            let token = self.advance();
44808            // Preserve quotes for quoted identifiers and strings
44809            let text = if token.token_type == TokenType::QuotedIdentifier {
44810                // Re-add the identifier quote characters
44811                // Use backticks as default; this handles MySQL backtick-quoted identifiers
44812                // and double-quoted identifiers for other dialects
44813                let quote_char = if self.config.dialect == Some(crate::dialects::DialectType::MySQL)
44814                    || self.config.dialect == Some(crate::dialects::DialectType::SingleStore)
44815                    || self.config.dialect == Some(crate::dialects::DialectType::Doris)
44816                    || self.config.dialect == Some(crate::dialects::DialectType::StarRocks)
44817                {
44818                    '`'
44819                } else {
44820                    '"'
44821                };
44822                format!("{}{}{}", quote_char, token.text, quote_char)
44823            } else if token.token_type == TokenType::String {
44824                format!("'{}'", token.text)
44825            } else {
44826                token.text.clone()
44827            };
44828            tokens.push((text, token.token_type));
44829        }
44830
44831        Ok(Some(Expression::Command(Box::new(Command {
44832            this: self.join_command_tokens(tokens),
44833        }))))
44834    }
44835
44836    /// parse_commit_or_rollback - Implemented from Python _parse_commit_or_rollback
44837    #[allow(unused_variables, unused_mut)]
44838    pub fn parse_commit_or_rollback(&mut self) -> Result<Option<Expression>> {
44839        if self.match_text_seq(&["TO"]) {
44840            return Ok(Some(Expression::Rollback(Box::new(Rollback {
44841                savepoint: None,
44842                this: None,
44843            }))));
44844        }
44845        if self.match_text_seq(&["SAVEPOINT"]) {
44846            // Matched: SAVEPOINT
44847            return Ok(None);
44848        }
44849        Ok(None)
44850    }
44851
44852    /// parse_composite_key_property - Implemented from Python _parse_composite_key_property
44853    #[allow(unused_variables, unused_mut)]
44854    pub fn parse_composite_key_property(&mut self) -> Result<Option<Expression>> {
44855        if self.match_text_seq(&["KEY"]) {
44856            // Matched: KEY
44857            return Ok(None);
44858        }
44859        Ok(None)
44860    }
44861
44862    /// parse_comprehension - Implemented from Python _parse_comprehension
44863    /// Parses list comprehension: expr FOR var [, position] IN iterator [IF condition]
44864    pub fn parse_comprehension(&mut self, this: Option<Expression>) -> Result<Option<Expression>> {
44865        let start_index = self.current;
44866
44867        // Parse expression (column)
44868        let expression = self.parse_column()?;
44869
44870        // Parse optional position (if comma follows)
44871        let position = if self.match_token(TokenType::Comma) {
44872            self.parse_column()?.map(Box::new)
44873        } else {
44874            None
44875        };
44876
44877        // Must have IN keyword
44878        if !self.match_token(TokenType::In) {
44879            // Backtrack
44880            self.current = start_index.saturating_sub(1);
44881            return Ok(None);
44882        }
44883
44884        // Parse iterator
44885        let iterator = self.parse_column()?.map(Box::new);
44886
44887        // Parse optional condition (IF followed by expression)
44888        let condition = if self.match_text_seq(&["IF"]) {
44889            self.parse_disjunction()?.map(Box::new)
44890        } else {
44891            None
44892        };
44893
44894        // Build the comprehension expression
44895        match (this, expression) {
44896            (Some(t), Some(e)) => Ok(Some(Expression::Comprehension(Box::new(Comprehension {
44897                this: Box::new(t),
44898                expression: Box::new(e),
44899                position,
44900                iterator,
44901                condition,
44902            })))),
44903            _ => Ok(None),
44904        }
44905    }
44906
44907    /// parse_compress - Parses COMPRESS column constraint (Teradata)
44908    /// Python: _parse_compress
44909    /// Format: COMPRESS or COMPRESS (value1, value2, ...)
44910    pub fn parse_compress(&mut self) -> Result<Option<Expression>> {
44911        // Check if it's a parenthesized list of values
44912        if self.check(TokenType::LParen) {
44913            // Parse wrapped CSV of bitwise expressions
44914            self.skip(); // consume LParen
44915            let mut expressions = Vec::new();
44916            loop {
44917                if let Some(expr) = self.parse_bitwise()? {
44918                    expressions.push(expr);
44919                } else {
44920                    break;
44921                }
44922                if !self.match_token(TokenType::Comma) {
44923                    break;
44924                }
44925            }
44926            self.expect(TokenType::RParen)?;
44927
44928            // Wrap in a Tuple if multiple values
44929            let this = if expressions.len() == 1 {
44930                Some(Box::new(expressions.into_iter().next().unwrap()))
44931            } else if expressions.is_empty() {
44932                None
44933            } else {
44934                Some(Box::new(Expression::Tuple(Box::new(Tuple { expressions }))))
44935            };
44936
44937            Ok(Some(Expression::CompressColumnConstraint(Box::new(
44938                CompressColumnConstraint { this },
44939            ))))
44940        } else {
44941            // Single value or no value
44942            let this = self.parse_bitwise()?.map(Box::new);
44943            Ok(Some(Expression::CompressColumnConstraint(Box::new(
44944                CompressColumnConstraint { this },
44945            ))))
44946        }
44947    }
44948
44949    /// parse_conjunction - Parses AND expressions
44950    /// Python: _parse_conjunction
44951    /// Delegates to the existing parse_and in the operator precedence chain
44952    pub fn parse_conjunction(&mut self) -> Result<Option<Expression>> {
44953        match self.parse_and() {
44954            Ok(expr) => Ok(Some(expr)),
44955            Err(_) => Ok(None),
44956        }
44957    }
44958
44959    /// parse_connect_with_prior - Parses expression in CONNECT BY context with PRIOR support
44960    /// Python: _parse_connect_with_prior
44961    /// This method temporarily treats PRIOR as a prefix operator while parsing the expression
44962    pub fn parse_connect_with_prior(&mut self) -> Result<Option<Expression>> {
44963        // parse_connect_expression already handles PRIOR as a prefix operator
44964        let connect = self.parse_connect_expression()?;
44965        Ok(Some(connect))
44966    }
44967
44968    /// parse_constraint - Parses named or unnamed constraint
44969    /// Python: _parse_constraint
44970    pub fn parse_constraint(&mut self) -> Result<Option<Expression>> {
44971        // Check for CONSTRAINT keyword (named constraint)
44972        if !self.match_token(TokenType::Constraint) {
44973            // Try to parse an unnamed constraint
44974            return self.parse_unnamed_constraint();
44975        }
44976
44977        // Parse the constraint name
44978        let name = self.parse_id_var()?;
44979        if name.is_none() {
44980            return Ok(None);
44981        }
44982
44983        // Parse the constraint expressions (PRIMARY KEY, UNIQUE, FOREIGN KEY, CHECK, etc.)
44984        let expressions = self.parse_unnamed_constraints()?;
44985
44986        Ok(Some(Expression::Constraint(Box::new(Constraint {
44987            this: Box::new(name.unwrap()),
44988            expressions,
44989        }))))
44990    }
44991
44992    /// parse_unnamed_constraints - Parses multiple unnamed constraints
44993    /// Python: _parse_unnamed_constraints
44994    pub fn parse_unnamed_constraints(&mut self) -> Result<Vec<Expression>> {
44995        let mut constraints = Vec::new();
44996
44997        loop {
44998            if let Some(constraint) = self.parse_unnamed_constraint()? {
44999                constraints.push(constraint);
45000            } else {
45001                break;
45002            }
45003        }
45004
45005        Ok(constraints)
45006    }
45007
45008    /// parse_unnamed_constraint - Parses a single unnamed constraint
45009    /// Python: _parse_unnamed_constraint
45010    pub fn parse_unnamed_constraint(&mut self) -> Result<Option<Expression>> {
45011        // Try PRIMARY KEY
45012        if self.match_text_seq(&["PRIMARY", "KEY"]) {
45013            // ClickHouse: PRIMARY KEY expr (without parens) in schema = table-level PK expression
45014            if matches!(
45015                self.config.dialect,
45016                Some(crate::dialects::DialectType::ClickHouse)
45017            ) && !self.check(TokenType::LParen)
45018            {
45019                let expr = self.parse_expression()?;
45020                return Ok(Some(Expression::Raw(Raw {
45021                    sql: format!("PRIMARY KEY {}", expr),
45022                })));
45023            }
45024            return self.parse_primary_key();
45025        }
45026
45027        // Try UNIQUE
45028        if self.match_texts(&["UNIQUE"]) {
45029            return self.parse_unique();
45030        }
45031
45032        // Try FOREIGN KEY
45033        if self.match_text_seq(&["FOREIGN", "KEY"]) {
45034            return self.parse_foreign_key();
45035        }
45036
45037        // Try CHECK
45038        if self.match_texts(&["CHECK"]) {
45039            let expr = self.parse_wrapped()?;
45040            if let Some(check_expr) = expr {
45041                return Ok(Some(Expression::CheckColumnConstraint(Box::new(
45042                    CheckColumnConstraint {
45043                        this: Box::new(check_expr),
45044                        enforced: None,
45045                    },
45046                ))));
45047            }
45048        }
45049
45050        // Try NOT NULL
45051        if self.match_text_seq(&["NOT", "NULL"]) {
45052            return Ok(Some(Expression::NotNullColumnConstraint(Box::new(
45053                NotNullColumnConstraint {
45054                    allow_null: None, // NOT NULL means allow_null is not set
45055                },
45056            ))));
45057        }
45058
45059        // Try NULL (allow null)
45060        if self.match_texts(&["NULL"]) {
45061            return Ok(Some(Expression::NotNullColumnConstraint(Box::new(
45062                NotNullColumnConstraint {
45063                    allow_null: Some(Box::new(Expression::Boolean(BooleanLiteral {
45064                        value: true,
45065                    }))),
45066                },
45067            ))));
45068        }
45069
45070        // Try DEFAULT
45071        if self.match_token(TokenType::Default) {
45072            let default_value = self.parse_bitwise()?;
45073            if let Some(val) = default_value {
45074                // TSQL: DEFAULT value FOR column (table-level default constraint)
45075                let for_column = if self.match_token(TokenType::For) {
45076                    Some(self.expect_identifier_with_quoted()?)
45077                } else {
45078                    None
45079                };
45080                return Ok(Some(Expression::DefaultColumnConstraint(Box::new(
45081                    DefaultColumnConstraint {
45082                        this: Box::new(val),
45083                        for_column,
45084                    },
45085                ))));
45086            }
45087        }
45088
45089        // Try REFERENCES (inline foreign key)
45090        if self.match_texts(&["REFERENCES"]) {
45091            return self.parse_references();
45092        }
45093
45094        // ClickHouse: INDEX name expr TYPE type_name [GRANULARITY n]
45095        if matches!(
45096            self.config.dialect,
45097            Some(crate::dialects::DialectType::ClickHouse)
45098        ) && self.match_token(TokenType::Index)
45099        {
45100            let name = self.expect_identifier_or_keyword_with_quoted()?;
45101            // Use parse_conjunction to handle comparisons like c0 < (SELECT _table)
45102            let expression = self.parse_conjunction()?.ok_or_else(|| {
45103                self.parse_error("Expected expression in ClickHouse INDEX definition")
45104            })?;
45105            let index_type = if self.match_token(TokenType::Type) {
45106                if let Some(func) = self.parse_function()? {
45107                    Some(Box::new(func))
45108                } else if !self.is_at_end() {
45109                    let type_name = self.advance().text.clone();
45110                    if self.check(TokenType::LParen) {
45111                        self.skip();
45112                        let mut args = Vec::new();
45113                        if !self.check(TokenType::RParen) {
45114                            args.push(self.parse_expression()?);
45115                            while self.match_token(TokenType::Comma) {
45116                                args.push(self.parse_expression()?);
45117                            }
45118                        }
45119                        self.expect(TokenType::RParen)?;
45120                        Some(Box::new(Expression::Function(Box::new(Function::new(
45121                            type_name, args,
45122                        )))))
45123                    } else {
45124                        Some(Box::new(Expression::Identifier(Identifier::new(type_name))))
45125                    }
45126                } else {
45127                    None
45128                }
45129            } else {
45130                None
45131            };
45132            let _granularity = if self.match_identifier("GRANULARITY") {
45133                let _ = self.parse_expression()?;
45134                true
45135            } else {
45136                false
45137            };
45138            // Return as a raw SQL expression preserving the INDEX definition
45139            let mut sql = format!("INDEX {} ", name.name);
45140            if let Some(ref idx_type) = index_type {
45141                sql.push_str(&format!("{} TYPE {} ", expression, idx_type));
45142            }
45143            return Ok(Some(Expression::Raw(Raw {
45144                sql: sql.trim().to_string(),
45145            })));
45146        }
45147
45148        // ClickHouse: PROJECTION name (SELECT ...) or PROJECTION name INDEX expr TYPE type_name
45149        if matches!(
45150            self.config.dialect,
45151            Some(crate::dialects::DialectType::ClickHouse)
45152        ) && self.check_identifier("PROJECTION")
45153        {
45154            self.skip(); // consume PROJECTION
45155            let name = self.expect_identifier_or_keyword_with_quoted()?;
45156            // Parse the projection body - either (SELECT ...) or INDEX expr TYPE type_name
45157            if self.match_token(TokenType::LParen) {
45158                let mut depth = 1i32;
45159                let start = self.current;
45160                while !self.is_at_end() && depth > 0 {
45161                    if self.check(TokenType::LParen) {
45162                        depth += 1;
45163                    }
45164                    if self.check(TokenType::RParen) {
45165                        depth -= 1;
45166                        if depth == 0 {
45167                            break;
45168                        }
45169                    }
45170                    self.skip();
45171                }
45172                let body_sql = self.tokens_to_sql(start, self.current);
45173                self.expect(TokenType::RParen)?;
45174                return Ok(Some(Expression::Raw(Raw {
45175                    sql: format!("PROJECTION {} ({})", name.name, body_sql),
45176                })));
45177            }
45178            // PROJECTION name INDEX expr TYPE type_name
45179            if self.match_token(TokenType::Index) {
45180                let expr = self.parse_bitwise()?.ok_or_else(|| {
45181                    self.parse_error(
45182                        "Expected expression in ClickHouse PROJECTION INDEX definition",
45183                    )
45184                })?;
45185                let type_str = if self.match_token(TokenType::Type) {
45186                    if !self.is_at_end() {
45187                        let t = self.advance().text.clone();
45188                        format!(" TYPE {}", t)
45189                    } else {
45190                        String::new()
45191                    }
45192                } else {
45193                    String::new()
45194                };
45195                return Ok(Some(Expression::Raw(Raw {
45196                    sql: format!("PROJECTION {} INDEX {}{}", name.name, expr, type_str),
45197                })));
45198            }
45199            return Ok(Some(Expression::Raw(Raw {
45200                sql: format!("PROJECTION {}", name.name),
45201            })));
45202        }
45203
45204        Ok(None)
45205    }
45206
45207    /// parse_contains_property - Implemented from Python _parse_contains_property
45208    #[allow(unused_variables, unused_mut)]
45209    pub fn parse_contains_property(&mut self) -> Result<Option<Expression>> {
45210        if self.match_text_seq(&["SQL"]) {
45211            // Matched: SQL
45212            return Ok(None);
45213        }
45214        Ok(None)
45215    }
45216
45217    /// parse_convert - Ported from Python _parse_convert
45218    /// Parses CONVERT function: CONVERT(expr USING charset) or CONVERT(expr, type)
45219    #[allow(unused_variables, unused_mut)]
45220    pub fn parse_convert(&mut self) -> Result<Option<Expression>> {
45221        // Parse the expression to convert
45222        let this = match self.parse_bitwise() {
45223            Ok(Some(expr)) => expr,
45224            Ok(None) => return Ok(None),
45225            Err(e) => return Err(e),
45226        };
45227
45228        // Check for USING charset (CONVERT(x USING utf8))
45229        if self.match_token(TokenType::Using) {
45230            let _ = self.parse_var(); // charset
45231                                      // Return as Cast with charset
45232            return Ok(Some(Expression::Cast(Box::new(Cast {
45233                this,
45234                to: DataType::Char { length: None },
45235                trailing_comments: Vec::new(),
45236                double_colon_syntax: false,
45237                format: None,
45238                default: None,
45239                inferred_type: None,
45240            }))));
45241        }
45242
45243        // Check for comma then type (CONVERT(x, INT))
45244        if self.match_token(TokenType::Comma) {
45245            let data_type = self.parse_data_type()?;
45246            return Ok(Some(Expression::Cast(Box::new(Cast {
45247                this,
45248                to: data_type,
45249                trailing_comments: Vec::new(),
45250                double_colon_syntax: false,
45251                format: None,
45252                default: None,
45253                inferred_type: None,
45254            }))));
45255        }
45256
45257        // No type specified, return as-is wrapped in Cast
45258        Ok(Some(Expression::Cast(Box::new(Cast {
45259            this,
45260            to: DataType::Char { length: None },
45261            trailing_comments: Vec::new(),
45262            double_colon_syntax: false,
45263            format: None,
45264            default: None,
45265            inferred_type: None,
45266        }))))
45267    }
45268
45269    /// parse_copy_parameters - Implemented from Python _parse_copy_parameters
45270    /// parse_copy_parameters - Parses COPY statement parameters
45271    /// Returns a tuple of CopyParameter expressions
45272    pub fn parse_copy_parameters(&mut self) -> Result<Option<Expression>> {
45273        let mut options = Vec::new();
45274
45275        while !self.is_at_end() && !self.check(TokenType::RParen) {
45276            // Parse option name as var
45277            let option = self.parse_var()?;
45278            if option.is_none() {
45279                break;
45280            }
45281
45282            let option_name = match &option {
45283                Some(Expression::Var(v)) => v.this.to_ascii_uppercase(),
45284                Some(Expression::Identifier(id)) => id.name.to_ascii_uppercase(),
45285                _ => String::new(),
45286            };
45287
45288            // Options and values may be separated by whitespace, "=" or "AS"
45289            self.match_token(TokenType::Eq);
45290            self.match_token(TokenType::Alias);
45291
45292            // Parse value based on option type
45293            let (expression, expressions) = if (option_name == "FILE_FORMAT"
45294                || option_name == "FORMAT_OPTIONS")
45295                && self.check(TokenType::LParen)
45296            {
45297                // Parse wrapped options for FILE_FORMAT
45298                let wrapped = self.parse_wrapped_options()?;
45299                let exprs = match wrapped {
45300                    Some(Expression::Tuple(t)) => t.expressions,
45301                    Some(e) => vec![e],
45302                    None => Vec::new(),
45303                };
45304                (None, exprs)
45305            } else if option_name == "FILE_FORMAT" {
45306                // T-SQL external file format case
45307                let field = self.parse_field()?;
45308                (field, Vec::new())
45309            } else if option_name == "FORMAT"
45310                && self.previous().token_type == TokenType::Alias
45311                && self.match_texts(&["AVRO", "JSON"])
45312            {
45313                // FORMAT AS AVRO/JSON
45314                let format_type = self.previous().text.to_ascii_uppercase();
45315                let field = self.parse_field()?;
45316                (
45317                    Some(Expression::Var(Box::new(Var {
45318                        this: format!("FORMAT AS {}", format_type),
45319                    }))),
45320                    field.map_or(Vec::new(), |f| vec![f]),
45321                )
45322            } else {
45323                // Parse unquoted field or bracket
45324                let expr = self
45325                    .parse_unquoted_field()?
45326                    .or_else(|| self.parse_bracket().ok().flatten());
45327                (expr, Vec::new())
45328            };
45329
45330            options.push(Expression::CopyParameter(Box::new(CopyParameter {
45331                name: option_name,
45332                value: expression,
45333                values: expressions,
45334                eq: true,
45335            })));
45336
45337            // Optional comma separator (dialect-specific)
45338            self.match_token(TokenType::Comma);
45339        }
45340
45341        if options.is_empty() {
45342            Ok(None)
45343        } else {
45344            Ok(Some(Expression::Tuple(Box::new(Tuple {
45345                expressions: options,
45346            }))))
45347        }
45348    }
45349
45350    /// parse_copy_property - Implemented from Python _parse_copy_property
45351    #[allow(unused_variables, unused_mut)]
45352    pub fn parse_copy_property(&mut self) -> Result<Option<Expression>> {
45353        if self.match_text_seq(&["GRANTS"]) {
45354            // Matched: GRANTS
45355            return Ok(None);
45356        }
45357        Ok(None)
45358    }
45359
45360    /// parse_create_like - Implemented from Python _parse_create_like
45361    /// Calls: parse_id_var
45362    #[allow(unused_variables, unused_mut)]
45363    pub fn parse_create_like(&mut self) -> Result<Option<Expression>> {
45364        if self.match_texts(&["INCLUDING", "EXCLUDING"]) {
45365            // Matched one of: INCLUDING, EXCLUDING
45366            return Ok(None);
45367        }
45368        Ok(None)
45369    }
45370
45371    /// parse_credentials - Implemented from Python _parse_credentials
45372    #[allow(unused_variables, unused_mut)]
45373    pub fn parse_credentials(&mut self) -> Result<Option<Expression>> {
45374        if self.match_text_seq(&["STORAGE_INTEGRATION", "="]) {
45375            return Ok(Some(Expression::Credentials(Box::new(Credentials {
45376                credentials: Vec::new(),
45377                encryption: None,
45378                storage: None,
45379            }))));
45380        }
45381        if self.match_text_seq(&["CREDENTIALS"]) {
45382            // Matched: CREDENTIALS
45383            return Ok(None);
45384        }
45385        Ok(None)
45386    }
45387
45388    /// parse_csv - Parses comma-separated expressions
45389    /// Python: _parse_csv
45390    /// In Python this takes a parse_method callback, but in Rust we use parse_expression_list
45391    pub fn parse_csv(&mut self) -> Result<Option<Expression>> {
45392        let expressions = self.parse_expression_list()?;
45393        if expressions.is_empty() {
45394            return Ok(None);
45395        }
45396        Ok(Some(Expression::Tuple(Box::new(Tuple { expressions }))))
45397    }
45398
45399    /// parse_cte - Implemented from Python _parse_cte
45400    /// Calls: parse_wrapped_id_vars
45401    #[allow(unused_variables, unused_mut)]
45402    pub fn parse_cte(&mut self) -> Result<Option<Expression>> {
45403        if self.match_text_seq(&["USING", "KEY"]) {
45404            return Ok(Some(Expression::Values(Box::new(Values {
45405                expressions: Vec::new(),
45406                alias: None,
45407                column_aliases: Vec::new(),
45408            }))));
45409        }
45410        if self.match_text_seq(&["NOT", "MATERIALIZED"]) {
45411            // Matched: NOT MATERIALIZED
45412            return Ok(None);
45413        }
45414        if self.match_text_seq(&["MATERIALIZED"]) {
45415            // Matched: MATERIALIZED
45416            return Ok(None);
45417        }
45418        Ok(None)
45419    }
45420
45421    /// parse_cube_or_rollup - Ported from Python _parse_cube_or_rollup
45422    /// Parses CUBE(...) or ROLLUP(...) expressions in GROUP BY
45423    #[allow(unused_variables, unused_mut)]
45424    pub fn parse_cube_or_rollup(&mut self) -> Result<Option<Expression>> {
45425        // Check for CUBE or ROLLUP keyword
45426        let is_cube = self.match_texts(&["CUBE"]);
45427        let is_rollup = if !is_cube {
45428            self.match_texts(&["ROLLUP"])
45429        } else {
45430            false
45431        };
45432
45433        if !is_cube && !is_rollup {
45434            return Ok(None);
45435        }
45436
45437        // Parse wrapped expressions
45438        self.expect(TokenType::LParen)?;
45439        let mut expressions = Vec::new();
45440        if !self.check(TokenType::RParen) {
45441            loop {
45442                match self.parse_bitwise() {
45443                    Ok(Some(expr)) => expressions.push(expr),
45444                    Ok(None) => break,
45445                    Err(e) => return Err(e),
45446                }
45447                if !self.match_token(TokenType::Comma) {
45448                    break;
45449                }
45450            }
45451        }
45452        self.expect(TokenType::RParen)?;
45453
45454        if is_cube {
45455            Ok(Some(Expression::Cube(Box::new(Cube { expressions }))))
45456        } else {
45457            Ok(Some(Expression::Rollup(Box::new(Rollup { expressions }))))
45458        }
45459    }
45460
45461    /// parse_data_deletion_property - Implemented from Python _parse_data_deletion_property
45462    /// Calls: parse_column, parse_retention_period
45463    #[allow(unused_variables, unused_mut)]
45464    pub fn parse_data_deletion_property(&mut self) -> Result<Option<Expression>> {
45465        if self.match_text_seq(&["ON"]) {
45466            // Matched: ON
45467            return Ok(None);
45468        }
45469        if self.match_text_seq(&["OFF"]) {
45470            // Matched: OFF
45471            return Ok(None);
45472        }
45473        if self.match_text_seq(&["FILTER_COLUMN", "="]) {
45474            // Matched: FILTER_COLUMN =
45475            return Ok(None);
45476        }
45477        Ok(None)
45478    }
45479
45480    /// parse_datablocksize - Implemented from Python _parse_datablocksize
45481    /// Calls: parse_number
45482    #[allow(unused_variables, unused_mut)]
45483    pub fn parse_datablocksize(&mut self) -> Result<Option<Expression>> {
45484        if self.match_texts(&["BYTES", "KBYTES", "KILOBYTES"]) {
45485            // Matched one of: BYTES, KBYTES, KILOBYTES
45486            return Ok(None);
45487        }
45488        Ok(None)
45489    }
45490
45491    /// parse_dcolon - Delegates to parse_types
45492    #[allow(unused_variables, unused_mut)]
45493    pub fn parse_dcolon(&mut self) -> Result<Option<Expression>> {
45494        self.parse_types()
45495    }
45496
45497    /// parse_ddl_select - Ported from Python _parse_ddl_select
45498    /// Parses a SELECT statement in DDL context (CREATE TABLE AS SELECT, INSERT INTO ... SELECT)
45499    #[allow(unused_variables, unused_mut)]
45500    pub fn parse_ddl_select(&mut self) -> Result<Option<Expression>> {
45501        // Parse a nested SELECT statement
45502        let select = self.parse_select_query()?;
45503
45504        if select.is_none() {
45505            return Ok(None);
45506        }
45507
45508        // Apply set operations (UNION, INTERSECT, EXCEPT)
45509        let with_set_ops = self.parse_set_operations_with_expr(select)?;
45510
45511        // Return the result (query modifiers would be applied by parse_select_query already)
45512        Ok(with_set_ops)
45513    }
45514
45515    /// parse_for_in - BigQuery procedural FOR...IN...DO loop
45516    /// Python: BigQuery._parse_for_in
45517    /// Format: FOR variable IN (query) DO statement(s) END FOR
45518    /// Example: FOR record IN (SELECT * FROM t) DO SELECT record.col
45519    pub fn parse_for_in(&mut self) -> Result<Expression> {
45520        // Parse: variable IN (query)
45521        // This is handled by parse_range which produces an In expression
45522        let this = self
45523            .parse_range()?
45524            .ok_or_else(|| self.parse_error("Expected expression after FOR"))?;
45525
45526        // Match DO keyword
45527        self.match_text_seq(&["DO"]);
45528
45529        // Parse the body statement
45530        let expression = self.parse_statement()?;
45531
45532        Ok(Expression::ForIn(Box::new(ForIn {
45533            this: Box::new(this),
45534            expression: Box::new(expression),
45535        })))
45536    }
45537
45538    /// parse_declare - Parses DECLARE statement
45539    /// Python: _parse_declare
45540    /// Format: DECLARE var1 type [DEFAULT expr], var2 type [DEFAULT expr], ...
45541    pub fn parse_declare(&mut self) -> Result<Option<Expression>> {
45542        // Check for OR REPLACE (Spark/Databricks)
45543        let replace = self.match_text_seq(&["OR", "REPLACE"]);
45544
45545        // Try to parse comma-separated declare items
45546        let mut expressions = Vec::new();
45547
45548        // BigQuery multi-variable DECLARE: DECLARE X, Y, Z INT64 [DEFAULT expr]
45549        // Detect by looking ahead: if we see identifier, comma, identifier pattern
45550        // before a data type keyword, collect all names then parse type once.
45551        let saved = self.current;
45552        let mut multi_names: Vec<Expression> = Vec::new();
45553        if let Some(first_var) = self.parse_id_var()? {
45554            // Check if next is a comma (BigQuery multi-var syntax)
45555            if self.check(TokenType::Comma) && !self.check_identifier("CURSOR") {
45556                // Speculatively collect comma-separated identifiers
45557                multi_names.push(first_var);
45558                while self.match_token(TokenType::Comma) {
45559                    if let Some(next_var) = self.parse_id_var()? {
45560                        multi_names.push(next_var);
45561                    } else {
45562                        break;
45563                    }
45564                }
45565                // Now check if we're at a data type (not comma, not @, not semicolon)
45566                // If so, this is BigQuery multi-var syntax
45567                if multi_names.len() > 1 && !self.is_at_end() && !self.check(TokenType::Semicolon) {
45568                    let data_type = self.parse_data_type()?;
45569                    let kind_str = self.data_type_to_sql(&data_type);
45570                    let default = if self.match_token(TokenType::Default)
45571                        || self.match_token(TokenType::Eq)
45572                    {
45573                        Some(Box::new(self.parse_expression()?))
45574                    } else {
45575                        None
45576                    };
45577                    let first_name = multi_names.remove(0);
45578                    expressions.push(Expression::DeclareItem(Box::new(DeclareItem {
45579                        this: Box::new(first_name),
45580                        kind: Some(kind_str),
45581                        default,
45582                        has_as: false,
45583                        additional_names: multi_names,
45584                    })));
45585                    return Ok(Some(Expression::Declare(Box::new(Declare {
45586                        expressions,
45587                        replace,
45588                    }))));
45589                }
45590            }
45591        }
45592        // Reset and parse normally
45593        self.current = saved;
45594
45595        loop {
45596            if let Some(item) = self.parse_declareitem()? {
45597                expressions.push(item);
45598            } else {
45599                break;
45600            }
45601            // Accept comma (TSQL/BigQuery) or semicolon (Snowflake scripting) as separator
45602            if self.match_token(TokenType::Comma)
45603                || self.match_token(TokenType::Semicolon)
45604            {
45605                // Stop if next token is BEGIN (end of DECLARE block)
45606                if self.check(TokenType::Begin) {
45607                    break;
45608                }
45609                continue;
45610            }
45611            break;
45612        }
45613
45614        // If we successfully parsed at least one item, return the Declare
45615        if !expressions.is_empty() {
45616            return Ok(Some(Expression::Declare(Box::new(Declare {
45617                expressions,
45618                replace,
45619            }))));
45620        }
45621
45622        Ok(None)
45623    }
45624
45625    /// parse_declareitem - Parse a DECLARE item (variable declaration)
45626    /// TSQL format: @var AS type [= expr] or @var type [= expr]
45627    /// Also handles: DECLARE name CURSOR FOR SELECT ...
45628    /// Also handles: DECLARE @var TABLE (col_defs)
45629    #[allow(unused_variables, unused_mut)]
45630    pub fn parse_declareitem(&mut self) -> Result<Option<Expression>> {
45631        // Consume optional VAR or VARIABLE keyword (Spark/Databricks)
45632        if self.check_identifier("VAR") || self.check_identifier("VARIABLE") {
45633            self.skip();
45634        }
45635
45636        // Parse the variable name (starts with @ or is a cursor name)
45637        let var = if let Some(v) = self.parse_id_var()? {
45638            v
45639        } else {
45640            return Ok(None);
45641        };
45642
45643        // Check for CURSOR FOR syntax: DECLARE name CURSOR FOR SELECT ...
45644        if self.check_identifier("CURSOR") {
45645            self.skip(); // consume CURSOR
45646                         // Parse optional cursor options before FOR (e.g., SCROLL, INSENSITIVE, etc.)
45647                         // For now just look for FOR
45648            if self.match_token(TokenType::For) {
45649                // Capture the remaining tokens as the cursor query using tokens_to_sql for proper spacing
45650                let start = self.current;
45651                while !self.is_at_end() && !self.check(TokenType::Semicolon) {
45652                    self.skip();
45653                }
45654                let query_str = self.tokens_to_sql_uppercased(start, self.current);
45655                let kind_str = format!("CURSOR FOR {}", query_str);
45656                return Ok(Some(Expression::DeclareItem(Box::new(DeclareItem {
45657                    this: Box::new(var),
45658                    kind: Some(kind_str),
45659                    default: None,
45660                    has_as: false,
45661                    additional_names: Vec::new(),
45662                }))));
45663            } else {
45664                return Ok(Some(Expression::DeclareItem(Box::new(DeclareItem {
45665                    this: Box::new(var),
45666                    kind: Some("CURSOR".to_string()),
45667                    default: None,
45668                    has_as: false,
45669                    additional_names: Vec::new(),
45670                }))));
45671            }
45672        }
45673
45674        // Parse optional AS keyword
45675        let has_as = self.match_token(TokenType::As);
45676
45677        // Check for TABLE type with column definitions
45678        if self.check(TokenType::Table) {
45679            self.skip(); // consume TABLE
45680            if self.match_token(TokenType::LParen) {
45681                // Parse the TABLE column definitions using tokens_to_sql for proper spacing
45682                let start = self.current;
45683                let mut depth = 1;
45684                while depth > 0 && !self.is_at_end() {
45685                    if self.check(TokenType::LParen) {
45686                        depth += 1;
45687                    }
45688                    if self.check(TokenType::RParen) {
45689                        depth -= 1;
45690                        if depth == 0 {
45691                            break;
45692                        }
45693                    }
45694                    self.skip();
45695                }
45696                let col_defs_str = self.tokens_to_sql_uppercased(start, self.current);
45697                self.expect(TokenType::RParen)?;
45698                let kind_str = format!("TABLE ({})", col_defs_str);
45699                return Ok(Some(Expression::DeclareItem(Box::new(DeclareItem {
45700                    this: Box::new(var),
45701                    kind: Some(kind_str),
45702                    default: None,
45703                    has_as,
45704                    additional_names: Vec::new(),
45705                }))));
45706            } else {
45707                return Ok(Some(Expression::DeclareItem(Box::new(DeclareItem {
45708                    this: Box::new(var),
45709                    kind: Some("TABLE".to_string()),
45710                    default: None,
45711                    has_as,
45712                    additional_names: Vec::new(),
45713                }))));
45714            }
45715        }
45716
45717        // Check if next token is = or DEFAULT (no type, just default value)
45718        // or if at end of statement (no type, no default)
45719        let kind_str = if self.check(TokenType::Eq)
45720            || self.check(TokenType::Default)
45721            || self.is_at_end()
45722            || self.check(TokenType::Semicolon)
45723            || self.check(TokenType::Comma)
45724        {
45725            // No type specified
45726            None
45727        } else {
45728            // Parse the data type
45729            let data_type = self.parse_data_type()?;
45730            Some(self.data_type_to_sql(&data_type))
45731        };
45732
45733        // Parse optional DEFAULT value or = value (TSQL uses =)
45734        let default = if self.match_token(TokenType::Default) || self.match_token(TokenType::Eq) {
45735            Some(Box::new(self.parse_expression()?))
45736        } else {
45737            None
45738        };
45739
45740        Ok(Some(Expression::DeclareItem(Box::new(DeclareItem {
45741            this: Box::new(var),
45742            kind: kind_str,
45743            default,
45744            has_as,
45745            additional_names: Vec::new(),
45746        }))))
45747    }
45748
45749    /// Convert a DataType to its SQL string representation
45750    fn data_type_to_sql(&self, dt: &DataType) -> String {
45751        match dt {
45752            DataType::Boolean => "BOOLEAN".to_string(),
45753            DataType::TinyInt { length } => {
45754                if let Some(n) = length {
45755                    format!("TINYINT({})", n)
45756                } else {
45757                    "TINYINT".to_string()
45758                }
45759            }
45760            DataType::SmallInt { length } => {
45761                if let Some(n) = length {
45762                    format!("SMALLINT({})", n)
45763                } else {
45764                    "SMALLINT".to_string()
45765                }
45766            }
45767            DataType::Int {
45768                length,
45769                integer_spelling,
45770            } => {
45771                if let Some(n) = length {
45772                    if *integer_spelling {
45773                        format!("INTEGER({})", n)
45774                    } else {
45775                        format!("INT({})", n)
45776                    }
45777                } else if *integer_spelling {
45778                    "INTEGER".to_string()
45779                } else {
45780                    "INT".to_string()
45781                }
45782            }
45783            DataType::BigInt { length } => {
45784                if let Some(n) = length {
45785                    format!("BIGINT({})", n)
45786                } else {
45787                    "BIGINT".to_string()
45788                }
45789            }
45790            DataType::Float {
45791                precision, scale, ..
45792            } => match (precision, scale) {
45793                (Some(p), Some(s)) => format!("FLOAT({}, {})", p, s),
45794                (Some(p), None) => format!("FLOAT({})", p),
45795                _ => "FLOAT".to_string(),
45796            },
45797            DataType::Double { precision, scale } => match (precision, scale) {
45798                (Some(p), Some(s)) => format!("DOUBLE({}, {})", p, s),
45799                (Some(p), None) => format!("DOUBLE({})", p),
45800                _ => "DOUBLE".to_string(),
45801            },
45802            DataType::Decimal { precision, scale } => match (precision, scale) {
45803                (Some(p), Some(s)) => format!("DECIMAL({}, {})", p, s),
45804                (Some(p), None) => format!("DECIMAL({})", p),
45805                _ => "DECIMAL".to_string(),
45806            },
45807            DataType::Char { length } => {
45808                if let Some(n) = length {
45809                    format!("CHAR({})", n)
45810                } else {
45811                    "CHAR".to_string()
45812                }
45813            }
45814            DataType::VarChar { length, .. } => {
45815                if let Some(n) = length {
45816                    format!("VARCHAR({})", n)
45817                } else {
45818                    "VARCHAR".to_string()
45819                }
45820            }
45821            DataType::Text => "TEXT".to_string(),
45822            DataType::Date => "DATE".to_string(),
45823            DataType::Time { precision, .. } => {
45824                if let Some(p) = precision {
45825                    format!("TIME({})", p)
45826                } else {
45827                    "TIME".to_string()
45828                }
45829            }
45830            DataType::Timestamp { precision, .. } => {
45831                if let Some(p) = precision {
45832                    format!("TIMESTAMP({})", p)
45833                } else {
45834                    "TIMESTAMP".to_string()
45835                }
45836            }
45837            DataType::Binary { length } => {
45838                if let Some(n) = length {
45839                    format!("BINARY({})", n)
45840                } else {
45841                    "BINARY".to_string()
45842                }
45843            }
45844            DataType::VarBinary { length } => {
45845                if let Some(n) = length {
45846                    format!("VARBINARY({})", n)
45847                } else {
45848                    "VARBINARY".to_string()
45849                }
45850            }
45851            DataType::Blob => "BLOB".to_string(),
45852            DataType::String { length: Some(n) } => format!("STRING({})", n),
45853            DataType::String { length: None } => "STRING".to_string(),
45854            DataType::Json => "JSON".to_string(),
45855            DataType::Uuid => "UUID".to_string(),
45856            DataType::Custom { name } => name.clone(), // Custom types (INT64, FLOAT64, etc.)
45857            _ => format!("{:?}", dt),                  // Fallback for unknown types
45858        }
45859    }
45860
45861    /// parse_decode - Ported from Python _parse_decode
45862    /// Parses Oracle-style DECODE or simple DECODE function
45863    /// If 3+ args: Oracle DECODE(expr, search1, result1, ..., default)
45864    /// If 2 args: character set decode (expr, charset)
45865    #[allow(unused_variables, unused_mut)]
45866    pub fn parse_decode(&mut self) -> Result<Option<Expression>> {
45867        // Parse comma-separated arguments
45868        let mut args: Vec<Expression> = Vec::new();
45869        loop {
45870            match self.parse_expression() {
45871                Ok(expr) => args.push(expr),
45872                Err(_) => break,
45873            }
45874            if !self.match_token(TokenType::Comma) {
45875                break;
45876            }
45877        }
45878
45879        if args.len() < 3 {
45880            // Simple decode with charset
45881            return Ok(Some(Expression::DecodeCase(Box::new(DecodeCase {
45882                expressions: args,
45883            }))));
45884        }
45885
45886        // Oracle DECODE: first arg is the expression being compared
45887        // Remaining args are search/result pairs, with optional default at end
45888        Ok(Some(Expression::DecodeCase(Box::new(DecodeCase {
45889            expressions: args,
45890        }))))
45891    }
45892
45893    /// parse_definer - MySQL DEFINER property
45894    /// Parses: DEFINER = user@host
45895    #[allow(unused_variables, unused_mut)]
45896    pub fn parse_definer(&mut self) -> Result<Option<Expression>> {
45897        // Optionally consume = sign
45898        self.match_token(TokenType::Eq);
45899
45900        // Parse the user part
45901        let user = self.parse_id_var()?;
45902        if user.is_none() {
45903            return Ok(None);
45904        }
45905
45906        // Expect @ symbol
45907        if !self.match_token(TokenType::DAt) {
45908            return Ok(None);
45909        }
45910
45911        // Parse the host part (can be identifier or % wildcard)
45912        let host = if let Some(id) = self.parse_id_var()? {
45913            id
45914        } else if self.match_token(TokenType::Mod) {
45915            // % wildcard for any host
45916            Expression::Identifier(Identifier::new(self.previous().text.clone()))
45917        } else {
45918            return Ok(None);
45919        };
45920
45921        // Combine user@host into a string
45922        let user_str = match &user {
45923            Some(Expression::Identifier(id)) => id.name.clone(),
45924            _ => "".to_string(),
45925        };
45926        let host_str = match &host {
45927            Expression::Identifier(id) => id.name.clone(),
45928            _ => "".to_string(),
45929        };
45930
45931        let definer_str = format!("{}@{}", user_str, host_str);
45932
45933        Ok(Some(Expression::DefinerProperty(Box::new(
45934            DefinerProperty {
45935                this: Box::new(Expression::Literal(Box::new(Literal::String(definer_str)))),
45936            },
45937        ))))
45938    }
45939
45940    /// parse_derived_table_values - Implemented from Python _parse_derived_table_values
45941    #[allow(unused_variables, unused_mut)]
45942    pub fn parse_derived_table_values(&mut self) -> Result<Option<Expression>> {
45943        if self.match_text_seq(&["VALUES"]) {
45944            return Ok(Some(Expression::Values(Box::new(Values {
45945                expressions: Vec::new(),
45946                alias: None,
45947                column_aliases: Vec::new(),
45948            }))));
45949        }
45950        if self.match_text_seq(&["FORMAT", "VALUES"]) {
45951            // Matched: FORMAT VALUES
45952            return Ok(None);
45953        }
45954        Ok(None)
45955    }
45956
45957    /// parse_dict_property - ClickHouse dictionary property
45958    /// Parses: property_name(kind(key1 value1, key2 value2, ...))
45959    /// property_name should be the already matched property keyword (LAYOUT, SOURCE, etc.)
45960    #[allow(unused_variables, unused_mut)]
45961    pub fn parse_dict_property(&mut self, property_name: &str) -> Result<Option<Expression>> {
45962        // Expect opening paren
45963        if !self.match_token(TokenType::LParen) {
45964            return Ok(None);
45965        }
45966
45967        // Parse the kind (e.g., HASHED, FLAT, CLICKHOUSE, CACHE, etc.)
45968        // Accept Var, Identifier, or keyword tokens as the kind name
45969        let kind_str = if self.is_identifier_token() || self.check_keyword() {
45970            self.advance().text.clone()
45971        } else {
45972            String::new()
45973        };
45974        if kind_str.is_empty() {
45975            return Err(self.parse_error("Expected dictionary property kind"));
45976        }
45977
45978        // Parse optional settings in nested parens
45979        let settings = if self.match_token(TokenType::LParen) {
45980            let mut setting_pairs = Vec::new();
45981            loop {
45982                let key = if let Some(k) = self.parse_id_var()? {
45983                    Some(k)
45984                } else if self.is_safe_keyword_as_identifier() || self.check_keyword() {
45985                    let name = self.advance().text.clone();
45986                    Some(Expression::Identifier(Identifier::new(name)))
45987                } else if !self.check(TokenType::RParen) && !self.check(TokenType::Comma) {
45988                    let name = self.advance().text.clone();
45989                    Some(Expression::Identifier(Identifier::new(name)))
45990                } else {
45991                    None
45992                };
45993                // ClickHouse: STRUCTURE (...) contains column defs without commas — consume balanced parens
45994                let is_structure = key.as_ref().map_or(false, |k| {
45995                    matches!(k, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("STRUCTURE"))
45996                });
45997                let value = if is_structure && self.check(TokenType::LParen) {
45998                    let mut raw = String::new();
45999                    let mut depth = 0i32;
46000                    while !self.is_at_end() {
46001                        let tok = self.advance();
46002                        match tok.token_type {
46003                            TokenType::LParen => {
46004                                depth += 1;
46005                                raw.push('(');
46006                            }
46007                            TokenType::RParen => {
46008                                depth -= 1;
46009                                if depth == 0 {
46010                                    raw.push(')');
46011                                    break;
46012                                }
46013                                raw.push(')');
46014                            }
46015                            _ => {
46016                                if !raw.is_empty() && !raw.ends_with('(') {
46017                                    raw.push(' ');
46018                                }
46019                                raw.push_str(&tok.text);
46020                            }
46021                        }
46022                    }
46023                    Some(Expression::Var(Box::new(Var { this: raw })))
46024                } else {
46025                    self.parse_primary_or_var()?
46026                };
46027                if key.is_none() && value.is_none() {
46028                    break;
46029                }
46030                if let (Some(k), Some(v)) = (key, value) {
46031                    // Store as a tuple-like expression
46032                    setting_pairs.push(Expression::Tuple(Box::new(Tuple {
46033                        expressions: vec![k, v],
46034                    })));
46035                }
46036                // ClickHouse dict properties are space-separated, not comma-separated
46037                // e.g. SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() DB 'test'))
46038                // Accept optional comma but don't require it
46039                self.match_token(TokenType::Comma);
46040                // Break if we see RParen (end of settings)
46041                if self.check(TokenType::RParen) {
46042                    break;
46043                }
46044            }
46045            self.expect(TokenType::RParen)?;
46046            if !setting_pairs.is_empty() {
46047                Some(Box::new(Expression::Tuple(Box::new(Tuple {
46048                    expressions: setting_pairs,
46049                }))))
46050            } else {
46051                None
46052            }
46053        } else {
46054            None
46055        };
46056
46057        self.expect(TokenType::RParen)?;
46058
46059        Ok(Some(Expression::DictProperty(Box::new(DictProperty {
46060            this: Box::new(Expression::Identifier(Identifier::new(
46061                property_name.to_string(),
46062            ))),
46063            kind: kind_str,
46064            settings,
46065        }))))
46066    }
46067
46068    /// parse_dict_range - Implemented from Python _parse_dict_range
46069    /// Parses dictionary range specification: (MIN min_val MAX max_val) or (max_val)
46070    pub fn parse_dict_range(&mut self, property_name: &str) -> Result<Option<Expression>> {
46071        // Expect opening paren
46072        self.expect(TokenType::LParen)?;
46073
46074        // Prefer id/var first for dictionary bounds to avoid function-keyword ambiguity
46075        // such as `MIN discount_start_date MAX discount_end_date`.
46076        let parse_bound = |parser: &mut Parser| -> Result<Option<Expression>> {
46077            // Handle negative numbers: -1, -100, etc.
46078            if parser.check(TokenType::Dash)
46079                && parser
46080                    .peek_nth(1)
46081                    .is_some_and(|t| t.token_type == TokenType::Number)
46082            {
46083                parser.advance(); // consume -
46084                let num = parser.advance().text.clone();
46085                return Ok(Some(Expression::Literal(Box::new(Literal::Number(
46086                    format!("-{}", num),
46087                )))));
46088            }
46089            if let Some(id) = parser.parse_id_var()? {
46090                return Ok(Some(id));
46091            }
46092            parser.parse_primary_or_var()
46093        };
46094
46095        let (min_val, max_val) = if self.peek().text.eq_ignore_ascii_case("MIN") {
46096            self.skip(); // consume MIN
46097            let min = parse_bound(self)?;
46098            if self.peek().text.eq_ignore_ascii_case("MAX") {
46099                self.skip(); // consume MAX
46100            }
46101            let max = parse_bound(self)?;
46102            (min, max)
46103        } else {
46104            let max = parse_bound(self)?;
46105            let min = Some(Expression::Literal(Box::new(Literal::Number(
46106                "0".to_string(),
46107            ))));
46108            (min, max)
46109        };
46110
46111        // Match closing paren
46112        self.expect(TokenType::RParen)?;
46113
46114        Ok(Some(Expression::DictRange(Box::new(DictRange {
46115            this: Box::new(Expression::Var(Box::new(Var {
46116                this: property_name.to_string(),
46117            }))),
46118            min: min_val.map(Box::new),
46119            max: max_val.map(Box::new),
46120        }))))
46121    }
46122
46123    /// parse_disjunction - Parses OR expressions
46124    /// Python: _parse_disjunction
46125    /// Delegates to the existing parse_or in the operator precedence chain
46126    pub fn parse_disjunction(&mut self) -> Result<Option<Expression>> {
46127        match self.parse_or() {
46128            Ok(expr) => Ok(Some(expr)),
46129            Err(_) => Ok(None),
46130        }
46131    }
46132
46133    /// parse_distkey - Redshift DISTKEY property for distribution key
46134    /// Parses: DISTKEY(column_name)
46135    #[allow(unused_variables, unused_mut)]
46136    pub fn parse_distkey(&mut self) -> Result<Option<Expression>> {
46137        // Parse wrapped column identifier (in parentheses)
46138        if !self.match_token(TokenType::LParen) {
46139            return Ok(None);
46140        }
46141
46142        let column = self.parse_id_var()?;
46143        if column.is_none() {
46144            return Ok(None);
46145        }
46146
46147        self.match_token(TokenType::RParen);
46148
46149        Ok(Some(Expression::DistKeyProperty(Box::new(
46150            DistKeyProperty {
46151                this: Box::new(column.unwrap()),
46152            },
46153        ))))
46154    }
46155
46156    /// parse_distributed_property - Implemented from Python _parse_distributed_property
46157    #[allow(unused_variables, unused_mut)]
46158    /// parse_distributed_property - Parses DISTRIBUTED BY property
46159    /// Python: parser.py:2462-2481
46160    pub fn parse_distributed_property(&mut self) -> Result<Option<Expression>> {
46161        let mut kind = "HASH".to_string();
46162        let mut expressions = Vec::new();
46163
46164        if self.match_text_seq(&["BY", "HASH"]) {
46165            // Parse column list: (col1, col2, ...)
46166            if let Some(wrapped) = self.parse_wrapped_id_vars()? {
46167                if let Expression::Tuple(t) = wrapped {
46168                    expressions = t.expressions;
46169                }
46170            }
46171        } else if self.match_text_seq(&["BY", "RANDOM"]) {
46172            kind = "RANDOM".to_string();
46173        } else {
46174            return Ok(None);
46175        }
46176
46177        // Parse optional BUCKETS
46178        let buckets = if self.match_text_seq(&["BUCKETS"]) {
46179            if !self.match_text_seq(&["AUTO"]) {
46180                self.parse_number()?
46181            } else {
46182                None
46183            }
46184        } else {
46185            None
46186        };
46187
46188        // Parse optional ORDER BY
46189        let order = self.parse_order()?;
46190
46191        Ok(Some(Expression::DistributedByProperty(Box::new(
46192            DistributedByProperty {
46193                expressions,
46194                kind,
46195                buckets: buckets.map(Box::new),
46196                order: order.map(Box::new),
46197            },
46198        ))))
46199    }
46200
46201    /// Parse DROP COLUMN in ALTER TABLE
46202    /// Note: Main ALTER TABLE DROP COLUMN logic is in parse_alter_table -> AlterTableAction::DropColumn
46203    pub fn parse_drop_column(&mut self) -> Result<Option<Expression>> {
46204        // Optionally match COLUMN keyword
46205        self.match_token(TokenType::Column);
46206
46207        // Parse IF EXISTS
46208        let _if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
46209
46210        // Parse the column identifier
46211        if let Some(column) = self.parse_identifier()? {
46212            // Check for CASCADE
46213            let _cascade = self.match_text_seq(&["CASCADE"]);
46214            // Return the column as an identifier (the caller handles the drop semantics)
46215            Ok(Some(column))
46216        } else {
46217            Ok(None)
46218        }
46219    }
46220
46221    /// Parse DROP PARTITION in ALTER TABLE
46222    /// Note: Main ALTER TABLE DROP PARTITION logic is in parse_alter_table -> AlterTableAction::DropPartition
46223    pub fn parse_drop_partition(&mut self) -> Result<Option<Expression>> {
46224        self.parse_drop_partition_with_exists(false)
46225    }
46226
46227    /// Parse DROP PARTITION with exists flag
46228    pub fn parse_drop_partition_with_exists(&mut self, exists: bool) -> Result<Option<Expression>> {
46229        // Parse one or more partitions
46230        let mut partitions = Vec::new();
46231
46232        loop {
46233            // Parse PARTITION (key = value, ...)
46234            if self.match_token(TokenType::Partition) {
46235                if self.match_token(TokenType::LParen) {
46236                    // Parse partition expressions
46237                    let mut exprs = Vec::new();
46238                    loop {
46239                        let expr = self.parse_expression()?;
46240                        exprs.push(expr);
46241                        if !self.match_token(TokenType::Comma) {
46242                            break;
46243                        }
46244                    }
46245                    self.match_token(TokenType::RParen);
46246                    partitions.push(Expression::Tuple(Box::new(Tuple { expressions: exprs })));
46247                }
46248            } else {
46249                break;
46250            }
46251
46252            if !self.match_token(TokenType::Comma) {
46253                break;
46254            }
46255        }
46256
46257        if partitions.is_empty() {
46258            Ok(None)
46259        } else {
46260            Ok(Some(Expression::DropPartition(Box::new(DropPartition {
46261                expressions: partitions,
46262                exists,
46263            }))))
46264        }
46265    }
46266
46267    /// parse_equality - Parses comparison/equality expressions (= <> < > <= >=)
46268    /// Python: _parse_equality
46269    /// Delegates to the existing parse_comparison in the operator precedence chain
46270    pub fn parse_equality(&mut self) -> Result<Option<Expression>> {
46271        match self.parse_comparison() {
46272            Ok(expr) => Ok(Some(expr)),
46273            Err(_) => Ok(None),
46274        }
46275    }
46276
46277    /// parse_escape - Parses ESCAPE clause for LIKE patterns
46278    /// Python: _parse_escape
46279    /// Returns the escape character/expression if ESCAPE keyword is found
46280    pub fn parse_escape(&mut self) -> Result<Option<Expression>> {
46281        if !self.match_token(TokenType::Escape) {
46282            return Ok(None);
46283        }
46284
46285        // Parse escape character (usually a string like '\')
46286        if let Some(escape_char) = self.parse_string()? {
46287            return Ok(Some(escape_char));
46288        }
46289
46290        // Or parse NULL
46291        if let Some(null_expr) = self.parse_null()? {
46292            return Ok(Some(null_expr));
46293        }
46294
46295        Ok(None)
46296    }
46297
46298    /// parse_exists - Implemented from Python _parse_exists
46299    #[allow(unused_variables, unused_mut)]
46300    pub fn parse_exists(&mut self) -> Result<Option<Expression>> {
46301        if self.match_text_seq(&["IF"]) {
46302            // Matched: IF
46303            return Ok(None);
46304        }
46305        Ok(None)
46306    }
46307
46308    /// parse_exponent - Parses exponent/power expressions
46309    /// Python: _parse_exponent
46310    /// In most dialects, EXPONENT is empty, so this delegates to parse_unary
46311    pub fn parse_exponent(&mut self) -> Result<Option<Expression>> {
46312        match self.parse_unary() {
46313            Ok(expr) => Ok(Some(expr)),
46314            Err(_) => Ok(None),
46315        }
46316    }
46317
46318    /// parse_expressions - Parse comma-separated expressions
46319    /// Returns a Tuple containing all expressions, or None if empty
46320    #[allow(unused_variables, unused_mut)]
46321    pub fn parse_expressions(&mut self) -> Result<Option<Expression>> {
46322        let expressions = self.parse_expression_list()?;
46323        if expressions.is_empty() {
46324            return Ok(None);
46325        }
46326        if expressions.len() == 1 {
46327            return Ok(expressions.into_iter().next());
46328        }
46329        Ok(Some(Expression::Tuple(Box::new(Tuple { expressions }))))
46330    }
46331
46332    /// parse_extract - Ported from Python _parse_extract
46333    /// Parses EXTRACT(field FROM expression) function
46334    #[allow(unused_variables, unused_mut)]
46335    pub fn parse_extract(&mut self) -> Result<Option<Expression>> {
46336        // Parse the field (YEAR, MONTH, DAY, HOUR, etc.)
46337        let field_name = if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
46338            let token = self.advance();
46339            token.text.to_ascii_uppercase()
46340        } else {
46341            return Ok(None);
46342        };
46343
46344        // Convert field name to DateTimeField
46345        let field = match field_name.as_str() {
46346            "YEAR" => DateTimeField::Year,
46347            "MONTH" => DateTimeField::Month,
46348            "DAY" => DateTimeField::Day,
46349            "HOUR" => DateTimeField::Hour,
46350            "MINUTE" => DateTimeField::Minute,
46351            "SECOND" => DateTimeField::Second,
46352            "MILLISECOND" | "MILLISECONDS" | "MS" => DateTimeField::Millisecond,
46353            "MICROSECOND" | "MICROSECONDS" | "US" => DateTimeField::Microsecond,
46354            "DOW" | "DAYOFWEEK" => DateTimeField::DayOfWeek,
46355            "DOY" | "DAYOFYEAR" => DateTimeField::DayOfYear,
46356            "WEEK" => DateTimeField::Week,
46357            "QUARTER" => DateTimeField::Quarter,
46358            "EPOCH" => DateTimeField::Epoch,
46359            "TIMEZONE" => DateTimeField::Timezone,
46360            "TIMEZONE_HOUR" => DateTimeField::TimezoneHour,
46361            "TIMEZONE_MINUTE" => DateTimeField::TimezoneMinute,
46362            "DATE" => DateTimeField::Date,
46363            "TIME" => DateTimeField::Time,
46364            other => DateTimeField::Custom(other.to_string()),
46365        };
46366
46367        // Expect FROM or comma
46368        if !self.match_token(TokenType::From) && !self.match_token(TokenType::Comma) {
46369            return Err(self.parse_error("Expected FROM or comma after EXTRACT field"));
46370        }
46371
46372        // Parse the expression to extract from
46373        let expression = self.parse_bitwise()?;
46374        let this = match expression {
46375            Some(expr) => self.try_clickhouse_func_arg_alias(expr),
46376            None => return Err(self.parse_error("Expected expression after FROM in EXTRACT")),
46377        };
46378
46379        Ok(Some(Expression::Extract(Box::new(ExtractFunc {
46380            this,
46381            field,
46382        }))))
46383    }
46384
46385    /// parse_factor - Parses multiplication/division expressions (* / % operators)
46386    /// Python: _parse_factor
46387    /// Delegates to the existing parse_multiplication in the operator precedence chain
46388    pub fn parse_factor(&mut self) -> Result<Option<Expression>> {
46389        // Delegate to the existing multiplication parsing
46390        match self.parse_multiplication() {
46391            Ok(expr) => Ok(Some(expr)),
46392            Err(_) => Ok(None),
46393        }
46394    }
46395
46396    /// parse_fallback - Implemented from Python _parse_fallback
46397    #[allow(unused_variables, unused_mut)]
46398    pub fn parse_fallback(&mut self) -> Result<Option<Expression>> {
46399        if self.match_text_seq(&["PROTECTION"]) {
46400            return Ok(Some(Expression::FallbackProperty(Box::new(
46401                FallbackProperty {
46402                    no: None,
46403                    protection: None,
46404                },
46405            ))));
46406        }
46407        Ok(None)
46408    }
46409
46410    /// parse_field - Parse a field (column name, literal, or expression)
46411    /// Python: field = self._parse_primary() or self._parse_function() or self._parse_id_var()
46412    pub fn parse_field(&mut self) -> Result<Option<Expression>> {
46413        // Try parsing literals first
46414        if let Some(expr) = self.parse_string()? {
46415            return Ok(Some(expr));
46416        }
46417        if let Some(expr) = self.parse_number()? {
46418            return Ok(Some(expr));
46419        }
46420        if let Some(expr) = self.parse_boolean()? {
46421            return Ok(Some(expr));
46422        }
46423        if let Some(expr) = self.parse_null()? {
46424            return Ok(Some(expr));
46425        }
46426        if let Some(expr) = self.parse_star()? {
46427            return Ok(Some(expr));
46428        }
46429        // Try parsing identifier
46430        if let Some(expr) = self.parse_identifier()? {
46431            return Ok(Some(expr));
46432        }
46433        // Try parsing a variable/identifier
46434        if let Some(expr) = self.parse_var()? {
46435            return Ok(Some(expr));
46436        }
46437        // Allow keywords as identifiers in field context (e.g., "schema" as a field name)
46438        if self.check_keyword() {
46439            let token = self.advance();
46440            return Ok(Some(Expression::Identifier(Identifier {
46441                name: token.text,
46442                quoted: false,
46443                trailing_comments: Vec::new(),
46444                span: None,
46445            })));
46446        }
46447        Ok(None)
46448    }
46449
46450    /// parse_field_def - Ported from Python _parse_field_def
46451    /// Parses a field definition (column name + type + optional constraints)
46452    #[allow(unused_variables, unused_mut)]
46453    pub fn parse_field_def(&mut self) -> Result<Option<Expression>> {
46454        // First parse the field name (identifier)
46455        let field = self.parse_field()?;
46456
46457        if field.is_none() {
46458            return Ok(None);
46459        }
46460
46461        // Parse the column definition with the field as the name
46462        self.parse_column_def_with_field(field)
46463    }
46464
46465    /// Helper to parse a column definition with a pre-parsed field name
46466    fn parse_column_def_with_field(
46467        &mut self,
46468        field: Option<Expression>,
46469    ) -> Result<Option<Expression>> {
46470        if field.is_none() {
46471            return Ok(None);
46472        }
46473
46474        let this = field.unwrap();
46475
46476        // Get the identifier from the expression and preserve quoted-identifier state.
46477        let name_ident = match &this {
46478            Expression::Column(col) => col.name.clone(),
46479            Expression::Identifier(id) => id.clone(),
46480            Expression::Var(v) => Identifier::new(v.this.clone()),
46481            _ => return Ok(None),
46482        };
46483
46484        // Parse the data type using parse_data_type_optional (which handles unknown types gracefully)
46485        let data_type = match self.parse_data_type_optional()? {
46486            Some(dt) => dt,
46487            None => DataType::Unknown,
46488        };
46489
46490        // Create ColumnDef with default values
46491        let mut col_def = ColumnDef::new(name_ident.name.clone(), data_type);
46492        col_def.name = name_ident;
46493
46494        // Check for FOR ORDINALITY (JSON table columns)
46495        if self.match_text_seq(&["FOR", "ORDINALITY"]) {
46496            return Ok(Some(Expression::ColumnDef(Box::new(col_def))));
46497        }
46498
46499        // Parse constraints and extract specific constraint values
46500        loop {
46501            if let Some(constraint) = self.parse_column_constraint()? {
46502                // Check specific constraint types
46503                match &constraint {
46504                    Expression::NotNullColumnConstraint(_) => {
46505                        col_def.nullable = Some(false);
46506                        col_def.constraints.push(ColumnConstraint::NotNull);
46507                    }
46508                    Expression::PrimaryKeyColumnConstraint(_) => {
46509                        col_def.primary_key = true;
46510                        col_def.constraints.push(ColumnConstraint::PrimaryKey);
46511                    }
46512                    Expression::UniqueColumnConstraint(_) => {
46513                        col_def.unique = true;
46514                        col_def.constraints.push(ColumnConstraint::Unique);
46515                    }
46516                    Expression::DefaultColumnConstraint(dc) => {
46517                        col_def.default = Some((*dc.this).clone());
46518                        col_def
46519                            .constraints
46520                            .push(ColumnConstraint::Default((*dc.this).clone()));
46521                    }
46522                    Expression::AutoIncrementColumnConstraint(_) => {
46523                        col_def.auto_increment = true;
46524                    }
46525                    Expression::CommentColumnConstraint(_) => {
46526                        // Comment is a unit struct, we'd need the actual comment text
46527                    }
46528                    Expression::CheckColumnConstraint(cc) => {
46529                        col_def
46530                            .constraints
46531                            .push(ColumnConstraint::Check((*cc.this).clone()));
46532                    }
46533                    Expression::PathColumnConstraint(pc) => {
46534                        col_def
46535                            .constraints
46536                            .push(ColumnConstraint::Path((*pc.this).clone()));
46537                        col_def.constraint_order.push(ConstraintType::Path);
46538                    }
46539                    _ => {}
46540                }
46541            } else if matches!(
46542                self.config.dialect,
46543                Some(crate::dialects::DialectType::ClickHouse)
46544            ) && self.match_identifier("ALIAS")
46545            {
46546                // ClickHouse: ALIAS expr
46547                let expr = self.parse_or()?;
46548                col_def.alias_expr = Some(Box::new(expr));
46549            } else if matches!(
46550                self.config.dialect,
46551                Some(crate::dialects::DialectType::ClickHouse)
46552            ) && self.check(TokenType::Materialized)
46553                && !self.check_next(TokenType::View)
46554            {
46555                // ClickHouse: MATERIALIZED expr
46556                self.skip(); // consume MATERIALIZED
46557                let expr = self.parse_or()?;
46558                col_def.materialized_expr = Some(Box::new(expr));
46559            } else if matches!(
46560                self.config.dialect,
46561                Some(crate::dialects::DialectType::ClickHouse)
46562            ) && self.match_identifier("EPHEMERAL")
46563            {
46564                // ClickHouse: EPHEMERAL [expr]
46565                if !self.check(TokenType::Comma)
46566                    && !self.check(TokenType::RParen)
46567                    && !self.is_at_end()
46568                    && !self.check_identifier("CODEC")
46569                    && !self.check_identifier("TTL")
46570                    && !self.check(TokenType::Comment)
46571                {
46572                    let expr = self.parse_bitwise()?.unwrap_or(Expression::Null(Null));
46573                    col_def.ephemeral = Some(Some(Box::new(expr)));
46574                } else {
46575                    col_def.ephemeral = Some(None);
46576                }
46577            } else if matches!(
46578                self.config.dialect,
46579                Some(crate::dialects::DialectType::ClickHouse)
46580            ) && self.check_identifier("CODEC")
46581            {
46582                // ClickHouse: CODEC(LZ4HC(9), ZSTD, DELTA)
46583                self.skip(); // consume CODEC
46584                self.expect(TokenType::LParen)?;
46585                let start = self.current;
46586                let mut depth = 1;
46587                while !self.is_at_end() && depth > 0 {
46588                    if self.check(TokenType::LParen) {
46589                        depth += 1;
46590                    }
46591                    if self.check(TokenType::RParen) {
46592                        depth -= 1;
46593                        if depth == 0 {
46594                            break;
46595                        }
46596                    }
46597                    self.skip();
46598                }
46599                let codec_text = self.tokens_to_sql(start, self.current);
46600                self.expect(TokenType::RParen)?;
46601                col_def.codec = Some(codec_text);
46602            } else if matches!(
46603                self.config.dialect,
46604                Some(crate::dialects::DialectType::ClickHouse)
46605            ) && self.match_identifier("TTL")
46606            {
46607                // ClickHouse: TTL expr
46608                let expr = self.parse_expression()?;
46609                col_def.ttl_expr = Some(Box::new(expr));
46610            } else {
46611                break;
46612            }
46613        }
46614
46615        Ok(Some(Expression::ColumnDef(Box::new(col_def))))
46616    }
46617
46618    /// parse_foreign_key - Implemented from Python _parse_foreign_key
46619    /// Calls: parse_key_constraint_options, parse_wrapped_id_vars, parse_references
46620    #[allow(unused_variables, unused_mut)]
46621    pub fn parse_foreign_key(&mut self) -> Result<Option<Expression>> {
46622        if self.match_text_seq(&["NO", "ACTION"]) {
46623            return Ok(Some(Expression::ForeignKey(Box::new(ForeignKey {
46624                expressions: Vec::new(),
46625                reference: None,
46626                delete: None,
46627                update: None,
46628                options: Vec::new(),
46629            }))));
46630        }
46631        Ok(None)
46632    }
46633
46634    /// parse_format_json - Implemented from Python _parse_format_json
46635    #[allow(unused_variables, unused_mut)]
46636    pub fn parse_format_json(&mut self) -> Result<Option<Expression>> {
46637        if self.match_text_seq(&["FORMAT", "JSON"]) {
46638            // Matched: FORMAT JSON
46639            return Ok(None);
46640        }
46641        Ok(None)
46642    }
46643
46644    /// parse_format_name - Snowflake FILE_FORMAT = format_name property
46645    /// Parses: format_name (string or identifier)
46646    #[allow(unused_variables, unused_mut)]
46647    pub fn parse_format_name(&mut self) -> Result<Option<Expression>> {
46648        // Try to parse a string first, then fall back to table parts
46649        let value = if let Some(s) = self.parse_string()? {
46650            s
46651        } else if let Some(tp) = self.parse_table_parts()? {
46652            tp
46653        } else {
46654            return Ok(None);
46655        };
46656
46657        Ok(Some(Expression::Property(Box::new(Property {
46658            this: Box::new(Expression::Identifier(Identifier::new(
46659                "FORMAT_NAME".to_string(),
46660            ))),
46661            value: Some(Box::new(value)),
46662        }))))
46663    }
46664
46665    /// parse_freespace - Teradata FREESPACE property
46666    /// Parses: FREESPACE = number [PERCENT]
46667    #[allow(unused_variables, unused_mut)]
46668    pub fn parse_freespace(&mut self) -> Result<Option<Expression>> {
46669        // Optionally consume = sign
46670        self.match_token(TokenType::Eq);
46671
46672        // Parse the number value
46673        let this = self.parse_number()?;
46674        if this.is_none() {
46675            return Ok(None);
46676        }
46677
46678        // Check for PERCENT keyword
46679        let percent = if self.match_token(TokenType::Percent) {
46680            Some(Box::new(Expression::Boolean(BooleanLiteral {
46681                value: true,
46682            })))
46683        } else {
46684            None
46685        };
46686
46687        Ok(Some(Expression::FreespaceProperty(Box::new(
46688            FreespaceProperty {
46689                this: Box::new(this.unwrap()),
46690                percent,
46691            },
46692        ))))
46693    }
46694
46695    /// parse_function - Ported from Python _parse_function
46696    /// Parses function calls like func_name(args) or {fn func_name(args)} (ODBC syntax)
46697    pub fn parse_function(&mut self) -> Result<Option<Expression>> {
46698        // Check for ODBC escape syntax: {fn function_call}
46699        let fn_syntax = if self.check(TokenType::LBrace) {
46700            if let Some(next) = self.tokens.get(self.current + 1) {
46701                if next.text.eq_ignore_ascii_case("FN") {
46702                    self.skip(); // consume {
46703                    self.skip(); // consume FN
46704                    true
46705                } else {
46706                    false
46707                }
46708            } else {
46709                false
46710            }
46711        } else {
46712            false
46713        };
46714
46715        let func = self.parse_function_call()?;
46716
46717        if fn_syntax {
46718            self.match_token(TokenType::RBrace);
46719        }
46720
46721        Ok(func)
46722    }
46723
46724    /// parse_function_args - Ported from Python _parse_function_args
46725    /// Parses the arguments inside a function call, handling aliases and key-value pairs
46726    pub fn parse_function_args_list(&mut self) -> Result<Vec<Expression>> {
46727        let mut args = Vec::new();
46728
46729        if self.check(TokenType::RParen) {
46730            return Ok(args);
46731        }
46732
46733        loop {
46734            // Try to parse expression with optional alias
46735            if let Some(expr) = self.parse_assignment()? {
46736                // Handle explicit AS alias inside function args (e.g. `tuple(1 AS "a", 2 AS "b")`)
46737                if self.match_token(TokenType::As) {
46738                    let alias_token = self.advance();
46739                    let alias_name = if alias_token.token_type == TokenType::QuotedIdentifier {
46740                        // Preserve quoted identifiers
46741                        let raw = alias_token.text.clone();
46742                        let mut ident = Identifier::new(raw);
46743                        ident.quoted = true;
46744                        ident
46745                    } else {
46746                        Identifier::new(alias_token.text.clone())
46747                    };
46748                    args.push(Expression::Alias(Box::new(crate::expressions::Alias {
46749                        this: expr,
46750                        alias: alias_name,
46751                        column_aliases: Vec::new(),
46752                        pre_alias_comments: Vec::new(),
46753                        trailing_comments: Vec::new(),
46754                        inferred_type: None,
46755                    })));
46756                } else {
46757                    args.push(expr);
46758                }
46759            }
46760
46761            if !self.match_token(TokenType::Comma) {
46762                break;
46763            }
46764        }
46765
46766        Ok(args)
46767    }
46768
46769    /// parse_function_call - Ported from Python _parse_function_call
46770    /// Parses a function call expression like func_name(arg1, arg2, ...)
46771    pub fn parse_function_call(&mut self) -> Result<Option<Expression>> {
46772        if self.is_at_end() {
46773            return Ok(None);
46774        }
46775
46776        let token = self.peek().clone();
46777        let token_type = token.token_type.clone();
46778        let name = token.text.clone();
46779        let _upper_name = name.to_ascii_uppercase();
46780
46781        // Check for no-paren functions like CURRENT_DATE, CURRENT_TIMESTAMP
46782        if self.is_no_paren_function() {
46783            // Check if next token is NOT a paren (so it's used without parens)
46784            if !self.check_next(TokenType::LParen) {
46785                self.skip();
46786                return Ok(Some(Expression::Function(Box::new(Function {
46787                    name, // Preserve original case; generator handles normalization
46788                    args: Vec::new(),
46789                    distinct: false,
46790                    trailing_comments: Vec::new(),
46791                    use_bracket_syntax: false,
46792                    no_parens: true,
46793                    quoted: false,
46794                    span: None,
46795                    inferred_type: None,
46796                }))));
46797            }
46798        }
46799
46800        // Must be followed by left paren
46801        if !self.check_next(TokenType::LParen) {
46802            return Ok(None);
46803        }
46804
46805        // Token must be a valid function name token
46806        let is_valid_func_token = matches!(
46807            token_type,
46808            TokenType::Identifier
46809                | TokenType::Var
46810                | TokenType::If
46811                | TokenType::Left
46812                | TokenType::Right
46813                | TokenType::Insert
46814                | TokenType::Replace
46815                | TokenType::Row
46816                | TokenType::Index
46817        );
46818        if !is_valid_func_token {
46819            return Ok(None);
46820        }
46821
46822        self.skip(); // consume function name
46823        self.skip(); // consume (
46824
46825        // Check for DISTINCT keyword
46826        let distinct = self.match_token(TokenType::Distinct);
46827
46828        // Parse arguments
46829        let args = self.parse_function_args_list()?;
46830
46831        self.match_token(TokenType::RParen);
46832
46833        // Handle window specifications
46834        let func_expr = Expression::Function(Box::new(Function {
46835            name, // Preserve original case; generator handles normalization
46836            args,
46837            distinct,
46838            trailing_comments: Vec::new(),
46839            use_bracket_syntax: false,
46840            no_parens: false,
46841            quoted: false,
46842            span: None,
46843            inferred_type: None,
46844        }));
46845
46846        // Check for OVER clause (window function)
46847        if self.match_token(TokenType::Over) {
46848            // Parse window spec - create a simple WindowSpec
46849            if self.match_token(TokenType::LParen) {
46850                // Use parse_window_spec_inner to handle DISTRIBUTE BY/SORT BY (Hive)
46851                let spec = self.parse_window_spec_inner()?;
46852                self.expect(TokenType::RParen)?;
46853
46854                if let Some(spec_expr) = spec {
46855                    return Ok(Some(spec_expr));
46856                }
46857            }
46858        }
46859
46860        Ok(Some(func_expr))
46861    }
46862
46863    /// parse_function_parameter - Ported from Python _parse_function_parameter
46864    /// Parses a function parameter in CREATE FUNCTION (name type [DEFAULT expr])
46865    pub fn parse_function_parameter(&mut self) -> Result<Option<Expression>> {
46866        // Parse optional parameter mode (IN, OUT, INOUT)
46867        let _mode = if self.match_texts(&["IN"]) {
46868            if self.match_texts(&["OUT"]) {
46869                Some(ParameterMode::InOut)
46870            } else {
46871                Some(ParameterMode::In)
46872            }
46873        } else if self.match_texts(&["OUT"]) {
46874            Some(ParameterMode::Out)
46875        } else if self.match_texts(&["INOUT"]) {
46876            Some(ParameterMode::InOut)
46877        } else {
46878            None
46879        };
46880
46881        // Parse parameter name (optional in some dialects)
46882        let name_expr = self.parse_id_var()?;
46883        let name = name_expr.and_then(|n| match n {
46884            Expression::Identifier(id) => Some(id),
46885            _ => None,
46886        });
46887
46888        // Parse data type - returns Result<DataType>, not Result<Option<DataType>>
46889        // We need to handle the case where we can't parse a data type
46890        let data_type_result = self.parse_data_type();
46891        let _data_type = match data_type_result {
46892            Ok(dt) => dt,
46893            Err(_) => return Ok(None),
46894        };
46895
46896        // Parse optional DEFAULT value
46897        let _default = if self.match_token(TokenType::Default) || self.match_texts(&["="]) {
46898            self.parse_disjunction()?
46899        } else {
46900            None
46901        };
46902
46903        // Return the name as a Column expression
46904        Ok(Some(Expression::boxed_column(Column {
46905            name: Identifier {
46906                name: name.map(|n| n.name).unwrap_or_default(),
46907                quoted: false,
46908                trailing_comments: Vec::new(),
46909                span: None,
46910            },
46911            table: None,
46912            join_mark: false,
46913            trailing_comments: Vec::new(),
46914            span: None,
46915            inferred_type: None,
46916        })))
46917    }
46918
46919    /// parse_gap_fill - Ported from Python _parse_gap_fill
46920    #[allow(unused_variables, unused_mut)]
46921    /// parse_gap_fill - Parses GAP_FILL function for time series
46922    /// Example: GAP_FILL(TABLE t, ts_column, bucket_width, partitioning_columns, value_columns)
46923    pub fn parse_gap_fill(&mut self) -> Result<Option<Expression>> {
46924        // Optional TABLE keyword
46925        self.match_token(TokenType::Table);
46926
46927        // Parse the table reference
46928        let this = self.parse_table()?;
46929        if this.is_none() {
46930            return Ok(None);
46931        }
46932
46933        // Parse comma-separated arguments
46934        self.match_token(TokenType::Comma);
46935        let mut args = self.parse_expression_list()?;
46936
46937        // Extract arguments by position
46938        let ts_column = args.get(0).cloned().map(Box::new);
46939        let bucket_width = args.get(1).cloned().map(Box::new);
46940        let partitioning_columns = args.get(2).cloned().map(Box::new);
46941        let value_columns = args.get(3).cloned().map(Box::new);
46942
46943        Ok(Some(Expression::GapFill(Box::new(GapFill {
46944            this: Box::new(this.unwrap()),
46945            ts_column,
46946            bucket_width,
46947            partitioning_columns,
46948            value_columns,
46949            origin: None,
46950            ignore_nulls: None,
46951        }))))
46952    }
46953
46954    /// parse_semantic_view - Parse Snowflake SEMANTIC_VIEW function
46955    /// Example: SEMANTIC_VIEW(foo METRICS a.b, a.c DIMENSIONS a.b, a.c WHERE a.b > '1995-01-01')
46956    pub fn parse_semantic_view(&mut self) -> Result<Expression> {
46957        // Parse the table/view reference as a primary expression (identifier or qualified name)
46958        let this = self.parse_primary()?;
46959
46960        let mut metrics = None;
46961        let mut dimensions = None;
46962        let mut facts = None;
46963        let mut where_clause = None;
46964
46965        // Parse optional clauses: METRICS, DIMENSIONS, FACTS, WHERE
46966        while !self.check(TokenType::RParen) && !self.is_at_end() {
46967            if self.match_identifier("METRICS") {
46968                // Parse comma-separated expressions until next keyword or )
46969                let exprs = self.parse_semantic_view_list()?;
46970                metrics = Some(Box::new(Expression::Tuple(Box::new(Tuple {
46971                    expressions: exprs,
46972                }))));
46973            } else if self.match_identifier("DIMENSIONS") {
46974                let exprs = self.parse_semantic_view_list()?;
46975                dimensions = Some(Box::new(Expression::Tuple(Box::new(Tuple {
46976                    expressions: exprs,
46977                }))));
46978            } else if self.match_identifier("FACTS") {
46979                let exprs = self.parse_semantic_view_list()?;
46980                facts = Some(Box::new(Expression::Tuple(Box::new(Tuple {
46981                    expressions: exprs,
46982                }))));
46983            } else if self.match_token(TokenType::Where) {
46984                // Parse the WHERE expression
46985                where_clause = Some(Box::new(self.parse_expression()?));
46986                // WHERE is the last clause, break after parsing it
46987                break;
46988            } else {
46989                // Unknown token
46990                break;
46991            }
46992        }
46993
46994        Ok(Expression::SemanticView(Box::new(SemanticView {
46995            this: Box::new(this),
46996            metrics,
46997            dimensions,
46998            facts,
46999            where_: where_clause,
47000        })))
47001    }
47002
47003    /// Helper to parse comma-separated expression list for SEMANTIC_VIEW clauses
47004    /// Stops at METRICS, DIMENSIONS, FACTS, WHERE, or )
47005    /// Each element can have an optional AS alias: expr AS name
47006    fn parse_semantic_view_list(&mut self) -> Result<Vec<Expression>> {
47007        let first = self.parse_semantic_view_element()?;
47008        let mut exprs = vec![first];
47009        while self.match_token(TokenType::Comma) {
47010            // Check if next token is a keyword that starts a new clause
47011            if self.check_identifier("METRICS")
47012                || self.check_identifier("DIMENSIONS")
47013                || self.check_identifier("FACTS")
47014                || self.check(TokenType::Where)
47015                || self.check(TokenType::RParen)
47016            {
47017                break;
47018            }
47019            exprs.push(self.parse_semantic_view_element()?);
47020        }
47021        Ok(exprs)
47022    }
47023
47024    /// Parse a single SEMANTIC_VIEW element: expression [AS alias]
47025    fn parse_semantic_view_element(&mut self) -> Result<Expression> {
47026        let expr = self
47027            .parse_disjunction()?
47028            .ok_or_else(|| self.parse_error("Expected expression in SEMANTIC_VIEW clause"))?;
47029        // Check for optional explicit AS alias
47030        if self.match_token(TokenType::As) {
47031            let alias = self.expect_identifier_or_keyword_with_quoted()?;
47032            Ok(Expression::Alias(Box::new(crate::expressions::Alias {
47033                this: expr,
47034                alias,
47035                column_aliases: Vec::new(),
47036                pre_alias_comments: Vec::new(),
47037                trailing_comments: Vec::new(),
47038                inferred_type: None,
47039            })))
47040        } else {
47041            Ok(expr)
47042        }
47043    }
47044
47045    /// parse_grant_principal - Implemented from Python _parse_grant_principal
47046    /// Calls: parse_id_var
47047    #[allow(unused_variables, unused_mut)]
47048    pub fn parse_grant_principal(&mut self) -> Result<Option<Expression>> {
47049        if self.match_texts(&["ROLE", "GROUP"]) {
47050            // Matched one of: ROLE, GROUP
47051            return Ok(None);
47052        }
47053        Ok(None)
47054    }
47055
47056    /// parse_grant_privilege - Parse a single privilege in GRANT/REVOKE
47057    /// Parses: SELECT, INSERT, UPDATE(col1, col2), DELETE, etc.
47058    #[allow(unused_variables, unused_mut)]
47059    pub fn parse_grant_privilege(&mut self) -> Result<Option<Expression>> {
47060        // Collect privilege keywords (SELECT, INSERT, UPDATE, DELETE, ALL PRIVILEGES, etc.)
47061        let mut privilege_parts = Vec::new();
47062
47063        // Keep consuming keywords until we hit a follow token
47064        // Follow tokens are: comma, ON, left paren
47065        while !self.is_at_end() {
47066            // Check if we've hit a follow token
47067            if self.check(TokenType::Comma)
47068                || self.check(TokenType::On)
47069                || self.check(TokenType::LParen)
47070            {
47071                break;
47072            }
47073
47074            // Get the current token text
47075            let text = self.peek().text.to_ascii_uppercase();
47076            privilege_parts.push(text);
47077            self.skip();
47078        }
47079
47080        if privilege_parts.is_empty() {
47081            return Ok(None);
47082        }
47083
47084        let privilege_str = privilege_parts.join(" ");
47085
47086        // Check for column list in parentheses (e.g., UPDATE(col1, col2))
47087        let expressions = if self.match_token(TokenType::LParen) {
47088            let mut columns = Vec::new();
47089            loop {
47090                if let Some(col) = self.parse_column()? {
47091                    columns.push(col);
47092                } else {
47093                    break;
47094                }
47095                if !self.match_token(TokenType::Comma) {
47096                    break;
47097                }
47098            }
47099            self.match_token(TokenType::RParen);
47100            columns
47101        } else {
47102            Vec::new()
47103        };
47104
47105        Ok(Some(Expression::GrantPrivilege(Box::new(GrantPrivilege {
47106            this: Box::new(Expression::Identifier(Identifier::new(privilege_str))),
47107            expressions,
47108        }))))
47109    }
47110
47111    /// parse_grant_revoke_common - Parses common parts of GRANT/REVOKE statements
47112    /// Python: _parse_grant_revoke_common
47113    /// Returns a Tuple containing (privileges, kind, securable)
47114    pub fn parse_grant_revoke_common(&mut self) -> Result<Option<Expression>> {
47115        // Parse privileges (CSV of grant privileges)
47116        let mut privileges = Vec::new();
47117        loop {
47118            if let Some(priv_expr) = self.parse_grant_privilege()? {
47119                privileges.push(priv_expr);
47120            }
47121            if !self.match_token(TokenType::Comma) {
47122                break;
47123            }
47124        }
47125
47126        // Match ON keyword
47127        self.match_token(TokenType::On);
47128
47129        // Parse kind (TABLE, VIEW, SCHEMA, DATABASE, etc.)
47130        let kind = if self.match_texts(&[
47131            "TABLE",
47132            "VIEW",
47133            "SCHEMA",
47134            "DATABASE",
47135            "SEQUENCE",
47136            "FUNCTION",
47137            "PROCEDURE",
47138            "INDEX",
47139            "TYPE",
47140            "TABLESPACE",
47141            "ROLE",
47142            "USER",
47143        ]) {
47144            let kind_text = self.previous().text.to_ascii_uppercase();
47145            Some(Expression::Var(Box::new(Var { this: kind_text })))
47146        } else {
47147            None
47148        };
47149
47150        // Try to parse securable (table parts)
47151        let securable = self.parse_table_parts()?;
47152
47153        // Return as Tuple with three elements: privileges_list, kind, securable
47154        let privileges_expr = Expression::Tuple(Box::new(Tuple {
47155            expressions: privileges,
47156        }));
47157
47158        let mut result_exprs = vec![privileges_expr];
47159
47160        if let Some(k) = kind {
47161            result_exprs.push(k);
47162        } else {
47163            result_exprs.push(Expression::Null(Null));
47164        }
47165
47166        if let Some(s) = securable {
47167            result_exprs.push(s);
47168        } else {
47169            result_exprs.push(Expression::Null(Null));
47170        }
47171
47172        Ok(Some(Expression::Tuple(Box::new(Tuple {
47173            expressions: result_exprs,
47174        }))))
47175    }
47176
47177    /// parse_group - Parse GROUP BY clause
47178    /// Python: if not self._match(TokenType.GROUP_BY): return None; expressions = self._parse_csv(self._parse_disjunction)
47179    pub fn parse_group(&mut self) -> Result<Option<Expression>> {
47180        // Check for GROUP BY token (which should be parsed as Group + By tokens)
47181        if !self.match_token(TokenType::Group) {
47182            return Ok(None);
47183        }
47184        // Consume BY if present
47185        self.match_token(TokenType::By);
47186
47187        // Check for optional ALL/DISTINCT
47188        // Some(true) = ALL, Some(false) = DISTINCT, None = no modifier
47189        let all = if self.match_token(TokenType::All) {
47190            Some(true)
47191        } else if self.match_token(TokenType::Distinct) {
47192            Some(false)
47193        } else {
47194            None
47195        };
47196
47197        // Parse comma-separated expressions
47198        let mut expressions = Vec::new();
47199        loop {
47200            match self.parse_expression() {
47201                Ok(expr) => expressions.push(expr),
47202                Err(_) => break,
47203            }
47204            if !self.match_token(TokenType::Comma) {
47205                break;
47206            }
47207        }
47208
47209        // Handle TOTALS (ClickHouse)
47210        let totals = if self.match_text_seq(&["WITH", "TOTALS"]) {
47211            Some(Box::new(Expression::Boolean(BooleanLiteral {
47212                value: true,
47213            })))
47214        } else if self.match_text_seq(&["TOTALS"]) {
47215            Some(Box::new(Expression::Boolean(BooleanLiteral {
47216                value: true,
47217            })))
47218        } else {
47219            None
47220        };
47221
47222        Ok(Some(Expression::Group(Box::new(Group {
47223            expressions,
47224            grouping_sets: None,
47225            cube: None,
47226            rollup: None,
47227            totals,
47228            all,
47229        }))))
47230    }
47231
47232    /// parse_group_concat - Ported from Python _parse_group_concat
47233    #[allow(unused_variables, unused_mut)]
47234    /// parse_group_concat - Parses MySQL GROUP_CONCAT function
47235    /// Example: GROUP_CONCAT(DISTINCT col ORDER BY col SEPARATOR ',')
47236    pub fn parse_group_concat(&mut self) -> Result<Option<Expression>> {
47237        // Check for DISTINCT
47238        let distinct = self.match_token(TokenType::Distinct);
47239
47240        // Parse expression(s)
47241        let expr = self.parse_expression()?;
47242
47243        // Parse optional ORDER BY
47244        let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
47245            let mut orderings = Vec::new();
47246            loop {
47247                let order_expr = self.parse_expression()?;
47248                let desc = if self.match_token(TokenType::Desc) {
47249                    true
47250                } else {
47251                    self.match_token(TokenType::Asc);
47252                    false
47253                };
47254                let nulls_first = if self.match_keywords(&[TokenType::Nulls, TokenType::First]) {
47255                    Some(true)
47256                } else if self.match_keywords(&[TokenType::Nulls, TokenType::Last]) {
47257                    Some(false)
47258                } else {
47259                    None
47260                };
47261                orderings.push(Ordered {
47262                    this: order_expr,
47263                    desc,
47264                    nulls_first,
47265                    explicit_asc: !desc,
47266                    with_fill: None,
47267                });
47268                if !self.match_token(TokenType::Comma) {
47269                    break;
47270                }
47271            }
47272            Some(orderings)
47273        } else {
47274            None
47275        };
47276
47277        // Parse optional SEPARATOR
47278        let separator = if self.match_token(TokenType::Separator) {
47279            self.parse_string()?
47280        } else {
47281            None
47282        };
47283
47284        Ok(Some(Expression::GroupConcat(Box::new(GroupConcatFunc {
47285            this: expr,
47286            separator,
47287            order_by,
47288            distinct,
47289            filter: None,
47290            limit: None,
47291            inferred_type: None,
47292        }))))
47293    }
47294
47295    /// parse_grouping_set - Delegates to parse_grouping_sets
47296    #[allow(unused_variables, unused_mut)]
47297    pub fn parse_grouping_set(&mut self) -> Result<Option<Expression>> {
47298        self.parse_grouping_sets()
47299    }
47300
47301    /// parse_grouping_sets - Ported from Python _parse_grouping_sets
47302    /// Parses GROUPING SETS ((...), (...)) in GROUP BY
47303    #[allow(unused_variables, unused_mut)]
47304    pub fn parse_grouping_sets(&mut self) -> Result<Option<Expression>> {
47305        // Check for GROUPING SETS keyword
47306        if !self.match_text_seq(&["GROUPING", "SETS"]) {
47307            return Ok(None);
47308        }
47309
47310        // Parse wrapped grouping sets
47311        self.expect(TokenType::LParen)?;
47312        let mut expressions = Vec::new();
47313
47314        if !self.check(TokenType::RParen) {
47315            loop {
47316                // Each grouping set can be:
47317                // - A nested GROUPING SETS
47318                // - CUBE or ROLLUP
47319                // - A parenthesized list
47320                // - A single expression
47321                if let Some(nested) = self.parse_grouping_sets()? {
47322                    expressions.push(nested);
47323                } else if let Some(cube_rollup) = self.parse_cube_or_rollup()? {
47324                    expressions.push(cube_rollup);
47325                } else if self.match_token(TokenType::LParen) {
47326                    // Parenthesized group
47327                    let mut group = Vec::new();
47328                    if !self.check(TokenType::RParen) {
47329                        loop {
47330                            match self.parse_bitwise() {
47331                                Ok(Some(expr)) => group.push(expr),
47332                                Ok(None) => break,
47333                                Err(e) => return Err(e),
47334                            }
47335                            if !self.match_token(TokenType::Comma) {
47336                                break;
47337                            }
47338                        }
47339                    }
47340                    self.expect(TokenType::RParen)?;
47341                    expressions.push(Expression::Tuple(Box::new(Tuple { expressions: group })));
47342                } else {
47343                    // Single expression
47344                    match self.parse_bitwise() {
47345                        Ok(Some(expr)) => expressions.push(expr),
47346                        Ok(None) => break,
47347                        Err(e) => return Err(e),
47348                    }
47349                }
47350
47351                if !self.match_token(TokenType::Comma) {
47352                    break;
47353                }
47354            }
47355        }
47356
47357        self.expect(TokenType::RParen)?;
47358
47359        Ok(Some(Expression::GroupingSets(Box::new(GroupingSets {
47360            expressions,
47361        }))))
47362    }
47363
47364    /// parse_having - Parse HAVING clause
47365    /// Python: if not self._match(TokenType.HAVING): return None; return exp.Having(this=self._parse_disjunction())
47366    pub fn parse_having(&mut self) -> Result<Option<Expression>> {
47367        if !self.match_token(TokenType::Having) {
47368            return Ok(None);
47369        }
47370        // Parse the condition expression
47371        let condition = self.parse_expression()?;
47372        Ok(Some(Expression::Having(Box::new(Having {
47373            this: condition,
47374            comments: Vec::new(),
47375        }))))
47376    }
47377
47378    /// parse_having_max - Implemented from Python _parse_having_max
47379    /// Calls: parse_column
47380    #[allow(unused_variables, unused_mut)]
47381    pub fn parse_having_max(&mut self) -> Result<Option<Expression>> {
47382        if self.match_texts(&["MAX", "MIN"]) {
47383            // Matched one of: MAX, MIN
47384            return Ok(None);
47385        }
47386        Ok(None)
47387    }
47388
47389    /// parse_heredoc - Implemented from Python _parse_heredoc
47390    /// Parses dollar-quoted strings: $$content$$, $tag$content$tag$
47391    pub fn parse_heredoc(&mut self) -> Result<Option<Expression>> {
47392        // Check if current token is a HEREDOC_STRING type
47393        if self.match_token(TokenType::HeredocString) {
47394            let text = self.previous().text.clone();
47395            return Ok(Some(Expression::Heredoc(Box::new(Heredoc {
47396                this: Box::new(Expression::Literal(Box::new(Literal::String(text)))),
47397                tag: None,
47398            }))));
47399        }
47400
47401        // Try to parse $...$ or $tag$...$tag$
47402        if !self.match_text_seq(&["$"]) {
47403            return Ok(None);
47404        }
47405
47406        // Collect the tag text (if any) and the closing marker
47407        let mut tags = vec!["$".to_string()];
47408        let mut tag_text: Option<String> = None;
47409
47410        // Check if next token is connected (no whitespace) and collect tag
47411        if !self.is_at_end() {
47412            let next_text = self.peek().text.to_ascii_uppercase();
47413            if next_text == "$" {
47414                // Simple $$ ... $$ case
47415                self.skip();
47416                tags.push("$".to_string());
47417            } else {
47418                // $tag$ ... $tag$ case
47419                self.skip();
47420                tag_text = Some(next_text.clone());
47421                tags.push(next_text);
47422
47423                // Expect closing $
47424                if self.match_text_seq(&["$"]) {
47425                    tags.push("$".to_string());
47426                } else {
47427                    return Err(self.parse_error("No closing $ found"));
47428                }
47429            }
47430        }
47431
47432        // Now collect content until we find the closing tags
47433        let mut content_parts = Vec::new();
47434        let closing_tag = tags.join("");
47435
47436        while !self.is_at_end() {
47437            // Build current sequence to check for closing tag
47438            let current_text = self.peek().text.clone();
47439
47440            // Check if we've reached the closing tag
47441            if current_text == "$" || current_text.eq_ignore_ascii_case(&closing_tag) {
47442                // Try to match the full closing sequence
47443                let start_pos = self.current;
47444                let mut matched = true;
47445                for expected in &tags {
47446                    if self.is_at_end() || !self.peek().text.eq_ignore_ascii_case(expected) {
47447                        matched = false;
47448                        break;
47449                    }
47450                    self.skip();
47451                }
47452                if matched {
47453                    // Found the closing tag
47454                    let content = content_parts.join(" ");
47455                    return Ok(Some(Expression::Heredoc(Box::new(Heredoc {
47456                        this: Box::new(Expression::Literal(Box::new(Literal::String(content)))),
47457                        tag: tag_text
47458                            .map(|t| Box::new(Expression::Literal(Box::new(Literal::String(t))))),
47459                    }))));
47460                }
47461                // Not the closing tag, backtrack and add to content
47462                self.current = start_pos;
47463            }
47464
47465            content_parts.push(self.advance().text.clone());
47466        }
47467
47468        Err(self.parse_error(&format!("No closing {} found", closing_tag)))
47469    }
47470
47471    /// parse_hint_body - Delegates to parse_hint_fallback_to_string
47472    #[allow(unused_variables, unused_mut)]
47473    pub fn parse_hint_body(&mut self) -> Result<Option<Expression>> {
47474        self.parse_hint_fallback_to_string()
47475    }
47476
47477    /// parse_hint_fallback_to_string - Parses remaining hint tokens as a raw string
47478    /// Python: _parse_hint_fallback_to_string
47479    /// Used when structured hint parsing fails - collects all remaining tokens
47480    pub fn parse_hint_fallback_to_string(&mut self) -> Result<Option<Expression>> {
47481        // Collect all remaining tokens as a string
47482        let mut parts = Vec::new();
47483        while !self.is_at_end() {
47484            let token = self.advance();
47485            parts.push(token.text.clone());
47486        }
47487
47488        if parts.is_empty() {
47489            return Ok(None);
47490        }
47491
47492        let hint_text = parts.join(" ");
47493        Ok(Some(Expression::Hint(Box::new(Hint {
47494            expressions: vec![HintExpression::Raw(hint_text)],
47495        }))))
47496    }
47497
47498    /// parse_hint_function_call - Delegates to parse_function_call
47499    #[allow(unused_variables, unused_mut)]
47500    pub fn parse_hint_function_call(&mut self) -> Result<Option<Expression>> {
47501        self.parse_function_call()
47502    }
47503
47504    /// parse_historical_data - Snowflake AT/BEFORE time travel clauses
47505    /// Parses: AT(TIMESTAMP => expr) or BEFORE(STATEMENT => 'id') etc.
47506    /// Reference: https://docs.snowflake.com/en/sql-reference/constructs/at-before
47507    #[allow(unused_variables, unused_mut)]
47508    pub fn parse_historical_data(&mut self) -> Result<Option<Expression>> {
47509        // Save position for backtracking
47510        let start_index = self.current;
47511
47512        // Check for AT, BEFORE, or END keywords
47513        let this = if self.match_texts(&["AT", "BEFORE", "END"]) {
47514            self.previous().text.to_ascii_uppercase()
47515        } else {
47516            return Ok(None);
47517        };
47518
47519        // Expect opening paren and kind (OFFSET, STATEMENT, STREAM, TIMESTAMP, VERSION)
47520        if !self.match_token(TokenType::LParen) {
47521            // Backtrack if not the right pattern
47522            self.current = start_index;
47523            return Ok(None);
47524        }
47525
47526        let kind = if self.match_texts(&["OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"]) {
47527            self.previous().text.to_ascii_uppercase()
47528        } else {
47529            // Backtrack if not the right pattern
47530            self.current = start_index;
47531            return Ok(None);
47532        };
47533
47534        // Expect => and expression
47535        if !self.match_token(TokenType::FArrow) {
47536            self.current = start_index;
47537            return Ok(None);
47538        }
47539
47540        let expression = self.parse_bitwise()?;
47541        if expression.is_none() {
47542            self.current = start_index;
47543            return Ok(None);
47544        }
47545
47546        self.match_token(TokenType::RParen); // Consume closing paren
47547
47548        Ok(Some(Expression::HistoricalData(Box::new(HistoricalData {
47549            this: Box::new(Expression::Identifier(Identifier::new(this))),
47550            kind,
47551            expression: Box::new(expression.unwrap()),
47552        }))))
47553    }
47554
47555    /// parse_id_var - Ported from Python _parse_id_var
47556    /// Parses an identifier or variable (more permissive than parse_identifier)
47557    #[allow(unused_variables, unused_mut)]
47558    pub fn parse_id_var(&mut self) -> Result<Option<Expression>> {
47559        // First try to parse a regular identifier
47560        if let Some(ident) = self.parse_identifier()? {
47561            return Ok(Some(ident));
47562        }
47563
47564        // Try to match Var token type
47565        if self.match_token(TokenType::Var) {
47566            let text = self.previous().text.clone();
47567            return Ok(Some(Expression::Identifier(Identifier {
47568                name: text,
47569                quoted: false,
47570                trailing_comments: Vec::new(),
47571                span: None,
47572            })));
47573        }
47574
47575        // Try to match string as identifier (some dialects allow this)
47576        if self.match_token(TokenType::String) {
47577            let text = self.previous().text.clone();
47578            return Ok(Some(Expression::Identifier(Identifier {
47579                name: text,
47580                quoted: true,
47581                trailing_comments: Vec::new(),
47582                span: None,
47583            })));
47584        }
47585
47586        // Accept keywords as identifiers in some contexts
47587        if self.check(TokenType::Select)
47588            || self.check(TokenType::From)
47589            || self.check(TokenType::Where)
47590            || self.check(TokenType::And)
47591            || self.check(TokenType::Or)
47592            || self.check(TokenType::Not)
47593            || self.check(TokenType::True)
47594            || self.check(TokenType::False)
47595            || self.check(TokenType::Null)
47596        {
47597            // Don't consume keywords as identifiers in parse_id_var
47598            return Ok(None);
47599        }
47600
47601        Ok(None)
47602    }
47603
47604    /// parse_identifier - Parse quoted identifier
47605    /// Python: if self._match(TokenType.IDENTIFIER): return self._identifier_expression(quoted=True)
47606    pub fn parse_identifier(&mut self) -> Result<Option<Expression>> {
47607        // Match quoted identifiers (e.g., "column_name" or `column_name`)
47608        if self.match_token(TokenType::QuotedIdentifier) || self.match_token(TokenType::Identifier)
47609        {
47610            let text = self.previous().text.clone();
47611            let quoted = self.previous().token_type == TokenType::QuotedIdentifier;
47612            return Ok(Some(Expression::Identifier(Identifier {
47613                name: text,
47614                quoted,
47615                trailing_comments: Vec::new(),
47616                span: None,
47617            })));
47618        }
47619        Ok(None)
47620    }
47621
47622    /// Parse IF expression
47623    /// IF(condition, true_value, false_value) - function style
47624    /// IF condition THEN true_value ELSE false_value END - statement style
47625    pub fn parse_if(&mut self) -> Result<Option<Expression>> {
47626        // TSQL/Fabric: IF (cond) BEGIN ... END is a statement, not a function.
47627        // Parse condition, strip outer parens, then capture rest as command.
47628        if matches!(
47629            self.config.dialect,
47630            Some(crate::dialects::DialectType::TSQL) | Some(crate::dialects::DialectType::Fabric)
47631        ) && self.check(TokenType::LParen)
47632        {
47633            // Parse the parenthesized condition using balanced paren matching
47634            let cond_start = self.current;
47635            self.skip(); // consume opening (
47636            let mut depth = 1;
47637            while depth > 0 && !self.is_at_end() {
47638                if self.check(TokenType::LParen) {
47639                    depth += 1;
47640                } else if self.check(TokenType::RParen) {
47641                    depth -= 1;
47642                    if depth == 0 {
47643                        break;
47644                    }
47645                }
47646                self.skip();
47647            }
47648            // Extract condition text from source (inside outer parens)
47649            let cond_text = if let Some(ref source) = self.source {
47650                let inner_start = self.tokens[cond_start + 1].span.start;
47651                let inner_end = self.tokens[self.current].span.start;
47652                source[inner_start..inner_end].trim().to_string()
47653            } else {
47654                self.tokens_to_sql(cond_start + 1, self.current)
47655            };
47656            self.skip(); // consume closing )
47657
47658            // Now collect the rest (BEGIN...END) as raw text
47659            let body_start = self.current;
47660            while !self.is_at_end() && !self.check(TokenType::Semicolon) {
47661                self.skip();
47662            }
47663            let body_text = if let Some(ref source) = self.source {
47664                let start_span = self.tokens[body_start].span.start;
47665                let end_span = if self.current > 0 {
47666                    self.tokens[self.current - 1].span.end
47667                } else {
47668                    start_span
47669                };
47670                source[start_span..end_span].trim().to_string()
47671            } else {
47672                self.tokens_to_sql(body_start, self.current)
47673            };
47674            let command_text = format!("IF {} {}", cond_text, body_text);
47675            return Ok(Some(Expression::Command(Box::new(
47676                crate::expressions::Command { this: command_text },
47677            ))));
47678        }
47679
47680        // Function style: IF(cond, true, false)
47681        if self.match_token(TokenType::LParen) {
47682            // ClickHouse: if() with zero args is valid (used in test queries)
47683            if self.check(TokenType::RParen) {
47684                self.skip(); // consume RParen
47685                return Ok(Some(Expression::Function(Box::new(Function {
47686                    name: "IF".to_string(),
47687                    args: vec![],
47688                    distinct: false,
47689                    trailing_comments: Vec::new(),
47690                    use_bracket_syntax: false,
47691                    no_parens: false,
47692                    quoted: false,
47693                    span: None,
47694                    inferred_type: None,
47695                }))));
47696            }
47697            let args = self.parse_expression_list()?;
47698            self.expect(TokenType::RParen)?;
47699
47700            if args.len() == 3 {
47701                return Ok(Some(Expression::IfFunc(Box::new(IfFunc {
47702                    original_name: None,
47703                    condition: args[0].clone(),
47704                    true_value: args[1].clone(),
47705                    false_value: Some(args[2].clone()),
47706                    inferred_type: None,
47707                }))));
47708            } else if args.len() == 2 {
47709                return Ok(Some(Expression::IfFunc(Box::new(IfFunc {
47710                    original_name: None,
47711                    condition: args[0].clone(),
47712                    true_value: args[1].clone(),
47713                    false_value: None,
47714                    inferred_type: None,
47715                }))));
47716            } else if args.len() == 1 {
47717                return Ok(Some(Expression::Function(Box::new(Function {
47718                    name: "IF".to_string(),
47719                    args,
47720                    distinct: false,
47721                    trailing_comments: Vec::new(),
47722                    use_bracket_syntax: false,
47723                    no_parens: false,
47724                    quoted: false,
47725                    span: None,
47726                    inferred_type: None,
47727                }))));
47728            } else {
47729                return Err(self.parse_error("IF function requires 2 or 3 arguments"));
47730            }
47731        }
47732
47733        // TSQL: IF OBJECT_ID(...) IS NOT NULL [BEGIN] DROP TABLE x [; END] -> DROP TABLE IF EXISTS x
47734        if matches!(
47735            self.config.dialect,
47736            Some(crate::dialects::DialectType::TSQL) | Some(crate::dialects::DialectType::Fabric)
47737        ) {
47738            let saved = self.current;
47739            if self.match_text_seq(&["OBJECT_ID"]) {
47740                // Capture the OBJECT_ID arguments text for TSQL round-trip
47741                let object_id_args_text = if self.match_token(TokenType::LParen) {
47742                    let args_start = self.current;
47743                    let args = self.parse_expression_list()?;
47744                    // Reconstruct args text from source
47745                    let args_text = if let Some(ref source) = self.source {
47746                        let start_span = self.tokens[args_start].span.start;
47747                        let end_span = self.tokens[self.current].span.start;
47748                        source[start_span..end_span].trim().to_string()
47749                    } else {
47750                        // Fallback: generate from parsed expressions
47751                        args.iter()
47752                            .map(|a| format!("{:?}", a))
47753                            .collect::<Vec<_>>()
47754                            .join(", ")
47755                    };
47756                    let _ = self.match_token(TokenType::RParen);
47757                    Some(args_text)
47758                } else {
47759                    None
47760                };
47761                if self.match_text_seq(&["IS", "NOT", "NULL"]) {
47762                    // Check for DROP directly or BEGIN ... DROP ... END
47763                    let has_begin = self.match_token(TokenType::Begin);
47764                    if self.check(TokenType::Drop) {
47765                        // Parse DROP TABLE, forcing if_exists = true
47766                        self.skip(); // consume DROP
47767                        if self.match_token(TokenType::Table) {
47768                            // Parse table names
47769                            let mut names = Vec::new();
47770                            loop {
47771                                names.push(self.parse_table_ref()?);
47772                                if !self.match_token(TokenType::Comma) {
47773                                    break;
47774                                }
47775                            }
47776                            // If we had BEGIN, consume optional ; and END
47777                            if has_begin {
47778                                let _ = self.match_token(TokenType::Semicolon);
47779                                let _ = self.match_token(TokenType::End);
47780                            }
47781                            return Ok(Some(Expression::DropTable(Box::new(
47782                                crate::expressions::DropTable {
47783                                    names,
47784                                    if_exists: true,
47785                                    cascade: false,
47786                                    cascade_constraints: false,
47787                                    purge: false,
47788                                    leading_comments: Vec::new(),
47789                                    object_id_args: object_id_args_text,
47790                                    sync: false,
47791                                    iceberg: false,
47792                                    restrict: false,
47793                                },
47794                            ))));
47795                        }
47796                    }
47797                }
47798                // Retreat if pattern didn't match
47799                self.current = saved;
47800            }
47801        }
47802
47803        // Statement style: IF cond THEN true [ELSE false] END/ENDIF
47804        // Use parse_disjunction (parse_or) for condition - same as Python sqlglot
47805        // This ensures we stop at THEN rather than consuming too much
47806        let condition = match self.parse_disjunction()? {
47807            Some(c) => c,
47808            None => return Ok(None),
47809        };
47810
47811        if !self.match_token(TokenType::Then) {
47812            // Not statement style, return as just the expression parsed
47813            return Ok(Some(condition));
47814        }
47815
47816        // Parse true value - use parse_disjunction to stop at ELSE/END
47817        let true_value = match self.parse_disjunction()? {
47818            Some(v) => v,
47819            None => return Err(self.parse_error("Expected expression after THEN")),
47820        };
47821
47822        let false_value = if self.match_token(TokenType::Else) {
47823            match self.parse_disjunction()? {
47824                Some(v) => Some(v),
47825                None => return Err(self.parse_error("Expected expression after ELSE")),
47826            }
47827        } else {
47828            None
47829        };
47830
47831        // Consume END or ENDIF (Exasol tokenizes ENDIF as END)
47832        self.match_token(TokenType::End);
47833
47834        Ok(Some(Expression::IfFunc(Box::new(IfFunc {
47835            original_name: None,
47836            condition,
47837            true_value,
47838            false_value,
47839            inferred_type: None,
47840        }))))
47841    }
47842
47843    /// parse_in - Ported from Python _parse_in
47844    /// Parses IN expression: expr IN (values...) or expr IN (subquery)
47845    /// Can also parse standalone IN list after IN keyword has been matched
47846    #[allow(unused_variables, unused_mut)]
47847    pub fn parse_in(&mut self) -> Result<Option<Expression>> {
47848        // If we're at IN keyword, parse what follows
47849        if self.match_token(TokenType::In) {
47850            return Err(self.parse_error("Expected expression before IN"));
47851        }
47852
47853        // Try to parse as a complete expression: left IN (...)
47854        let saved_pos = self.current;
47855
47856        // Parse the left side expression
47857        match self.parse_bitwise() {
47858            Ok(Some(left_expr)) => {
47859                // Check for optional NOT
47860                let negate = self.match_token(TokenType::Not);
47861
47862                // Expect IN keyword
47863                if self.match_token(TokenType::In) {
47864                    let in_result = self.parse_in_with_expr(Some(left_expr))?;
47865                    return Ok(Some(if negate {
47866                        Expression::Not(Box::new(UnaryOp {
47867                            this: in_result,
47868                            inferred_type: None,
47869                        }))
47870                    } else {
47871                        in_result
47872                    }));
47873                }
47874
47875                // Not an IN expression, restore position
47876                self.current = saved_pos;
47877                Ok(None)
47878            }
47879            Ok(None) => {
47880                self.current = saved_pos;
47881                Ok(None)
47882            }
47883            Err(_) => {
47884                self.current = saved_pos;
47885                Ok(None)
47886            }
47887        }
47888    }
47889
47890    /// parse_index - Implemented from Python _parse_index
47891    /// Calls: parse_index_params, parse_id_var
47892    #[allow(unused_variables, unused_mut)]
47893    pub fn parse_index(&mut self) -> Result<Option<Expression>> {
47894        if self.match_text_seq(&["PRIMARY"]) {
47895            return Ok(Some(Expression::Index(Box::new(Index {
47896                this: None,
47897                table: None,
47898                unique: false,
47899                primary: None,
47900                amp: None,
47901                params: Vec::new(),
47902            }))));
47903        }
47904        if self.match_text_seq(&["AMP"]) {
47905            // Matched: AMP
47906            return Ok(None);
47907        }
47908        Ok(None)
47909    }
47910
47911    /// parse_index_params - Implemented from Python _parse_index_params
47912    /// Calls: parse_where, parse_wrapped_properties, parse_wrapped_id_vars
47913    #[allow(unused_variables, unused_mut)]
47914    pub fn parse_index_params(&mut self) -> Result<Option<Expression>> {
47915        if self.match_text_seq(&["INCLUDE"]) {
47916            return Ok(Some(Expression::IndexParameters(Box::new(
47917                IndexParameters {
47918                    using: None,
47919                    include: None,
47920                    columns: Vec::new(),
47921                    with_storage: None,
47922                    partition_by: None,
47923                    tablespace: None,
47924                    where_: None,
47925                    on: None,
47926                },
47927            ))));
47928        }
47929        if self.match_text_seq(&["USING", "INDEX", "TABLESPACE"]) {
47930            // Matched: USING INDEX TABLESPACE
47931            return Ok(None);
47932        }
47933        Ok(None)
47934    }
47935
47936    /// parse_initcap - Ported from Python _parse_initcap
47937    #[allow(unused_variables, unused_mut)]
47938    /// parse_initcap - Parses INITCAP function
47939    /// Example: INITCAP(str) or INITCAP(str, delimiter)
47940    pub fn parse_initcap(&mut self) -> Result<Option<Expression>> {
47941        // Parse the first argument (string to capitalize)
47942        let args = self.parse_expression_list()?;
47943
47944        if args.is_empty() {
47945            return Ok(None);
47946        }
47947
47948        // Initcap is a UnaryFunc
47949        Ok(Some(Expression::Initcap(Box::new(UnaryFunc::new(
47950            args.into_iter().next().unwrap(),
47951        )))))
47952    }
47953
47954    /// parse_inline - Implemented from Python _parse_inline
47955    #[allow(unused_variables, unused_mut)]
47956    pub fn parse_inline(&mut self) -> Result<Option<Expression>> {
47957        if self.match_text_seq(&["LENGTH"]) {
47958            // Matched: LENGTH
47959            return Ok(None);
47960        }
47961        Ok(None)
47962    }
47963
47964    /// parse_insert_table - Parse table reference for INSERT statement
47965    /// Parses: table_name [schema] [partition] [alias]
47966    /// This method is a simple wrapper around parse_table for INSERT context
47967    #[allow(unused_variables, unused_mut)]
47968    pub fn parse_insert_table(&mut self) -> Result<Option<Expression>> {
47969        // Parse the table reference - parse_table handles aliases
47970        self.parse_table()
47971    }
47972
47973    /// parse_interpolate - Implemented from Python _parse_interpolate
47974    /// Parses INTERPOLATE clause for ClickHouse ORDER BY WITH FILL
47975    pub fn parse_interpolate(&mut self) -> Result<Option<Expression>> {
47976        if !self.match_text_seq(&["INTERPOLATE"]) {
47977            return Ok(None);
47978        }
47979
47980        // Parse wrapped CSV of name-as-expression pairs
47981        if self.match_token(TokenType::LParen) {
47982            let mut expressions = Vec::new();
47983            loop {
47984                if let Some(expr) = self.parse_name_as_expression()? {
47985                    expressions.push(expr);
47986                }
47987                if !self.match_token(TokenType::Comma) {
47988                    break;
47989                }
47990            }
47991            self.match_token(TokenType::RParen);
47992
47993            if expressions.is_empty() {
47994                return Ok(None);
47995            }
47996
47997            return Ok(Some(Expression::Tuple(Box::new(Tuple { expressions }))));
47998        }
47999
48000        Ok(None)
48001    }
48002
48003    /// parse_interval - Creates Interval expression
48004    /// Parses INTERVAL expressions: INTERVAL '1 day', INTERVAL 1 MONTH, etc.
48005    #[allow(unused_variables, unused_mut)]
48006    pub fn parse_interval(&mut self) -> Result<Option<Expression>> {
48007        // Delegate to the existing try_parse_interval method
48008        self.try_parse_interval()
48009    }
48010
48011    /// parse_interval_span - Implemented from Python _parse_interval_span
48012    /// Calls: parse_function
48013    #[allow(unused_variables, unused_mut)]
48014    pub fn parse_interval_span(&mut self) -> Result<Option<Expression>> {
48015        if self.match_text_seq(&["TO"]) {
48016            return Ok(Some(Expression::Var(Box::new(Var {
48017                this: String::new(),
48018            }))));
48019        }
48020        if self.match_text_seq(&["TO"]) {
48021            // Matched: TO
48022            return Ok(None);
48023        }
48024        Ok(None)
48025    }
48026
48027    /// parse_into - Implemented from Python _parse_into
48028    /// Parses: INTO [TEMPORARY] [UNLOGGED] [TABLE] table_name
48029    /// Returns the table expression for the INTO clause
48030    #[allow(unused_variables, unused_mut)]
48031    pub fn parse_into(&mut self) -> Result<Option<Expression>> {
48032        if !self.match_token(TokenType::Into) {
48033            return Ok(None);
48034        }
48035
48036        // Optional TEMPORARY
48037        let _temp = self.match_token(TokenType::Temporary);
48038
48039        // Optional UNLOGGED
48040        let _unlogged = self.match_text_seq(&["UNLOGGED"]);
48041
48042        // Optional TABLE keyword
48043        let _ = self.match_token(TokenType::Table);
48044
48045        // Parse the table name
48046        self.parse_table_parts()
48047    }
48048
48049    /// parse_introducer - Parses MySQL introducer expression (_charset'string')
48050    /// Python: _parse_introducer
48051    /// Format: _charset 'literal'
48052    pub fn parse_introducer(&mut self) -> Result<Option<Expression>> {
48053        // We expect to have already consumed the introducer token (e.g., _utf8)
48054        let token = self.previous().clone();
48055
48056        // Try to parse a primary expression (usually a string literal)
48057        // parse_primary returns Expression (not Option), so we use it directly
48058        let literal = self.parse_primary()?;
48059
48060        // Check if it's a null expression (indicating nothing was parsed)
48061        match &literal {
48062            Expression::Null(_) => {
48063                // Just return as an identifier
48064                Ok(Some(Expression::Identifier(Identifier {
48065                    name: token.text.clone(),
48066                    quoted: false,
48067                    trailing_comments: Vec::new(),
48068                    span: None,
48069                })))
48070            }
48071            _ => Ok(Some(Expression::Introducer(Box::new(Introducer {
48072                this: Box::new(Expression::Identifier(Identifier {
48073                    name: token.text.clone(),
48074                    quoted: false,
48075                    trailing_comments: Vec::new(),
48076                    span: None,
48077                })),
48078                expression: Box::new(literal),
48079            })))),
48080        }
48081    }
48082
48083    /// parse_is - Implemented from Python _parse_is
48084    /// Calls: parse_null, parse_bitwise
48085    #[allow(unused_variables, unused_mut)]
48086    pub fn parse_is(&mut self) -> Result<Option<Expression>> {
48087        if self.match_text_seq(&["DISTINCT", "FROM"]) {
48088            return Ok(Some(Expression::JSON(Box::new(JSON {
48089                this: None,
48090                with_: None,
48091                unique: false,
48092            }))));
48093        }
48094        if self.match_text_seq(&["WITH"]) {
48095            // Matched: WITH
48096            return Ok(None);
48097        }
48098        if self.match_text_seq(&["WITHOUT"]) {
48099            // Matched: WITHOUT
48100            return Ok(None);
48101        }
48102        Ok(None)
48103    }
48104
48105    /// parse_join - Ported from Python _parse_join
48106    /// Parses a single JOIN clause: [method] [side] [kind] JOIN table [ON condition | USING (columns)]
48107    /// Returns the Join wrapped in an Expression, or None if no join is found
48108    #[allow(unused_variables, unused_mut)]
48109    pub fn parse_join(&mut self) -> Result<Option<Expression>> {
48110        // Check for comma-style implicit join
48111        if self.match_token(TokenType::Comma) {
48112            if let Ok(Some(table)) = self.parse_table() {
48113                return Ok(Some(Expression::Join(Box::new(Join {
48114                    this: table,
48115                    on: None,
48116                    using: Vec::new(),
48117                    kind: JoinKind::Implicit,
48118                    use_inner_keyword: false,
48119                    use_outer_keyword: false,
48120                    deferred_condition: false,
48121                    join_hint: None,
48122                    match_condition: None,
48123                    pivots: Vec::new(),
48124                    comments: Vec::new(),
48125                    nesting_group: 0,
48126                    directed: false,
48127                }))));
48128            }
48129            return Ok(None);
48130        }
48131
48132        // Try to parse join kind (INNER, LEFT, RIGHT, FULL, CROSS, etc.)
48133        let saved_pos = self.current;
48134        if let Some((kind, needs_join_keyword, use_inner_keyword, use_outer_keyword, join_hint)) =
48135            self.try_parse_join_kind()
48136        {
48137            // Collect comments from tokens consumed by try_parse_join_kind
48138            let mut join_comments = Vec::new();
48139            for i in saved_pos..self.current {
48140                if i < self.tokens.len() {
48141                    join_comments.extend(self.tokens[i].trailing_comments.iter().cloned());
48142                }
48143            }
48144
48145            // If kind requires JOIN keyword, expect it
48146            if needs_join_keyword && !self.match_token(TokenType::Join) {
48147                self.current = saved_pos;
48148                return Ok(None);
48149            }
48150
48151            // Parse the table being joined
48152            let table = self.parse_table_expression()?;
48153
48154            // Parse ON or USING condition
48155            let (on, using) = if self.match_token(TokenType::On) {
48156                (Some(self.parse_expression()?), Vec::new())
48157            } else if self.match_token(TokenType::Using) {
48158                let has_parens = self.match_token(TokenType::LParen);
48159                // Use parse_using_column_list to handle qualified names like t1.col
48160                let cols = self.parse_using_column_list()?;
48161                if has_parens {
48162                    self.expect(TokenType::RParen)?;
48163                }
48164                (None, cols)
48165            } else {
48166                (None, Vec::new())
48167            };
48168
48169            return Ok(Some(Expression::Join(Box::new(Join {
48170                this: table,
48171                on,
48172                using,
48173                kind,
48174                use_inner_keyword,
48175                use_outer_keyword,
48176                deferred_condition: false,
48177                join_hint,
48178                match_condition: None,
48179                pivots: Vec::new(),
48180                comments: join_comments,
48181                nesting_group: 0,
48182                directed: false,
48183            }))));
48184        }
48185
48186        // Check for CROSS APPLY / OUTER APPLY (SQL Server)
48187        if self.match_text_seq(&["CROSS", "APPLY"]) || self.match_text_seq(&["OUTER", "APPLY"]) {
48188            let is_outer = self.previous().text.eq_ignore_ascii_case("OUTER");
48189            let table = self.parse_table_expression()?;
48190            return Ok(Some(Expression::Join(Box::new(Join {
48191                this: table,
48192                on: None,
48193                using: Vec::new(),
48194                kind: if is_outer {
48195                    JoinKind::Outer
48196                } else {
48197                    JoinKind::Cross
48198                },
48199                use_inner_keyword: false,
48200                use_outer_keyword: is_outer,
48201                deferred_condition: false,
48202                join_hint: None,
48203                match_condition: None,
48204                pivots: Vec::new(),
48205                comments: Vec::new(),
48206                nesting_group: 0,
48207                directed: false,
48208            }))));
48209        }
48210
48211        Ok(None)
48212    }
48213
48214    /// parse_join_hint - Spark/Hive join hints (BROADCAST, MERGE, SHUFFLE_HASH, etc.)
48215    /// Parses: HINT_NAME(table1, table2, ...)
48216    /// hint_name should be the already matched hint keyword (BROADCAST, MAPJOIN, etc.)
48217    #[allow(unused_variables, unused_mut)]
48218    pub fn parse_join_hint(&mut self, hint_name: &str) -> Result<Option<Expression>> {
48219        // Parse comma-separated list of tables
48220        let mut tables = Vec::new();
48221        loop {
48222            if let Some(table) = self.parse_table()? {
48223                tables.push(table);
48224            } else {
48225                break;
48226            }
48227            if !self.match_token(TokenType::Comma) {
48228                break;
48229            }
48230        }
48231
48232        Ok(Some(Expression::JoinHint(Box::new(JoinHint {
48233            this: Box::new(Expression::Identifier(Identifier::new(
48234                hint_name.to_ascii_uppercase(),
48235            ))),
48236            expressions: tables,
48237        }))))
48238    }
48239
48240    /// parse_join_parts - Ported from Python _parse_join_parts
48241    /// Returns (method, side, kind) where each is an optional string
48242    /// method: ASOF, NATURAL, POSITIONAL
48243    /// side: LEFT, RIGHT, FULL
48244    /// kind: ANTI, CROSS, INNER, OUTER, SEMI
48245    pub fn parse_join_parts(&mut self) -> (Option<String>, Option<String>, Option<String>) {
48246        // Parse join method (ASOF, NATURAL, POSITIONAL)
48247        let method = if self.match_texts(&["ASOF", "NATURAL", "POSITIONAL"]) {
48248            Some(self.previous().text.to_ascii_uppercase())
48249        } else {
48250            None
48251        };
48252
48253        // Parse join side (LEFT, RIGHT, FULL)
48254        let side = if self.match_texts(&["LEFT", "RIGHT", "FULL"]) {
48255            Some(self.previous().text.to_ascii_uppercase())
48256        } else {
48257            None
48258        };
48259
48260        // Parse join kind (ANTI, CROSS, INNER, OUTER, SEMI)
48261        let kind = if self.match_texts(&["ANTI", "CROSS", "INNER", "OUTER", "SEMI"]) {
48262            Some(self.previous().text.to_ascii_uppercase())
48263        } else if self.match_token(TokenType::StraightJoin) {
48264            Some("STRAIGHT_JOIN".to_string())
48265        } else {
48266            None
48267        };
48268
48269        (method, side, kind)
48270    }
48271
48272    /// parse_journal - Parses JOURNAL property (Teradata)
48273    /// Python: _parse_journal
48274    /// Creates a JournalProperty expression
48275    pub fn parse_journal(&mut self) -> Result<Option<Expression>> {
48276        self.parse_journal_impl(false, false, false, false, false)
48277    }
48278
48279    /// Implementation of parse_journal with options
48280    pub fn parse_journal_impl(
48281        &mut self,
48282        no: bool,
48283        dual: bool,
48284        before: bool,
48285        local: bool,
48286        after: bool,
48287    ) -> Result<Option<Expression>> {
48288        Ok(Some(Expression::JournalProperty(Box::new(
48289            JournalProperty {
48290                no: if no {
48291                    Some(Box::new(Expression::Boolean(BooleanLiteral {
48292                        value: true,
48293                    })))
48294                } else {
48295                    None
48296                },
48297                dual: if dual {
48298                    Some(Box::new(Expression::Boolean(BooleanLiteral {
48299                        value: true,
48300                    })))
48301                } else {
48302                    None
48303                },
48304                before: if before {
48305                    Some(Box::new(Expression::Boolean(BooleanLiteral {
48306                        value: true,
48307                    })))
48308                } else {
48309                    None
48310                },
48311                local: if local {
48312                    Some(Box::new(Expression::Boolean(BooleanLiteral {
48313                        value: true,
48314                    })))
48315                } else {
48316                    None
48317                },
48318                after: if after {
48319                    Some(Box::new(Expression::Boolean(BooleanLiteral {
48320                        value: true,
48321                    })))
48322                } else {
48323                    None
48324                },
48325            },
48326        ))))
48327    }
48328
48329    /// parse_json_column_def - Implemented from Python _parse_json_column_def
48330    /// Calls: parse_string, parse_json_schema, parse_id_var
48331    #[allow(unused_variables, unused_mut)]
48332    pub fn parse_json_column_def(&mut self) -> Result<Option<Expression>> {
48333        if self.match_text_seq(&["NESTED"]) {
48334            return Ok(Some(Expression::JSONColumnDef(Box::new(JSONColumnDef {
48335                this: None,
48336                kind: None,
48337                path: None,
48338                nested_schema: None,
48339                ordinality: None,
48340            }))));
48341        }
48342        if self.match_text_seq(&["PATH"]) {
48343            // Matched: PATH
48344            return Ok(None);
48345        }
48346        Ok(None)
48347    }
48348
48349    /// parse_json_key_value - Implemented from Python _parse_json_key_value
48350    #[allow(unused_variables, unused_mut)]
48351    /// parse_json_key_value - Parses a JSON key-value pair
48352    /// Python: _parse_json_key_value
48353    /// Format: [KEY] key [: | VALUE] value
48354    pub fn parse_json_key_value(&mut self) -> Result<Option<Expression>> {
48355        // Optional KEY keyword
48356        self.match_text_seq(&["KEY"]);
48357
48358        // Parse the key expression
48359        let key = self.parse_column()?;
48360
48361        // Match separator (colon, comma, or VALUE keyword)
48362        let _ = self.match_token(TokenType::Colon)
48363            || self.match_token(TokenType::Comma)
48364            || self.match_text_seq(&["VALUE"]);
48365
48366        // Optional VALUE keyword
48367        self.match_text_seq(&["VALUE"]);
48368
48369        // Parse the value expression
48370        let value = self.parse_bitwise()?;
48371
48372        // If neither key nor value, return None
48373        match (key, value) {
48374            (None, None) => Ok(None),
48375            (Some(k), None) => Ok(Some(Expression::JSONKeyValue(Box::new(JSONKeyValue {
48376                this: Box::new(k),
48377                expression: Box::new(Expression::Null(Null)),
48378            })))),
48379            (None, Some(v)) => Ok(Some(Expression::JSONKeyValue(Box::new(JSONKeyValue {
48380                this: Box::new(Expression::Null(Null)),
48381                expression: Box::new(v),
48382            })))),
48383            (Some(k), Some(v)) => Ok(Some(Expression::JSONKeyValue(Box::new(JSONKeyValue {
48384                this: Box::new(k),
48385                expression: Box::new(v),
48386            })))),
48387        }
48388    }
48389
48390    /// parse_json_object - Parses JSON_OBJECT function
48391    /// Python: _parse_json_object
48392    /// Handles both JSON_OBJECT and JSON_OBJECTAGG
48393    pub fn parse_json_object(&mut self) -> Result<Option<Expression>> {
48394        self.parse_json_object_impl(false)
48395    }
48396
48397    /// Implementation of JSON object parsing with aggregate flag
48398    pub fn parse_json_object_impl(&mut self, agg: bool) -> Result<Option<Expression>> {
48399        // Try to parse a star expression
48400        let star = self.parse_star()?;
48401
48402        // Parse expressions: either star or comma-separated key-value pairs
48403        let expressions = if let Some(star_expr) = star {
48404            vec![star_expr]
48405        } else {
48406            // Parse comma-separated JSON key-value pairs
48407            let mut exprs = Vec::new();
48408            loop {
48409                if let Some(kv) = self.parse_json_key_value()? {
48410                    // Wrap with FORMAT JSON if specified
48411                    if self.match_text_seq(&["FORMAT", "JSON"]) {
48412                        exprs.push(Expression::JSONFormat(Box::new(JSONFormat {
48413                            this: Some(Box::new(kv)),
48414                            options: Vec::new(),
48415                            is_json: None,
48416                            to_json: None,
48417                        })));
48418                    } else {
48419                        exprs.push(kv);
48420                    }
48421                } else {
48422                    break;
48423                }
48424                if !self.match_token(TokenType::Comma) {
48425                    break;
48426                }
48427            }
48428            exprs
48429        };
48430
48431        // Parse NULL handling: NULL ON NULL or ABSENT ON NULL
48432        let null_handling = self.parse_json_on_null_handling()?;
48433
48434        // Parse UNIQUE KEYS option
48435        let unique_keys = if self.match_text_seq(&["WITH", "UNIQUE"]) {
48436            self.match_text_seq(&["KEYS"]);
48437            Some(Box::new(Expression::Boolean(BooleanLiteral {
48438                value: true,
48439            })))
48440        } else if self.match_text_seq(&["WITHOUT", "UNIQUE"]) {
48441            self.match_text_seq(&["KEYS"]);
48442            Some(Box::new(Expression::Boolean(BooleanLiteral {
48443                value: false,
48444            })))
48445        } else {
48446            None
48447        };
48448
48449        // Consume optional KEYS keyword
48450        self.match_text_seq(&["KEYS"]);
48451
48452        // Parse RETURNING clause
48453        let return_type = if self.match_text_seq(&["RETURNING"]) {
48454            let type_expr = self.parse_type()?;
48455            // Wrap with FORMAT JSON if specified
48456            if self.match_text_seq(&["FORMAT", "JSON"]) {
48457                type_expr.map(|t| {
48458                    Box::new(Expression::JSONFormat(Box::new(JSONFormat {
48459                        this: Some(Box::new(t)),
48460                        options: Vec::new(),
48461                        is_json: None,
48462                        to_json: None,
48463                    })))
48464                })
48465            } else {
48466                type_expr.map(Box::new)
48467            }
48468        } else {
48469            None
48470        };
48471
48472        // Parse ENCODING option
48473        let encoding = if self.match_text_seq(&["ENCODING"]) {
48474            self.parse_var()?.map(Box::new)
48475        } else {
48476            None
48477        };
48478
48479        if agg {
48480            Ok(Some(Expression::JSONObjectAgg(Box::new(JSONObjectAgg {
48481                expressions,
48482                null_handling,
48483                unique_keys,
48484                return_type,
48485                encoding,
48486            }))))
48487        } else {
48488            Ok(Some(Expression::JSONObject(Box::new(JSONObject {
48489                expressions,
48490                null_handling,
48491                unique_keys,
48492                return_type,
48493                encoding,
48494            }))))
48495        }
48496    }
48497
48498    /// Parse JSON NULL handling clause: NULL ON NULL or ABSENT ON NULL
48499    fn parse_json_on_null_handling(&mut self) -> Result<Option<Box<Expression>>> {
48500        if self.match_text_seq(&["NULL", "ON", "NULL"]) {
48501            Ok(Some(Box::new(Expression::Var(Box::new(Var {
48502                this: "NULL ON NULL".to_string(),
48503            })))))
48504        } else if self.match_text_seq(&["ABSENT", "ON", "NULL"]) {
48505            Ok(Some(Box::new(Expression::Var(Box::new(Var {
48506                this: "ABSENT ON NULL".to_string(),
48507            })))))
48508        } else {
48509            Ok(None)
48510        }
48511    }
48512
48513    /// parse_json_schema - Implemented from Python _parse_json_schema
48514    #[allow(unused_variables, unused_mut)]
48515    pub fn parse_json_schema(&mut self) -> Result<Option<Expression>> {
48516        if self.match_text_seq(&["COLUMNS"]) {
48517            return Ok(Some(Expression::JSONSchema(Box::new(JSONSchema {
48518                expressions: Vec::new(),
48519            }))));
48520        }
48521        Ok(None)
48522    }
48523
48524    /// Parse JSON_TABLE COLUMNS clause: COLUMNS (column_def, column_def, ...) or COLUMNS column_def
48525    /// Column definitions can be:
48526    /// - name type PATH 'json_path'
48527    /// - name FOR ORDINALITY
48528    /// - NESTED [PATH] 'json_path' COLUMNS (...)
48529    pub fn parse_json_table_columns(&mut self) -> Result<Option<Expression>> {
48530        if !self.match_text_seq(&["COLUMNS"]) {
48531            return Ok(None);
48532        }
48533
48534        // Check for opening paren - Oracle supports both COLUMNS(...) and COLUMNS col PATH '...'
48535        let has_parens = self.match_token(TokenType::LParen);
48536
48537        let mut columns = Vec::new();
48538
48539        // Parse column definitions
48540        if has_parens {
48541            // COLUMNS(col1, col2, ...)
48542            if !self.check(TokenType::RParen) {
48543                loop {
48544                    if let Some(col_def) = self.parse_json_table_column_def()? {
48545                        columns.push(col_def);
48546                    }
48547                    if !self.match_token(TokenType::Comma) {
48548                        break;
48549                    }
48550                }
48551            }
48552            // Expect closing paren for COLUMNS(...)
48553            self.expect(TokenType::RParen)?;
48554        } else {
48555            // COLUMNS col PATH '...' (single column without parens)
48556            if let Some(col_def) = self.parse_json_table_column_def()? {
48557                columns.push(col_def);
48558            }
48559        }
48560
48561        Ok(Some(Expression::JSONSchema(Box::new(JSONSchema {
48562            expressions: columns,
48563        }))))
48564    }
48565
48566    /// Parse a single JSON_TABLE column definition
48567    /// Formats:
48568    /// - name [FOR ORDINALITY] [type] [PATH 'path']
48569    /// - NESTED [PATH] 'path' COLUMNS (...)
48570    pub fn parse_json_table_column_def(&mut self) -> Result<Option<Expression>> {
48571        // Check for NESTED column
48572        if self.match_text_seq(&["NESTED"]) {
48573            // NESTED [PATH] 'json_path' COLUMNS (...)
48574            self.match_text_seq(&["PATH"]); // Optional PATH keyword
48575            let path = self.parse_string()?;
48576            let nested_schema = self.parse_json_table_columns()?;
48577
48578            return Ok(Some(Expression::JSONColumnDef(Box::new(JSONColumnDef {
48579                this: None,
48580                kind: None,
48581                path: path.map(Box::new),
48582                nested_schema: nested_schema.map(Box::new),
48583                ordinality: None,
48584            }))));
48585        }
48586
48587        // Regular column: name [FOR ORDINALITY] [type] [PATH 'path']
48588        let name = self.parse_id_var()?;
48589        if name.is_none() {
48590            return Ok(None);
48591        }
48592
48593        // Check for FOR ORDINALITY
48594        let ordinality = if self.match_text_seq(&["FOR", "ORDINALITY"]) {
48595            Some(Box::new(Expression::Boolean(BooleanLiteral {
48596                value: true,
48597            })))
48598        } else {
48599            None
48600        };
48601
48602        // Parse data type (if not FOR ORDINALITY, type is expected)
48603        let kind = if ordinality.is_none() {
48604            // Try to parse a data type
48605            let data_type = self.parse_data_type_optional()?;
48606            data_type.map(|dt| self.data_type_to_string(&dt))
48607        } else {
48608            None
48609        };
48610
48611        // Parse PATH 'json_path'
48612        let path = if self.match_text_seq(&["PATH"]) {
48613            self.parse_string()?
48614        } else {
48615            None
48616        };
48617
48618        Ok(Some(Expression::JSONColumnDef(Box::new(JSONColumnDef {
48619            this: name.map(Box::new),
48620            kind,
48621            path: path.map(Box::new),
48622            nested_schema: None,
48623            ordinality,
48624        }))))
48625    }
48626
48627    /// Parse JSON_TABLE function
48628    /// JSON_TABLE(expr, path COLUMNS (...)) [ON ERROR ...] [ON EMPTY ...]
48629    pub fn parse_json_table(&mut self) -> Result<Option<Expression>> {
48630        // Parse the JSON expression
48631        let this = self.parse_expression()?;
48632
48633        // Optional path after comma
48634        let path = if self.match_token(TokenType::Comma) {
48635            if let Some(s) = self.parse_string()? {
48636                Some(Box::new(s))
48637            } else {
48638                None
48639            }
48640        } else {
48641            None
48642        };
48643
48644        // Parse error handling: ON ERROR NULL or ON ERROR ERROR
48645        let error_handling = if self.match_text_seq(&["ON", "ERROR"]) {
48646            if self.match_text_seq(&["NULL"]) {
48647                Some(Box::new(Expression::Var(Box::new(Var {
48648                    this: "NULL".to_string(),
48649                }))))
48650            } else if self.match_text_seq(&["ERROR"]) {
48651                Some(Box::new(Expression::Var(Box::new(Var {
48652                    this: "ERROR".to_string(),
48653                }))))
48654            } else {
48655                None
48656            }
48657        } else {
48658            None
48659        };
48660
48661        // Parse empty handling: ON EMPTY NULL or ON EMPTY ERROR
48662        let empty_handling = if self.match_text_seq(&["ON", "EMPTY"]) {
48663            if self.match_text_seq(&["NULL"]) {
48664                Some(Box::new(Expression::Var(Box::new(Var {
48665                    this: "NULL".to_string(),
48666                }))))
48667            } else if self.match_text_seq(&["ERROR"]) {
48668                Some(Box::new(Expression::Var(Box::new(Var {
48669                    this: "ERROR".to_string(),
48670                }))))
48671            } else {
48672                None
48673            }
48674        } else {
48675            None
48676        };
48677
48678        // Parse COLUMNS clause
48679        let schema = self.parse_json_schema()?;
48680
48681        Ok(Some(Expression::JSONTable(Box::new(JSONTable {
48682            this: Box::new(this),
48683            schema: schema.map(Box::new),
48684            path,
48685            error_handling,
48686            empty_handling,
48687        }))))
48688    }
48689
48690    /// parse_json_value - Ported from Python _parse_json_value
48691    #[allow(unused_variables, unused_mut)]
48692    /// parse_json_value - Parses JSON_VALUE function
48693    /// Example: JSON_VALUE(json, '$.path' RETURNING type)
48694    pub fn parse_json_value(&mut self) -> Result<Option<Expression>> {
48695        // Parse the JSON expression
48696        let this = self.parse_expression()?;
48697
48698        // Parse path (after comma)
48699        self.match_token(TokenType::Comma);
48700        let path = self.parse_expression()?;
48701
48702        // Parse optional RETURNING type
48703        let returning = if self.match_token(TokenType::Returning) {
48704            Some(Box::new(self.parse_expression()?))
48705        } else {
48706            None
48707        };
48708
48709        // Parse optional ON condition (ON ERROR, ON EMPTY)
48710        let on_condition = if self.check(TokenType::On) {
48711            self.parse_on_condition()?
48712        } else {
48713            None
48714        };
48715
48716        Ok(Some(Expression::JSONValue(Box::new(JSONValue {
48717            this: Box::new(this),
48718            path: Some(Box::new(path)),
48719            returning,
48720            on_condition: on_condition.map(Box::new),
48721        }))))
48722    }
48723
48724    /// parse_key_constraint_options - Implemented from Python _parse_key_constraint_options
48725    #[allow(unused_variables, unused_mut)]
48726    pub fn parse_key_constraint_options(&mut self) -> Result<Option<Expression>> {
48727        if self.match_text_seq(&["NO", "ACTION"]) {
48728            // Matched: NO ACTION
48729            return Ok(None);
48730        }
48731        if self.match_text_seq(&["CASCADE"]) {
48732            // Matched: CASCADE
48733            return Ok(None);
48734        }
48735        if self.match_text_seq(&["RESTRICT"]) {
48736            // Matched: RESTRICT
48737            return Ok(None);
48738        }
48739        Ok(None)
48740    }
48741
48742    /// parse_lambda - Ported from Python _parse_lambda
48743    /// Parses lambda expressions: x -> x + 1 or (x, y) -> x + y
48744    /// Also supports DuckDB syntax: LAMBDA x : x + 1
48745    #[allow(unused_variables, unused_mut)]
48746    pub fn parse_lambda(&mut self) -> Result<Option<Expression>> {
48747        let start_index = self.current;
48748
48749        // Check for DuckDB's LAMBDA keyword syntax: LAMBDA x : expr
48750        // ClickHouse doesn't use LAMBDA keyword — lambda is just a function name there
48751        if !matches!(
48752            self.config.dialect,
48753            Some(crate::dialects::DialectType::ClickHouse)
48754        ) && self.match_token(TokenType::Lambda)
48755        {
48756            // Parse lambda parameters (comma-separated identifiers)
48757            let mut params = Vec::new();
48758            loop {
48759                // Use is_identifier_token which handles Identifier, QuotedIdentifier, and Var
48760                if self.is_identifier_token() {
48761                    let token = self.advance();
48762                    let quoted = token.token_type == TokenType::QuotedIdentifier;
48763                    params.push(Identifier {
48764                        name: token.text,
48765                        quoted,
48766                        trailing_comments: Vec::new(),
48767                        span: None,
48768                    });
48769                } else {
48770                    break;
48771                }
48772                if !self.match_token(TokenType::Comma) {
48773                    break;
48774                }
48775            }
48776
48777            // Must have at least one parameter
48778            if params.is_empty() {
48779                return Err(self.parse_error("LAMBDA requires at least one parameter"));
48780            }
48781
48782            // Expect colon separator
48783            if !self.match_token(TokenType::Colon) {
48784                return Err(self.parse_error("Expected ':' after LAMBDA parameters"));
48785            }
48786
48787            let body = self.parse_expression()?;
48788            return Ok(Some(Expression::Lambda(Box::new(LambdaExpr {
48789                parameters: params,
48790                body,
48791                colon: true,
48792                parameter_types: Vec::new(),
48793            }))));
48794        }
48795
48796        // Try to parse lambda parameters
48797        let parameters = if self.match_token(TokenType::LParen) {
48798            // Parenthesized parameters: (x, y) -> ...
48799            let mut params = Vec::new();
48800            if !self.check(TokenType::RParen) {
48801                loop {
48802                    if let Some(ident) = self.parse_identifier()? {
48803                        if let Expression::Identifier(id) = ident {
48804                            params.push(id);
48805                        }
48806                    }
48807                    if !self.match_token(TokenType::Comma) {
48808                        break;
48809                    }
48810                }
48811            }
48812            if !self.match_token(TokenType::RParen) {
48813                // Not a lambda, retreat
48814                self.current = start_index;
48815                return Ok(None);
48816            }
48817            params
48818        } else {
48819            // Single parameter: x -> ...
48820            if let Some(ident) = self.parse_identifier()? {
48821                if let Expression::Identifier(id) = ident {
48822                    vec![id]
48823                } else {
48824                    self.current = start_index;
48825                    return Ok(None);
48826                }
48827            } else {
48828                return Ok(None);
48829            }
48830        };
48831
48832        // Check for arrow operator
48833        if self.match_token(TokenType::Arrow) || self.match_token(TokenType::FArrow) {
48834            // Parse lambda body
48835            let body = self.parse_expression()?;
48836            Ok(Some(Expression::Lambda(Box::new(LambdaExpr {
48837                parameters,
48838                body,
48839                colon: false,
48840                parameter_types: Vec::new(),
48841            }))))
48842        } else {
48843            // Not a lambda, retreat
48844            self.current = start_index;
48845            Ok(None)
48846        }
48847    }
48848
48849    /// parse_lambda_arg - Delegates to parse_id_var
48850    #[allow(unused_variables, unused_mut)]
48851    pub fn parse_lambda_arg(&mut self) -> Result<Option<Expression>> {
48852        self.parse_id_var()
48853    }
48854
48855    /// parse_lateral - Parse LATERAL subquery or table function
48856    /// Python: if self._match(TokenType.LATERAL): return exp.Lateral(this=..., view=..., outer=...)
48857    pub fn parse_lateral(&mut self) -> Result<Option<Expression>> {
48858        // Check for CROSS APPLY / OUTER APPLY (handled by join parsing in try_parse_join_kind)
48859        // This method focuses on LATERAL keyword parsing
48860
48861        if !self.match_token(TokenType::Lateral) {
48862            return Ok(None);
48863        }
48864
48865        // Check for LATERAL VIEW (Hive/Spark syntax)
48866        let view = self.match_token(TokenType::View);
48867        let outer = if view {
48868            self.match_token(TokenType::Outer)
48869        } else {
48870            false
48871        };
48872
48873        // Parse the lateral expression (subquery, function call, or table reference)
48874        let this = if self.check(TokenType::LParen) {
48875            // Could be a subquery: LATERAL (SELECT ...)
48876            self.expect(TokenType::LParen)?;
48877            let inner = self.parse_statement()?;
48878            self.expect(TokenType::RParen)?;
48879            inner
48880        } else {
48881            // Could be a function or table reference: LATERAL unnest(...)
48882            self.parse_primary()?
48883        };
48884
48885        // Parse optional alias
48886        let alias = if self.match_token(TokenType::As) {
48887            Some(self.expect_identifier()?)
48888        } else if self.check(TokenType::Identifier) && !self.check_keyword() {
48889            Some(self.expect_identifier()?)
48890        } else {
48891            None
48892        };
48893
48894        // Parse optional column aliases: AS alias(col1, col2, ...)
48895        let column_aliases = if alias.is_some() && self.match_token(TokenType::LParen) {
48896            let mut cols = Vec::new();
48897            loop {
48898                if self.check(TokenType::RParen) {
48899                    break;
48900                }
48901                let col = self.expect_identifier()?;
48902                cols.push(col);
48903                if !self.match_token(TokenType::Comma) {
48904                    break;
48905                }
48906            }
48907            self.expect(TokenType::RParen)?;
48908            cols
48909        } else {
48910            Vec::new()
48911        };
48912
48913        Ok(Some(Expression::Lateral(Box::new(Lateral {
48914            this: Box::new(this),
48915            view: if view {
48916                Some(Box::new(Expression::Boolean(BooleanLiteral {
48917                    value: true,
48918                })))
48919            } else {
48920                None
48921            },
48922            outer: if outer {
48923                Some(Box::new(Expression::Boolean(BooleanLiteral {
48924                    value: true,
48925                })))
48926            } else {
48927                None
48928            },
48929            alias,
48930            alias_quoted: false,
48931            cross_apply: None,
48932            ordinality: None,
48933            column_aliases,
48934        }))))
48935    }
48936
48937    /// parse_limit - Parse LIMIT clause
48938    /// Python: if self._match(TokenType.LIMIT): return exp.Limit(this=self._parse_term())
48939    pub fn parse_limit(&mut self) -> Result<Option<Expression>> {
48940        if !self.match_token(TokenType::Limit) {
48941            return Ok(None);
48942        }
48943        // Parse the limit expression (usually a number)
48944        let limit_expr = self.parse_expression()?;
48945        Ok(Some(Expression::Limit(Box::new(Limit {
48946            this: limit_expr,
48947            percent: false,
48948            comments: Vec::new(),
48949        }))))
48950    }
48951
48952    /// parse_limit_by - Implemented from Python _parse_limit_by
48953    #[allow(unused_variables, unused_mut)]
48954    pub fn parse_limit_by(&mut self) -> Result<Option<Expression>> {
48955        if self.match_text_seq(&["BY"]) {
48956            // Matched: BY
48957            return Ok(None);
48958        }
48959        Ok(None)
48960    }
48961
48962    /// parse_limit_options - Implemented from Python _parse_limit_options
48963    #[allow(unused_variables, unused_mut)]
48964    pub fn parse_limit_options(&mut self) -> Result<Option<Expression>> {
48965        if self.match_text_seq(&["ONLY"]) {
48966            return Ok(Some(Expression::LimitOptions(Box::new(LimitOptions {
48967                percent: None,
48968                rows: None,
48969                with_ties: None,
48970            }))));
48971        }
48972        if self.match_text_seq(&["WITH", "TIES"]) {
48973            // Matched: WITH TIES
48974            return Ok(None);
48975        }
48976        Ok(None)
48977    }
48978
48979    /// parse_load - Implemented from Python _parse_load
48980    #[allow(unused_variables, unused_mut)]
48981    pub fn parse_load(&mut self) -> Result<Option<Expression>> {
48982        if self.match_text_seq(&["DATA"]) {
48983            return Ok(Some(Expression::Command(Box::new(Command {
48984                this: String::new(),
48985            }))));
48986        }
48987        if self.match_text_seq(&["LOCAL"]) {
48988            // Matched: LOCAL
48989            return Ok(None);
48990        }
48991        Ok(None)
48992    }
48993
48994    /// parse_locking - Implemented from Python _parse_locking
48995    /// Calls: parse_table_parts
48996    #[allow(unused_variables, unused_mut)]
48997    pub fn parse_locking(&mut self) -> Result<Option<Expression>> {
48998        let kind = if self.match_token(TokenType::Table) {
48999            Some("TABLE")
49000        } else if self.match_token(TokenType::View) {
49001            Some("VIEW")
49002        } else if self.match_token(TokenType::Row) {
49003            Some("ROW")
49004        } else if self.match_token(TokenType::Database) || self.match_identifier("DATABASE") {
49005            Some("DATABASE")
49006        } else {
49007            None
49008        };
49009
49010        let kind = match kind {
49011            Some(k) => k.to_string(),
49012            None => return Ok(None),
49013        };
49014
49015        let this = if matches!(kind.as_str(), "DATABASE" | "TABLE" | "VIEW") {
49016            self.parse_table_parts()?
49017        } else {
49018            None
49019        };
49020
49021        let for_or_in = if self.match_token(TokenType::For) {
49022            Some("FOR")
49023        } else if self.match_token(TokenType::In) {
49024            Some("IN")
49025        } else {
49026            None
49027        };
49028
49029        let lock_type = if self.match_identifier("ACCESS") {
49030            Some("ACCESS")
49031        } else if self.match_texts(&["EXCL", "EXCLUSIVE"]) {
49032            Some("EXCLUSIVE")
49033        } else if self.match_identifier("SHARE") {
49034            Some("SHARE")
49035        } else if self.match_identifier("READ") {
49036            Some("READ")
49037        } else if self.match_identifier("WRITE") {
49038            Some("WRITE")
49039        } else if self.match_identifier("CHECKSUM") {
49040            Some("CHECKSUM")
49041        } else {
49042            None
49043        };
49044
49045        let override_ = if self.match_identifier("OVERRIDE") {
49046            Some(Box::new(Expression::Boolean(BooleanLiteral {
49047                value: true,
49048            })))
49049        } else {
49050            None
49051        };
49052
49053        Ok(Some(Expression::LockingProperty(Box::new(
49054            LockingProperty {
49055                this: this.map(Box::new),
49056                kind,
49057                for_or_in: for_or_in.map(|v| {
49058                    Box::new(Expression::Var(Box::new(Var {
49059                        this: v.to_string(),
49060                    })))
49061                }),
49062                lock_type: lock_type.map(|v| {
49063                    Box::new(Expression::Var(Box::new(Var {
49064                        this: v.to_string(),
49065                    })))
49066                }),
49067                override_,
49068            },
49069        ))))
49070    }
49071
49072    /// Parse Teradata LOCKING statement: LOCKING <property> SELECT ...
49073    fn parse_locking_statement(&mut self) -> Result<Expression> {
49074        self.expect(TokenType::Lock)?;
49075        let locking = self
49076            .parse_locking()?
49077            .ok_or_else(|| self.parse_error("Expected LOCKING clause"))?;
49078        let query = if self.check(TokenType::With) {
49079            self.parse_statement()?
49080        } else {
49081            self.parse_select()?
49082        };
49083        Ok(Expression::LockingStatement(Box::new(LockingStatement {
49084            this: Box::new(locking),
49085            expression: Box::new(query),
49086        })))
49087    }
49088
49089    /// parse_log - Parses LOG property (Teradata)
49090    /// Python: _parse_log
49091    /// Creates a LogProperty expression
49092    pub fn parse_log(&mut self) -> Result<Option<Expression>> {
49093        self.parse_log_impl(false)
49094    }
49095
49096    /// Implementation of parse_log with no flag
49097    pub fn parse_log_impl(&mut self, no: bool) -> Result<Option<Expression>> {
49098        Ok(Some(Expression::LogProperty(Box::new(LogProperty {
49099            no: if no {
49100                Some(Box::new(Expression::Boolean(BooleanLiteral {
49101                    value: true,
49102                })))
49103            } else {
49104                None
49105            },
49106        }))))
49107    }
49108
49109    /// parse_match_against - Parses MATCH(columns) AGAINST(pattern)
49110    /// Python: parser.py:7125-7153
49111    #[allow(unused_variables, unused_mut)]
49112    pub fn parse_match_against(&mut self) -> Result<Option<Expression>> {
49113        // Parse column expressions or TABLE syntax
49114        let expressions = if self.match_text_seq(&["TABLE"]) {
49115            // SingleStore TABLE syntax
49116            if let Some(table) = self.parse_table()? {
49117                vec![table]
49118            } else {
49119                Vec::new()
49120            }
49121        } else {
49122            // Regular column list
49123            let mut cols = Vec::new();
49124            loop {
49125                if let Some(col) = self.parse_column()? {
49126                    cols.push(col);
49127                }
49128                if !self.match_token(TokenType::Comma) {
49129                    break;
49130                }
49131            }
49132            cols
49133        };
49134
49135        // Match ) AGAINST (
49136        self.match_text_seq(&[")", "AGAINST", "("]);
49137
49138        // Parse the search pattern
49139        let this = self.parse_string()?;
49140
49141        // Parse modifier
49142        let modifier = if self.match_text_seq(&["IN", "NATURAL", "LANGUAGE", "MODE"]) {
49143            if self.match_text_seq(&["WITH", "QUERY", "EXPANSION"]) {
49144                Some(Box::new(Expression::Var(Box::new(Var {
49145                    this: "IN NATURAL LANGUAGE MODE WITH QUERY EXPANSION".to_string(),
49146                }))))
49147            } else {
49148                Some(Box::new(Expression::Var(Box::new(Var {
49149                    this: "IN NATURAL LANGUAGE MODE".to_string(),
49150                }))))
49151            }
49152        } else if self.match_text_seq(&["IN", "BOOLEAN", "MODE"]) {
49153            Some(Box::new(Expression::Var(Box::new(Var {
49154                this: "IN BOOLEAN MODE".to_string(),
49155            }))))
49156        } else if self.match_text_seq(&["WITH", "QUERY", "EXPANSION"]) {
49157            Some(Box::new(Expression::Var(Box::new(Var {
49158                this: "WITH QUERY EXPANSION".to_string(),
49159            }))))
49160        } else {
49161            None
49162        };
49163
49164        match this {
49165            Some(t) => Ok(Some(Expression::MatchAgainst(Box::new(MatchAgainst {
49166                this: Box::new(t),
49167                expressions,
49168                modifier,
49169            })))),
49170            None => Ok(None),
49171        }
49172    }
49173
49174    /// parse_match_recognize_measure - Implemented from Python _parse_match_recognize_measure
49175    /// Parses a MEASURES expression in MATCH_RECOGNIZE: [FINAL|RUNNING] expression
49176    pub fn parse_match_recognize_measure(&mut self) -> Result<Option<Expression>> {
49177        // Check for optional FINAL or RUNNING keyword
49178        let window_frame = if self.match_texts(&["FINAL", "RUNNING"]) {
49179            let text = self.previous().text.to_ascii_uppercase();
49180            Some(if text == "FINAL" {
49181                MatchRecognizeSemantics::Final
49182            } else {
49183                MatchRecognizeSemantics::Running
49184            })
49185        } else {
49186            None
49187        };
49188
49189        // Parse the expression
49190        let this = self.parse_expression()?;
49191
49192        Ok(Some(Expression::MatchRecognizeMeasure(Box::new(
49193            MatchRecognizeMeasure { this, window_frame },
49194        ))))
49195    }
49196
49197    /// parse_max_min_by - MAX_BY / MIN_BY / ARG_MAX / ARG_MIN aggregate functions
49198    /// Parses: MAX_BY(value, key [, n]) or MIN_BY(value, key [, n])
49199    /// is_max: true for MAX_BY/ARG_MAX, false for MIN_BY/ARG_MIN
49200    #[allow(unused_variables, unused_mut)]
49201    pub fn parse_max_min_by(&mut self, is_max: bool) -> Result<Option<Expression>> {
49202        let mut args = Vec::new();
49203
49204        // Handle optional DISTINCT
49205        let distinct = if self.match_token(TokenType::Distinct) {
49206            let lambda_expr = self.parse_lambda()?;
49207            if let Some(expr) = lambda_expr {
49208                args.push(expr);
49209            }
49210            self.match_token(TokenType::Comma);
49211            true
49212        } else {
49213            false
49214        };
49215
49216        // Parse remaining arguments
49217        loop {
49218            if let Some(arg) = self.parse_lambda()? {
49219                args.push(arg);
49220            } else {
49221                break;
49222            }
49223            if !self.match_token(TokenType::Comma) {
49224                break;
49225            }
49226        }
49227
49228        let this = args
49229            .get(0)
49230            .cloned()
49231            .map(Box::new)
49232            .unwrap_or_else(|| Box::new(Expression::Null(Null)));
49233        let expression = args
49234            .get(1)
49235            .cloned()
49236            .map(Box::new)
49237            .unwrap_or_else(|| Box::new(Expression::Null(Null)));
49238        let count = args.get(2).cloned().map(Box::new);
49239
49240        if is_max {
49241            Ok(Some(Expression::ArgMax(Box::new(ArgMax {
49242                this,
49243                expression,
49244                count,
49245            }))))
49246        } else {
49247            Ok(Some(Expression::ArgMin(Box::new(ArgMin {
49248                this,
49249                expression,
49250                count,
49251            }))))
49252        }
49253    }
49254
49255    /// Parse MERGE statement
49256    /// Python: def _parse_merge(self) -> exp.Merge
49257    pub fn parse_merge(&mut self) -> Result<Option<Expression>> {
49258        // Optional INTO keyword
49259        self.match_token(TokenType::Into);
49260
49261        // Parse target table using parse_table_ref
49262        let mut target = Expression::Table(Box::new(self.parse_table_ref()?));
49263
49264        // Parse optional TSQL table hints: WITH (HOLDLOCK), WITH (TABLOCK), etc.
49265        if self.check(TokenType::With) && self.check_next(TokenType::LParen) {
49266            if let Expression::Table(ref mut table) = target {
49267                if let Some(hint_expr) = self.parse_table_hints()? {
49268                    match hint_expr {
49269                        Expression::Tuple(tuple) => {
49270                            table.hints = tuple.expressions;
49271                        }
49272                        other => {
49273                            table.hints = vec![other];
49274                        }
49275                    }
49276                }
49277            }
49278        }
49279
49280        // Parse optional alias for target table
49281        // Try to get an identifier as alias if AS is present or there's an identifier
49282        // Use parse_id_var instead of parse_identifier to handle Var tokens (e.g. T)
49283        if self.match_token(TokenType::As) {
49284            if let Some(alias_expr) = self.parse_id_var()? {
49285                // Extract identifier from the expression
49286                if let Expression::Identifier(ident) = alias_expr {
49287                    target = Expression::Alias(Box::new(Alias {
49288                        this: target,
49289                        alias: ident,
49290                        column_aliases: Vec::new(),
49291                        pre_alias_comments: Vec::new(),
49292                        trailing_comments: Vec::new(),
49293                        inferred_type: None,
49294                    }));
49295                }
49296            }
49297        } else if !self.check(TokenType::Using) {
49298            // Try to parse alias without AS keyword (e.g., MERGE t1 T USING ...)
49299            // Use parse_id_var to handle both Identifier and Var tokens
49300            if let Some(alias_expr) = self.parse_id_var()? {
49301                if let Expression::Identifier(ident) = alias_expr {
49302                    target = Expression::Alias(Box::new(Alias {
49303                        this: target,
49304                        alias: ident,
49305                        column_aliases: Vec::new(),
49306                        pre_alias_comments: Vec::new(),
49307                        trailing_comments: Vec::new(),
49308                        inferred_type: None,
49309                    }));
49310                }
49311            }
49312        }
49313
49314        // USING clause
49315        if !self.match_token(TokenType::Using) {
49316            return Err(self.parse_error("Expected USING in MERGE statement"));
49317        }
49318
49319        // Parse source table or subquery
49320        let mut using = if self.match_token(TokenType::LParen) {
49321            // Subquery: USING (SELECT ...) AS alias
49322            let query = self.parse_statement()?;
49323            self.expect(TokenType::RParen)?;
49324            let trailing = self.previous_trailing_comments().to_vec();
49325            let mut subq = Subquery {
49326                this: query,
49327                alias: None,
49328                column_aliases: Vec::new(),
49329                order_by: None,
49330                limit: None,
49331                offset: None,
49332                distribute_by: None,
49333                sort_by: None,
49334                cluster_by: None,
49335                lateral: false,
49336                modifiers_inside: false,
49337                trailing_comments: trailing,
49338                inferred_type: None,
49339            };
49340            // Parse optional alias: (SELECT ...) AS y(col1, col2)
49341            if self.match_token(TokenType::As) {
49342                let alias_name = self.expect_identifier_or_keyword()?;
49343                subq.alias = Some(Identifier::new(alias_name));
49344                // Parse optional column aliases: AS alias(col1, col2)
49345                if self.match_token(TokenType::LParen) {
49346                    let mut cols = Vec::new();
49347                    loop {
49348                        let col_name = self.expect_identifier_or_keyword()?;
49349                        cols.push(Identifier::new(col_name));
49350                        if !self.match_token(TokenType::Comma) {
49351                            break;
49352                        }
49353                    }
49354                    self.expect(TokenType::RParen)?;
49355                    subq.column_aliases = cols;
49356                }
49357            } else if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
49358                // Implicit alias without AS
49359                let alias_name = self.expect_identifier_or_keyword()?;
49360                subq.alias = Some(Identifier::new(alias_name));
49361                // Parse optional column aliases: alias(col1, col2)
49362                if self.match_token(TokenType::LParen) {
49363                    let mut cols = Vec::new();
49364                    loop {
49365                        let col_name = self.expect_identifier_or_keyword()?;
49366                        cols.push(Identifier::new(col_name));
49367                        if !self.match_token(TokenType::Comma) {
49368                            break;
49369                        }
49370                    }
49371                    self.expect(TokenType::RParen)?;
49372                    subq.column_aliases = cols;
49373                }
49374            }
49375            Expression::Subquery(Box::new(subq))
49376        } else {
49377            Expression::Table(Box::new(self.parse_table_ref()?))
49378        };
49379
49380        // Parse optional alias for source (if not already parsed for subquery)
49381        if matches!(&using, Expression::Table(_)) {
49382            if self.match_token(TokenType::As) {
49383                if let Some(alias_expr) = self.parse_id_var()? {
49384                    if let Expression::Identifier(ident) = alias_expr {
49385                        using = Expression::Alias(Box::new(Alias {
49386                            this: using,
49387                            alias: ident,
49388                            column_aliases: Vec::new(),
49389                            pre_alias_comments: Vec::new(),
49390                            trailing_comments: Vec::new(),
49391                            inferred_type: None,
49392                        }));
49393                    }
49394                }
49395            } else if !self.check(TokenType::On) {
49396                // Try to parse alias without AS keyword
49397                // Use parse_id_var to handle both Identifier and Var tokens (e.g., S, T)
49398                if let Some(alias_expr) = self.parse_id_var()? {
49399                    if let Expression::Identifier(ident) = alias_expr {
49400                        using = Expression::Alias(Box::new(Alias {
49401                            this: using,
49402                            alias: ident,
49403                            column_aliases: Vec::new(),
49404                            pre_alias_comments: Vec::new(),
49405                            trailing_comments: Vec::new(),
49406                            inferred_type: None,
49407                        }));
49408                    }
49409                }
49410            }
49411        }
49412
49413        // ON clause with condition
49414        let on = if self.match_token(TokenType::On) {
49415            Some(Box::new(self.parse_expression()?))
49416        } else {
49417            None
49418        };
49419
49420        // Optional additional USING clause for key columns (DuckDB: USING (col1, col2))
49421        let using_cond = if self.match_token(TokenType::Using) {
49422            // Parse comma-separated identifiers wrapped in parentheses
49423            if self.match_token(TokenType::LParen) {
49424                let mut idents = Vec::new();
49425                loop {
49426                    // Use parse_id_var to handle Var tokens (unquoted identifiers)
49427                    if let Some(ident) = self.parse_id_var()? {
49428                        idents.push(ident);
49429                    } else {
49430                        break;
49431                    }
49432                    if !self.match_token(TokenType::Comma) {
49433                        break;
49434                    }
49435                }
49436                self.match_token(TokenType::RParen);
49437                if !idents.is_empty() {
49438                    Some(Box::new(Expression::Tuple(Box::new(Tuple {
49439                        expressions: idents,
49440                    }))))
49441                } else {
49442                    None
49443                }
49444            } else {
49445                // Also support without parentheses for backwards compatibility
49446                let mut idents = Vec::new();
49447                loop {
49448                    if let Some(ident) = self.parse_id_var()? {
49449                        idents.push(ident);
49450                    } else {
49451                        break;
49452                    }
49453                    if !self.match_token(TokenType::Comma) {
49454                        break;
49455                    }
49456                }
49457                if !idents.is_empty() {
49458                    Some(Box::new(Expression::Tuple(Box::new(Tuple {
49459                        expressions: idents,
49460                    }))))
49461                } else {
49462                    None
49463                }
49464            }
49465        } else {
49466            None
49467        };
49468
49469        // Parse WHEN MATCHED clauses
49470        let whens = self.parse_when_matched_clauses()?;
49471
49472        // Parse optional RETURNING clause (PostgreSQL) or OUTPUT clause (TSQL)
49473        let returning = if let Some(ret) = self.parse_returning()? {
49474            Some(ret)
49475        } else if self.match_token(TokenType::Output) {
49476            // TSQL OUTPUT clause: OUTPUT $action, Inserted.col, Deleted.col [INTO target]
49477            let output = self.parse_output_clause()?;
49478            Some(Expression::Returning(Box::new(Returning {
49479                expressions: output.columns,
49480                into: output.into_table.map(Box::new),
49481            })))
49482        } else {
49483            None
49484        };
49485
49486        Ok(Some(Expression::Merge(Box::new(Merge {
49487            this: Box::new(target),
49488            using: Box::new(using),
49489            on,
49490            using_cond,
49491            whens: whens.map(Box::new),
49492            with_: None,
49493            returning: returning.map(Box::new),
49494        }))))
49495    }
49496
49497    /// Parse multiple WHEN [NOT] MATCHED clauses for MERGE
49498    fn parse_when_matched_clauses(&mut self) -> Result<Option<Expression>> {
49499        let mut whens = Vec::new();
49500
49501        while self.match_token(TokenType::When) {
49502            // Check for NOT MATCHED
49503            let matched = !self.match_token(TokenType::Not);
49504            self.match_text_seq(&["MATCHED"]);
49505
49506            // Check for BY TARGET or BY SOURCE
49507            let source = if self.match_text_seq(&["BY", "TARGET"]) {
49508                Some(Box::new(Expression::Boolean(BooleanLiteral {
49509                    value: false,
49510                })))
49511            } else if self.match_text_seq(&["BY", "SOURCE"]) {
49512                Some(Box::new(Expression::Boolean(BooleanLiteral {
49513                    value: true,
49514                })))
49515            } else {
49516                None
49517            };
49518
49519            // Optional AND condition
49520            let condition = if self.match_token(TokenType::And) {
49521                Some(Box::new(self.parse_expression()?))
49522            } else {
49523                None
49524            };
49525
49526            // THEN action
49527            if !self.match_token(TokenType::Then) {
49528                return Err(self.parse_error("Expected THEN in WHEN clause"));
49529            }
49530
49531            // Parse the action: INSERT, UPDATE, DELETE, or other keywords (DO NOTHING, etc.)
49532            let then: Expression = if self.match_token(TokenType::Insert) {
49533                // INSERT action - use Tuple to represent it
49534                let mut elements = vec![Expression::Var(Box::new(Var {
49535                    this: "INSERT".to_string(),
49536                }))];
49537
49538                // Spark/Databricks: INSERT * (insert all columns)
49539                if self.match_token(TokenType::Star) {
49540                    elements.push(Expression::Star(crate::expressions::Star {
49541                        table: None,
49542                        except: None,
49543                        replace: None,
49544                        rename: None,
49545                        trailing_comments: Vec::new(),
49546                        span: None,
49547                    }));
49548                } else
49549                // Parse column list (optional)
49550                if self.match_token(TokenType::LParen) {
49551                    let mut columns: Vec<Expression> = Vec::new();
49552                    loop {
49553                        if let Some(col) = self.parse_id_var()? {
49554                            // Handle qualified column references (e.g., target.a)
49555                            let col = if self.match_token(TokenType::Dot) {
49556                                if let Expression::Identifier(table_ident) = col {
49557                                    if let Some(col_expr) = self.parse_id_var()? {
49558                                        if let Expression::Identifier(col_ident) = col_expr {
49559                                            Expression::boxed_column(Column {
49560                                                name: col_ident,
49561                                                table: Some(table_ident),
49562                                                join_mark: false,
49563                                                trailing_comments: Vec::new(),
49564                                                span: None,
49565                                                inferred_type: None,
49566                                            })
49567                                        } else {
49568                                            col_expr
49569                                        }
49570                                    } else {
49571                                        return Err(self.parse_error(
49572                                            "Expected column name after dot in MERGE INSERT",
49573                                        ));
49574                                    }
49575                                } else {
49576                                    col
49577                                }
49578                            } else {
49579                                col
49580                            };
49581                            columns.push(col);
49582                        } else {
49583                            break;
49584                        }
49585                        if !self.match_token(TokenType::Comma) {
49586                            break;
49587                        }
49588                    }
49589                    self.match_token(TokenType::RParen);
49590                    if !columns.is_empty() {
49591                        elements.push(Expression::Tuple(Box::new(Tuple {
49592                            expressions: columns,
49593                        })));
49594                    }
49595                }
49596
49597                // Parse VALUES clause
49598                if self.match_text_seq(&["VALUES"]) {
49599                    if let Some(values) = self.parse_value()? {
49600                        elements.push(values);
49601                    }
49602                } else if self.match_text_seq(&["ROW"]) {
49603                    elements.push(Expression::Var(Box::new(Var {
49604                        this: "ROW".to_string(),
49605                    })));
49606                }
49607
49608                if elements.len() == 1 {
49609                    elements[0].clone()
49610                } else {
49611                    Expression::Tuple(Box::new(Tuple {
49612                        expressions: elements,
49613                    }))
49614                }
49615            } else if self.match_token(TokenType::Update) {
49616                // UPDATE action - use Tuple to represent SET assignments
49617                let mut elements = vec![Expression::Var(Box::new(Var {
49618                    this: "UPDATE".to_string(),
49619                }))];
49620
49621                // Spark/Databricks: UPDATE * (update all columns)
49622                if self.match_token(TokenType::Star) {
49623                    elements.push(Expression::Star(crate::expressions::Star {
49624                        table: None,
49625                        except: None,
49626                        replace: None,
49627                        rename: None,
49628                        trailing_comments: Vec::new(),
49629                        span: None,
49630                    }));
49631                } else if self.match_token(TokenType::Set) {
49632                    // Parse col = value assignments manually
49633                    let mut assignments: Vec<Expression> = Vec::new();
49634                    loop {
49635                        // Parse: column = expression (column can be qualified like x.a)
49636                        if let Some(col) = self.parse_id_var()? {
49637                            // Handle qualified column references (e.g., x.a = y.b)
49638                            let col = if self.match_token(TokenType::Dot) {
49639                                // We have a qualified column reference
49640                                if let Expression::Identifier(table_ident) = col {
49641                                    // Parse the column part after the dot
49642                                    if let Some(col_expr) = self.parse_id_var()? {
49643                                        if let Expression::Identifier(col_ident) = col_expr {
49644                                            Expression::boxed_column(Column {
49645                                                name: col_ident,
49646                                                table: Some(table_ident),
49647                                                join_mark: false,
49648                                                trailing_comments: Vec::new(),
49649                                                span: None,
49650                                                inferred_type: None,
49651                                            })
49652                                        } else {
49653                                            col_expr
49654                                        }
49655                                    } else {
49656                                        return Err(
49657                                            self.parse_error("Expected column name after dot")
49658                                        );
49659                                    }
49660                                } else {
49661                                    col
49662                                }
49663                            } else {
49664                                col
49665                            };
49666                            if self.match_token(TokenType::Eq) {
49667                                let value = self.parse_expression()?;
49668                                // Create assignment as EQ expression
49669                                let assignment = Expression::Eq(Box::new(BinaryOp {
49670                                    left: col,
49671                                    right: value,
49672                                    left_comments: Vec::new(),
49673                                    operator_comments: Vec::new(),
49674                                    trailing_comments: Vec::new(),
49675                                    inferred_type: None,
49676                                }));
49677                                assignments.push(assignment);
49678                            }
49679                        }
49680                        if !self.match_token(TokenType::Comma) {
49681                            break;
49682                        }
49683                    }
49684                    if !assignments.is_empty() {
49685                        elements.push(Expression::Tuple(Box::new(Tuple {
49686                            expressions: assignments,
49687                        })));
49688                    }
49689                }
49690
49691                if elements.len() == 1 {
49692                    elements[0].clone()
49693                } else {
49694                    Expression::Tuple(Box::new(Tuple {
49695                        expressions: elements,
49696                    }))
49697                }
49698            } else if self.match_token(TokenType::Delete) {
49699                // DELETE action
49700                Expression::Var(Box::new(Var {
49701                    this: "DELETE".to_string(),
49702                }))
49703            } else if self.match_identifier("DO") {
49704                // DO NOTHING action (PostgreSQL)
49705                if self.match_identifier("NOTHING") {
49706                    Expression::Var(Box::new(Var {
49707                        this: "DO NOTHING".to_string(),
49708                    }))
49709                } else {
49710                    return Err(self.parse_error("Expected NOTHING after DO"));
49711                }
49712            } else {
49713                // Other action
49714                if let Some(var) = self.parse_var()? {
49715                    var
49716                } else {
49717                    return Err(
49718                        self.parse_error("Expected INSERT, UPDATE, DELETE, or action keyword")
49719                    );
49720                }
49721            };
49722
49723            whens.push(Expression::When(Box::new(When {
49724                matched: Some(Box::new(Expression::Boolean(BooleanLiteral {
49725                    value: matched,
49726                }))),
49727                source,
49728                condition,
49729                then: Box::new(then),
49730            })));
49731        }
49732
49733        if whens.is_empty() {
49734            Ok(None)
49735        } else {
49736            Ok(Some(Expression::Whens(Box::new(Whens {
49737                expressions: whens,
49738            }))))
49739        }
49740    }
49741
49742    /// parse_mergeblockratio - Parses MERGEBLOCKRATIO property (Teradata)
49743    /// Python: _parse_mergeblockratio
49744    /// Format: MERGEBLOCKRATIO = number [PERCENT] or NO MERGEBLOCKRATIO or DEFAULT MERGEBLOCKRATIO
49745    pub fn parse_mergeblockratio(&mut self) -> Result<Option<Expression>> {
49746        self.parse_mergeblockratio_impl(false, false)
49747    }
49748
49749    /// Implementation of parse_mergeblockratio with options
49750    pub fn parse_mergeblockratio_impl(
49751        &mut self,
49752        no: bool,
49753        default: bool,
49754    ) -> Result<Option<Expression>> {
49755        // Check for = followed by a number
49756        if self.match_token(TokenType::Eq) {
49757            let this = self.parse_number()?;
49758            let percent = self.match_token(TokenType::Percent);
49759
49760            Ok(Some(Expression::MergeBlockRatioProperty(Box::new(
49761                MergeBlockRatioProperty {
49762                    this: this.map(Box::new),
49763                    no: None,
49764                    default: None,
49765                    percent: if percent {
49766                        Some(Box::new(Expression::Boolean(BooleanLiteral {
49767                            value: true,
49768                        })))
49769                    } else {
49770                        None
49771                    },
49772                },
49773            ))))
49774        } else {
49775            // NO or DEFAULT variant
49776            Ok(Some(Expression::MergeBlockRatioProperty(Box::new(
49777                MergeBlockRatioProperty {
49778                    this: None,
49779                    no: if no {
49780                        Some(Box::new(Expression::Boolean(BooleanLiteral {
49781                            value: true,
49782                        })))
49783                    } else {
49784                        None
49785                    },
49786                    default: if default {
49787                        Some(Box::new(Expression::Boolean(BooleanLiteral {
49788                            value: true,
49789                        })))
49790                    } else {
49791                        None
49792                    },
49793                    percent: None,
49794                },
49795            ))))
49796        }
49797    }
49798
49799    /// parse_modifies_property - Implemented from Python _parse_modifies_property
49800    #[allow(unused_variables, unused_mut)]
49801    pub fn parse_modifies_property(&mut self) -> Result<Option<Expression>> {
49802        if self.match_text_seq(&["SQL", "DATA"]) {
49803            // Matched: SQL DATA
49804            return Ok(None);
49805        }
49806        Ok(None)
49807    }
49808
49809    /// parse_multitable_inserts - Parses Oracle's multi-table INSERT (INSERT ALL/FIRST)
49810    /// Python: _parse_multitable_inserts
49811    /// Syntax: INSERT ALL|FIRST [WHEN cond THEN] INTO table [(cols)] [VALUES(...)] ... SELECT ...
49812    pub fn parse_multitable_inserts(
49813        &mut self,
49814        leading_comments: Vec<String>,
49815    ) -> Result<Option<Expression>> {
49816        // Get kind from previous token (ALL or FIRST)
49817        let kind = self.previous().text.to_ascii_uppercase();
49818
49819        let mut expressions = Vec::new();
49820
49821        // Helper closure to parse a single conditional insert
49822        // Returns None when no more INTO clauses found
49823        loop {
49824            // Check for WHEN condition
49825            let condition = if self.match_token(TokenType::When) {
49826                let cond = self.parse_or()?;
49827                self.match_token(TokenType::Then);
49828                Some(cond)
49829            } else {
49830                None
49831            };
49832
49833            // Check for ELSE (used in INSERT FIRST ... ELSE INTO ...)
49834            let is_else = self.match_token(TokenType::Else);
49835
49836            // Must have INTO keyword to continue
49837            if !self.match_token(TokenType::Into) {
49838                break;
49839            }
49840
49841            // Parse table with optional schema (using parse_table_parts for proper schema.table parsing)
49842            let table_expr = self.parse_table_parts()?;
49843
49844            // Extract TableRef from the table expression
49845            let table_ref = if let Some(Expression::Table(t)) = table_expr {
49846                *t
49847            } else {
49848                // Fallback: create empty table ref (shouldn't happen)
49849                TableRef::new("")
49850            };
49851
49852            // Parse optional column list: (col1, col2, ...)
49853            let columns = if self.match_token(TokenType::LParen) {
49854                let cols = self.parse_identifier_list()?;
49855                self.expect(TokenType::RParen)?;
49856                cols
49857            } else {
49858                Vec::new()
49859            };
49860
49861            // Parse optional VALUES clause
49862            let values = if self.match_token(TokenType::Values) {
49863                self.expect(TokenType::LParen)?;
49864                let row = self.parse_expression_list()?;
49865                self.expect(TokenType::RParen)?;
49866                vec![row]
49867            } else {
49868                Vec::new()
49869            };
49870
49871            // Create Insert expression for this INTO clause
49872            let insert_expr = Expression::Insert(Box::new(Insert {
49873                table: table_ref,
49874                columns,
49875                values,
49876                query: None,
49877                overwrite: false,
49878                partition: Vec::new(),
49879                directory: None,
49880                returning: Vec::new(),
49881                output: None,
49882                on_conflict: None,
49883                leading_comments: Vec::new(),
49884                if_exists: false,
49885                with: None,
49886                ignore: false,
49887                source_alias: None,
49888                alias: None,
49889                alias_explicit_as: false,
49890                default_values: false,
49891                by_name: false,
49892                conflict_action: None,
49893                is_replace: false,
49894                replace_where: None,
49895                source: None,
49896                hint: None,
49897                function_target: None,
49898                partition_by: None,
49899                settings: Vec::new(),
49900            }));
49901
49902            // Wrap in ConditionalInsert
49903            let conditional_insert = Expression::ConditionalInsert(Box::new(ConditionalInsert {
49904                this: Box::new(insert_expr),
49905                expression: condition.map(Box::new),
49906                else_: if is_else {
49907                    Some(Box::new(Expression::Boolean(BooleanLiteral {
49908                        value: true,
49909                    })))
49910                } else {
49911                    None
49912                },
49913            }));
49914
49915            expressions.push(conditional_insert);
49916        }
49917
49918        // Parse the source SELECT statement (or subquery)
49919        let source = self.parse_statement()?;
49920
49921        Ok(Some(Expression::MultitableInserts(Box::new(
49922            MultitableInserts {
49923                kind,
49924                expressions,
49925                source: Some(Box::new(source)),
49926                leading_comments,
49927            },
49928        ))))
49929    }
49930
49931    /// parse_name_as_expression - Parse identifier that can be aliased
49932    /// Parses: identifier [AS expression]
49933    #[allow(unused_variables, unused_mut)]
49934    pub fn parse_name_as_expression(&mut self) -> Result<Option<Expression>> {
49935        // Parse the identifier
49936        let this = self.parse_id_var()?;
49937        if this.is_none() {
49938            return Ok(None);
49939        }
49940
49941        // Check for AS alias
49942        if self.match_token(TokenType::Alias) {
49943            let expression = self.parse_disjunction()?;
49944            if expression.is_none() {
49945                return Ok(this);
49946            }
49947
49948            // Extract the identifier for the alias
49949            let alias_ident =
49950                match this.ok_or_else(|| self.parse_error("Expected identifier for alias"))? {
49951                    Expression::Identifier(id) => id,
49952                    _ => Identifier::new(String::new()),
49953                };
49954
49955            return Ok(Some(Expression::Alias(Box::new(Alias {
49956                this: expression.ok_or_else(|| self.parse_error("Expected expression after AS"))?,
49957                alias: alias_ident,
49958                column_aliases: Vec::new(),
49959                pre_alias_comments: Vec::new(),
49960                trailing_comments: Vec::new(),
49961                inferred_type: None,
49962            }))));
49963        }
49964
49965        Ok(this)
49966    }
49967
49968    /// parse_named_window - Ported from Python _parse_named_window
49969    /// Parses a named window definition: name AS (spec)
49970    #[allow(unused_variables, unused_mut)]
49971    pub fn parse_named_window(&mut self) -> Result<Option<Expression>> {
49972        // Parse window name
49973        let name = self.parse_id_var()?;
49974        if name.is_none() {
49975            return Ok(None);
49976        }
49977
49978        // Expect AS
49979        if !self.match_token(TokenType::As) {
49980            return Ok(name); // Just the name, no spec
49981        }
49982
49983        // Parse window spec (parenthesized)
49984        self.expect(TokenType::LParen)?;
49985        let spec = self.parse_window_spec_inner()?;
49986        self.expect(TokenType::RParen)?;
49987
49988        if let (Some(name_expr), Some(spec_expr)) = (name, spec) {
49989            // Create an Alias expression wrapping the spec with the name
49990            let alias_ident = if let Expression::Identifier(id) = name_expr {
49991                id
49992            } else {
49993                Identifier::new("window")
49994            };
49995            Ok(Some(Expression::Alias(Box::new(Alias {
49996                this: spec_expr,
49997                alias: alias_ident,
49998                column_aliases: Vec::new(),
49999                pre_alias_comments: Vec::new(),
50000                trailing_comments: Vec::new(),
50001                inferred_type: None,
50002            }))))
50003        } else {
50004            Ok(None)
50005        }
50006    }
50007
50008    /// parse_next_value_for - Parses NEXT VALUE FOR sequence_name
50009    /// Python: parser.py:6752-6761
50010    #[allow(unused_variables, unused_mut)]
50011    pub fn parse_next_value_for(&mut self) -> Result<Option<Expression>> {
50012        if !self.match_text_seq(&["VALUE", "FOR"]) {
50013            // Retreat if we consumed a token
50014            if self.current > 0 {
50015                self.current -= 1;
50016            }
50017            return Ok(None);
50018        }
50019
50020        // Parse the sequence name as a dotted identifier (db.schema.sequence_name)
50021        // Manually parse identifier parts separated by dots
50022        let first = self
50023            .parse_id_var()?
50024            .ok_or_else(|| self.parse_error("Expected sequence name after NEXT VALUE FOR"))?;
50025        let first_id = match first {
50026            Expression::Identifier(id) => id,
50027            Expression::Var(v) => Identifier {
50028                name: v.this,
50029                quoted: false,
50030                trailing_comments: Vec::new(),
50031                span: None,
50032            },
50033            _ => Identifier {
50034                name: String::new(),
50035                quoted: false,
50036                trailing_comments: Vec::new(),
50037                span: None,
50038            },
50039        };
50040
50041        // Check for dotted parts (db.schema.sequence_name)
50042        let mut parts = vec![first_id];
50043        while self.match_token(TokenType::Dot) {
50044            if self.is_identifier_or_keyword_token() {
50045                let token = self.advance();
50046                parts.push(Identifier {
50047                    name: token.text,
50048                    quoted: token.token_type == TokenType::QuotedIdentifier,
50049                    trailing_comments: Vec::new(),
50050                    span: None,
50051                });
50052            } else {
50053                break;
50054            }
50055        }
50056
50057        // Build a Column expression from the parts
50058        let this = if parts.len() == 1 {
50059            Expression::boxed_column(Column {
50060                name: parts.remove(0),
50061                table: None,
50062                join_mark: false,
50063                trailing_comments: Vec::new(),
50064                span: None,
50065                inferred_type: None,
50066            })
50067        } else if parts.len() == 2 {
50068            Expression::boxed_column(Column {
50069                name: parts.remove(1),
50070                table: Some(parts.remove(0)),
50071                join_mark: false,
50072                trailing_comments: Vec::new(),
50073                span: None,
50074                inferred_type: None,
50075            })
50076        } else {
50077            // For 3+ parts, build nested Dot expressions
50078            let mut expr = Expression::Identifier(parts.remove(0));
50079            for part in parts.drain(..) {
50080                expr = Expression::Dot(Box::new(DotAccess {
50081                    this: expr,
50082                    field: part,
50083                }));
50084            }
50085            expr
50086        };
50087
50088        // Parse optional OVER (ORDER BY ...) clause
50089        let order = if self.match_token(TokenType::Over) {
50090            if self.match_token(TokenType::LParen) {
50091                let ord = self.parse_order()?;
50092                self.expect(TokenType::RParen)?;
50093                ord.map(Box::new)
50094            } else {
50095                None
50096            }
50097        } else {
50098            None
50099        };
50100
50101        Ok(Some(Expression::NextValueFor(Box::new(NextValueFor {
50102            this: Box::new(this),
50103            order,
50104        }))))
50105    }
50106
50107    /// parse_no_property - Implemented from Python _parse_no_property
50108    #[allow(unused_variables, unused_mut)]
50109    pub fn parse_no_property(&mut self) -> Result<Option<Expression>> {
50110        if self.match_text_seq(&["PRIMARY", "INDEX"]) {
50111            // Matched: PRIMARY INDEX
50112            return Ok(None);
50113        }
50114        if self.match_text_seq(&["SQL"]) {
50115            // Matched: SQL
50116            return Ok(None);
50117        }
50118        Ok(None)
50119    }
50120
50121    /// parse_normalize - Ported from Python _parse_normalize
50122    #[allow(unused_variables, unused_mut)]
50123    /// parse_normalize - Parses NORMALIZE(expr [, form])
50124    /// Python: NORMALIZE(expr, form) where form is NFC/NFD/NFKC/NFKD
50125    pub fn parse_normalize(&mut self) -> Result<Option<Expression>> {
50126        // Parse the expression to normalize
50127        let this = self.parse_expression()?;
50128
50129        // Check for optional form argument
50130        let form = if self.match_token(TokenType::Comma) {
50131            self.parse_var()?.map(Box::new)
50132        } else {
50133            None
50134        };
50135
50136        Ok(Some(Expression::Normalize(Box::new(Normalize {
50137            this: Box::new(this),
50138            form,
50139            is_casefold: None,
50140        }))))
50141    }
50142
50143    /// parse_not_constraint - Implemented from Python _parse_not_constraint
50144    /// Parses constraints that start with NOT: NOT NULL, NOT CASESPECIFIC
50145    pub fn parse_not_constraint(&mut self) -> Result<Option<Expression>> {
50146        // NOT NULL constraint
50147        if self.match_text_seq(&["NULL"]) {
50148            return Ok(Some(Expression::NotNullColumnConstraint(Box::new(
50149                NotNullColumnConstraint { allow_null: None },
50150            ))));
50151        }
50152        // NOT CASESPECIFIC constraint (Teradata)
50153        if self.match_text_seq(&["CASESPECIFIC"]) {
50154            return Ok(Some(Expression::CaseSpecificColumnConstraint(Box::new(
50155                CaseSpecificColumnConstraint {
50156                    not_: Some(Box::new(Expression::Boolean(BooleanLiteral {
50157                        value: true,
50158                    }))),
50159                },
50160            ))));
50161        }
50162        // NOT FOR REPLICATION (SQL Server) - consume the tokens and return as a property
50163        if self.match_token(TokenType::For) && self.match_identifier("REPLICATION") {
50164            return Ok(Some(Expression::Property(Box::new(
50165                crate::expressions::Property {
50166                    this: Box::new(Expression::Identifier(Identifier::new(
50167                        "NOT FOR REPLICATION".to_string(),
50168                    ))),
50169                    value: None,
50170                },
50171            ))));
50172        }
50173        Ok(None)
50174    }
50175
50176    /// parse_null - Parse NULL literal
50177    /// Python: if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): return exp.Null
50178    pub fn parse_null(&mut self) -> Result<Option<Expression>> {
50179        if self.match_token(TokenType::Null) {
50180            return Ok(Some(Expression::Null(Null)));
50181        }
50182        // UNKNOWN is treated as NULL in some dialects
50183        if self.match_token(TokenType::Unknown) {
50184            return Ok(Some(Expression::Null(Null)));
50185        }
50186        Ok(None)
50187    }
50188
50189    /// parse_number - Parse numeric literal
50190    /// Python: TokenType.NUMBER -> exp.Literal(this=token.text, is_string=False)
50191    /// Handles Hive/Spark numeric suffixes encoded as "number::TYPE" by the tokenizer
50192    pub fn parse_number(&mut self) -> Result<Option<Expression>> {
50193        if self.match_token(TokenType::Number) {
50194            let text = self.previous().text.clone();
50195            // Check for numeric literal suffix encoded as "number::TYPE"
50196            if let Some(sep_pos) = text.find("::") {
50197                let num_part = &text[..sep_pos];
50198                let type_name = &text[sep_pos + 2..];
50199                // Create a TryCast expression: TRY_CAST(number AS TYPE)
50200                let num_expr = Expression::Literal(Box::new(Literal::Number(num_part.to_string())));
50201                let data_type = match type_name {
50202                    "BIGINT" => crate::expressions::DataType::BigInt { length: None },
50203                    "SMALLINT" => crate::expressions::DataType::SmallInt { length: None },
50204                    "TINYINT" => crate::expressions::DataType::TinyInt { length: None },
50205                    "DOUBLE" => crate::expressions::DataType::Double {
50206                        precision: None,
50207                        scale: None,
50208                    },
50209                    "FLOAT" => crate::expressions::DataType::Float {
50210                        precision: None,
50211                        scale: None,
50212                        real_spelling: false,
50213                    },
50214                    "DECIMAL" => crate::expressions::DataType::Decimal {
50215                        precision: None,
50216                        scale: None,
50217                    },
50218                    _ => crate::expressions::DataType::Custom {
50219                        name: type_name.to_string(),
50220                    },
50221                };
50222                return Ok(Some(Expression::TryCast(Box::new(
50223                    crate::expressions::Cast {
50224                        this: num_expr,
50225                        to: data_type,
50226                        trailing_comments: Vec::new(),
50227                        double_colon_syntax: false,
50228                        format: None,
50229                        default: None,
50230                        inferred_type: None,
50231                    },
50232                ))));
50233            }
50234            return Ok(Some(Expression::Literal(Box::new(Literal::Number(text)))));
50235        }
50236        Ok(None)
50237    }
50238
50239    /// parse_odbc_datetime_literal - Ported from Python _parse_odbc_datetime_literal
50240    #[allow(unused_variables, unused_mut)]
50241    /// parse_odbc_datetime_literal - Parses ODBC datetime literals
50242    /// Examples: {d'2023-01-01'}, {t'12:00:00'}, {ts'2023-01-01 12:00:00'}
50243    pub fn parse_odbc_datetime_literal(&mut self) -> Result<Option<Expression>> {
50244        // Match the type indicator (d, t, ts)
50245        if !self.match_token(TokenType::Var) {
50246            return Ok(None);
50247        }
50248        let type_indicator = self.previous().text.to_lowercase();
50249
50250        // Parse the string value
50251        let value = self.parse_string()?;
50252        if value.is_none() {
50253            return Ok(None);
50254        }
50255
50256        // Expect closing brace
50257        self.expect(TokenType::RBrace)?;
50258
50259        // Return appropriate expression based on type
50260        let value = value
50261            .ok_or_else(|| self.parse_error("Expected string value in ODBC datetime literal"))?;
50262        match type_indicator.as_str() {
50263            "d" => Ok(Some(Expression::Date(Box::new(UnaryFunc::new(value))))),
50264            "t" => Ok(Some(Expression::Time(Box::new(UnaryFunc::new(value))))),
50265            "ts" => Ok(Some(Expression::Timestamp(Box::new(TimestampFunc {
50266                this: Some(Box::new(value)),
50267                zone: None,
50268                with_tz: None,
50269                safe: None,
50270            })))),
50271            _ => Ok(Some(value)),
50272        }
50273    }
50274
50275    /// parse_offset - Parse OFFSET clause
50276    /// Python: if self._match(TokenType.OFFSET): return exp.Offset(this=self._parse_term())
50277    pub fn parse_offset(&mut self) -> Result<Option<Expression>> {
50278        if !self.match_token(TokenType::Offset) {
50279            return Ok(None);
50280        }
50281        // Parse the offset expression (usually a number)
50282        let offset_expr = self.parse_expression()?;
50283        Ok(Some(Expression::Offset(Box::new(Offset {
50284            this: offset_expr,
50285            rows: None,
50286        }))))
50287    }
50288
50289    /// parse_on_condition - Ported from Python _parse_on_condition
50290    #[allow(unused_variables, unused_mut)]
50291    /// parse_on_condition - Parses ON EMPTY/ERROR/NULL conditions
50292    /// Example: NULL ON EMPTY, ERROR ON ERROR
50293    pub fn parse_on_condition(&mut self) -> Result<Option<Expression>> {
50294        // Parse ON EMPTY
50295        let empty = if self.match_text_seq(&["NULL", "ON", "EMPTY"]) {
50296            Some(Box::new(Expression::Identifier(Identifier::new(
50297                "NULL".to_string(),
50298            ))))
50299        } else if self.match_text_seq(&["ERROR", "ON", "EMPTY"]) {
50300            Some(Box::new(Expression::Identifier(Identifier::new(
50301                "ERROR".to_string(),
50302            ))))
50303        } else if self.match_text_seq(&["DEFAULT"]) {
50304            let default_val = self.parse_expression()?;
50305            if self.match_text_seq(&["ON", "EMPTY"]) {
50306                Some(Box::new(default_val))
50307            } else {
50308                None
50309            }
50310        } else {
50311            None
50312        };
50313
50314        // Parse ON ERROR
50315        let error = if self.match_text_seq(&["NULL", "ON", "ERROR"]) {
50316            Some(Box::new(Expression::Identifier(Identifier::new(
50317                "NULL".to_string(),
50318            ))))
50319        } else if self.match_text_seq(&["ERROR", "ON", "ERROR"]) {
50320            Some(Box::new(Expression::Identifier(Identifier::new(
50321                "ERROR".to_string(),
50322            ))))
50323        } else if self.match_text_seq(&["DEFAULT"]) {
50324            let default_val = self.parse_expression()?;
50325            if self.match_text_seq(&["ON", "ERROR"]) {
50326                Some(Box::new(default_val))
50327            } else {
50328                None
50329            }
50330        } else {
50331            None
50332        };
50333
50334        // Parse ON NULL
50335        let null = if self.match_text_seq(&["NULL", "ON", "NULL"]) {
50336            Some(Box::new(Expression::Identifier(Identifier::new(
50337                "NULL".to_string(),
50338            ))))
50339        } else {
50340            None
50341        };
50342
50343        if empty.is_none() && error.is_none() && null.is_none() {
50344            return Ok(None);
50345        }
50346
50347        Ok(Some(Expression::OnCondition(Box::new(OnCondition {
50348            empty,
50349            error,
50350            null,
50351        }))))
50352    }
50353
50354    /// parse_on_handling - Implemented from Python _parse_on_handling
50355    /// Calls: parse_bitwise
50356    #[allow(unused_variables, unused_mut)]
50357    pub fn parse_on_handling(&mut self) -> Result<Option<Expression>> {
50358        if self.match_text_seq(&["ON"]) {
50359            // Matched: ON
50360            return Ok(None);
50361        }
50362        if self.match_text_seq(&["ON"]) {
50363            // Matched: ON
50364            return Ok(None);
50365        }
50366        Ok(None)
50367    }
50368
50369    /// parse_on_property - Implemented from Python _parse_on_property
50370    #[allow(unused_variables, unused_mut)]
50371    pub fn parse_on_property(&mut self) -> Result<Option<Expression>> {
50372        if self.match_text_seq(&["COMMIT", "PRESERVE", "ROWS"]) {
50373            return Ok(Some(Expression::OnCommitProperty(Box::new(
50374                OnCommitProperty { delete: None },
50375            ))));
50376        }
50377        if self.match_text_seq(&["COMMIT", "DELETE", "ROWS"]) {
50378            // Matched: COMMIT DELETE ROWS
50379            return Ok(None);
50380        }
50381        Ok(None)
50382    }
50383
50384    /// parse_opclass - Ported from Python _parse_opclass
50385    #[allow(unused_variables, unused_mut)]
50386    /// parse_opclass - Parses PostgreSQL operator class in index expressions
50387    /// Example: column_name text_pattern_ops
50388    pub fn parse_opclass(&mut self) -> Result<Option<Expression>> {
50389        // Parse the expression first
50390        let this = self.parse_expression()?;
50391
50392        // Check for keywords that would indicate this is not an opclass
50393        // (e.g., ASC, DESC, NULLS, etc.)
50394        if self.check(TokenType::Asc)
50395            || self.check(TokenType::Desc)
50396            || self.check(TokenType::Nulls)
50397            || self.check(TokenType::Comma)
50398            || self.check(TokenType::RParen)
50399        {
50400            return Ok(Some(this));
50401        }
50402
50403        // Try to parse an operator class name (table parts)
50404        if let Some(opclass_name) = self.parse_table()? {
50405            return Ok(Some(Expression::Opclass(Box::new(Opclass {
50406                this: Box::new(this),
50407                expression: Box::new(opclass_name),
50408            }))));
50409        }
50410
50411        Ok(Some(this))
50412    }
50413
50414    /// parse_open_json - Parses SQL Server OPENJSON function
50415    /// Example: OPENJSON(json, '$.path') WITH (col1 type '$.path' AS JSON, ...)
50416    pub fn parse_open_json(&mut self) -> Result<Option<Expression>> {
50417        // Parse the JSON expression
50418        let this = self.parse_expression()?;
50419
50420        // Parse optional path
50421        let path = if self.match_token(TokenType::Comma) {
50422            self.parse_string()?.map(Box::new)
50423        } else {
50424            None
50425        };
50426
50427        // Check for closing paren and WITH clause
50428        let expressions = if self.match_token(TokenType::RParen)
50429            && self.match_token(TokenType::With)
50430        {
50431            self.expect(TokenType::LParen)?;
50432            let mut cols = Vec::new();
50433            loop {
50434                // Parse column definition: name type 'path' [AS JSON]
50435                let col_name = self.parse_field()?;
50436                if col_name.is_none() {
50437                    break;
50438                }
50439                let col_type = self.parse_data_type()?;
50440                let col_path = self.parse_string()?.map(Box::new);
50441                let as_json = if self.match_token(TokenType::As) && self.match_identifier("JSON") {
50442                    Some(Box::new(Expression::Boolean(BooleanLiteral {
50443                        value: true,
50444                    })))
50445                } else {
50446                    None
50447                };
50448                cols.push(Expression::OpenJSONColumnDef(Box::new(OpenJSONColumnDef {
50449                    this: Box::new(col_name.ok_or_else(|| {
50450                        self.parse_error("Expected column name in OPENJSON WITH clause")
50451                    })?),
50452                    kind: String::new(), // kept for backwards compat, use data_type instead
50453                    path: col_path,
50454                    as_json,
50455                    data_type: Some(col_type),
50456                })));
50457                if !self.match_token(TokenType::Comma) {
50458                    break;
50459                }
50460            }
50461            self.expect(TokenType::RParen)?;
50462            cols
50463        } else {
50464            Vec::new()
50465        };
50466
50467        Ok(Some(Expression::OpenJSON(Box::new(OpenJSON {
50468            this: Box::new(this),
50469            path,
50470            expressions,
50471        }))))
50472    }
50473
50474    /// parse_operator - Ported from Python _parse_operator
50475    #[allow(unused_variables, unused_mut)]
50476    /// parse_operator - Parses PostgreSQL OPERATOR(op) syntax
50477    /// Example: col1 OPERATOR(~>) col2
50478    pub fn parse_operator(&mut self, this: Option<Expression>) -> Result<Option<Expression>> {
50479        let mut result = this;
50480
50481        // Parse OPERATOR(op) expressions
50482        while self.match_token(TokenType::LParen) {
50483            // Collect the operator text between parens
50484            let mut op_text = String::new();
50485            while !self.check(TokenType::RParen) && !self.is_at_end() {
50486                op_text.push_str(&self.peek().text);
50487                self.skip();
50488            }
50489            self.expect(TokenType::RParen)?;
50490
50491            // Parse the right-hand side expression
50492            let rhs = self.parse_expression()?;
50493
50494            result = Some(Expression::Operator(Box::new(Operator {
50495                this: Box::new(result.unwrap_or_else(|| Expression::Null(Null))),
50496                operator: Some(Box::new(Expression::Identifier(Identifier::new(op_text)))),
50497                expression: Box::new(rhs),
50498                comments: Vec::new(),
50499            })));
50500
50501            // Check if there's another OPERATOR keyword
50502            if !self.match_token(TokenType::Operator) {
50503                break;
50504            }
50505        }
50506
50507        Ok(result)
50508    }
50509
50510    /// parse_order - Parse ORDER BY clause
50511    /// Python: if not self._match(TokenType.ORDER_BY): return this; return exp.Order(expressions=self._parse_csv(self._parse_ordered))
50512    pub fn parse_order(&mut self) -> Result<Option<Expression>> {
50513        if !self.match_token(TokenType::Order) {
50514            return Ok(None);
50515        }
50516        // Consume BY if present
50517        self.match_token(TokenType::By);
50518
50519        // Parse comma-separated ordered expressions
50520        let mut expressions = Vec::new();
50521        loop {
50522            if let Some(ordered) = self.parse_ordered_item()? {
50523                expressions.push(ordered);
50524            } else {
50525                break;
50526            }
50527            if !self.match_token(TokenType::Comma) {
50528                break;
50529            }
50530        }
50531
50532        Ok(Some(Expression::OrderBy(Box::new(OrderBy {
50533            expressions,
50534            siblings: false,
50535            comments: Vec::new(),
50536        }))))
50537    }
50538
50539    /// parse_ordered_item - Parse a single ORDER BY item (expr [ASC|DESC] [NULLS FIRST|LAST])
50540    fn parse_ordered_item(&mut self) -> Result<Option<Ordered>> {
50541        // Parse the expression to order by
50542        let expr = match self.parse_expression() {
50543            Ok(e) => e,
50544            Err(_) => return Ok(None),
50545        };
50546
50547        // Check for ASC/DESC
50548        let mut desc = false;
50549        let mut explicit_asc = false;
50550        if self.match_token(TokenType::Asc) {
50551            explicit_asc = true;
50552        } else if self.match_token(TokenType::Desc) {
50553            desc = true;
50554        }
50555
50556        // Check for NULLS FIRST/LAST
50557        let nulls_first = if self.match_text_seq(&["NULLS", "FIRST"]) {
50558            Some(true)
50559        } else if self.match_text_seq(&["NULLS", "LAST"]) {
50560            Some(false)
50561        } else {
50562            None
50563        };
50564
50565        // Parse optional WITH FILL clause (ClickHouse)
50566        let with_fill = if self.match_text_seq(&["WITH", "FILL"]) {
50567            let from_ = if self.match_token(TokenType::From) {
50568                Some(Box::new(self.parse_or()?))
50569            } else {
50570                None
50571            };
50572            let to = if self.match_text_seq(&["TO"]) {
50573                Some(Box::new(self.parse_or()?))
50574            } else {
50575                None
50576            };
50577            let step = if self.match_text_seq(&["STEP"]) {
50578                Some(Box::new(self.parse_or()?))
50579            } else {
50580                None
50581            };
50582            let staleness = if self.match_text_seq(&["STALENESS"]) {
50583                Some(Box::new(self.parse_or()?))
50584            } else {
50585                None
50586            };
50587            let interpolate = if self.match_text_seq(&["INTERPOLATE"]) {
50588                if self.match_token(TokenType::LParen) {
50589                    let exprs = self.parse_expression_list()?;
50590                    self.expect(TokenType::RParen)?;
50591                    if exprs.len() == 1 {
50592                        Some(Box::new(exprs.into_iter().next().unwrap()))
50593                    } else {
50594                        Some(Box::new(Expression::Tuple(Box::new(
50595                            crate::expressions::Tuple { expressions: exprs },
50596                        ))))
50597                    }
50598                } else {
50599                    None
50600                }
50601            } else {
50602                None
50603            };
50604            Some(Box::new(WithFill {
50605                from_,
50606                to,
50607                step,
50608                staleness,
50609                interpolate,
50610            }))
50611        } else {
50612            None
50613        };
50614
50615        Ok(Some(Ordered {
50616            this: expr,
50617            desc,
50618            nulls_first,
50619            explicit_asc,
50620            with_fill,
50621        }))
50622    }
50623
50624    /// parse_ordered - Implemented from Python _parse_ordered (wrapper for parse_ordered_item)
50625    #[allow(unused_variables, unused_mut)]
50626    pub fn parse_ordered(&mut self) -> Result<Option<Expression>> {
50627        if let Some(ordered) = self.parse_ordered_item()? {
50628            return Ok(Some(Expression::Ordered(Box::new(ordered))));
50629        }
50630        if self.match_text_seq(&["NULLS", "FIRST"]) {
50631            return Ok(Some(Expression::WithFill(Box::new(WithFill {
50632                from_: None,
50633                to: None,
50634                step: None,
50635                staleness: None,
50636                interpolate: None,
50637            }))));
50638        }
50639        if self.match_text_seq(&["NULLS", "LAST"]) {
50640            // Matched: NULLS LAST
50641            return Ok(None);
50642        }
50643        if self.match_text_seq(&["WITH", "FILL"]) {
50644            // Matched: WITH FILL
50645            return Ok(None);
50646        }
50647        Ok(None)
50648    }
50649
50650    /// parse_overlay - Ported from Python _parse_overlay
50651    /// Parses OVERLAY function: OVERLAY(string PLACING replacement FROM position [FOR length])
50652    #[allow(unused_variables, unused_mut)]
50653    pub fn parse_overlay(&mut self) -> Result<Option<Expression>> {
50654        // Parse the string to be modified
50655        let this = match self.parse_bitwise() {
50656            Ok(Some(expr)) => expr,
50657            Ok(None) => return Ok(None),
50658            Err(e) => return Err(e),
50659        };
50660
50661        // Parse PLACING replacement (or comma then replacement)
50662        let replacement = if self.match_text_seq(&["PLACING"]) || self.match_token(TokenType::Comma)
50663        {
50664            match self.parse_bitwise() {
50665                Ok(Some(expr)) => expr,
50666                Ok(None) => {
50667                    return Err(self.parse_error("Expected replacement expression in OVERLAY"))
50668                }
50669                Err(e) => return Err(e),
50670            }
50671        } else {
50672            return Err(self.parse_error("Expected PLACING in OVERLAY function"));
50673        };
50674
50675        // Parse FROM position (or comma then position)
50676        let from = if self.match_token(TokenType::From) || self.match_token(TokenType::Comma) {
50677            match self.parse_bitwise() {
50678                Ok(Some(expr)) => expr,
50679                Ok(None) => return Err(self.parse_error("Expected position expression in OVERLAY")),
50680                Err(e) => return Err(e),
50681            }
50682        } else {
50683            return Err(self.parse_error("Expected FROM in OVERLAY function"));
50684        };
50685
50686        // Parse optional FOR length (or comma then length)
50687        let length = if self.match_token(TokenType::For) || self.match_token(TokenType::Comma) {
50688            match self.parse_bitwise() {
50689                Ok(Some(expr)) => Some(expr),
50690                Ok(None) => None,
50691                Err(_) => None,
50692            }
50693        } else {
50694            None
50695        };
50696
50697        Ok(Some(Expression::Overlay(Box::new(OverlayFunc {
50698            this,
50699            replacement,
50700            from,
50701            length,
50702        }))))
50703    }
50704
50705    /// parse_parameter - Parse named parameter (@name or :name)
50706    /// Python: this = self._parse_identifier() or self._parse_primary_or_var(); return exp.Parameter(this=this)
50707    pub fn parse_parameter(&mut self) -> Result<Option<Expression>> {
50708        // Check for parameter token types
50709        if self.match_token(TokenType::Parameter) {
50710            let text = self.previous().text.clone();
50711            return Ok(Some(Expression::Parameter(Box::new(Parameter {
50712                name: Some(text),
50713                index: None,
50714                style: ParameterStyle::Colon,
50715                quoted: false,
50716                string_quoted: false,
50717                expression: None,
50718            }))));
50719        }
50720
50721        // Check for session parameter (@@name)
50722        if self.match_token(TokenType::SessionParameter) {
50723            let text = self.previous().text.clone();
50724            return Ok(Some(Expression::SessionParameter(Box::new(
50725                SessionParameter {
50726                    this: Box::new(Expression::Identifier(Identifier::new(text))),
50727                    kind: None,
50728                },
50729            ))));
50730        }
50731
50732        Ok(None)
50733    }
50734
50735    /// parse_paren - Ported from Python _parse_paren
50736    /// Parses parenthesized expressions: (expr), (select ...), or (a, b, c)
50737    #[allow(unused_variables, unused_mut)]
50738    pub fn parse_paren(&mut self) -> Result<Option<Expression>> {
50739        if !self.match_token(TokenType::LParen) {
50740            return Ok(None);
50741        }
50742
50743        // Check for empty tuple ()
50744        if self.match_token(TokenType::RParen) {
50745            return Ok(Some(Expression::Tuple(Box::new(Tuple {
50746                expressions: Vec::new(),
50747            }))));
50748        }
50749
50750        // Try to parse as subquery first
50751        // ClickHouse also allows (EXPLAIN ...) as subquery
50752        if self.check(TokenType::Select)
50753            || self.check(TokenType::With)
50754            || (matches!(
50755                self.config.dialect,
50756                Some(crate::dialects::DialectType::ClickHouse)
50757            ) && self.check(TokenType::Var)
50758                && self.peek().text.eq_ignore_ascii_case("EXPLAIN"))
50759        {
50760            let query = self.parse_statement()?;
50761            self.expect(TokenType::RParen)?;
50762            return Ok(Some(Expression::Subquery(Box::new(Subquery {
50763                this: query,
50764                alias: None,
50765                column_aliases: Vec::new(),
50766                order_by: None,
50767                limit: None,
50768                offset: None,
50769                lateral: false,
50770                modifiers_inside: true,
50771                trailing_comments: Vec::new(),
50772                distribute_by: None,
50773                sort_by: None,
50774                cluster_by: None,
50775                inferred_type: None,
50776            }))));
50777        }
50778
50779        // Parse comma-separated expressions
50780        let mut expressions = Vec::new();
50781        let mut trailing_comma = false;
50782        loop {
50783            match self.parse_expression() {
50784                Ok(expr) => expressions.push(expr),
50785                Err(_) => break,
50786            }
50787            if !self.match_token(TokenType::Comma) {
50788                break;
50789            }
50790            // ClickHouse: trailing comma makes a single-element tuple, e.g., (1,)
50791            if self.check(TokenType::RParen) {
50792                trailing_comma = true;
50793                break;
50794            }
50795        }
50796
50797        self.expect(TokenType::RParen)?;
50798
50799        // Single expression with trailing comma → tuple, e.g., (1,)
50800        if trailing_comma && expressions.len() == 1 {
50801            return Ok(Some(Expression::Tuple(Box::new(Tuple { expressions }))));
50802        }
50803
50804        // Single expression - return the unwrapped Paren
50805        if expressions.len() == 1 {
50806            return Ok(Some(Expression::Paren(Box::new(Paren {
50807                this: expressions.remove(0),
50808                trailing_comments: Vec::new(),
50809            }))));
50810        }
50811
50812        // Multiple expressions - return as tuple
50813        Ok(Some(Expression::Tuple(Box::new(Tuple { expressions }))))
50814    }
50815
50816    /// parse_partition - Parses PARTITION/SUBPARTITION clause
50817    /// Python: _parse_partition
50818    pub fn parse_partition(&mut self) -> Result<Option<Expression>> {
50819        // PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}
50820        if !self.match_texts(&["PARTITION", "SUBPARTITION"]) {
50821            return Ok(None);
50822        }
50823
50824        let subpartition = self.previous().text.eq_ignore_ascii_case("SUBPARTITION");
50825
50826        // Parse wrapped CSV of disjunction expressions
50827        if !self.match_token(TokenType::LParen) {
50828            // Without parentheses, still return a Partition with empty expressions
50829            return Ok(Some(Expression::Partition(Box::new(Partition {
50830                expressions: Vec::new(),
50831                subpartition,
50832            }))));
50833        }
50834
50835        let mut expressions = Vec::new();
50836        loop {
50837            if let Some(expr) = self.parse_disjunction()? {
50838                expressions.push(expr);
50839            } else {
50840                break;
50841            }
50842
50843            if !self.match_token(TokenType::Comma) {
50844                break;
50845            }
50846        }
50847
50848        self.match_token(TokenType::RParen);
50849
50850        Ok(Some(Expression::Partition(Box::new(Partition {
50851            expressions,
50852            subpartition,
50853        }))))
50854    }
50855
    /// parse_partition_and_order - Delegates to parse_partition_by
    ///
    /// Despite the name, this wrapper performs no ORDER BY handling of its own: the
    /// result is exactly whatever `parse_partition_by` returns.
    #[allow(unused_variables, unused_mut)]
    pub fn parse_partition_and_order(&mut self) -> Result<Option<Expression>> {
        self.parse_partition_by()
    }
50861
50862    /// parse_partition_bound_spec - Implemented from Python _parse_partition_bound_spec
50863    /// Calls: parse_bitwise, parse_number
50864    #[allow(unused_variables, unused_mut)]
50865    pub fn parse_partition_bound_spec_legacy(&mut self) -> Result<Option<Expression>> {
50866        if self.match_text_seq(&["MINVALUE"]) {
50867            return Ok(Some(Expression::PartitionBoundSpec(Box::new(
50868                PartitionBoundSpec {
50869                    this: None,
50870                    expression: None,
50871                    from_expressions: None,
50872                    to_expressions: None,
50873                },
50874            ))));
50875        }
50876        if self.match_text_seq(&["MAXVALUE"]) {
50877            // Matched: MAXVALUE
50878            return Ok(None);
50879        }
50880        if self.match_text_seq(&["TO"]) {
50881            // Matched: TO
50882            return Ok(None);
50883        }
50884        Ok(None)
50885    }
50886
50887    /// parse_partition_by - Ported from Python _parse_partition_by
50888    /// Parses PARTITION BY expression list
50889    #[allow(unused_variables, unused_mut)]
50890    pub fn parse_partition_by(&mut self) -> Result<Option<Expression>> {
50891        if !self.match_keywords(&[TokenType::Partition, TokenType::By]) {
50892            return Ok(None);
50893        }
50894        let expressions = self.parse_expression_list()?;
50895        Ok(Some(Expression::Tuple(Box::new(Tuple { expressions }))))
50896    }
50897
50898    /// parse_partitioned_by - Parses PARTITIONED BY clause
50899    /// Python: _parse_partitioned_by
50900    pub fn parse_partitioned_by(&mut self) -> Result<Option<Expression>> {
50901        // Optionally match '='
50902        self.match_token(TokenType::Eq);
50903
50904        // Try to parse a schema first
50905        if let Some(schema) = self.parse_schema()? {
50906            return Ok(Some(Expression::PartitionedByProperty(Box::new(
50907                PartitionedByProperty {
50908                    this: Box::new(schema),
50909                },
50910            ))));
50911        }
50912
50913        // Fall back to bracket(field)
50914        if let Some(bracket) = self.parse_bracket()? {
50915            return Ok(Some(Expression::PartitionedByProperty(Box::new(
50916                PartitionedByProperty {
50917                    this: Box::new(bracket),
50918                },
50919            ))));
50920        }
50921
50922        // Try to parse a field directly
50923        if let Some(field) = self.parse_field()? {
50924            return Ok(Some(Expression::PartitionedByProperty(Box::new(
50925                PartitionedByProperty {
50926                    this: Box::new(field),
50927                },
50928            ))));
50929        }
50930
50931        Ok(None)
50932    }
50933
50934    /// parse_partitioned_by_bucket_or_truncate - Parses BUCKET or TRUNCATE partition transforms
50935    /// Python: _parse_partitioned_by_bucket_or_truncate
50936    /// Syntax: BUCKET(col, num_buckets) or TRUNCATE(col, width)
50937    /// Handles both Hive (num, col) and Trino (col, num) ordering, normalizes to (col, num)
50938    pub fn parse_partitioned_by_bucket_or_truncate(&mut self) -> Result<Option<Expression>> {
50939        // If no L_PAREN follows, this should be parsed as an identifier, not a function call
50940        if !self.check(TokenType::LParen) {
50941            // Retreat: go back one token (previous was BUCKET or TRUNCATE)
50942            if self.current > 0 {
50943                self.current -= 1;
50944            }
50945            return Ok(None);
50946        }
50947
50948        // Determine if it's BUCKET or TRUNCATE based on previous token
50949        let is_bucket = self.previous().text.eq_ignore_ascii_case("BUCKET");
50950
50951        // Parse wrapped arguments
50952        self.expect(TokenType::LParen)?;
50953        let mut args = Vec::new();
50954
50955        if !self.check(TokenType::RParen) {
50956            loop {
50957                // Try to parse primary or column
50958                if let Some(expr) = self.parse_primary_or_var()? {
50959                    args.push(expr);
50960                } else if let Some(col) = self.parse_column()? {
50961                    args.push(col);
50962                }
50963
50964                if !self.match_token(TokenType::Comma) {
50965                    break;
50966                }
50967            }
50968        }
50969        self.match_token(TokenType::RParen);
50970
50971        // Get first two arguments
50972        let (mut this, mut expr) = (args.get(0).cloned(), args.get(1).cloned());
50973
50974        // Normalize: if first arg is a Literal, swap (Hive uses (num, col), Trino uses (col, num))
50975        // We canonicalize to (col, num)
50976        if let Some(Expression::Literal(_)) = &this {
50977            std::mem::swap(&mut this, &mut expr);
50978        }
50979
50980        // Ensure we have both arguments
50981        let this_expr = this.unwrap_or(Expression::Null(Null));
50982        let expr_expr = expr.unwrap_or(Expression::Null(Null));
50983
50984        if is_bucket {
50985            Ok(Some(Expression::PartitionedByBucket(Box::new(
50986                PartitionedByBucket {
50987                    this: Box::new(this_expr),
50988                    expression: Box::new(expr_expr),
50989                },
50990            ))))
50991        } else {
50992            Ok(Some(Expression::PartitionByTruncate(Box::new(
50993                PartitionByTruncate {
50994                    this: Box::new(this_expr),
50995                    expression: Box::new(expr_expr),
50996                },
50997            ))))
50998        }
50999    }
51000
51001    /// parse_doris_partition_by_range_or_list - Parses Doris PARTITION BY RANGE/LIST syntax
51002    /// Handles:
51003    ///   PARTITION BY RANGE (`col`) (PARTITION name VALUES LESS THAN (val), ...)
51004    ///   PARTITION BY RANGE (`col`) (PARTITION name VALUES [(val1), (val2)), ...)
51005    ///   PARTITION BY RANGE (`col`) (FROM ('start') TO ('end') INTERVAL n UNIT)
51006    ///   PARTITION BY LIST (`col`) (PARTITION name VALUES IN (val1, val2), ...)
51007    fn parse_doris_partition_by_range_or_list(&mut self, kind: &str) -> Result<Expression> {
51008        // Parse partition column expressions: (`col1`, `col2`, ...) or (STR2DATE(col, fmt))
51009        // Use parse_wrapped_csv to handle function calls in partition columns
51010        let partition_expressions = self.parse_wrapped_csv()?;
51011
51012        // Check for partition definitions in parentheses
51013        let create_expressions = if self.check(TokenType::LParen) {
51014            self.skip(); // consume (
51015
51016            if kind == "LIST" {
51017                // Parse LIST partition definitions: PARTITION name VALUES IN (val1, val2), ...
51018                let partitions = self.parse_doris_list_partition_definitions()?;
51019                self.expect(TokenType::RParen)?;
51020                Some(Box::new(Expression::Tuple(Box::new(Tuple {
51021                    expressions: partitions,
51022                }))))
51023            } else {
51024                // RANGE: check for FROM (dynamic), START (StarRocks dynamic), or PARTITION (static)
51025                if self.check(TokenType::From) {
51026                    // Dynamic: FROM ('start') TO ('end') INTERVAL n UNIT
51027                    let dynamic_expr = self.parse_doris_dynamic_partition()?;
51028                    self.expect(TokenType::RParen)?;
51029                    Some(Box::new(dynamic_expr))
51030                } else if self.check(TokenType::Start) {
51031                    // StarRocks dynamic: START ('val') END ('val') EVERY (expr), ...
51032                    let mut dynamics = Vec::new();
51033                    loop {
51034                        if !self.check(TokenType::Start) {
51035                            break;
51036                        }
51037                        let dynamic_expr = self.parse_starrocks_start_end_every()?;
51038                        dynamics.push(dynamic_expr);
51039                        if !self.match_token(TokenType::Comma) {
51040                            break;
51041                        }
51042                    }
51043                    self.expect(TokenType::RParen)?;
51044                    Some(Box::new(Expression::Tuple(Box::new(Tuple {
51045                        expressions: dynamics,
51046                    }))))
51047                } else if self.check(TokenType::Partition) {
51048                    // Static: PARTITION name VALUES LESS THAN (val) or VALUES [(val1), (val2))
51049                    let partitions = self.parse_doris_range_partition_definitions()?;
51050                    self.expect(TokenType::RParen)?;
51051                    Some(Box::new(Expression::Tuple(Box::new(Tuple {
51052                        expressions: partitions,
51053                    }))))
51054                } else {
51055                    self.expect(TokenType::RParen)?;
51056                    None
51057                }
51058            }
51059        } else {
51060            None
51061        };
51062
51063        if kind == "LIST" {
51064            Ok(Expression::PartitionByListProperty(Box::new(
51065                PartitionByListProperty {
51066                    partition_expressions: partition_expressions.map(Box::new),
51067                    create_expressions,
51068                },
51069            )))
51070        } else {
51071            Ok(Expression::PartitionByRangeProperty(Box::new(
51072                PartitionByRangeProperty {
51073                    partition_expressions: partition_expressions.map(Box::new),
51074                    create_expressions,
51075                },
51076            )))
51077        }
51078    }
51079
51080    /// Parse Doris LIST partition definitions: PARTITION name VALUES IN (val1, val2), ...
51081    fn parse_doris_list_partition_definitions(&mut self) -> Result<Vec<Expression>> {
51082        let mut partitions = Vec::new();
51083        loop {
51084            if !self.match_token(TokenType::Partition) {
51085                break;
51086            }
51087            let name = self.parse_id_var()?.unwrap_or(Expression::Null(Null));
51088            self.match_text_seq(&["VALUES", "IN"]);
51089            let values = self.parse_wrapped_csv_expressions()?;
51090
51091            let part_list = Expression::PartitionList(Box::new(PartitionList {
51092                this: Box::new(name),
51093                expressions: values,
51094            }));
51095            partitions.push(Expression::Partition(Box::new(Partition {
51096                expressions: vec![part_list],
51097                subpartition: false,
51098            })));
51099
51100            if !self.match_token(TokenType::Comma) {
51101                break;
51102            }
51103        }
51104        Ok(partitions)
51105    }
51106
51107    /// Parse Doris RANGE partition definitions
51108    fn parse_doris_range_partition_definitions(&mut self) -> Result<Vec<Expression>> {
51109        let mut partitions = Vec::new();
51110        loop {
51111            if !self.match_token(TokenType::Partition) {
51112                break;
51113            }
51114            let name = self.parse_id_var()?.unwrap_or(Expression::Null(Null));
51115            self.match_text_seq(&["VALUES"]);
51116
51117            let part_range = if self.match_text_seq(&["LESS", "THAN"]) {
51118                if self.match_token(TokenType::Maxvalue) {
51119                    // VALUES LESS THAN MAXVALUE (without parens)
51120                    Expression::PartitionRange(Box::new(PartitionRange {
51121                        this: Box::new(name),
51122                        expression: None,
51123                        expressions: vec![Expression::Identifier(Identifier::new("MAXVALUE"))],
51124                    }))
51125                } else {
51126                    // VALUES LESS THAN (val) or VALUES LESS THAN (MAXVALUE)
51127                    let values = self.parse_wrapped_csv_expressions()?;
51128                    Expression::PartitionRange(Box::new(PartitionRange {
51129                        this: Box::new(name),
51130                        expression: None,
51131                        expressions: values,
51132                    }))
51133                }
51134            } else if self.check(TokenType::LBracket) {
51135                // VALUES [(val1), (val2)) - note asymmetric brackets
51136                self.skip(); // consume [
51137                let mut value_tuples = Vec::new();
51138                loop {
51139                    let vals = self.parse_wrapped_csv_expressions()?;
51140                    // Wrap in a Tuple for each (val)
51141                    value_tuples.push(Expression::Tuple(Box::new(Tuple { expressions: vals })));
51142                    if !self.match_token(TokenType::Comma) {
51143                        break;
51144                    }
51145                }
51146                // Expect ) to close the asymmetric bracket
51147                self.expect(TokenType::RParen)?;
51148                Expression::PartitionRange(Box::new(PartitionRange {
51149                    this: Box::new(name),
51150                    expression: None,
51151                    expressions: value_tuples,
51152                }))
51153            } else {
51154                // Fallback: no values
51155                Expression::PartitionRange(Box::new(PartitionRange {
51156                    this: Box::new(name),
51157                    expression: None,
51158                    expressions: Vec::new(),
51159                }))
51160            };
51161
51162            partitions.push(Expression::Partition(Box::new(Partition {
51163                expressions: vec![part_range],
51164                subpartition: false,
51165            })));
51166
51167            if !self.match_token(TokenType::Comma) {
51168                break;
51169            }
51170        }
51171        Ok(partitions)
51172    }
51173
51174    /// Parse Doris dynamic partition: FROM ('start') TO ('end') INTERVAL n UNIT
51175    fn parse_doris_dynamic_partition(&mut self) -> Result<Expression> {
51176        self.expect(TokenType::From)?;
51177        let start = self.parse_wrapped_expression()?;
51178        self.expect(TokenType::To)?;
51179        let end = self.parse_wrapped_expression()?;
51180
51181        // Parse INTERVAL n UNIT
51182        let every = if self.match_token(TokenType::Interval) {
51183            let number = self.parse_expression()?;
51184            let unit = if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
51185                let unit_text = self.advance().text.to_ascii_uppercase();
51186                // Convert unit text to IntervalUnit
51187                let interval_unit = match unit_text.as_str() {
51188                    "YEAR" | "YEARS" => crate::expressions::IntervalUnit::Year,
51189                    "MONTH" | "MONTHS" => crate::expressions::IntervalUnit::Month,
51190                    "DAY" | "DAYS" => crate::expressions::IntervalUnit::Day,
51191                    "HOUR" | "HOURS" => crate::expressions::IntervalUnit::Hour,
51192                    "MINUTE" | "MINUTES" => crate::expressions::IntervalUnit::Minute,
51193                    "SECOND" | "SECONDS" => crate::expressions::IntervalUnit::Second,
51194                    _ => crate::expressions::IntervalUnit::Day, // Default fallback
51195                };
51196                Some(crate::expressions::IntervalUnitSpec::Simple {
51197                    unit: interval_unit,
51198                    use_plural: unit_text.ends_with('S'),
51199                })
51200            } else {
51201                None
51202            };
51203            Some(Box::new(Expression::Interval(Box::new(Interval {
51204                this: Some(number),
51205                unit,
51206            }))))
51207        } else {
51208            None
51209        };
51210
51211        Ok(Expression::PartitionByRangePropertyDynamic(Box::new(
51212            PartitionByRangePropertyDynamic {
51213                this: None,
51214                start: Some(Box::new(start)),
51215                end: Some(Box::new(end)),
51216                every,
51217                use_start_end: false,
51218            },
51219        )))
51220    }
51221
51222    /// Parse StarRocks START ('val') END ('val') EVERY (expr) syntax
51223    fn parse_starrocks_start_end_every(&mut self) -> Result<Expression> {
51224        self.expect(TokenType::Start)?;
51225        let start = self.parse_wrapped_expression()?;
51226        self.expect(TokenType::End)?;
51227        let end = self.parse_wrapped_expression()?;
51228
51229        // Parse EVERY (expr)
51230        let every = if self.match_identifier("EVERY") {
51231            self.expect(TokenType::LParen)?;
51232            let expr = self.parse_expression()?;
51233            self.expect(TokenType::RParen)?;
51234            Some(Box::new(expr))
51235        } else {
51236            None
51237        };
51238
51239        Ok(Expression::PartitionByRangePropertyDynamic(Box::new(
51240            PartitionByRangePropertyDynamic {
51241                this: None,
51242                start: Some(Box::new(start)),
51243                end: Some(Box::new(end)),
51244                every,
51245                use_start_end: true,
51246            },
51247        )))
51248    }
51249
51250    /// Parse wrapped comma-separated expressions: (expr, expr, ...)
51251    fn parse_wrapped_csv_expressions(&mut self) -> Result<Vec<Expression>> {
51252        self.expect(TokenType::LParen)?;
51253        let mut exprs = Vec::new();
51254        if !self.check(TokenType::RParen) {
51255            loop {
51256                // Check for MAXVALUE special keyword
51257                if self.match_token(TokenType::Maxvalue) {
51258                    exprs.push(Expression::Var(Box::new(Var {
51259                        this: "MAXVALUE".to_string(),
51260                    })));
51261                } else {
51262                    exprs.push(self.parse_expression()?);
51263                }
51264                if !self.match_token(TokenType::Comma) {
51265                    break;
51266                }
51267            }
51268        }
51269        self.expect(TokenType::RParen)?;
51270        Ok(exprs)
51271    }
51272
    /// Parse a single wrapped expression: (expr)
    ///
    /// Both the opening and the closing parenthesis are required; a missing one, or
    /// a failure inside `parse_expression`, is returned as an error. Only the inner
    /// expression is returned -- no wrapper node is added for the parentheses.
    fn parse_wrapped_expression(&mut self) -> Result<Expression> {
        self.expect(TokenType::LParen)?;
        let expr = self.parse_expression()?;
        self.expect(TokenType::RParen)?;
        Ok(expr)
    }
51280
51281    /// parse_partitioned_of - Implemented from Python _parse_partitioned_of
51282    #[allow(unused_variables, unused_mut)]
51283    pub fn parse_partitioned_of(&mut self) -> Result<Option<Expression>> {
51284        if self.match_text_seq(&["OF"]) {
51285            return Ok(Some(Expression::PartitionBoundSpec(Box::new(
51286                PartitionBoundSpec {
51287                    this: None,
51288                    expression: None,
51289                    from_expressions: None,
51290                    to_expressions: None,
51291                },
51292            ))));
51293        }
51294        if self.match_text_seq(&["FOR", "VALUES"]) {
51295            // Matched: FOR VALUES
51296            return Ok(None);
51297        }
51298        Ok(None)
51299    }
51300
51301    /// parse_period_for_system_time - Parses PERIOD FOR SYSTEM_TIME constraint
51302    /// Python: _parse_period_for_system_time
51303    /// Syntax: PERIOD FOR SYSTEM_TIME (start_col, end_col)
51304    pub fn parse_period_for_system_time(&mut self) -> Result<Option<Expression>> {
51305        // Check for SYSTEM_TIME / TIMESTAMP_SNAPSHOT token
51306        if !self.match_token(TokenType::TimestampSnapshot) {
51307            // Retreat: go back one token
51308            if self.current > 0 {
51309                self.current -= 1;
51310            }
51311            return Ok(None);
51312        }
51313
51314        // Parse wrapped id vars (two column names)
51315        let id_vars = self.parse_wrapped_id_vars()?;
51316
51317        // Extract the two columns from the tuple
51318        let (this, expression) = if let Some(Expression::Tuple(tuple)) = id_vars {
51319            let exprs = &tuple.expressions;
51320            (
51321                exprs.get(0).cloned().unwrap_or(Expression::Null(Null)),
51322                exprs.get(1).cloned().unwrap_or(Expression::Null(Null)),
51323            )
51324        } else {
51325            return Ok(None);
51326        };
51327
51328        Ok(Some(Expression::PeriodForSystemTimeConstraint(Box::new(
51329            PeriodForSystemTimeConstraint {
51330                this: Box::new(this),
51331                expression: Box::new(expression),
51332            },
51333        ))))
51334    }
51335
51336    /// parse_pipe_syntax_aggregate - Implemented from Python _parse_pipe_syntax_aggregate
51337    #[allow(unused_variables, unused_mut)]
51338    pub fn parse_pipe_syntax_aggregate(&mut self) -> Result<Option<Expression>> {
51339        if self.match_text_seq(&["AGGREGATE"]) {
51340            return Ok(Some(Expression::Select(Box::new(Select {
51341                expressions: Vec::new(),
51342                from: None,
51343                joins: Vec::new(),
51344                lateral_views: Vec::new(),
51345                prewhere: None,
51346                where_clause: None,
51347                group_by: None,
51348                having: None,
51349                qualify: None,
51350                order_by: None,
51351                distribute_by: None,
51352                cluster_by: None,
51353                sort_by: None,
51354                limit: None,
51355                offset: None,
51356                limit_by: None,
51357                fetch: None,
51358                distinct: false,
51359                distinct_on: None,
51360                top: None,
51361                with: None,
51362                sample: None,
51363                settings: None,
51364                format: None,
51365                windows: None,
51366                hint: None,
51367                connect: None,
51368                into: None,
51369                locks: Vec::new(),
51370                for_xml: Vec::new(),
51371                for_json: Vec::new(),
51372                leading_comments: Vec::new(),
51373                post_select_comments: Vec::new(),
51374                kind: None,
51375                operation_modifiers: Vec::new(),
51376                qualify_after_window: false,
51377                option: None,
51378                exclude: None,
51379            }))));
51380        }
51381        if self.match_text_seq(&["GROUP", "AND"]) {
51382            // Matched: GROUP AND
51383            return Ok(None);
51384        }
51385        Ok(None)
51386    }
51387
51388    /// parse_pipe_syntax_aggregate_fields - Implemented from Python _parse_pipe_syntax_aggregate_fields
51389    /// Calls: parse_disjunction
51390    #[allow(unused_variables, unused_mut)]
51391    pub fn parse_pipe_syntax_aggregate_fields(&mut self) -> Result<Option<Expression>> {
51392        if self.match_text_seq(&["GROUP", "AND"]) {
51393            // Matched: GROUP AND
51394            return Ok(None);
51395        }
51396        Ok(None)
51397    }
51398
51399    /// parse_pipe_syntax_aggregate_group_order_by - Parses pipe syntax aggregate fields with grouping and ordering
51400    /// Python: _parse_pipe_syntax_aggregate_group_order_by
51401    /// Parses comma-separated aggregate fields and separates them into aggregates/groups and ORDER BY specs
51402    /// Returns a Tuple with two elements: (aggregates_and_groups, order_by_specs)
51403    pub fn parse_pipe_syntax_aggregate_group_order_by(&mut self) -> Result<Option<Expression>> {
51404        // Parse CSV of pipe syntax aggregate fields
51405        let mut aggregates_or_groups = Vec::new();
51406        let mut orders = Vec::new();
51407
51408        loop {
51409            if let Some(element) = self.parse_pipe_syntax_aggregate_fields()? {
51410                // Check if it's an Ordered expression (ORDER BY spec)
51411                match &element {
51412                    Expression::Ordered(ordered) => {
51413                        // Extract the inner expression, potentially adjusting for alias
51414                        let this = match &ordered.this {
51415                            Expression::Alias(alias) => {
51416                                // Use the alias name as an Identifier expression
51417                                Expression::Identifier(alias.alias.clone())
51418                            }
51419                            other => other.clone(),
51420                        };
51421                        // Add modified Ordered to orders
51422                        orders.push(Expression::Ordered(Box::new(Ordered {
51423                            this: this.clone(),
51424                            desc: ordered.desc,
51425                            nulls_first: ordered.nulls_first,
51426                            explicit_asc: ordered.explicit_asc,
51427                            with_fill: ordered.with_fill.clone(),
51428                        })));
51429                        aggregates_or_groups.push(this);
51430                    }
51431                    _ => {
51432                        aggregates_or_groups.push(element);
51433                    }
51434                }
51435            }
51436
51437            if !self.match_token(TokenType::Comma) {
51438                break;
51439            }
51440        }
51441
51442        if aggregates_or_groups.is_empty() && orders.is_empty() {
51443            return Ok(None);
51444        }
51445
51446        // Return a tuple with (aggregates_or_groups, orders)
51447        Ok(Some(Expression::Tuple(Box::new(Tuple {
51448            expressions: vec![
51449                Expression::Tuple(Box::new(Tuple {
51450                    expressions: aggregates_or_groups,
51451                })),
51452                Expression::Tuple(Box::new(Tuple {
51453                    expressions: orders,
51454                })),
51455            ],
51456        }))))
51457    }
51458
51459    /// parse_pipe_syntax_extend - Implemented from Python _parse_pipe_syntax_extend
51460    #[allow(unused_variables, unused_mut)]
51461    pub fn parse_pipe_syntax_extend(&mut self) -> Result<Option<Expression>> {
51462        if self.match_text_seq(&["EXTEND"]) {
51463            return Ok(Some(Expression::Select(Box::new(Select {
51464                expressions: Vec::new(),
51465                from: None,
51466                joins: Vec::new(),
51467                lateral_views: Vec::new(),
51468                prewhere: None,
51469                where_clause: None,
51470                group_by: None,
51471                having: None,
51472                qualify: None,
51473                order_by: None,
51474                distribute_by: None,
51475                cluster_by: None,
51476                sort_by: None,
51477                limit: None,
51478                offset: None,
51479                limit_by: None,
51480                fetch: None,
51481                distinct: false,
51482                distinct_on: None,
51483                top: None,
51484                with: None,
51485                sample: None,
51486                settings: None,
51487                format: None,
51488                windows: None,
51489                hint: None,
51490                connect: None,
51491                into: None,
51492                locks: Vec::new(),
51493                for_xml: Vec::new(),
51494                for_json: Vec::new(),
51495                leading_comments: Vec::new(),
51496                post_select_comments: Vec::new(),
51497                kind: None,
51498                operation_modifiers: Vec::new(),
51499                qualify_after_window: false,
51500                option: None,
51501                exclude: None,
51502            }))));
51503        }
51504        Ok(None)
51505    }
51506
51507    /// parse_pipe_syntax_join - Parses JOIN in BigQuery pipe syntax
51508    /// Python: _parse_pipe_syntax_join
51509    /// Format: |> JOIN table ON condition
51510    pub fn parse_pipe_syntax_join(&mut self) -> Result<Option<Expression>> {
51511        // Parse the JOIN clause
51512        self.parse_join()
51513    }
51514
51515    /// parse_pipe_syntax_limit - Parses LIMIT/OFFSET in BigQuery pipe syntax
51516    /// Python: _parse_pipe_syntax_limit
51517    /// Format: |> LIMIT n [OFFSET m]
51518    pub fn parse_pipe_syntax_limit(&mut self) -> Result<Option<Expression>> {
51519        // Parse the LIMIT clause
51520        let limit = self.parse_limit()?;
51521
51522        // Parse optional OFFSET
51523        let offset = self.parse_offset()?;
51524
51525        // Combine into a tuple if both present
51526        match (limit, offset) {
51527            (Some(l), Some(o)) => Ok(Some(Expression::Tuple(Box::new(Tuple {
51528                expressions: vec![l, o],
51529            })))),
51530            (Some(l), None) => Ok(Some(l)),
51531            (None, Some(o)) => Ok(Some(o)),
51532            (None, None) => Ok(None),
51533        }
51534    }
51535
51536    /// parse_pipe_syntax_pivot - Parses PIVOT in BigQuery pipe syntax
51537    /// Python: _parse_pipe_syntax_pivot
51538    /// Format: |> PIVOT (agg_function FOR column IN (values))
51539    pub fn parse_pipe_syntax_pivot(&mut self) -> Result<Option<Expression>> {
51540        // For pipe syntax, we don't have a source yet - return pivot aggregation
51541        // The actual pivot parsing will be done in the query transformer
51542        self.parse_pivot_aggregation()
51543    }
51544
51545    /// parse_pipe_syntax_query - Parses a query with pipe syntax transformations
51546    /// Python: _parse_pipe_syntax_query
51547    /// Handles queries like: FROM table |> WHERE ... |> SELECT ... |> AGGREGATE ...
51548    pub fn parse_pipe_syntax_query(&mut self) -> Result<Option<Expression>> {
51549        // Start with a base query (could be a FROM clause or subquery)
51550        let mut query = self.parse_select_query()?;
51551
51552        if query.is_none() {
51553            return Ok(None);
51554        }
51555
51556        // Process pipe syntax chain: |> transform1 |> transform2 |> ...
51557        while self.match_token(TokenType::PipeGt) {
51558            let start_pos = self.current;
51559            let operator_text = self.peek().text.to_ascii_uppercase();
51560
51561            // Try to match known pipe syntax transforms
51562            let transform_result = match operator_text.as_str() {
51563                "WHERE" => {
51564                    self.skip();
51565                    self.parse_where()?
51566                }
51567                "SELECT" => {
51568                    self.skip();
51569                    self.parse_pipe_syntax_select()?
51570                }
51571                "AGGREGATE" => {
51572                    self.skip();
51573                    self.parse_pipe_syntax_aggregate()?
51574                }
51575                "EXTEND" => {
51576                    self.skip();
51577                    self.parse_pipe_syntax_extend()?
51578                }
51579                "LIMIT" => {
51580                    self.skip();
51581                    self.parse_pipe_syntax_limit()?
51582                }
51583                "JOIN" | "LEFT" | "RIGHT" | "INNER" | "OUTER" | "CROSS" | "FULL" => {
51584                    self.parse_pipe_syntax_join()?
51585                }
51586                "UNION" | "INTERSECT" | "EXCEPT" => self.parse_pipe_syntax_set_operator()?,
51587                "PIVOT" => {
51588                    self.skip();
51589                    self.parse_pipe_syntax_pivot()?
51590                }
51591                "TABLESAMPLE" => {
51592                    self.skip();
51593                    self.parse_pipe_syntax_tablesample()?
51594                }
51595                _ => {
51596                    // Try set operator or join as fallback
51597                    let set_op = self.parse_pipe_syntax_set_operator()?;
51598                    if set_op.is_some() {
51599                        set_op
51600                    } else {
51601                        let join_op = self.parse_pipe_syntax_join()?;
51602                        if join_op.is_some() {
51603                            join_op
51604                        } else {
51605                            // Unsupported operator, retreat and break
51606                            self.current = start_pos;
51607                            break;
51608                        }
51609                    }
51610                }
51611            };
51612
51613            // Apply transform to query
51614            if let Some(transform) = transform_result {
51615                // Wrap current query with transform in a PipeOperator
51616                let current_query = query.ok_or_else(|| {
51617                    self.parse_error("Expected base query before pipe syntax transform")
51618                })?;
51619                query = Some(Expression::PipeOperator(Box::new(PipeOperator {
51620                    this: current_query,
51621                    expression: transform,
51622                })));
51623            }
51624        }
51625
51626        Ok(query)
51627    }
51628
51629    /// parse_pipe_syntax_select - Parses SELECT in BigQuery pipe syntax
51630    /// Python: _parse_pipe_syntax_select
51631    /// Format: |> SELECT expressions
51632    pub fn parse_pipe_syntax_select(&mut self) -> Result<Option<Expression>> {
51633        // Parse the SELECT expressions without consuming the pipe
51634        let expressions = self.parse_expressions()?;
51635
51636        match expressions {
51637            Some(expr) => Ok(Some(expr)),
51638            None => Ok(Some(Expression::Star(Star {
51639                table: None,
51640                except: None,
51641                replace: None,
51642                rename: None,
51643                trailing_comments: Vec::new(),
51644                span: None,
51645            }))),
51646        }
51647    }
51648
51649    /// parse_pipe_syntax_set_operator - Parses set operation in BigQuery pipe syntax
51650    /// Python: _parse_pipe_syntax_set_operator
51651    /// Format: |> UNION ALL/INTERSECT/EXCEPT (subquery1, subquery2, ...)
51652    pub fn parse_pipe_syntax_set_operator(&mut self) -> Result<Option<Expression>> {
51653        // Try to parse as a set operation (UNION, INTERSECT, EXCEPT)
51654        if let Some(set_op) = self.parse_set_operations()? {
51655            Ok(Some(set_op))
51656        } else {
51657            Ok(None)
51658        }
51659    }
51660
51661    /// parse_pipe_syntax_tablesample - Parses TABLESAMPLE in BigQuery pipe syntax
51662    /// Python: _parse_pipe_syntax_tablesample
51663    /// Format: |> TABLESAMPLE SYSTEM (percent PERCENT)
51664    pub fn parse_pipe_syntax_tablesample(&mut self) -> Result<Option<Expression>> {
51665        // Parse the TABLESAMPLE clause
51666        self.parse_table_sample()
51667    }
51668
51669    /// parse_pivot_aggregation - Ported from Python _parse_pivot_aggregation
51670    /// Parses an aggregation function in PIVOT clause, optionally with alias
51671    #[allow(unused_variables, unused_mut)]
51672    pub fn parse_pivot_aggregation(&mut self) -> Result<Option<Expression>> {
51673        // Parse a function
51674        let func = self.parse_function()?;
51675
51676        if func.is_none() {
51677            // If previous token was a comma, silently return None
51678            if self.previous().token_type == TokenType::Comma {
51679                return Ok(None);
51680            }
51681            // Otherwise this could be an error, but we'll just return None
51682            return Ok(None);
51683        }
51684
51685        // Try to parse an alias for the function
51686        self.parse_alias_with_expr(func)
51687    }
51688
51689    /// parse_pivot_in - Parses the IN clause of a PIVOT
51690    /// Python: _parse_pivot_in
51691    /// Format: column IN (value1 [AS alias1], value2 [AS alias2], ...)
51692    pub fn parse_pivot_in(&mut self) -> Result<Option<Expression>> {
51693        // Parse the column being pivoted
51694        let value = self.parse_column()?;
51695        let value_expr = value.unwrap_or(Expression::Null(Null));
51696
51697        // Expect IN keyword
51698        if !self.match_token(TokenType::In) {
51699            return Err(self.parse_error("Expecting IN"));
51700        }
51701
51702        // Check if it's a parenthesized list or a field reference
51703        if self.match_token(TokenType::LParen) {
51704            // Check for ANY keyword
51705            let expressions = if self.match_text_seq(&["ANY"]) {
51706                // Parse PivotAny with optional ORDER BY
51707                let order = self.parse_order()?;
51708                vec![Expression::PivotAny(Box::new(PivotAny {
51709                    this: order.map(Box::new),
51710                }))]
51711            } else {
51712                // Parse comma-separated list of expressions, optionally aliased
51713                let mut exprs = Vec::new();
51714                loop {
51715                    if let Some(expr) = self.parse_select_or_expression()? {
51716                        // Check for alias
51717                        let final_expr = if self.match_token(TokenType::Alias) {
51718                            if let Some(alias) = self.parse_bitwise()? {
51719                                // Store the alias expression directly
51720                                Expression::PivotAlias(Box::new(PivotAlias { this: expr, alias }))
51721                            } else {
51722                                expr
51723                            }
51724                        } else {
51725                            expr
51726                        };
51727                        exprs.push(final_expr);
51728                    } else {
51729                        break;
51730                    }
51731                    if !self.match_token(TokenType::Comma) {
51732                        break;
51733                    }
51734                }
51735                exprs
51736            };
51737
51738            self.expect(TokenType::RParen)?;
51739
51740            Ok(Some(Expression::In(Box::new(In {
51741                this: value_expr,
51742                expressions,
51743                query: None,
51744                not: false,
51745                global: false,
51746                unnest: None,
51747                is_field: false,
51748            }))))
51749        } else {
51750            // Parse as a field reference: IN field_name
51751            let field = self.parse_id_var()?;
51752            // Convert field to expression and add to expressions
51753            let expressions = if let Some(f) = field {
51754                vec![f]
51755            } else {
51756                Vec::new()
51757            };
51758            Ok(Some(Expression::In(Box::new(In {
51759                this: value_expr,
51760                expressions,
51761                query: None,
51762                not: false,
51763                global: false,
51764                unnest: None,
51765                is_field: true,
51766            }))))
51767        }
51768    }
51769
51770    /// parse_pivots - Ported from Python _parse_pivots
51771    /// Parses one or more PIVOT/UNPIVOT clauses attached to a source expression
51772    /// Uses the existing parse_pivot/parse_unpivot methods
51773    pub fn parse_pivots_for_source(&mut self, source: Expression) -> Result<Option<Expression>> {
51774        let mut result = source;
51775
51776        loop {
51777            if self.match_token(TokenType::Pivot) {
51778                result = self.parse_pivot(result)?;
51779            } else if self.match_texts(&["UNPIVOT"]) {
51780                result = self.parse_unpivot(result)?;
51781            } else {
51782                break;
51783            }
51784        }
51785
51786        // Return None if no pivots were parsed
51787        if matches!(result, Expression::Null(_)) {
51788            Ok(None)
51789        } else {
51790            Ok(Some(result))
51791        }
51792    }
51793
51794    /// parse_placeholder - Parse placeholder token (? or :name)
51795    /// Python: if self._match_set(self.PLACEHOLDER_PARSERS): return placeholder
51796    pub fn parse_placeholder(&mut self) -> Result<Option<Expression>> {
51797        // Match positional placeholder (?)
51798        if self.match_token(TokenType::Placeholder) {
51799            return Ok(Some(Expression::Placeholder(Placeholder { index: None })));
51800        }
51801        // Match colon placeholder (:name) - handled by Parameter token
51802        if self.match_token(TokenType::Parameter) {
51803            let text = self.previous().text.clone();
51804            return Ok(Some(Expression::Parameter(Box::new(Parameter {
51805                name: Some(text),
51806                index: None,
51807                style: ParameterStyle::Colon,
51808                quoted: false,
51809                string_quoted: false,
51810                expression: None,
51811            }))));
51812        }
51813        Ok(None)
51814    }
51815
51816    /// Parse ClickHouse query parameter syntax: {name: Type}
51817    fn parse_clickhouse_braced_parameter(&mut self) -> Result<Option<Expression>> {
51818        if !matches!(
51819            self.config.dialect,
51820            Some(crate::dialects::DialectType::ClickHouse)
51821        ) {
51822            return Ok(None);
51823        }
51824        if !self.check(TokenType::LBrace) {
51825            return Ok(None);
51826        }
51827
51828        let start = self.current;
51829        self.skip(); // consume {
51830
51831        if !(self.is_identifier_token() || self.is_safe_keyword_as_identifier()) {
51832            self.current = start;
51833            return Ok(None);
51834        }
51835        let name = self.advance().text.clone();
51836
51837        if !self.match_token(TokenType::Colon) {
51838            self.current = start;
51839            return Ok(None);
51840        }
51841
51842        let kind_start = self.current;
51843        let mut paren_depth = 0usize;
51844        let mut bracket_depth = 0usize;
51845
51846        while !self.is_at_end() {
51847            let token_type = self.peek().token_type;
51848            match token_type {
51849                TokenType::LParen => {
51850                    paren_depth += 1;
51851                    self.skip();
51852                }
51853                TokenType::RParen => {
51854                    if paren_depth == 0 {
51855                        break;
51856                    }
51857                    paren_depth -= 1;
51858                    self.skip();
51859                }
51860                TokenType::LBracket => {
51861                    bracket_depth += 1;
51862                    self.skip();
51863                }
51864                TokenType::RBracket => {
51865                    if bracket_depth == 0 {
51866                        break;
51867                    }
51868                    bracket_depth -= 1;
51869                    self.skip();
51870                }
51871                TokenType::RBrace => {
51872                    if paren_depth == 0 && bracket_depth == 0 {
51873                        break;
51874                    }
51875                    self.skip();
51876                }
51877                _ => {
51878                    self.skip();
51879                }
51880            }
51881        }
51882
51883        if self.current <= kind_start || !self.match_token(TokenType::RBrace) {
51884            return Err(self.parse_error("Expected } in ClickHouse query parameter"));
51885        }
51886
51887        let kind = self
51888            .tokens_to_sql(kind_start, self.current - 1)
51889            .trim()
51890            .to_string();
51891        if kind.is_empty() {
51892            return Err(self.parse_error("Expected parameter kind in ClickHouse query parameter"));
51893        }
51894
51895        Ok(Some(Expression::Parameter(Box::new(Parameter {
51896            name: Some(name),
51897            index: None,
51898            style: ParameterStyle::Brace,
51899            quoted: false,
51900            string_quoted: false,
51901            expression: Some(kind),
51902        }))))
51903    }
51904
51905    /// parse_position - Ported from Python _parse_position
51906    /// Parses POSITION function: POSITION(substr IN str) or POSITION(needle, haystack, start)
51907    #[allow(unused_variables, unused_mut)]
51908    pub fn parse_position(&mut self) -> Result<Option<Expression>> {
51909        // Parse comma-separated arguments first
51910        let mut args: Vec<Expression> = Vec::new();
51911
51912        match self.parse_bitwise() {
51913            Ok(Some(expr)) => {
51914                let expr = self.maybe_clickhouse_alias(expr);
51915                let expr = self.try_clickhouse_func_arg_alias(expr);
51916                args.push(expr);
51917            }
51918            Ok(None) => return Ok(None),
51919            Err(e) => return Err(e),
51920        }
51921
51922        // Check for IN keyword (SQL standard syntax: POSITION(substr IN str))
51923        if self.match_token(TokenType::In) {
51924            match self.parse_bitwise() {
51925                Ok(Some(haystack)) => {
51926                    let haystack = self.maybe_clickhouse_alias(haystack);
51927                    let haystack = self.try_clickhouse_func_arg_alias(haystack);
51928                    return Ok(Some(Expression::StrPosition(Box::new(StrPosition {
51929                        this: Box::new(haystack),
51930                        substr: Some(Box::new(args.remove(0))),
51931                        position: None,
51932                        occurrence: None,
51933                    }))));
51934                }
51935                Ok(None) => {
51936                    return Err(self.parse_error("Expected expression after IN in POSITION"))
51937                }
51938                Err(e) => return Err(e),
51939            }
51940        }
51941
51942        // Parse comma-separated additional arguments
51943        while self.match_token(TokenType::Comma) {
51944            match self.parse_bitwise() {
51945                Ok(Some(expr)) => {
51946                    let expr = self.maybe_clickhouse_alias(expr);
51947                    let expr = self.try_clickhouse_func_arg_alias(expr);
51948                    args.push(expr);
51949                }
51950                Ok(None) => break,
51951                Err(e) => return Err(e),
51952            }
51953        }
51954
51955        // Function syntax: POSITION(needle, haystack, start?) or ClickHouse POSITION(haystack, needle, start?)
51956        let position = args.get(2).cloned();
51957        let (haystack, needle) = if matches!(
51958            self.config.dialect,
51959            Some(crate::dialects::DialectType::ClickHouse)
51960        ) {
51961            (args.get(0).cloned(), args.get(1).cloned())
51962        } else {
51963            (args.get(1).cloned(), args.get(0).cloned())
51964        };
51965
51966        Ok(Some(Expression::StrPosition(Box::new(StrPosition {
51967            this: Box::new(
51968                haystack.unwrap_or_else(|| {
51969                    Expression::Literal(Box::new(Literal::String("".to_string())))
51970                }),
51971            ),
51972            substr: needle.map(Box::new),
51973            position: position.map(Box::new),
51974            occurrence: None,
51975        }))))
51976    }
51977
51978    /// parse_prewhere - Ported from Python _parse_prewhere
51979    /// Parses PREWHERE clause (ClickHouse specific)
51980    #[allow(unused_variables, unused_mut)]
51981    pub fn parse_prewhere(&mut self) -> Result<Option<Expression>> {
51982        if !self.match_token(TokenType::Prewhere) {
51983            return Ok(None);
51984        }
51985        // Parse the condition expression
51986        let condition = self.parse_expression()?;
51987        Ok(Some(Expression::PreWhere(Box::new(PreWhere {
51988            this: condition,
51989        }))))
51990    }
51991
51992    /// parse_primary_key - Parses PRIMARY KEY constraint
51993    /// Python: _parse_primary_key
51994    /// Can return either PrimaryKeyColumnConstraint (column-level) or PrimaryKey (table-level)
51995    pub fn parse_primary_key(&mut self) -> Result<Option<Expression>> {
51996        self.parse_primary_key_impl(false, false)
51997    }
51998
51999    /// Implementation of parse_primary_key with options
52000    pub fn parse_primary_key_impl(
52001        &mut self,
52002        wrapped_optional: bool,
52003        in_props: bool,
52004    ) -> Result<Option<Expression>> {
52005        // Check for ASC/DESC
52006        let desc = if self.match_token(TokenType::Asc) {
52007            false
52008        } else if self.match_token(TokenType::Desc) {
52009            true
52010        } else {
52011            false
52012        };
52013
52014        // Parse optional constraint name (if current token is identifier and next is L_PAREN)
52015        let this = if (self.check(TokenType::Identifier) || self.check(TokenType::Var))
52016            && self.check_next(TokenType::LParen)
52017        {
52018            self.parse_id_var()?
52019        } else {
52020            None
52021        };
52022
52023        // If not in_props and no L_PAREN ahead, return column-level constraint
52024        if !in_props && !self.check(TokenType::LParen) {
52025            let options = self.parse_key_constraint_options_list()?;
52026            return Ok(Some(Expression::PrimaryKeyColumnConstraint(Box::new(
52027                PrimaryKeyColumnConstraint {
52028                    desc: if desc {
52029                        Some(Box::new(Expression::Boolean(BooleanLiteral {
52030                            value: true,
52031                        })))
52032                    } else {
52033                        None
52034                    },
52035                    options,
52036                },
52037            ))));
52038        }
52039
52040        // Parse table-level PRIMARY KEY (column_list)
52041        let expressions = if self.match_token(TokenType::LParen) {
52042            let mut exprs = Vec::new();
52043            loop {
52044                if let Some(part) = self.parse_primary_key_part()? {
52045                    exprs.push(part);
52046                }
52047                if !self.match_token(TokenType::Comma) {
52048                    break;
52049                }
52050            }
52051            self.expect(TokenType::RParen)?;
52052            exprs
52053        } else if wrapped_optional {
52054            Vec::new()
52055        } else {
52056            return Err(self.parse_error("Expected '(' for PRIMARY KEY column list"));
52057        };
52058
52059        // Parse INCLUDE clause for covering index
52060        let include = self.parse_index_params()?;
52061
52062        // Parse constraint options
52063        let options = self.parse_key_constraint_options_list()?;
52064
52065        Ok(Some(Expression::PrimaryKey(Box::new(PrimaryKey {
52066            this: this.map(Box::new),
52067            expressions,
52068            options,
52069            include: include.map(Box::new),
52070        }))))
52071    }
52072
52073    /// Parse key constraint options as a list of expressions
52074    fn parse_key_constraint_options_list(&mut self) -> Result<Vec<Expression>> {
52075        let mut options = Vec::new();
52076
52077        loop {
52078            if self.is_at_end() {
52079                break;
52080            }
52081
52082            if self.match_token(TokenType::On) {
52083                // Parse ON DELETE/UPDATE action
52084                let on_what = if !self.is_at_end() {
52085                    let token = self.advance();
52086                    token.text.clone()
52087                } else {
52088                    break;
52089                };
52090
52091                let action = if self.match_text_seq(&["NO", "ACTION"]) {
52092                    "NO ACTION"
52093                } else if self.match_text_seq(&["CASCADE"]) {
52094                    "CASCADE"
52095                } else if self.match_text_seq(&["RESTRICT"]) {
52096                    "RESTRICT"
52097                } else if self.match_token(TokenType::Set) && self.match_token(TokenType::Null) {
52098                    "SET NULL"
52099                } else if self.match_token(TokenType::Set) && self.match_token(TokenType::Default) {
52100                    "SET DEFAULT"
52101                } else {
52102                    break;
52103                };
52104
52105                options.push(Expression::Var(Box::new(Var {
52106                    this: format!("ON {} {}", on_what, action),
52107                })));
52108            } else if self.match_text_seq(&["NOT", "ENFORCED"]) {
52109                options.push(Expression::Var(Box::new(Var {
52110                    this: "NOT ENFORCED".to_string(),
52111                })));
52112            } else if self.match_text_seq(&["DEFERRABLE"]) {
52113                options.push(Expression::Var(Box::new(Var {
52114                    this: "DEFERRABLE".to_string(),
52115                })));
52116            } else if self.match_text_seq(&["INITIALLY", "DEFERRED"]) {
52117                options.push(Expression::Var(Box::new(Var {
52118                    this: "INITIALLY DEFERRED".to_string(),
52119                })));
52120            } else if self.match_text_seq(&["NORELY"]) {
52121                options.push(Expression::Var(Box::new(Var {
52122                    this: "NORELY".to_string(),
52123                })));
52124            } else if self.match_text_seq(&["RELY"]) {
52125                options.push(Expression::Var(Box::new(Var {
52126                    this: "RELY".to_string(),
52127                })));
52128            } else {
52129                break;
52130            }
52131        }
52132
52133        Ok(options)
52134    }
52135
52136    /// parse_primary_key_part - Delegates to parse_field
52137    #[allow(unused_variables, unused_mut)]
52138    pub fn parse_primary_key_part(&mut self) -> Result<Option<Expression>> {
52139        // ClickHouse: PRIMARY KEY can contain full expressions (e.g., t.a, c0 IN (SELECT 1))
52140        if matches!(
52141            self.config.dialect,
52142            Some(crate::dialects::DialectType::ClickHouse)
52143        ) {
52144            return self.parse_expression().map(Some);
52145        }
52146        if (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
52147            && self.check_next(TokenType::LParen)
52148        {
52149            return self.parse_expression().map(Some);
52150        }
52151        if let Some(field) = self.parse_field()? {
52152            Ok(Some(field))
52153        } else {
52154            self.parse_expression().map(Some)
52155        }
52156    }
52157
52158    /// parse_primary_or_var - Parses a primary expression or variable
52159    /// Python: _parse_primary_or_var
52160    /// Returns: parse_primary() or parse_var(any_token=True)
52161    pub fn parse_primary_or_var(&mut self) -> Result<Option<Expression>> {
52162        // First try to parse a primary expression
52163        let saved_pos = self.current;
52164        match self.parse_primary() {
52165            Ok(expr) => return Ok(Some(expr)),
52166            Err(_) => {
52167                // Reset position and try parse_var
52168                self.current = saved_pos;
52169            }
52170        }
52171
52172        // Fall back to parsing a variable
52173        self.parse_var()
52174    }
52175
52176    /// parse_procedure_option - Implemented from Python _parse_procedure_option
52177    #[allow(unused_variables, unused_mut)]
52178    pub fn parse_procedure_option(&mut self) -> Result<Option<Expression>> {
52179        if self.match_text_seq(&["EXECUTE", "AS"]) {
52180            // Matched: EXECUTE AS
52181            return Ok(None);
52182        }
52183        Ok(None)
52184    }
52185
52186    /// parse_projections - Delegates to parse_expressions
52187    #[allow(unused_variables, unused_mut)]
52188    pub fn parse_projections(&mut self) -> Result<Option<Expression>> {
52189        self.parse_expressions()
52190    }
52191
52192    /// parse_properties - Parses table/column properties
52193    /// Python: _parse_properties
52194    /// Collects a list of properties using parse_property
52195    pub fn parse_properties(&mut self) -> Result<Option<Expression>> {
52196        self.parse_properties_impl(None)
52197    }
52198
52199    /// Implementation of parse_properties with before option
52200    pub fn parse_properties_impl(&mut self, before: Option<bool>) -> Result<Option<Expression>> {
52201        let mut properties = Vec::new();
52202
52203        loop {
52204            let prop = if before == Some(true) {
52205                self.parse_property_before()?
52206            } else {
52207                self.parse_property()?
52208            };
52209
52210            if let Some(p) = prop {
52211                properties.push(p);
52212            } else {
52213                break;
52214            }
52215        }
52216
52217        if properties.is_empty() {
52218            Ok(None)
52219        } else {
52220            Ok(Some(Expression::Properties(Box::new(Properties {
52221                expressions: properties,
52222            }))))
52223        }
52224    }
52225
52226    /// parse_property - Implemented from Python _parse_property
52227    /// Calls: parse_bitwise, parse_column, parse_sequence_properties
52228    #[allow(unused_variables, unused_mut)]
52229    pub fn parse_property(&mut self) -> Result<Option<Expression>> {
52230        if self.match_text_seq(&["COMPOUND", "SORTKEY"]) {
52231            return Ok(Some(Expression::Identifier(Identifier {
52232                name: String::new(),
52233                quoted: false,
52234                trailing_comments: Vec::new(),
52235                span: None,
52236            })));
52237        }
52238        if self.match_text_seq(&["SQL", "SECURITY"]) {
52239            // Matched: SQL SECURITY
52240            return Ok(None);
52241        }
52242        if self.match_texts(&["DEFINER", "INVOKER"]) {
52243            // Matched one of: DEFINER, INVOKER
52244            return Ok(None);
52245        }
52246        Ok(None)
52247    }
52248
52249    /// parse_on_cluster_clause - Parse ClickHouse ON CLUSTER clause
52250    fn parse_on_cluster_clause(&mut self) -> Result<Option<OnCluster>> {
52251        if !matches!(
52252            self.config.dialect,
52253            Some(crate::dialects::DialectType::ClickHouse)
52254        ) {
52255            return Ok(None);
52256        }
52257
52258        let start = self.current;
52259        if !self.match_token(TokenType::On) {
52260            return Ok(None);
52261        }
52262
52263        if !self.match_token(TokenType::Cluster) {
52264            self.current = start;
52265            return Ok(None);
52266        }
52267
52268        let this = if self.check(TokenType::String) {
52269            let value = self.expect_string()?;
52270            Expression::Literal(Box::new(Literal::String(value)))
52271        } else if let Some(id_expr) = self.parse_id_var()? {
52272            id_expr
52273        } else if self.is_safe_keyword_as_identifier() {
52274            let name = self.advance().text;
52275            Expression::Identifier(Identifier {
52276                name,
52277                quoted: false,
52278                trailing_comments: Vec::new(),
52279                span: None,
52280            })
52281        } else {
52282            return Err(self.parse_error("Expected cluster name after ON CLUSTER"));
52283        };
52284
52285        Ok(Some(OnCluster {
52286            this: Box::new(this),
52287        }))
52288    }
52289
52290    /// parse_clickhouse_table_properties - Parse ClickHouse table properties after column defs
52291    fn parse_clickhouse_table_properties(
52292        &mut self,
52293        properties: &mut Vec<Expression>,
52294    ) -> Result<()> {
52295        loop {
52296            if self.match_identifier("ENGINE") {
52297                self.match_token(TokenType::Eq);
52298                let engine = self.parse_clickhouse_engine_expression()?;
52299                properties.push(Expression::EngineProperty(Box::new(EngineProperty {
52300                    this: Box::new(engine),
52301                })));
52302                continue;
52303            }
52304
52305            if self.match_token(TokenType::Order) {
52306                self.expect(TokenType::By)?;
52307                let order_by = if matches!(
52308                    self.config.dialect,
52309                    Some(crate::dialects::DialectType::ClickHouse)
52310                ) && self.match_token(TokenType::LParen)
52311                {
52312                    // ClickHouse: ORDER BY (col1 [ASC|DESC], col2 [ASC|DESC], ...)
52313                    // or ORDER BY () for no ordering
52314                    if self.check(TokenType::RParen) {
52315                        self.skip();
52316                        OrderBy {
52317                            expressions: vec![Ordered::asc(Expression::Tuple(Box::new(Tuple {
52318                                expressions: Vec::new(),
52319                            })))],
52320                            siblings: false,
52321                            comments: Vec::new(),
52322                        }
52323                    } else {
52324                        // Parse all expressions inside the parentheses
52325                        let mut inner_exprs = Vec::new();
52326                        loop {
52327                            let expr = self.parse_expression()?;
52328                            inner_exprs.push(expr);
52329                            if !self.match_token(TokenType::Comma) {
52330                                break;
52331                            }
52332                        }
52333                        self.expect(TokenType::RParen)?;
52334                        // Wrap in a Tuple for multi-expr, Paren for single-expr
52335                        let wrapper = if inner_exprs.len() == 1 {
52336                            Expression::Paren(Box::new(Paren {
52337                                this: inner_exprs.into_iter().next().unwrap(),
52338                                trailing_comments: Vec::new(),
52339                            }))
52340                        } else {
52341                            Expression::Tuple(Box::new(Tuple {
52342                                expressions: inner_exprs,
52343                            }))
52344                        };
52345                        OrderBy {
52346                            expressions: vec![Ordered::asc(wrapper)],
52347                            siblings: false,
52348                            comments: Vec::new(),
52349                        }
52350                    }
52351                } else {
52352                    self.parse_order_by()?
52353                };
52354                properties.push(Expression::OrderBy(Box::new(order_by)));
52355                continue;
52356            }
52357
52358            if self.match_token(TokenType::Partition) {
52359                self.expect(TokenType::By)?;
52360                if self.check(TokenType::Order) && self.check_next(TokenType::By) {
52361                    return Err(self.parse_error("Expected expression after PARTITION BY"));
52362                }
52363                let expr = self
52364                    .parse_assignment()?
52365                    .ok_or_else(|| self.parse_error("Expected expression after PARTITION BY"))?;
52366                properties.push(Expression::PartitionedByProperty(Box::new(
52367                    PartitionedByProperty {
52368                        this: Box::new(expr),
52369                    },
52370                )));
52371                continue;
52372            }
52373
52374            if self.match_token(TokenType::PrimaryKey) {
52375                // ClickHouse supports PRIMARY KEY id and PRIMARY KEY (id, ...)
52376                let _ = self.match_token(TokenType::Key);
52377                if self.check(TokenType::LParen) {
52378                    if let Some(pk) = self.parse_primary_key_impl(false, true)? {
52379                        properties.push(pk);
52380                    }
52381                } else if let Some(expr) = self.parse_conjunction()? {
52382                    // ClickHouse: PRIMARY KEY expr (e.g., PRIMARY KEY tuple(), PRIMARY KEY id)
52383                    let mut exprs = vec![expr];
52384                    while self.match_token(TokenType::Comma) {
52385                        if let Some(next_expr) = self.parse_field()? {
52386                            exprs.push(next_expr);
52387                        } else {
52388                            break;
52389                        }
52390                    }
52391                    properties.push(Expression::PrimaryKey(Box::new(PrimaryKey {
52392                        this: None,
52393                        expressions: exprs,
52394                        options: Vec::new(),
52395                        include: None,
52396                    })));
52397                } else {
52398                    return Err(self.parse_error("Expected expression after PRIMARY KEY"));
52399                }
52400                continue;
52401            }
52402
52403            if self.match_token(TokenType::Sample) {
52404                let _ = self.match_token(TokenType::By);
52405                let expr = self.parse_expression()?;
52406                properties.push(Expression::SampleProperty(Box::new(SampleProperty {
52407                    this: Box::new(expr),
52408                })));
52409                continue;
52410            }
52411
52412            if self.match_token(TokenType::Settings) {
52413                let mut settings = Vec::new();
52414                loop {
52415                    settings.push(self.parse_expression()?);
52416                    if !self.match_token(TokenType::Comma) {
52417                        break;
52418                    }
52419                }
52420                properties.push(Expression::SettingsProperty(Box::new(SettingsProperty {
52421                    expressions: settings,
52422                })));
52423                continue;
52424            }
52425
52426            if self.match_token(TokenType::Comment) {
52427                let comment_expr = if self.check(TokenType::String) {
52428                    Expression::Literal(Box::new(Literal::String(self.expect_string()?)))
52429                } else {
52430                    self.parse_expression()?
52431                };
52432                properties.push(Expression::SchemaCommentProperty(Box::new(
52433                    SchemaCommentProperty {
52434                        this: Box::new(comment_expr),
52435                    },
52436                )));
52437                continue;
52438            }
52439
52440            // TTL time_column + INTERVAL '1' MONTH [DELETE|RECOMPRESS|TO DISK|TO VOLUME] [WHERE ...]
52441            if self.match_identifier("TTL") {
52442                if let Some(ttl_expr) = self.parse_ttl()? {
52443                    properties.push(ttl_expr);
52444                }
52445                continue;
52446            }
52447
52448            if self.match_identifier("SOURCE") {
52449                if let Some(prop) = self.parse_dict_property("SOURCE")? {
52450                    properties.push(prop);
52451                }
52452                continue;
52453            }
52454
52455            if self.match_identifier("LAYOUT") {
52456                if let Some(prop) = self.parse_dict_property("LAYOUT")? {
52457                    properties.push(prop);
52458                }
52459                continue;
52460            }
52461
52462            if self.match_identifier("LIFETIME") {
52463                if let Some(range) = self.parse_dict_range("LIFETIME")? {
52464                    properties.push(range);
52465                }
52466                continue;
52467            }
52468
52469            if self.match_identifier("RANGE") || self.match_token(TokenType::Range) {
52470                if let Some(range) = self.parse_dict_range("RANGE")? {
52471                    properties.push(range);
52472                }
52473                continue;
52474            }
52475
52476            break;
52477        }
52478
52479        Ok(())
52480    }
52481
52482    /// ClickHouse implicit alias in function arguments: `expr identifier` (without AS keyword).
52483    /// The token after the alias must be a delimiter (comma, RParen, FROM, FOR, AS).
52484    fn try_clickhouse_implicit_alias(&mut self, expr: Expression) -> Expression {
52485        if !matches!(
52486            self.config.dialect,
52487            Some(crate::dialects::DialectType::ClickHouse)
52488        ) {
52489            return expr;
52490        }
52491        if self.check(TokenType::Var) || self.check(TokenType::Identifier) {
52492            let next_after = self.peek_nth(1).map(|t| t.token_type);
52493            let is_delimiter = matches!(
52494                next_after,
52495                Some(TokenType::Comma)
52496                    | Some(TokenType::RParen)
52497                    | Some(TokenType::From)
52498                    | Some(TokenType::For)
52499                    | Some(TokenType::As)
52500            );
52501            if is_delimiter {
52502                let alias_token = self.advance();
52503                let alias_name = alias_token.text.clone();
52504                return Expression::Alias(Box::new(crate::expressions::Alias::new(
52505                    expr,
52506                    Identifier::new(alias_name),
52507                )));
52508            }
52509        }
52510        expr
52511    }
52512
52513    /// ClickHouse alias in function arguments: handles both implicit (`expr identifier`)
52514    /// and explicit (`expr AS identifier`) aliases. Use this in special function parsers
52515    /// (SUBSTRING, TRIM, EXTRACT) but NOT in CAST (which has its own AS handling).
52516    /// Normalize TSQL date part aliases (e.g., dd -> DAY, yy -> YEAR, etc.)
52517    fn normalize_tsql_date_part(&self, expr: Expression) -> Expression {
52518        let name = match &expr {
52519            Expression::Var(v) => Some(v.this.to_ascii_uppercase()),
52520            Expression::Column(c) if c.table.is_none() => Some(c.name.name.to_ascii_uppercase()),
52521            Expression::Identifier(id) => Some(id.name.to_ascii_uppercase()),
52522            _ => None,
52523        };
52524        if let Some(name) = name {
52525            let mapped = match name.as_str() {
52526                "YY" | "YYY" | "YYYY" | "YR" | "YEARS" | "YRS" => "YEAR",
52527                "MM" | "MON" | "MONS" | "MONTHS" | "M" => "MONTH",
52528                "D" | "DD" | "DAYS" | "DAYOFMONTH" => "DAY",
52529                "DOW" | "DW" | "WEEKDAY" => "DAYOFWEEK",
52530                "DOY" | "DY" | "Y" => "DAYOFYEAR",
52531                "W" | "WK" | "WEEKOFYEAR" | "WOY" | "WY" | "WW" => "WEEK",
52532                "Q" | "QTR" | "QTRS" | "QUARTERS" | "QQ" => "QUARTER",
52533                "H" | "HH" | "HR" | "HOURS" | "HRS" => "HOUR",
52534                "MI" | "MIN" | "MINUTES" | "MINS" | "N" => "MINUTE",
52535                "S" | "SEC" | "SECONDS" | "SECS" | "SS" => "SECOND",
52536                "MS" | "MSEC" | "MSECS" | "MSECOND" | "MSECONDS" | "MILLISEC" | "MILLISECS"
52537                | "MILLISECON" | "MILLISECONDS" => "MILLISECOND",
52538                "US" | "USEC" | "USECS" | "MICROSEC" | "MICROSECS" | "USECOND" | "USECONDS"
52539                | "MICROSECONDS" | "MCS" => "MICROSECOND",
52540                "NS" | "NSEC" | "NANOSEC" | "NSECOND" | "NSECONDS" | "NANOSECS" => "NANOSECOND",
52541                "TZH" => "TIMEZONE_HOUR",
52542                "TZM" | "TZOFFSET" | "TZ" => "TIMEZONE_MINUTE",
52543                "DEC" | "DECS" | "DECADES" => "DECADE",
52544                "MIL" | "MILS" | "MILLENIA" => "MILLENNIUM",
52545                "C" | "CENT" | "CENTS" | "CENTURIES" => "CENTURY",
52546                "ISOWK" | "ISOWW" | "ISO_WEEK" | "WEEKOFYEARISO" | "WEEKOFYEAR_ISO"
52547                | "WEEK_ISO" => "WEEKISO",
52548                _ => return expr, // No mapping, return as-is
52549            };
52550            return Expression::Var(Box::new(Var {
52551                this: mapped.to_string(),
52552            }));
52553        }
52554        expr
52555    }
52556
52557    fn try_parse_date_part_unit_expr(&self, expr: &Expression) -> Option<IntervalUnit> {
52558        let upper = self.date_part_expr_name(expr)?.to_ascii_uppercase();
52559        let canonical = match upper.as_str() {
52560            // Year
52561            "Y" | "YY" | "YYY" | "YYYY" | "YR" | "YEARS" | "YRS" => "YEAR",
52562            // Quarter
52563            "Q" | "QTR" | "QTRS" | "QUARTERS" | "QQ" => "QUARTER",
52564            // Month
52565            "MM" | "MON" | "MONS" | "MONTHS" | "M" => "MONTH",
52566            // Week
52567            "W" | "WK" | "WEEKOFYEAR" | "WOY" | "WY" | "WW" | "WEEKS" => "WEEK",
52568            // Day
52569            "D" | "DD" | "DAYS" | "DAYOFMONTH" => "DAY",
52570            // Hour
52571            "H" | "HH" | "HR" | "HOURS" | "HRS" => "HOUR",
52572            // Minute
52573            "MI" | "MIN" | "MINUTES" | "MINS" | "N" => "MINUTE",
52574            // Second
52575            "S" | "SEC" | "SECONDS" | "SECS" | "SS" => "SECOND",
52576            // Millisecond
52577            "MS" | "MSEC" | "MSECS" | "MSECOND" | "MSECONDS" | "MILLISEC" | "MILLISECS"
52578            | "MILLISECON" | "MILLISECONDS" => "MILLISECOND",
52579            // Microsecond
52580            "US" | "USEC" | "USECS" | "MICROSEC" | "MICROSECS" | "USECOND" | "USECONDS"
52581            | "MICROSECONDS" | "MCS" => "MICROSECOND",
52582            // Nanosecond
52583            "NS" | "NSEC" | "NANOSEC" | "NSECOND" | "NSECONDS" | "NANOSECS" => "NANOSECOND",
52584            _ => upper.as_str(),
52585        };
52586
52587        Self::parse_interval_unit_from_string(canonical)
52588    }
52589
52590    fn try_parse_date_part_unit_identifier_expr(&self, expr: &Expression) -> Option<IntervalUnit> {
52591        let upper = self
52592            .date_part_identifier_expr_name(expr)?
52593            .to_ascii_uppercase();
52594        let canonical = match upper.as_str() {
52595            "Y" | "YY" | "YYY" | "YYYY" | "YR" | "YEARS" | "YRS" => "YEAR",
52596            "Q" | "QTR" | "QTRS" | "QUARTERS" | "QQ" => "QUARTER",
52597            "MM" | "MON" | "MONS" | "MONTHS" | "M" => "MONTH",
52598            "W" | "WK" | "WEEKOFYEAR" | "WOY" | "WY" | "WW" | "WEEKS" => "WEEK",
52599            "D" | "DD" | "DAYS" | "DAYOFMONTH" => "DAY",
52600            "H" | "HH" | "HR" | "HOURS" | "HRS" => "HOUR",
52601            "MI" | "MIN" | "MINUTES" | "MINS" | "N" => "MINUTE",
52602            "S" | "SEC" | "SECONDS" | "SECS" | "SS" => "SECOND",
52603            "MS" | "MSEC" | "MSECS" | "MSECOND" | "MSECONDS" | "MILLISEC" | "MILLISECS"
52604            | "MILLISECON" | "MILLISECONDS" => "MILLISECOND",
52605            "US" | "USEC" | "USECS" | "MICROSEC" | "MICROSECS" | "USECOND" | "USECONDS"
52606            | "MICROSECONDS" | "MCS" => "MICROSECOND",
52607            "NS" | "NSEC" | "NANOSEC" | "NSECOND" | "NSECONDS" | "NANOSECS" => "NANOSECOND",
52608            _ => upper.as_str(),
52609        };
52610
52611        Self::parse_interval_unit_from_string(canonical)
52612    }
52613
52614    fn try_parse_date_part_field_identifier_expr(
52615        &self,
52616        expr: &Expression,
52617    ) -> Option<DateTimeField> {
52618        let upper = self
52619            .date_part_identifier_expr_name(expr)?
52620            .to_ascii_uppercase();
52621        Some(match upper.as_str() {
52622            "YEAR" | "Y" | "YY" | "YYY" | "YYYY" | "YR" | "YEARS" | "YRS" => DateTimeField::Year,
52623            "MONTH" | "MM" | "MON" | "MONS" | "MONTHS" => DateTimeField::Month,
52624            "DAY" | "D" | "DD" | "DAYS" | "DAYOFMONTH" => DateTimeField::Day,
52625            "HOUR" | "H" | "HH" | "HR" | "HOURS" | "HRS" => DateTimeField::Hour,
52626            "MINUTE" | "MI" | "MIN" | "MINUTES" | "MINS" => DateTimeField::Minute,
52627            "SECOND" | "S" | "SEC" | "SECONDS" | "SECS" => DateTimeField::Second,
52628            "MILLISECOND" | "MS" | "MSEC" | "MILLISECONDS" => DateTimeField::Millisecond,
52629            "MICROSECOND" | "US" | "USEC" | "MICROSECONDS" => DateTimeField::Microsecond,
52630            "DOW" | "DAYOFWEEK" | "DW" => DateTimeField::DayOfWeek,
52631            "DOY" | "DAYOFYEAR" | "DY" => DateTimeField::DayOfYear,
52632            "WEEK" | "W" | "WK" | "WEEKOFYEAR" | "WOY" | "WW" => DateTimeField::Week,
52633            "QUARTER" | "Q" | "QTR" | "QTRS" | "QUARTERS" => DateTimeField::Quarter,
52634            "EPOCH" | "EPOCH_SECOND" | "EPOCH_SECONDS" => DateTimeField::Epoch,
52635            "TIMEZONE" => DateTimeField::Timezone,
52636            "TIMEZONE_HOUR" | "TZH" => DateTimeField::TimezoneHour,
52637            "TIMEZONE_MINUTE" | "TZM" => DateTimeField::TimezoneMinute,
52638            "DATE" => DateTimeField::Date,
52639            "TIME" => DateTimeField::Time,
52640            other => DateTimeField::Custom(other.to_string()),
52641        })
52642    }
52643
52644    fn convert_date_part_identifier_expr_to_var(&self, expr: Expression) -> Expression {
52645        match expr {
52646            Expression::Var(_) => expr,
52647            Expression::Column(c) if c.table.is_none() => {
52648                Expression::Var(Box::new(Var { this: c.name.name }))
52649            }
52650            Expression::Identifier(id) => Expression::Var(Box::new(Var { this: id.name })),
52651            _ => expr,
52652        }
52653    }
52654
52655    /// For date-part functions where one argument is a date-part keyword
52656    /// (DAY, MONTH, WEEK, WEEK(MONDAY), ...) rather than a column reference,
52657    /// convert Column/Identifier at that position to Var so it is not caught
52658    /// by column qualification (lineage, validation). Matches sqlglot's
52659    /// build_date_diff/unit-aware parsing.
52660    ///
52661    /// The unit-position varies by dialect:
52662    /// - BigQuery: last arg (DATE_DIFF(a, b, DAY); DATE_TRUNC(a, MONTH))
52663    /// - TSQL/Fabric/Redshift/Snowflake: first arg (DATEDIFF(day, a, b))
52664    fn normalize_date_part_arg(&self, name: &str, args: &mut [Expression]) {
52665        use crate::dialects::DialectType as DT;
52666        let dialect = match self.config.dialect {
52667            Some(d) => d,
52668            None => return,
52669        };
52670        let upper = name.to_ascii_uppercase();
52671        let unit_index: Option<usize> = match dialect {
52672            DT::BigQuery => match upper.as_str() {
52673                "DATE_DIFF" | "DATETIME_DIFF" | "TIMESTAMP_DIFF" | "TIME_DIFF"
52674                    if args.len() == 3 =>
52675                {
52676                    Some(2)
52677                }
52678                "DATE_TRUNC" | "DATETIME_TRUNC" | "TIMESTAMP_TRUNC" | "TIME_TRUNC"
52679                    if args.len() >= 2 =>
52680                {
52681                    Some(1)
52682                }
52683                _ => None,
52684            },
52685            DT::TSQL | DT::Fabric => match upper.as_str() {
52686                "DATEDIFF" | "DATEDIFF_BIG" | "DATEADD" | "DATEPART" | "DATE_PART"
52687                | "DATENAME" | "DATETRUNC"
52688                    if !args.is_empty() =>
52689                {
52690                    Some(0)
52691                }
52692                _ => None,
52693            },
52694            DT::Redshift => match upper.as_str() {
52695                "DATEDIFF" | "DATE_DIFF" | "DATEADD" | "DATE_ADD" | "DATE_PART" | "DATEPART"
52696                | "DATE_TRUNC" | "DATETRUNC"
52697                    if !args.is_empty() =>
52698                {
52699                    Some(0)
52700                }
52701                _ => None,
52702            },
52703            DT::Snowflake => match upper.as_str() {
52704                "DATE_TRUNC" | "DATETRUNC" if !args.is_empty() => Some(0),
52705                _ => None,
52706            },
52707            _ => None,
52708        };
52709        if let Some(idx) = unit_index {
52710            let taken = std::mem::replace(&mut args[idx], Expression::Null(Null));
52711            args[idx] = Self::date_part_arg_to_var(taken);
52712        }
52713    }
52714
52715    fn date_part_arg_to_var(expr: Expression) -> Expression {
52716        match expr {
52717            Expression::Column(c) if c.table.is_none() => {
52718                Expression::Var(Box::new(Var { this: c.name.name }))
52719            }
52720            Expression::Identifier(id) => Expression::Var(Box::new(Var { this: id.name })),
52721            // WEEK(MONDAY), WEEK(SATURDAY), etc. — recurse into the inner arg
52722            Expression::Function(mut f) if !f.args.is_empty() => {
52723                let inner = std::mem::replace(&mut f.args[0], Expression::Null(Null));
52724                f.args[0] = Self::date_part_arg_to_var(inner);
52725                Expression::Function(f)
52726            }
52727            other => other,
52728        }
52729    }
52730
52731    fn date_part_identifier_expr_name<'a>(&self, expr: &'a Expression) -> Option<&'a str> {
52732        match expr {
52733            Expression::Var(v) => Some(v.this.as_str()),
52734            Expression::Column(c) if c.table.is_none() => Some(c.name.name.as_str()),
52735            Expression::Identifier(id) => Some(id.name.as_str()),
52736            _ => None,
52737        }
52738    }
52739
52740    fn date_part_expr_name<'a>(&self, expr: &'a Expression) -> Option<&'a str> {
52741        self.date_part_identifier_expr_name(expr).or(match expr {
52742            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
52743                let Literal::String(s) = lit.as_ref() else {
52744                    unreachable!()
52745                };
52746                Some(s.as_str())
52747            }
52748            _ => None,
52749        })
52750    }
52751
52752    fn try_clickhouse_func_arg_alias(&mut self, expr: Expression) -> Expression {
52753        if !matches!(
52754            self.config.dialect,
52755            Some(crate::dialects::DialectType::ClickHouse)
52756        ) {
52757            return expr;
52758        }
52759        // Try implicit alias first
52760        if self.check(TokenType::Var) || self.check(TokenType::Identifier) {
52761            let next_after = self.peek_nth(1).map(|t| t.token_type);
52762            let is_delimiter = matches!(
52763                next_after,
52764                Some(TokenType::Comma)
52765                    | Some(TokenType::RParen)
52766                    | Some(TokenType::From)
52767                    | Some(TokenType::For)
52768                    | Some(TokenType::As)
52769            );
52770            if is_delimiter {
52771                let alias_token = self.advance();
52772                let alias_name = alias_token.text.clone();
52773                return Expression::Alias(Box::new(crate::expressions::Alias::new(
52774                    expr,
52775                    Identifier::new(alias_name),
52776                )));
52777            }
52778        }
52779        // Try explicit AS alias
52780        if self.check(TokenType::As) {
52781            let next_idx = self.current + 1;
52782            let after_alias_idx = self.current + 2;
52783            let is_alias_token = next_idx < self.tokens.len()
52784                && matches!(
52785                    self.tokens[next_idx].token_type,
52786                    TokenType::Identifier | TokenType::Var | TokenType::QuotedIdentifier
52787                );
52788            let is_delimiter = is_alias_token
52789                && after_alias_idx < self.tokens.len()
52790                && matches!(
52791                    self.tokens[after_alias_idx].token_type,
52792                    TokenType::Comma
52793                        | TokenType::RParen
52794                        | TokenType::From
52795                        | TokenType::For
52796                        | TokenType::As
52797                );
52798            if is_delimiter {
52799                self.skip(); // consume AS
52800                let alias_token = self.advance();
52801                let alias_name = if alias_token.token_type == TokenType::QuotedIdentifier {
52802                    let mut ident = Identifier::new(alias_token.text.clone());
52803                    ident.quoted = true;
52804                    ident
52805                } else {
52806                    Identifier::new(alias_token.text.clone())
52807                };
52808                return Expression::Alias(Box::new(crate::expressions::Alias::new(
52809                    expr, alias_name,
52810                )));
52811            }
52812        }
52813        expr
52814    }
52815
52816    /// parse_clickhouse_engine_expression - Parse ENGINE expression with optional args
52817    fn parse_clickhouse_engine_expression(&mut self) -> Result<Expression> {
52818        if self.is_at_end() {
52819            return Err(self.parse_error("Expected engine name after ENGINE"));
52820        }
52821
52822        let token = self.advance();
52823        let quoted = matches!(token.token_type, TokenType::QuotedIdentifier);
52824        let name = token.text.clone();
52825
52826        let ident = Expression::Identifier(Identifier {
52827            name,
52828            quoted,
52829            trailing_comments: Vec::new(),
52830            span: None,
52831        });
52832
52833        if self.match_token(TokenType::LParen) {
52834            let args = if self.check(TokenType::RParen) {
52835                Vec::new()
52836            } else {
52837                self.parse_expression_list()?
52838            };
52839            self.expect(TokenType::RParen)?;
52840            Ok(Expression::Anonymous(Box::new(Anonymous {
52841                this: Box::new(ident),
52842                expressions: args,
52843            })))
52844        } else {
52845            Ok(ident)
52846        }
52847    }
52848
52849    /// parse_property_assignment - Ported from Python _parse_property_assignment
52850    /// Parses a property assignment: optionally = or AS, then a value
52851    #[allow(unused_variables, unused_mut)]
52852    pub fn parse_property_assignment(&mut self) -> Result<Option<Expression>> {
52853        // Optionally match = or AS
52854        let _ = self.match_token(TokenType::Eq);
52855        let _ = self.match_token(TokenType::Alias);
52856
52857        // Parse the value as an unquoted field
52858        let value = self.parse_unquoted_field()?;
52859
52860        Ok(value)
52861    }
52862
52863    /// parse_property_before - Implemented from Python _parse_property_before
52864    #[allow(unused_variables, unused_mut)]
52865    pub fn parse_property_before(&mut self) -> Result<Option<Expression>> {
52866        if self.match_text_seq(&["NO"]) {
52867            // Matched: NO
52868            return Ok(None);
52869        }
52870        if self.match_text_seq(&["DUAL"]) {
52871            // Matched: DUAL
52872            return Ok(None);
52873        }
52874        if self.match_text_seq(&["BEFORE"]) {
52875            // Matched: BEFORE
52876            return Ok(None);
52877        }
52878        if self.match_texts(&["MIN", "MINIMUM"]) {
52879            // Matched one of: MIN, MINIMUM
52880            return Ok(None);
52881        }
52882        if self.match_texts(&["MAX", "MAXIMUM"]) {
52883            // Matched one of: MAX, MAXIMUM
52884            return Ok(None);
52885        }
52886        Ok(None)
52887    }
52888
52889    /// parse_qualify - Parse QUALIFY clause (Snowflake, BigQuery)
52890    /// Python: if not self._match(TokenType.QUALIFY): return None; return exp.Qualify(this=self._parse_disjunction())
52891    pub fn parse_qualify(&mut self) -> Result<Option<Expression>> {
52892        if !self.match_token(TokenType::Qualify) {
52893            return Ok(None);
52894        }
52895        let condition = self.parse_expression()?;
52896        Ok(Some(Expression::Qualify(Box::new(Qualify {
52897            this: condition,
52898        }))))
52899    }
52900
52901    /// parse_range - Parses range expressions (BETWEEN, LIKE, IN, IS, etc.)
52902    /// Python: _parse_range
52903    pub fn parse_range(&mut self) -> Result<Option<Expression>> {
52904        // First parse a bitwise expression as the left side
52905        let mut this = self.parse_bitwise()?;
52906        if this.is_none() {
52907            return Ok(None);
52908        }
52909
52910        // Check for NOT (for NOT LIKE, NOT IN, NOT BETWEEN, etc.)
52911        let negate = self.match_token(TokenType::Not);
52912
52913        // BETWEEN
52914        if self.match_token(TokenType::Between) {
52915            let between = self.parse_between_with_expr(this.clone(), negate)?;
52916            this = Some(between);
52917            return Ok(this);
52918        }
52919
52920        // LIKE
52921        if self.match_token(TokenType::Like) {
52922            let left = this.clone().expect("left expression checked above");
52923            let right = self
52924                .parse_bitwise()?
52925                .ok_or_else(|| self.parse_error("Expected expression after LIKE"))?;
52926            let escape = self.parse_escape()?;
52927            let like = Expression::Like(Box::new(LikeOp {
52928                left,
52929                right,
52930                escape,
52931                quantifier: None,
52932                inferred_type: None,
52933            }));
52934            this = if negate {
52935                Some(Expression::Not(Box::new(UnaryOp {
52936                    this: like,
52937                    inferred_type: None,
52938                })))
52939            } else {
52940                Some(like)
52941            };
52942            return Ok(this);
52943        }
52944
52945        // ILIKE
52946        if self.match_token(TokenType::ILike) {
52947            let left = this.clone().expect("left expression checked above");
52948            let right = self
52949                .parse_bitwise()?
52950                .ok_or_else(|| self.parse_error("Expected expression after ILIKE"))?;
52951            let escape = self.parse_escape()?;
52952            let ilike = Expression::ILike(Box::new(LikeOp {
52953                left,
52954                right,
52955                escape,
52956                quantifier: None,
52957                inferred_type: None,
52958            }));
52959            this = if negate {
52960                Some(Expression::Not(Box::new(UnaryOp {
52961                    this: ilike,
52962                    inferred_type: None,
52963                })))
52964            } else {
52965                Some(ilike)
52966            };
52967            return Ok(this);
52968        }
52969
52970        // IN
52971        if self.match_token(TokenType::In) {
52972            let in_expr = self.parse_in_with_expr(this.clone())?;
52973            this = if negate {
52974                Some(Expression::Not(Box::new(UnaryOp {
52975                    this: in_expr,
52976                    inferred_type: None,
52977                })))
52978            } else {
52979                Some(in_expr)
52980            };
52981            return Ok(this);
52982        }
52983
52984        // IS [NOT] NULL / IS [NOT] TRUE / IS [NOT] FALSE
52985        if self.match_token(TokenType::Is) {
52986            let is_expr = self.parse_is_with_expr(this.clone())?;
52987            this = Some(is_expr);
52988            return Ok(this);
52989        }
52990
52991        // Handle standalone NOT with NULL (for NOT NULL pattern after negate)
52992        if negate && self.match_token(TokenType::Null) {
52993            if let Some(left) = this {
52994                let is_null = Expression::Is(Box::new(BinaryOp {
52995                    left,
52996                    right: Expression::Null(Null),
52997                    left_comments: Vec::new(),
52998                    operator_comments: Vec::new(),
52999                    trailing_comments: Vec::new(),
53000                    inferred_type: None,
53001                }));
53002                return Ok(Some(Expression::Not(Box::new(UnaryOp {
53003                    this: is_null,
53004                    inferred_type: None,
53005                }))));
53006            }
53007        }
53008
53009        Ok(this)
53010    }
53011
53012    /// parse_between_with_expr - Parses BETWEEN expression with given left side
53013    fn parse_between_with_expr(
53014        &mut self,
53015        this: Option<Expression>,
53016        negate: bool,
53017    ) -> Result<Expression> {
53018        let this_expr = match this {
53019            Some(e) => e,
53020            None => return Err(self.parse_error("Expected expression before BETWEEN")),
53021        };
53022
53023        // Check for SYMMETRIC/ASYMMETRIC qualifier
53024        let symmetric = if self.match_texts(&["SYMMETRIC"]) {
53025            Some(true)
53026        } else if self.match_texts(&["ASYMMETRIC"]) {
53027            Some(false)
53028        } else {
53029            None
53030        };
53031
53032        let low = self
53033            .parse_bitwise()?
53034            .ok_or_else(|| self.parse_error("Expected low expression after BETWEEN"))?;
53035
53036        if !self.match_token(TokenType::And) {
53037            return Err(self.parse_error("Expected AND in BETWEEN expression"));
53038        }
53039
53040        let high = self
53041            .parse_bitwise()?
53042            .ok_or_else(|| self.parse_error("Expected high expression after AND in BETWEEN"))?;
53043
53044        Ok(Expression::Between(Box::new(Between {
53045            this: this_expr,
53046            low,
53047            high,
53048            not: negate,
53049            symmetric,
53050        })))
53051    }
53052
53053    /// parse_in_with_expr - Parses IN expression with given left side
53054    fn parse_in_with_expr(&mut self, this: Option<Expression>) -> Result<Expression> {
53055        let this_expr = match this {
53056            Some(e) => e,
53057            None => return Err(self.parse_error("Expected expression before IN")),
53058        };
53059
53060        // BigQuery: IN UNNEST(expr) — UNNEST without wrapping parentheses
53061        if self.check_identifier("UNNEST") {
53062            self.skip(); // consume UNNEST
53063            self.expect(TokenType::LParen)?;
53064            let unnest_expr = self.parse_expression()?;
53065            self.expect(TokenType::RParen)?;
53066            return Ok(Expression::In(Box::new(In {
53067                this: this_expr,
53068                expressions: Vec::new(),
53069                query: None,
53070                not: false,
53071                global: false,
53072                unnest: Some(Box::new(unnest_expr)),
53073                is_field: false,
53074            })));
53075        }
53076
53077        // Parse the IN list (subquery or value list)
53078        if !self.match_token(TokenType::LParen) {
53079            // DuckDB: IN without parentheses for array/list membership: 'red' IN tbl.flags
53080            // Try to parse as a single expression (column/array reference)
53081            if let Ok(expr) = self.parse_primary() {
53082                return Ok(Expression::In(Box::new(In {
53083                    this: this_expr,
53084                    expressions: vec![expr],
53085                    query: None,
53086                    not: false,
53087                    global: false,
53088                    unnest: None,
53089                    is_field: true,
53090                })));
53091            }
53092            return Err(self.parse_error("Expected expression or parenthesized list after IN"));
53093        }
53094
53095        // Check if it's a subquery
53096        if self.check(TokenType::Select) {
53097            let subquery = self.parse_select()?;
53098            self.expect(TokenType::RParen)?;
53099            return Ok(Expression::In(Box::new(In {
53100                this: this_expr,
53101                expressions: Vec::new(),
53102                query: Some(subquery),
53103                not: false,
53104                global: false,
53105                unnest: None,
53106                is_field: false,
53107            })));
53108        }
53109
53110        // Parse value list. Pre-size for large IN lists to reduce reallocations.
53111        let capacity_hint = self.estimate_expression_list_capacity_until_rparen();
53112        let expressions = self.parse_expression_list_with_capacity(capacity_hint)?;
53113        self.expect(TokenType::RParen)?;
53114
53115        if expressions.is_empty() {
53116            return Err(self.parse_error("Expected expression list after IN"));
53117        }
53118
53119        Ok(Expression::In(Box::new(In {
53120            this: this_expr,
53121            expressions,
53122            query: None,
53123            not: false,
53124            global: false,
53125            unnest: None,
53126            is_field: false,
53127        })))
53128    }
53129
53130    /// parse_is_with_expr - Parses IS expression with given left side
53131    fn parse_is_with_expr(&mut self, this: Option<Expression>) -> Result<Expression> {
53132        let this_expr = match this {
53133            Some(e) => e,
53134            None => return Err(self.parse_error("Expected expression before IS")),
53135        };
53136
53137        let negate = self.match_token(TokenType::Not);
53138
53139        // IS NULL
53140        if self.match_token(TokenType::Null) {
53141            let is_null = Expression::Is(Box::new(BinaryOp {
53142                left: this_expr,
53143                right: Expression::Null(Null),
53144                left_comments: Vec::new(),
53145                operator_comments: Vec::new(),
53146                trailing_comments: Vec::new(),
53147                inferred_type: None,
53148            }));
53149            return if negate {
53150                Ok(Expression::Not(Box::new(UnaryOp {
53151                    this: is_null,
53152                    inferred_type: None,
53153                })))
53154            } else {
53155                Ok(is_null)
53156            };
53157        }
53158
53159        // IS TRUE
53160        if self.match_texts(&["TRUE"]) {
53161            let is_true = Expression::Is(Box::new(BinaryOp {
53162                left: this_expr,
53163                right: Expression::Boolean(BooleanLiteral { value: true }),
53164                left_comments: Vec::new(),
53165                operator_comments: Vec::new(),
53166                trailing_comments: Vec::new(),
53167                inferred_type: None,
53168            }));
53169            return if negate {
53170                Ok(Expression::Not(Box::new(UnaryOp {
53171                    this: is_true,
53172                    inferred_type: None,
53173                })))
53174            } else {
53175                Ok(is_true)
53176            };
53177        }
53178
53179        // IS FALSE
53180        if self.match_texts(&["FALSE"]) {
53181            let is_false = Expression::Is(Box::new(BinaryOp {
53182                left: this_expr,
53183                right: Expression::Boolean(BooleanLiteral { value: false }),
53184                left_comments: Vec::new(),
53185                operator_comments: Vec::new(),
53186                trailing_comments: Vec::new(),
53187                inferred_type: None,
53188            }));
53189            return if negate {
53190                Ok(Expression::Not(Box::new(UnaryOp {
53191                    this: is_false,
53192                    inferred_type: None,
53193                })))
53194            } else {
53195                Ok(is_false)
53196            };
53197        }
53198
53199        // IS JSON [VALUE|SCALAR|OBJECT|ARRAY] [WITH UNIQUE KEYS|WITHOUT UNIQUE KEYS|UNIQUE KEYS]
53200        if self.match_texts(&["JSON"]) {
53201            // Parse optional JSON type
53202            let json_type = if self.match_texts(&["VALUE"]) {
53203                Some("VALUE".to_string())
53204            } else if self.match_texts(&["SCALAR"]) {
53205                Some("SCALAR".to_string())
53206            } else if self.match_texts(&["OBJECT"]) {
53207                Some("OBJECT".to_string())
53208            } else if self.match_texts(&["ARRAY"]) {
53209                Some("ARRAY".to_string())
53210            } else {
53211                None
53212            };
53213
53214            // Parse optional key uniqueness constraint
53215            let unique_keys = if self.match_text_seq(&["WITH", "UNIQUE", "KEYS"]) {
53216                Some(JsonUniqueKeys::With)
53217            } else if self.match_text_seq(&["WITHOUT", "UNIQUE", "KEYS"]) {
53218                Some(JsonUniqueKeys::Without)
53219            } else if self.match_text_seq(&["UNIQUE", "KEYS"]) {
53220                // Shorthand for WITH UNIQUE KEYS
53221                Some(JsonUniqueKeys::Shorthand)
53222            } else {
53223                None
53224            };
53225
53226            return Ok(Expression::IsJson(Box::new(IsJson {
53227                this: this_expr,
53228                json_type,
53229                unique_keys,
53230                negated: negate,
53231            })));
53232        }
53233
53234        // IS DISTINCT FROM / IS NOT DISTINCT FROM
53235        if self.match_text_seq(&["DISTINCT", "FROM"]) {
53236            let right = self.parse_bitwise()?;
53237            if let Some(right_expr) = right {
53238                // IS DISTINCT FROM is semantically "not equal with null handling"
53239                // Use NullSafeNeq for IS DISTINCT FROM
53240                // If negate was set (IS NOT DISTINCT FROM), use NullSafeEq
53241                let expr = if negate {
53242                    Expression::NullSafeEq(Box::new(BinaryOp {
53243                        left: this_expr,
53244                        right: right_expr,
53245                        left_comments: Vec::new(),
53246                        operator_comments: Vec::new(),
53247                        trailing_comments: Vec::new(),
53248                        inferred_type: None,
53249                    }))
53250                } else {
53251                    Expression::NullSafeNeq(Box::new(BinaryOp {
53252                        left: this_expr,
53253                        right: right_expr,
53254                        left_comments: Vec::new(),
53255                        operator_comments: Vec::new(),
53256                        trailing_comments: Vec::new(),
53257                        inferred_type: None,
53258                    }))
53259                };
53260                return Ok(expr);
53261            }
53262            return Err(self.parse_error("Expected expression after IS DISTINCT FROM"));
53263        }
53264
53265        Err(self.parse_error("Expected NULL, TRUE, FALSE, JSON, or DISTINCT FROM after IS"))
53266    }
53267
53268    /// parse_reads_property - Implemented from Python _parse_reads_property
53269    #[allow(unused_variables, unused_mut)]
53270    pub fn parse_reads_property(&mut self) -> Result<Option<Expression>> {
53271        if self.match_text_seq(&["SQL", "DATA"]) {
53272            // Matched: SQL DATA
53273            return Ok(None);
53274        }
53275        Ok(None)
53276    }
53277
53278    /// parse_recursive_with_search - Parse SEARCH/CYCLE clause for recursive CTEs (PostgreSQL)
53279    /// Syntax: SEARCH BREADTH|DEPTH FIRST BY column SET column [USING column]
53280    ///     or: CYCLE column SET column USING column
53281    #[allow(unused_variables, unused_mut)]
53282    pub fn parse_recursive_with_search(&mut self) -> Result<Option<Box<Expression>>> {
53283        // Check for SEARCH or CYCLE keyword
53284        let kind = if self.match_text_seq(&["SEARCH"]) {
53285            // SEARCH BREADTH|DEPTH FIRST BY ...
53286            let search_kind = if self.match_text_seq(&["BREADTH"]) {
53287                "BREADTH"
53288            } else if self.match_text_seq(&["DEPTH"]) {
53289                "DEPTH"
53290            } else {
53291                return Ok(None);
53292            };
53293            // Consume "FIRST BY"
53294            self.match_text_seq(&["FIRST"]);
53295            self.match_text_seq(&["BY"]);
53296            search_kind.to_string()
53297        } else if self.match_token(TokenType::Cycle) {
53298            "CYCLE".to_string()
53299        } else {
53300            return Ok(None);
53301        };
53302
53303        // Parse the column(s) - for CYCLE this is typically a single column
53304        let this = self.expect_identifier()?;
53305        let this_expr = Expression::Identifier(Identifier::new(this));
53306
53307        // SET column
53308        let expression = if self.match_text_seq(&["SET"]) {
53309            let set_col = self.expect_identifier()?;
53310            Expression::Identifier(Identifier::new(set_col))
53311        } else {
53312            return Err(self.parse_error("Expected SET in CYCLE/SEARCH clause"));
53313        };
53314
53315        // USING column (optional for SEARCH, required for CYCLE)
53316        let using = if self.match_token(TokenType::Using) {
53317            let using_col = self.expect_identifier()?;
53318            Some(Box::new(Expression::Identifier(Identifier::new(using_col))))
53319        } else {
53320            None
53321        };
53322
53323        Ok(Some(Box::new(Expression::RecursiveWithSearch(Box::new(
53324            RecursiveWithSearch {
53325                kind,
53326                this: Box::new(this_expr),
53327                expression: Box::new(expression),
53328                using,
53329            },
53330        )))))
53331    }
53332
53333    /// parse_references - Ported from Python _parse_references
53334    /// Parses REFERENCES clause for foreign key constraints
53335    #[allow(unused_variables, unused_mut)]
53336    pub fn parse_references(&mut self) -> Result<Option<Expression>> {
53337        if !self.match_token(TokenType::References) {
53338            return Ok(None);
53339        }
53340
53341        // Parse referenced table
53342        let this = self.parse_table()?;
53343        if this.is_none() {
53344            return Err(self.parse_error("Expected table name after REFERENCES"));
53345        }
53346
53347        // Parse optional column list (table(col1, col2))
53348        let expressions = if self.match_token(TokenType::LParen) {
53349            let cols = self.parse_identifier_list()?;
53350            self.expect(TokenType::RParen)?;
53351            cols.into_iter()
53352                .map(|id| Expression::Identifier(id))
53353                .collect()
53354        } else {
53355            Vec::new()
53356        };
53357
53358        // Parse optional constraint options (ON DELETE, ON UPDATE, etc.)
53359        let options = self.parse_fk_constraint_options()?;
53360
53361        Ok(Some(Expression::Reference(Box::new(Reference {
53362            this: Box::new(this.unwrap()),
53363            expressions,
53364            options,
53365        }))))
53366    }
53367
53368    /// Parse key constraint options (ON DELETE CASCADE, ON UPDATE SET NULL, etc.)
53369    fn parse_fk_constraint_options(&mut self) -> Result<Vec<Expression>> {
53370        let mut options = Vec::new();
53371
53372        while self.match_token(TokenType::On) {
53373            let kind = if self.match_token(TokenType::Delete) {
53374                "DELETE"
53375            } else if self.match_token(TokenType::Update) {
53376                "UPDATE"
53377            } else {
53378                break;
53379            };
53380
53381            let action = if self.match_text_seq(&["NO", "ACTION"]) {
53382                "NO ACTION"
53383            } else if self.match_text_seq(&["SET", "NULL"]) {
53384                "SET NULL"
53385            } else if self.match_text_seq(&["SET", "DEFAULT"]) {
53386                "SET DEFAULT"
53387            } else if self.match_token(TokenType::Cascade) {
53388                "CASCADE"
53389            } else if self.match_token(TokenType::Restrict) {
53390                "RESTRICT"
53391            } else {
53392                continue;
53393            };
53394
53395            // Store as simple identifier with the full action description
53396            options.push(Expression::Identifier(Identifier {
53397                name: format!("ON {} {}", kind, action),
53398                quoted: false,
53399                trailing_comments: Vec::new(),
53400                span: None,
53401            }));
53402        }
53403
53404        // Parse MATCH option
53405        if self.match_token(TokenType::Match) {
53406            let match_type = if self.match_identifier("FULL") {
53407                "FULL"
53408            } else if self.match_identifier("PARTIAL") {
53409                "PARTIAL"
53410            } else if self.match_identifier("SIMPLE") {
53411                "SIMPLE"
53412            } else {
53413                ""
53414            };
53415            if !match_type.is_empty() {
53416                options.push(Expression::Identifier(Identifier {
53417                    name: format!("MATCH {}", match_type),
53418                    quoted: false,
53419                    trailing_comments: Vec::new(),
53420                    span: None,
53421                }));
53422            }
53423        }
53424
53425        Ok(options)
53426    }
53427
53428    /// parse_refresh - Implemented from Python _parse_refresh
53429    #[allow(unused_variables, unused_mut)]
53430    /// parse_refresh - Parses REFRESH TABLE or REFRESH MATERIALIZED VIEW
53431    /// Python: parser.py:7656-7668
53432    pub fn parse_refresh(&mut self) -> Result<Option<Expression>> {
53433        let kind = if self.match_token(TokenType::Table) {
53434            "TABLE".to_string()
53435        } else if self.match_text_seq(&["MATERIALIZED", "VIEW"]) {
53436            "MATERIALIZED VIEW".to_string()
53437        } else {
53438            String::new()
53439        };
53440
53441        // Parse the object name (string literal or table name)
53442        // First try a string literal, then fall back to table reference
53443        if let Some(s) = self.parse_string()? {
53444            return Ok(Some(Expression::Refresh(Box::new(Refresh {
53445                this: Box::new(s),
53446                kind,
53447            }))));
53448        }
53449
53450        // Parse as a table reference (schema.table format)
53451        let table_ref = self.parse_table_ref()?;
53452        let table_expr = Expression::Table(Box::new(table_ref));
53453
53454        Ok(Some(Expression::Refresh(Box::new(Refresh {
53455            this: Box::new(table_expr),
53456            kind,
53457        }))))
53458    }
53459
53460    /// parse_refresh_trigger_property - Doris REFRESH clause for materialized views
53461    /// Syntax: REFRESH method ON kind [EVERY n UNIT] [STARTS 'datetime']
53462    /// Examples:
53463    ///   REFRESH COMPLETE ON MANUAL
53464    ///   REFRESH AUTO ON COMMIT
53465    ///   REFRESH AUTO ON SCHEDULE EVERY 5 MINUTE STARTS '2025-01-01 00:00:00'
53466    pub fn parse_refresh_trigger_property(&mut self) -> Result<RefreshTriggerProperty> {
53467        // Parse method: COMPLETE or AUTO
53468        let method = self.expect_identifier_or_keyword()?.to_ascii_uppercase();
53469
53470        // Parse ON
53471        self.expect(TokenType::On)?;
53472
53473        // Parse kind: MANUAL, COMMIT, or SCHEDULE
53474        let kind_text = self.expect_identifier_or_keyword()?.to_ascii_uppercase();
53475        let kind = Some(kind_text.clone());
53476
53477        // For SCHEDULE, parse EVERY n UNIT [STARTS 'datetime']
53478        let (every, unit, starts) = if kind_text == "SCHEDULE" {
53479            // EVERY n UNIT
53480            let every = if self.match_identifier("EVERY") {
53481                // parse_number returns Option<Expression> with Expression::Literal(Box::new(Literal::Number(...)))
53482                self.parse_number()?.map(Box::new)
53483            } else {
53484                None
53485            };
53486
53487            // Unit: MINUTE, HOUR, DAY, etc.
53488            let unit = if every.is_some() {
53489                Some(self.expect_identifier_or_keyword()?.to_ascii_uppercase())
53490            } else {
53491                None
53492            };
53493
53494            // STARTS 'datetime'
53495            let starts = if self.match_identifier("STARTS") {
53496                let s = self.expect_string()?;
53497                Some(Box::new(Expression::Literal(Box::new(Literal::String(s)))))
53498            } else {
53499                None
53500            };
53501
53502            (every, unit, starts)
53503        } else {
53504            (None, None, None)
53505        };
53506
53507        Ok(RefreshTriggerProperty {
53508            method,
53509            kind,
53510            every,
53511            unit,
53512            starts,
53513        })
53514    }
53515
53516    /// parse_remote_with_connection - Implemented from Python _parse_remote_with_connection
53517    #[allow(unused_variables, unused_mut)]
53518    pub fn parse_remote_with_connection(&mut self) -> Result<Option<Expression>> {
53519        if self.match_text_seq(&["WITH", "CONNECTION"]) {
53520            // Matched: WITH CONNECTION
53521            return Ok(None);
53522        }
53523        Ok(None)
53524    }
53525
53526    /// parse_respect_or_ignore_nulls - Implemented from Python _parse_respect_or_ignore_nulls
53527    #[allow(unused_variables, unused_mut)]
53528    pub fn parse_respect_or_ignore_nulls(&mut self) -> Result<Option<Expression>> {
53529        if self.match_text_seq(&["IGNORE", "NULLS"]) {
53530            // Matched: IGNORE NULLS
53531            return Ok(None);
53532        }
53533        if self.match_text_seq(&["RESPECT", "NULLS"]) {
53534            // Matched: RESPECT NULLS
53535            return Ok(None);
53536        }
53537        Ok(None)
53538    }
53539
53540    /// parse_retention_period - Parses HISTORY_RETENTION_PERIOD (TSQL)
53541    /// Python: _parse_retention_period
53542    /// Format: INFINITE | <number> DAY | DAYS | MONTH | MONTHS | YEAR | YEARS
53543    pub fn parse_retention_period(&mut self) -> Result<Option<Expression>> {
53544        // Try to parse a number first
53545        let number = self.parse_number()?;
53546        let number_str = number
53547            .map(|n| match n {
53548                Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
53549                    let Literal::Number(s) = lit.as_ref() else {
53550                        unreachable!()
53551                    };
53552                    format!("{} ", s)
53553                }
53554                _ => String::new(),
53555            })
53556            .unwrap_or_default();
53557
53558        // Parse the unit (any token as a variable)
53559        let unit = self.parse_var_any_token()?;
53560        let unit_str = unit
53561            .map(|u| match u {
53562                Expression::Var(v) => v.this.clone(),
53563                _ => String::new(),
53564            })
53565            .unwrap_or_default();
53566
53567        let result = format!("{}{}", number_str, unit_str);
53568        Ok(Some(Expression::Var(Box::new(Var { this: result }))))
53569    }
53570
53571    /// parse_var_any_token - Parses any token as a Var (for flexible parsing)
53572    fn parse_var_any_token(&mut self) -> Result<Option<Expression>> {
53573        if !self.is_at_end() {
53574            let token = self.advance();
53575            Ok(Some(Expression::Var(Box::new(Var {
53576                this: token.text.clone(),
53577            }))))
53578        } else {
53579            Ok(None)
53580        }
53581    }
53582
53583    /// parse_returning - Creates Returning expression
53584    /// Parses RETURNING clause (PostgreSQL) for INSERT/UPDATE/DELETE
53585    #[allow(unused_variables, unused_mut)]
53586    pub fn parse_returning(&mut self) -> Result<Option<Expression>> {
53587        if !self.match_token(TokenType::Returning) {
53588            return Ok(None);
53589        }
53590
53591        // Parse expressions (column list or *)
53592        let expressions = self.parse_expression_list()?;
53593
53594        // Check for INTO target_table (Oracle style)
53595        let into = if self.match_token(TokenType::Into) {
53596            self.parse_table()?.map(Box::new)
53597        } else {
53598            None
53599        };
53600
53601        Ok(Some(Expression::Returning(Box::new(Returning {
53602            expressions,
53603            into,
53604        }))))
53605    }
53606
53607    /// parse_output_clause - Parses OUTPUT clause (TSQL)
53608    /// Used in INSERT/UPDATE/DELETE and MERGE statements
53609    /// Supports expressions with optional AS aliases: OUTPUT col1, col2 AS alias, col3
53610    pub fn parse_output_clause(&mut self) -> Result<OutputClause> {
53611        // Parse comma-separated list of columns/expressions with optional aliases
53612        let mut columns = Vec::new();
53613        loop {
53614            let expr = self.parse_expression()?;
53615            // Check for optional AS alias
53616            let expr = if self.match_token(TokenType::As) {
53617                let alias = self.expect_identifier_or_keyword_with_quoted()?;
53618                Expression::Alias(Box::new(Alias {
53619                    this: expr,
53620                    alias,
53621                    column_aliases: Vec::new(),
53622                    pre_alias_comments: Vec::new(),
53623                    trailing_comments: Vec::new(),
53624                    inferred_type: None,
53625                }))
53626            } else {
53627                expr
53628            };
53629            columns.push(expr);
53630            if !self.match_token(TokenType::Comma) {
53631                break;
53632            }
53633        }
53634
53635        // Check for INTO target
53636        let into_table = if self.match_token(TokenType::Into) {
53637            Some(self.parse_expression()?)
53638        } else {
53639            None
53640        };
53641
53642        Ok(OutputClause {
53643            columns,
53644            into_table,
53645        })
53646    }
53647
53648    /// parse_returns - Implemented from Python _parse_returns
53649    /// Calls: parse_types
53650    #[allow(unused_variables, unused_mut)]
53651    pub fn parse_returns(&mut self) -> Result<Option<Expression>> {
53652        if self.match_text_seq(&["NULL", "ON", "NULL", "INPUT"]) {
53653            return Ok(Some(Expression::Schema(Box::new(Schema {
53654                this: None,
53655                expressions: Vec::new(),
53656            }))));
53657        }
53658        Ok(None)
53659    }
53660
53661    /// parse_row - Parses ROW FORMAT clause
53662    /// Returns RowFormatSerdeProperty or RowFormatDelimitedProperty
53663    pub fn parse_row(&mut self) -> Result<Option<Expression>> {
53664        // Python: if not self._match(TokenType.FORMAT): return None
53665        if !self.match_token(TokenType::Format) {
53666            return Ok(None);
53667        }
53668        self.parse_row_format()
53669    }
53670
53671    /// parse_row_format - Implemented from Python _parse_row_format
53672    /// Parses SERDE or DELIMITED row format specifications
53673    pub fn parse_row_format(&mut self) -> Result<Option<Expression>> {
53674        // Check for SERDE row format
53675        if self.match_text_seq(&["SERDE"]) {
53676            let this = self.parse_string()?;
53677            let serde_properties = self.parse_serde_properties(false)?;
53678
53679            return Ok(Some(Expression::RowFormatSerdeProperty(Box::new(
53680                RowFormatSerdeProperty {
53681                    this: Box::new(this.unwrap_or(Expression::Null(Null))),
53682                    serde_properties: serde_properties.map(Box::new),
53683                },
53684            ))));
53685        }
53686
53687        // Check for DELIMITED row format
53688        self.match_text_seq(&["DELIMITED"]);
53689
53690        let mut fields = None;
53691        let mut escaped = None;
53692        let mut collection_items = None;
53693        let mut map_keys = None;
53694        let mut lines = None;
53695        let mut null = None;
53696
53697        // Parse FIELDS TERMINATED BY
53698        if self.match_text_seq(&["FIELDS", "TERMINATED", "BY"]) {
53699            fields = self.parse_string()?.map(Box::new);
53700            // Parse optional ESCAPED BY
53701            if self.match_text_seq(&["ESCAPED", "BY"]) {
53702                escaped = self.parse_string()?.map(Box::new);
53703            }
53704        }
53705
53706        // Parse COLLECTION ITEMS TERMINATED BY
53707        if self.match_text_seq(&["COLLECTION", "ITEMS", "TERMINATED", "BY"]) {
53708            collection_items = self.parse_string()?.map(Box::new);
53709        }
53710
53711        // Parse MAP KEYS TERMINATED BY
53712        if self.match_text_seq(&["MAP", "KEYS", "TERMINATED", "BY"]) {
53713            map_keys = self.parse_string()?.map(Box::new);
53714        }
53715
53716        // Parse LINES TERMINATED BY
53717        if self.match_text_seq(&["LINES", "TERMINATED", "BY"]) {
53718            lines = self.parse_string()?.map(Box::new);
53719        }
53720
53721        // Parse NULL DEFINED AS
53722        if self.match_text_seq(&["NULL", "DEFINED", "AS"]) {
53723            null = self.parse_string()?.map(Box::new);
53724        }
53725
53726        // Parse optional WITH SERDEPROPERTIES
53727        let serde = self.parse_serde_properties(false)?.map(Box::new);
53728
53729        Ok(Some(Expression::RowFormatDelimitedProperty(Box::new(
53730            RowFormatDelimitedProperty {
53731                fields,
53732                escaped,
53733                collection_items,
53734                map_keys,
53735                lines,
53736                null,
53737                serde,
53738            },
53739        ))))
53740    }
53741
53742    /// parse_schema - Ported from Python _parse_schema
53743    /// Parses schema definition: (col1 type1, col2 type2, ...)
53744    /// Used for CREATE TABLE column definitions
53745    #[allow(unused_variables, unused_mut)]
53746    pub fn parse_schema(&mut self) -> Result<Option<Expression>> {
53747        self.parse_schema_with_this(None)
53748    }
53749
53750    /// parse_schema_with_this - Parses schema with optional table reference
53751    fn parse_schema_with_this(&mut self, this: Option<Expression>) -> Result<Option<Expression>> {
53752        // Check for opening parenthesis
53753        if !self.match_token(TokenType::LParen) {
53754            return Ok(this.map(|e| e));
53755        }
53756
53757        // Check if this is a subquery (SELECT, WITH, etc.) not a schema
53758        if self.check(TokenType::Select) || self.check(TokenType::With) {
53759            // Retreat - put back the LParen
53760            self.current -= 1;
53761            return Ok(this.map(|e| e));
53762        }
53763
53764        // Parse column definitions and constraints
53765        let mut expressions = Vec::new();
53766        if !self.check(TokenType::RParen) {
53767            loop {
53768                // Try to parse constraint first, then field definition
53769                if let Some(constraint) = self.parse_constraint()? {
53770                    expressions.push(constraint);
53771                } else if let Some(field_def) = self.parse_field_def()? {
53772                    expressions.push(field_def);
53773                } else {
53774                    break;
53775                }
53776
53777                if !self.match_token(TokenType::Comma) {
53778                    break;
53779                }
53780            }
53781        }
53782
53783        self.expect(TokenType::RParen)?;
53784
53785        Ok(Some(Expression::Schema(Box::new(Schema {
53786            this: this.map(Box::new),
53787            expressions,
53788        }))))
53789    }
53790
53791    /// Parse schema identifier: name or name(columns)
53792    /// Used for TSQL ON filegroup (partition_column) syntax
53793    fn parse_schema_identifier(&mut self) -> Result<Expression> {
53794        // Parse the identifier (filegroup name)
53795        let name = self.expect_identifier_with_quoted()?;
53796        let name_expr = Expression::Identifier(name);
53797
53798        // Check for optional parenthesized columns
53799        if self.match_token(TokenType::LParen) {
53800            let mut columns = Vec::new();
53801            loop {
53802                let col = self.expect_identifier_with_quoted()?;
53803                columns.push(Expression::Identifier(col));
53804                if !self.match_token(TokenType::Comma) {
53805                    break;
53806                }
53807            }
53808            self.expect(TokenType::RParen)?;
53809            Ok(Expression::Schema(Box::new(Schema {
53810                this: Some(Box::new(name_expr)),
53811                expressions: columns,
53812            })))
53813        } else {
53814            // Just the identifier, no columns
53815            Ok(name_expr)
53816        }
53817    }
53818
53819    /// parse_security - Implemented from Python _parse_security
53820    #[allow(unused_variables, unused_mut)]
53821    pub fn parse_security(&mut self) -> Result<Option<Expression>> {
53822        if self.match_texts(&["NONE", "DEFINER", "INVOKER"]) {
53823            // Matched one of: NONE, DEFINER, INVOKER
53824            return Ok(None);
53825        }
53826        Ok(None)
53827    }
53828
53829    /// parse_select_or_expression - Parses either a SELECT statement or an expression
53830    /// Python: _parse_select_or_expression
53831    pub fn parse_select_or_expression(&mut self) -> Result<Option<Expression>> {
53832        // Save position for potential backtracking
53833        let start_pos = self.current;
53834
53835        // First try to parse a SELECT statement if we're at a SELECT keyword
53836        if self.check(TokenType::Select) {
53837            return Ok(Some(self.parse_select()?));
53838        }
53839
53840        // Otherwise try to parse an expression (assignment)
53841        if let Some(expr) = self.parse_disjunction()? {
53842            return Ok(Some(expr));
53843        }
53844
53845        // Backtrack if nothing worked
53846        self.current = start_pos;
53847
53848        Ok(None)
53849    }
53850
53851    /// parse_select_query - Implemented from Python _parse_select_query
53852    /// Calls: parse_string, parse_table, parse_describe
53853    #[allow(unused_variables, unused_mut)]
53854    pub fn parse_select_query(&mut self) -> Result<Option<Expression>> {
53855        if self.match_texts(&["STRUCT", "VALUE"]) {
53856            // Matched one of: STRUCT, VALUE
53857            return Ok(None);
53858        }
53859        Ok(None)
53860    }
53861
53862    /// parse_sequence_properties - Implemented from Python _parse_sequence_properties
53863    /// Calls: parse_number, parse_term, parse_column
53864    #[allow(unused_variables, unused_mut)]
53865    pub fn parse_sequence_properties(&mut self) -> Result<Option<Expression>> {
53866        if self.match_text_seq(&["INCREMENT"]) {
53867            return Ok(Some(Expression::SequenceProperties(Box::new(
53868                SequenceProperties {
53869                    increment: None,
53870                    minvalue: None,
53871                    maxvalue: None,
53872                    cache: None,
53873                    start: None,
53874                    owned: None,
53875                    options: Vec::new(),
53876                },
53877            ))));
53878        }
53879        if self.match_text_seq(&["BY"]) {
53880            // Matched: BY
53881            return Ok(None);
53882        }
53883        if self.match_text_seq(&["="]) {
53884            // Matched: =
53885            return Ok(None);
53886        }
53887        Ok(None)
53888    }
53889
53890    /// parse_serde_properties - Implemented from Python _parse_serde_properties
53891    /// Parses SERDEPROPERTIES clause: [WITH] SERDEPROPERTIES (key=value, ...)
53892    pub fn parse_serde_properties(&mut self, with_: bool) -> Result<Option<Expression>> {
53893        let start_index = self.current;
53894        let has_with = with_ || self.match_text_seq(&["WITH"]);
53895
53896        // Check for SERDEPROPERTIES keyword
53897        if !self.match_token(TokenType::SerdeProperties) {
53898            self.current = start_index;
53899            return Ok(None);
53900        }
53901
53902        // Parse wrapped properties manually since parse_property doesn't handle 'key'='value' syntax
53903        let mut expressions = Vec::new();
53904        if self.match_token(TokenType::LParen) {
53905            loop {
53906                if self.check(TokenType::RParen) {
53907                    break;
53908                }
53909                // Parse 'key'='value' or key=value
53910                let key = self.parse_primary()?;
53911                if self.match_token(TokenType::Eq) {
53912                    let value = self.parse_primary()?;
53913                    expressions.push(Expression::Eq(Box::new(BinaryOp::new(key, value))));
53914                } else {
53915                    expressions.push(key);
53916                }
53917                if !self.match_token(TokenType::Comma) {
53918                    break;
53919                }
53920            }
53921            self.expect(TokenType::RParen)?;
53922        }
53923
53924        Ok(Some(Expression::SerdeProperties(Box::new(
53925            SerdeProperties {
53926                expressions,
53927                with_: if has_with {
53928                    Some(Box::new(Expression::Boolean(BooleanLiteral {
53929                        value: true,
53930                    })))
53931                } else {
53932                    None
53933                },
53934            },
53935        ))))
53936    }
53937
53938    /// parse_session_parameter - Ported from Python _parse_session_parameter
53939    #[allow(unused_variables, unused_mut)]
53940    /// parse_session_parameter - Parses session parameters (@@var or @@session.var)
53941    /// Example: @@session.sql_mode, @@global.autocommit
53942    pub fn parse_session_parameter(&mut self) -> Result<Option<Expression>> {
53943        // Parse the first identifier or primary
53944        let first = if let Some(id) = self.parse_id_var()? {
53945            id
53946        } else if let Some(primary) = self.parse_primary_or_var()? {
53947            primary
53948        } else {
53949            return Ok(None);
53950        };
53951
53952        // Check for dot notation (kind.name)
53953        let (kind, this) = if self.match_token(TokenType::Dot) {
53954            // kind is the first part, parse the second
53955            let kind_name = match &first {
53956                Expression::Identifier(id) => Some(id.name.clone()),
53957                _ => None,
53958            };
53959            let second = self
53960                .parse_var()?
53961                .or_else(|| self.parse_primary_or_var().ok().flatten());
53962            (kind_name, second.unwrap_or(first))
53963        } else {
53964            (None, first)
53965        };
53966
53967        Ok(Some(Expression::SessionParameter(Box::new(
53968            SessionParameter {
53969                this: Box::new(this),
53970                kind,
53971            },
53972        ))))
53973    }
53974
53975    /// parse_set_item - Ported from Python _parse_set_item
53976    /// Parses an item in a SET statement (GLOBAL, LOCAL, SESSION prefixes, or assignment)
53977    #[allow(unused_variables, unused_mut)]
53978    pub fn parse_set_item(&mut self) -> Result<Option<Expression>> {
53979        // Check for specific prefixes
53980        let kind = if self.match_text_seq(&["GLOBAL"]) {
53981            Some("GLOBAL".to_string())
53982        } else if self.match_text_seq(&["LOCAL"]) {
53983            Some("LOCAL".to_string())
53984        } else if self.match_text_seq(&["SESSION"]) {
53985            Some("SESSION".to_string())
53986        } else {
53987            None
53988        };
53989
53990        // Delegate to set_item_assignment
53991        self.parse_set_item_assignment()
53992    }
53993
53994    /// parse_set_item_assignment - Implemented from Python _parse_set_item_assignment
53995    /// Parses SET variable = value assignments
53996    pub fn parse_set_item_assignment(&mut self) -> Result<Option<Expression>> {
53997        let start_index = self.current;
53998
53999        // Try to parse as TRANSACTION
54000        if self.match_text_seq(&["TRANSACTION"]) {
54001            // This is handled by parse_set_transaction
54002            return Ok(Some(Expression::SetItem(Box::new(SetItem {
54003                name: Expression::Var(Box::new(Var {
54004                    this: "TRANSACTION".to_string(),
54005                })),
54006                value: Expression::Null(Null),
54007                kind: None,
54008                no_equals: false,
54009            }))));
54010        }
54011
54012        // Parse left side: primary or column
54013        let left = self
54014            .parse_primary_or_var()?
54015            .or_else(|| self.parse_column().ok().flatten());
54016
54017        if left.is_none() {
54018            self.current = start_index;
54019            return Ok(None);
54020        }
54021
54022        // Check for assignment delimiter (= or TO or :=)
54023        if !self.match_texts(&["=", "TO", ":="]) {
54024            self.current = start_index;
54025            return Ok(None);
54026        }
54027
54028        // Parse right side: value
54029        // First try string literals (preserve quoting), then booleans/numbers, then identifiers
54030        let right_val = if self.check(TokenType::String) {
54031            let text = self.advance().text.clone();
54032            Expression::Literal(Box::new(Literal::String(text)))
54033        } else if self.check(TokenType::False) {
54034            self.skip();
54035            Expression::Boolean(BooleanLiteral { value: false })
54036        } else if self.check(TokenType::True) {
54037            self.skip();
54038            Expression::Boolean(BooleanLiteral { value: true })
54039        } else {
54040            let right = self
54041                .parse_id_var()?
54042                .or_else(|| self.parse_primary_or_var().ok().flatten());
54043            // Convert Column/Identifier to Var
54044            match right {
54045                Some(Expression::Column(col)) => Expression::Var(Box::new(Var {
54046                    this: col.name.name.clone(),
54047                })),
54048                Some(Expression::Identifier(id)) => Expression::Var(Box::new(Var {
54049                    this: id.name.clone(),
54050                })),
54051                Some(other) => other,
54052                None => Expression::Null(Null),
54053            }
54054        };
54055
54056        Ok(Some(Expression::SetItem(Box::new(SetItem {
54057            name: left
54058                .ok_or_else(|| self.parse_error("Expected variable name in SET statement"))?,
54059            value: right_val,
54060            kind: None,
54061            no_equals: false,
54062        }))))
54063    }
54064
54065    /// parse_set_operations - Parses UNION/INTERSECT/EXCEPT operations
54066    /// This version parses from current position (expects to be at set operator)
54067    /// Python: _parse_set_operations
54068    pub fn parse_set_operations(&mut self) -> Result<Option<Expression>> {
54069        // Parse a SELECT or subquery first
54070        let left = if self.check(TokenType::Select) {
54071            Some(self.parse_select()?)
54072        } else if self.match_token(TokenType::LParen) {
54073            let inner = self.parse_select()?;
54074            self.match_token(TokenType::RParen);
54075            Some(inner)
54076        } else {
54077            None
54078        };
54079
54080        if left.is_none() {
54081            return Ok(None);
54082        }
54083
54084        self.parse_set_operations_with_expr(left)
54085    }
54086
54087    /// parse_set_operations_with_expr - Parses set operations with a left expression
54088    pub fn parse_set_operations_with_expr(
54089        &mut self,
54090        this: Option<Expression>,
54091    ) -> Result<Option<Expression>> {
54092        let mut result = this;
54093
54094        while result.is_some() {
54095            if let Some(setop) = self.parse_set_operation_with_expr(result.clone())? {
54096                result = Some(setop);
54097            } else {
54098                break;
54099            }
54100        }
54101
54102        Ok(result)
54103    }
54104
54105    /// parse_set_operation_with_expr - Parses a single set operation (UNION, INTERSECT, EXCEPT)
54106    fn parse_set_operation_with_expr(
54107        &mut self,
54108        left: Option<Expression>,
54109    ) -> Result<Option<Expression>> {
54110        let left_expr = match left {
54111            Some(e) => e,
54112            None => return Ok(None),
54113        };
54114
54115        // Check for UNION, INTERSECT, EXCEPT
54116        let op_type = if self.match_token(TokenType::Union) {
54117            "UNION"
54118        } else if self.match_token(TokenType::Intersect) {
54119            "INTERSECT"
54120        } else if self.match_token(TokenType::Except) {
54121            "EXCEPT"
54122        } else {
54123            return Ok(Some(left_expr));
54124        };
54125
54126        // Check for ALL or DISTINCT
54127        let (all, distinct) = if self.match_token(TokenType::All) {
54128            (true, false)
54129        } else {
54130            let d = self.match_token(TokenType::Distinct);
54131            (false, d)
54132        };
54133
54134        // DuckDB: UNION [ALL] BY NAME SELECT ...
54135        let by_name = self.match_token(TokenType::By) && self.match_identifier("NAME");
54136
54137        // Parse the right side (SELECT or subquery)
54138        let right = if self.check(TokenType::Select) {
54139            self.parse_select()?
54140        } else if self.match_token(TokenType::LParen) {
54141            let inner = self.parse_select()?;
54142            self.match_token(TokenType::RParen);
54143            inner
54144        } else {
54145            return Ok(Some(left_expr));
54146        };
54147
54148        // Create the appropriate set operation expression
54149        match op_type {
54150            "UNION" => Ok(Some(Expression::Union(Box::new(Union {
54151                left: left_expr,
54152                right,
54153                all,
54154                distinct,
54155                with: None,
54156                order_by: None,
54157                limit: None,
54158                offset: None,
54159                distribute_by: None,
54160                sort_by: None,
54161                cluster_by: None,
54162                by_name,
54163                side: None,
54164                kind: None,
54165                corresponding: false,
54166                strict: false,
54167                on_columns: Vec::new(),
54168            })))),
54169            "INTERSECT" => Ok(Some(Expression::Intersect(Box::new(Intersect {
54170                left: left_expr,
54171                right,
54172                all,
54173                distinct,
54174                with: None,
54175                order_by: None,
54176                limit: None,
54177                offset: None,
54178                distribute_by: None,
54179                sort_by: None,
54180                cluster_by: None,
54181                by_name,
54182                side: None,
54183                kind: None,
54184                corresponding: false,
54185                strict: false,
54186                on_columns: Vec::new(),
54187            })))),
54188            "EXCEPT" => Ok(Some(Expression::Except(Box::new(Except {
54189                left: left_expr,
54190                right,
54191                all,
54192                distinct,
54193                with: None,
54194                order_by: None,
54195                limit: None,
54196                offset: None,
54197                distribute_by: None,
54198                sort_by: None,
54199                cluster_by: None,
54200                by_name,
54201                side: None,
54202                kind: None,
54203                corresponding: false,
54204                strict: false,
54205                on_columns: Vec::new(),
54206            })))),
54207            _ => Ok(Some(left_expr)),
54208        }
54209    }
54210
54211    /// parse_set_transaction - Implemented from Python _parse_set_transaction
54212    #[allow(unused_variables, unused_mut)]
54213    pub fn parse_set_transaction(&mut self) -> Result<Option<Expression>> {
54214        if self.match_text_seq(&["TRANSACTION"]) {
54215            // Matched: TRANSACTION
54216            return Ok(None);
54217        }
54218        Ok(None)
54219    }
54220
54221    /// Helper to consume an optional ClickHouse SETTINGS clause
54222    /// Used in SHOW, CHECK TABLE, and other ClickHouse statements
54223    fn parse_clickhouse_settings_clause(&mut self) -> Result<()> {
54224        if self.match_token(TokenType::Settings) {
54225            let _ = self.parse_settings_property()?;
54226        }
54227        Ok(())
54228    }
54229
54230    /// parse_settings_property - Parses SETTINGS property (ClickHouse)
54231    /// Python: _parse_settings_property
54232    /// Format: SETTINGS key=value, key=value, ...
54233    pub fn parse_settings_property(&mut self) -> Result<Option<Expression>> {
54234        // Parse comma-separated assignment expressions
54235        let mut expressions = Vec::new();
54236        loop {
54237            if let Some(assignment) = self.parse_assignment()? {
54238                expressions.push(assignment);
54239            } else {
54240                break;
54241            }
54242            if !self.match_token(TokenType::Comma) {
54243                break;
54244            }
54245        }
54246
54247        Ok(Some(Expression::SettingsProperty(Box::new(
54248            SettingsProperty { expressions },
54249        ))))
54250    }
54251
54252    /// parse_simplified_pivot - Ported from Python _parse_simplified_pivot
54253    /// Handles DuckDB simplified PIVOT/UNPIVOT syntax:
54254    ///   PIVOT table ON columns [IN (...)] USING agg_func [AS alias], ... [GROUP BY ...]
54255    ///   UNPIVOT table ON columns [INTO NAME col VALUE col, ...]
54256    #[allow(unused_variables, unused_mut)]
54257    pub fn parse_simplified_pivot(&mut self, is_unpivot: bool) -> Result<Option<Expression>> {
54258        // Parse the source table (can be a subquery like (SELECT 1 AS col1, 2 AS col2))
54259        let this = if self.check(TokenType::LParen) {
54260            // Could be parenthesized subquery
54261            self.skip(); // consume (
54262            if self.check(TokenType::Select) || self.check(TokenType::With) {
54263                let inner = self.parse_statement()?;
54264                self.expect(TokenType::RParen)?;
54265                Some(Expression::Subquery(Box::new(Subquery {
54266                    this: inner,
54267                    alias: None,
54268                    column_aliases: Vec::new(),
54269                    order_by: None,
54270                    limit: None,
54271                    offset: None,
54272                    lateral: false,
54273                    modifiers_inside: false,
54274                    trailing_comments: Vec::new(),
54275                    distribute_by: None,
54276                    sort_by: None,
54277                    cluster_by: None,
54278                    inferred_type: None,
54279                })))
54280            } else {
54281                // Not a subquery, retreat and parse as expression in parens
54282                self.current -= 1; // un-consume the (
54283                Some(self.parse_primary()?)
54284            }
54285        } else {
54286            // Parse table reference (e.g., Cities, schema.table, duckdb_functions())
54287            Some(self.parse_primary()?)
54288        };
54289
54290        // Parse ON columns
54291        let expressions = if self.match_text_seq(&["ON"]) {
54292            let mut on_exprs = Vec::new();
54293            loop {
54294                // Parse ON expression - use parse_bitwise to handle complex expressions like Country || '_' || Name
54295                let on_expr = self.parse_bitwise()?;
54296                if on_expr.is_none() {
54297                    break;
54298                }
54299                let mut expr = on_expr.unwrap();
54300
54301                // Check for IN clause on this column
54302                if self.match_token(TokenType::In) {
54303                    if self.match_token(TokenType::LParen) {
54304                        let mut in_exprs = Vec::new();
54305                        loop {
54306                            if self.check(TokenType::RParen) {
54307                                break;
54308                            }
54309                            if let Some(val) = self.parse_select_or_expression()? {
54310                                in_exprs.push(val);
54311                            }
54312                            if !self.match_token(TokenType::Comma) {
54313                                break;
54314                            }
54315                        }
54316                        self.expect(TokenType::RParen)?;
54317                        expr = Expression::In(Box::new(In {
54318                            this: expr,
54319                            expressions: in_exprs,
54320                            query: None,
54321                            not: false,
54322                            global: false,
54323                            unnest: None,
54324                            is_field: false,
54325                        }));
54326                    }
54327                }
54328                // Check for alias (UNPIVOT ON (jan, feb, mar) AS q1, ...)
54329                else if self.match_token(TokenType::As) {
54330                    let alias_name = self.expect_identifier()?;
54331                    expr =
54332                        Expression::Alias(Box::new(Alias::new(expr, Identifier::new(alias_name))));
54333                }
54334
54335                on_exprs.push(expr);
54336
54337                // Continue if comma
54338                if !self.match_token(TokenType::Comma) {
54339                    break;
54340                }
54341            }
54342            on_exprs
54343        } else {
54344            Vec::new()
54345        };
54346
54347        // Parse INTO for UNPIVOT columns (INTO NAME col VALUE col, ...)
54348        let into = self.parse_unpivot_columns()?;
54349
54350        // Parse USING clause (aggregation functions with optional aliases)
54351        // e.g., USING SUM(Population), USING SUM(Population) AS total, MAX(Population) AS max
54352        // e.g., USING CAST(AVG(LENGTH(function_name)) AS INT)
54353        let using = if self.match_text_seq(&["USING"]) {
54354            let mut using_exprs = Vec::new();
54355            loop {
54356                // Stop if we hit GROUP BY or end of input
54357                if self.is_at_end() || self.check(TokenType::Group) || self.check(TokenType::RParen)
54358                {
54359                    break;
54360                }
54361                // Parse the primary expression (function call, possibly with cast :: operator)
54362                let func = self.parse_primary()?;
54363                // Check for :: cast operator (e.g., SUM(Population)::INTEGER)
54364                let expr = if self.match_token(TokenType::DColon) {
54365                    let data_type = self.parse_data_type()?;
54366                    Expression::Cast(Box::new(Cast {
54367                        this: func,
54368                        to: data_type,
54369                        trailing_comments: Vec::new(),
54370                        double_colon_syntax: true,
54371                        format: None,
54372                        default: None,
54373                        inferred_type: None,
54374                    }))
54375                } else {
54376                    func
54377                };
54378                // Try to parse alias (AS alias)
54379                if self.match_token(TokenType::As) {
54380                    let alias_name = self.expect_identifier()?;
54381                    using_exprs.push(Expression::Alias(Box::new(Alias::new(
54382                        expr,
54383                        Identifier::new(alias_name),
54384                    ))));
54385                } else {
54386                    using_exprs.push(expr);
54387                }
54388                if !self.match_token(TokenType::Comma) {
54389                    break;
54390                }
54391            }
54392            using_exprs
54393        } else {
54394            Vec::new()
54395        };
54396
54397        // Parse optional GROUP BY
54398        let group = self.parse_group()?;
54399
54400        let source = this.unwrap();
54401
54402        Ok(Some(Expression::Pivot(Box::new(Pivot {
54403            this: source,
54404            expressions,
54405            fields: Vec::new(),
54406            using,
54407            group: group.map(Box::new),
54408            unpivot: is_unpivot,
54409            into: into.map(Box::new),
54410            alias: None,
54411            include_nulls: None,
54412            default_on_null: None,
54413            with: None,
54414        }))))
54415    }
54416
54417    /// parse_slice - Parses array slice syntax [start:end:step]
54418    /// Python: _parse_slice
54419    /// Takes an optional 'this' expression (the start of the slice)
54420    pub fn parse_slice(&mut self) -> Result<Option<Expression>> {
54421        self.parse_slice_with_this(None)
54422    }
54423
54424    /// Implementation of parse_slice with 'this' parameter
54425    pub fn parse_slice_with_this(
54426        &mut self,
54427        this: Option<Expression>,
54428    ) -> Result<Option<Expression>> {
54429        // Check for colon - if not found, return this as-is
54430        if !self.match_token(TokenType::Colon) {
54431            return Ok(this);
54432        }
54433
54434        // Parse end expression
54435        // Handle special case: -: which means -1 (from end)
54436        let end = if self.check(TokenType::Dash) && self.check_next(TokenType::Colon) {
54437            // -: pattern means -1 (from end)
54438            self.skip(); // consume dash
54439            Some(Expression::Neg(Box::new(UnaryOp::new(
54440                Expression::Literal(Box::new(Literal::Number("1".to_string()))),
54441            ))))
54442        } else if self.check(TokenType::Colon) || self.check(TokenType::RBracket) {
54443            // Empty end like [start::step] or [start:]
54444            None
54445        } else {
54446            Some(self.parse_unary()?)
54447        };
54448
54449        // Parse optional step expression after second colon
54450        let step = if self.match_token(TokenType::Colon) {
54451            if self.check(TokenType::RBracket) {
54452                None
54453            } else {
54454                Some(self.parse_unary()?)
54455            }
54456        } else {
54457            None
54458        };
54459
54460        Ok(Some(Expression::Slice(Box::new(Slice {
54461            this: this.map(Box::new),
54462            expression: end.map(Box::new),
54463            step: step.map(Box::new),
54464        }))))
54465    }
54466
54467    /// Parse a slice element (start, end, or step in array slicing)
54468    /// This uses parse_unary to avoid interpreting : as parameter syntax
54469    /// Returns None for empty elements (e.g., [:] or [::step])
54470    fn parse_slice_element(&mut self) -> Result<Option<Expression>> {
54471        // Check for empty element (next is : or ])
54472        if self.check(TokenType::Colon) || self.check(TokenType::RBracket) {
54473            return Ok(None);
54474        }
54475        // Handle special case: -: means -1 (from the end)
54476        // This is used in slicing like [:-:-1] where the first -: means end=-1
54477        if self.check(TokenType::Dash) && self.check_next(TokenType::Colon) {
54478            self.skip(); // consume dash
54479                         // Don't consume the colon - let the caller handle it
54480            return Ok(Some(Expression::Neg(Box::new(UnaryOp::new(
54481                Expression::Literal(Box::new(Literal::Number("1".to_string()))),
54482            )))));
54483        }
54484        // Parse full expression (including binary ops like y - 1) but stop at : or ]
54485        let expr = self.parse_disjunction()?;
54486        Ok(expr)
54487    }
54488
54489    /// parse_sort - Ported from Python _parse_sort
54490    /// Parses SORT BY clause (Hive/Spark)
54491    #[allow(unused_variables, unused_mut)]
54492    pub fn parse_sort(&mut self) -> Result<Option<Expression>> {
54493        // Check for SORT BY token
54494        if !self.match_keywords(&[TokenType::Sort, TokenType::By]) {
54495            return Ok(None);
54496        }
54497
54498        // Parse comma-separated ordered expressions
54499        let mut expressions = Vec::new();
54500        loop {
54501            if let Some(ordered) = self.parse_ordered_item()? {
54502                expressions.push(ordered);
54503            } else {
54504                break;
54505            }
54506            if !self.match_token(TokenType::Comma) {
54507                break;
54508            }
54509        }
54510
54511        Ok(Some(Expression::SortBy(Box::new(SortBy { expressions }))))
54512    }
54513
54514    /// parse_cluster_by_clause - Parses CLUSTER BY clause (Hive/Spark)
54515    #[allow(unused_variables, unused_mut)]
54516    pub fn parse_cluster_by_clause(&mut self) -> Result<Option<Expression>> {
54517        if !self.match_keywords(&[TokenType::Cluster, TokenType::By]) {
54518            return Ok(None);
54519        }
54520
54521        // Parse comma-separated ordered expressions
54522        let mut expressions: Vec<Ordered> = Vec::new();
54523        loop {
54524            if let Some(ordered) = self.parse_ordered_item()? {
54525                expressions.push(ordered);
54526            } else {
54527                break;
54528            }
54529            if !self.match_token(TokenType::Comma) {
54530                break;
54531            }
54532        }
54533        Ok(Some(Expression::ClusterBy(Box::new(ClusterBy {
54534            expressions,
54535        }))))
54536    }
54537
54538    /// parse_distribute_by_clause - Parses DISTRIBUTE BY clause (Hive/Spark)
54539    #[allow(unused_variables, unused_mut)]
54540    pub fn parse_distribute_by_clause(&mut self) -> Result<Option<Expression>> {
54541        if !self.match_keywords(&[TokenType::Distribute, TokenType::By]) {
54542            return Ok(None);
54543        }
54544
54545        let expressions = self.parse_expression_list()?;
54546        Ok(Some(Expression::DistributeBy(Box::new(DistributeBy {
54547            expressions,
54548        }))))
54549    }
54550
54551    /// parse_sortkey - Redshift/PostgreSQL SORTKEY property
54552    /// Parses SORTKEY(column1, column2, ...) with optional COMPOUND modifier
54553    #[allow(unused_variables, unused_mut)]
54554    pub fn parse_sortkey(&mut self) -> Result<Option<Expression>> {
54555        // Parse the wrapped list of columns/identifiers
54556        let this = if self.match_token(TokenType::LParen) {
54557            let mut columns = Vec::new();
54558            loop {
54559                if let Some(id) = self.parse_id_var()? {
54560                    columns.push(id);
54561                } else {
54562                    break;
54563                }
54564                if !self.match_token(TokenType::Comma) {
54565                    break;
54566                }
54567            }
54568            self.match_token(TokenType::RParen);
54569
54570            if columns.is_empty() {
54571                return Ok(None);
54572            }
54573
54574            if columns.len() == 1 {
54575                columns.into_iter().next().unwrap()
54576            } else {
54577                Expression::Tuple(Box::new(Tuple {
54578                    expressions: columns,
54579                }))
54580            }
54581        } else {
54582            // Single column without parens
54583            if let Some(id) = self.parse_id_var()? {
54584                id
54585            } else {
54586                return Ok(None);
54587            }
54588        };
54589
54590        Ok(Some(Expression::SortKeyProperty(Box::new(
54591            SortKeyProperty {
54592                this: Box::new(this),
54593                compound: None, // compound is set by caller if COMPOUND keyword was matched
54594            },
54595        ))))
54596    }
54597
54598    /// parse_star - Parse STAR (*) token with optional EXCEPT/REPLACE/RENAME
54599    /// Python: if self._match(TokenType.STAR): return self._parse_star_ops()
54600    pub fn parse_star(&mut self) -> Result<Option<Expression>> {
54601        if !self.match_token(TokenType::Star) {
54602            return Ok(None);
54603        }
54604
54605        // Parse optional EXCEPT/EXCLUDE columns
54606        let except = self.parse_star_except()?;
54607
54608        // Parse optional REPLACE expressions
54609        let replace = self.parse_star_replace()?;
54610
54611        // Parse optional RENAME columns
54612        let rename = self.parse_star_rename()?;
54613
54614        Ok(Some(Expression::Star(Star {
54615            table: None,
54616            except,
54617            replace,
54618            rename,
54619            trailing_comments: Vec::new(),
54620            span: None,
54621        })))
54622    }
54623
54624    /// try_parse_identifier - Try to parse an identifier, returning None if not found
54625    fn try_parse_identifier(&mut self) -> Option<Identifier> {
54626        if self.is_identifier_token() {
54627            let token = self.advance();
54628            let quoted = token.token_type == TokenType::QuotedIdentifier;
54629            Some(Identifier {
54630                name: token.text,
54631                quoted,
54632                trailing_comments: Vec::new(),
54633                span: None,
54634            })
54635        } else {
54636            None
54637        }
54638    }
54639
54640    /// parse_star_except - Parse EXCEPT/EXCLUDE clause for Star
54641    /// Example: * EXCEPT (col1, col2)
54642    fn parse_star_except(&mut self) -> Result<Option<Vec<Identifier>>> {
54643        if !self.match_texts(&["EXCEPT", "EXCLUDE"]) {
54644            return Ok(None);
54645        }
54646
54647        // Parse (col1, col2, ...)
54648        if self.match_token(TokenType::LParen) {
54649            let mut columns = Vec::new();
54650            loop {
54651                if let Some(id) = self.try_parse_identifier() {
54652                    columns.push(id);
54653                } else if self.is_safe_keyword_as_identifier() {
54654                    // ClickHouse: allow keywords like 'key' as column names in EXCEPT
54655                    let token = self.advance();
54656                    columns.push(Identifier {
54657                        name: token.text,
54658                        quoted: false,
54659                        trailing_comments: Vec::new(),
54660                        span: None,
54661                    });
54662                } else {
54663                    break;
54664                }
54665                if !self.match_token(TokenType::Comma) {
54666                    break;
54667                }
54668            }
54669            self.match_token(TokenType::RParen);
54670            return Ok(Some(columns));
54671        }
54672
54673        // Single column without parens
54674        if let Some(id) = self.try_parse_identifier() {
54675            return Ok(Some(vec![id]));
54676        }
54677
54678        Ok(None)
54679    }
54680
54681    /// parse_star_replace - Parse REPLACE clause for Star
54682    /// Example: * REPLACE (col1 AS alias1, col2 AS alias2)
54683    fn parse_star_replace(&mut self) -> Result<Option<Vec<Alias>>> {
54684        if !self.match_texts(&["REPLACE"]) {
54685            return Ok(None);
54686        }
54687
54688        if self.match_token(TokenType::LParen) {
54689            let mut aliases = Vec::new();
54690            loop {
54691                // Parse expression AS alias
54692                if let Some(expr) = self.parse_disjunction()? {
54693                    let alias_name = if self.match_token(TokenType::As) {
54694                        self.try_parse_identifier()
54695                    } else {
54696                        None
54697                    };
54698
54699                    aliases.push(Alias {
54700                        this: expr,
54701                        alias: alias_name.unwrap_or_else(|| Identifier::new("")),
54702                        column_aliases: Vec::new(),
54703                        pre_alias_comments: Vec::new(),
54704                        trailing_comments: Vec::new(),
54705                        inferred_type: None,
54706                    });
54707                } else {
54708                    break;
54709                }
54710                if !self.match_token(TokenType::Comma) {
54711                    break;
54712                }
54713            }
54714            self.match_token(TokenType::RParen);
54715            return Ok(Some(aliases));
54716        }
54717
54718        Ok(None)
54719    }
54720
54721    /// parse_star_rename - Parse RENAME clause for Star
54722    /// Example: * RENAME (old_col AS new_col, ...)
54723    fn parse_star_rename(&mut self) -> Result<Option<Vec<(Identifier, Identifier)>>> {
54724        if !self.match_texts(&["RENAME"]) {
54725            return Ok(None);
54726        }
54727
54728        if self.match_token(TokenType::LParen) {
54729            let mut renames = Vec::new();
54730            loop {
54731                // Parse old_name AS new_name
54732                if let Some(old_name) = self.try_parse_identifier() {
54733                    if self.match_token(TokenType::As) {
54734                        if let Some(new_name) = self.try_parse_identifier() {
54735                            renames.push((old_name, new_name));
54736                        }
54737                    }
54738                } else {
54739                    break;
54740                }
54741                if !self.match_token(TokenType::Comma) {
54742                    break;
54743                }
54744            }
54745            self.match_token(TokenType::RParen);
54746            return Ok(Some(renames));
54747        }
54748
54749        Ok(None)
54750    }
54751
54752    /// parse_star_op - Helper to parse EXCEPT/REPLACE/RENAME with keywords
54753    /// Returns list of expressions if keywords match
54754    pub fn parse_star_op(&mut self, keywords: &[&str]) -> Result<Option<Vec<Expression>>> {
54755        if !self.match_texts(keywords) {
54756            return Ok(None);
54757        }
54758
54759        // If followed by paren, parse wrapped CSV
54760        if self.match_token(TokenType::LParen) {
54761            let expressions = self.parse_expression_list()?;
54762            self.match_token(TokenType::RParen);
54763            return Ok(Some(expressions));
54764        }
54765
54766        // Otherwise parse single aliased expression
54767        if let Some(expr) = self.parse_disjunction()? {
54768            // Try to parse explicit alias
54769            let result = if self.match_token(TokenType::As) {
54770                if let Some(alias_name) = self.try_parse_identifier() {
54771                    Expression::Alias(Box::new(Alias {
54772                        this: expr,
54773                        alias: alias_name,
54774                        column_aliases: Vec::new(),
54775                        pre_alias_comments: Vec::new(),
54776                        trailing_comments: Vec::new(),
54777                        inferred_type: None,
54778                    }))
54779                } else {
54780                    expr
54781                }
54782            } else {
54783                expr
54784            };
54785            return Ok(Some(vec![result]));
54786        }
54787
54788        Ok(None)
54789    }
54790
54791    /// parse_star_ops - Implemented from Python _parse_star_ops
54792    /// Creates a Star expression with EXCEPT/REPLACE/RENAME clauses
54793    /// Also handles * COLUMNS(pattern) syntax for DuckDB column selection
54794    pub fn parse_star_ops(&mut self) -> Result<Option<Expression>> {
54795        // Handle * COLUMNS(pattern) function (DuckDB)
54796        // This parses patterns like: * COLUMNS(c ILIKE '%suffix')
54797        if self.match_text_seq(&["COLUMNS"]) && self.check(TokenType::LParen) {
54798            // Parse the COLUMNS function arguments
54799            self.expect(TokenType::LParen)?;
54800            let this = self.parse_expression()?;
54801            self.expect(TokenType::RParen)?;
54802
54803            // Return a Columns expression with unpack=true (since it came from * COLUMNS())
54804            return Ok(Some(Expression::Columns(Box::new(Columns {
54805                this: Box::new(this),
54806                unpack: Some(Box::new(Expression::Boolean(BooleanLiteral {
54807                    value: true,
54808                }))),
54809            }))));
54810        }
54811
54812        // Parse EXCEPT/EXCLUDE
54813        let except_exprs = self.parse_star_op(&["EXCEPT", "EXCLUDE"])?;
54814        let except = except_exprs.map(|exprs| {
54815            exprs
54816                .into_iter()
54817                .filter_map(|e| match e {
54818                    Expression::Identifier(id) => Some(id),
54819                    Expression::Column(col) => Some(col.name),
54820                    _ => None,
54821                })
54822                .collect()
54823        });
54824
54825        // Parse REPLACE
54826        let replace_exprs = self.parse_star_op(&["REPLACE"])?;
54827        let replace = replace_exprs.map(|exprs| {
54828            exprs
54829                .into_iter()
54830                .filter_map(|e| match e {
54831                    Expression::Alias(a) => Some(*a),
54832                    _ => None,
54833                })
54834                .collect()
54835        });
54836
54837        // Parse RENAME
54838        let _rename_exprs = self.parse_star_op(&["RENAME"])?;
54839        let rename: Option<Vec<(Identifier, Identifier)>> = None; // Complex to extract from expressions
54840
54841        Ok(Some(Expression::Star(Star {
54842            table: None,
54843            except,
54844            replace,
54845            rename,
54846            trailing_comments: Vec::new(),
54847            span: None,
54848        })))
54849    }
54850
54851    /// parse_stored - Implemented from Python _parse_stored
54852    #[allow(unused_variables, unused_mut)]
54853    pub fn parse_stored(&mut self) -> Result<Option<Expression>> {
54854        if self.match_text_seq(&["BY"]) {
54855            return Ok(Some(Expression::InputOutputFormat(Box::new(
54856                InputOutputFormat {
54857                    input_format: None,
54858                    output_format: None,
54859                },
54860            ))));
54861        }
54862        if self.match_text_seq(&["INPUTFORMAT"]) {
54863            // Matched: INPUTFORMAT
54864            return Ok(None);
54865        }
54866        Ok(None)
54867    }
54868
54869    /// parse_stream - Implemented from Python _parse_stream
54870    #[allow(unused_variables, unused_mut)]
54871    pub fn parse_stream(&mut self) -> Result<Option<Expression>> {
54872        if self.match_text_seq(&["STREAM"]) {
54873            // Matched: STREAM
54874            return Ok(None);
54875        }
54876        Ok(None)
54877    }
54878
54879    /// parse_string - Parse string literal
54880    /// Python: if self._match_set(self.STRING_PARSERS): return STRING_PARSERS[token_type](...)
54881    pub fn parse_string(&mut self) -> Result<Option<Expression>> {
54882        // Regular string literal
54883        if self.match_token(TokenType::String) {
54884            let text = self.previous().text.clone();
54885            return Ok(Some(Expression::Literal(Box::new(Literal::String(text)))));
54886        }
54887        // National string (N'...')
54888        if self.match_token(TokenType::NationalString) {
54889            let text = self.previous().text.clone();
54890            return Ok(Some(Expression::Literal(Box::new(
54891                Literal::NationalString(text),
54892            ))));
54893        }
54894        // Raw string (r"..." or r'...')
54895        if self.match_token(TokenType::RawString) {
54896            let text = self.previous().text.clone();
54897            return Ok(Some(Expression::Literal(Box::new(Literal::RawString(
54898                text,
54899            )))));
54900        }
54901        // Heredoc string
54902        if self.match_token(TokenType::HeredocString) {
54903            let text = self.previous().text.clone();
54904            return Ok(Some(Expression::Literal(Box::new(Literal::String(text)))));
54905        }
54906        // Hex string (X'...' or 0x...)
54907        if self.match_token(TokenType::HexString) {
54908            let text = self.previous().text.clone();
54909            return Ok(Some(Expression::Literal(Box::new(Literal::HexString(
54910                text,
54911            )))));
54912        }
54913        // Bit string (B'...')
54914        if self.match_token(TokenType::BitString) {
54915            let text = self.previous().text.clone();
54916            return Ok(Some(Expression::Literal(Box::new(Literal::BitString(
54917                text,
54918            )))));
54919        }
54920        // Byte string (b"..." - BigQuery style)
54921        if self.match_token(TokenType::ByteString) {
54922            let text = self.previous().text.clone();
54923            return Ok(Some(Expression::Literal(Box::new(Literal::ByteString(
54924                text,
54925            )))));
54926        }
54927        Ok(None)
54928    }
54929
54930    /// parse_string_agg - Parses STRING_AGG function arguments
54931    /// Python: parser.py:6849-6899
54932    /// Handles DISTINCT, separator, ORDER BY, ON OVERFLOW, WITHIN GROUP
54933    #[allow(unused_variables, unused_mut)]
54934    pub fn parse_string_agg(&mut self) -> Result<Option<Expression>> {
54935        // Check for DISTINCT
54936        let distinct = self.match_token(TokenType::Distinct);
54937
54938        // Parse main expression
54939        let this = self.parse_disjunction()?;
54940        if this.is_none() {
54941            return Ok(None);
54942        }
54943
54944        // Parse optional separator
54945        let separator = if self.match_token(TokenType::Comma) {
54946            self.parse_disjunction()?
54947        } else {
54948            None
54949        };
54950
54951        // Parse ON OVERFLOW clause
54952        let on_overflow = if self.match_text_seq(&["ON", "OVERFLOW"]) {
54953            if self.match_text_seq(&["ERROR"]) {
54954                Some(Box::new(Expression::Var(Box::new(Var {
54955                    this: "ERROR".to_string(),
54956                }))))
54957            } else {
54958                self.match_text_seq(&["TRUNCATE"]);
54959                let truncate_str = self.parse_string()?;
54960                let with_count = if self.match_text_seq(&["WITH", "COUNT"]) {
54961                    Some(true)
54962                } else if self.match_text_seq(&["WITHOUT", "COUNT"]) {
54963                    Some(false)
54964                } else {
54965                    None
54966                };
54967                Some(Box::new(Expression::OverflowTruncateBehavior(Box::new(
54968                    OverflowTruncateBehavior {
54969                        this: truncate_str.map(Box::new),
54970                        with_count: with_count
54971                            .map(|c| Box::new(Expression::Boolean(BooleanLiteral { value: c }))),
54972                    },
54973                ))))
54974            }
54975        } else {
54976            None
54977        };
54978
54979        // Parse ORDER BY or WITHIN GROUP
54980        let order_by = if self.match_token(TokenType::OrderBy) {
54981            Some(self.parse_expression_list()?)
54982        } else if self.match_text_seq(&["WITHIN", "GROUP"]) {
54983            self.match_token(TokenType::LParen);
54984            let order = self.parse_order()?;
54985            self.match_token(TokenType::RParen);
54986            order.map(|o| vec![o])
54987        } else {
54988            None
54989        };
54990
54991        // Return as GroupConcat (which is the canonical form for STRING_AGG)
54992        Ok(Some(Expression::GroupConcat(Box::new(GroupConcatFunc {
54993            this: this.unwrap(),
54994            separator: separator,
54995            order_by: None,
54996            distinct,
54997            filter: None,
54998            limit: None,
54999            inferred_type: None,
55000        }))))
55001    }
55002
55003    /// parse_string_as_identifier - Parses a string literal as a quoted identifier
55004    /// Python: _parse_string_as_identifier
55005    /// Used for cases where a string can be used as an identifier (e.g., MySQL)
55006    pub fn parse_string_as_identifier(&mut self) -> Result<Option<Expression>> {
55007        if self.match_token(TokenType::String) {
55008            let text = self.previous().text.clone();
55009            // Remove quotes if present
55010            let name = if text.starts_with('\'') && text.ends_with('\'') && text.len() >= 2 {
55011                text[1..text.len() - 1].to_string()
55012            } else if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
55013                text[1..text.len() - 1].to_string()
55014            } else {
55015                text
55016            };
55017
55018            Ok(Some(Expression::Identifier(Identifier {
55019                name,
55020                quoted: true,
55021                trailing_comments: Vec::new(),
55022                span: None,
55023            })))
55024        } else {
55025            Ok(None)
55026        }
55027    }
55028
55029    /// parse_struct_types - Delegates to parse_types
55030    #[allow(unused_variables, unused_mut)]
55031    pub fn parse_struct_types(&mut self) -> Result<Option<Expression>> {
55032        self.parse_types()
55033    }
55034
55035    /// parse_subquery - Ported from Python _parse_subquery
55036    /// Parses a parenthesized SELECT as subquery: (SELECT ...)
55037    #[allow(unused_variables, unused_mut)]
55038    pub fn parse_subquery(&mut self) -> Result<Option<Expression>> {
55039        // Check for opening paren
55040        if !self.match_token(TokenType::LParen) {
55041            return Ok(None);
55042        }
55043
55044        // Check if it's a SELECT or WITH statement
55045        if !self.check(TokenType::Select) && !self.check(TokenType::With) {
55046            // Not a subquery, retreat
55047            self.current -= 1;
55048            return Ok(None);
55049        }
55050
55051        // Parse the query
55052        let query = self.parse_statement()?;
55053        self.expect(TokenType::RParen)?;
55054
55055        // Parse optional table alias
55056        let alias = self.parse_table_alias_if_present()?;
55057
55058        Ok(Some(Expression::Subquery(Box::new(Subquery {
55059            this: query,
55060            alias,
55061            column_aliases: Vec::new(),
55062            order_by: None,
55063            limit: None,
55064            offset: None,
55065            lateral: false,
55066            modifiers_inside: false,
55067            trailing_comments: Vec::new(),
55068            distribute_by: None,
55069            sort_by: None,
55070            cluster_by: None,
55071            inferred_type: None,
55072        }))))
55073    }
55074
55075    /// Helper to parse table alias if present
55076    fn parse_table_alias_if_present(&mut self) -> Result<Option<Identifier>> {
55077        // Check for AS keyword
55078        let explicit_as = self.match_token(TokenType::As);
55079
55080        // ClickHouse: keywords can be used as table aliases when AS is explicit
55081        let is_keyword_alias = explicit_as
55082            && matches!(
55083                self.config.dialect,
55084                Some(crate::dialects::DialectType::ClickHouse)
55085            )
55086            && self.peek().token_type.is_keyword();
55087
55088        // Try to parse identifier
55089        if self.check(TokenType::Identifier)
55090            || self.check(TokenType::QuotedIdentifier)
55091            || is_keyword_alias
55092        {
55093            if is_keyword_alias
55094                && !self.check(TokenType::Identifier)
55095                && !self.check(TokenType::QuotedIdentifier)
55096            {
55097                let token = self.advance();
55098                return Ok(Some(Identifier::new(token.text)));
55099            }
55100            if let Some(Expression::Identifier(id)) = self.parse_identifier()? {
55101                return Ok(Some(id));
55102            }
55103        } else if explicit_as {
55104            // AS was present but no identifier follows - this is an error
55105            return Err(self.parse_error("Expected identifier after AS"));
55106        }
55107
55108        Ok(None)
55109    }
55110
55111    /// parse_substring - Ported from Python _parse_substring
55112    /// Parses SUBSTRING function with two syntax variants:
55113    /// 1. Standard SQL: SUBSTRING(str FROM start [FOR length])
55114    /// 2. Function style: SUBSTRING(str, start, length)
55115    #[allow(unused_variables, unused_mut)]
55116    pub fn parse_substring(&mut self) -> Result<Option<Expression>> {
55117        // Parse initial comma-separated arguments
55118        let mut args: Vec<Expression> = Vec::new();
55119
55120        // Parse first argument (the string)
55121        match self.parse_bitwise() {
55122            Ok(Some(expr)) => {
55123                let expr = self.try_clickhouse_func_arg_alias(expr);
55124                args.push(expr);
55125            }
55126            Ok(None) => return Ok(None),
55127            Err(e) => return Err(e),
55128        }
55129
55130        // Check for comma-separated additional arguments
55131        while self.match_token(TokenType::Comma) {
55132            match self.parse_bitwise() {
55133                Ok(Some(expr)) => {
55134                    let expr = self.try_clickhouse_func_arg_alias(expr);
55135                    args.push(expr);
55136                }
55137                Ok(None) => break,
55138                Err(e) => return Err(e),
55139            }
55140        }
55141
55142        // Check for FROM/FOR syntax (SQL standard)
55143        let mut start: Option<Expression> = None;
55144        let mut length: Option<Expression> = None;
55145        let mut from_for_syntax = false;
55146
55147        loop {
55148            if self.match_token(TokenType::From) {
55149                from_for_syntax = true;
55150                match self.parse_bitwise() {
55151                    Ok(Some(expr)) => {
55152                        let expr = self.try_clickhouse_func_arg_alias(expr);
55153                        start = Some(expr);
55154                    }
55155                    Ok(None) => {}
55156                    Err(e) => return Err(e),
55157                }
55158            } else if self.match_token(TokenType::For) {
55159                from_for_syntax = true;
55160                // If no start specified yet, default to 1
55161                if start.is_none() {
55162                    start = Some(Expression::Literal(Box::new(Literal::Number(
55163                        "1".to_string(),
55164                    ))));
55165                }
55166                match self.parse_bitwise() {
55167                    Ok(Some(expr)) => {
55168                        let expr = self.try_clickhouse_func_arg_alias(expr);
55169                        length = Some(expr);
55170                    }
55171                    Ok(None) => {}
55172                    Err(e) => return Err(e),
55173                }
55174            } else {
55175                break;
55176            }
55177        }
55178
55179        // Build the substring expression
55180        if args.is_empty() {
55181            return Ok(None);
55182        }
55183
55184        let this = args.remove(0);
55185
55186        // Determine start and length
55187        let final_start = if let Some(s) = start {
55188            s
55189        } else if !args.is_empty() {
55190            args.remove(0)
55191        } else {
55192            Expression::Literal(Box::new(Literal::Number("1".to_string())))
55193        };
55194
55195        let final_length = if length.is_some() {
55196            length
55197        } else if !args.is_empty() {
55198            Some(args.remove(0))
55199        } else {
55200            None
55201        };
55202
55203        Ok(Some(Expression::Substring(Box::new(SubstringFunc {
55204            this,
55205            start: final_start,
55206            length: final_length,
55207            from_for_syntax,
55208        }))))
55209    }
55210
55211    /// parse_system_versioning_property - Implemented from Python _parse_system_versioning_property
55212    /// Calls: parse_table_parts, parse_retention_period
55213    #[allow(unused_variables, unused_mut)]
55214    pub fn parse_system_versioning_property(&mut self) -> Result<Option<Expression>> {
55215        if self.match_text_seq(&["OFF"]) {
55216            return Ok(Some(Expression::WithSystemVersioningProperty(Box::new(
55217                WithSystemVersioningProperty {
55218                    on: None,
55219                    this: None,
55220                    data_consistency: None,
55221                    retention_period: None,
55222                    with_: None,
55223                },
55224            ))));
55225        }
55226        if self.match_text_seq(&["HISTORY_TABLE", "="]) {
55227            // Matched: HISTORY_TABLE =
55228            return Ok(None);
55229        }
55230        if self.match_text_seq(&["DATA_CONSISTENCY_CHECK", "="]) {
55231            // Matched: DATA_CONSISTENCY_CHECK =
55232            return Ok(None);
55233        }
55234        Ok(None)
55235    }
55236
55237    /// Parse PostgreSQL ROWS FROM syntax:
55238    /// ROWS FROM (func1(args) AS alias1(col1 type1, col2 type2), func2(...) AS alias2(...)) [WITH ORDINALITY] [AS outer_alias(...)]
55239    fn parse_rows_from(&mut self) -> Result<Expression> {
55240        // Expect opening paren
55241        self.expect(TokenType::LParen)?;
55242
55243        let mut expressions = Vec::new();
55244
55245        loop {
55246            // Parse each function expression inside ROWS FROM
55247            // Each element is: func_name(args) [AS alias(col1 type1, col2 type2, ...)]
55248            let func_expr = self.parse_rows_from_function()?;
55249            expressions.push(func_expr);
55250
55251            if !self.match_token(TokenType::Comma) {
55252                break;
55253            }
55254        }
55255
55256        self.expect(TokenType::RParen)?;
55257
55258        // Check for WITH ORDINALITY
55259        let ordinality =
55260            if self.match_token(TokenType::With) && self.match_token(TokenType::Ordinality) {
55261                true
55262            } else {
55263                false
55264            };
55265
55266        // Check for outer alias: AS alias(col1 type1, col2 type2, ...)
55267        let alias = if self.match_token(TokenType::As) {
55268            Some(Box::new(self.parse_rows_from_alias()?))
55269        } else {
55270            None
55271        };
55272
55273        Ok(Expression::RowsFrom(Box::new(RowsFrom {
55274            expressions,
55275            ordinality,
55276            alias,
55277        })))
55278    }
55279
55280    /// Parse a single function in ROWS FROM: func_name(args) [AS alias(col1 type1, ...)]
55281    fn parse_rows_from_function(&mut self) -> Result<Expression> {
55282        // Parse function name
55283        let func_name = self.expect_identifier_or_keyword()?;
55284
55285        // Parse function arguments
55286        self.expect(TokenType::LParen)?;
55287        let args = if self.check(TokenType::RParen) {
55288            Vec::new()
55289        } else {
55290            self.parse_function_arguments()?
55291        };
55292        self.expect(TokenType::RParen)?;
55293
55294        let func_expr = Expression::Function(Box::new(Function {
55295            name: func_name,
55296            args,
55297            distinct: false,
55298            trailing_comments: Vec::new(),
55299            use_bracket_syntax: false,
55300            no_parens: false,
55301            quoted: false,
55302            span: None,
55303            inferred_type: None,
55304        }));
55305
55306        // Check for AS alias(col1 type1, col2 type2, ...)
55307        // Return a Tuple(function, TableAlias) so the generator can output: FUNC() AS alias(col type)
55308        if self.match_token(TokenType::As) {
55309            let alias_expr = self.parse_rows_from_alias()?;
55310            Ok(Expression::Tuple(Box::new(Tuple {
55311                expressions: vec![func_expr, alias_expr],
55312            })))
55313        } else {
55314            Ok(func_expr)
55315        }
55316    }
55317
55318    /// Parse ROWS FROM alias with typed columns: alias_name(col1 type1, col2 type2, ...)
55319    fn parse_rows_from_alias(&mut self) -> Result<Expression> {
55320        let alias_name = self.expect_identifier_or_keyword_with_quoted()?;
55321
55322        // Check for column definitions: (col1 type1, col2 type2, ...)
55323        let columns = if self.match_token(TokenType::LParen) {
55324            let mut cols = Vec::new();
55325            loop {
55326                if self.check(TokenType::RParen) {
55327                    break;
55328                }
55329                // Parse column name (can be quoted)
55330                let col_name = self.expect_identifier_or_keyword_with_quoted()?;
55331                // Parse column type
55332                let col_type = self.parse_data_type()?;
55333                // Create ColumnDef expression, preserving the quoted status
55334                let mut col_def = ColumnDef::new(col_name.name.clone(), col_type);
55335                col_def.name = col_name; // Preserve the full identifier with quoted flag
55336                cols.push(Expression::ColumnDef(Box::new(col_def)));
55337
55338                if !self.match_token(TokenType::Comma) {
55339                    break;
55340                }
55341            }
55342            self.expect(TokenType::RParen)?;
55343            cols
55344        } else {
55345            Vec::new()
55346        };
55347
55348        Ok(Expression::TableAlias(Box::new(TableAlias {
55349            this: Some(Box::new(Expression::Identifier(alias_name))),
55350            columns,
55351        })))
55352    }
55353
55354    /// parse_table - Implemented from Python _parse_table
55355    /// Calls: parse_table_hints, parse_unnest, parse_partition
55356    #[allow(unused_variables, unused_mut)]
55357    pub fn parse_table(&mut self) -> Result<Option<Expression>> {
55358        if self.match_text_seq(&["ROWS", "FROM"]) {
55359            // ROWS FROM is handled by parse_rows_from() in parse_table_expression()
55360            return Ok(None);
55361        }
55362        if self.match_text_seq(&["*"]) {
55363            // Matched: *
55364            return Ok(None);
55365        }
55366        if self.match_text_seq(&["NOT", "INDEXED"]) {
55367            // Matched: NOT INDEXED
55368            return Ok(None);
55369        }
55370        Ok(None)
55371    }
55372
55373    /// parse_table_alias - Ported from Python _parse_table_alias
55374    /// Parses table alias: AS alias [(col1, col2, ...)]
55375    #[allow(unused_variables, unused_mut)]
55376    pub fn parse_table_alias(&mut self) -> Result<Option<Expression>> {
55377        // Check for AS keyword (optional in most dialects)
55378        let has_as = self.match_token(TokenType::As);
55379
55380        // Handle AS (col1, col2) - no alias name, just column aliases
55381        if has_as && self.check(TokenType::LParen) {
55382            // Parse (col1, col2, ...)
55383            self.skip(); // consume LParen
55384            let mut cols = Vec::new();
55385            loop {
55386                if self.check(TokenType::RParen) {
55387                    break;
55388                }
55389                if let Ok(Some(col)) = self.parse_id_var() {
55390                    cols.push(col);
55391                }
55392                if !self.match_token(TokenType::Comma) {
55393                    break;
55394                }
55395            }
55396            self.expect(TokenType::RParen)?;
55397            return Ok(Some(Expression::TableAlias(Box::new(TableAlias {
55398                this: None,
55399                columns: cols,
55400            }))));
55401        }
55402
55403        // Parse the alias identifier
55404        // ClickHouse: keywords can be used as table aliases (e.g., AS select, AS from)
55405        let is_keyword_alias = has_as
55406            && matches!(
55407                self.config.dialect,
55408                Some(crate::dialects::DialectType::ClickHouse)
55409            )
55410            && self.peek().token_type.is_keyword();
55411        if !self.check(TokenType::Identifier)
55412            && !self.check(TokenType::QuotedIdentifier)
55413            && !self.check(TokenType::Var)
55414            && !is_keyword_alias
55415        {
55416            if has_as {
55417                return Err(self.parse_error("Expected identifier after AS"));
55418            }
55419            return Ok(None);
55420        }
55421
55422        let alias_token = self.advance();
55423        let is_quoted = alias_token.token_type == TokenType::QuotedIdentifier;
55424        let mut alias_ident = Identifier::new(alias_token.text.clone());
55425        if is_quoted {
55426            alias_ident.quoted = true;
55427        }
55428        let alias = Expression::Identifier(alias_ident);
55429
55430        // Check for column list: (col1, col2, ...)
55431        let columns = if self.match_token(TokenType::LParen) {
55432            let mut cols = Vec::new();
55433            loop {
55434                if self.check(TokenType::RParen) {
55435                    break;
55436                }
55437                if let Ok(Some(col)) = self.parse_id_var() {
55438                    cols.push(col);
55439                }
55440                if !self.match_token(TokenType::Comma) {
55441                    break;
55442                }
55443            }
55444            self.expect(TokenType::RParen)?;
55445            cols
55446        } else {
55447            Vec::new()
55448        };
55449
55450        Ok(Some(Expression::TableAlias(Box::new(TableAlias {
55451            this: Some(Box::new(alias)),
55452            columns,
55453        }))))
55454    }
55455
55456    /// parse_table_hints - Ported from Python _parse_table_hints
55457    /// Parses table hints (SQL Server WITH (...) or MySQL USE/IGNORE/FORCE INDEX)
55458    #[allow(unused_variables, unused_mut)]
55459    pub fn parse_table_hints(&mut self) -> Result<Option<Expression>> {
55460        let mut hints = Vec::new();
55461
55462        // SQL Server style: WITH (hint1, hint2, ...)
55463        if self.match_text_seq(&["WITH"]) && self.match_token(TokenType::LParen) {
55464            let mut expressions = Vec::new();
55465            loop {
55466                // Parse function or variable as hint
55467                if let Some(func) = self.parse_function()? {
55468                    expressions.push(func);
55469                } else if let Some(var) = self.parse_var()? {
55470                    expressions.push(var);
55471                } else {
55472                    break;
55473                }
55474                if !self.match_token(TokenType::Comma) {
55475                    break;
55476                }
55477            }
55478            self.match_token(TokenType::RParen);
55479
55480            if !expressions.is_empty() {
55481                hints.push(Expression::WithTableHint(Box::new(WithTableHint {
55482                    expressions,
55483                })));
55484            }
55485        } else {
55486            // MySQL style: USE INDEX, IGNORE INDEX, FORCE INDEX
55487            while self.match_texts(&["USE", "IGNORE", "FORCE"]) {
55488                let hint_type = self.previous().text.to_ascii_uppercase();
55489
55490                // Match INDEX or KEY
55491                let _ = self.match_texts(&["INDEX", "KEY"]);
55492
55493                // Check for optional FOR clause: FOR JOIN, FOR ORDER BY, FOR GROUP BY
55494                let target = if self.match_text_seq(&["FOR"]) {
55495                    let target_token = self.advance();
55496                    let target_text = target_token.text.to_ascii_uppercase();
55497                    // For ORDER BY and GROUP BY, combine into a single target name
55498                    let full_target = if (target_text == "ORDER" || target_text == "GROUP")
55499                        && self.check(TokenType::By)
55500                    {
55501                        self.skip(); // consume BY
55502                        format!("{} BY", target_text)
55503                    } else {
55504                        target_text
55505                    };
55506                    Some(Box::new(Expression::Identifier(Identifier {
55507                        name: full_target,
55508                        quoted: false,
55509                        trailing_comments: Vec::new(),
55510                        span: None,
55511                    })))
55512                } else {
55513                    None
55514                };
55515
55516                // Parse wrapped identifiers (index names — can include keywords like PRIMARY)
55517                let expressions = if self.match_token(TokenType::LParen) {
55518                    let mut ids = Vec::new();
55519                    loop {
55520                        if self.check(TokenType::RParen) {
55521                            break;
55522                        }
55523                        if let Some(id) = self.parse_id_var()? {
55524                            ids.push(id);
55525                        } else if self.is_safe_keyword_as_identifier()
55526                            || self.check(TokenType::PrimaryKey)
55527                        {
55528                            // Accept keywords as index names (e.g., PRIMARY)
55529                            let name = self.advance().text.clone();
55530                            ids.push(Expression::Identifier(Identifier::new(name)));
55531                        } else {
55532                            break;
55533                        }
55534                        if !self.match_token(TokenType::Comma) {
55535                            break;
55536                        }
55537                    }
55538                    self.match_token(TokenType::RParen);
55539                    ids
55540                } else {
55541                    Vec::new()
55542                };
55543
55544                hints.push(Expression::IndexTableHint(Box::new(IndexTableHint {
55545                    this: Box::new(Expression::Identifier(Identifier {
55546                        name: hint_type,
55547                        quoted: false,
55548                        trailing_comments: Vec::new(),
55549                        span: None,
55550                    })),
55551                    expressions,
55552                    target,
55553                })));
55554            }
55555        }
55556
55557        if hints.is_empty() {
55558            return Ok(None);
55559        }
55560
55561        // Return as a Tuple containing hints
55562        Ok(Some(Expression::Tuple(Box::new(Tuple {
55563            expressions: hints,
55564        }))))
55565    }
55566
55567    /// Parse TSQL TRUNCATE table hints: WITH (PARTITIONS(1, 2 TO 5, 10 TO 20, 84))
55568    /// Unlike regular table hints, PARTITIONS arguments can contain TO ranges.
55569    pub fn parse_truncate_table_hints(&mut self) -> Result<Option<Expression>> {
55570        if !self.match_text_seq(&["WITH"]) || !self.match_token(TokenType::LParen) {
55571            return Ok(None);
55572        }
55573
55574        let mut hints = Vec::new();
55575
55576        // Check for PARTITIONS specifically
55577        if self.check_identifier("PARTITIONS") {
55578            self.skip(); // consume PARTITIONS
55579            self.expect(TokenType::LParen)?;
55580
55581            // Parse partition ranges: 1, 2 TO 5, 10 TO 20, 84
55582            let mut parts = Vec::new();
55583            loop {
55584                if self.check(TokenType::RParen) {
55585                    break;
55586                }
55587                let low = self.parse_primary()?;
55588                if self.match_text_seq(&["TO"]) {
55589                    let high = self.parse_primary()?;
55590                    parts.push(Expression::PartitionRange(Box::new(PartitionRange {
55591                        this: Box::new(low),
55592                        expression: Some(Box::new(high)),
55593                        expressions: Vec::new(),
55594                    })));
55595                } else {
55596                    parts.push(low);
55597                }
55598                if !self.match_token(TokenType::Comma) {
55599                    break;
55600                }
55601            }
55602            self.expect(TokenType::RParen)?; // close PARTITIONS(...)
55603
55604            // Create an Anonymous function for PARTITIONS(...)
55605            hints.push(Expression::Anonymous(Box::new(Anonymous {
55606                this: Box::new(Expression::Identifier(Identifier {
55607                    name: "PARTITIONS".to_string(),
55608                    quoted: false,
55609                    trailing_comments: Vec::new(),
55610                    span: None,
55611                })),
55612                expressions: parts,
55613            })));
55614        } else {
55615            // Fall back to regular hint parsing (function or var)
55616            loop {
55617                if let Some(func) = self.parse_function()? {
55618                    hints.push(func);
55619                } else if let Some(var) = self.parse_var()? {
55620                    hints.push(var);
55621                } else {
55622                    break;
55623                }
55624                if !self.match_token(TokenType::Comma) {
55625                    break;
55626                }
55627            }
55628        }
55629
55630        self.expect(TokenType::RParen)?; // close WITH(...)
55631
55632        if hints.is_empty() {
55633            return Ok(None);
55634        }
55635
55636        // Wrap in WithTableHint then Tuple (same as parse_table_hints)
55637        let hint = Expression::WithTableHint(Box::new(WithTableHint { expressions: hints }));
55638
55639        Ok(Some(Expression::Tuple(Box::new(Tuple {
55640            expressions: vec![hint],
55641        }))))
55642    }
55643
55644    /// parse_table_part - Parse a single part of a table reference
55645    /// Tries: identifier, string as identifier, placeholder
55646    #[allow(unused_variables, unused_mut)]
55647    pub fn parse_table_part(&mut self) -> Result<Option<Expression>> {
55648        // Try to parse an identifier
55649        if let Some(id) = self.parse_id_var()? {
55650            return Ok(Some(id));
55651        }
55652
55653        // Try to parse a string as identifier
55654        if let Some(str_id) = self.parse_string_as_identifier()? {
55655            return Ok(Some(str_id));
55656        }
55657
55658        // Try to parse a placeholder
55659        if let Some(placeholder) = self.parse_placeholder()? {
55660            return Ok(Some(placeholder));
55661        }
55662
55663        // Accept keywords as identifiers in table part context (e.g., db.cluster where "cluster" is a keyword)
55664        // This mirrors Python sqlglot's ID_VAR_TOKENS which includes many keyword types
55665        if self.check_keyword_as_identifier() {
55666            let text = self.peek().text.clone();
55667            self.skip();
55668            return Ok(Some(Expression::Identifier(Identifier {
55669                name: text,
55670                quoted: false,
55671                trailing_comments: Vec::new(),
55672                span: None,
55673            })));
55674        }
55675
55676        Ok(None)
55677    }
55678
55679    /// Check if the current token is a keyword that can be used as an identifier in certain contexts
55680    /// This includes many SQL keywords like CLUSTER, TABLE, INDEX, etc.
55681    fn check_keyword_as_identifier(&self) -> bool {
55682        if self.is_at_end() {
55683            return false;
55684        }
55685        let token_type = self.peek().token_type;
55686        // Keywords that can be used as identifiers (similar to Python's ID_VAR_TOKENS)
55687        matches!(
55688            token_type,
55689            TokenType::Cluster
55690                | TokenType::Table
55691                | TokenType::Index
55692                | TokenType::View
55693                | TokenType::Database
55694                | TokenType::Schema
55695                | TokenType::Column
55696                | TokenType::Function
55697                | TokenType::Procedure
55698                | TokenType::Constraint
55699                | TokenType::Sequence
55700                | TokenType::Type
55701                | TokenType::Partition
55702                | TokenType::Comment
55703                | TokenType::Cache
55704                | TokenType::Commit
55705                | TokenType::Begin
55706                | TokenType::End
55707                | TokenType::Set
55708                | TokenType::Show
55709                | TokenType::Describe
55710                | TokenType::Use
55711                | TokenType::Execute
55712                | TokenType::Delete
55713                | TokenType::Update
55714                | TokenType::Merge
55715                | TokenType::Load
55716                | TokenType::Copy
55717                | TokenType::Truncate
55718                | TokenType::Replace
55719                | TokenType::Refresh
55720                | TokenType::Rename
55721                | TokenType::Filter
55722                | TokenType::Format
55723                | TokenType::First
55724                | TokenType::Next
55725                | TokenType::Last
55726                | TokenType::Keep
55727                | TokenType::Match
55728                | TokenType::Over
55729                | TokenType::Range
55730                | TokenType::Rows
55731                | TokenType::Row
55732                | TokenType::Offset
55733                | TokenType::Limit
55734                | TokenType::Top
55735                | TokenType::Cube
55736                | TokenType::Rollup
55737                | TokenType::Pivot
55738                | TokenType::Unpivot
55739                | TokenType::Window
55740                | TokenType::Recursive
55741                | TokenType::Unique
55742                | TokenType::Temporary
55743                | TokenType::Volatile
55744                | TokenType::References
55745                | TokenType::Natural
55746                | TokenType::Left
55747                | TokenType::Right
55748                | TokenType::Full
55749                | TokenType::Semi
55750                | TokenType::Anti
55751                | TokenType::Apply
55752                | TokenType::All
55753                | TokenType::Asc
55754                | TokenType::Desc
55755                | TokenType::Analyze
55756        )
55757    }
55758
55759    /// parse_table_parts - Parse catalog.schema.table or schema.table or table
55760    /// Returns a Table expression with all parts
55761    #[allow(unused_variables, unused_mut)]
55762    pub fn parse_table_parts(&mut self) -> Result<Option<Expression>> {
55763        // Parse the first part
55764        let first = self.parse_table_part()?;
55765        if first.is_none() {
55766            return Ok(None);
55767        }
55768
55769        let mut parts = vec![first.unwrap()];
55770
55771        // Parse additional dot-separated parts
55772        while self.match_token(TokenType::Dot) {
55773            if let Some(part) = self.parse_table_part()? {
55774                parts.push(part);
55775            } else {
55776                break;
55777            }
55778        }
55779
55780        // Convert parts to Table expression
55781        // Last part is table name, second-to-last is schema, third-to-last is catalog
55782        let (catalog, schema, name) = match parts.len() {
55783            1 => (None, None, parts.pop().unwrap()),
55784            2 => {
55785                let table = parts.pop().unwrap();
55786                let schema = parts.pop().unwrap();
55787                (None, Some(schema), table)
55788            }
55789            _ => {
55790                let table = parts.pop().unwrap();
55791                let schema = parts.pop().unwrap();
55792                let catalog = parts.pop();
55793                (catalog, Some(schema), table)
55794            }
55795        };
55796
55797        // Extract identifier from Expression
55798        let name_ident = match name {
55799            Expression::Identifier(id) => id,
55800            _ => Identifier::new(String::new()),
55801        };
55802        let schema_ident = schema.map(|s| match s {
55803            Expression::Identifier(id) => id,
55804            _ => Identifier::new(String::new()),
55805        });
55806        let catalog_ident = catalog.map(|c| match c {
55807            Expression::Identifier(id) => id,
55808            _ => Identifier::new(String::new()),
55809        });
55810
55811        Ok(Some(Expression::boxed_table(TableRef {
55812            name: name_ident,
55813            schema: schema_ident,
55814            catalog: catalog_ident,
55815            alias: None,
55816            alias_explicit_as: false,
55817            column_aliases: Vec::new(),
55818            leading_comments: Vec::new(),
55819            trailing_comments: Vec::new(),
55820            when: None,
55821            only: false,
55822            final_: false,
55823            table_sample: None,
55824            hints: Vec::new(),
55825            system_time: None,
55826            partitions: Vec::new(),
55827            identifier_func: None,
55828            changes: None,
55829            version: None,
55830            span: None,
55831        })))
55832    }
55833
55834    /// parse_table_sample - Implemented from Python _parse_table_sample
55835    /// Calls: parse_number, parse_factor, parse_placeholder
55836    #[allow(unused_variables, unused_mut)]
55837    pub fn parse_table_sample(&mut self) -> Result<Option<Expression>> {
55838        if self.match_text_seq(&["USING", "SAMPLE"]) {
55839            return Ok(Some(Expression::TableSample(Box::new(TableSample {
55840                this: None,
55841                sample: None,
55842                expressions: Vec::new(),
55843                method: None,
55844                bucket_numerator: None,
55845                bucket_denominator: None,
55846                bucket_field: None,
55847                percent: None,
55848                rows: None,
55849                size: None,
55850                seed: None,
55851            }))));
55852        }
55853        if self.match_text_seq(&["BUCKET"]) {
55854            // Matched: BUCKET
55855            return Ok(None);
55856        }
55857        if self.match_text_seq(&["OUT", "OF"]) {
55858            // Matched: OUT OF
55859            return Ok(None);
55860        }
55861        if self.match_texts(&["SEED", "REPEATABLE"]) {
55862            // Matched one of: SEED, REPEATABLE
55863            return Ok(None);
55864        }
55865        Ok(None)
55866    }
55867
55868    /// parse_term - Parses addition/subtraction expressions (+ - operators)
55869    /// Python: _parse_term
55870    /// Delegates to the existing parse_addition in the operator precedence chain
55871    pub fn parse_term(&mut self) -> Result<Option<Expression>> {
55872        // Delegate to the existing addition parsing
55873        match self.parse_addition() {
55874            Ok(expr) => Ok(Some(expr)),
55875            Err(_) => Ok(None),
55876        }
55877    }
55878
55879    /// parse_to_table - ClickHouse TO table property
55880    /// Parses: TO table_name
55881    #[allow(unused_variables, unused_mut)]
55882    pub fn parse_to_table(&mut self) -> Result<Option<Expression>> {
55883        // Parse the table reference
55884        let table = self.parse_table_parts()?;
55885        if table.is_none() {
55886            return Ok(None);
55887        }
55888
55889        Ok(Some(Expression::ToTableProperty(Box::new(
55890            ToTableProperty {
55891                this: Box::new(table.unwrap()),
55892            },
55893        ))))
55894    }
55895
55896    /// parse_tokens - Operator precedence parser
55897    #[allow(unused_variables, unused_mut)]
55898    pub fn parse_tokens(&mut self) -> Result<Option<Expression>> {
55899        // Uses operator precedence parsing pattern
55900        Ok(None)
55901    }
55902
55903    /// parse_trim - Ported from Python _parse_trim
55904    /// Parses TRIM function: TRIM([BOTH|LEADING|TRAILING] chars FROM str) or TRIM(str, chars)
55905    #[allow(unused_variables, unused_mut)]
55906    pub fn parse_trim(&mut self) -> Result<Option<Expression>> {
55907        // Check for position keyword (BOTH, LEADING, TRAILING)
55908        let (position, position_explicit) = if self.match_texts(&["BOTH"]) {
55909            (TrimPosition::Both, true)
55910        } else if self.match_texts(&["LEADING"]) {
55911            (TrimPosition::Leading, true)
55912        } else if self.match_texts(&["TRAILING"]) {
55913            (TrimPosition::Trailing, true)
55914        } else {
55915            (TrimPosition::Both, false)
55916        };
55917
55918        // Parse first expression
55919        let first = match self.parse_bitwise() {
55920            Ok(Some(expr)) => self.try_clickhouse_func_arg_alias(expr),
55921            Ok(None) => return Ok(None),
55922            Err(e) => return Err(e),
55923        };
55924
55925        // Check for FROM or comma to see if there's a second expression
55926        let (this, characters, sql_standard_syntax) = if self.match_token(TokenType::From) {
55927            // SQL standard syntax: TRIM([position] chars FROM str)
55928            let second = match self.parse_bitwise() {
55929                Ok(Some(expr)) => self.try_clickhouse_func_arg_alias(expr),
55930                Ok(None) => return Err(self.parse_error("Expected expression after FROM in TRIM")),
55931                Err(e) => return Err(e),
55932            };
55933            // In SQL standard syntax: first is characters, second is the string
55934            (second, Some(first), true)
55935        } else if self.match_token(TokenType::Comma) {
55936            // Function syntax: TRIM(a, b)
55937            let second = match self.parse_bitwise() {
55938                Ok(Some(expr)) => Some(expr),
55939                Ok(None) => None,
55940                Err(e) => return Err(e),
55941            };
55942            // In Spark, comma syntax is TRIM(chars, str) - pattern first
55943            // In other dialects, comma syntax is TRIM(str, chars) - string first
55944            let trim_pattern_first = matches!(
55945                self.config.dialect,
55946                Some(crate::dialects::DialectType::Spark)
55947            );
55948            if trim_pattern_first && second.is_some() {
55949                // first=chars, second=str
55950                (second.unwrap(), Some(first), false)
55951            } else {
55952                (first, second, false)
55953            }
55954        } else {
55955            // Single argument: TRIM(str)
55956            (first, None, false)
55957        };
55958
55959        Ok(Some(Expression::Trim(Box::new(TrimFunc {
55960            this,
55961            characters,
55962            position,
55963            sql_standard_syntax,
55964            position_explicit,
55965        }))))
55966    }
55967
55968    /// parse_truncate_table - Implemented from Python _parse_truncate_table
55969    /// Calls: parse_on_property, parse_partition, parse_function
55970    #[allow(unused_variables, unused_mut)]
55971    pub fn parse_truncate_table(&mut self) -> Result<Option<Expression>> {
55972        if self.match_text_seq(&["RESTART", "IDENTITY"]) {
55973            return Ok(Some(Expression::TruncateTable(Box::new(TruncateTable {
55974                expressions: Vec::new(),
55975                is_database: None,
55976                exists: false,
55977                only: None,
55978                cluster: None,
55979                identity: None,
55980                option: None,
55981                partition: None,
55982            }))));
55983        }
55984        if self.match_text_seq(&["CONTINUE", "IDENTITY"]) {
55985            // Matched: CONTINUE IDENTITY
55986            return Ok(None);
55987        }
55988        if self.match_text_seq(&["CASCADE"]) {
55989            // Matched: CASCADE
55990            return Ok(None);
55991        }
55992        Ok(None)
55993    }
55994
55995    /// parse_ttl - Implemented from Python _parse_ttl
55996    /// Parses ClickHouse TTL expression with optional DELETE, RECOMPRESS, TO DISK/VOLUME
55997    pub fn parse_ttl(&mut self) -> Result<Option<Expression>> {
55998        // Parse CSV of TTL actions
55999        let mut expressions = Vec::new();
56000
56001        loop {
56002            // Parse the base expression
56003            let base_start = self.current;
56004            let this = match self.parse_bitwise() {
56005                Ok(Some(expr)) => expr,
56006                _ => {
56007                    self.current = base_start;
56008                    let mut paren_depth = 0usize;
56009                    while !self.is_at_end() {
56010                        if paren_depth == 0
56011                            && (self.check(TokenType::Comma)
56012                                || self.peek().text.eq_ignore_ascii_case("DELETE")
56013                                || self.peek().text.eq_ignore_ascii_case("RECOMPRESS")
56014                                || self.peek().text.eq_ignore_ascii_case("TO")
56015                                || self.peek().text.eq_ignore_ascii_case("WHERE")
56016                                || self.peek().text.eq_ignore_ascii_case("GROUP")
56017                                || self.peek().text.eq_ignore_ascii_case("SET"))
56018                        {
56019                            break;
56020                        }
56021                        if self.check(TokenType::LParen) {
56022                            paren_depth += 1;
56023                        } else if self.check(TokenType::RParen) {
56024                            if paren_depth == 0 {
56025                                break;
56026                            }
56027                            paren_depth -= 1;
56028                        }
56029                        self.skip();
56030                    }
56031                    if self.current == base_start {
56032                        break;
56033                    }
56034                    let raw = self
56035                        .tokens_to_sql(base_start, self.current)
56036                        .trim()
56037                        .to_string();
56038                    Expression::Var(Box::new(Var { this: raw }))
56039                }
56040            };
56041
56042            // Check for TTL action
56043            let action = if self.match_text_seq(&["DELETE"]) {
56044                Expression::MergeTreeTTLAction(Box::new(MergeTreeTTLAction {
56045                    this: Box::new(this),
56046                    delete: Some(Box::new(Expression::Boolean(BooleanLiteral {
56047                        value: true,
56048                    }))),
56049                    recompress: None,
56050                    to_disk: None,
56051                    to_volume: None,
56052                }))
56053            } else if self.match_text_seq(&["RECOMPRESS"]) {
56054                let recompress = if self.match_identifier("CODEC") {
56055                    self.expect(TokenType::LParen)?;
56056                    let mut args = Vec::new();
56057                    if !self.check(TokenType::RParen) {
56058                        args.push(self.parse_expression()?);
56059                        while self.match_token(TokenType::Comma) {
56060                            args.push(self.parse_expression()?);
56061                        }
56062                    }
56063                    self.expect(TokenType::RParen)?;
56064                    Some(Box::new(Expression::Function(Box::new(Function::new(
56065                        "CODEC".to_string(),
56066                        args,
56067                    )))))
56068                } else {
56069                    self.parse_bitwise()?.map(Box::new)
56070                };
56071                Expression::MergeTreeTTLAction(Box::new(MergeTreeTTLAction {
56072                    this: Box::new(this),
56073                    delete: None,
56074                    recompress,
56075                    to_disk: None,
56076                    to_volume: None,
56077                }))
56078            } else if self.match_text_seq(&["TO", "DISK"]) {
56079                let to_disk = self.parse_string()?.map(Box::new);
56080                Expression::MergeTreeTTLAction(Box::new(MergeTreeTTLAction {
56081                    this: Box::new(this),
56082                    delete: None,
56083                    recompress: None,
56084                    to_disk,
56085                    to_volume: None,
56086                }))
56087            } else if self.match_text_seq(&["TO", "VOLUME"]) {
56088                let to_volume = self.parse_string()?.map(Box::new);
56089                Expression::MergeTreeTTLAction(Box::new(MergeTreeTTLAction {
56090                    this: Box::new(this),
56091                    delete: None,
56092                    recompress: None,
56093                    to_disk: None,
56094                    to_volume,
56095                }))
56096            } else {
56097                this
56098            };
56099
56100            expressions.push(action);
56101
56102            if !self.match_token(TokenType::Comma) {
56103                break;
56104            }
56105        }
56106
56107        // Parse optional top-level WHERE clause (for backwards compatibility)
56108        let where_ = self.parse_where()?.map(Box::new);
56109
56110        // Parse optional GROUP BY
56111        let group = if self.match_token(TokenType::Group) {
56112            self.expect(TokenType::By)?;
56113            let mut exprs = Vec::new();
56114            exprs.push(self.parse_expression()?);
56115            while self.match_token(TokenType::Comma) {
56116                exprs.push(self.parse_expression()?);
56117            }
56118            Some(Box::new(Expression::Group(Box::new(Group {
56119                expressions: exprs,
56120                grouping_sets: None,
56121                cube: None,
56122                rollup: None,
56123                totals: None,
56124                all: None,
56125            }))))
56126        } else {
56127            None
56128        };
56129
56130        // Parse optional SET (aggregates) after GROUP BY
56131        let aggregates = if group.is_some() && self.match_token(TokenType::Set) {
56132            let mut aggs = Vec::new();
56133            loop {
56134                aggs.push(self.parse_expression()?);
56135                if !self.match_token(TokenType::Comma) {
56136                    break;
56137                }
56138            }
56139            if aggs.is_empty() {
56140                None
56141            } else {
56142                Some(Box::new(Expression::Tuple(Box::new(Tuple {
56143                    expressions: aggs,
56144                }))))
56145            }
56146        } else {
56147            None
56148        };
56149
56150        Ok(Some(Expression::MergeTreeTTL(Box::new(MergeTreeTTL {
56151            expressions,
56152            where_,
56153            group,
56154            aggregates,
56155        }))))
56156    }
56157
56158    /// parse_type - Parses a data type expression
56159    /// Python: _parse_type
56160    pub fn parse_type(&mut self) -> Result<Option<Expression>> {
56161        // First try to parse an interval
56162        if let Some(interval) = self.parse_interval()? {
56163            return self.parse_column_ops_with_expr(Some(interval));
56164        }
56165
56166        // Try to parse a data type
56167        let data_type = self.parse_types()?;
56168
56169        if let Some(dt) = data_type {
56170            // If it's a Cast (BigQuery inline constructor), apply column ops
56171            if matches!(dt, Expression::Cast(_)) {
56172                return self.parse_column_ops_with_expr(Some(dt));
56173            }
56174
56175            // Try to parse a primary expression after the type
56176            let start_pos = self.current;
56177            if let Some(primary) = self.parse_primary_or_var()? {
56178                // If it's a literal, this might be a type cast like DATE '2020-01-01'
56179                if let Expression::Literal(_) = &primary {
56180                    let result = self.parse_column_ops_with_expr(Some(primary))?;
56181                    if let Some(value) = result {
56182                        // Create a Cast expression
56183                        if let Expression::DataType(data_type_struct) = dt {
56184                            return Ok(Some(Expression::Cast(Box::new(Cast {
56185                                this: value,
56186                                to: data_type_struct,
56187                                trailing_comments: Vec::new(),
56188                                double_colon_syntax: false,
56189                                format: None,
56190                                default: None,
56191                                inferred_type: None,
56192                            }))));
56193                        }
56194                    }
56195                }
56196                // Backtrack if not a type-literal pattern
56197                self.current = start_pos;
56198            }
56199
56200            return Ok(Some(dt));
56201        }
56202
56203        Ok(None)
56204    }
56205
56206    /// parse_type_size - Ported from Python _parse_type_size
56207    /// Parses type size parameters like 10 in VARCHAR(10) or 10, 2 in DECIMAL(10, 2)
56208    #[allow(unused_variables, unused_mut)]
56209    pub fn parse_type_size(&mut self) -> Result<Option<Expression>> {
56210        // First try to parse a type - this handles both numeric literals and type names
56211        let this = self.parse_type()?;
56212
56213        if this.is_none() {
56214            return Ok(None);
56215        }
56216
56217        let mut result = this.unwrap();
56218
56219        // If it's a Column with no table, convert it to an Identifier (var)
56220        // This handles cases like CHAR in VARCHAR(100 CHAR)
56221        if let Expression::Column(ref col) = result {
56222            if col.table.is_none() {
56223                result = Expression::Identifier(col.name.clone());
56224            }
56225        }
56226
56227        // Check for optional expression after the type (e.g., "CHAR" in "100 CHAR")
56228        // This is for byte/char length specifiers in some dialects
56229        if let Some(var_token) = self.parse_var()? {
56230            // We have an additional specifier, combine them
56231            // For now, just return the original result since Rust doesn't have DataTypeParam
56232            // The var expression would be attached as an expression in Python
56233        }
56234
56235        Ok(Some(result))
56236    }
56237
56238    /// parse_types - Implemented from Python _parse_types
56239    /// Calls: parse_string
56240    #[allow(unused_variables, unused_mut)]
56241    pub fn parse_types(&mut self) -> Result<Option<Expression>> {
56242        if self.match_text_seq(&["SYSUDTLIB", "."]) {
56243            return Ok(Some(Expression::Identifier(Identifier {
56244                name: String::new(),
56245                quoted: false,
56246                trailing_comments: Vec::new(),
56247                span: None,
56248            })));
56249        }
56250        if self.match_text_seq(&["WITH", "TIME", "ZONE"]) {
56251            // Matched: WITH TIME ZONE
56252            return Ok(None);
56253        }
56254        if self.match_text_seq(&["WITH", "LOCAL", "TIME", "ZONE"]) {
56255            // Matched: WITH LOCAL TIME ZONE
56256            return Ok(None);
56257        }
56258        Ok(None)
56259    }
56260
56261    /// parse_unique - Implemented from Python _parse_unique
56262    /// Parses UNIQUE [KEY|INDEX] [NULLS NOT DISTINCT] [(columns)] [USING index_type]
56263    #[allow(unused_variables, unused_mut)]
56264    pub fn parse_unique(&mut self) -> Result<Option<Expression>> {
56265        // Check for optional KEY/INDEX
56266        let _ = self.match_texts(&["KEY", "INDEX"]);
56267
56268        // Check for NULLS NOT DISTINCT (PostgreSQL 15+ feature)
56269        let nulls = if self.match_text_seq(&["NULLS", "NOT", "DISTINCT"]) {
56270            Some(Box::new(Expression::Boolean(BooleanLiteral {
56271                value: true,
56272            })))
56273        } else {
56274            None
56275        };
56276
56277        // Parse the optional key name and schema (column list)
56278        let unique_key = self.parse_unique_key()?;
56279        let this = self.parse_schema_with_this(unique_key)?;
56280
56281        // Parse optional USING index_type
56282        let index_type = if self.match_token(TokenType::Using) {
56283            self.skip();
56284            Some(Box::new(Expression::Var(Box::new(Var {
56285                this: self.previous().text.clone(),
56286            }))))
56287        } else {
56288            None
56289        };
56290
56291        Ok(Some(Expression::UniqueColumnConstraint(Box::new(
56292            UniqueColumnConstraint {
56293                this: this.map(Box::new),
56294                index_type,
56295                on_conflict: None,
56296                nulls,
56297                options: Vec::new(),
56298            },
56299        ))))
56300    }
56301
56302    /// parse_unique_key - Parse the key/index name for UNIQUE constraint
56303    /// Simply parses an identifier
56304    #[allow(unused_variables, unused_mut)]
56305    pub fn parse_unique_key(&mut self) -> Result<Option<Expression>> {
56306        self.parse_id_var()
56307    }
56308
56309    /// parse_unnest - Ported from Python _parse_unnest
56310    /// Parses UNNEST(array_expr) [WITH ORDINALITY] [AS alias]
56311    #[allow(unused_variables, unused_mut)]
56312    pub fn parse_unnest(&mut self) -> Result<Option<Expression>> {
56313        // Check for UNNEST keyword
56314        if !self.match_texts(&["UNNEST"]) {
56315            return Ok(None);
56316        }
56317
56318        // Expect opening parenthesis
56319        if !self.match_token(TokenType::LParen) {
56320            return Ok(None);
56321        }
56322
56323        // Parse comma-separated array expression(s): UNNEST(arr1, arr2, ...)
56324        let this = match self.parse_expression() {
56325            Ok(expr) => expr,
56326            Err(e) => return Err(e),
56327        };
56328
56329        let mut extra_expressions = Vec::new();
56330        while self.match_token(TokenType::Comma) {
56331            let expr = self.parse_expression()?;
56332            extra_expressions.push(expr);
56333        }
56334
56335        // Expect closing parenthesis
56336        self.expect(TokenType::RParen)?;
56337
56338        // Check for WITH ORDINALITY (Presto) or WITH OFFSET (BigQuery)
56339        let mut with_ordinality = self.match_text_seq(&["WITH", "ORDINALITY"]);
56340        let mut offset_alias = None;
56341        if !with_ordinality && self.match_text_seq(&["WITH", "OFFSET"]) {
56342            with_ordinality = true;
56343            // Parse optional offset alias: WITH OFFSET AS y or WITH OFFSET y
56344            if matches!(
56345                self.config.dialect,
56346                Some(crate::dialects::DialectType::BigQuery)
56347            ) {
56348                let has_as = self.match_token(TokenType::As);
56349                if has_as || self.check(TokenType::Identifier) || self.check(TokenType::Var) {
56350                    let alias_name = self.advance().text;
56351                    offset_alias = Some(crate::expressions::Identifier {
56352                        name: alias_name,
56353                        quoted: false,
56354                        trailing_comments: Vec::new(),
56355                        span: None,
56356                    });
56357                }
56358            }
56359        }
56360
56361        // Parse optional alias
56362        let alias = if self.match_token(TokenType::As)
56363            || self.check(TokenType::Identifier)
56364            || self.check(TokenType::QuotedIdentifier)
56365        {
56366            if self.check(TokenType::Identifier) || self.check(TokenType::QuotedIdentifier) {
56367                let is_quoted = self.check(TokenType::QuotedIdentifier);
56368                let token = self.advance();
56369                let mut ident = Identifier::new(token.text.clone());
56370                if is_quoted {
56371                    ident.quoted = true;
56372                }
56373                Some(ident)
56374            } else {
56375                None
56376            }
56377        } else {
56378            None
56379        };
56380
56381        Ok(Some(Expression::Unnest(Box::new(UnnestFunc {
56382            this,
56383            expressions: extra_expressions,
56384            with_ordinality,
56385            alias,
56386            offset_alias,
56387        }))))
56388    }
56389
56390    /// parse_unpivot_columns - Implemented from Python _parse_unpivot_columns
56391    /// Python: parser.py:4454-4462
56392    /// Parses INTO NAME column VALUE col1, col2, ...
56393    #[allow(unused_variables, unused_mut)]
56394    pub fn parse_unpivot_columns(&mut self) -> Result<Option<Expression>> {
56395        // Must match INTO keyword
56396        if !self.match_token(TokenType::Into) {
56397            return Ok(None);
56398        }
56399
56400        // Parse NAME column
56401        let this = if self.match_text_seq(&["NAME"]) {
56402            self.parse_column()?
56403        } else {
56404            None
56405        };
56406
56407        // Parse VALUE columns
56408        let expressions = if self.match_text_seq(&["VALUE"]) {
56409            let mut cols = Vec::new();
56410            loop {
56411                if let Some(col) = self.parse_column()? {
56412                    cols.push(col);
56413                }
56414                if !self.match_token(TokenType::Comma) {
56415                    break;
56416                }
56417            }
56418            cols
56419        } else {
56420            Vec::new()
56421        };
56422
56423        // If we have either this or expressions, return an UnpivotColumns
56424        if this.is_some() || !expressions.is_empty() {
56425            Ok(Some(Expression::UnpivotColumns(Box::new(UnpivotColumns {
56426                this: Box::new(this.unwrap_or(Expression::Null(Null))),
56427                expressions,
56428            }))))
56429        } else {
56430            Ok(None)
56431        }
56432    }
56433
56434    /// parse_unquoted_field - Parses a field and converts unquoted identifiers to Var
56435    /// Python: _parse_unquoted_field
56436    pub fn parse_unquoted_field(&mut self) -> Result<Option<Expression>> {
56437        let field = self.parse_field()?;
56438
56439        // If field is an unquoted identifier, convert it to a Var
56440        match field {
56441            Some(Expression::Identifier(id)) if !id.quoted => {
56442                Ok(Some(Expression::Var(Box::new(Var { this: id.name }))))
56443            }
56444            other => Ok(other),
56445        }
56446    }
56447
56448    /// parse_user_defined_function - Parses user-defined function call
56449    /// Python: _parse_user_defined_function
56450    /// Parses: schema.function_name(param1, param2, ...)
56451    pub fn parse_user_defined_function(&mut self) -> Result<Option<Expression>> {
56452        // Parse table parts (potentially schema-qualified function name)
56453        let this = self.parse_table_parts()?;
56454        if this.is_none() {
56455            return Ok(None);
56456        }
56457
56458        // If no L_PAREN, return just the table parts (not a function call)
56459        if !self.match_token(TokenType::LParen) {
56460            return Ok(this);
56461        }
56462
56463        // Parse function parameters
56464        let mut expressions = Vec::new();
56465        if !self.check(TokenType::RParen) {
56466            loop {
56467                if let Some(param) = self.parse_function_parameter()? {
56468                    expressions.push(param);
56469                }
56470                if !self.match_token(TokenType::Comma) {
56471                    break;
56472                }
56473            }
56474        }
56475
56476        self.match_token(TokenType::RParen);
56477
56478        Ok(Some(Expression::UserDefinedFunction(Box::new(
56479            UserDefinedFunction {
56480                this: Box::new(this.unwrap()),
56481                expressions,
56482                wrapped: Some(Box::new(Expression::Boolean(BooleanLiteral {
56483                    value: true,
56484                }))),
56485            },
56486        ))))
56487    }
56488
56489    /// parse_user_defined_function_expression - Parse user-defined function expression
56490    #[allow(unused_variables, unused_mut)]
56491    pub fn parse_user_defined_function_expression(&mut self) -> Result<Option<Expression>> {
56492        // Parse a statement and wrap in Some if successful
56493        match self.parse_statement() {
56494            Ok(stmt) => Ok(Some(stmt)),
56495            Err(_) => Ok(None),
56496        }
56497    }
56498
56499    /// parse_user_defined_type - Parses a user-defined type reference
56500    /// Python: _parse_user_defined_type
56501    /// Format: schema.type_name or just type_name
56502    pub fn parse_user_defined_type(
56503        &mut self,
56504        identifier: Identifier,
56505    ) -> Result<Option<Expression>> {
56506        let mut type_name = identifier.name.clone();
56507
56508        // Handle dotted names (schema.type_name)
56509        while self.match_token(TokenType::Dot) {
56510            if !self.is_at_end() {
56511                let token = self.advance();
56512                type_name = format!("{}.{}", type_name, token.text);
56513            } else {
56514                break;
56515            }
56516        }
56517
56518        // Return as a custom data type
56519        Ok(Some(Expression::DataType(DataType::Custom {
56520            name: type_name,
56521        })))
56522    }
56523
56524    /// parse_using_identifiers - Ported from Python _parse_using_identifiers
56525    /// Parses (col1, col2, ...) for JOIN USING clause
56526    #[allow(unused_variables, unused_mut)]
56527    pub fn parse_using_identifiers(&mut self) -> Result<Option<Expression>> {
56528        // Optionally expect opening paren
56529        let has_paren = self.match_token(TokenType::LParen);
56530
56531        let mut identifiers = Vec::new();
56532        loop {
56533            // Parse column as identifier
56534            if let Some(expr) = self.parse_identifier()? {
56535                identifiers.push(expr);
56536            } else {
56537                break;
56538            }
56539            if !self.match_token(TokenType::Comma) {
56540                break;
56541            }
56542        }
56543
56544        // Match closing paren if we matched opening
56545        if has_paren {
56546            self.expect(TokenType::RParen)?;
56547        }
56548
56549        if identifiers.is_empty() {
56550            Ok(None)
56551        } else {
56552            Ok(Some(Expression::Tuple(Box::new(Tuple {
56553                expressions: identifiers,
56554            }))))
56555        }
56556    }
56557
56558    /// parse_value - Parses a value tuple for INSERT VALUES clause
56559    /// Python: _parse_value
56560    /// Syntax: (expr1, expr2, ...) or just expr (single value)
56561    pub fn parse_value(&mut self) -> Result<Option<Expression>> {
56562        // Check for parenthesized list of expressions
56563        if self.match_token(TokenType::LParen) {
56564            let mut expressions = Vec::new();
56565
56566            if !self.check(TokenType::RParen) {
56567                loop {
56568                    // Support DEFAULT keyword in VALUES
56569                    if self.match_texts(&["DEFAULT"]) {
56570                        let text = self.previous().text.to_ascii_uppercase();
56571                        expressions.push(Expression::Var(Box::new(Var { this: text })));
56572                    } else {
56573                        // Try to parse an expression
56574                        let saved_pos = self.current;
56575                        match self.parse_expression() {
56576                            Ok(expr) => expressions.push(expr),
56577                            Err(_) => {
56578                                self.current = saved_pos;
56579                            }
56580                        }
56581                    }
56582
56583                    if !self.match_token(TokenType::Comma) {
56584                        break;
56585                    }
56586                }
56587            }
56588
56589            self.match_token(TokenType::RParen);
56590            return Ok(Some(Expression::Tuple(Box::new(Tuple { expressions }))));
56591        }
56592
56593        // Single value without parentheses (some dialects support VALUES 1, 2)
56594        let saved_pos = self.current;
56595        match self.parse_expression() {
56596            Ok(expr) => {
56597                return Ok(Some(Expression::Tuple(Box::new(Tuple {
56598                    expressions: vec![expr],
56599                }))));
56600            }
56601            Err(_) => {
56602                self.current = saved_pos;
56603            }
56604        }
56605
56606        Ok(None)
56607    }
56608
56609    /// parse_var - Parse variable reference (unquoted identifier)
56610    /// Python: if self._match(TokenType.VAR): return exp.Var(this=self._prev.text)
56611    pub fn parse_var(&mut self) -> Result<Option<Expression>> {
56612        if self.match_token(TokenType::Var) {
56613            let text = self.previous().text.clone();
56614            return Ok(Some(Expression::Var(Box::new(Var { this: text }))));
56615        }
56616        // Fall back to placeholder parsing
56617        self.parse_placeholder()
56618    }
56619
56620    /// parse_var_from_options - Ported from Python _parse_var_from_options
56621    /// Parses a variable/identifier from a predefined set of options
56622    #[allow(unused_variables, unused_mut)]
56623    pub fn parse_var_from_options(&mut self) -> Result<Option<Expression>> {
56624        // Without the options dict, we just try to parse an identifier
56625        if self.is_at_end() {
56626            return Ok(None);
56627        }
56628
56629        // Get current token text as the option
56630        let token = self.peek().clone();
56631        if token.token_type == TokenType::Identifier || token.token_type == TokenType::Var {
56632            self.skip();
56633            return Ok(Some(Expression::Var(Box::new(Var {
56634                this: token.text.to_ascii_uppercase(),
56635            }))));
56636        }
56637
56638        Ok(None)
56639    }
56640
56641    /// parse_var_or_string - Delegates to parse_string
56642    #[allow(unused_variables, unused_mut)]
56643    /// parse_var_or_string - Parses a string literal or a variable
56644    /// Python: parser.py:7506-7507
56645    pub fn parse_var_or_string(&mut self) -> Result<Option<Expression>> {
56646        // Try string first, then var
56647        if let Some(s) = self.parse_string()? {
56648            return Ok(Some(s));
56649        }
56650        self.parse_var_any_token()
56651    }
56652
56653    /// parse_vector_expressions - Transforms vector type parameters
56654    /// Python: _parse_vector_expressions
56655    /// In Python, this transforms a list of expressions where the first element (identifier)
56656    /// is converted to a DataType. In Rust, since VECTOR type parsing is handled inline in
56657    /// parse_data_type, this method parses vector expressions (element_type, dimension) from
56658    /// the current position and returns them as a Tuple.
56659    pub fn parse_vector_expressions(&mut self) -> Result<Option<Expression>> {
56660        let mut expressions = Vec::new();
56661
56662        // Parse element type - convert identifier to DataType
56663        if let Some(type_expr) = self.parse_type()? {
56664            expressions.push(type_expr);
56665        } else {
56666            return Ok(None);
56667        }
56668
56669        // Parse optional dimension or additional parameters
56670        while self.match_token(TokenType::Comma) {
56671            if let Some(expr) = self.parse_primary_or_var()? {
56672                expressions.push(expr);
56673            }
56674        }
56675
56676        if expressions.is_empty() {
56677            return Ok(None);
56678        }
56679
56680        Ok(Some(Expression::Tuple(Box::new(Tuple { expressions }))))
56681    }
56682
56683    /// parse_version - Implemented from Python _parse_version
56684    /// Python: parser.py:4266-4295
56685    /// Parses FOR SYSTEM_TIME AS OF, VERSIONS BETWEEN, etc.
56686    #[allow(unused_variables, unused_mut)]
56687    pub fn parse_version(&mut self) -> Result<Option<Expression>> {
56688        // Check for TIMESTAMP or VERSION snapshot token
56689        let this = if self.match_token(TokenType::TimestampSnapshot) {
56690            "TIMESTAMP".to_string()
56691        } else if self.match_token(TokenType::VersionSnapshot) {
56692            "VERSION".to_string()
56693        } else {
56694            return Ok(None);
56695        };
56696
56697        // Parse the kind and expression
56698        let (kind, expression) = if self.match_texts(&["FROM", "BETWEEN"]) {
56699            // FROM start TO end or BETWEEN start AND end
56700            let kind_str = self.previous().text.to_ascii_uppercase();
56701            let start = self.parse_bitwise()?;
56702            self.match_texts(&["TO", "AND"]);
56703            let end = self.parse_bitwise()?;
56704            let tuple = Expression::Tuple(Box::new(Tuple {
56705                expressions: vec![
56706                    start.unwrap_or(Expression::Null(Null)),
56707                    end.unwrap_or(Expression::Null(Null)),
56708                ],
56709            }));
56710            (kind_str, Some(Box::new(tuple)))
56711        } else if self.match_text_seq(&["CONTAINED", "IN"]) {
56712            // CONTAINED IN (values)
56713            let expressions = if self.match_token(TokenType::LParen) {
56714                let exprs = self.parse_expression_list()?;
56715                self.expect(TokenType::RParen)?;
56716                exprs
56717            } else {
56718                Vec::new()
56719            };
56720            (
56721                "CONTAINED IN".to_string(),
56722                Some(Box::new(Expression::Tuple(Box::new(Tuple { expressions })))),
56723            )
56724        } else if self.match_token(TokenType::All) {
56725            // ALL
56726            ("ALL".to_string(), None)
56727        } else {
56728            // AS OF
56729            self.match_text_seq(&["AS", "OF"]);
56730            let type_expr = self.parse_type()?;
56731            ("AS OF".to_string(), type_expr.map(Box::new))
56732        };
56733
56734        Ok(Some(Expression::Version(Box::new(Version {
56735            this: Box::new(Expression::Var(Box::new(Var { this }))),
56736            kind,
56737            expression,
56738        }))))
56739    }
56740
56741    /// parse_volatile_property - Parses VOLATILE property
56742    /// Python: _parse_volatile_property
56743    /// Returns VolatileProperty for table volatility or StabilityProperty for function stability
56744    pub fn parse_volatile_property(&mut self) -> Result<Option<Expression>> {
56745        // Check the token before VOLATILE to determine context
56746        // In SQL, VOLATILE can mean:
56747        // 1. Table volatility (CREATE VOLATILE TABLE)
56748        // 2. Function stability (CREATE FUNCTION ... VOLATILE)
56749
56750        // Look back to see if this is in a table context
56751        // PRE_VOLATILE_TOKENS typically include: CREATE, REPLACE, GLOBAL, etc.
56752        let is_table_context = if self.current >= 2 {
56753            let pre_token = &self.tokens[self.current - 2];
56754            matches!(
56755                pre_token.token_type,
56756                TokenType::Create | TokenType::Global | TokenType::Temporary | TokenType::Replace
56757            )
56758        } else {
56759            false
56760        };
56761
56762        if is_table_context {
56763            Ok(Some(Expression::VolatileProperty(Box::new(
56764                VolatileProperty { this: None },
56765            ))))
56766        } else {
56767            // Function stability - return StabilityProperty with "VOLATILE" literal
56768            Ok(Some(Expression::StabilityProperty(Box::new(
56769                StabilityProperty {
56770                    this: Box::new(Expression::Literal(Box::new(Literal::String(
56771                        "VOLATILE".to_string(),
56772                    )))),
56773                },
56774            ))))
56775        }
56776    }
56777
56778    /// parse_when_matched - Implemented from Python _parse_when_matched
56779    /// Calls: parse_disjunction, parse_star, parse_value
56780    #[allow(unused_variables, unused_mut)]
56781    /// Parse WHEN [NOT] MATCHED clauses for MERGE statements
56782    /// This is the public entry point that calls parse_when_matched_clauses
56783    pub fn parse_when_matched(&mut self) -> Result<Option<Expression>> {
56784        self.parse_when_matched_clauses()
56785    }
56786
56787    /// parse_where - Parse WHERE clause
56788    /// Python: if not self._match(TokenType.WHERE): return None; return exp.Where(this=self._parse_disjunction())
56789    pub fn parse_where(&mut self) -> Result<Option<Expression>> {
56790        if !self.match_token(TokenType::Where) {
56791            return Ok(None);
56792        }
56793        // Parse the condition expression
56794        let condition = self.parse_expression()?;
56795        Ok(Some(Expression::Where(Box::new(Where { this: condition }))))
56796    }
56797
56798    /// parse_window - Implemented from Python _parse_window
56799    /// Calls: parse_window_spec, parse_partition_and_order
56800    #[allow(unused_variables, unused_mut)]
56801    pub fn parse_window(&mut self) -> Result<Option<Expression>> {
56802        if self.match_text_seq(&["WITHIN", "GROUP"]) {
56803            return Ok(Some(Expression::WindowSpec(Box::new(WindowSpec {
56804                partition_by: Vec::new(),
56805                order_by: Vec::new(),
56806                frame: None,
56807            }))));
56808        }
56809        if self.match_text_seq(&["LAST"]) {
56810            // Matched: LAST
56811            return Ok(None);
56812        }
56813        if self.match_text_seq(&["EXCLUDE"]) {
56814            // Matched: EXCLUDE
56815            return Ok(None);
56816        }
56817        Ok(None)
56818    }
56819
56820    /// parse_window_clause - Ported from Python _parse_window_clause
56821    /// Parses WINDOW named_window_definition [, named_window_definition, ...]
56822    #[allow(unused_variables, unused_mut)]
56823    pub fn parse_window_clause(&mut self) -> Result<Option<Expression>> {
56824        if !self.match_token(TokenType::Window) {
56825            return Ok(None);
56826        }
56827
56828        // Parse comma-separated named window definitions
56829        let mut windows = Vec::new();
56830        loop {
56831            // Parse window name
56832            let name = self.parse_identifier()?;
56833            if name.is_none() {
56834                break;
56835            }
56836
56837            // Expect AS
56838            self.expect(TokenType::As)?;
56839
56840            // Parse window specification (parenthesized)
56841            self.expect(TokenType::LParen)?;
56842            let spec = self.parse_window_spec_inner()?;
56843            self.expect(TokenType::RParen)?;
56844
56845            if let (Some(name_expr), Some(spec_expr)) = (name, spec) {
56846                // Create an Alias expression wrapping the spec with the name
56847                let alias_ident = if let Expression::Identifier(id) = name_expr {
56848                    id
56849                } else {
56850                    Identifier::new("window")
56851                };
56852                windows.push(Expression::Alias(Box::new(Alias {
56853                    this: spec_expr,
56854                    alias: alias_ident,
56855                    column_aliases: Vec::new(),
56856                    pre_alias_comments: Vec::new(),
56857                    trailing_comments: Vec::new(),
56858                    inferred_type: None,
56859                })));
56860            }
56861
56862            if !self.match_token(TokenType::Comma) {
56863                break;
56864            }
56865        }
56866
56867        if windows.is_empty() {
56868            Ok(None)
56869        } else {
56870            Ok(Some(Expression::Tuple(Box::new(Tuple {
56871                expressions: windows,
56872            }))))
56873        }
56874    }
56875
56876    /// Parse window spec inner (without parentheses)
56877    fn parse_window_spec_inner(&mut self) -> Result<Option<Expression>> {
56878        // Parse optional base window name (identifier not followed by PARTITION or ORDER or DISTRIBUTE or SORT)
56879        let _base = if (self.check(TokenType::Identifier)
56880            || self.check(TokenType::QuotedIdentifier))
56881            && !self.check(TokenType::Partition)
56882            && !self.check(TokenType::Order)
56883            && !self.check(TokenType::Distribute)
56884            && !self.check(TokenType::Sort)
56885        {
56886            self.parse_identifier()?
56887        } else {
56888            None
56889        };
56890
56891        // Parse PARTITION BY or DISTRIBUTE BY (Hive uses DISTRIBUTE BY in window specs)
56892        let partition_by = if self.match_keywords(&[TokenType::Partition, TokenType::By]) {
56893            self.parse_expression_list()?
56894        } else if self.match_keywords(&[TokenType::Distribute, TokenType::By]) {
56895            // Hive: DISTRIBUTE BY is equivalent to PARTITION BY in window specs
56896            self.parse_expression_list()?
56897        } else {
56898            Vec::new()
56899        };
56900
56901        // Parse ORDER BY or SORT BY (Hive uses SORT BY in window specs)
56902        let order_by = if self.match_token(TokenType::Order) {
56903            self.match_token(TokenType::By);
56904            let mut orders = Vec::new();
56905            loop {
56906                if let Some(ordered) = self.parse_ordered_item()? {
56907                    orders.push(ordered);
56908                } else {
56909                    break;
56910                }
56911                if !self.match_token(TokenType::Comma) {
56912                    break;
56913                }
56914            }
56915            orders
56916        } else if self.match_token(TokenType::Sort) {
56917            // Hive: SORT BY is equivalent to ORDER BY in window specs
56918            self.match_token(TokenType::By);
56919            let mut orders = Vec::new();
56920            loop {
56921                if let Some(ordered) = self.parse_ordered_item()? {
56922                    orders.push(ordered);
56923                } else {
56924                    break;
56925                }
56926                if !self.match_token(TokenType::Comma) {
56927                    break;
56928                }
56929            }
56930            orders
56931        } else {
56932            Vec::new()
56933        };
56934
56935        // Parse frame specification (ROWS/RANGE/GROUPS BETWEEN ... AND ...)
56936        let frame = self.parse_window_frame()?;
56937
56938        Ok(Some(Expression::WindowSpec(Box::new(WindowSpec {
56939            partition_by,
56940            order_by,
56941            frame,
56942        }))))
56943    }
56944
56945    /// parse_window_spec - Implemented from Python _parse_window_spec
56946    #[allow(unused_variables, unused_mut)]
56947    pub fn parse_window_spec(&mut self) -> Result<Option<Expression>> {
56948        if self.match_text_seq(&["UNBOUNDED"]) {
56949            // Matched: UNBOUNDED
56950            return Ok(None);
56951        }
56952        if self.match_text_seq(&["CURRENT", "ROW"]) {
56953            // Matched: CURRENT ROW
56954            return Ok(None);
56955        }
56956        Ok(None)
56957    }
56958
56959    /// parse_with_operator - Parse column with operator class (PostgreSQL)
56960    /// Parses: ordered_expression [WITH operator]
56961    #[allow(unused_variables, unused_mut)]
56962    pub fn parse_with_operator(&mut self) -> Result<Option<Expression>> {
56963        // First parse an ordered expression with optional operator class
56964        let this = if let Some(opclass) = self.parse_opclass()? {
56965            opclass
56966        } else if let Some(ordered) = self.parse_ordered()? {
56967            ordered
56968        } else {
56969            return Ok(None);
56970        };
56971
56972        // Check for WITH operator
56973        if !self.match_token(TokenType::With) {
56974            return Ok(Some(this));
56975        }
56976
56977        // Parse the operator
56978        let op = self.parse_var()?;
56979        let op_str = match op {
56980            Some(Expression::Identifier(id)) => id.name,
56981            Some(Expression::Var(v)) => v.this.clone(),
56982            _ => String::new(),
56983        };
56984
56985        Ok(Some(Expression::WithOperator(Box::new(WithOperator {
56986            this: Box::new(this),
56987            op: op_str,
56988        }))))
56989    }
56990
56991    /// parse_with_property - Implemented from Python _parse_with_property
56992    /// Calls: parse_withjournaltable, parse_withisolatedloading, parse_wrapped_properties
56993    #[allow(unused_variables, unused_mut)]
56994    pub fn parse_with_property(&mut self) -> Result<Option<Expression>> {
56995        if self.match_text_seq(&["(", "SYSTEM_VERSIONING"]) {
56996            return Ok(Some(Expression::WithProcedureOptions(Box::new(
56997                WithProcedureOptions {
56998                    expressions: Vec::new(),
56999                },
57000            ))));
57001        }
57002        if self.match_text_seq(&["JOURNAL"]) {
57003            // Matched: JOURNAL
57004            return Ok(None);
57005        }
57006        if self.match_text_seq(&["DATA"]) {
57007            // Matched: DATA
57008            return Ok(None);
57009        }
57010        Ok(None)
57011    }
57012
57013    /// parse_withdata - Implemented from Python _parse_withdata
57014    #[allow(unused_variables, unused_mut)]
57015    pub fn parse_withdata(&mut self) -> Result<Option<Expression>> {
57016        if self.match_text_seq(&["AND", "STATISTICS"]) {
57017            return Ok(Some(Expression::WithDataProperty(Box::new(
57018                WithDataProperty {
57019                    no: None,
57020                    statistics: None,
57021                },
57022            ))));
57023        }
57024        if self.match_text_seq(&["AND", "NO", "STATISTICS"]) {
57025            // Matched: AND NO STATISTICS
57026            return Ok(None);
57027        }
57028        Ok(None)
57029    }
57030
57031    /// parse_withisolatedloading - Implemented from Python _parse_withisolatedloading
57032    #[allow(unused_variables, unused_mut)]
57033    pub fn parse_withisolatedloading(&mut self) -> Result<Option<Expression>> {
57034        if self.match_text_seq(&["NO"]) {
57035            return Ok(Some(Expression::IsolatedLoadingProperty(Box::new(
57036                IsolatedLoadingProperty {
57037                    no: None,
57038                    concurrent: None,
57039                    target: None,
57040                },
57041            ))));
57042        }
57043        if self.match_text_seq(&["CONCURRENT"]) {
57044            // Matched: CONCURRENT
57045            return Ok(None);
57046        }
57047        Ok(None)
57048    }
57049
57050    /// parse_withjournaltable - Teradata WITH JOURNAL TABLE property
57051    /// Parses: WITH JOURNAL TABLE = table_name
57052    #[allow(unused_variables, unused_mut)]
57053    pub fn parse_withjournaltable(&mut self) -> Result<Option<Expression>> {
57054        // Optionally consume TABLE keyword
57055        self.match_token(TokenType::Table);
57056
57057        // Optionally consume = sign
57058        self.match_token(TokenType::Eq);
57059
57060        // Parse the table reference
57061        let table = self.parse_table_parts()?;
57062        if table.is_none() {
57063            return Ok(None);
57064        }
57065
57066        Ok(Some(Expression::WithJournalTableProperty(Box::new(
57067            WithJournalTableProperty {
57068                this: Box::new(table.unwrap()),
57069            },
57070        ))))
57071    }
57072
57073    /// parse_wrapped - Parses an expression wrapped in parentheses
57074    /// Python: _parse_wrapped(parse_method)
57075    /// This version parses a disjunction (expression) inside parentheses
57076    pub fn parse_wrapped(&mut self) -> Result<Option<Expression>> {
57077        if !self.match_token(TokenType::LParen) {
57078            return Ok(None);
57079        }
57080
57081        let result = self.parse_disjunction()?;
57082        self.match_token(TokenType::RParen);
57083
57084        Ok(result)
57085    }
57086
57087    /// parse_wrapped_csv - Parses comma-separated expressions wrapped in parentheses
57088    /// Python: _parse_wrapped_csv(parse_method)
57089    pub fn parse_wrapped_csv(&mut self) -> Result<Option<Expression>> {
57090        if !self.match_token(TokenType::LParen) {
57091            return Ok(None);
57092        }
57093
57094        let expressions = self.parse_expression_list()?;
57095        self.match_token(TokenType::RParen);
57096
57097        if expressions.is_empty() {
57098            return Ok(None);
57099        }
57100
57101        Ok(Some(Expression::Tuple(Box::new(Tuple { expressions }))))
57102    }
57103
57104    /// parse_wrapped_id_vars - Parses comma-separated identifiers wrapped in parentheses
57105    /// Python: _parse_wrapped_id_vars
57106    pub fn parse_wrapped_id_vars(&mut self) -> Result<Option<Expression>> {
57107        if !self.match_token(TokenType::LParen) {
57108            return Ok(None);
57109        }
57110
57111        let mut identifiers = Vec::new();
57112        loop {
57113            if let Some(id) = self.parse_id_var()? {
57114                identifiers.push(id);
57115            } else {
57116                break;
57117            }
57118            if !self.match_token(TokenType::Comma) {
57119                break;
57120            }
57121        }
57122
57123        self.match_token(TokenType::RParen);
57124
57125        if identifiers.is_empty() {
57126            return Ok(None);
57127        }
57128
57129        Ok(Some(Expression::Tuple(Box::new(Tuple {
57130            expressions: identifiers,
57131        }))))
57132    }
57133
57134    /// parse_wrapped_options - Implemented from Python _parse_wrapped_options
57135    /// Parses space-separated properties wrapped in parentheses (for Snowflake STAGE_FILE_FORMAT, etc.)
57136    /// Format: = (KEY=VALUE KEY2=VALUE2 ...)
57137    pub fn parse_wrapped_options(&mut self) -> Result<Option<Expression>> {
57138        // Match optional = before opening paren
57139        self.match_token(TokenType::Eq);
57140
57141        // Expect opening paren
57142        if !self.match_token(TokenType::LParen) {
57143            return Ok(None);
57144        }
57145
57146        // Parse space-separated properties (no comma required between them)
57147        let mut properties = Vec::new();
57148        while !self.check(TokenType::RParen) && !self.is_at_end() {
57149            // Try to parse a property: KEY=VALUE
57150            if let Some(prop) = self.parse_option_property()? {
57151                properties.push(prop);
57152            } else {
57153                break;
57154            }
57155        }
57156
57157        // Expect closing paren
57158        self.match_token(TokenType::RParen);
57159
57160        if properties.is_empty() {
57161            Ok(None)
57162        } else {
57163            Ok(Some(Expression::Tuple(Box::new(Tuple {
57164                expressions: properties,
57165            }))))
57166        }
57167    }
57168
57169    /// Parse a single option property: KEY=VALUE
57170    /// Handles various value types: identifiers, strings, numbers, nested parens like ('') or (val1, val2)
57171    fn parse_option_property(&mut self) -> Result<Option<Expression>> {
57172        // Save position to retreat if this isn't a property
57173        let index = self.current;
57174
57175        // Parse the key (identifier/column name)
57176        // For Snowflake options, keys are identifiers like TYPE, FIELD_DELIMITER, NULL_IF, etc.
57177        let key = if self.check(TokenType::Identifier)
57178            || self.check(TokenType::Var)
57179            || self
57180                .peek()
57181                .text
57182                .chars()
57183                .all(|c| c.is_ascii_alphanumeric() || c == '_')
57184        {
57185            let name = self.peek().text.clone();
57186            self.skip();
57187            Some(Expression::Var(Box::new(Var { this: name })))
57188        } else {
57189            None
57190        };
57191
57192        let key = match key {
57193            Some(k) => k,
57194            None => {
57195                self.current = index;
57196                return Ok(None);
57197            }
57198        };
57199
57200        // Expect =
57201        if !self.match_token(TokenType::Eq) {
57202            self.current = index;
57203            return Ok(None);
57204        }
57205
57206        // Parse the value - can be:
57207        // - Simple identifier: CSV, SKIP_FILE, BASE64, TRUE, FALSE, CASE_SENSITIVE
57208        // - String literal: '|', '"', 'TZHTZM YYYY-MM-DD HH24:MI:SS.FF9'
57209        // - Number: 5
57210        // - Nested parens for tuple: ('')
57211        let value = if self.check(TokenType::LParen) {
57212            // Parse nested parenthesized value like NULL_IF=('')
57213            self.skip(); // consume (
57214            let mut inner_exprs = Vec::new();
57215            while !self.check(TokenType::RParen) && !self.is_at_end() {
57216                if let Some(expr) = self.parse_primary_for_option()? {
57217                    inner_exprs.push(expr);
57218                }
57219                // Allow comma between nested values
57220                self.match_token(TokenType::Comma);
57221            }
57222            self.match_token(TokenType::RParen);
57223            Expression::Tuple(Box::new(Tuple {
57224                expressions: inner_exprs,
57225            }))
57226        } else if let Some(primary) = self.parse_primary_for_option()? {
57227            primary
57228        } else {
57229            // Fallback: try to parse as a var
57230            let text = self.peek().text.clone();
57231            self.skip();
57232            Expression::Var(Box::new(Var { this: text }))
57233        };
57234
57235        // Return as a Property expression (KEY=VALUE)
57236        Ok(Some(Expression::Property(Box::new(Property {
57237            this: Box::new(key),
57238            value: Some(Box::new(value)),
57239        }))))
57240    }
57241
57242    /// Parse a primary value for option properties
57243    /// Handles strings, numbers, identifiers, TRUE/FALSE
57244    fn parse_primary_for_option(&mut self) -> Result<Option<Expression>> {
57245        // String literal
57246        if self.check(TokenType::String) {
57247            let text = self.peek().text.clone();
57248            self.skip();
57249            return Ok(Some(Expression::Literal(Box::new(Literal::String(text)))));
57250        }
57251
57252        // Number
57253        if self.check(TokenType::Number) {
57254            let text = self.peek().text.clone();
57255            self.skip();
57256            return Ok(Some(Expression::Literal(Box::new(Literal::Number(text)))));
57257        }
57258
57259        // TRUE/FALSE
57260        if self.check(TokenType::True) {
57261            self.skip();
57262            return Ok(Some(Expression::Boolean(BooleanLiteral { value: true })));
57263        }
57264        if self.check(TokenType::False) {
57265            self.skip();
57266            return Ok(Some(Expression::Boolean(BooleanLiteral { value: false })));
57267        }
57268
57269        // Identifier or keyword used as value (CSV, SKIP_FILE, BASE64, etc.)
57270        if self.check(TokenType::Identifier)
57271            || self.check(TokenType::Var)
57272            || (!self.check(TokenType::RParen)
57273                && !self.check(TokenType::Comma)
57274                && !self.check(TokenType::Eq)
57275                && !self.is_at_end())
57276        {
57277            let text = self.peek().text.clone();
57278            // Don't consume if it's a closing paren or could be the next property key followed by =
57279            if self.check(TokenType::RParen) {
57280                return Ok(None);
57281            }
57282            // Check if this is the start of next property (followed by =)
57283            if self.check_next(TokenType::Eq) {
57284                return Ok(None);
57285            }
57286            self.skip();
57287            return Ok(Some(Expression::Var(Box::new(Var { this: text }))));
57288        }
57289
57290        Ok(None)
57291    }
57292
57293    /// parse_options_list - Parses BigQuery-style OPTIONS list: (key=value, key=value, ...)
57294    /// Parses key=value assignments where values can be complex expressions
57295    pub fn parse_options_list(&mut self) -> Result<Vec<Expression>> {
57296        // Expect opening paren
57297        if !self.match_token(TokenType::LParen) {
57298            return Ok(Vec::new());
57299        }
57300
57301        // Parse comma-separated key=value pairs
57302        let mut options = Vec::new();
57303        loop {
57304            // Check for empty OPTIONS () or end of list
57305            if self.check(TokenType::RParen) {
57306                break;
57307            }
57308
57309            // Parse key=value using parse_assignment which handles EQ operations
57310            if let Some(opt) = self.parse_assignment()? {
57311                options.push(opt);
57312            } else {
57313                break;
57314            }
57315
57316            if !self.match_token(TokenType::Comma) {
57317                break;
57318            }
57319        }
57320
57321        // Expect closing paren
57322        self.expect(TokenType::RParen)?;
57323
57324        Ok(options)
57325    }
57326
57327    /// Parse BigQuery PARTITION BY property and return a typed AST node.
57328    fn parse_bigquery_partition_by_property(&mut self) -> Result<Option<Expression>> {
57329        let start = self.current;
57330        let matched_partition = if self.match_token(TokenType::PartitionBy) {
57331            true
57332        } else if self.match_token(TokenType::Partition) {
57333            self.match_token(TokenType::By)
57334        } else {
57335            false
57336        };
57337
57338        if !matched_partition {
57339            self.current = start;
57340            return Ok(None);
57341        }
57342
57343        let mut expressions = Vec::new();
57344        while !self.is_at_end()
57345            && !self.check(TokenType::Cluster)
57346            && !self.check(TokenType::As)
57347            && !self.check(TokenType::Semicolon)
57348            && !self.check(TokenType::RParen)
57349            && !self.check_identifier("OPTIONS")
57350        {
57351            match self.parse_expression() {
57352                Ok(expr) => expressions.push(expr),
57353                Err(_) => {
57354                    // Fall back to generic/raw parsing if typed parsing can't consume this form.
57355                    self.current = start;
57356                    return Ok(None);
57357                }
57358            }
57359
57360            if !self.match_token(TokenType::Comma) {
57361                break;
57362            }
57363        }
57364
57365        if expressions.is_empty() {
57366            self.current = start;
57367            return Ok(None);
57368        }
57369
57370        Ok(Some(Expression::PartitionByProperty(Box::new(
57371            PartitionByProperty { expressions },
57372        ))))
57373    }
57374
57375    /// Parse BigQuery CLUSTER BY property and return a typed AST node.
57376    fn parse_bigquery_cluster_by_property(&mut self) -> Result<Option<Expression>> {
57377        let start = self.current;
57378        if !self.match_keywords(&[TokenType::Cluster, TokenType::By]) {
57379            self.current = start;
57380            return Ok(None);
57381        }
57382
57383        let mut columns = Vec::new();
57384        loop {
57385            if let Some(Expression::Identifier(id)) = self.parse_identifier()? {
57386                columns.push(id);
57387            } else if self.is_identifier_or_keyword_token() {
57388                let name = self.advance().text;
57389                columns.push(Identifier {
57390                    name,
57391                    quoted: false,
57392                    trailing_comments: Vec::new(),
57393                    span: None,
57394                });
57395            } else {
57396                // Fall back to generic/raw parsing if typed parsing can't consume this form.
57397                self.current = start;
57398                return Ok(None);
57399            }
57400
57401            if !self.match_token(TokenType::Comma) {
57402                break;
57403            }
57404        }
57405
57406        if columns.is_empty() {
57407            self.current = start;
57408            return Ok(None);
57409        }
57410
57411        Ok(Some(Expression::ClusterByColumnsProperty(Box::new(
57412            ClusterByColumnsProperty { columns },
57413        ))))
57414    }
57415
57416    /// Parse BigQuery OPTIONS (...) clause into typed entries when possible.
57417    /// Falls back to generic `Properties` when options are not simple key/value assignments.
57418    fn parse_bigquery_options_property(&mut self) -> Result<Option<Expression>> {
57419        let start = self.current;
57420        if !self.match_identifier("OPTIONS") {
57421            self.current = start;
57422            return Ok(None);
57423        }
57424
57425        let options = self.parse_options_list()?;
57426        if options.is_empty() {
57427            return Ok(Some(Expression::OptionsProperty(Box::new(
57428                OptionsProperty {
57429                    entries: Vec::new(),
57430                },
57431            ))));
57432        }
57433
57434        let mut entries = Vec::new();
57435        for option_expr in &options {
57436            let Some(entry) = Self::option_entry_from_expression(option_expr) else {
57437                return Ok(Some(Expression::Properties(Box::new(Properties {
57438                    expressions: options,
57439                }))));
57440            };
57441            entries.push(entry);
57442        }
57443
57444        Ok(Some(Expression::OptionsProperty(Box::new(
57445            OptionsProperty { entries },
57446        ))))
57447    }
57448
57449    fn option_entry_from_expression(expr: &Expression) -> Option<OptionEntry> {
57450        let Expression::Eq(eq) = expr else {
57451            return None;
57452        };
57453
57454        let key = match &eq.left {
57455            Expression::Column(col) if col.table.is_none() => col.name.clone(),
57456            Expression::Identifier(id) => id.clone(),
57457            Expression::Var(var) => Identifier {
57458                name: var.this.clone(),
57459                quoted: false,
57460                trailing_comments: Vec::new(),
57461                span: None,
57462            },
57463            _ => return None,
57464        };
57465
57466        Some(OptionEntry {
57467            key,
57468            value: eq.right.clone(),
57469        })
57470    }
57471
57472    /// parse_environment_list - Parses Databricks ENVIRONMENT list: (dependencies = '...', environment_version = '...')
57473    /// Parses key=value assignments where values can be string literals
57474    pub fn parse_environment_list(&mut self) -> Result<Vec<Expression>> {
57475        // Expect opening paren
57476        if !self.match_token(TokenType::LParen) {
57477            return Ok(Vec::new());
57478        }
57479
57480        // Parse comma-separated key=value pairs
57481        let mut env_items = Vec::new();
57482        loop {
57483            // Check for empty ENVIRONMENT () or end of list
57484            if self.check(TokenType::RParen) {
57485                break;
57486            }
57487
57488            // Parse key=value using parse_assignment which handles EQ operations
57489            if let Some(opt) = self.parse_assignment()? {
57490                env_items.push(opt);
57491            } else {
57492                break;
57493            }
57494
57495            if !self.match_token(TokenType::Comma) {
57496                break;
57497            }
57498        }
57499
57500        // Expect closing paren
57501        self.expect(TokenType::RParen)?;
57502
57503        Ok(env_items)
57504    }
57505
57506    /// parse_wrapped_properties - Ported from Python _parse_wrapped_properties
57507    /// Parses properties wrapped in parentheses
57508    #[allow(unused_variables, unused_mut)]
57509    pub fn parse_wrapped_properties(&mut self) -> Result<Option<Expression>> {
57510        // Parse wrapped list of properties: (prop1, prop2, ...)
57511        if !self.match_token(TokenType::LParen) {
57512            return Ok(None);
57513        }
57514
57515        let mut props = Vec::new();
57516        loop {
57517            if let Some(prop) = self.parse_property()? {
57518                props.push(prop);
57519            }
57520            if !self.match_token(TokenType::Comma) {
57521                break;
57522            }
57523        }
57524
57525        self.match_token(TokenType::RParen);
57526
57527        if props.is_empty() {
57528            return Ok(None);
57529        }
57530
57531        // Return as a Properties expression
57532        Ok(Some(Expression::Properties(Box::new(Properties {
57533            expressions: props,
57534        }))))
57535    }
57536
57537    /// parse_wrapped_select - Ported from Python _parse_wrapped_select
57538    /// Parses wrapped select statements including PIVOT/UNPIVOT and FROM-first syntax
57539    #[allow(unused_variables, unused_mut)]
57540    pub fn parse_wrapped_select(&mut self, table: bool) -> Result<Option<Expression>> {
57541        // Check for PIVOT/UNPIVOT
57542        let is_unpivot = self.check(TokenType::Unpivot);
57543        if self.match_token(TokenType::Pivot) || self.match_token(TokenType::Unpivot) {
57544            // Call simplified pivot parser
57545            return self.parse_simplified_pivot(is_unpivot);
57546        }
57547
57548        // Check for FROM (DuckDB FROM-first syntax)
57549        if self.match_token(TokenType::From) {
57550            // Parse the FROM clause (table reference)
57551            let from_expr = self.parse_table()?;
57552
57553            // Try to parse a full SELECT
57554            let select = self.parse_select_query()?;
57555
57556            if let Some(sel) = select {
57557                // Apply set operations and query modifiers
57558                let with_ops = self.parse_set_operations_with_expr(Some(sel))?;
57559                return Ok(with_ops);
57560            } else if let Some(from_table) = from_expr {
57561                // Create a SELECT * FROM <table>
57562                let mut select_struct = Select::new();
57563                select_struct.expressions = vec![Expression::Star(Star {
57564                    table: None,
57565                    except: None,
57566                    replace: None,
57567                    rename: None,
57568                    trailing_comments: Vec::new(),
57569                    span: None,
57570                })];
57571                select_struct.from = Some(From {
57572                    expressions: vec![from_table],
57573                });
57574                let select_all = Expression::Select(Box::new(select_struct));
57575                let with_ops = self.parse_set_operations_with_expr(Some(select_all))?;
57576                return Ok(with_ops);
57577            }
57578            return Ok(None);
57579        }
57580
57581        // Regular case: parse table or nested select
57582        let this = if table {
57583            self.parse_table()?
57584        } else {
57585            // Parse nested select without set operations
57586            self.parse_select_query()?
57587        };
57588
57589        if this.is_none() {
57590            return Ok(None);
57591        }
57592
57593        // Apply set operations and query modifiers
57594        let with_ops = self.parse_set_operations_with_expr(this)?;
57595        Ok(with_ops)
57596    }
57597
57598    /// Helper for parse_wrapped_select with default table=false
57599    pub fn parse_wrapped_select_default(&mut self) -> Result<Option<Expression>> {
57600        self.parse_wrapped_select(false)
57601    }
57602
57603    /// parse_xml_element - Implemented from Python _parse_xml_element
57604    /// Python: parser.py:6917-6931
57605    /// Parses XMLELEMENT(NAME name [, expr, ...]) or XMLELEMENT(EVALNAME expr [, expr, ...])
57606    #[allow(unused_variables, unused_mut)]
57607    pub fn parse_xml_element(&mut self) -> Result<Option<Expression>> {
57608        let (this, evalname) = if self.match_text_seq(&["EVALNAME"]) {
57609            // EVALNAME - parse expression for dynamic element name
57610            let expr = self.parse_bitwise()?;
57611            (
57612                expr,
57613                Some(Box::new(Expression::Boolean(BooleanLiteral {
57614                    value: true,
57615                }))),
57616            )
57617        } else {
57618            // NAME - parse static element name
57619            self.match_text_seq(&["NAME"]);
57620            let id = self.parse_id_var()?;
57621            (id, None)
57622        };
57623
57624        // Parse optional expressions (comma-separated content/attributes)
57625        let expressions = if self.match_token(TokenType::Comma) {
57626            self.parse_expression_list()?
57627        } else {
57628            Vec::new()
57629        };
57630
57631        match this {
57632            Some(t) => Ok(Some(Expression::XMLElement(Box::new(XMLElement {
57633                this: Box::new(t),
57634                expressions,
57635                evalname,
57636            })))),
57637            None => Ok(None),
57638        }
57639    }
57640
57641    /// parse_xml_namespace - Ported from Python _parse_xml_namespace
57642    /// Parses XML namespace declarations
57643    #[allow(unused_variables, unused_mut)]
57644    pub fn parse_xml_namespace(&mut self) -> Result<Option<Expression>> {
57645        let mut namespaces = Vec::new();
57646
57647        loop {
57648            // Check for DEFAULT namespace
57649            let is_default = self.match_text_seq(&["DEFAULT"]);
57650
57651            // Parse the URI string
57652            let uri = if is_default {
57653                self.parse_string()?
57654            } else {
57655                // Parse URI with optional alias (AS name)
57656                let uri_expr = self.parse_string()?;
57657                if let Some(u) = uri_expr {
57658                    self.parse_alias_with_expr(Some(u))?
57659                } else {
57660                    None
57661                }
57662            };
57663
57664            if let Some(u) = uri {
57665                namespaces.push(u);
57666            }
57667
57668            // Continue if comma
57669            if !self.match_token(TokenType::Comma) {
57670                break;
57671            }
57672        }
57673
57674        if namespaces.is_empty() {
57675            return Ok(None);
57676        }
57677
57678        // Return as a Tuple (list of namespaces)
57679        Ok(Some(Expression::Tuple(Box::new(Tuple {
57680            expressions: namespaces,
57681        }))))
57682    }
57683
57684    /// parse_xml_table - Implemented from Python _parse_xml_table
57685    /// Python: parser.py:6933-6961
57686    /// Parses XMLTABLE(xpath_expr PASSING xml_doc COLUMNS ...)
57687    #[allow(unused_variables, unused_mut)]
57688    pub fn parse_xml_table(&mut self) -> Result<Option<Expression>> {
57689        // Parse optional XMLNAMESPACES clause
57690        let namespaces = if self.match_text_seq(&["XMLNAMESPACES", "("]) {
57691            let ns = self.parse_xml_namespace()?;
57692            self.match_text_seq(&[")", ","]);
57693            ns.map(Box::new)
57694        } else {
57695            None
57696        };
57697
57698        // Parse XPath expression (string)
57699        let this = self.parse_string()?;
57700        if this.is_none() {
57701            return Ok(None);
57702        }
57703
57704        // Parse PASSING clause
57705        let passing = if self.match_text_seq(&["PASSING"]) {
57706            // BY VALUE is optional
57707            self.match_text_seq(&["BY", "VALUE"]);
57708            // Parse comma-separated expressions.
57709            // Oracle XMLTABLE PASSING accepts full expressions (including function calls),
57710            // not just column references.
57711            // We need to stop before COLUMNS, RETURNING, or )
57712            let mut cols = Vec::new();
57713            loop {
57714                // Check for stop keywords before parsing a column
57715                if !self.is_at_end() {
57716                    let next_text = self.peek().text.to_ascii_uppercase();
57717                    if next_text == "COLUMNS" || next_text == "RETURNING" {
57718                        break;
57719                    }
57720                    if self.check(TokenType::RParen) {
57721                        break;
57722                    }
57723                }
57724                if let Some(col) = self.parse_assignment()? {
57725                    cols.push(col);
57726                } else {
57727                    break;
57728                }
57729                if !self.match_token(TokenType::Comma) {
57730                    break;
57731                }
57732            }
57733            if cols.is_empty() {
57734                None
57735            } else {
57736                Some(Box::new(Expression::Tuple(Box::new(Tuple {
57737                    expressions: cols,
57738                }))))
57739            }
57740        } else {
57741            None
57742        };
57743
57744        // Parse optional RETURNING SEQUENCE BY REF
57745        let by_ref = if self.match_text_seq(&["RETURNING", "SEQUENCE", "BY", "REF"]) {
57746            Some(Box::new(Expression::Boolean(BooleanLiteral {
57747                value: true,
57748            })))
57749        } else {
57750            None
57751        };
57752
57753        // Parse COLUMNS clause
57754        let columns = if self.match_text_seq(&["COLUMNS"]) {
57755            let mut cols = Vec::new();
57756            loop {
57757                // Stop if we hit the closing paren
57758                if self.check(TokenType::RParen) {
57759                    break;
57760                }
57761                // Be permissive with leading commas in multiline XMLTABLE COLUMNS lists.
57762                if self.match_token(TokenType::Comma) {
57763                    continue;
57764                }
57765                if let Some(col_def) = self.parse_field_def()? {
57766                    cols.push(col_def);
57767                } else {
57768                    break;
57769                }
57770                if !self.match_token(TokenType::Comma) {
57771                    break;
57772                }
57773            }
57774            cols
57775        } else {
57776            Vec::new()
57777        };
57778
57779        Ok(Some(Expression::XMLTable(Box::new(XMLTable {
57780            this: Box::new(this.unwrap()),
57781            namespaces,
57782            passing,
57783            columns,
57784            by_ref,
57785        }))))
57786    }
57787
57788    /// Parse UNLOAD statement (Athena/Presto/Redshift)
57789    /// UNLOAD (SELECT ...) TO 'location' WITH (options)
57790    fn parse_unload(&mut self) -> Result<Expression> {
57791        // Collect entire statement as a Command
57792        let mut parts = Vec::new();
57793        parts.push(self.advance().text.clone()); // consume UNLOAD
57794        parts.push(" ".to_string()); // space after UNLOAD
57795
57796        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
57797            let token_type = self.peek().token_type;
57798            let token_text = self.peek().text.clone();
57799
57800            // Track string literals
57801            if token_type == TokenType::String {
57802                parts.push(format!("'{}'", token_text.replace('\'', "''")));
57803                self.skip();
57804                // Add space after string unless followed by punctuation
57805                if !self.is_at_end() {
57806                    let next_type = self.peek().token_type;
57807                    if !matches!(
57808                        next_type,
57809                        TokenType::Comma | TokenType::RParen | TokenType::Semicolon
57810                    ) {
57811                        parts.push(" ".to_string());
57812                    }
57813                }
57814                continue;
57815            }
57816
57817            // Handle ARRAY[...] syntax - no space between ARRAY and [
57818            if token_text.eq_ignore_ascii_case("ARRAY")
57819                && self
57820                    .peek_nth(1)
57821                    .is_some_and(|t| t.token_type == TokenType::LBracket)
57822            {
57823                parts.push(token_text);
57824                self.skip();
57825                // Consume [
57826                parts.push("[".to_string());
57827                self.skip();
57828                // Collect until RBracket
57829                while !self.is_at_end() && !self.check(TokenType::RBracket) {
57830                    let inner_type = self.peek().token_type;
57831                    let inner_text = self.peek().text.clone();
57832                    if inner_type == TokenType::String {
57833                        parts.push(format!("'{}'", inner_text.replace('\'', "''")));
57834                    } else {
57835                        parts.push(inner_text);
57836                    }
57837                    self.skip();
57838                    if self.check(TokenType::Comma) {
57839                        parts.push(", ".to_string());
57840                        self.skip();
57841                    }
57842                }
57843                if self.check(TokenType::RBracket) {
57844                    parts.push("]".to_string());
57845                    self.skip();
57846                }
57847                continue;
57848            }
57849
57850            parts.push(token_text);
57851            self.skip();
57852
57853            // Add space after most tokens except punctuation
57854            if !self.is_at_end() {
57855                let next_type = self.peek().token_type;
57856                let no_space_before = matches!(
57857                    next_type,
57858                    TokenType::Comma
57859                        | TokenType::RParen
57860                        | TokenType::RBracket
57861                        | TokenType::Semicolon
57862                        | TokenType::LBracket
57863                );
57864                let no_space_after = matches!(token_type, TokenType::LParen | TokenType::LBracket);
57865                if !no_space_before && !no_space_after {
57866                    parts.push(" ".to_string());
57867                }
57868            }
57869        }
57870
57871        Ok(Expression::Command(Box::new(Command {
57872            this: parts.join(""),
57873        })))
57874    }
57875
57876    /// Parse USING EXTERNAL FUNCTION statement (Athena)
57877    /// USING EXTERNAL FUNCTION name(params) RETURNS type LAMBDA 'arn' SELECT ...
57878    fn parse_using_external_function(&mut self) -> Result<Expression> {
57879        // Record start position
57880        let start_pos = self.peek().span.start;
57881
57882        // Advance through all tokens until end or semicolon
57883        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
57884            self.skip();
57885        }
57886
57887        // Get end position from the last consumed token
57888        let end_pos = if self.current > 0 {
57889            self.tokens[self.current - 1].span.end
57890        } else {
57891            start_pos
57892        };
57893
57894        // Extract exact text from source if available
57895        let command_text = if let Some(ref source) = self.source {
57896            source[start_pos..end_pos].to_string()
57897        } else {
57898            // Fallback: reconstruct from tokens (loses whitespace)
57899            let mut parts = Vec::new();
57900            for i in 0..self.current {
57901                if self.tokens[i].span.start >= start_pos && self.tokens[i].span.end <= end_pos {
57902                    if self.tokens[i].token_type == TokenType::String {
57903                        parts.push(format!("'{}'", self.tokens[i].text.replace('\'', "''")));
57904                    } else {
57905                        parts.push(self.tokens[i].text.clone());
57906                    }
57907                    if i + 1 < self.current {
57908                        parts.push(" ".to_string());
57909                    }
57910                }
57911            }
57912            parts.join("")
57913        };
57914
57915        Ok(Expression::Command(Box::new(Command {
57916            this: command_text,
57917        })))
57918    }
57919}
57920
57921#[cfg(test)]
57922mod tests {
57923    use super::*;
57924    use crate::traversal::ExpressionWalk;
57925
57926    #[test]
57927    fn test_comment_before_limit() {
57928        let sql = "SELECT a FROM b WHERE foo AND bla\n-- comment 3\nLIMIT 10";
57929        let result = Parser::parse_sql(sql).unwrap();
57930        let output = crate::Generator::sql(&result[0]).unwrap();
57931        assert_eq!(
57932            output,
57933            "SELECT a FROM b WHERE foo AND bla LIMIT 10 /* comment 3 */"
57934        );
57935    }
57936
57937    #[test]
57938    fn test_variadic_array_postgres() {
57939        use crate::dialects::DialectType;
57940        use crate::transpile;
57941
57942        // Test: ARRAY[10, -1, 5, 4.4] should parse correctly in Postgres
57943        let sql = "SELECT ARRAY[10, -1, 5, 4.4]";
57944        let result = transpile(sql, DialectType::PostgreSQL, DialectType::PostgreSQL).unwrap();
57945        eprintln!("Array test: {} -> {}", sql, result[0]);
57946
57947        // Test: VARIADIC ARRAY[10, -1, 5, 4.4] in function call
57948        let sql2 = "SELECT MLEAST(VARIADIC ARRAY[10, -1, 5, 4.4])";
57949        let result2 = transpile(sql2, DialectType::PostgreSQL, DialectType::PostgreSQL).unwrap();
57950        eprintln!("VARIADIC test: {} -> {}", sql2, result2[0]);
57951        assert_eq!(result2[0], sql2);
57952    }
57953
57954    #[test]
57955    fn test_parse_simple_select() {
57956        let result = Parser::parse_sql("SELECT 1").unwrap();
57957        assert_eq!(result.len(), 1);
57958        assert!(result[0].is_select());
57959    }
57960
57961    #[test]
57962    fn test_parse_select_from() {
57963        let result = Parser::parse_sql("SELECT a, b FROM t").unwrap();
57964        assert_eq!(result.len(), 1);
57965
57966        let select = result[0].as_select().unwrap();
57967        assert_eq!(select.expressions.len(), 2);
57968        assert!(select.from.is_some());
57969    }
57970
57971    #[test]
57972    fn test_parse_select_where() {
57973        let result = Parser::parse_sql("SELECT * FROM t WHERE x = 1").unwrap();
57974        let select = result[0].as_select().unwrap();
57975        assert!(select.where_clause.is_some());
57976    }
57977
57978    #[test]
57979    fn test_parse_balances_large_and_chain_depth() {
57980        let mut sql = String::from("SELECT 1 WHERE c0 = 0");
57981        for i in 1..4096 {
57982            sql.push_str(&format!(" AND c{i} = {i}"));
57983        }
57984
57985        let result = Parser::parse_sql(&sql).unwrap();
57986        let select = result[0].as_select().unwrap();
57987        let where_clause = select.where_clause.as_ref().expect("WHERE clause missing");
57988        let depth = where_clause.this.tree_depth();
57989        assert!(
57990            depth < 128,
57991            "Expected balanced boolean tree depth, got {}",
57992            depth
57993        );
57994    }
57995
57996    #[test]
57997    fn test_parse_balances_large_or_chain_depth() {
57998        let mut sql = String::from("SELECT 1 WHERE c0 = 0");
57999        for i in 1..4096 {
58000            sql.push_str(&format!(" OR c{i} = {i}"));
58001        }
58002
58003        let result = Parser::parse_sql(&sql).unwrap();
58004        let select = result[0].as_select().unwrap();
58005        let where_clause = select.where_clause.as_ref().expect("WHERE clause missing");
58006        let depth = where_clause.this.tree_depth();
58007        assert!(
58008            depth < 128,
58009            "Expected balanced boolean tree depth, got {}",
58010            depth
58011        );
58012    }
58013
58014    #[test]
58015    fn test_parse_select_join() {
58016        let result = Parser::parse_sql("SELECT * FROM a JOIN b ON a.id = b.id").unwrap();
58017        let select = result[0].as_select().unwrap();
58018        assert_eq!(select.joins.len(), 1);
58019        assert_eq!(select.joins[0].kind, JoinKind::Inner);
58020    }
58021
58022    #[test]
58023    fn test_parse_expression_precedence() {
58024        let result = Parser::parse_sql("SELECT 1 + 2 * 3").unwrap();
58025        let select = result[0].as_select().unwrap();
58026        // Should parse as 1 + (2 * 3) due to precedence
58027        assert!(matches!(select.expressions[0], Expression::Add(_)));
58028    }
58029
58030    #[test]
58031    fn test_parse_function() {
58032        // COUNT(*) is now a typed Count expression
58033        let result = Parser::parse_sql("SELECT COUNT(*)").unwrap();
58034        let select = result[0].as_select().unwrap();
58035        assert!(matches!(select.expressions[0], Expression::Count(_)));
58036
58037        // Unknown functions stay as generic Function
58038        let result = Parser::parse_sql("SELECT MY_CUSTOM_FUNC(name)").unwrap();
58039        let select = result[0].as_select().unwrap();
58040        assert!(matches!(select.expressions[0], Expression::Function(_)));
58041
58042        // Known aggregate functions are now typed
58043        let result = Parser::parse_sql("SELECT SUM(amount)").unwrap();
58044        let select = result[0].as_select().unwrap();
58045        assert!(matches!(select.expressions[0], Expression::Sum(_)));
58046    }
58047
58048    #[test]
58049    fn test_parse_window_function() {
58050        let result =
58051            Parser::parse_sql("SELECT ROW_NUMBER() OVER (PARTITION BY category ORDER BY id)")
58052                .unwrap();
58053        let select = result[0].as_select().unwrap();
58054        assert!(matches!(
58055            select.expressions[0],
58056            Expression::WindowFunction(_)
58057        ));
58058    }
58059
58060    #[test]
58061    fn test_parse_window_function_with_frame() {
58062        let result = Parser::parse_sql("SELECT SUM(amount) OVER (ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)").unwrap();
58063        let select = result[0].as_select().unwrap();
58064        assert!(matches!(
58065            select.expressions[0],
58066            Expression::WindowFunction(_)
58067        ));
58068    }
58069
58070    #[test]
58071    fn test_parse_subscript() {
58072        // Array subscript
58073        let result = Parser::parse_sql("SELECT arr[0]").unwrap();
58074        let select = result[0].as_select().unwrap();
58075        assert!(matches!(select.expressions[0], Expression::Subscript(_)));
58076
58077        // Function result subscript
58078        let result = Parser::parse_sql("SELECT SPLIT(name, ',')[0]").unwrap();
58079        let select = result[0].as_select().unwrap();
58080        assert!(matches!(select.expressions[0], Expression::Subscript(_)));
58081    }
58082
58083    #[test]
58084    fn test_parse_case() {
58085        let result = Parser::parse_sql("SELECT CASE WHEN x = 1 THEN 'a' ELSE 'b' END").unwrap();
58086        let select = result[0].as_select().unwrap();
58087        assert!(matches!(select.expressions[0], Expression::Case(_)));
58088    }
58089
58090    #[test]
58091    fn test_parse_insert() {
58092        let result = Parser::parse_sql("INSERT INTO t (a, b) VALUES (1, 2)").unwrap();
58093        assert!(matches!(result[0], Expression::Insert(_)));
58094    }
58095
58096    #[test]
58097    fn test_parse_template_variable() {
58098        // Test Databricks/Hive ${variable} syntax
58099        let result = Parser::parse_sql("SELECT ${x} FROM ${y} WHERE ${z} > 1").unwrap();
58100        let select = result[0].as_select().unwrap();
58101        // The expression should be a Parameter with DollarBrace style
58102        assert!(
58103            matches!(&select.expressions[0], Expression::Parameter(p) if p.name == Some("x".to_string()))
58104        );
58105        // Check the style is DollarBrace
58106        if let Expression::Parameter(p) = &select.expressions[0] {
58107            assert_eq!(p.style, ParameterStyle::DollarBrace);
58108        }
58109    }
58110
58111    #[test]
58112    fn test_parse_update() {
58113        let result = Parser::parse_sql("UPDATE t SET a = 1 WHERE b = 2").unwrap();
58114        assert!(matches!(result[0], Expression::Update(_)));
58115    }
58116
58117    #[test]
58118    fn test_parse_delete() {
58119        let result = Parser::parse_sql("DELETE FROM t WHERE a = 1").unwrap();
58120        assert!(matches!(result[0], Expression::Delete(_)));
58121    }
58122
58123    // DDL tests
58124    #[test]
58125    fn test_parse_create_table() {
58126        let result = Parser::parse_sql(
58127            "CREATE TABLE users (id INT PRIMARY KEY, name VARCHAR(100) NOT NULL)",
58128        )
58129        .unwrap();
58130        assert!(matches!(result[0], Expression::CreateTable(_)));
58131
58132        if let Expression::CreateTable(ct) = &result[0] {
58133            assert_eq!(ct.name.name.name, "users");
58134            assert_eq!(ct.columns.len(), 2);
58135            assert!(ct.columns[0].primary_key);
58136            assert_eq!(ct.columns[1].nullable, Some(false));
58137        }
58138    }
58139
58140    #[test]
58141    fn test_parse_create_table_if_not_exists() {
58142        let result = Parser::parse_sql("CREATE TABLE IF NOT EXISTS t (id INT)").unwrap();
58143        if let Expression::CreateTable(ct) = &result[0] {
58144            assert!(ct.if_not_exists);
58145        }
58146    }
58147
58148    #[test]
58149    fn test_parse_create_temporary_table() {
58150        let result = Parser::parse_sql("CREATE TEMPORARY TABLE t (id INT)").unwrap();
58151        if let Expression::CreateTable(ct) = &result[0] {
58152            assert!(ct.temporary);
58153        }
58154    }
58155
58156    #[test]
58157    fn test_bigquery_create_table_properties_are_typed() {
58158        use crate::DialectType;
58159
58160        let sql = "CREATE OR REPLACE TABLE `p1`.`d1`.`t1` PARTITION BY DATE_TRUNC(day, month) CLUSTER BY some_cluster_column OPTIONS(description='', labels=[('l1', 'v1'), ('l2', 'v2')]) AS SELECT CURRENT_DATE AS day, DATE_TRUNC(CURRENT_DATE(), month) AS month, 'c' AS some_cluster_column";
58161        let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
58162
58163        let create = match &parsed[0] {
58164            Expression::CreateTable(ct) => ct,
58165            other => panic!(
58166                "Expected CreateTable, got {:?}",
58167                std::mem::discriminant(other)
58168            ),
58169        };
58170
58171        assert!(
58172            create
58173                .properties
58174                .iter()
58175                .any(|p| matches!(p, Expression::PartitionByProperty(_))),
58176            "Expected typed PARTITION BY property"
58177        );
58178        assert!(
58179            create
58180                .properties
58181                .iter()
58182                .any(|p| matches!(p, Expression::ClusterByColumnsProperty(_))),
58183            "Expected typed CLUSTER BY property"
58184        );
58185        assert!(
58186            create
58187                .properties
58188                .iter()
58189                .any(|p| matches!(p, Expression::OptionsProperty(_))),
58190            "Expected typed OPTIONS property"
58191        );
58192        assert!(
58193            !create
58194                .properties
58195                .iter()
58196                .any(|p| matches!(p, Expression::Raw(_))),
58197            "BigQuery table properties should not fall back to Raw"
58198        );
58199
58200        let options = create
58201            .properties
58202            .iter()
58203            .find_map(|p| match p {
58204                Expression::OptionsProperty(o) => Some(o),
58205                _ => None,
58206            })
58207            .expect("Expected OptionsProperty");
58208        assert_eq!(options.entries.len(), 2);
58209        assert_eq!(options.entries[0].key.name, "description");
58210        assert_eq!(options.entries[1].key.name, "labels");
58211    }
58212
58213    #[test]
58214    fn test_bigquery_create_table_properties_roundtrip() {
58215        use crate::DialectType;
58216
58217        let sql = "CREATE TABLE t1 PARTITION BY DATE_TRUNC(day, month) CLUSTER BY some_cluster_column OPTIONS(description='', labels=[('l1', 'v1')]) AS SELECT 1 AS day, 1 AS month, 'c' AS some_cluster_column";
58218        let expected = "CREATE TABLE t1 PARTITION BY DATE_TRUNC(day, month) CLUSTER BY some_cluster_column OPTIONS (description='', labels=[('l1', 'v1')]) AS SELECT 1 AS day, 1 AS month, 'c' AS some_cluster_column";
58219        let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
58220        let generated = crate::generate(&parsed[0], DialectType::BigQuery).unwrap();
58221        assert_eq!(generated, expected);
58222    }
58223
58224    #[test]
58225    fn test_parse_drop_table() {
58226        let result = Parser::parse_sql("DROP TABLE IF EXISTS users CASCADE").unwrap();
58227        assert!(matches!(result[0], Expression::DropTable(_)));
58228
58229        if let Expression::DropTable(dt) = &result[0] {
58230            assert!(dt.if_exists);
58231            assert!(dt.cascade);
58232            assert_eq!(dt.names.len(), 1);
58233        }
58234    }
58235
58236    #[test]
58237    fn test_parse_alter_table_add_column() {
58238        let result = Parser::parse_sql("ALTER TABLE users ADD COLUMN email VARCHAR(255)").unwrap();
58239        assert!(matches!(result[0], Expression::AlterTable(_)));
58240
58241        if let Expression::AlterTable(at) = &result[0] {
58242            assert_eq!(at.actions.len(), 1);
58243            assert!(matches!(at.actions[0], AlterTableAction::AddColumn { .. }));
58244        }
58245    }
58246
58247    #[test]
58248    fn test_parse_alter_table_drop_column() {
58249        let result = Parser::parse_sql("ALTER TABLE users DROP COLUMN email").unwrap();
58250        if let Expression::AlterTable(at) = &result[0] {
58251            assert!(matches!(at.actions[0], AlterTableAction::DropColumn { .. }));
58252        }
58253    }
58254
58255    #[test]
58256    fn test_tsql_alter_table_set_options() {
58257        use crate::{transpile, DialectType};
58258        let tests = vec![
58259            "ALTER TABLE tbl SET (SYSTEM_VERSIONING=OFF)",
58260            "ALTER TABLE tbl SET (FILESTREAM_ON = 'test')",
58261            "ALTER TABLE tbl SET (DATA_DELETION=ON)",
58262            "ALTER TABLE tbl SET (DATA_DELETION=OFF)",
58263            "ALTER TABLE tbl SET (SYSTEM_VERSIONING=ON(HISTORY_TABLE=db.tbl, DATA_CONSISTENCY_CHECK=OFF, HISTORY_RETENTION_PERIOD=5 DAYS))",
58264            "ALTER TABLE tbl SET (SYSTEM_VERSIONING=ON(HISTORY_TABLE=db.tbl, HISTORY_RETENTION_PERIOD=INFINITE))",
58265            "ALTER TABLE tbl SET (DATA_DELETION=ON(FILTER_COLUMN=col, RETENTION_PERIOD=5 MONTHS))",
58266        ];
58267        for sql in tests {
58268            let result = transpile(sql, DialectType::TSQL, DialectType::TSQL);
58269            match result {
58270                Ok(output) => {
58271                    assert_eq!(output[0].trim(), sql, "Identity failed for: {}", sql);
58272                }
58273                Err(e) => {
58274                    panic!("Parse/generate failed for: {} -- {:?}", sql, e);
58275                }
58276            }
58277        }
58278    }
58279
58280    #[test]
58281    fn test_parse_create_index() {
58282        let result = Parser::parse_sql("CREATE UNIQUE INDEX idx_email ON users (email)").unwrap();
58283        assert!(matches!(result[0], Expression::CreateIndex(_)));
58284
58285        if let Expression::CreateIndex(ci) = &result[0] {
58286            assert!(ci.unique);
58287            assert_eq!(ci.name.name, "idx_email");
58288            assert_eq!(ci.table.name.name, "users");
58289            assert_eq!(ci.columns.len(), 1);
58290        }
58291    }
58292
58293    #[test]
58294    fn test_parse_drop_index() {
58295        let result = Parser::parse_sql("DROP INDEX IF EXISTS idx_email ON users").unwrap();
58296        assert!(matches!(result[0], Expression::DropIndex(_)));
58297
58298        if let Expression::DropIndex(di) = &result[0] {
58299            assert!(di.if_exists);
58300            assert!(di.table.is_some());
58301        }
58302    }
58303
58304    #[test]
58305    fn test_parse_create_view() {
58306        let result =
58307            Parser::parse_sql("CREATE VIEW active_users AS SELECT * FROM users WHERE active = 1")
58308                .unwrap();
58309        assert!(matches!(result[0], Expression::CreateView(_)));
58310    }
58311
58312    #[test]
58313    fn test_parse_create_materialized_view() {
58314        let result =
58315            Parser::parse_sql("CREATE MATERIALIZED VIEW stats AS SELECT COUNT(*) FROM users")
58316                .unwrap();
58317        if let Expression::CreateView(cv) = &result[0] {
58318            assert!(cv.materialized);
58319        }
58320    }
58321
58322    #[test]
58323    fn test_hive_stored_by() {
58324        use crate::{transpile, DialectType};
58325        let sql = "CREATE EXTERNAL TABLE X (y INT) STORED BY 'x'";
58326        let result = transpile(sql, DialectType::Hive, DialectType::Hive);
58327        match result {
58328            Ok(output) => {
58329                assert_eq!(output[0].trim(), sql, "Identity failed for: {}", sql);
58330            }
58331            Err(e) => {
58332                panic!("Parse/generate failed for: {} -- {:?}", sql, e);
58333            }
58334        }
58335    }
58336
58337    #[test]
58338    fn test_hive_row_format_serde() {
58339        use crate::{transpile, DialectType};
58340
58341        // Test various Hive CREATE TABLE syntax
58342        let test_cases = vec![
58343            (
58344                "CREATE TABLE my_table (a7 ARRAY<DATE>)",
58345                "CREATE TABLE my_table (a7 ARRAY<DATE>)",
58346            ),
58347            (
58348                "CREATE EXTERNAL TABLE my_table (x INT) ROW FORMAT SERDE 'a'",
58349                "CREATE EXTERNAL TABLE my_table (x INT) ROW FORMAT SERDE 'a'",
58350            ),
58351            (
58352                "CREATE EXTERNAL TABLE my_table (x INT) STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c'",
58353                "CREATE EXTERNAL TABLE my_table (x INT) STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c'",
58354            ),
58355            (
58356                "CREATE EXTERNAL TABLE my_table (x INT) LOCATION 'd'",
58357                "CREATE EXTERNAL TABLE my_table (x INT) LOCATION 'd'",
58358            ),
58359            (
58360                "CREATE EXTERNAL TABLE my_table (x INT) TBLPROPERTIES ('e'='f')",
58361                "CREATE EXTERNAL TABLE my_table (x INT) TBLPROPERTIES ('e'='f')",
58362            ),
58363            (
58364                "CREATE EXTERNAL TABLE X (y INT) STORED BY 'x'",
58365                "CREATE EXTERNAL TABLE X (y INT) STORED BY 'x'",
58366            ),
58367        ];
58368
58369        for (sql, expected) in &test_cases {
58370            let result = transpile(sql, DialectType::Hive, DialectType::Hive);
58371            match result {
58372                Ok(output) => {
58373                    assert_eq!(output[0].trim(), *expected, "Identity failed for: {}", sql);
58374                }
58375                Err(e) => {
58376                    panic!("Parse/generate failed for: {} -- {:?}", sql, e);
58377                }
58378            }
58379        }
58380
58381        // Test full case with all Hive table properties
58382        let sql = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";
58383        let result = transpile(sql, DialectType::Hive, DialectType::Hive);
58384        match result {
58385            Ok(output) => {
58386                assert_eq!(output[0].trim(), sql, "Identity failed for: {}", sql);
58387            }
58388            Err(e) => {
58389                panic!("Parse/generate failed for: {} -- {:?}", sql, e);
58390            }
58391        }
58392    }
58393
58394    #[test]
58395    fn test_parse_drop_view() {
58396        let result = Parser::parse_sql("DROP VIEW IF EXISTS active_users").unwrap();
58397        assert!(matches!(result[0], Expression::DropView(_)));
58398    }
58399
58400    #[test]
58401    fn test_parse_truncate() {
58402        let result = Parser::parse_sql("TRUNCATE TABLE users CASCADE").unwrap();
58403        assert!(matches!(result[0], Expression::Truncate(_)));
58404
58405        if let Expression::Truncate(tr) = &result[0] {
58406            assert!(tr.cascade);
58407        }
58408    }
58409
58410    // Tests for typed aggregate functions
58411    #[test]
58412    fn test_parse_typed_aggregates() {
58413        // COUNT with DISTINCT
58414        let result = Parser::parse_sql("SELECT COUNT(DISTINCT user_id)").unwrap();
58415        let select = result[0].as_select().unwrap();
58416        if let Expression::Count(c) = &select.expressions[0] {
58417            assert!(c.distinct);
58418            assert!(!c.star);
58419        } else {
58420            panic!("Expected Count expression");
58421        }
58422
58423        // AVG
58424        let result = Parser::parse_sql("SELECT AVG(price)").unwrap();
58425        let select = result[0].as_select().unwrap();
58426        assert!(matches!(select.expressions[0], Expression::Avg(_)));
58427
58428        // MIN/MAX
58429        let result = Parser::parse_sql("SELECT MIN(a), MAX(b)").unwrap();
58430        let select = result[0].as_select().unwrap();
58431        assert!(matches!(select.expressions[0], Expression::Min(_)));
58432        assert!(matches!(select.expressions[1], Expression::Max(_)));
58433
58434        // STDDEV/VARIANCE
58435        let result = Parser::parse_sql("SELECT STDDEV(x), VARIANCE(y)").unwrap();
58436        let select = result[0].as_select().unwrap();
58437        assert!(matches!(select.expressions[0], Expression::Stddev(_)));
58438        assert!(matches!(select.expressions[1], Expression::Variance(_)));
58439    }
58440
58441    #[test]
58442    fn test_parse_typed_window_functions() {
58443        // ROW_NUMBER
58444        let result = Parser::parse_sql("SELECT ROW_NUMBER() OVER (ORDER BY id)").unwrap();
58445        let select = result[0].as_select().unwrap();
58446        if let Expression::WindowFunction(wf) = &select.expressions[0] {
58447            assert!(matches!(wf.this, Expression::RowNumber(_)));
58448        } else {
58449            panic!("Expected WindowFunction");
58450        }
58451
58452        // RANK and DENSE_RANK
58453        let result = Parser::parse_sql("SELECT RANK() OVER (), DENSE_RANK() OVER ()").unwrap();
58454        let select = result[0].as_select().unwrap();
58455        if let Expression::WindowFunction(wf) = &select.expressions[0] {
58456            assert!(matches!(wf.this, Expression::Rank(_)));
58457        }
58458        if let Expression::WindowFunction(wf) = &select.expressions[1] {
58459            assert!(matches!(wf.this, Expression::DenseRank(_)));
58460        }
58461
58462        // LEAD/LAG
58463        let result = Parser::parse_sql("SELECT LEAD(val, 1, 0) OVER (ORDER BY id)").unwrap();
58464        let select = result[0].as_select().unwrap();
58465        if let Expression::WindowFunction(wf) = &select.expressions[0] {
58466            if let Expression::Lead(f) = &wf.this {
58467                assert!(f.offset.is_some());
58468                assert!(f.default.is_some());
58469            } else {
58470                panic!("Expected Lead");
58471            }
58472        }
58473
58474        // NTILE
58475        let result = Parser::parse_sql("SELECT NTILE(4) OVER (ORDER BY score)").unwrap();
58476        let select = result[0].as_select().unwrap();
58477        if let Expression::WindowFunction(wf) = &select.expressions[0] {
58478            assert!(matches!(wf.this, Expression::NTile(_)));
58479        }
58480    }
58481
58482    #[test]
58483    fn test_parse_string_functions() {
58484        // CONTAINS, STARTS_WITH, ENDS_WITH
58485        let result = Parser::parse_sql("SELECT CONTAINS(name, 'test')").unwrap();
58486        let select = result[0].as_select().unwrap();
58487        assert!(matches!(select.expressions[0], Expression::Contains(_)));
58488
58489        let result = Parser::parse_sql("SELECT STARTS_WITH(name, 'A')").unwrap();
58490        let select = result[0].as_select().unwrap();
58491        assert!(matches!(select.expressions[0], Expression::StartsWith(_)));
58492
58493        let result = Parser::parse_sql("SELECT ENDS_WITH(name, 'z')").unwrap();
58494        let select = result[0].as_select().unwrap();
58495        assert!(matches!(select.expressions[0], Expression::EndsWith(_)));
58496    }
58497
58498    #[test]
58499    fn test_parse_math_functions() {
58500        // MOD function
58501        let result = Parser::parse_sql("SELECT MOD(10, 3)").unwrap();
58502        let select = result[0].as_select().unwrap();
58503        assert!(matches!(select.expressions[0], Expression::ModFunc(_)));
58504
58505        // RANDOM and RAND
58506        let result = Parser::parse_sql("SELECT RANDOM()").unwrap();
58507        let select = result[0].as_select().unwrap();
58508        assert!(matches!(select.expressions[0], Expression::Random(_)));
58509
58510        let result = Parser::parse_sql("SELECT RAND(42)").unwrap();
58511        let select = result[0].as_select().unwrap();
58512        assert!(matches!(select.expressions[0], Expression::Rand(_)));
58513
58514        // Trigonometric functions
58515        let result = Parser::parse_sql("SELECT SIN(x), COS(x), TAN(x)").unwrap();
58516        let select = result[0].as_select().unwrap();
58517        assert!(matches!(select.expressions[0], Expression::Sin(_)));
58518        assert!(matches!(select.expressions[1], Expression::Cos(_)));
58519        assert!(matches!(select.expressions[2], Expression::Tan(_)));
58520    }
58521
58522    #[test]
58523    fn test_parse_date_functions() {
58524        // Date part extraction functions
58525        let result =
58526            Parser::parse_sql("SELECT YEAR(date_col), MONTH(date_col), DAY(date_col)").unwrap();
58527        let select = result[0].as_select().unwrap();
58528        assert!(matches!(select.expressions[0], Expression::Year(_)));
58529        assert!(matches!(select.expressions[1], Expression::Month(_)));
58530        assert!(matches!(select.expressions[2], Expression::Day(_)));
58531
58532        // EPOCH and EPOCH_MS
58533        let result = Parser::parse_sql("SELECT EPOCH(ts), EPOCH_MS(ts)").unwrap();
58534        let select = result[0].as_select().unwrap();
58535        assert!(matches!(select.expressions[0], Expression::Epoch(_)));
58536        assert!(matches!(select.expressions[1], Expression::EpochMs(_)));
58537    }
58538
58539    #[test]
58540    fn test_parse_array_functions() {
58541        // ARRAY_LENGTH
58542        let result = Parser::parse_sql("SELECT ARRAY_LENGTH(arr)").unwrap();
58543        let select = result[0].as_select().unwrap();
58544        assert!(matches!(select.expressions[0], Expression::ArrayLength(_)));
58545
58546        // ARRAY_CONTAINS
58547        let result = Parser::parse_sql("SELECT ARRAY_CONTAINS(arr, 1)").unwrap();
58548        let select = result[0].as_select().unwrap();
58549        assert!(matches!(
58550            select.expressions[0],
58551            Expression::ArrayContains(_)
58552        ));
58553
58554        // EXPLODE
58555        let result = Parser::parse_sql("SELECT EXPLODE(arr)").unwrap();
58556        let select = result[0].as_select().unwrap();
58557        assert!(matches!(select.expressions[0], Expression::Explode(_)));
58558    }
58559
58560    #[test]
58561    fn test_parse_json_functions() {
58562        // JSON_EXTRACT
58563        let result = Parser::parse_sql("SELECT JSON_EXTRACT(data, '$.name')").unwrap();
58564        let select = result[0].as_select().unwrap();
58565        assert!(matches!(select.expressions[0], Expression::JsonExtract(_)));
58566
58567        // JSON_ARRAY_LENGTH
58568        let result = Parser::parse_sql("SELECT JSON_ARRAY_LENGTH(arr)").unwrap();
58569        let select = result[0].as_select().unwrap();
58570        assert!(matches!(
58571            select.expressions[0],
58572            Expression::JsonArrayLength(_)
58573        ));
58574
58575        // TO_JSON and PARSE_JSON
58576        let result = Parser::parse_sql("SELECT TO_JSON(obj), PARSE_JSON(str)").unwrap();
58577        let select = result[0].as_select().unwrap();
58578        assert!(matches!(select.expressions[0], Expression::ToJson(_)));
58579        assert!(matches!(select.expressions[1], Expression::ParseJson(_)));
58580
58581        // JSON literal: JSON '"foo"' -> ParseJson
58582        let result = Parser::parse_sql("SELECT JSON '\"foo\"'").unwrap();
58583        let select = result[0].as_select().unwrap();
58584        assert!(
58585            matches!(select.expressions[0], Expression::ParseJson(_)),
58586            "Expected ParseJson, got: {:?}",
58587            select.expressions[0]
58588        );
58589    }
58590
58591    #[test]
58592    fn test_parse_map_functions() {
58593        // MAP_KEYS and MAP_VALUES
58594        let result = Parser::parse_sql("SELECT MAP_KEYS(m), MAP_VALUES(m)").unwrap();
58595        let select = result[0].as_select().unwrap();
58596        assert!(matches!(select.expressions[0], Expression::MapKeys(_)));
58597        assert!(matches!(select.expressions[1], Expression::MapValues(_)));
58598
58599        // ELEMENT_AT
58600        let result = Parser::parse_sql("SELECT ELEMENT_AT(m, 'key')").unwrap();
58601        let select = result[0].as_select().unwrap();
58602        assert!(matches!(select.expressions[0], Expression::ElementAt(_)));
58603    }
58604
58605    #[test]
58606    fn test_parse_date_literals() {
58607        // DATE literal (generic mode normalizes to CAST)
58608        let result = Parser::parse_sql("SELECT DATE '2024-01-15'").unwrap();
58609        let select = result[0].as_select().unwrap();
58610        match &select.expressions[0] {
58611            Expression::Cast(cast) => {
58612                match &cast.this {
58613                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
58614                        let Literal::String(s) = lit.as_ref() else {
58615                            unreachable!()
58616                        };
58617                        assert_eq!(s, "2024-01-15")
58618                    }
58619                    other => panic!("Expected String literal in Cast, got {:?}", other),
58620                }
58621                assert!(matches!(cast.to, DataType::Date));
58622            }
58623            other => panic!("Expected Cast expression, got {:?}", other),
58624        }
58625
58626        // TIME literal
58627        let result = Parser::parse_sql("SELECT TIME '10:30:00'").unwrap();
58628        let select = result[0].as_select().unwrap();
58629        match &select.expressions[0] {
58630            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)) => {
58631                let Literal::Time(t) = lit.as_ref() else {
58632                    unreachable!()
58633                };
58634                assert_eq!(t, "10:30:00");
58635            }
58636            _ => panic!("Expected Time literal"),
58637        }
58638
58639        // TIMESTAMP literal -> CAST in generic mode
58640        let result = Parser::parse_sql("SELECT TIMESTAMP '2024-01-15 10:30:00'").unwrap();
58641        let select = result[0].as_select().unwrap();
58642        match &select.expressions[0] {
58643            Expression::Cast(cast) => {
58644                match &cast.this {
58645                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
58646                        let Literal::String(s) = lit.as_ref() else {
58647                            unreachable!()
58648                        };
58649                        assert_eq!(s, "2024-01-15 10:30:00")
58650                    }
58651                    other => panic!("Expected String literal inside Cast, got {:?}", other),
58652                }
58653                assert!(matches!(
58654                    &cast.to,
58655                    DataType::Timestamp {
58656                        precision: None,
58657                        timezone: false
58658                    }
58659                ));
58660            }
58661            _ => panic!("Expected Cast expression for TIMESTAMP literal"),
58662        }
58663    }
58664
58665    #[test]
58666    fn test_parse_star_exclude() {
58667        // EXCLUDE with multiple columns
58668        let result = Parser::parse_sql("SELECT * EXCLUDE (col1, col2) FROM t").unwrap();
58669        let select = result[0].as_select().unwrap();
58670        if let Expression::Star(star) = &select.expressions[0] {
58671            assert!(star.except.is_some());
58672            let except = star.except.as_ref().unwrap();
58673            assert_eq!(except.len(), 2);
58674            assert_eq!(except[0].name, "col1");
58675            assert_eq!(except[1].name, "col2");
58676        } else {
58677            panic!("Expected Star expression");
58678        }
58679
58680        // EXCEPT (BigQuery syntax)
58681        let result = Parser::parse_sql("SELECT * EXCEPT (id, created_at) FROM t").unwrap();
58682        let select = result[0].as_select().unwrap();
58683        if let Expression::Star(star) = &select.expressions[0] {
58684            assert!(star.except.is_some());
58685        } else {
58686            panic!("Expected Star expression");
58687        }
58688
58689        // table.* with EXCLUDE
58690        let result = Parser::parse_sql("SELECT t.* EXCLUDE (col1) FROM t").unwrap();
58691        let select = result[0].as_select().unwrap();
58692        if let Expression::Star(star) = &select.expressions[0] {
58693            assert!(star.table.is_some());
58694            assert_eq!(star.table.as_ref().unwrap().name, "t");
58695            assert!(star.except.is_some());
58696        } else {
58697            panic!("Expected Star expression");
58698        }
58699    }
58700
58701    #[test]
58702    fn test_parse_star_replace() {
58703        // REPLACE with single expression
58704        let result = Parser::parse_sql("SELECT * REPLACE (UPPER(name) AS name) FROM t").unwrap();
58705        let select = result[0].as_select().unwrap();
58706        if let Expression::Star(star) = &select.expressions[0] {
58707            assert!(star.replace.is_some());
58708            let replace = star.replace.as_ref().unwrap();
58709            assert_eq!(replace.len(), 1);
58710            assert_eq!(replace[0].alias.name, "name");
58711        } else {
58712            panic!("Expected Star expression");
58713        }
58714
58715        // REPLACE with multiple expressions
58716        let result = Parser::parse_sql("SELECT * REPLACE (a + 1 AS a, b * 2 AS b) FROM t").unwrap();
58717        let select = result[0].as_select().unwrap();
58718        if let Expression::Star(star) = &select.expressions[0] {
58719            let replace = star.replace.as_ref().unwrap();
58720            assert_eq!(replace.len(), 2);
58721        } else {
58722            panic!("Expected Star expression");
58723        }
58724    }
58725
58726    #[test]
58727    fn test_parse_star_rename() {
58728        // RENAME with multiple columns
58729        let result =
58730            Parser::parse_sql("SELECT * RENAME (old_col AS new_col, x AS y) FROM t").unwrap();
58731        let select = result[0].as_select().unwrap();
58732        if let Expression::Star(star) = &select.expressions[0] {
58733            assert!(star.rename.is_some());
58734            let rename = star.rename.as_ref().unwrap();
58735            assert_eq!(rename.len(), 2);
58736            assert_eq!(rename[0].0.name, "old_col");
58737            assert_eq!(rename[0].1.name, "new_col");
58738        } else {
58739            panic!("Expected Star expression");
58740        }
58741    }
58742
58743    #[test]
58744    fn test_parse_star_combined() {
58745        // EXCLUDE + REPLACE combined
58746        let result =
58747            Parser::parse_sql("SELECT * EXCLUDE (id) REPLACE (name || '!' AS name) FROM t")
58748                .unwrap();
58749        let select = result[0].as_select().unwrap();
58750        if let Expression::Star(star) = &select.expressions[0] {
58751            assert!(star.except.is_some());
58752            assert!(star.replace.is_some());
58753        } else {
58754            panic!("Expected Star expression");
58755        }
58756    }
58757
58758    #[test]
58759    fn test_parse_spatial_types() {
58760        // GEOMETRY with subtype and SRID (PostgreSQL syntax)
58761        let result = Parser::parse_sql("CREATE TABLE t (geom GEOMETRY(Point, 4326))").unwrap();
58762        if let Expression::CreateTable(ct) = &result[0] {
58763            assert_eq!(ct.columns.len(), 1);
58764            match &ct.columns[0].data_type {
58765                DataType::Geometry { subtype, srid } => {
58766                    assert_eq!(subtype.as_deref(), Some("POINT"));
58767                    assert_eq!(*srid, Some(4326));
58768                }
58769                _ => panic!("Expected Geometry type"),
58770            }
58771        }
58772
58773        // GEOGRAPHY without parameters
58774        let result = Parser::parse_sql("CREATE TABLE t (loc GEOGRAPHY)").unwrap();
58775        if let Expression::CreateTable(ct) = &result[0] {
58776            match &ct.columns[0].data_type {
58777                DataType::Geography { subtype, srid } => {
58778                    assert!(subtype.is_none());
58779                    assert!(srid.is_none());
58780                }
58781                _ => panic!("Expected Geography type"),
58782            }
58783        }
58784
58785        // GEOMETRY subtype only (no SRID)
58786        let result = Parser::parse_sql("CREATE TABLE t (geom GEOMETRY(LineString))").unwrap();
58787        if let Expression::CreateTable(ct) = &result[0] {
58788            match &ct.columns[0].data_type {
58789                DataType::Geometry { subtype, srid } => {
58790                    assert_eq!(subtype.as_deref(), Some("LINESTRING"));
58791                    assert!(srid.is_none());
58792                }
58793                _ => panic!("Expected Geometry type"),
58794            }
58795        }
58796
58797        // Simple POINT type (MySQL-style without SRID)
58798        let result = Parser::parse_sql("CREATE TABLE t (pt POINT)").unwrap();
58799        if let Expression::CreateTable(ct) = &result[0] {
58800            match &ct.columns[0].data_type {
58801                DataType::Geometry { subtype, srid } => {
58802                    assert_eq!(subtype.as_deref(), Some("POINT"));
58803                    assert!(srid.is_none());
58804                }
58805                _ => panic!("Expected Geometry type"),
58806            }
58807        }
58808    }
58809
58810    #[test]
58811    fn test_parse_duckdb_pivot_simple() {
58812        let sql = "PIVOT Cities ON Year USING SUM(Population)";
58813        let result = Parser::parse_sql(sql);
58814        assert!(
58815            result.is_ok(),
58816            "Failed to parse: {} - {:?}",
58817            sql,
58818            result.err()
58819        );
58820        let stmts = result.unwrap();
58821        assert_eq!(
58822            stmts.len(),
58823            1,
58824            "Expected 1 statement, got {}: {:?}",
58825            stmts.len(),
58826            stmts
58827        );
58828        match &stmts[0] {
58829            Expression::Pivot(p) => {
58830                assert!(!p.unpivot);
58831                assert!(!p.expressions.is_empty(), "Should have ON expressions");
58832                assert!(!p.using.is_empty(), "Should have USING expressions");
58833            }
58834            other => panic!("Expected Pivot, got {:?}", other),
58835        }
58836    }
58837
58838    #[test]
58839    fn test_parse_duckdb_pivot_with_group_by() {
58840        let sql = "PIVOT Cities ON Year USING SUM(Population) GROUP BY Country";
58841        let result = Parser::parse_sql(sql);
58842        assert!(
58843            result.is_ok(),
58844            "Failed to parse: {} - {:?}",
58845            sql,
58846            result.err()
58847        );
58848    }
58849
58850    #[test]
58851    fn test_parse_duckdb_unpivot() {
58852        let sql = "UNPIVOT monthly_sales ON jan, feb, mar INTO NAME month VALUE sales";
58853        let result = Parser::parse_sql(sql);
58854        assert!(
58855            result.is_ok(),
58856            "Failed to parse: {} - {:?}",
58857            sql,
58858            result.err()
58859        );
58860    }
58861
58862    #[test]
58863    fn test_parse_standard_pivot_in_from() {
58864        let sql = "SELECT * FROM cities PIVOT(SUM(population) FOR year IN (2000, 2010, 2020))";
58865        let result = Parser::parse_sql(sql);
58866        assert!(
58867            result.is_ok(),
58868            "Failed to parse: {} - {:?}",
58869            sql,
58870            result.err()
58871        );
58872    }
58873
58874
58875
58876
58877    fn assert_pivot_roundtrip(sql: &str) {
58878        let parsed = crate::parse(sql, crate::DialectType::DuckDB);
58879        assert!(
58880            parsed.is_ok(),
58881            "Failed to parse: {} - {:?}",
58882            sql,
58883            parsed.err()
58884        );
58885        let stmts = parsed.unwrap();
58886        assert_eq!(stmts.len(), 1, "Expected 1 statement for: {}", sql);
58887        let generated = crate::generate(&stmts[0], crate::DialectType::DuckDB);
58888        assert!(
58889            generated.is_ok(),
58890            "Failed to generate: {} - {:?}",
58891            sql,
58892            generated.err()
58893        );
58894        let result = generated.unwrap();
58895        assert_eq!(result.trim(), sql, "Round-trip mismatch for: {}", sql);
58896    }
58897
58898    fn assert_pivot_roundtrip_bq(sql: &str) {
58899        let parsed = crate::parse(sql, crate::DialectType::BigQuery);
58900        assert!(
58901            parsed.is_ok(),
58902            "Failed to parse: {} - {:?}",
58903            sql,
58904            parsed.err()
58905        );
58906        let stmts = parsed.unwrap();
58907        assert_eq!(stmts.len(), 1, "Expected 1 statement for: {}", sql);
58908        let generated = crate::generate(&stmts[0], crate::DialectType::BigQuery);
58909        assert!(
58910            generated.is_ok(),
58911            "Failed to generate: {} - {:?}",
58912            sql,
58913            generated.err()
58914        );
58915        let result = generated.unwrap();
58916        assert_eq!(result.trim(), sql, "Round-trip mismatch for: {}", sql);
58917    }
58918
58919    #[test]
58920    fn test_pivot_roundtrip_duckdb_simple() {
58921        assert_pivot_roundtrip("PIVOT Cities ON Year USING SUM(Population)");
58922    }
58923
58924    #[test]
58925    fn test_pivot_roundtrip_duckdb_group_by() {
58926        assert_pivot_roundtrip("PIVOT Cities ON Year USING SUM(Population) GROUP BY Country");
58927    }
58928
58929    #[test]
58930    fn test_pivot_roundtrip_duckdb_in_clause() {
58931        assert_pivot_roundtrip(
58932            "PIVOT Cities ON Year IN (2000, 2010) USING SUM(Population) GROUP BY Country",
58933        );
58934    }
58935
58936    #[test]
58937    fn test_pivot_roundtrip_duckdb_multiple_using() {
58938        assert_pivot_roundtrip("PIVOT Cities ON Year USING SUM(Population) AS total, MAX(Population) AS max GROUP BY Country");
58939    }
58940
58941    #[test]
58942    fn test_pivot_roundtrip_duckdb_multiple_on() {
58943        assert_pivot_roundtrip("PIVOT Cities ON Country, Name USING SUM(Population)");
58944    }
58945
58946    #[test]
58947    fn test_pivot_roundtrip_duckdb_concat_on() {
58948        assert_pivot_roundtrip("PIVOT Cities ON Country || '_' || Name USING SUM(Population)");
58949    }
58950
58951    #[test]
58952    fn test_pivot_roundtrip_duckdb_multiple_group_by() {
58953        assert_pivot_roundtrip("PIVOT Cities ON Year USING SUM(Population) GROUP BY Country, Name");
58954    }
58955
58956    #[test]
58957    fn test_pivot_roundtrip_duckdb_first() {
58958        assert_pivot_roundtrip("PIVOT Cities ON Year USING FIRST(Population)");
58959    }
58960
58961    #[test]
58962    fn test_unpivot_roundtrip_duckdb_basic() {
58963        assert_pivot_roundtrip(
58964            "UNPIVOT monthly_sales ON jan, feb, mar, apr, may, jun INTO NAME month VALUE sales",
58965        );
58966    }
58967
58968    #[test]
58969    fn test_unpivot_roundtrip_duckdb_subquery() {
58970        assert_pivot_roundtrip("UNPIVOT (SELECT 1 AS col1, 2 AS col2) ON foo, bar");
58971    }
58972
58973    #[test]
58974    fn test_pivot_roundtrip_duckdb_cte() {
58975        assert_pivot_roundtrip("WITH pivot_alias AS (PIVOT Cities ON Year USING SUM(Population) GROUP BY Country) SELECT * FROM pivot_alias");
58976    }
58977
58978    #[test]
58979    fn test_pivot_roundtrip_duckdb_subquery() {
58980        assert_pivot_roundtrip("SELECT * FROM (PIVOT Cities ON Year USING SUM(Population) GROUP BY Country) AS pivot_alias");
58981    }
58982
58983    #[test]
58984    fn test_pivot_roundtrip_standard_from() {
58985        assert_pivot_roundtrip("SELECT * FROM cities PIVOT(SUM(population) FOR year IN (2000, 2010, 2020) GROUP BY country)");
58986    }
58987
58988    #[test]
58989    fn test_pivot_roundtrip_standard_bare_in() {
58990        assert_pivot_roundtrip("SELECT * FROM t PIVOT(SUM(y) FOR foo IN y_enum)");
58991    }
58992
58993    #[test]
58994    fn test_unpivot_roundtrip_bigquery() {
58995        assert_pivot_roundtrip_bq("SELECT * FROM q UNPIVOT(values FOR quarter IN (b, c))");
58996    }
58997
58998    #[test]
58999    fn test_pivot_roundtrip_bigquery_aliases() {
59000        assert_pivot_roundtrip_bq("SELECT cars, apples FROM some_table PIVOT(SUM(total_counts) FOR products IN ('general.cars' AS cars, 'food.apples' AS apples))");
59001    }
59002
59003    #[test]
59004    fn test_unpivot_roundtrip_bigquery_parens() {
59005        assert_pivot_roundtrip_bq(
59006            "SELECT * FROM (SELECT * FROM `t`) AS a UNPIVOT((c) FOR c_name IN (v1, v2))",
59007        );
59008    }
59009
59010    #[test]
59011    fn test_pivot_roundtrip_bigquery_multi_agg() {
59012        // Note: BigQuery fixture expects implicit aliases to become explicit AS
59013        let sql = "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) AS d, COUNT(*) AS e FOR c IN ('x', 'y'))";
59014        assert_pivot_roundtrip_bq(sql);
59015    }
59016
59017    // Additional fixture tests for UNPIVOT with COLUMNS and grouped ON
59018    #[test]
59019    fn test_unpivot_roundtrip_duckdb_columns_exclude() {
59020        assert_pivot_roundtrip(
59021            "UNPIVOT monthly_sales ON COLUMNS(* EXCLUDE (empid, dept)) INTO NAME month VALUE sales",
59022        );
59023    }
59024
59025    #[test]
59026    fn test_unpivot_roundtrip_duckdb_grouped_columns() {
59027        assert_pivot_roundtrip("UNPIVOT monthly_sales ON (jan, feb, mar) AS q1, (apr, may, jun) AS q2 INTO NAME quarter VALUE month_1_sales, month_2_sales, month_3_sales");
59028    }
59029
59030    #[test]
59031    fn test_unpivot_roundtrip_duckdb_cte_columns() {
59032        assert_pivot_roundtrip("WITH unpivot_alias AS (UNPIVOT monthly_sales ON COLUMNS(* EXCLUDE (empid, dept)) INTO NAME month VALUE sales) SELECT * FROM unpivot_alias");
59033    }
59034
59035    #[test]
59036    fn test_unpivot_roundtrip_duckdb_subquery_columns() {
59037        assert_pivot_roundtrip("SELECT * FROM (UNPIVOT monthly_sales ON COLUMNS(* EXCLUDE (empid, dept)) INTO NAME month VALUE sales) AS unpivot_alias");
59038    }
59039
59040    #[test]
59041    fn test_pivot_roundtrip_duckdb_cte_with_columns() {
59042        assert_pivot_roundtrip("WITH cities(country, name, year, population) AS (SELECT 'NL', 'Amsterdam', 2000, 1005 UNION ALL SELECT 'US', 'Seattle', 2020, 738) PIVOT cities ON year USING SUM(population)");
59043    }
59044
59045    #[test]
59046    fn test_pivot_roundtrip_standard_first_with_alias() {
59047        // DuckDB fixture #73: comma before FOR is dropped in expected output
59048        let sql = "SELECT * FROM t PIVOT(FIRST(t) AS t, FOR quarter IN ('Q1', 'Q2'))";
59049        let expected = "SELECT * FROM t PIVOT(FIRST(t) AS t FOR quarter IN ('Q1', 'Q2'))";
59050        let parsed = crate::parse(sql, crate::DialectType::DuckDB);
59051        assert!(
59052            parsed.is_ok(),
59053            "Failed to parse: {} - {:?}",
59054            sql,
59055            parsed.err()
59056        );
59057        let stmts = parsed.unwrap();
59058        assert_eq!(stmts.len(), 1);
59059        let generated = crate::generate(&stmts[0], crate::DialectType::DuckDB);
59060        assert!(
59061            generated.is_ok(),
59062            "Failed to generate: {} - {:?}",
59063            sql,
59064            generated.err()
59065        );
59066        let result = generated.unwrap();
59067        assert_eq!(result.trim(), expected, "Round-trip mismatch");
59068    }
59069
59070    #[test]
59071    fn test_pivot_roundtrip_bigquery_implicit_alias() {
59072        // BigQuery fixture #134: implicit aliases become explicit AS
59073        let sql = "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) d, COUNT(*) e FOR c IN ('x', 'y'))";
59074        let expected = "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) AS d, COUNT(*) AS e FOR c IN ('x', 'y'))";
59075        let parsed = crate::parse(sql, crate::DialectType::BigQuery);
59076        assert!(
59077            parsed.is_ok(),
59078            "Failed to parse: {} - {:?}",
59079            sql,
59080            parsed.err()
59081        );
59082        let stmts = parsed.unwrap();
59083        assert_eq!(stmts.len(), 1);
59084        let generated = crate::generate(&stmts[0], crate::DialectType::BigQuery);
59085        assert!(
59086            generated.is_ok(),
59087            "Failed to generate: {} - {:?}",
59088            sql,
59089            generated.err()
59090        );
59091        let result = generated.unwrap();
59092        assert_eq!(result.trim(), expected, "Round-trip mismatch");
59093    }
59094
59095    #[test]
59096    fn test_duckdb_struct_enum_union_row_types() {
59097        use crate::DialectType;
59098
59099        // Helper to test roundtrip with DuckDB dialect - runs in a thread with larger stack
59100        fn check(sql: &str, expected: Option<&str>) {
59101            let sql = sql.to_string();
59102            let expected = expected.map(|s| s.to_string());
59103            let result = std::thread::Builder::new()
59104                .stack_size(16 * 1024 * 1024) // 16MB stack
59105                .spawn(move || {
59106                    let expected_out = expected.as_deref().unwrap_or(&sql);
59107                    let parsed = crate::parse(&sql, DialectType::DuckDB);
59108                    assert!(
59109                        parsed.is_ok(),
59110                        "Failed to parse: {} - {:?}",
59111                        sql,
59112                        parsed.err()
59113                    );
59114                    let stmts = parsed.unwrap();
59115                    assert!(!stmts.is_empty(), "No statements parsed: {}", sql);
59116                    let generated = crate::generate(&stmts[0], DialectType::DuckDB);
59117                    assert!(
59118                        generated.is_ok(),
59119                        "Failed to generate: {} - {:?}",
59120                        sql,
59121                        generated.err()
59122                    );
59123                    let result = generated.unwrap();
59124                    assert_eq!(result.trim(), expected_out, "Mismatch for: {}", sql);
59125                })
59126                .expect("Failed to spawn test thread")
59127                .join();
59128            assert!(result.is_ok(), "Test thread panicked");
59129        }
59130
59131        // UNION type
59132        check("CREATE TABLE tbl1 (u UNION(num INT, str TEXT))", None);
59133        // ENUM type
59134        check(
59135            "CREATE TABLE color (name ENUM('RED', 'GREEN', 'BLUE'))",
59136            None,
59137        );
59138        // ROW type -> STRUCT
59139        check(
59140            "SELECT CAST(ROW(1, 2) AS ROW(a INTEGER, b INTEGER))",
59141            Some("SELECT CAST(ROW(1, 2) AS STRUCT(a INT, b INT))"),
59142        );
59143        // STRUCT with parens
59144        check("CAST(x AS STRUCT(number BIGINT))", None);
59145        // STRUCT with quoted field names
59146        check(
59147            "CAST({'i': 1, 's': 'foo'} AS STRUCT(\"s\" TEXT, \"i\" INT))",
59148            None,
59149        );
59150        // Nested STRUCT
59151        check(
59152            "CAST(ROW(1, ROW(1)) AS STRUCT(number BIGINT, row STRUCT(number BIGINT)))",
59153            None,
59154        );
59155        // STRUCT with array suffix - test just the type parsing part
59156        // Note: STRUCT_PACK -> struct literal transform is a separate feature
59157        check("CAST(x AS STRUCT(a BIGINT)[][])", None);
59158        check("CAST(x AS STRUCT(a BIGINT)[])", None);
59159        // Double-colon cast with STRUCT type
59160        check("CAST({'a': 'b'} AS STRUCT(a TEXT))", None);
59161    }
59162
59163    // Helper for roundtrip identity tests
59164    fn roundtrip(sql: &str) -> String {
59165        let ast =
59166            Parser::parse_sql(sql).unwrap_or_else(|e| panic!("Parse error for '{}': {}", sql, e));
59167        crate::generator::Generator::sql(&ast[0])
59168            .unwrap_or_else(|e| panic!("Generate error for '{}': {}", sql, e))
59169    }
59170
59171    fn assert_roundtrip(sql: &str) {
59172        let result = roundtrip(sql);
59173        assert_eq!(result, sql, "\n  Input:    {}\n  Output:   {}", sql, result);
59174    }
59175
59176    fn assert_roundtrip_expected(sql: &str, expected: &str) {
59177        let result = roundtrip(sql);
59178        assert_eq!(
59179            result, expected,
59180            "\n  Input:    {}\n  Expected: {}\n  Output:   {}",
59181            sql, expected, result
59182        );
59183    }
59184
59185    #[test]
59186    fn test_xmlelement_basic() {
59187        assert_roundtrip("SELECT XMLELEMENT(NAME foo)");
59188    }
59189
59190    #[test]
59191    fn test_xmlelement_with_xmlattributes() {
59192        assert_roundtrip("SELECT XMLELEMENT(NAME foo, XMLATTRIBUTES('xyz' AS bar))");
59193    }
59194
59195    #[test]
59196    fn test_xmlelement_with_multiple_attrs() {
59197        assert_roundtrip("SELECT XMLELEMENT(NAME test, XMLATTRIBUTES(a, b)) FROM test");
59198    }
59199
59200    #[test]
59201    fn test_xmlelement_with_content() {
59202        assert_roundtrip(
59203            "SELECT XMLELEMENT(NAME foo, XMLATTRIBUTES(CURRENT_DATE AS bar), 'cont', 'ent')",
59204        );
59205    }
59206
59207    #[test]
59208    fn test_xmlelement_nested() {
59209        assert_roundtrip("SELECT XMLELEMENT(NAME foo, XMLATTRIBUTES('xyz' AS bar), XMLELEMENT(NAME abc), XMLCOMMENT('test'), XMLELEMENT(NAME xyz))");
59210    }
59211
59212    #[test]
59213    fn test_on_conflict_do_update() {
59214        assert_roundtrip("INSERT INTO newtable AS t(a, b, c) VALUES (1, 2, 3) ON CONFLICT(c) DO UPDATE SET a = t.a + 1 WHERE t.a < 1");
59215    }
59216
59217    #[test]
59218    fn test_on_conflict_do_nothing() {
59219        // ON CONFLICT(id) is the canonical form (no space before paren)
59220        assert_roundtrip_expected(
59221            "INSERT INTO test (id, name) VALUES (1, 'test') ON CONFLICT (id) DO NOTHING",
59222            "INSERT INTO test (id, name) VALUES (1, 'test') ON CONFLICT(id) DO NOTHING",
59223        );
59224    }
59225
59226    #[test]
59227    fn test_truncate_restart_identity() {
59228        assert_roundtrip("TRUNCATE TABLE t1 RESTART IDENTITY");
59229    }
59230
59231    #[test]
59232    fn test_truncate_restart_identity_restrict() {
59233        assert_roundtrip("TRUNCATE TABLE t1 RESTART IDENTITY RESTRICT");
59234    }
59235
59236    #[test]
59237    fn test_insert_by_name() {
59238        assert_roundtrip("INSERT INTO x BY NAME SELECT 1 AS y");
59239    }
59240
59241    #[test]
59242    fn test_insert_default_values_returning() {
59243        assert_roundtrip("INSERT INTO t DEFAULT VALUES RETURNING (c1)");
59244    }
59245
59246    #[test]
59247    fn test_union_all_by_name() {
59248        assert_roundtrip("SELECT 1 AS x UNION ALL BY NAME SELECT 2 AS x");
59249    }
59250
59251    #[test]
59252    fn test_minus_as_except() {
59253        // MINUS is Oracle/Redshift syntax for EXCEPT
59254        assert_roundtrip_expected(
59255            "SELECT foo, bar FROM table_1 MINUS SELECT foo, bar FROM table_2",
59256            "SELECT foo, bar FROM table_1 EXCEPT SELECT foo, bar FROM table_2",
59257        );
59258    }
59259
59260    #[test]
59261    fn test_filter_without_where() {
59262        assert_roundtrip_expected(
59263            "SELECT SUM(x) FILTER (x = 1)",
59264            "SELECT SUM(x) FILTER(WHERE x = 1)",
59265        );
59266    }
59267
59268    #[test]
59269    fn test_comment_on_materialized_view() {
59270        assert_roundtrip("COMMENT ON MATERIALIZED VIEW my_view IS 'this'");
59271    }
59272
59273    #[test]
59274    fn test_create_index_concurrently() {
59275        assert_roundtrip("CREATE INDEX CONCURRENTLY idx ON t(c)");
59276    }
59277
59278    #[test]
59279    fn test_create_index_if_not_exists() {
59280        assert_roundtrip("CREATE INDEX IF NOT EXISTS idx ON t(c)");
59281    }
59282
59283    #[test]
59284    fn test_alter_table_partition_hive() {
59285        // Hive: ALTER TABLE x PARTITION(y=z) ADD COLUMN a VARCHAR(10)
59286        assert_roundtrip("ALTER TABLE x PARTITION(y = z) ADD COLUMN a VARCHAR(10)");
59287    }
59288
59289    #[test]
59290    fn test_alter_table_change_column_hive() {
59291        // Hive/MySQL: CHANGE COLUMN old_name new_name data_type
59292        assert_roundtrip("ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)");
59293    }
59294
59295    #[test]
59296    fn test_alter_table_add_columns_hive() {
59297        // Hive/Spark: ADD COLUMNS (col1 TYPE, col2 TYPE)
59298        assert_roundtrip("ALTER TABLE X ADD COLUMNS (y INT, z STRING)");
59299    }
59300
59301    #[test]
59302    fn test_alter_table_add_columns_cascade_hive() {
59303        // Hive/Spark: ADD COLUMNS (col1 TYPE, col2 TYPE) CASCADE
59304        assert_roundtrip("ALTER TABLE X ADD COLUMNS (y INT, z STRING) CASCADE");
59305    }
59306
59307    #[test]
59308    fn test_group_by_with_cube() {
59309        // Hive/MySQL: GROUP BY ... WITH CUBE
59310        let sql = "SELECT key, value FROM T1 GROUP BY key, value WITH CUBE";
59311        let result = Parser::parse_sql(sql).unwrap();
59312        let select = result[0].as_select().unwrap();
59313
59314        if let Some(group_by) = &select.group_by {
59315            // Debug: print the expressions
59316            eprintln!("GROUP BY expressions: {:?}", group_by.expressions);
59317
59318            // Check if there's a Cube expression with empty expressions
59319            let has_cube = group_by.expressions.iter().any(|e| {
59320                if let Expression::Cube(c) = e {
59321                    c.expressions.is_empty()
59322                } else {
59323                    false
59324                }
59325            });
59326            assert!(
59327                has_cube,
59328                "Should have a Cube expression with empty expressions in GROUP BY"
59329            );
59330        } else {
59331            panic!("Should have GROUP BY clause");
59332        }
59333    }
59334
59335    #[test]
59336    fn test_group_by_with_rollup() {
59337        // Hive/MySQL: GROUP BY ... WITH ROLLUP
59338        let sql = "SELECT key, value FROM T1 GROUP BY key, value WITH ROLLUP";
59339        let result = Parser::parse_sql(sql).unwrap();
59340        let select = result[0].as_select().unwrap();
59341
59342        if let Some(group_by) = &select.group_by {
59343            // Check if there's a Rollup expression with empty expressions
59344            let has_rollup = group_by.expressions.iter().any(|e| {
59345                if let Expression::Rollup(r) = e {
59346                    r.expressions.is_empty()
59347                } else {
59348                    false
59349                }
59350            });
59351            assert!(
59352                has_rollup,
59353                "Should have a Rollup expression with empty expressions in GROUP BY"
59354            );
59355        } else {
59356            panic!("Should have GROUP BY clause");
59357        }
59358    }
59359
59360    #[test]
59361    fn test_opendatasource_dot_access() {
59362        use crate::dialects::DialectType;
59363        use crate::transpile;
59364
59365        // OPENDATASOURCE(...).Catalog.dbo.Products — 3-part dot access
59366        let sql =
59367            "SELECT * FROM OPENDATASOURCE('SQLNCLI', 'Data Source=remote;').Catalog.dbo.Products";
59368        let result = transpile(sql, DialectType::TSQL, DialectType::TSQL).unwrap();
59369        assert_eq!(result[0], sql);
59370
59371        // 2-part dot access
59372        let sql2 = "SELECT * FROM OPENDATASOURCE('SQLNCLI', 'x').schema1.table1";
59373        let result2 = transpile(sql2, DialectType::TSQL, DialectType::TSQL).unwrap();
59374        assert_eq!(result2[0], sql2);
59375
59376        // 1-part dot access
59377        let sql3 = "SELECT * FROM OPENDATASOURCE('SQLNCLI', 'x').table1";
59378        let result3 = transpile(sql3, DialectType::TSQL, DialectType::TSQL).unwrap();
59379        assert_eq!(result3[0], sql3);
59380
59381        // No dot access (should still work as plain function)
59382        let sql4 = "SELECT * FROM OPENDATASOURCE('SQLNCLI', 'x')";
59383        let result4 = transpile(sql4, DialectType::TSQL, DialectType::TSQL).unwrap();
59384        assert_eq!(result4[0], sql4);
59385    }
59386
59387    #[test]
59388    fn test_exec_output_param() {
59389        use crate::dialects::DialectType;
59390        use crate::transpile;
59391
59392        // OUTPUT parameter
59393        let sql = "EXECUTE sp_CountOrders @region = 'US', @total = @count OUTPUT";
59394        let result = transpile(sql, DialectType::TSQL, DialectType::TSQL);
59395        assert!(
59396            result.is_ok(),
59397            "OUTPUT param should parse: {:?}",
59398            result.err()
59399        );
59400        assert_eq!(result.unwrap()[0], sql);
59401
59402        // WITH RESULT SETS (opaque — stored as Command)
59403        let sql2 = "EXEC sp_GetReport WITH RESULT SETS ((id INT, name NVARCHAR(100)))";
59404        let result2 = Parser::parse_sql(sql2);
59405        assert!(
59406            result2.is_ok(),
59407            "RESULT SETS should parse: {:?}",
59408            result2.err()
59409        );
59410
59411        // Dynamic SQL: EXECUTE (@sql)
59412        let sql3 = "EXECUTE (@sql)";
59413        let result3 = transpile(sql3, DialectType::TSQL, DialectType::TSQL);
59414        assert!(
59415            result3.is_ok(),
59416            "Dynamic SQL should parse: {:?}",
59417            result3.err()
59418        );
59419    }
59420}
59421
/// Parse and transpile smoke tests: Oracle `(+)` join markers, Oracle
/// `XMLTABLE` and quoted dotted columns, MySQL `OPTIMIZE TABLE` and index
/// hints, and Spark/Hive `LIMIT`.
#[cfg(test)]
mod join_marker_tests {
    use super::*;
    use crate::dialects::DialectType;
    use crate::transpile;

    /// Parses `sql` with the Oracle dialect, prints the raw result, and fails
    /// the calling test if parsing returned an error.
    fn assert_oracle_parses(sql: &str) {
        let result = crate::dialects::Dialect::get(DialectType::Oracle).parse(sql);
        println!("Result: {:?}", result);
        assert!(result.is_ok(), "Parse error: {:?}", result.err());
    }

    #[test]
    fn test_oracle_join_marker_simple() {
        // Goes through the default parser rather than the Oracle dialect.
        let result = Parser::parse_sql("select a.baz from a where a.baz = b.baz (+)");
        println!("Result: {:?}", result);
        assert!(result.is_ok(), "Parse error: {:?}", result.err());
    }

    #[test]
    fn test_oracle_join_marker_with_comma_join_and_aliases() {
        assert_oracle_parses("SELECT e1.x, e2.x FROM e e1, e e2 WHERE e1.y = e2.y (+)");
    }

    #[test]
    fn test_oracle_xmltable_with_quoted_dot_columns() {
        assert_oracle_parses(
            "SELECT warehouse_name warehouse,\n   warehouse2.\"Water\", warehouse2.\"Rail\"\n   FROM warehouses,\n   XMLTABLE('/Warehouse'\n      PASSING warehouses.warehouse_spec\n      COLUMNS\n         \"Water\" varchar2(6) PATH 'WaterAccess',\n         \"Rail\" varchar2(6) PATH 'RailAccess')\n      warehouse2",
        );
    }

    #[test]
    fn test_optimize_table_mysql() {
        // Multi-statement: TRUNCATE + OPTIMIZE
        let truncate = transpile(
            "TRUNCATE TABLE session_logs",
            DialectType::MySQL,
            DialectType::MySQL,
        );
        assert!(
            truncate.is_ok(),
            "TRUNCATE should parse: {:?}",
            truncate.err()
        );

        // OPTIMIZE must also come back byte-for-byte identical.
        let optimize_sql = "OPTIMIZE TABLE temp_exports";
        let optimize = transpile(optimize_sql, DialectType::MySQL, DialectType::MySQL);
        assert!(
            optimize.is_ok(),
            "OPTIMIZE should parse: {:?}",
            optimize.err()
        );
        assert_eq!(optimize.unwrap()[0], optimize_sql);
    }

    #[test]
    fn test_mysql_index_hints() {
        // Each entry is (failure label, MySQL statement that must transpile).
        let cases = [
            // USE INDEX with alias
            (
                "USE INDEX with alias",
                "SELECT * FROM t e USE INDEX (idx1) WHERE a = 1",
            ),
            // IGNORE INDEX in JOIN with PRIMARY keyword
            (
                "IGNORE INDEX PRIMARY",
                "SELECT * FROM t1 JOIN t2 IGNORE INDEX (PRIMARY) ON t1.id = t2.id",
            ),
            // Full example from issue
            (
                "Full example",
                "SELECT e.name, d.department_name FROM employees e USE INDEX (idx_dept, idx_salary) JOIN departments d IGNORE INDEX (PRIMARY) ON e.department_id = d.department_id WHERE e.salary > 60000",
            ),
        ];

        for &(label, sql) in cases.iter() {
            let outcome = transpile(sql, DialectType::MySQL, DialectType::MySQL);
            assert!(outcome.is_ok(), "{}: {:?}", label, outcome.err());
        }
    }

    #[test]
    fn test_oracle_quoted_dot_projection() {
        assert_oracle_parses(
            "SELECT warehouse2.\"Water\", warehouse2.\"Rail\" FROM warehouses warehouse2",
        );
    }

    #[test]
    fn test_oracle_xmltable_columns_only() {
        assert_oracle_parses(
            "SELECT * FROM XMLTABLE('/Warehouse' PASSING warehouses.warehouse_spec COLUMNS \"Water\" varchar2(6) PATH 'WaterAccess', \"Rail\" varchar2(6) PATH 'RailAccess') warehouse2",
        );
    }

    #[test]
    fn test_spark_limit() {
        let sql = "SELECT * FROM something LIMIT 100";

        // Spark LIMIT should work
        let spark = transpile(sql, DialectType::Spark, DialectType::Spark);
        assert!(spark.is_ok(), "Spark LIMIT: {:?}", spark.err());
        assert_eq!(spark.unwrap()[0], sql);

        // Hive LIMIT should work
        let hive = transpile(sql, DialectType::Hive, DialectType::Hive);
        assert!(hive.is_ok(), "Hive LIMIT: {:?}", hive.err());
    }

    #[test]
    fn test_oracle_projection_alias_then_quoted_dot() {
        assert_oracle_parses(
            "SELECT warehouse_name warehouse, warehouse2.\"Water\" FROM warehouses warehouse2",
        );
    }
}
59529
/// Parser regression tests: ClickHouse `FORMAT` clauses, `PROJECTION`
/// definitions and the ternary operator, plus one DuckDB bare-number
/// `INTERVAL` transpilation check.
#[cfg(test)]
mod clickhouse_parser_regression_tests {
    use crate::dialects::{Dialect, DialectType};
    use crate::expressions::Expression;

    /// Fails the calling test unless `sql` parses with the ClickHouse dialect.
    fn assert_clickhouse_parses(sql: &str) {
        let result = Dialect::get(DialectType::ClickHouse).parse(sql);
        assert!(result.is_ok(), "Parse error: {:?}", result.err());
    }

    /// Parses a single expression with the ClickHouse dialect, panicking with
    /// the parse error on failure.
    fn parse_clickhouse_expr(sql: &str) -> Expression {
        // `Some(e)` keeps the failure text in its `Parse error: Some(<error>)` form.
        crate::parse_one(sql, DialectType::ClickHouse)
            .unwrap_or_else(|e| panic!("Parse error: {:?}", Some(e)))
    }

    /// Asserts `ok`, reporting which `role` was expected to be which `kind`
    /// along with the discriminant of the node actually found.
    fn assert_shape(ok: bool, role: &str, kind: &str, actual: &Expression) {
        assert!(
            ok,
            "Expected {} to be {}, got {:?}",
            role,
            kind,
            std::mem::discriminant(actual)
        );
    }

    #[test]
    fn test_clickhouse_select_format_clause_not_alias() {
        assert_clickhouse_parses("SELECT 1 FORMAT TabSeparated");
    }

    #[test]
    fn test_clickhouse_projection_select_group_by_parses() {
        assert_clickhouse_parses(
            "CREATE TABLE t (a String, b String, c UInt64, PROJECTION p1 (SELECT a, sum(c) GROUP BY a, b), PROJECTION p2 (SELECT b, sum(c) GROUP BY b)) ENGINE=MergeTree()",
        );
    }

    /// ClickHouse ternary operator AST structure tests.
    /// Ported from Python sqlglot: tests/dialects/test_clickhouse.py::test_ternary (lines 765-778).
    /// Checks that `x ? (y ? 1 : 2) : 3` becomes an IfFunc whose true branch is
    /// a Paren wrapping a second IfFunc.
    #[test]
    fn test_clickhouse_ternary_ast_structure() {
        let ternary = parse_clickhouse_expr("x ? (y ? 1 : 2) : 3");

        // Root should be IfFunc
        let Expression::IfFunc(outer) = &ternary else {
            panic!(
                "Expected IfFunc, got {:?}",
                std::mem::discriminant(&ternary)
            );
        };

        // Outer condition is the column `x`
        assert_shape(
            matches!(&outer.condition, Expression::Column(_)),
            "condition",
            "Column",
            &outer.condition,
        );

        // Outer true branch is the parenthesised inner ternary
        let Expression::Paren(inner_paren) = &outer.true_value else {
            panic!(
                "Expected true_value to be Paren, got {:?}",
                std::mem::discriminant(&outer.true_value)
            );
        };

        // Outer false branch is the literal `3`
        let outer_false = outer.false_value.as_ref().expect("Expected false_value");
        assert_shape(
            matches!(outer_false, Expression::Literal(_)),
            "false_value",
            "Literal",
            outer_false,
        );

        // Inside the Paren, the nested ternary should also be IfFunc
        let Expression::IfFunc(nested) = &inner_paren.this else {
            panic!(
                "Expected nested IfFunc, got {:?}",
                std::mem::discriminant(&inner_paren.this)
            );
        };

        // Nested condition is the column `y`
        assert_shape(
            matches!(&nested.condition, Expression::Column(_)),
            "nested condition",
            "Column",
            &nested.condition,
        );

        // Nested true branch is the literal `1`
        assert_shape(
            matches!(&nested.true_value, Expression::Literal(_)),
            "nested true_value",
            "Literal",
            &nested.true_value,
        );

        // Nested false branch is the literal `2`
        let nested_false = nested
            .false_value
            .as_ref()
            .expect("Expected nested false_value");
        assert_shape(
            matches!(nested_false, Expression::Literal(_)),
            "nested false_value",
            "Literal",
            nested_false,
        );
    }

    /// Verify that `a AND b ? 1 : 2` has And as the ternary condition
    /// (AND binds tighter than ?).
    /// Ported from Python sqlglot: test_clickhouse.py line 778.
    #[test]
    fn test_clickhouse_ternary_and_precedence() {
        let ternary = parse_clickhouse_expr("a and b ? 1 : 2");

        let Expression::IfFunc(if_func) = &ternary else {
            panic!(
                "Expected IfFunc, got {:?}",
                std::mem::discriminant(&ternary)
            );
        };

        // The condition should be And (not just Column "b")
        assert_shape(
            matches!(&if_func.condition, Expression::And(_)),
            "condition",
            "And",
            &if_func.condition,
        );
    }

    /// A bare-number interval (`INTERVAL 3 DAY`) parsed with DuckDB must be
    /// emitted with a quoted value for both the DuckDB and Hive targets.
    #[test]
    fn test_parse_interval_bare_number_duckdb() {
        let sql = "SELECT CAST('2018-01-01 00:00:00' AS DATE) + INTERVAL 3 DAY";
        let quoted = "SELECT CAST('2018-01-01 00:00:00' AS DATE) + INTERVAL '3' DAY";
        let d = Dialect::get(DialectType::DuckDB);

        let parsed = d
            .parse(sql)
            .unwrap_or_else(|e| panic!("Failed to parse DuckDB INTERVAL 3 DAY: {}", e));
        assert!(!parsed.is_empty(), "Should parse to at least one statement");

        // DuckDB target: the number is normalised to a quoted string
        let output_duckdb = d.transpile(sql, DialectType::DuckDB).unwrap();
        assert_eq!(
            output_duckdb[0], quoted,
            "DuckDB output should have quoted interval value"
        );

        // Hive target: same quoted form
        let output_hive = d.transpile(sql, DialectType::Hive).unwrap();
        assert_eq!(
            output_hive[0], quoted,
            "Hive output should have quoted interval value"
        );
    }
}