Skip to main content

polyglot_sql/
parser.rs

1//! SQL Parser -- recursive-descent parser that converts a token stream into an AST.
2//!
3//! The central type is [`Parser`], which consumes tokens produced by the
4//! [`Tokenizer`](crate::tokens::Tokenizer) and builds a tree of [`Expression`]
5//! nodes covering the full SQL grammar: queries, DML, DDL, set operations,
6//! window functions, CTEs, and dialect-specific extensions for 30+ databases.
7//!
8//! The simplest entry point is [`Parser::parse_sql`], which tokenizes and
9//! parses a SQL string in one call.
10//!
11//! # Static configuration maps
12//!
13//! This module also exports several `LazyLock<HashSet<TokenType>>` constants
14//! (ported from Python sqlglot's `parser.py`) that classify token types:
15//!
16//! - [`TYPE_TOKENS`] -- all tokens that represent SQL data types
17//! - [`NESTED_TYPE_TOKENS`] -- parametric types like `ARRAY`, `MAP`, `STRUCT`
18//! - [`RESERVED_TOKENS`] -- tokens that cannot be used as unquoted identifiers
19//! - [`NO_PAREN_FUNCTIONS`] / [`NO_PAREN_FUNCTION_NAMES`] -- zero-argument
20//!   functions that may be written without parentheses (e.g. `CURRENT_DATE`)
21//! - [`DB_CREATABLES`] -- object kinds valid after `CREATE` (TABLE, VIEW, etc.)
22//! - [`SUBQUERY_PREDICATES`] -- tokens introducing subquery predicates (ANY, ALL, EXISTS)
23
24use crate::error::{Error, Result};
25use crate::expressions::*;
26use crate::tokens::{Span, Token, TokenType, Tokenizer, TokenizerConfig};
27use std::collections::HashSet;
28use std::sync::LazyLock;
29
30// =============================================================================
31// Parser Configuration Maps (ported from Python SQLGlot parser.py)
32// =============================================================================
33
34/// NO_PAREN_FUNCTIONS: Functions that can be called without parentheses
35/// Maps TokenType to the function name for generation
36/// Python: NO_PAREN_FUNCTIONS = {TokenType.CURRENT_DATE: exp.CurrentDate, ...}
37pub static NO_PAREN_FUNCTIONS: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
38    let mut set = HashSet::new();
39    set.insert(TokenType::CurrentDate);
40    set.insert(TokenType::CurrentDateTime);
41    set.insert(TokenType::CurrentTime);
42    set.insert(TokenType::CurrentTimestamp);
43    set.insert(TokenType::CurrentUser);
44    set.insert(TokenType::CurrentRole);
45    set.insert(TokenType::CurrentSchema);
46    set.insert(TokenType::CurrentCatalog);
47    // Additional no-paren functions (from tokens.rs)
48    set.insert(TokenType::LocalTime);
49    set.insert(TokenType::LocalTimestamp);
50    set.insert(TokenType::SysTimestamp);
51    set.insert(TokenType::UtcDate);
52    set.insert(TokenType::UtcTime);
53    set.insert(TokenType::UtcTimestamp);
54    set.insert(TokenType::SessionUser);
55    set
56});
57
58/// NO_PAREN_FUNCTION_NAMES: String names that can be no-paren functions
59/// These are often tokenized as Var/Identifier instead of specific TokenTypes
60pub static NO_PAREN_FUNCTION_NAMES: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
61    crate::function_registry::NO_PAREN_FUNCTION_NAME_LIST
62        .iter()
63        .copied()
64        .collect()
65});
66
67/// STRUCT_TYPE_TOKENS: Tokens that represent struct-like types
68/// Python: STRUCT_TYPE_TOKENS = {TokenType.FILE, TokenType.NESTED, TokenType.OBJECT, ...}
69pub static STRUCT_TYPE_TOKENS: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
70    let mut set = HashSet::new();
71    set.insert(TokenType::File);
72    set.insert(TokenType::Nested);
73    set.insert(TokenType::Object);
74    set.insert(TokenType::Struct);
75    // Note: UNION is part of STRUCT_TYPE_TOKENS in Python but we handle it as a set operation
76    set
77});
78
79/// NESTED_TYPE_TOKENS: Tokens that can have nested type parameters
80/// Python: NESTED_TYPE_TOKENS = {TokenType.ARRAY, TokenType.LIST, ...}
81pub static NESTED_TYPE_TOKENS: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
82    let mut set = HashSet::new();
83    set.insert(TokenType::Array);
84    set.insert(TokenType::List);
85    set.insert(TokenType::LowCardinality);
86    set.insert(TokenType::Map);
87    set.insert(TokenType::Nullable);
88    set.insert(TokenType::Range);
89    // Include STRUCT_TYPE_TOKENS
90    set.insert(TokenType::File);
91    set.insert(TokenType::Nested);
92    set.insert(TokenType::Object);
93    set.insert(TokenType::Struct);
94    set
95});
96
/// Report whether `name` is one of the known SQL type names that must stay uppercased.
///
/// `name` is expected to be already uppercased; the comparison is exact and
/// case-sensitive. Known types such as `DATETIME2` or `SYSNAME` return `true`,
/// while user-defined types (e.g. `UserDefinedTableType`) return `false` so the
/// caller can preserve their original case.
fn convert_name_is_known_custom(name: &str) -> bool {
    // Known SQL types that appear in the `_` (default) branch of parse_data_type.
    const KNOWN_CUSTOM_TYPES: &[&str] = &[
        "DATETIME2",
        "DATETIMEOFFSET",
        "SMALLDATETIME",
        "DATETIME",
        "NVARCHAR2",
        "VARCHAR2",
        "NCHAR",
        "MONEY",
        "SMALLMONEY",
        "TINYINT",
        "MEDIUMINT",
        "BYTEINT",
        "SUPER",
        "HLLSKETCH",
        "TIMETZ",
        "TIMESTAMPTZ",
        "SYSNAME",
        "XML",
        "SQL_VARIANT",
        "HIERARCHYID",
        "ROWVERSION",
        "IMAGE",
        "CURSOR",
        "TABLE",
        "UNIQUEIDENTIFIER",
        "VARIANT",
        "OBJECT",
        "NUMBER",
        "BINARY_FLOAT",
        "BINARY_DOUBLE",
        "CLOB",
        "NCLOB",
        "RAW",
        "LONG",
        "MEDIUMTEXT",
        "LONGTEXT",
        "MEDIUMBLOB",
        "LONGBLOB",
        "TINYTEXT",
        "TINYBLOB",
        "INT2",
        "INT4",
        "INT8",
        "FLOAT4",
        "FLOAT8",
        "SERIAL",
        "BIGSERIAL",
        "SMALLSERIAL",
        "YEAR",
        "FIXED",
        "SIGNED",
        "UNSIGNED",
        "ROW",
        "BIT",
        "BOOLEAN",
        "BOOL",
        "TEXT",
        "STRING",
        "NTEXT",
        "INT128",
        "INT256",
        "UINT8",
        "UINT16",
        "UINT32",
        "UINT64",
        "UINT128",
        "UINT256",
        "FLOAT32",
        "FLOAT64",
        "LOWCARDINALITY",
        "NULLABLE",
        "IPADDRESS",
        "IPV4",
        "IPV6",
        "AGGREGATEFUNCTION",
        "SIMPLEAGGREGATEFUNCTION",
        "FIXEDSTRING",
        "RING",
        "NESTED",
    ];

    KNOWN_CUSTOM_TYPES.contains(&name)
}
186
187/// ENUM_TYPE_TOKENS: Tokens that represent enum types
188/// Python: ENUM_TYPE_TOKENS = {TokenType.DYNAMIC, TokenType.ENUM, ...}
189pub static ENUM_TYPE_TOKENS: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
190    let mut set = HashSet::new();
191    set.insert(TokenType::Dynamic);
192    set.insert(TokenType::Enum);
193    set.insert(TokenType::Enum8);
194    set.insert(TokenType::Enum16);
195    set
196});
197
198/// AGGREGATE_TYPE_TOKENS: Tokens for aggregate function types (ClickHouse)
199/// Python: AGGREGATE_TYPE_TOKENS = {TokenType.AGGREGATEFUNCTION, ...}
200pub static AGGREGATE_TYPE_TOKENS: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
201    let mut set = HashSet::new();
202    set.insert(TokenType::AggregateFunction);
203    set.insert(TokenType::SimpleAggregateFunction);
204    set
205});
206
207/// TYPE_TOKENS: All tokens that represent data types
208/// Python: TYPE_TOKENS = {TokenType.BIT, TokenType.BOOLEAN, ...}
209pub static TYPE_TOKENS: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
210    let mut set = HashSet::new();
211    // Basic types
212    set.insert(TokenType::Bit);
213    set.insert(TokenType::Boolean);
214    // Integer types
215    set.insert(TokenType::TinyInt);
216    set.insert(TokenType::UTinyInt);
217    set.insert(TokenType::SmallInt);
218    set.insert(TokenType::USmallInt);
219    set.insert(TokenType::MediumInt);
220    set.insert(TokenType::UMediumInt);
221    set.insert(TokenType::Int);
222    set.insert(TokenType::UInt);
223    set.insert(TokenType::BigInt);
224    set.insert(TokenType::UBigInt);
225    set.insert(TokenType::BigNum);
226    set.insert(TokenType::Int128);
227    set.insert(TokenType::UInt128);
228    set.insert(TokenType::Int256);
229    set.insert(TokenType::UInt256);
230    // Floating point types
231    set.insert(TokenType::Float);
232    set.insert(TokenType::Double);
233    set.insert(TokenType::UDouble);
234    // Decimal types
235    set.insert(TokenType::Decimal);
236    set.insert(TokenType::Decimal32);
237    set.insert(TokenType::Decimal64);
238    set.insert(TokenType::Decimal128);
239    set.insert(TokenType::Decimal256);
240    set.insert(TokenType::DecFloat);
241    set.insert(TokenType::UDecimal);
242    set.insert(TokenType::BigDecimal);
243    // String types
244    set.insert(TokenType::Char);
245    set.insert(TokenType::NChar);
246    set.insert(TokenType::VarChar);
247    set.insert(TokenType::NVarChar);
248    set.insert(TokenType::BpChar);
249    set.insert(TokenType::Text);
250    set.insert(TokenType::MediumText);
251    set.insert(TokenType::LongText);
252    set.insert(TokenType::TinyText);
253    set.insert(TokenType::Name);
254    set.insert(TokenType::FixedString);
255    // Binary types
256    set.insert(TokenType::Binary);
257    set.insert(TokenType::VarBinary);
258    set.insert(TokenType::Blob);
259    set.insert(TokenType::MediumBlob);
260    set.insert(TokenType::LongBlob);
261    set.insert(TokenType::TinyBlob);
262    // Date/time types
263    set.insert(TokenType::Date);
264    set.insert(TokenType::Date32);
265    set.insert(TokenType::Time);
266    set.insert(TokenType::TimeTz);
267    set.insert(TokenType::TimeNs);
268    set.insert(TokenType::Timestamp);
269    set.insert(TokenType::TimestampTz);
270    set.insert(TokenType::TimestampLtz);
271    set.insert(TokenType::TimestampNtz);
272    set.insert(TokenType::TimestampS);
273    set.insert(TokenType::TimestampMs);
274    set.insert(TokenType::TimestampNs);
275    set.insert(TokenType::DateTime);
276    set.insert(TokenType::DateTime2);
277    set.insert(TokenType::DateTime64);
278    set.insert(TokenType::SmallDateTime);
279    set.insert(TokenType::Year);
280    set.insert(TokenType::Interval);
281    // JSON types
282    set.insert(TokenType::Json);
283    set.insert(TokenType::JsonB);
284    // UUID
285    set.insert(TokenType::Uuid);
286    // Spatial types
287    set.insert(TokenType::Geography);
288    set.insert(TokenType::GeographyPoint);
289    set.insert(TokenType::Geometry);
290    set.insert(TokenType::Point);
291    set.insert(TokenType::Ring);
292    set.insert(TokenType::LineString);
293    set.insert(TokenType::MultiLineString);
294    set.insert(TokenType::Polygon);
295    set.insert(TokenType::MultiPolygon);
296    // Range types (PostgreSQL)
297    set.insert(TokenType::Int4Range);
298    set.insert(TokenType::Int4MultiRange);
299    set.insert(TokenType::Int8Range);
300    set.insert(TokenType::Int8MultiRange);
301    set.insert(TokenType::NumRange);
302    set.insert(TokenType::NumMultiRange);
303    set.insert(TokenType::TsRange);
304    set.insert(TokenType::TsMultiRange);
305    set.insert(TokenType::TsTzRange);
306    set.insert(TokenType::TsTzMultiRange);
307    set.insert(TokenType::DateRange);
308    set.insert(TokenType::DateMultiRange);
309    // PostgreSQL special types
310    set.insert(TokenType::HllSketch);
311    set.insert(TokenType::HStore);
312    set.insert(TokenType::Serial);
313    set.insert(TokenType::SmallSerial);
314    set.insert(TokenType::BigSerial);
315    // XML
316    set.insert(TokenType::Xml);
317    // Other special types
318    set.insert(TokenType::Super);
319    set.insert(TokenType::PseudoType);
320    set.insert(TokenType::UserDefined);
321    set.insert(TokenType::Money);
322    set.insert(TokenType::SmallMoney);
323    set.insert(TokenType::RowVersion);
324    set.insert(TokenType::Image);
325    set.insert(TokenType::Variant);
326    set.insert(TokenType::Object);
327    set.insert(TokenType::ObjectIdentifier);
328    set.insert(TokenType::Inet);
329    set.insert(TokenType::IpAddress);
330    set.insert(TokenType::IpPrefix);
331    set.insert(TokenType::Ipv4);
332    set.insert(TokenType::Ipv6);
333    set.insert(TokenType::Unknown);
334    set.insert(TokenType::Null);
335    set.insert(TokenType::TDigest);
336    set.insert(TokenType::Vector);
337    set.insert(TokenType::Void);
338    // Include ENUM_TYPE_TOKENS
339    set.insert(TokenType::Dynamic);
340    set.insert(TokenType::Enum);
341    set.insert(TokenType::Enum8);
342    set.insert(TokenType::Enum16);
343    // Include NESTED_TYPE_TOKENS
344    set.insert(TokenType::Array);
345    set.insert(TokenType::List);
346    set.insert(TokenType::LowCardinality);
347    set.insert(TokenType::Map);
348    set.insert(TokenType::Nullable);
349    set.insert(TokenType::Range);
350    set.insert(TokenType::File);
351    set.insert(TokenType::Nested);
352    set.insert(TokenType::Struct);
353    // Include AGGREGATE_TYPE_TOKENS
354    set.insert(TokenType::AggregateFunction);
355    set.insert(TokenType::SimpleAggregateFunction);
356    set
357});
358
359/// SIGNED_TO_UNSIGNED_TYPE_TOKEN: Maps signed types to unsigned types
360/// Python: SIGNED_TO_UNSIGNED_TYPE_TOKEN = {TokenType.BIGINT: TokenType.UBIGINT, ...}
361pub static SIGNED_TO_UNSIGNED_TYPE_TOKEN: LazyLock<
362    std::collections::HashMap<TokenType, TokenType>,
363> = LazyLock::new(|| {
364    let mut map = std::collections::HashMap::new();
365    map.insert(TokenType::BigInt, TokenType::UBigInt);
366    map.insert(TokenType::Int, TokenType::UInt);
367    map.insert(TokenType::MediumInt, TokenType::UMediumInt);
368    map.insert(TokenType::SmallInt, TokenType::USmallInt);
369    map.insert(TokenType::TinyInt, TokenType::UTinyInt);
370    map.insert(TokenType::Decimal, TokenType::UDecimal);
371    map.insert(TokenType::Double, TokenType::UDouble);
372    map
373});
374
375/// SUBQUERY_PREDICATES: Tokens that introduce subquery predicates
376/// Python: SUBQUERY_PREDICATES = {TokenType.ANY: exp.Any, ...}
377pub static SUBQUERY_PREDICATES: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
378    let mut set = HashSet::new();
379    set.insert(TokenType::Any);
380    set.insert(TokenType::All);
381    set.insert(TokenType::Exists);
382    set.insert(TokenType::Some);
383    set
384});
385
386/// DB_CREATABLES: Object types that can be created with CREATE
387/// Python: DB_CREATABLES = {TokenType.DATABASE, TokenType.SCHEMA, ...}
388pub static DB_CREATABLES: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
389    let mut set = HashSet::new();
390    set.insert(TokenType::Database);
391    set.insert(TokenType::Dictionary);
392    set.insert(TokenType::FileFormat);
393    set.insert(TokenType::Model);
394    set.insert(TokenType::Namespace);
395    set.insert(TokenType::Schema);
396    set.insert(TokenType::SemanticView);
397    set.insert(TokenType::Sequence);
398    set.insert(TokenType::Sink);
399    set.insert(TokenType::Source);
400    set.insert(TokenType::Stage);
401    set.insert(TokenType::StorageIntegration);
402    set.insert(TokenType::Streamlit);
403    set.insert(TokenType::Table);
404    set.insert(TokenType::Tag);
405    set.insert(TokenType::View);
406    set.insert(TokenType::Warehouse);
407    set
408});
409
410/// RESERVED_TOKENS: Tokens that cannot be used as identifiers without quoting
411/// These are typically structural keywords that affect query parsing
412pub static RESERVED_TOKENS: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
413    let mut set = HashSet::new();
414    // Query structure keywords
415    set.insert(TokenType::Select);
416    set.insert(TokenType::From);
417    set.insert(TokenType::Where);
418    set.insert(TokenType::GroupBy);
419    set.insert(TokenType::OrderBy);
420    set.insert(TokenType::Having);
421    set.insert(TokenType::Limit);
422    set.insert(TokenType::Offset);
423    set.insert(TokenType::Union);
424    set.insert(TokenType::Intersect);
425    set.insert(TokenType::Except);
426    set.insert(TokenType::Join);
427    set.insert(TokenType::On);
428    set.insert(TokenType::With);
429    set.insert(TokenType::Into);
430    set.insert(TokenType::Values);
431    set.insert(TokenType::Set);
432    // DDL keywords
433    set.insert(TokenType::Create);
434    set.insert(TokenType::Drop);
435    set.insert(TokenType::Alter);
436    set.insert(TokenType::Truncate);
437    // DML keywords
438    set.insert(TokenType::Insert);
439    set.insert(TokenType::Update);
440    set.insert(TokenType::Delete);
441    set.insert(TokenType::Merge);
442    // Control flow
443    set.insert(TokenType::Case);
444    set.insert(TokenType::When);
445    set.insert(TokenType::Then);
446    set.insert(TokenType::Else);
447    set.insert(TokenType::End);
448    // Boolean operators
449    set.insert(TokenType::And);
450    set.insert(TokenType::Or);
451    set.insert(TokenType::Not);
452    // Comparison
453    set.insert(TokenType::In);
454    set.insert(TokenType::Is);
455    set.insert(TokenType::Between);
456    set.insert(TokenType::Like);
457    set.insert(TokenType::ILike);
458    set.insert(TokenType::Exists);
459    // Literals
460    set.insert(TokenType::Null);
461    set.insert(TokenType::True);
462    set.insert(TokenType::False);
463    // Punctuation tokens (these are always reserved)
464    set.insert(TokenType::LParen);
465    set.insert(TokenType::RParen);
466    set.insert(TokenType::LBracket);
467    set.insert(TokenType::RBracket);
468    set.insert(TokenType::LBrace);
469    set.insert(TokenType::RBrace);
470    set.insert(TokenType::Comma);
471    set.insert(TokenType::Semicolon);
472    set.insert(TokenType::Star);
473    set.insert(TokenType::Eq);
474    set.insert(TokenType::Neq);
475    set.insert(TokenType::Lt);
476    set.insert(TokenType::Lte);
477    set.insert(TokenType::Gt);
478    set.insert(TokenType::Gte);
479    set
480});
481
482// Note: Function name normalization is handled directly in parse_typed_function
483// by matching all aliases to the same typed expression, following Python SQLGlot's pattern.
484// The generator then outputs dialect-specific names via TRANSFORMS.
485
486/// Recursive-descent SQL parser that converts a token stream into an AST.
487///
488/// The parser consumes a `Vec<Token>` produced by the [`Tokenizer`](crate::tokens::Tokenizer)
489/// and builds a tree of [`Expression`] nodes. It supports the full SQL grammar
490/// including SELECT, DML (INSERT/UPDATE/DELETE/MERGE), DDL (CREATE/ALTER/DROP),
491/// window functions, CTEs, set operations, and 30+ dialect-specific extensions.
492///
493/// # Quick start
494///
495/// For most use cases the static helper [`Parser::parse_sql`] is the simplest entry point:
496///
497/// ```rust,ignore
498/// use polyglot_sql::parser::Parser;
499///
500/// let statements = Parser::parse_sql("SELECT 1; SELECT 2")?;
501/// assert_eq!(statements.len(), 2);
502/// ```
503///
504/// For dialect-aware parsing, use [`Parser::with_config`] or
505/// [`Parser::parse_sql_with_config`].
506pub struct Parser {
507    tokens: Vec<Token>,
508    current: usize,
509    config: ParserConfig,
510    /// Original source SQL (used for preserving exact text in Command expressions)
511    source: Option<String>,
512    /// Comments captured by parse_comparison when no comparison operator follows.
513    /// These are leading comments from the first token of an expression that need
514    /// to be placed by the caller (e.g., after an alias, or after an AND operand).
515    pending_leading_comments: Vec<String>,
516}
517
518/// Configuration for the SQL [`Parser`].
519///
520/// Controls dialect-specific parsing behavior. Most users can rely on the
521/// `Default` implementation; set `dialect` when you need to handle syntax
522/// that is unique to a particular database engine (e.g. BigQuery backtick
523/// quoting, TSQL square-bracket identifiers, Snowflake QUALIFY clause).
524#[derive(Debug, Clone, Default)]
525pub struct ParserConfig {
526    /// Allow trailing commas in SELECT lists (e.g. BigQuery permits `SELECT a, b, FROM t`).
527    pub allow_trailing_commas: bool,
528    /// Dialect type for dialect-specific parsing behavior.
529    pub dialect: Option<crate::dialects::DialectType>,
530}
531
532impl Parser {
533    /// Create a new parser from a pre-tokenized token stream with default configuration.
534    ///
535    /// Prefer [`Parser::parse_sql`] if you are starting from a raw SQL string.
536    pub fn new(tokens: Vec<Token>) -> Self {
537        Self {
538            tokens,
539            current: 0,
540            config: ParserConfig::default(),
541            source: None,
542            pending_leading_comments: Vec::new(),
543        }
544    }
545
546    /// Create a parser from a pre-tokenized token stream with a custom [`ParserConfig`].
547    pub fn with_config(tokens: Vec<Token>, config: ParserConfig) -> Self {
548        Self {
549            tokens,
550            current: 0,
551            config,
552            source: None,
553            pending_leading_comments: Vec::new(),
554        }
555    }
556
557    /// Create a parser with source SQL attached.
558    ///
559    /// The original SQL text is stored so that `Command` expressions (unparsed
560    /// dialect-specific statements) can preserve the exact source verbatim.
561    pub fn with_source(tokens: Vec<Token>, config: ParserConfig, source: String) -> Self {
562        Self {
563            tokens,
564            current: 0,
565            config,
566            source: Some(source),
567            pending_leading_comments: Vec::new(),
568        }
569    }
570
571    /// Parse one or more SQL statements from a raw string.
572    ///
573    /// This is the main entry point for most callers. It tokenizes the input with
574    /// the default [`TokenizerConfig`], then parses all semicolon-separated
575    /// statements and returns them as a `Vec<Expression>`.
576    ///
577    /// # Errors
578    ///
579    /// Returns an error if the input contains invalid tokens or syntax that the
580    /// parser cannot recognize.
581    ///
582    /// # Example
583    ///
584    /// ```rust,ignore
585    /// let stmts = Parser::parse_sql("SELECT a FROM t WHERE x = 1")?;
586    /// ```
587    pub fn parse_sql(sql: &str) -> Result<Vec<Expression>> {
588        let tokenizer = Tokenizer::default();
589        let tokens = tokenizer.tokenize(sql)?;
590        let mut parser = Parser::with_source(tokens, ParserConfig::default(), sql.to_string());
591        parser.parse()
592    }
593
594    /// Parse SQL from a string using a custom [`TokenizerConfig`].
595    ///
596    /// Use this variant when the source dialect requires non-default tokenizer
597    /// settings (e.g. different string quoting or comment syntax).
598    pub fn parse_sql_with_config(
599        sql: &str,
600        tokenizer_config: TokenizerConfig,
601    ) -> Result<Vec<Expression>> {
602        let tokenizer = Tokenizer::new(tokenizer_config);
603        let tokens = tokenizer.tokenize(sql)?;
604        let mut parser = Parser::with_source(tokens, ParserConfig::default(), sql.to_string());
605        parser.parse()
606    }
607
608    /// Parse all remaining statements from the token stream.
609    ///
610    /// Consumes tokens until the end of input, splitting on semicolons.
611    /// Returns one `Expression` per statement.
612    pub fn parse(&mut self) -> Result<Vec<Expression>> {
613        let mut statements = Vec::new();
614
615        while !self.is_at_end() {
616            let mut stmt = self.parse_statement()?;
617
618            // Before consuming the semicolon, capture its leading comments
619            // and attach them to the statement (e.g., SELECT foo\n/* comment */\n;)
620            if self.check(TokenType::Semicolon) {
621                let semi_comments = self.current_leading_comments();
622                if !semi_comments.is_empty() {
623                    stmt = Expression::Annotated(Box::new(Annotated {
624                        this: stmt,
625                        trailing_comments: semi_comments,
626                    }));
627                }
628            }
629
630            // ClickHouse: consume trailing SETTINGS key=val, ... after any statement
631            if matches!(
632                self.config.dialect,
633                Some(crate::dialects::DialectType::ClickHouse)
634            ) && self.check(TokenType::Settings)
635            {
636                self.advance(); // consume SETTINGS
637                let _ = self.parse_settings_property()?;
638            }
639
640            // ClickHouse: consume trailing FORMAT <name> after any statement
641            if matches!(
642                self.config.dialect,
643                Some(crate::dialects::DialectType::ClickHouse)
644            ) && self.check(TokenType::Format)
645            {
646                self.advance(); // consume FORMAT
647                                // Accept any identifier/keyword/Null as format name
648                if self.check(TokenType::Null) {
649                    self.advance();
650                } else if self.is_identifier_token() || self.check_keyword() {
651                    self.advance();
652                }
653            }
654
655            // ClickHouse: PARALLEL WITH between statements (multi-statement execution)
656            if matches!(
657                self.config.dialect,
658                Some(crate::dialects::DialectType::ClickHouse)
659            ) && self.check_identifier("PARALLEL")
660                && self.check_next(TokenType::With)
661            {
662                self.advance(); // consume PARALLEL
663                self.advance(); // consume WITH
664                statements.push(stmt);
665                continue;
666            }
667
668            // After parsing a statement, the next token must be a semicolon or EOF.
669            // If not, there are unconsumed tokens which indicates a parse error.
670            // This matches Python sqlglot's behavior (parser.py line 1826-1827).
671            if !self.is_at_end() && !self.check(TokenType::Semicolon) {
672                if matches!(
673                    self.config.dialect,
674                    Some(crate::dialects::DialectType::ClickHouse)
675                ) {
676                    // ClickHouse fallback: consume unconsumed tokens until semicolon/EOF.
677                    // This matches Python sqlglot's _parse_as_command behavior for
678                    // ClickHouse-specific syntax that we don't fully parse yet.
679                    while !self.is_at_end() && !self.check(TokenType::Semicolon) {
680                        self.advance();
681                    }
682                } else {
683                    return Err(self.parse_error("Invalid expression / Unexpected token"));
684                }
685            }
686
687            // Consume optional semicolons (ClickHouse allows multiple like `;;`)
688            while self.match_token(TokenType::Semicolon) {}
689
690            statements.push(stmt);
691        }
692
693        Ok(statements)
694    }
695
696    /// Parse a single SQL statement from the current position in the token stream.
697    ///
698    /// Dispatches to the appropriate sub-parser based on the leading keyword
699    /// (SELECT, INSERT, CREATE, etc.). Unknown or dialect-specific statements
700    /// fall through to a `Command` expression that preserves the raw SQL text.
701    pub fn parse_statement(&mut self) -> Result<Expression> {
702        // Skip any leading semicolons
703        while self.match_token(TokenType::Semicolon) {}
704
705        if self.is_at_end() {
706            return Err(self.parse_error("Unexpected end of input"));
707        }
708
709        match self.peek().token_type {
710            // Handle hint comment /*+ ... */ before a statement - convert to regular comment
711            TokenType::Hint => {
712                let hint_token = self.advance();
713                let hint_text = hint_token.text.clone();
714                // Convert hint to regular comment (preserve the + as part of the content)
715                let comment = format!("/* + {} */", hint_text.trim());
716
717                // Parse the following statement
718                let mut stmt = self.parse_statement()?;
719
720                // Attach the comment to the statement's leading_comments
721                match &mut stmt {
722                    Expression::Select(select) => {
723                        select.leading_comments.insert(0, comment);
724                    }
725                    Expression::Insert(insert) => {
726                        insert.leading_comments.insert(0, comment);
727                    }
728                    Expression::Update(update) => {
729                        update.leading_comments.insert(0, comment);
730                    }
731                    Expression::Delete(delete) => {
732                        delete.leading_comments.insert(0, comment);
733                    }
734                    Expression::CreateTable(ct) => {
735                        ct.leading_comments.insert(0, comment);
736                    }
737                    _ => {
738                        // For other statement types, we can't attach comments
739                        // but at least the statement parses successfully
740                    }
741                }
742                Ok(stmt)
743            }
744            TokenType::Select => self.parse_select(),
745            TokenType::With => self.parse_with(),
746            TokenType::Insert => self.parse_insert(),
747            TokenType::Replace => self.parse_replace(),
748            TokenType::Update => self.parse_update(),
749            TokenType::Delete => self.parse_delete(),
750            TokenType::Create => self.parse_create(),
751            TokenType::Drop => self.parse_drop(),
752            TokenType::Alter => self.parse_alter(),
753            TokenType::Truncate => {
754                // TRUNCATE could be TRUNCATE TABLE (statement) or TRUNCATE(a, b) (function)
755                // Check if followed by ( to determine which
756                if self.check_next(TokenType::LParen) {
757                    // TRUNCATE(a, b) - function call
758                    self.parse_expression()
759                } else {
760                    self.parse_truncate()
761                }
762            }
763            TokenType::Values => {
764                // VALUES could be VALUES(...) statement or VALUES 1, 2, 3 (bare values)
765                if self.check_next(TokenType::LParen)
766                    || self.check_next(TokenType::Number)
767                    || self.check_next(TokenType::String)
768                {
769                    self.parse_values()
770                } else {
771                    // "values" by itself is an identifier/expression
772                    self.parse_expression()
773                }
774            }
775            TokenType::Use => self.parse_use(),
776            TokenType::Cache => self.parse_cache(),
777            TokenType::Uncache => self.parse_uncache(),
778            TokenType::Refresh => {
779                self.advance(); // consume REFRESH
780                self.parse_refresh()?
781                    .ok_or_else(|| self.parse_error("Failed to parse REFRESH statement"))
782            }
783            TokenType::Load => self.parse_load_data(),
784            TokenType::Grant => self.parse_grant(),
785            TokenType::Revoke => self.parse_revoke(),
786            TokenType::Comment => self.parse_comment(),
787            TokenType::Merge => {
788                self.advance(); // consume MERGE
789                self.parse_merge()?
790                    .ok_or_else(|| self.parse_error("Failed to parse MERGE statement"))
791            }
792            TokenType::Set => self.parse_set(),
793            TokenType::Database
794                if matches!(
795                    self.config.dialect,
796                    Some(crate::dialects::DialectType::Teradata)
797                ) =>
798            {
799                // Teradata: DATABASE tduser -> USE tduser
800                self.advance(); // consume DATABASE
801                let name = self.expect_identifier_or_keyword()?;
802                Ok(Expression::Use(Box::new(Use {
803                    kind: None,
804                    this: Identifier::new(name),
805                })))
806            }
807            TokenType::Lock
808                if matches!(
809                    self.config.dialect,
810                    Some(crate::dialects::DialectType::Teradata)
811                ) =>
812            {
813                self.parse_locking_statement()
814            }
815            TokenType::Command => {
816                self.advance(); // consume command keyword
817                self.parse_command()?
818                    .ok_or_else(|| self.parse_error("Failed to parse COMMAND statement"))
819            }
820            TokenType::Rename
821                if matches!(
822                    self.config.dialect,
823                    Some(crate::dialects::DialectType::Teradata)
824                        | Some(crate::dialects::DialectType::ClickHouse)
825                ) =>
826            {
827                self.advance(); // consume RENAME
828                self.parse_command()?
829                    .ok_or_else(|| self.parse_error("Failed to parse RENAME statement"))
830            }
831            TokenType::Pragma => self.parse_pragma(),
832            TokenType::Rollback => self.parse_rollback(),
833            TokenType::Commit => self.parse_commit(),
834            TokenType::Begin => self.parse_transaction(),
835            TokenType::End => {
836                // In PostgreSQL, END is an alias for COMMIT (END [WORK|TRANSACTION])
837                // In TSQL and other dialects, END is a block delimiter (BEGIN...END)
838                if matches!(
839                    self.config.dialect,
840                    Some(crate::dialects::DialectType::PostgreSQL)
841                ) {
842                    self.parse_end_transaction()
843                } else {
844                    self.advance(); // consume END
845                    Ok(Expression::Command(Box::new(Command {
846                        this: "END".to_string(),
847                    })))
848                }
849            }
850            TokenType::Start => self.parse_start_transaction(),
851            TokenType::Describe | TokenType::Desc => self.parse_describe(),
852            TokenType::Show => self.parse_show(),
853            TokenType::Copy => self.parse_copy(),
854            TokenType::Put => self.parse_put(),
855            TokenType::Kill
856                if matches!(
857                    self.config.dialect,
858                    Some(crate::dialects::DialectType::ClickHouse)
859                ) =>
860            {
861                self.advance(); // consume KILL
862                self.parse_command()?
863                    .ok_or_else(|| self.parse_error("Failed to parse KILL statement"))
864            }
865            TokenType::Kill => self.parse_kill(),
866            TokenType::Execute => {
867                // ClickHouse: EXECUTE AS username statement → parse as command
868                if matches!(
869                    self.config.dialect,
870                    Some(crate::dialects::DialectType::ClickHouse)
871                ) {
872                    self.advance(); // consume EXECUTE
873                    self.parse_command()?
874                        .ok_or_else(|| self.parse_error("Failed to parse EXECUTE statement"))
875                } else {
876                    self.parse_execute()
877                }
878            }
879            TokenType::Declare => {
880                self.advance(); // consume DECLARE
881                self.parse_declare()?
882                    .ok_or_else(|| self.parse_error("Failed to parse DECLARE statement"))
883            }
884            // GET is a command when followed by @ (stage reference) or by anything other than (
885            // If followed by ( it should be parsed as GET() function, so fall through to expression parsing
886            TokenType::Get
887                if self.check_next(TokenType::DAt) || !self.check_next(TokenType::LParen) =>
888            {
889                self.parse_get_command()
890            }
891            TokenType::Var
892                if self.peek().text.eq_ignore_ascii_case("RM")
893                    || self.peek().text.eq_ignore_ascii_case("REMOVE") =>
894            {
895                self.parse_rm_command()
896            }
897            TokenType::Var if self.peek().text.eq_ignore_ascii_case("CALL") => self.parse_call(),
898            TokenType::Var
899                if self.peek().text.eq_ignore_ascii_case("EXCHANGE")
900                    && matches!(
901                        self.config.dialect,
902                        Some(crate::dialects::DialectType::ClickHouse)
903                    ) =>
904            {
905                self.advance(); // consume EXCHANGE
906                self.parse_command()?
907                    .ok_or_else(|| self.parse_error("Failed to parse EXCHANGE statement"))
908            }
909            // EXPLAIN is treated as DESCRIBE (MySQL maps EXPLAIN -> DESCRIBE)
910            TokenType::Var if self.peek().text.eq_ignore_ascii_case("EXPLAIN") => {
911                self.parse_describe()
912            }
913            // LOCK TABLES / UNLOCK TABLES (MySQL)
914            TokenType::Var
915                if self.peek().text.eq_ignore_ascii_case("LOCK")
916                    || self.peek().text.eq_ignore_ascii_case("UNLOCK") =>
917            {
918                self.advance(); // consume LOCK/UNLOCK
919                self.parse_command()?
920                    .ok_or_else(|| self.parse_error("Failed to parse LOCK/UNLOCK statement"))
921            }
922            TokenType::Var if self.peek().text.eq_ignore_ascii_case("ANALYZE") => {
923                self.advance(); // consume ANALYZE
924                self.parse_analyze()?
925                    .ok_or_else(|| self.parse_error("Failed to parse ANALYZE statement"))
926            }
927            // TSQL: PRINT expression
928            TokenType::Var if self.peek().text.eq_ignore_ascii_case("PRINT") => {
929                self.advance(); // consume PRINT
930                self.parse_command()?
931                    .ok_or_else(|| self.parse_error("Failed to parse PRINT statement"))
932            }
933            // ClickHouse: CHECK TABLE t [PARTITION p] [SETTINGS ...]
934            TokenType::Check
935                if matches!(
936                    self.config.dialect,
937                    Some(crate::dialects::DialectType::ClickHouse)
938                ) =>
939            {
940                self.advance(); // consume CHECK
941                self.parse_command()?
942                    .ok_or_else(|| self.parse_error("Failed to parse CHECK statement"))
943            }
944            // ClickHouse: SETTINGS key=value, ... (standalone statement or after another statement)
945            TokenType::Settings
946                if matches!(
947                    self.config.dialect,
948                    Some(crate::dialects::DialectType::ClickHouse)
949                ) =>
950            {
951                self.advance(); // consume SETTINGS
952                self.parse_command()?
953                    .ok_or_else(|| self.parse_error("Failed to parse SETTINGS statement"))
954            }
955            // ClickHouse: SYSTEM STOP/START MERGES, etc.
956            TokenType::System
957                if matches!(
958                    self.config.dialect,
959                    Some(crate::dialects::DialectType::ClickHouse)
960                ) =>
961            {
962                self.advance(); // consume SYSTEM
963                self.parse_command()?
964                    .ok_or_else(|| self.parse_error("Failed to parse SYSTEM statement"))
965            }
966            // ClickHouse: RENAME TABLE db.t1 TO db.t2 [, db.t3 TO db.t4 ...]
967            TokenType::Var
968                if self.peek().text.eq_ignore_ascii_case("RENAME")
969                    && matches!(
970                        self.config.dialect,
971                        Some(crate::dialects::DialectType::ClickHouse)
972                    ) =>
973            {
974                self.advance(); // consume RENAME
975                self.parse_command()?
976                    .ok_or_else(|| self.parse_error("Failed to parse RENAME statement"))
977            }
978            // ClickHouse: OPTIMIZE TABLE t [FINAL] [DEDUPLICATE [BY ...]]
979            TokenType::Var
980                if self.peek().text.eq_ignore_ascii_case("OPTIMIZE")
981                    && matches!(
982                        self.config.dialect,
983                        Some(crate::dialects::DialectType::ClickHouse)
984                    ) =>
985            {
986                self.advance(); // consume OPTIMIZE
987                self.parse_command()?
988                    .ok_or_else(|| self.parse_error("Failed to parse OPTIMIZE statement"))
989            }
990            // ClickHouse: EXISTS [TEMPORARY] TABLE/DATABASE/DICTIONARY ...
991            TokenType::Exists
992                if matches!(
993                    self.config.dialect,
994                    Some(crate::dialects::DialectType::ClickHouse)
995                ) && !self.check_next(TokenType::LParen) =>
996            {
997                self.advance(); // consume EXISTS
998                self.parse_command()?
999                    .ok_or_else(|| self.parse_error("Failed to parse EXISTS statement"))
1000            }
1001            // ClickHouse: EXISTS ... statement when EXISTS arrives as a Var token rather than TokenType::Exists
1002            TokenType::Var
1003                if self.peek().text.eq_ignore_ascii_case("EXISTS")
1004                    && matches!(
1005                        self.config.dialect,
1006                        Some(crate::dialects::DialectType::ClickHouse)
1007                    ) =>
1008            {
1009                self.advance(); // consume EXISTS
1010                self.parse_command()?
1011                    .ok_or_else(|| self.parse_error("Failed to parse EXISTS statement"))
1012            }
1013            // DuckDB: ATTACH [DATABASE] [IF NOT EXISTS] 'path' [AS alias] [(options)]
1014            TokenType::Var if self.peek().text.eq_ignore_ascii_case("ATTACH") => {
1015                self.advance(); // consume ATTACH
1016                if matches!(
1017                    self.config.dialect,
1018                    Some(crate::dialects::DialectType::ClickHouse)
1019                ) {
1020                    self.parse_command()?
1021                        .ok_or_else(|| self.parse_error("Failed to parse ATTACH statement"))
1022                } else {
1023                    self.parse_attach_detach(true)
1024                }
1025            }
1026            // ClickHouse: UNDROP TABLE [IF EXISTS] ... [UUID '...'] [ON CLUSTER ...]
1027            TokenType::Var
1028                if self.peek().text.eq_ignore_ascii_case("UNDROP")
1029                    && matches!(
1030                        self.config.dialect,
1031                        Some(crate::dialects::DialectType::ClickHouse)
1032                    ) =>
1033            {
1034                self.advance(); // consume UNDROP
1035                self.parse_command()?
1036                    .ok_or_else(|| self.parse_error("Failed to parse UNDROP statement"))
1037            }
1038            // ClickHouse: DETACH TABLE [IF EXISTS] ... [ON CLUSTER ...]
1039            TokenType::Var
1040                if self.peek().text.eq_ignore_ascii_case("DETACH")
1041                    && matches!(
1042                        self.config.dialect,
1043                        Some(crate::dialects::DialectType::ClickHouse)
1044                    ) =>
1045            {
1046                self.advance(); // consume DETACH
1047                self.parse_command()?
1048                    .ok_or_else(|| self.parse_error("Failed to parse DETACH statement"))
1049            }
1050            // DuckDB: DETACH [DATABASE] [IF EXISTS] name
1051            TokenType::Var if self.peek().text.eq_ignore_ascii_case("DETACH") => {
1052                self.advance(); // consume DETACH
1053                self.parse_attach_detach(false)
1054            }
1055            // DuckDB: INSTALL extension [FROM source]
1056            TokenType::Var if self.peek().text.eq_ignore_ascii_case("INSTALL") => {
1057                self.advance(); // consume INSTALL
1058                self.parse_install(false)
1059            }
1060            // DuckDB: FORCE INSTALL extension | FORCE CHECKPOINT db
1061            TokenType::Var if self.peek().text.eq_ignore_ascii_case("FORCE") => {
1062                self.advance(); // consume FORCE
1063                self.parse_force_statement()
1064            }
1065            // DuckDB: SUMMARIZE [TABLE] expression
1066            TokenType::Var if self.peek().text.eq_ignore_ascii_case("SUMMARIZE") => {
1067                self.advance(); // consume SUMMARIZE
1068                self.parse_summarize_statement()
1069            }
1070            // DuckDB: RESET [SESSION|GLOBAL|LOCAL] variable
1071            TokenType::Var if self.peek().text.eq_ignore_ascii_case("RESET") => {
1072                self.advance(); // consume RESET
1073                self.parse_as_command()?
1074                    .ok_or_else(|| self.parse_error("Failed to parse RESET statement"))
1075            }
1076            // DuckDB statement-level PIVOT/UNPIVOT/PIVOT_WIDER syntax
1077            TokenType::Pivot => {
1078                self.advance(); // consume PIVOT
1079                self.parse_simplified_pivot(false)?
1080                    .ok_or_else(|| self.parse_error("Failed to parse PIVOT statement"))
1081            }
1082            TokenType::Unpivot => {
1083                self.advance(); // consume UNPIVOT
1084                self.parse_simplified_pivot(true)?
1085                    .ok_or_else(|| self.parse_error("Failed to parse UNPIVOT statement"))
1086            }
1087            // DuckDB: PIVOT_WIDER is an alias for PIVOT
1088            TokenType::Var if self.peek().text.eq_ignore_ascii_case("PIVOT_WIDER") => {
1089                self.advance(); // consume PIVOT_WIDER
1090                self.parse_simplified_pivot(false)?
1091                    .ok_or_else(|| self.parse_error("Failed to parse PIVOT_WIDER statement"))
1092            }
1093            // BigQuery procedural FOR...IN...DO loop
1094            TokenType::For => {
1095                self.advance(); // consume FOR
1096                self.parse_for_in()
1097            }
1098            // BigQuery/procedural LOOP, REPEAT, WHILE control flow statements
1099            TokenType::Var if self.peek().text.eq_ignore_ascii_case("LOOP") => {
1100                self.advance(); // consume LOOP
1101                self.parse_command()?
1102                    .ok_or_else(|| self.parse_error("Failed to parse LOOP statement"))
1103            }
1104            TokenType::Var if self.peek().text.eq_ignore_ascii_case("REPEAT") => {
1105                self.advance(); // consume REPEAT
1106                self.parse_command()?
1107                    .ok_or_else(|| self.parse_error("Failed to parse REPEAT statement"))
1108            }
1109            TokenType::Var if self.peek().text.eq_ignore_ascii_case("WHILE") => {
1110                self.advance(); // consume WHILE
1111                self.parse_command()?
1112                    .ok_or_else(|| self.parse_error("Failed to parse WHILE statement"))
1113            }
1114            // Athena/Presto: UNLOAD (SELECT ...) TO 'location' WITH (options)
1115            TokenType::Var if self.peek().text.eq_ignore_ascii_case("UNLOAD") => {
1116                self.parse_unload()
1117            }
1118            // Athena: USING EXTERNAL FUNCTION ... SELECT ...
1119            TokenType::Using => self.parse_using_external_function(),
1120            // BigQuery: EXPORT DATA [WITH CONNECTION conn] OPTIONS (...) AS SELECT ...
1121            TokenType::Var if self.peek().text.eq_ignore_ascii_case("EXPORT") => {
1122                self.parse_export_data()
1123            }
1124            // Presto/Trino: DEALLOCATE PREPARE <name>
1125            TokenType::Var if self.peek().text.eq_ignore_ascii_case("DEALLOCATE") => {
1126                self.parse_deallocate_prepare()
1127            }
1128            // DuckDB FROM-first syntax: FROM tbl = SELECT * FROM tbl
1129            TokenType::From => self.parse_from_first_query(),
1130            TokenType::LParen => {
1131                // Check if this is a parenthesized query (SELECT, WITH, PIVOT, UNPIVOT, FROM, or EXPLAIN inside)
1132                // by looking ahead after the opening paren
1133                let next_is_explain = self.current + 1 < self.tokens.len()
1134                    && self.tokens[self.current + 1].token_type == TokenType::Var
1135                    && self.tokens[self.current + 1]
1136                        .text
1137                        .eq_ignore_ascii_case("EXPLAIN");
1138                if self.check_next(TokenType::Select)
1139                    || self.check_next(TokenType::With)
1140                    || self.check_next(TokenType::Pivot)
1141                    || self.check_next(TokenType::Unpivot)
1142                    || self.check_next(TokenType::From)
1143                    || next_is_explain
1144                {
1145                    // Parse parenthesized query: (SELECT ...) ORDER BY x LIMIT y OFFSET z
1146                    self.advance(); // consume (
1147                    let inner = self.parse_statement()?;
1148                    self.expect(TokenType::RParen)?;
1149                    // Wrap in Subquery to preserve parentheses when used in set operations
1150                    let subquery = Expression::Subquery(Box::new(Subquery {
1151                        this: inner,
1152                        alias: None,
1153                        column_aliases: Vec::new(),
1154                        order_by: None,
1155                        limit: None,
1156                        offset: None,
1157                        distribute_by: None,
1158                        sort_by: None,
1159                        cluster_by: None,
1160                        lateral: false,
1161                        modifiers_inside: false,
1162                        trailing_comments: Vec::new(),
1163                        inferred_type: None,
1164                    }));
1165                    // Check for set operations after the parenthesized query
1166                    let result = self.parse_set_operation(subquery)?;
1167                    // Check for ORDER BY, LIMIT, OFFSET after parenthesized subquery
1168                    self.parse_query_modifiers(result)
1169                } else if self.check_next(TokenType::LParen) {
1170                    // Nested parentheses - could be ((SELECT...)) or ((a, b))
1171                    // For deeply nested queries like (((SELECT 1) UNION SELECT 1) UNION SELECT 1),
1172                    // recurse into parse_statement to handle the inner parenthesized query with set ops
1173                    self.advance(); // consume (
1174                    let inner = self.parse_statement()?;
1175                    // Check for set operations inside the outer parens
1176                    let result = self.parse_set_operation(inner)?;
1177                    self.expect(TokenType::RParen)?;
1178                    let subquery = Expression::Subquery(Box::new(Subquery {
1179                        this: result,
1180                        alias: None,
1181                        column_aliases: Vec::new(),
1182                        order_by: None,
1183                        limit: None,
1184                        offset: None,
1185                        distribute_by: None,
1186                        sort_by: None,
1187                        cluster_by: None,
1188                        lateral: false,
1189                        modifiers_inside: false,
1190                        trailing_comments: Vec::new(),
1191                        inferred_type: None,
1192                    }));
1193                    // Check for set operations after the outer parenthesized query
1194                    let result = self.parse_set_operation(subquery)?;
1195                    let pre_alias_comments = self.previous_trailing_comments();
1196                    if self.match_token(TokenType::As) {
1197                        let alias = self.expect_identifier_or_keyword_with_quoted()?;
1198                        let trailing_comments = self.previous_trailing_comments();
1199                        Ok(Expression::Alias(Box::new(Alias {
1200                            this: result,
1201                            alias,
1202                            column_aliases: Vec::new(),
1203                            pre_alias_comments,
1204                            trailing_comments,
1205                            inferred_type: None,
1206                        })))
1207                    } else {
1208                        // Check for LIMIT/OFFSET after parenthesized expression
1209                        // e.g., ((SELECT 1)) LIMIT 1
1210                        self.parse_query_modifiers(result)
1211                    }
1212                } else {
1213                    // Regular parenthesized expression like (a, b) or (x)
1214                    // Let parse_expression handle it
1215                    let expr = self.parse_expression()?;
1216                    let pre_alias_comments = self.previous_trailing_comments();
1217                    if self.match_token(TokenType::As) {
1218                        // Check for tuple alias: AS ("a", "b", ...)
1219                        if self.match_token(TokenType::LParen) {
1220                            let mut column_aliases = Vec::new();
1221                            loop {
1222                                let col_alias = self.expect_identifier_or_keyword_with_quoted()?;
1223                                column_aliases.push(col_alias);
1224                                if !self.match_token(TokenType::Comma) {
1225                                    break;
1226                                }
1227                            }
1228                            self.expect(TokenType::RParen)?;
1229                            let trailing_comments = self.previous_trailing_comments();
1230                            Ok(Expression::Alias(Box::new(Alias {
1231                                this: expr,
1232                                alias: Identifier::empty(),
1233                                column_aliases,
1234                                pre_alias_comments,
1235                                trailing_comments,
1236                                inferred_type: None,
1237                            })))
1238                        } else {
1239                            let alias = self.expect_identifier_or_keyword_with_quoted()?;
1240                            let trailing_comments = self.previous_trailing_comments();
1241                            Ok(Expression::Alias(Box::new(Alias {
1242                                this: expr,
1243                                alias,
1244                                column_aliases: Vec::new(),
1245                                pre_alias_comments,
1246                                trailing_comments,
1247                                inferred_type: None,
1248                            })))
1249                        }
1250                    } else {
1251                        Ok(expr)
1252                    }
1253                }
1254            }
1255            _ => {
1256                // Capture leading comments from the first token before parsing
1257                let leading_comments = self.current_leading_comments();
1258                // Parse expression and check for optional alias
1259                let expr = self.parse_expression()?;
1260                // Capture any comments between expression and AS keyword
1261                let pre_alias_comments = self.previous_trailing_comments();
1262                if self.match_token(TokenType::As) {
1263                    // Capture comments from AS token (e.g., AS /* foo */ (a, b, c))
1264                    // These go into trailing_comments (after the alias), not pre_alias_comments
1265                    let as_comments = self.previous_trailing_comments();
1266                    // Check for tuple alias: AS ("a", "b", ...)
1267                    if self.match_token(TokenType::LParen) {
1268                        let mut column_aliases = Vec::new();
1269                        loop {
1270                            let col_alias = self.expect_identifier_or_keyword_with_quoted()?;
1271                            column_aliases.push(col_alias);
1272                            if !self.match_token(TokenType::Comma) {
1273                                break;
1274                            }
1275                        }
1276                        self.expect(TokenType::RParen)?;
1277                        let mut trailing_comments = as_comments;
1278                        trailing_comments.extend(self.previous_trailing_comments());
1279                        Ok(Expression::Alias(Box::new(Alias {
1280                            this: expr,
1281                            alias: Identifier::empty(),
1282                            column_aliases,
1283                            pre_alias_comments,
1284                            trailing_comments,
1285                            inferred_type: None,
1286                        })))
1287                    } else {
1288                        let alias = self.expect_identifier_or_keyword_with_quoted()?;
1289                        let mut trailing_comments = self.previous_trailing_comments();
1290                        // If there were leading comments on the expression (from a separate line),
1291                        // add them as trailing comments after the alias
1292                        trailing_comments.extend(leading_comments.iter().cloned());
1293                        Ok(Expression::Alias(Box::new(Alias {
1294                            this: expr,
1295                            alias,
1296                            column_aliases: Vec::new(),
1297                            pre_alias_comments,
1298                            trailing_comments,
1299                            inferred_type: None,
1300                        })))
1301                    }
1302                } else if (self.check(TokenType::Var) && !self.check_keyword())
1303                    || self.is_command_keyword_as_alias()
1304                {
1305                    // Implicit alias (without AS) - e.g., "1. x" or "1.x" -> "1. AS x"
1306                    // This handles cases like PostgreSQL's "1.x" which parses as float 1. with alias x
1307                    let alias_text = self.advance().text.clone();
1308                    let trailing_comments = self.previous_trailing_comments();
1309                    Ok(Expression::Alias(Box::new(Alias {
1310                        this: expr,
1311                        alias: Identifier::new(alias_text),
1312                        column_aliases: Vec::new(),
1313                        pre_alias_comments,
1314                        trailing_comments,
1315                        inferred_type: None,
1316                    })))
1317                } else if !pre_alias_comments.is_empty() {
1318                    // Wrap in Annotated to preserve trailing comments for expressions without aliases
1319                    match &expr {
1320                        Expression::Literal(_) | Expression::Boolean(_) | Expression::Null(_) => {
1321                            Ok(Expression::Annotated(Box::new(
1322                                crate::expressions::Annotated {
1323                                    this: expr,
1324                                    trailing_comments: pre_alias_comments,
1325                                },
1326                            )))
1327                        }
1328                        // For expressions that already have trailing_comments fields, don't double-wrap
1329                        _ => Ok(expr),
1330                    }
1331                } else if !leading_comments.is_empty() {
1332                    // Wrap in Annotated to preserve leading comments as trailing comments
1333                    // This matches Python sqlglot which converts leading line comments to trailing block comments
1334                    Ok(Expression::Annotated(Box::new(
1335                        crate::expressions::Annotated {
1336                            this: expr,
1337                            trailing_comments: leading_comments,
1338                        },
1339                    )))
1340                } else {
1341                    Ok(expr)
1342                }
1343            }
1344        }
1345    }
1346
1347    /// Parse a SELECT statement
1348    fn parse_select(&mut self) -> Result<Expression> {
1349        // Capture the SELECT token to get its comments
1350        let select_token = self.expect(TokenType::Select)?;
1351        let leading_comments = select_token.comments;
1352        let post_select_comments = select_token.trailing_comments;
1353
1354        // Parse query hint /*+ ... */ if present (comes immediately after SELECT)
1355        let hint = if self.check(TokenType::Hint) {
1356            Some(self.parse_hint()?)
1357        } else {
1358            None
1359        };
1360
1361        // Parse TOP clause (SQL Server style - comes before DISTINCT)
1362        // But not if TOP is followed by DOT (e.g., SELECT top.x - top is a table alias)
1363        let top = if self.check(TokenType::Top)
1364            && !self.check_next(TokenType::Dot)
1365            && self.match_token(TokenType::Top)
1366        {
1367            // TOP can have parentheses: TOP (10) or without: TOP 10
1368            let (amount, parenthesized) = if self.match_token(TokenType::LParen) {
1369                let expr = self.parse_expression()?;
1370                self.expect(TokenType::RParen)?;
1371                (expr, true)
1372            } else {
1373                (self.parse_primary()?, false)
1374            };
1375            let percent = self.match_token(TokenType::Percent);
1376            let with_ties = self.match_keywords(&[TokenType::With, TokenType::Ties]);
1377            Some(Top {
1378                this: amount,
1379                percent,
1380                with_ties,
1381                parenthesized,
1382            })
1383        } else {
1384            None
1385        };
1386
1387        // Parse DISTINCT / DISTINCT ON / DISTINCTROW / ALL
1388        // Oracle: UNIQUE is equivalent to DISTINCT (SELECT UNIQUE ... is old-style Oracle syntax)
1389        let is_distinct_token = self.match_token(TokenType::Distinct)
1390            || (matches!(
1391                self.config.dialect,
1392                Some(crate::dialects::DialectType::Oracle)
1393            ) && self.match_token(TokenType::Unique));
1394        let (distinct, distinct_on) = if is_distinct_token {
1395            if self.match_token(TokenType::On) {
1396                // DISTINCT ON (expr, ...)
1397                self.expect(TokenType::LParen)?;
1398                let exprs = self.parse_expression_list()?;
1399                self.expect(TokenType::RParen)?;
1400                (true, Some(exprs))
1401            } else {
1402                (true, None)
1403            }
1404        } else if self.check_identifier("DISTINCTROW") {
1405            // MySQL DISTINCTROW - equivalent to DISTINCT
1406            self.advance();
1407            (true, None)
1408        } else {
1409            // Only consume ALL if it's the SELECT ALL modifier, not if it's a column reference like "all.count"
1410            if self.check(TokenType::All) && !self.check_next(TokenType::Dot) {
1411                self.advance();
1412            }
1413            (false, None)
1414        };
1415
1416        // TSQL: SELECT DISTINCT TOP n - TOP can come after DISTINCT
1417        // If no TOP was parsed before DISTINCT, check for TOP after DISTINCT
1418        let top = if top.is_none()
1419            && self.check(TokenType::Top)
1420            && !self.check_next(TokenType::Dot)
1421            && self.match_token(TokenType::Top)
1422        {
1423            let (amount, parenthesized) = if self.match_token(TokenType::LParen) {
1424                let expr = self.parse_expression()?;
1425                self.expect(TokenType::RParen)?;
1426                (expr, true)
1427            } else {
1428                (self.parse_primary()?, false)
1429            };
1430            let percent = self.match_token(TokenType::Percent);
1431            let with_ties = self.match_keywords(&[TokenType::With, TokenType::Ties]);
1432            Some(Top {
1433                this: amount,
1434                percent,
1435                with_ties,
1436                parenthesized,
1437            })
1438        } else {
1439            top
1440        };
1441
1442        // Parse MySQL operation modifiers (HIGH_PRIORITY, STRAIGHT_JOIN, SQL_CALC_FOUND_ROWS, etc.)
1443        // These appear after DISTINCT/ALL and before the projections
1444        // Only apply for MySQL-family dialects - other dialects treat these as identifiers
1445        let mut operation_modifiers = Vec::new();
1446        let is_mysql_dialect = matches!(
1447            self.config.dialect,
1448            Some(crate::dialects::DialectType::MySQL)
1449                | Some(crate::dialects::DialectType::SingleStore)
1450                | Some(crate::dialects::DialectType::StarRocks)
1451                | Some(crate::dialects::DialectType::TiDB)
1452                | Some(crate::dialects::DialectType::Doris)
1453        );
1454        if is_mysql_dialect {
1455            const MYSQL_MODIFIERS: &[&str] = &[
1456                "HIGH_PRIORITY",
1457                "STRAIGHT_JOIN",
1458                "SQL_SMALL_RESULT",
1459                "SQL_BIG_RESULT",
1460                "SQL_BUFFER_RESULT",
1461                "SQL_NO_CACHE",
1462                "SQL_CALC_FOUND_ROWS",
1463            ];
1464            loop {
1465                if self.check(TokenType::StraightJoin) {
1466                    self.advance();
1467                    operation_modifiers.push("STRAIGHT_JOIN".to_string());
1468                } else if self.check(TokenType::Var) {
1469                    let upper = self.peek().text.to_uppercase();
1470                    if MYSQL_MODIFIERS.contains(&upper.as_str()) {
1471                        self.advance();
1472                        operation_modifiers.push(upper);
1473                    } else {
1474                        break;
1475                    }
1476                } else {
1477                    break;
1478                }
1479            }
1480        }
1481
1482        // Parse BigQuery SELECT AS STRUCT / SELECT AS VALUE
1483        let kind = if self.match_token(TokenType::As) {
1484            if self.match_identifier("STRUCT") {
1485                Some("STRUCT".to_string())
1486            } else if self.match_identifier("VALUE") {
1487                Some("VALUE".to_string())
1488            } else {
1489                // Not AS STRUCT/VALUE, backtrack the AS token
1490                self.current -= 1;
1491                None
1492            }
1493        } else {
1494            None
1495        };
1496
1497        // Parse select expressions
1498        let mut expressions = self.parse_select_expressions()?;
1499
1500        // Redshift: EXCLUDE clause at the end of the projection list
1501        // e.g., SELECT *, 4 AS col4 EXCLUDE (col2, col3) FROM ...
1502        // e.g., SELECT col1, *, col2 EXCLUDE(col3) FROM ...
1503        // e.g., SELECT *, 4 AS col4 EXCLUDE col2, col3 FROM ...
1504        // In Python sqlglot, this is handled by overriding _parse_projections in the Redshift parser.
1505        // The EXCLUDE clause is separate from * EXCLUDE — it applies to the entire projection list.
1506        let exclude = if matches!(
1507            self.config.dialect,
1508            Some(crate::dialects::DialectType::Redshift)
1509        ) {
1510            // Check if previous token was EXCLUDE (parsed as implicit alias).
1511            // e.g., SELECT *, 4 AS col4 EXCLUDE col2, col3 FROM ...
1512            //   → "col4 EXCLUDE" was parsed as (col4 aliased-as EXCLUDE), then "col2" as next projection
1513            //   → We need to strip the EXCLUDE alias from the last projection and retreat
1514            // Also handle: EXCLUDE was consumed as a bare column name if no AS was present
1515            let mut retreat_for_exclude = false;
1516            if let Some(last_expr) = expressions.last() {
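1517                // Only the alias shape is handled here: EXCLUDE was consumed as the last projection's alias, as may happen for "col2 EXCLUDE(col3)".
1518                // A separate bare column named "EXCLUDE" (e.g. from "4 AS col4 EXCLUDE" without parens) should not occur because of the comma break.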
1519                match last_expr {
1520                    Expression::Alias(alias) if alias.alias.name.eq_ignore_ascii_case("EXCLUDE") => {
1521                        // The last expression is "something AS EXCLUDE" or implicit alias EXCLUDE
1522                        // Strip the alias and check if EXCLUDE is followed by paren or identifier
1523                        if self.check(TokenType::LParen) || self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
1524                            // Strip the EXCLUDE alias from the last expression
1525                            let stripped = alias.this.clone();
1526                            if let Some(last) = expressions.last_mut() {
1527                                *last = stripped;
1528                            }
1529                            retreat_for_exclude = true;
1530                        }
1531                    }
1532                    _ => {}
1533                }
1534            }
1535
1536            if retreat_for_exclude || self.check(TokenType::Exclude) {
1537                if !retreat_for_exclude {
1538                    self.advance(); // consume EXCLUDE
1539                }
1540                // Parse EXCLUDE columns - with or without parens
1541                let mut exclude_cols = Vec::new();
1542                if self.match_token(TokenType::LParen) {
1543                    // Parenthesized list: EXCLUDE (col1, col2, ...)
1544                    loop {
1545                        let col_expr = self.parse_expression()?;
1546                        exclude_cols.push(col_expr);
1547                        if !self.match_token(TokenType::Comma) {
1548                            break;
1549                        }
1550                    }
1551                    self.match_token(TokenType::RParen);
1552                } else {
1553                    // Non-parenthesized: EXCLUDE col1, col2, ...
1554                    // Parse comma-separated identifiers until FROM or other clause boundary
1555                    loop {
1556                        if self.is_at_end() || self.check(TokenType::From) || self.check(TokenType::Where)
1557                            || self.check(TokenType::Semicolon) || self.check(TokenType::RParen)
1558                        {
1559                            break;
1560                        }
1561                        let col_expr = self.parse_expression()?;
1562                        exclude_cols.push(col_expr);
1563                        if !self.match_token(TokenType::Comma) {
1564                            break;
1565                        }
1566                    }
1567                }
1568                if exclude_cols.is_empty() { None } else { Some(exclude_cols) }
1569            } else {
1570                None
1571            }
1572        } else {
1573            None
1574        };
1575
1576        // Parse INTO clause (SELECT ... INTO [TEMPORARY|UNLOGGED] table_name)
1577        // Also handles Oracle PL/SQL: BULK COLLECT INTO v1, v2, ...
1578        let into = if self.match_text_seq(&["BULK", "COLLECT", "INTO"]) {
1579            // Oracle PL/SQL: BULK COLLECT INTO var1, var2, ...
1580            // Parse target variables as a comma-separated list
1581            let mut target_expressions = vec![self.parse_expression()?];
1582            while self.match_token(TokenType::Comma) {
1583                target_expressions.push(self.parse_expression()?);
1584            }
1585            if target_expressions.len() == 1 {
1586                Some(SelectInto {
1587                    this: target_expressions.remove(0),
1588                    temporary: false,
1589                    unlogged: false,
1590                    bulk_collect: true,
1591                    expressions: Vec::new(),
1592                })
1593            } else {
1594                // Multiple targets: to match Python sqlglot behavior, store all of them
1595                // in `expressions` and leave `this` as a Null placeholder.
1596                Some(SelectInto {
1597                    this: Expression::Null(Null),
1598                    temporary: false,
1599                    unlogged: false,
1600                    bulk_collect: true,
1601                    expressions: target_expressions,
1602                })
1603            }
1604        } else if self.match_token(TokenType::Into) {
1605            // Check for TEMPORARY/TEMP/UNLOGGED keyword (PostgreSQL)
1606            let temporary = self.match_token(TokenType::Temporary) || self.match_identifier("TEMP");
1607            let unlogged = !temporary && self.match_identifier("UNLOGGED");
1608            // Parse first target (table name or PL/SQL variable)
1609            let table_name = self.parse_table_ref()?;
1610            // Oracle PL/SQL: SELECT ... INTO var1, var2, ... FROM ...
1611            // If followed by comma, parse additional target variables
1612            if self.match_token(TokenType::Comma) {
1613                let mut target_expressions = vec![Expression::Table(table_name)];
1614                target_expressions.push(self.parse_expression()?);
1615                while self.match_token(TokenType::Comma) {
1616                    target_expressions.push(self.parse_expression()?);
1617                }
1618                Some(SelectInto {
1619                    this: Expression::Null(Null),
1620                    temporary,
1621                    unlogged,
1622                    bulk_collect: false,
1623                    expressions: target_expressions,
1624                })
1625            } else {
1626                Some(SelectInto {
1627                    this: Expression::Table(table_name),
1628                    temporary,
1629                    unlogged,
1630                    bulk_collect: false,
1631                    expressions: Vec::new(),
1632                })
1633            }
1634        } else {
1635            None
1636        };
1637
1638        // Parse FROM clause
1639        let from = if self.match_token(TokenType::From) {
1640            Some(self.parse_from()?)
1641        } else {
1642            None
1643        };
1644
1645        // Parse JOINs
1646        let mut joins = self.parse_joins()?;
1647
1648        // Handle PIVOT/UNPIVOT that comes after JOINs (e.g., SELECT * FROM a JOIN b ON ... PIVOT(...))
1649        // Store PIVOT/UNPIVOT in the last join's pivots field (this matches SQLGlot's semantics)
1650        while self.check(TokenType::Pivot) || self.check(TokenType::Unpivot) {
1651            if !joins.is_empty() {
1652                let last_idx = joins.len() - 1;
1653                // Parse the pivot/unpivot and store in the join's pivots vector
1654                // We pass a Null expression as the `this` since the pivot applies to the entire join result
1655                if self.match_token(TokenType::Pivot) {
1656                    let pivot = self.parse_pivot(Expression::Null(crate::expressions::Null))?;
1657                    joins[last_idx].pivots.push(pivot);
1658                } else if self.match_token(TokenType::Unpivot) {
1659                    let unpivot = self.parse_unpivot(Expression::Null(crate::expressions::Null))?;
1660                    joins[last_idx].pivots.push(unpivot);
1661                }
1662            } else {
1663                // No joins - break to avoid infinite loop
1664                break;
1665            }
1666        }
1667
1668        // Parse LATERAL VIEW clauses (Hive/Spark)
1669        let lateral_views = self.parse_lateral_views()?;
1670
1671        // Parse PREWHERE clause (ClickHouse specific)
1672        let prewhere = if self.match_token(TokenType::Prewhere) {
1673            Some(self.parse_expression()?)
1674        } else {
1675            None
1676        };
1677
1678        // Parse WHERE clause
1679        let mut where_clause = if self.match_token(TokenType::Where) {
1680            Some(Where {
1681                this: self.parse_expression()?,
1682            })
1683        } else {
1684            None
1685        };
1686
1687        // Parse CONNECT BY clause (Oracle hierarchical queries)
1688        let connect = self.parse_connect()?;
1689
1690        // Parse GROUP BY
1691        let group_by = if self.check(TokenType::Group) {
1692            let group_comments = self.current_leading_comments();
1693            if self.match_keywords(&[TokenType::Group, TokenType::By]) {
1694                let mut gb = self.parse_group_by()?;
1695                gb.comments = group_comments;
1696                Some(gb)
1697            } else {
1698                None
1699            }
1700        } else if matches!(
1701            self.config.dialect,
1702            Some(crate::dialects::DialectType::ClickHouse)
1703        ) && self.check(TokenType::With)
1704            && (self.check_next_identifier("TOTALS")
1705                || self.check_next(TokenType::Rollup)
1706                || self.check_next(TokenType::Cube))
1707        {
1708            // ClickHouse: WITH TOTALS/ROLLUP/CUBE without GROUP BY
1709            self.advance(); // consume WITH
1710            let totals = self.match_identifier("TOTALS");
1711            let mut expressions = Vec::new();
1712            if self.match_token(TokenType::Rollup) {
1713                expressions.push(Expression::Rollup(Box::new(Rollup {
1714                    expressions: Vec::new(),
1715                })));
1716            } else if self.match_token(TokenType::Cube) {
1717                expressions.push(Expression::Cube(Box::new(Cube {
1718                    expressions: Vec::new(),
1719                })));
1720            }
1721            // Check for chained WITH TOTALS after WITH ROLLUP/CUBE
1722            if !totals && self.check(TokenType::With) && self.check_next_identifier("TOTALS") {
1723                self.advance();
1724                self.advance();
1725            }
1726            Some(GroupBy {
1727                expressions,
1728                all: None,
1729                totals,
1730                comments: Vec::new(),
1731            })
1732        } else {
1733            None
1734        };
1735
1736        // Parse HAVING
1737        let having = if self.check(TokenType::Having) {
1738            let having_comments = self.current_leading_comments();
1739            self.advance(); // consume HAVING
1740            Some(Having {
1741                this: self.parse_expression()?,
1742                comments: having_comments,
1743            })
1744        } else {
1745            None
1746        };
1747
1748        // Parse QUALIFY clause (Snowflake, BigQuery, DuckDB)
1749        // QUALIFY can appear before or after WINDOW clause
1750        let mut qualify = if self.match_token(TokenType::Qualify) {
1751            Some(Qualify {
1752                this: self.parse_expression()?,
1753            })
1754        } else {
1755            None
1756        };
1757
1758        // Parse WINDOW clause (named windows)
1759        // Only match WINDOW if followed by identifier AS ( (a real window definition)
1760        // Otherwise "window" may be a table alias (e.g., SELECT * FROM foo window)
1761        let windows = if self.check(TokenType::Window) && {
1762            let next_pos = self.current + 1;
1763            next_pos < self.tokens.len()
1764                && (self.tokens[next_pos].token_type == TokenType::Var
1765                    || self.tokens[next_pos].token_type == TokenType::Identifier)
1766        } {
1767            self.advance(); // consume WINDOW
1768            Some(self.parse_named_windows()?)
1769        } else {
1770            None
1771        };
1772
1773        // QUALIFY can also appear after WINDOW clause (DuckDB)
1774        let qualify_after_window = if qualify.is_none() && self.match_token(TokenType::Qualify) {
1775            qualify = Some(Qualify {
1776                this: self.parse_expression()?,
1777            });
1778            true
1779        } else {
1780            false
1781        };
1782
1783        // Parse DISTRIBUTE BY (Hive/Spark) - comes before SORT BY
1784        let distribute_by = if self.match_keywords(&[TokenType::Distribute, TokenType::By]) {
1785            Some(self.parse_distribute_by()?)
1786        } else {
1787            None
1788        };
1789
1790        // Parse CLUSTER BY (Hive/Spark)
1791        let cluster_by = if self.match_keywords(&[TokenType::Cluster, TokenType::By]) {
1792            Some(self.parse_cluster_by()?)
1793        } else {
1794            None
1795        };
1796
1797        // Parse SORT BY (Hive/Spark) - can come before ORDER BY
1798        let sort_by = if self.match_keywords(&[TokenType::Sort, TokenType::By]) {
1799            Some(self.parse_sort_by()?)
1800        } else {
1801            None
1802        };
1803
1804        // Parse ORDER BY or ORDER SIBLINGS BY (Oracle) - comes after SORT BY
1805        let order_by = if self.check(TokenType::Order) {
1806            let order_comments = self.current_leading_comments();
1807            if self.match_keywords(&[TokenType::Order, TokenType::Siblings, TokenType::By]) {
1808                // ORDER SIBLINGS BY (Oracle hierarchical queries)
1809                let mut ob = self.parse_order_by_with_siblings(true)?;
1810                ob.comments = order_comments;
1811                Some(ob)
1812            } else if self.match_keywords(&[TokenType::Order, TokenType::By]) {
1813                let mut ob = self.parse_order_by()?;
1814                ob.comments = order_comments;
1815                Some(ob)
1816            } else {
1817                None
1818            }
1819        } else {
1820            None
1821        };
1822
1823        // Parse LIMIT (supports MySQL syntax: LIMIT offset, count)
1824        // DuckDB supports: LIMIT 10 PERCENT or LIMIT 10%
1825        // Capture trailing comments from the token before LIMIT (e.g., WHERE condition's last token)
1826        // These comments should be emitted after the LIMIT value, not before LIMIT.
1827        let pre_limit_comments = if self.check(TokenType::Limit) {
1828            let mut comments = self.previous_trailing_comments();
1829            // Also capture leading comments on the LIMIT token (comments on a separate line before LIMIT)
1830            comments.extend(self.current_leading_comments());
1831            comments
1832        } else {
1833            Vec::new()
1834        };
1835        let (limit, offset) = if self.match_token(TokenType::Limit) {
1836            // Clear the pre-LIMIT comments from the WHERE condition expression to avoid duplication
1837            if !pre_limit_comments.is_empty() {
1838                if let Some(ref mut w) = where_clause {
1839                    Self::clear_rightmost_trailing_comments(&mut w.this);
1840                }
1841            }
1842            // First try parse_unary to check for PERCENT/% modifier.
1843            // This avoids parse_expression consuming % as the modulo operator.
1844            // Both "PERCENT" and "%" tokens have TokenType::Percent, but we need to
1845            // distinguish PERCENT-as-modifier from %-as-modulo. "%" is PERCENT when
1846            // followed by a clause boundary (OFFSET, end, semicolon, etc.).
1847            let saved_pos = self.current;
1848            let (first_expr, has_percent) = {
1849                let unary_result = self.parse_unary();
1850                match unary_result {
1851                    Ok(expr) => {
1852                        if self.check(TokenType::Percent) && self.is_percent_modifier() {
1853                            // Found PERCENT keyword or % symbol used as PERCENT modifier
1854                            self.advance();
1855                            (expr, true)
1856                        } else {
1857                            // No PERCENT - backtrack and use full parse_expression
1858                            self.current = saved_pos;
1859                            let full_expr = self.parse_expression()?;
1860                            // Check again for PERCENT keyword (e.g., after complex expression)
1861                            let has_pct =
1862                                if self.check(TokenType::Percent) && self.is_percent_modifier() {
1863                                    self.advance();
1864                                    true
1865                                } else {
1866                                    false
1867                                };
1868                            (full_expr, has_pct)
1869                        }
1870                    }
1871                    Err(_) => {
1872                        // Unary parsing failed - backtrack and use parse_expression
1873                        self.current = saved_pos;
1874                        let full_expr = self.parse_expression()?;
1875                        let has_pct =
1876                            if self.check(TokenType::Percent) && self.is_percent_modifier() {
1877                                self.advance();
1878                                true
1879                            } else {
1880                                false
1881                            };
1882                        (full_expr, has_pct)
1883                    }
1884                }
1885            };
1886            // MySQL syntax: LIMIT offset, count
1887            if self.match_token(TokenType::Comma) {
1888                let second_expr = self.parse_expression()?;
1889                // First expression is offset, second is count
1890                (
1891                    Some(Limit {
1892                        this: second_expr,
1893                        percent: false,
1894                        comments: pre_limit_comments.clone(),
1895                    }),
1896                    Some(Offset {
1897                        this: first_expr,
1898                        rows: None,
1899                    }),
1900                )
1901            } else {
1902                // Standard: LIMIT count [PERCENT]
1903                (
1904                    Some(Limit {
1905                        this: first_expr,
1906                        percent: has_percent,
1907                        comments: pre_limit_comments,
1908                    }),
1909                    None,
1910                )
1911            }
1912        } else {
1913            (None, None)
1914        };
1915
1916        // WITH TIES after LIMIT (ClickHouse, DuckDB): matched only to skip past it; the result is discarded and not stored on the Limit
1917        if limit.is_some() {
1918            let _ = self.match_keywords(&[TokenType::With, TokenType::Ties]);
1919        }
1920
1921        // Parse OFFSET (if not already parsed from MySQL LIMIT syntax)
1922        // Standard SQL syntax: OFFSET n [ROW|ROWS]
1923        // Some dialects (Presto/Trino) support: OFFSET n LIMIT m
1924        let (limit, offset) = if offset.is_none() && self.match_token(TokenType::Offset) {
1925            let expr = self.parse_expression()?;
1926            // Consume optional ROW or ROWS keyword and track it
1927            let rows = if self.match_token(TokenType::Row) || self.match_token(TokenType::Rows) {
1928                Some(true)
1929            } else {
1930                None
1931            };
1932            let offset = Some(Offset { this: expr, rows });
1933
1934            // Check for LIMIT after OFFSET (Presto/Trino syntax: OFFSET n LIMIT m)
1935            let limit = if limit.is_none() && self.match_token(TokenType::Limit) {
1936                let limit_expr = self.parse_expression()?;
1937                Some(Limit {
1938                    this: limit_expr,
1939                    percent: false,
1940                    comments: Vec::new(),
1941                })
1942            } else {
1943                limit
1944            };
1945
1946            (limit, offset)
1947        } else {
1948            (limit, offset)
1949        };
1950
1951        // ClickHouse: LIMIT ... BY expressions
1952        let limit_by = if matches!(
1953            self.config.dialect,
1954            Some(crate::dialects::DialectType::ClickHouse)
1955        ) && limit.is_some()
1956            && self.match_token(TokenType::By)
1957        {
1958            let expressions = self.parse_expression_list()?;
1959            if expressions.is_empty() {
1960                return Err(self.parse_error("Expected expression after LIMIT BY"));
1961            }
1962            Some(expressions)
1963        } else {
1964            None
1965        };
1966
1967        // ClickHouse: second LIMIT after LIMIT BY (LIMIT n BY expr LIMIT m)
1968        // Also supports LIMIT offset, count syntax
1969        let (limit, offset) = if limit_by.is_some() && self.match_token(TokenType::Limit) {
1970            let first_expr = self.parse_expression()?;
1971            if self.match_token(TokenType::Comma) {
1972                // LIMIT offset, count
1973                let count_expr = self.parse_expression()?;
1974                (
1975                    Some(Limit {
1976                        this: count_expr,
1977                        percent: false,
1978                        comments: Vec::new(),
1979                    }),
1980                    Some(Offset {
1981                        this: first_expr,
1982                        rows: None,
1983                    }),
1984                )
1985            } else {
1986                (
1987                    Some(Limit {
1988                        this: first_expr,
1989                        percent: false,
1990                        comments: Vec::new(),
1991                    }),
1992                    offset,
1993                )
1994            }
1995        } else {
1996            (limit, offset)
1997        };
1998
1999        // Parse FETCH FIRST/NEXT clause
2000        let fetch = if self.match_token(TokenType::Fetch) {
2001            Some(self.parse_fetch()?)
2002        } else {
2003            None
2004        };
2005
2006        // Parse SAMPLE / TABLESAMPLE clause
2007        let sample = self.parse_sample_clause()?;
2008
2009        // Parse FOR UPDATE/SHARE locks or FOR XML (T-SQL)
2010        let (locks, for_xml) = self.parse_locks_and_for_xml()?;
2011
2012        // TSQL: OPTION clause (e.g., OPTION(LABEL = 'foo', HASH JOIN))
2013        let option = if self.check_identifier("OPTION") && self.check_next(TokenType::LParen) {
2014            self.advance(); // consume OPTION
2015            self.advance(); // consume (
2016            let mut content = String::from("OPTION(");
2017            let mut depth = 1;
2018            while !self.is_at_end() && depth > 0 {
2019                let tok = self.advance();
2020                if tok.token_type == TokenType::LParen {
2021                    depth += 1;
2022                } else if tok.token_type == TokenType::RParen {
2023                    depth -= 1;
2024                }
2025                if depth > 0 {
2026                    if tok.token_type == TokenType::String {
2027                        if content.len() > 7 && !content.ends_with('(') && !content.ends_with(' ') {
2028                            content.push(' ');
2029                        }
2030                        content.push('\'');
2031                        content.push_str(&tok.text.replace('\'', "''"));
2032                        content.push('\'');
2033                    } else if tok.token_type == TokenType::Eq {
2034                        content.push_str(" = ");
2035                    } else if tok.token_type == TokenType::Comma {
2036                        content.push_str(", ");
2037                    } else {
2038                        if content.len() > 7 && !content.ends_with('(') && !content.ends_with(' ') {
2039                            content.push(' ');
2040                        }
2041                        content.push_str(&tok.text);
2042                    }
2043                }
2044            }
2045            content.push(')');
2046            Some(content)
2047        } else {
2048            None
2049        };
2050
2051        // ClickHouse: SETTINGS and FORMAT clauses after LIMIT/OFFSET/FETCH
2052        let (settings, format) = if matches!(
2053            self.config.dialect,
2054            Some(crate::dialects::DialectType::ClickHouse)
2055        ) {
2056            let mut settings: Option<Vec<Expression>> = None;
2057            let mut format: Option<Expression> = None;
2058
2059            loop {
2060                if settings.is_none() && self.match_token(TokenType::Settings) {
2061                    let mut settings_exprs = Vec::new();
2062                    loop {
2063                        settings_exprs.push(self.parse_expression()?);
2064                        if !self.match_token(TokenType::Comma) {
2065                            break;
2066                        }
2067                    }
2068                    settings = Some(settings_exprs);
2069                    continue;
2070                }
2071
2072                if format.is_none() && self.match_token(TokenType::Format) {
2073                    // ClickHouse: FORMAT Null is valid (Null is a keyword token, not an identifier)
2074                    let ident = if self.check(TokenType::Null) {
2075                        let text = self.advance().text;
2076                        Identifier::new(text)
2077                    } else {
2078                        self.expect_identifier_or_keyword_with_quoted()?
2079                    };
2080                    format = Some(Expression::Identifier(ident));
2081                    // ClickHouse: FORMAT <name> may be followed by inline data
2082                    // (CSV rows, JSON objects, etc.) — consume to semicolon
2083                    if matches!(
2084                        self.config.dialect,
2085                        Some(crate::dialects::DialectType::ClickHouse)
2086                    ) && !self.is_at_end()
2087                        && !self.check(TokenType::Semicolon)
2088                        && !self.check(TokenType::Settings)
2089                    {
2090                        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
2091                            self.advance();
2092                        }
2093                    }
2094                    continue;
2095                }
2096
2097                break;
2098            }
2099
2100            (settings, format)
2101        } else {
2102            (None, None)
2103        };
2104
2105        let select = Select {
2106            expressions,
2107            from,
2108            joins,
2109            lateral_views,
2110            prewhere,
2111            where_clause,
2112            group_by,
2113            having,
2114            qualify,
2115            order_by,
2116            distribute_by,
2117            cluster_by,
2118            sort_by,
2119            limit,
2120            offset,
2121            limit_by,
2122            fetch,
2123            distinct,
2124            distinct_on,
2125            top,
2126            with: None,
2127            sample,
2128            settings,
2129            format,
2130            windows,
2131            hint,
2132            connect,
2133            into,
2134            locks,
2135            for_xml,
2136            leading_comments,
2137            post_select_comments,
2138            kind,
2139            operation_modifiers,
2140            qualify_after_window,
2141            option,
2142            exclude,
2143        };
2144
2145        // Check for set operations (UNION, INTERSECT, EXCEPT)
2146        let result = Expression::Select(Box::new(select));
2147        self.parse_set_operation(result)
2148    }
2149
2150    /// Parse a WITH clause (CTEs)
2151    fn parse_with(&mut self) -> Result<Expression> {
2152        use crate::dialects::DialectType;
2153
2154        let with_token = self.expect(TokenType::With)?;
2155        let leading_comments = with_token.comments;
2156
2157        let recursive = self.match_token(TokenType::Recursive);
2158        let mut ctes = Vec::new();
2159
2160        loop {
2161            // ClickHouse supports expression-first WITH items:
2162            // WITH <expr> AS <alias> SELECT ...
2163            if matches!(self.config.dialect, Some(DialectType::ClickHouse)) {
2164                let saved_pos = self.current;
2165                if let Ok(expr) = self.parse_expression() {
2166                    // Check if parse_expression already consumed the AS alias
2167                    // (e.g., `(1, 2) AS a` gets parsed as Alias(Tuple, "a") by the tuple alias handler)
2168                    let (inner_expr, alias_opt) = if let Expression::Alias(ref alias_box) = expr {
2169                        (alias_box.this.clone(), Some(alias_box.alias.clone()))
2170                    } else {
2171                        (expr, None)
2172                    };
2173
2174                    if let Some(alias) = alias_opt {
2175                        // Expression already had AS alias consumed
2176                        ctes.push(Cte {
2177                            alias,
2178                            this: inner_expr,
2179                            columns: Vec::new(),
2180                            materialized: None,
2181                            key_expressions: Vec::new(),
2182                            alias_first: false,
2183                            comments: Vec::new(),
2184                        });
2185
2186                        if self.match_token(TokenType::Comma) {
2187                            continue;
2188                        }
2189                        break;
2190                    } else if self.match_token(TokenType::As)
2191                        && self.is_identifier_or_keyword_token()
2192                    {
2193                        // Require AS <alias> to disambiguate from standard CTE syntax
2194                        let alias = self.expect_identifier_or_keyword_with_quoted()?;
2195                        ctes.push(Cte {
2196                            alias,
2197                            this: inner_expr,
2198                            columns: Vec::new(),
2199                            materialized: None,
2200                            key_expressions: Vec::new(),
2201                            alias_first: false,
2202                            comments: Vec::new(),
2203                        });
2204
2205                        if self.match_token(TokenType::Comma) {
2206                            continue;
2207                        }
2208                        break;
2209                    } else if self.check(TokenType::Select) || self.check(TokenType::Comma) {
2210                        // ClickHouse: WITH expr SELECT ... (unaliased expression in CTE)
2211                        ctes.push(Cte {
2212                            alias: Identifier::new(format!("{}", inner_expr)),
2213                            this: inner_expr,
2214                            columns: Vec::new(),
2215                            materialized: None,
2216                            key_expressions: Vec::new(),
2217                            alias_first: false,
2218                            comments: Vec::new(),
2219                        });
2220
2221                        if self.match_token(TokenType::Comma) {
2222                            continue;
2223                        }
2224                        break;
2225                    }
2226                }
2227                // Fall back to standard CTE parsing
2228                self.current = saved_pos;
2229            }
2230
2231            // CTE names can be keywords like 'view', 'use', 'all', etc.
2232            let name = self.expect_identifier_or_alias_keyword_with_quoted()?;
2233
2234            // Optional column list
2235            // But first check for Snowflake-style CTE: WITH t (SELECT ...) - no AS keyword
2236            // In that case, LParen is followed by SELECT, not column names
2237            let columns = if self.check(TokenType::LParen) && !self.check_next(TokenType::Select) {
2238                self.advance(); // consume LParen
2239                let cols = self.parse_identifier_list()?;
2240                self.expect(TokenType::RParen)?;
2241                cols
2242            } else {
2243                Vec::new()
2244            };
2245
2246            // Optional USING KEY (columns) for DuckDB recursive CTEs
2247            let key_expressions = if self.match_keywords(&[TokenType::Using, TokenType::Key]) {
2248                self.expect(TokenType::LParen)?;
2249                let keys = self.parse_identifier_list()?;
2250                self.expect(TokenType::RParen)?;
2251                keys
2252            } else {
2253                Vec::new()
2254            };
2255
2256            // ClickHouse: keyword -> body AS alias (single-param lambda where param is a keyword)
2257            // e.g., WITH time -> sin(time * 2 * pi()) AS sine_wave
2258            if matches!(self.config.dialect, Some(DialectType::ClickHouse))
2259                && self.check(TokenType::Arrow)
2260            {
2261                self.advance(); // consume ->
2262                let body = self.parse_expression()?;
2263                let lambda = Expression::Lambda(Box::new(LambdaExpr {
2264                    parameters: vec![name.clone()],
2265                    body,
2266                    colon: false,
2267                    parameter_types: Vec::new(),
2268                }));
2269                // Expect AS alias
2270                if self.match_token(TokenType::As) && self.is_identifier_or_keyword_token() {
2271                    let alias = self.expect_identifier_or_keyword_with_quoted()?;
2272                    ctes.push(Cte {
2273                        alias,
2274                        this: lambda,
2275                        columns: Vec::new(),
2276                        materialized: None,
2277                        key_expressions: Vec::new(),
2278                        alias_first: false,
2279                        comments: Vec::new(),
2280                    });
2281                } else {
2282                    // Unaliased lambda CTE
2283                    ctes.push(Cte {
2284                        alias: name,
2285                        this: lambda,
2286                        columns: Vec::new(),
2287                        materialized: None,
2288                        key_expressions: Vec::new(),
2289                        alias_first: false,
2290                        comments: Vec::new(),
2291                    });
2292                }
2293                if self.match_token(TokenType::Comma) {
2294                    continue;
2295                }
2296                break;
2297            }
2298
2299            // AS is optional (Snowflake allows WITH t (SELECT ...) without AS)
2300            let cte_comments = if self.match_token(TokenType::As) {
2301                // Capture trailing comments from the AS token
2302                // e.g., "WITH a AS /* comment */ (...)" -> comment goes after alias
2303                self.previous_trailing_comments()
2304            } else {
2305                Vec::new()
2306            };
2307
2308            // Check for MATERIALIZED or NOT MATERIALIZED
2309            let materialized = if self.match_token(TokenType::Materialized) {
2310                Some(true)
2311            } else if self.match_token(TokenType::Not) {
2312                self.expect(TokenType::Materialized)?;
2313                Some(false)
2314            } else {
2315                None
2316            };
2317
2318            self.expect(TokenType::LParen)?;
2319            let query = self.parse_statement()?;
2320            self.expect(TokenType::RParen)?;
2321
2322            ctes.push(Cte {
2323                alias: name,
2324                this: query,
2325                columns,
2326                materialized,
2327                key_expressions,
2328                alias_first: true,
2329                comments: cte_comments,
2330            });
2331
2332            if !self.match_token(TokenType::Comma) {
2333                // Check for WITH merging: WITH a AS (...) WITH b AS (...) -> merged
2334                // If the next token is WITH (not followed by nothing), continue parsing CTEs
2335                if self.check(TokenType::With) {
2336                    self.advance(); // consume the redundant WITH keyword
2337                                    // Check if this WITH is also RECURSIVE
2338                    if self.match_token(TokenType::Recursive) && !recursive {
2339                        // If second WITH is RECURSIVE but first wasn't, ignore (keep non-recursive)
2340                    }
2341                    continue; // continue the loop to parse more CTEs
2342                }
2343                break;
2344            }
2345            // WI-14f: Skip redundant WITH keyword after comma in CTE list
2346            // e.g., WITH a AS (SELECT 1), WITH b AS (SELECT 2) SELECT *
2347            self.match_token(TokenType::With);
2348        }
2349
2350        // Parse optional SEARCH/CYCLE clause for recursive CTEs (PostgreSQL)
2351        // Syntax: SEARCH BREADTH|DEPTH FIRST BY column SET column [USING column]
2352        //     or: CYCLE column SET column USING column
2353        let search = self.parse_recursive_with_search()?;
2354
2355        // Parse the main query
2356        let mut main_query = self.parse_statement()?;
2357
2358        // Unwrap parenthesized wrappers to find the inner SELECT
2359        // (matching Python sqlglot: while isinstance(this, Subquery) and this.is_wrapper)
2360        loop {
2361            match main_query {
2362                Expression::Paren(paren) => {
2363                    main_query = paren.this;
2364                }
2365                Expression::Subquery(ref sub)
2366                    if sub.alias.is_none()
2367                        && sub.order_by.is_none()
2368                        && sub.limit.is_none()
2369                        && sub.offset.is_none() =>
2370                {
2371                    // Unwrap Subquery wrapper (parenthesized query without modifiers)
2372                    if let Expression::Subquery(sub) = main_query {
2373                        main_query = sub.this;
2374                    } else {
2375                        break;
2376                    }
2377                }
2378                _ => break,
2379            }
2380        }
2381
2382        // Attach WITH to the main query
2383        let with_clause = With {
2384            ctes,
2385            recursive,
2386            leading_comments,
2387            search,
2388        };
2389        match &mut main_query {
2390            Expression::Select(ref mut select) => {
2391                select.with = Some(with_clause);
2392            }
2393            Expression::Union(ref mut union) => {
2394                union.with = Some(with_clause);
2395            }
2396            Expression::Intersect(ref mut intersect) => {
2397                intersect.with = Some(with_clause);
2398            }
2399            Expression::Except(ref mut except) => {
2400                except.with = Some(with_clause);
2401            }
2402            Expression::Update(ref mut update) => {
2403                update.with = Some(with_clause);
2404            }
2405            Expression::Insert(ref mut insert) => {
2406                insert.with = Some(with_clause);
2407            }
2408            Expression::Delete(ref mut delete) => {
2409                delete.with = Some(with_clause);
2410            }
2411            Expression::CreateTable(ref mut ct) => {
2412                ct.with_cte = Some(with_clause);
2413            }
2414            Expression::Pivot(ref mut pivot) => {
2415                pivot.with = Some(with_clause);
2416            }
2417            _ => {}
2418        }
2419
2420        Ok(main_query)
2421    }
2422
2423    /// Parse SELECT expressions
2424    fn parse_select_expressions(&mut self) -> Result<Vec<Expression>> {
2425        let mut expressions = Vec::new();
2426
2427        loop {
2428            // Check if we're at end of select list (empty list case for TSQL TOP)
2429            // This allows queries like "SELECT TOP 10 PERCENT" with no columns
2430            // Also check for Oracle BULK COLLECT INTO sequence
2431            // ClickHouse: minus() is tokenized as Except but should be treated as function
2432            let is_ch_keyword_func = matches!(
2433                self.config.dialect,
2434                Some(crate::dialects::DialectType::ClickHouse)
2435            ) && (self.check(TokenType::Except)
2436                || self.check(TokenType::Intersect))
2437                && self.check_next(TokenType::LParen);
2438            // ClickHouse: `from`/`except` can be column names when followed by an operator
2439            // (e.g., `from + from`, `from in [0]`, `from, ...`)
2440            // Also: `from FROM t` — two consecutive FROM tokens means first is column name
2441            let is_ch_keyword_as_column = matches!(
2442                self.config.dialect,
2443                Some(crate::dialects::DialectType::ClickHouse)
2444            ) && (self.check(TokenType::From)
2445                || self.check(TokenType::Except))
2446                && {
2447                    let next_tt = self
2448                        .peek_nth(1)
2449                        .map(|t| t.token_type)
2450                        .unwrap_or(TokenType::Semicolon);
2451                    matches!(
2452                        next_tt,
2453                        TokenType::Plus | TokenType::Dash | TokenType::Star | TokenType::Slash
2454                        | TokenType::Percent | TokenType::Eq | TokenType::Neq | TokenType::Lt
2455                        | TokenType::Gt | TokenType::Lte | TokenType::Gte
2456                        | TokenType::And | TokenType::Or | TokenType::Comma | TokenType::Dot
2457                        | TokenType::In | TokenType::Is | TokenType::Not | TokenType::Like
2458                        | TokenType::Between | TokenType::Semicolon | TokenType::RParen
2459                        | TokenType::As | TokenType::DPipe | TokenType::Amp | TokenType::Pipe
2460                        | TokenType::LBracket
2461                        // Two consecutive FROM tokens: first is column name (e.g., SELECT from FROM t)
2462                        | TokenType::From
2463                    )
2464                };
2465            if !is_ch_keyword_func
2466                && !is_ch_keyword_as_column
2467                && (self.is_at_end()
2468                    || self.check(TokenType::From)
2469                    || self.check(TokenType::Where)
2470                    || self.check(TokenType::Into)
2471                    || self.check(TokenType::Union)
2472                    || self.check(TokenType::Intersect)
2473                    || self.check(TokenType::Except)
2474                    || self.check(TokenType::Order)
2475                    || self.check(TokenType::Limit)
2476                    || self.check(TokenType::Semicolon)
2477                    || self.check_text_seq(&["BULK", "COLLECT", "INTO"]))
2478            {
2479                break;
2480            }
2481
2482            // Handle star
2483            if self.check(TokenType::Star) {
2484                self.advance();
2485                let star_trailing_comments = self.previous_trailing_comments();
2486                let star = self.parse_star_modifiers_with_comments(None, star_trailing_comments)?;
2487                let mut star_expr = Expression::Star(star);
2488                // ClickHouse: * APPLY(func) or * APPLY func or * APPLY(x -> expr) column transformer
2489                if matches!(
2490                    self.config.dialect,
2491                    Some(crate::dialects::DialectType::ClickHouse)
2492                ) {
2493                    while self.check(TokenType::Apply) {
2494                        self.advance(); // consume APPLY
2495                        let apply_expr = if self.match_token(TokenType::LParen) {
2496                            // Could be APPLY(func_name) or APPLY(x -> expr)
2497                            let expr = self.parse_expression()?;
2498                            self.expect(TokenType::RParen)?;
2499                            expr
2500                        } else {
2501                            // APPLY func or APPLY x -> expr (no parens)
2502                            // Parse as expression to handle lambdas
2503                            self.parse_expression()?
2504                        };
2505                        star_expr = Expression::Apply(Box::new(crate::expressions::Apply {
2506                            this: Box::new(star_expr),
2507                            expression: Box::new(apply_expr),
2508                        }));
2509                    }
2510                }
2511                // ClickHouse: Also handle EXCEPT/REPLACE between APPLYs:
2512                // * APPLY(toDate) EXCEPT(i, j) APPLY(any)
2513                if matches!(
2514                    self.config.dialect,
2515                    Some(crate::dialects::DialectType::ClickHouse)
2516                ) && (self.check(TokenType::Except)
2517                    || self.check(TokenType::Exclude)
2518                    || self.check(TokenType::Replace))
2519                {
2520                    // Consume EXCEPT/REPLACE modifiers after APPLY
2521                    self.parse_star_modifiers(None)?;
2522                    // Continue with more APPLYs
2523                    while self.check(TokenType::Apply) {
2524                        self.advance();
2525                        let apply_expr = if self.match_token(TokenType::LParen) {
2526                            let expr = self.parse_expression()?;
2527                            self.expect(TokenType::RParen)?;
2528                            expr
2529                        } else {
2530                            self.parse_expression()?
2531                        };
2532                        star_expr = Expression::Apply(Box::new(crate::expressions::Apply {
2533                            this: Box::new(star_expr),
2534                            expression: Box::new(apply_expr),
2535                        }));
2536                    }
2537                }
2538                // ClickHouse: * followed by operators (e.g., * IS NOT NULL, * AND expr)
2539                // Treat * as a regular expression and continue parsing operators
2540                if matches!(
2541                    self.config.dialect,
2542                    Some(crate::dialects::DialectType::ClickHouse)
2543                ) && matches!(
2544                    self.peek().token_type,
2545                    TokenType::Is
2546                        | TokenType::And
2547                        | TokenType::Or
2548                        | TokenType::Eq
2549                        | TokenType::Neq
2550                        | TokenType::Lt
2551                        | TokenType::Gt
2552                        | TokenType::Lte
2553                        | TokenType::Gte
2554                        | TokenType::Not
2555                        | TokenType::Plus
2556                        | TokenType::Dash
2557                        | TokenType::Slash
2558                        | TokenType::Percent
2559                        | TokenType::Like
2560                        | TokenType::Between
2561                        | TokenType::In
2562                ) {
2563                    // Re-parse from the operator with star_expr as the left side
2564                    let left = star_expr;
2565                    // Use parse_comparison / parse_is chain
2566                    if self.check(TokenType::Is) {
2567                        self.advance(); // consume IS
2568                        let not = self.match_token(TokenType::Not);
2569                        if self.match_token(TokenType::Null) {
2570                            star_expr = if not {
2571                                Expression::Not(Box::new(UnaryOp {
2572                                    this: Expression::Is(Box::new(BinaryOp::new(
2573                                        left,
2574                                        Expression::Null(Null),
2575                                    ))),
2576                                    inferred_type: None,
2577                                }))
2578                            } else {
2579                                Expression::Is(Box::new(BinaryOp::new(
2580                                    left,
2581                                    Expression::Null(Null),
2582                                )))
2583                            };
2584                        } else {
2585                            let right = self.parse_or()?;
2586                            star_expr = if not {
2587                                Expression::Not(Box::new(UnaryOp {
2588                                    this: Expression::Is(Box::new(BinaryOp::new(left, right))),
2589                                    inferred_type: None,
2590                                }))
2591                            } else {
2592                                Expression::Is(Box::new(BinaryOp::new(left, right)))
2593                            };
2594                        }
2595                    } else if self.match_token(TokenType::And) {
2596                        let right = self.parse_or()?;
2597                        star_expr = Expression::And(Box::new(BinaryOp::new(left, right)));
2598                    } else if self.match_token(TokenType::Or) {
2599                        let right = self.parse_or()?;
2600                        star_expr = Expression::Or(Box::new(BinaryOp::new(left, right)));
2601                    } else {
2602                        let op_token = self.advance();
2603                        let right = self.parse_or()?;
2604                        star_expr = match op_token.token_type {
2605                            TokenType::Eq => Expression::Eq(Box::new(BinaryOp::new(left, right))),
2606                            TokenType::Neq => Expression::Neq(Box::new(BinaryOp::new(left, right))),
2607                            TokenType::Lt => Expression::Lt(Box::new(BinaryOp::new(left, right))),
2608                            TokenType::Gt => Expression::Gt(Box::new(BinaryOp::new(left, right))),
2609                            TokenType::Lte => Expression::Lte(Box::new(BinaryOp::new(left, right))),
2610                            TokenType::Gte => Expression::Gte(Box::new(BinaryOp::new(left, right))),
2611                            TokenType::Plus => {
2612                                Expression::Add(Box::new(BinaryOp::new(left, right)))
2613                            }
2614                            TokenType::Dash => {
2615                                Expression::Sub(Box::new(BinaryOp::new(left, right)))
2616                            }
2617                            _ => left, // fallback
2618                        };
2619                    }
2620                }
2621                expressions.push(star_expr);
2622            } else {
2623                // Capture leading comments from the first token before parsing
2624                // These are comments on a separate line before the expression
2625                let leading_comments = self.current_leading_comments();
2626                let expr = self.parse_expression()?;
2627
2628                // ClickHouse: COLUMNS(id, value) EXCEPT (id) REPLACE (5 AS id) APPLY func
2629                // Also: a.* APPLY(toDate) EXCEPT(i, j) APPLY(any) - qualified star with APPLY
2630                let expr = if matches!(
2631                    self.config.dialect,
2632                    Some(crate::dialects::DialectType::ClickHouse)
2633                ) {
2634                    let is_columns_func = match &expr {
2635                        Expression::Function(f) => f.name.eq_ignore_ascii_case("COLUMNS"),
2636                        Expression::MethodCall(m) => m.method.name.eq_ignore_ascii_case("COLUMNS"),
2637                        Expression::Columns(_) => true,
2638                        _ => false,
2639                    };
2640                    let is_qualified_star = matches!(&expr, Expression::Star(_));
2641                    if (is_columns_func || is_qualified_star)
2642                        && (self.check(TokenType::Except)
2643                            || self.check(TokenType::Exclude)
2644                            || self.check(TokenType::Replace)
2645                            || self.check(TokenType::Apply))
2646                    {
2647                        let mut result = expr;
2648                        // Parse any mix of EXCEPT/REPLACE/APPLY in any order
2649                        // e.g., * APPLY(toDate) EXCEPT(i, j) APPLY(any)
2650                        loop {
2651                            if self.check(TokenType::Except) || self.check(TokenType::Exclude) {
2652                                // Parse EXCEPT/EXCLUDE modifier
2653                                self.advance();
2654                                self.match_identifier("STRICT");
2655                                if self.match_token(TokenType::LParen) {
2656                                    loop {
2657                                        if self.check(TokenType::RParen) {
2658                                            break;
2659                                        }
2660                                        let _ = self.parse_expression()?;
2661                                        if !self.match_token(TokenType::Comma) {
2662                                            break;
2663                                        }
2664                                    }
2665                                    self.expect(TokenType::RParen)?;
2666                                } else if self.is_identifier_token()
2667                                    || self.is_safe_keyword_as_identifier()
2668                                {
2669                                    let _ = self.parse_expression()?;
2670                                }
2671                            } else if self.check(TokenType::Replace) {
2672                                // Parse REPLACE modifier: REPLACE (expr AS alias, ...)
2673                                self.advance();
2674                                self.match_identifier("STRICT");
2675                                if self.match_token(TokenType::LParen) {
2676                                    loop {
2677                                        if self.check(TokenType::RParen) {
2678                                            break;
2679                                        }
2680                                        let _ = self.parse_expression()?;
2681                                        if self.match_token(TokenType::As) {
2682                                            if self.is_identifier_token()
2683                                                || self.is_safe_keyword_as_identifier()
2684                                            {
2685                                                self.advance();
2686                                            }
2687                                        }
2688                                        if !self.match_token(TokenType::Comma) {
2689                                            break;
2690                                        }
2691                                    }
2692                                    self.expect(TokenType::RParen)?;
2693                                } else {
2694                                    let _ = self.parse_expression()?;
2695                                    if self.match_token(TokenType::As) {
2696                                        if self.is_identifier_token()
2697                                            || self.is_safe_keyword_as_identifier()
2698                                        {
2699                                            self.advance();
2700                                        }
2701                                    }
2702                                }
2703                            } else if self.check(TokenType::Apply) {
2704                                // Parse APPLY transformer
2705                                self.advance();
2706                                let apply_expr = if self.match_token(TokenType::LParen) {
2707                                    let e = self.parse_expression()?;
2708                                    self.expect(TokenType::RParen)?;
2709                                    e
2710                                } else {
2711                                    self.parse_expression()?
2712                                };
2713                                result = Expression::Apply(Box::new(crate::expressions::Apply {
2714                                    this: Box::new(result),
2715                                    expression: Box::new(apply_expr),
2716                                }));
2717                            } else {
2718                                break;
2719                            }
2720                        }
2721                        result
2722                    } else {
2723                        expr
2724                    }
2725                } else {
2726                    expr
2727                };
2728
2729                // Capture comments between expression and potential AS
2730                let pre_alias_comments = self.previous_trailing_comments();
2731
2732                // DuckDB prefix alias syntax: identifier: expression (e.g., "foo: 1" means "1 AS foo")
2733                // Check if the expression is a simple identifier followed by a colon
2734                let expr = if self.check(TokenType::Colon) && !self.check_next(TokenType::Colon) {
2735                    // Extract the alias name from the identifier expression
2736                    let alias_ident = match &expr {
2737                        Expression::Identifier(id) => Some(id.clone()),
2738                        Expression::Column(col) if col.table.is_none() => Some(col.name.clone()),
2739                        _ => None,
2740                    };
2741                    if let Some(alias) = alias_ident {
2742                        // Consume the colon
2743                        self.advance();
2744                        let colon_comments = self.previous_trailing_comments();
2745                        // Parse the actual value expression
2746                        let value = self.parse_expression()?;
2747                        let value_trailing = self.previous_trailing_comments();
2748                        // For colon-alias (foo: expr), comments between alias and colon should
2749                        // become trailing comments (placed after the alias in output).
2750                        // Comments after the value expression are also trailing.
2751                        let mut all_trailing = pre_alias_comments.clone();
2752                        all_trailing.extend(colon_comments);
2753                        all_trailing.extend(value_trailing);
2754                        Expression::Alias(Box::new(Alias {
2755                            this: value,
2756                            alias,
2757                            column_aliases: Vec::new(),
2758                            pre_alias_comments: Vec::new(),
2759                            trailing_comments: all_trailing,
2760                            inferred_type: None,
2761                        }))
2762                    } else {
2763                        // Not a simple identifier, fall through to normal alias handling
2764                        // (this handles cases where the expression is complex before the colon)
2765                        expr
2766                    }
2767                } else if self.match_token(TokenType::As) {
2768                    // Capture comments from AS token (e.g., AS /* foo */ (a, b, c))
2769                    // These go into trailing_comments (after the alias), not pre_alias_comments
2770                    let as_comments = self.previous_trailing_comments();
2771                    // Check for column aliases: AS (col1, col2) - used by POSEXPLODE etc.
2772                    if self.match_token(TokenType::LParen) {
2773                        let mut column_aliases = Vec::new();
2774                        loop {
2775                            if let Some(col_expr) = self.parse_id_var()? {
2776                                if let Expression::Identifier(id) = col_expr {
2777                                    column_aliases.push(id);
2778                                }
2779                            } else {
2780                                break;
2781                            }
2782                            if !self.match_token(TokenType::Comma) {
2783                                break;
2784                            }
2785                        }
2786                        self.match_token(TokenType::RParen);
2787                        let mut trailing_comments = as_comments;
2788                        trailing_comments.extend(self.previous_trailing_comments());
2789                        Expression::Alias(Box::new(Alias {
2790                            this: expr,
2791                            alias: Identifier::new(String::new()),
2792                            column_aliases,
2793                            pre_alias_comments,
2794                            trailing_comments,
2795                            inferred_type: None,
2796                        }))
2797                    } else {
2798                        // Allow keywords as aliases (e.g., SELECT 1 AS filter)
2799                        // Use _with_quoted to preserve quoted alias
2800                        let alias = self.expect_identifier_or_keyword_with_quoted()?;
2801                        let mut trailing_comments = self.previous_trailing_comments();
2802                        // If parse_comparison stored pending leading comments (no comparison
2803                        // followed), use those. Otherwise use the leading_comments we captured
2804                        // before parse_expression(). Both come from the same token, so we
2805                        // only add one set to avoid duplication.
2806                        if !self.pending_leading_comments.is_empty() {
2807                            trailing_comments.extend(self.pending_leading_comments.drain(..));
2808                        } else {
2809                            trailing_comments.extend(leading_comments.iter().cloned());
2810                        }
2811                        Expression::Alias(Box::new(Alias {
2812                            this: expr,
2813                            alias,
2814                            column_aliases: Vec::new(),
2815                            pre_alias_comments,
2816                            trailing_comments,
2817                            inferred_type: None,
2818                        }))
2819                    }
2820                } else if ((self.check(TokenType::Var) && !self.check_keyword()) || self.check(TokenType::QuotedIdentifier) || self.can_be_alias_keyword() || self.is_command_keyword_as_alias() || self.check(TokenType::Overlaps)
2821                    // ClickHouse: APPLY without ( is an implicit alias (e.g., SELECT col apply)
2822                    || (self.check(TokenType::Apply) && !self.check_next(TokenType::LParen)
2823                        && matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse))))
2824                    && !self.check_text_seq(&["BULK", "COLLECT", "INTO"])
2825                    // ClickHouse clauses must not be consumed as implicit aliases.
2826                    && !(matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse))
2827                        && (self.check(TokenType::Format) || self.check(TokenType::Settings)))
2828                    // LIMIT/OFFSET/FETCH are clause starters in most dialects and must not
2829                    // be consumed as implicit aliases in SELECT lists.
2830                    && !(
2831                        self.check(TokenType::Fetch)
2832                        || ((self.check(TokenType::Limit) || self.check(TokenType::Offset))
2833                            && !matches!(
2834                                self.config.dialect,
2835                                Some(crate::dialects::DialectType::Spark)
2836                                    | Some(crate::dialects::DialectType::Hive)
2837                            ))
2838                    )
2839                    // GROUP BY / ORDER BY are clause boundaries, not aliases.
2840                    && !self.check_text_seq(&["GROUP", "BY"])
2841                    && !self.check_text_seq(&["ORDER", "BY"])
2842                    // WINDOW is a clause boundary (named window definitions), not an alias.
2843                    && !self.check(TokenType::Window)
2844                    // ClickHouse: PARALLEL WITH is a statement separator, not an alias.
2845                    && !(self.check_identifier("PARALLEL") && self.check_next(TokenType::With)
2846                        && matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse)))
2847                {
2848                    // Implicit alias (without AS) - allow Var tokens, QuotedIdentifiers, command keywords (like GET, PUT, etc.), and OVERLAPS
2849                    // But NOT when it's the Oracle BULK COLLECT INTO sequence
2850                    let alias_token = self.advance();
2851                    let alias_text = alias_token.text.clone();
2852                    let is_quoted = alias_token.token_type == TokenType::QuotedIdentifier;
2853                    let trailing_comments = self.previous_trailing_comments();
2854                    Expression::Alias(Box::new(Alias {
2855                        this: expr,
2856                        alias: Identifier {
2857                            name: alias_text,
2858                            quoted: is_quoted,
2859                            trailing_comments: Vec::new(),
2860                            span: None,
2861                        },
2862                        column_aliases: Vec::new(),
2863                        pre_alias_comments,
2864                        trailing_comments,
2865                        inferred_type: None,
2866                    }))
2867                } else if !pre_alias_comments.is_empty() {
2868                    // Only wrap in Annotated if the expression doesn't already handle trailing comments.
2869                    // BinaryOp, Column, Cast, Function, etc. have their own trailing_comments field that the generator uses.
2870                    let already_has_trailing = matches!(
2871                        &expr,
2872                        Expression::Add(_)
2873                            | Expression::Sub(_)
2874                            | Expression::Mul(_)
2875                            | Expression::Div(_)
2876                            | Expression::Mod(_)
2877                            | Expression::Concat(_)
2878                            | Expression::BitwiseAnd(_)
2879                            | Expression::BitwiseOr(_)
2880                            | Expression::BitwiseXor(_)
2881                            | Expression::Column(_)
2882                            | Expression::Paren(_)
2883                            | Expression::Annotated(_)
2884                            | Expression::Cast(_)
2885                            | Expression::Function(_)
2886                            | Expression::Subquery(_)
2887                    );
2888                    if already_has_trailing {
2889                        expr
2890                    } else {
2891                        // Wrap in Annotated to preserve trailing comments
2892                        Expression::Annotated(Box::new(Annotated {
2893                            this: expr,
2894                            trailing_comments: pre_alias_comments,
2895                        }))
2896                    }
2897                } else if !leading_comments.is_empty() {
2898                    // Wrap in Annotated to preserve leading comments as trailing comments
2899                    Expression::Annotated(Box::new(Annotated {
2900                        this: expr,
2901                        trailing_comments: leading_comments,
2902                    }))
2903                } else {
2904                    expr
2905                };
2906
2907                expressions.push(expr);
2908            }
2909
2910            if !self.match_token(TokenType::Comma) {
2911                break;
2912            }
2913
2914            // Handle trailing comma (ClickHouse supports trailing commas in SELECT)
2915            // ClickHouse: `from` after comma is a column name if followed by an operator
2916            // (e.g., `from + from` or `from in [0]`), comma, or line-end
2917            let from_is_column = matches!(
2918                self.config.dialect,
2919                Some(crate::dialects::DialectType::ClickHouse)
2920            ) && self.check(TokenType::From)
2921                && {
2922                    let next_tt = self
2923                        .peek_nth(1)
2924                        .map(|t| t.token_type)
2925                        .unwrap_or(TokenType::Semicolon);
2926                    matches!(
2927                        next_tt,
2928                        TokenType::Plus
2929                            | TokenType::Dash
2930                            | TokenType::Star
2931                            | TokenType::Slash
2932                            | TokenType::Percent
2933                            | TokenType::Eq
2934                            | TokenType::Neq
2935                            | TokenType::Lt
2936                            | TokenType::Gt
2937                            | TokenType::Lte
2938                            | TokenType::Gte
2939                            | TokenType::And
2940                            | TokenType::Or
2941                            | TokenType::Comma
2942                            | TokenType::Dot
2943                            | TokenType::In
2944                            | TokenType::Is
2945                            | TokenType::Not
2946                            | TokenType::Like
2947                            | TokenType::Between
2948                            | TokenType::Semicolon
2949                            | TokenType::RParen
2950                            | TokenType::As
2951                            | TokenType::DPipe
2952                            | TokenType::Amp
2953                            | TokenType::Pipe
2954                            | TokenType::LBracket
2955                    )
2956                };
2957            if (self.config.allow_trailing_commas
2958                || matches!(
2959                    self.config.dialect,
2960                    Some(crate::dialects::DialectType::ClickHouse)
2961                ))
2962                && (!from_is_column && self.check_from_keyword()
2963                    || self.check(TokenType::Where)
2964                    || self.check(TokenType::GroupBy)
2965                    || self.check(TokenType::Having)
2966                    || self.check(TokenType::Order)
2967                    || self.check(TokenType::Limit)
2968                    || self.check(TokenType::Union)
2969                    || self.check(TokenType::Intersect)
2970                    || (self.check(TokenType::Except) && !self.check_next(TokenType::LParen) && !self.check_next(TokenType::Comma))
2971                    || self.check(TokenType::Semicolon)
2972                    || self.check(TokenType::RParen)
2973                    // SETTINGS/FORMAT only as boundaries when NOT followed by ( or [ (function/column ref)
2974                    || (self.check(TokenType::Settings) && !self.check_next(TokenType::LParen) && !self.check_next(TokenType::LBracket))
2975                    || (self.check(TokenType::Format) && !self.check_next(TokenType::LParen))
2976                    || self.is_at_end())
2977            {
2978                break;
2979            }
2980        }
2981
2982        Ok(expressions)
2983    }
2984
2985    /// Parse DuckDB FROM-first query syntax
2986    /// FROM tbl = SELECT * FROM tbl
2987    /// FROM tbl SELECT col1, col2 = SELECT col1, col2 FROM tbl
2988    fn parse_from_first_query(&mut self) -> Result<Expression> {
2989        self.expect(TokenType::From)?;
2990
2991        // Parse the FROM clause (table references)
2992        let from = self.parse_from()?;
2993
2994        // Check if there's an explicit SELECT clause after FROM
2995        let expressions = if self.check(TokenType::Select) {
2996            self.advance(); // consume SELECT
2997            self.parse_select_expressions()?
2998        } else {
2999            // No explicit SELECT means SELECT *
3000            vec![Expression::Star(crate::expressions::Star {
3001                table: None,
3002                except: None,
3003                replace: None,
3004                rename: None,
3005                trailing_comments: Vec::new(),
3006                span: None,
3007            })]
3008        };
3009
3010        // Parse PREWHERE clause (ClickHouse specific)
3011        let prewhere = if self.match_token(TokenType::Prewhere) {
3012            Some(self.parse_expression()?)
3013        } else {
3014            None
3015        };
3016
3017        // Parse WHERE clause
3018        let where_clause = if self.match_token(TokenType::Where) {
3019            Some(Where {
3020                this: self.parse_expression()?,
3021            })
3022        } else {
3023            None
3024        };
3025
3026        // Parse GROUP BY
3027        let group_by = if self.match_token(TokenType::Group) {
3028            self.expect(TokenType::By)?;
3029            let mut groups = Vec::new();
3030            loop {
3031                groups.push(self.parse_expression()?);
3032                if !self.match_token(TokenType::Comma) {
3033                    break;
3034                }
3035            }
3036            Some(GroupBy {
3037                expressions: groups,
3038                all: None,
3039                totals: false,
3040                comments: Vec::new(),
3041            })
3042        } else {
3043            None
3044        };
3045
3046        // Parse HAVING
3047        let having = if self.match_token(TokenType::Having) {
3048            Some(Having {
3049                this: self.parse_expression()?,
3050                comments: Vec::new(),
3051            })
3052        } else {
3053            None
3054        };
3055
3056        // Parse ORDER BY
3057        let order_by = if self.match_token(TokenType::Order) {
3058            self.expect(TokenType::By)?;
3059            Some(self.parse_order_by()?)
3060        } else {
3061            None
3062        };
3063
3064        // Parse LIMIT
3065        let limit = if self.match_token(TokenType::Limit) {
3066            let first_expr = self.parse_expression()?;
3067            Some(Limit {
3068                this: first_expr,
3069                percent: false,
3070                comments: Vec::new(),
3071            })
3072        } else {
3073            None
3074        };
3075
3076        // Parse OFFSET
3077        let offset = if self.match_token(TokenType::Offset) {
3078            let expr = self.parse_expression()?;
3079            let rows = if self.match_token(TokenType::Row) || self.match_token(TokenType::Rows) {
3080                Some(true)
3081            } else {
3082                None
3083            };
3084            Some(Offset { this: expr, rows })
3085        } else {
3086            None
3087        };
3088
3089        // Build SELECT expression
3090        let select = Select {
3091            expressions,
3092            from: Some(from),
3093            joins: Vec::new(),
3094            lateral_views: Vec::new(),
3095            prewhere,
3096            where_clause,
3097            group_by,
3098            having,
3099            qualify: None,
3100            order_by,
3101            distribute_by: None,
3102            cluster_by: None,
3103            sort_by: None,
3104            limit,
3105            offset,
3106            limit_by: None,
3107            fetch: None,
3108            distinct: false,
3109            distinct_on: None,
3110            top: None,
3111            with: None,
3112            sample: None,
3113            settings: None,
3114            format: None,
3115            windows: None,
3116            hint: None,
3117            connect: None,
3118            into: None,
3119            locks: Vec::new(),
3120            for_xml: Vec::new(),
3121            leading_comments: Vec::new(),
3122            post_select_comments: Vec::new(),
3123            kind: None,
3124            operation_modifiers: Vec::new(),
3125            qualify_after_window: false,
3126            option: None,
3127            exclude: None,
3128        };
3129
3130        // Check for set operations (UNION, INTERSECT, EXCEPT)
3131        let result = Expression::Select(Box::new(select));
3132        self.parse_set_operation(result)
3133    }
3134
3135    /// Parse FROM clause
3136    fn parse_from(&mut self) -> Result<From> {
3137        let mut expressions = Vec::new();
3138
3139        loop {
3140            let table = self.parse_table_expression()?;
3141            expressions.push(table);
3142
3143            if !self.match_token(TokenType::Comma) {
3144                break;
3145            }
3146
3147            // Handle trailing comma in FROM clause (Snowflake allows this)
3148            // If next token is a clause boundary keyword or end of input, break
3149            // Note: For Redshift, UNPIVOT after comma is a table expression (SUPER object traversal),
3150            // so we don't treat it as a boundary in that case
3151            let is_redshift = matches!(
3152                self.config.dialect,
3153                Some(crate::dialects::DialectType::Redshift)
3154            );
3155            let is_unpivot_boundary = !is_redshift && self.check(TokenType::Unpivot);
3156            if self.is_at_end()
3157                || is_unpivot_boundary
3158                || matches!(
3159                    self.peek().token_type,
3160                    TokenType::Where
3161                        | TokenType::GroupBy
3162                        | TokenType::Having
3163                        | TokenType::Order
3164                        | TokenType::Limit
3165                        | TokenType::Offset
3166                        | TokenType::Union
3167                        | TokenType::Intersect
3168                        | TokenType::Except
3169                        | TokenType::Semicolon
3170                        | TokenType::RParen
3171                        | TokenType::Window
3172                        | TokenType::Qualify
3173                        | TokenType::Distribute
3174                        | TokenType::Cluster
3175                        | TokenType::Pivot
3176                )
3177            {
3178                break;
3179            }
3180        }
3181
3182        Ok(From { expressions })
3183    }
3184
3185    /// Parse a table expression (table name, subquery, etc.)
3186    fn parse_table_expression(&mut self) -> Result<Expression> {
3187        // Handle PostgreSQL ONLY modifier: FROM ONLY t1
3188        // ONLY prevents scanning child tables in inheritance hierarchy
3189        let has_only = self.match_token(TokenType::Only);
3190
3191        // Handle PostgreSQL ROWS FROM syntax:
3192        // ROWS FROM (func1(args) AS alias1(col1 type1), func2(args) AS alias2(col2 type2)) [WITH ORDINALITY] [AS alias(cols)]
3193        if self.match_text_seq(&["ROWS", "FROM"]) {
3194            return self.parse_rows_from();
3195        }
3196
3197        // Redshift UNPIVOT in FROM clause for SUPER object traversal:
3198        // UNPIVOT expr [AS val_alias AT attr_alias]
3199        // Examples:
3200        //   UNPIVOT c.c_orders[0]
3201        //   UNPIVOT c.c_orders AS val AT attr
3202        if self.match_token(TokenType::Unpivot) {
3203            return self.parse_redshift_unpivot_table();
3204        }
3205
3206        let mut expr = if self.check(TokenType::Values) && self.check_next(TokenType::LParen) {
3207            // VALUES as table expression: FROM (VALUES ...)
3208            // In ClickHouse, bare `values` without ( is a table name
3209            self.parse_values()?
3210        } else if self.check(TokenType::Values)
3211            && matches!(
3212                self.config.dialect,
3213                Some(crate::dialects::DialectType::ClickHouse)
3214            )
3215        {
3216            // ClickHouse: `values` as a table name (not followed by LParen)
3217            let token = self.advance();
3218            let ident = Identifier::new(token.text);
3219            let trailing_comments = self.previous_trailing_comments();
3220            Expression::Table(TableRef {
3221                name: ident,
3222                schema: None,
3223                catalog: None,
3224                alias: None,
3225                alias_explicit_as: false,
3226                column_aliases: Vec::new(),
3227                trailing_comments,
3228                when: None,
3229                only: false,
3230                final_: false,
3231                table_sample: None,
3232                hints: Vec::new(),
3233                system_time: None,
3234                partitions: Vec::new(),
3235                identifier_func: None,
3236                changes: None,
3237                version: None,
3238                span: None,
3239            })
3240        } else if self.check(TokenType::DAt) {
3241            // Snowflake stage reference: @stage_name or @"stage_name" or @namespace.stage/path
3242            self.parse_stage_reference()?
3243        } else if self.check(TokenType::Var) && self.peek().text.starts_with('@') {
3244            // Snowflake stage reference tokenized as Var: @mystage/path
3245            // When @ is followed by alphanumeric, tokenizer creates a Var token instead of DAt
3246            self.parse_stage_reference_from_var()?
3247        } else if self.check(TokenType::String) && self.peek().text.starts_with('@') {
3248            // Snowflake stage reference in string: '@mystage' or '@external/location'
3249            self.parse_stage_reference_from_string()?
3250        } else if self.match_token(TokenType::Lateral) {
3251            if self.check(TokenType::LParen) {
3252                // LATERAL (SELECT ...) or LATERAL (table_expression) or LATERAL (FROM ...) for DuckDB
3253                self.expect(TokenType::LParen)?;
3254                if self.check(TokenType::Select)
3255                    || self.check(TokenType::With)
3256                    || self.check(TokenType::From)
3257                {
3258                    let query = self.parse_statement()?;
3259                    self.expect(TokenType::RParen)?;
3260                    Expression::Subquery(Box::new(Subquery {
3261                        this: query,
3262                        alias: None,
3263                        column_aliases: Vec::new(),
3264                        order_by: None,
3265                        limit: None,
3266                        offset: None,
3267                        lateral: true,
3268                        modifiers_inside: false,
3269                        trailing_comments: Vec::new(),
3270                        distribute_by: None,
3271                        sort_by: None,
3272                        cluster_by: None,
3273                        inferred_type: None,
3274                    }))
3275                } else {
3276                    // LATERAL (table_function()) - parenthesized non-subquery
3277                    let table_expr = self.parse_table_expression()?;
3278                    self.expect(TokenType::RParen)?;
3279                    Expression::Subquery(Box::new(Subquery {
3280                        this: table_expr,
3281                        alias: None,
3282                        column_aliases: Vec::new(),
3283                        order_by: None,
3284                        limit: None,
3285                        offset: None,
3286                        lateral: true,
3287                        modifiers_inside: false,
3288                        trailing_comments: Vec::new(),
3289                        distribute_by: None,
3290                        sort_by: None,
3291                        cluster_by: None,
3292                        inferred_type: None,
3293                    }))
3294                }
3295            } else {
3296                // LATERAL function_name(args) [WITH ORDINALITY] [AS alias(columns)]
3297                // Parse function name
3298                let first_ident = self.expect_identifier_or_keyword_with_quoted()?;
3299                let first_name = first_ident.name.clone();
3300
3301                // Parse function arguments
3302                self.expect(TokenType::LParen)?;
3303                let args = if self.check(TokenType::RParen) {
3304                    Vec::new()
3305                } else {
3306                    self.parse_function_arguments()?
3307                };
3308                self.expect(TokenType::RParen)?;
3309
3310                // Handle UNNEST specially to create UnnestFunc expression
3311                let mut func_expr = if first_name.to_uppercase() == "UNNEST" {
3312                    let mut args_iter = args.into_iter();
3313                    let this = args_iter
3314                        .next()
3315                        .ok_or_else(|| self.parse_error("Expected expression in UNNEST"))?;
3316                    let expressions: Vec<Expression> = args_iter.collect();
3317                    Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
3318                        this,
3319                        expressions,
3320                        with_ordinality: false,
3321                        alias: None,
3322                        offset_alias: None,
3323                    }))
3324                } else {
3325                    Expression::Function(Box::new(Function {
3326                        name: first_name,
3327                        args,
3328                        distinct: false,
3329                        trailing_comments: Vec::new(),
3330                        use_bracket_syntax: false,
3331                        no_parens: false,
3332                        quoted: false,
3333                        span: None,
3334                        inferred_type: None,
3335                    }))
3336                };
3337
3338                // Check for WITH ORDINALITY (Presto) or WITH OFFSET (BigQuery)
3339                let mut with_offset_alias: Option<crate::expressions::Identifier> = None;
3340                let ordinality = if self.match_token(TokenType::With) {
3341                    if self.match_token(TokenType::Ordinality) {
3342                        Some(Box::new(Expression::Boolean(BooleanLiteral {
3343                            value: true,
3344                        })))
3345                    } else if self.check(TokenType::Offset) || self.check_identifier("OFFSET") {
3346                        // BigQuery: WITH OFFSET [AS alias]
3347                        self.advance(); // consume OFFSET
3348                                        // Check for optional offset alias: WITH OFFSET AS y or WITH OFFSET y
3349                        if matches!(
3350                            self.config.dialect,
3351                            Some(crate::dialects::DialectType::BigQuery)
3352                        ) {
3353                            let has_as = self.match_token(TokenType::As);
3354                            if has_as
3355                                || self.check(TokenType::Identifier)
3356                                || self.check(TokenType::Var)
3357                            {
3358                                let alias_name = self.advance().text;
3359                                with_offset_alias = Some(crate::expressions::Identifier {
3360                                    name: alias_name,
3361                                    quoted: false,
3362                                    trailing_comments: Vec::new(),
3363                                    span: None,
3364                                });
3365                            }
3366                        }
3367                        Some(Box::new(Expression::Boolean(BooleanLiteral {
3368                            value: true,
3369                        })))
3370                    } else {
3371                        // Not ORDINALITY or OFFSET, put back WITH
3372                        self.current -= 1;
3373                        None
3374                    }
3375                } else {
3376                    None
3377                };
3378
3379                // Update the inner UnnestFunc with WITH ORDINALITY/OFFSET info
3380                if ordinality.is_some() {
3381                    if let Expression::Unnest(ref mut u) = func_expr {
3382                        u.with_ordinality = true;
3383                        u.offset_alias = with_offset_alias;
3384                    }
3385                }
3386
3387                // Parse optional alias: AS alias or just alias
3388                let alias_ident = if self.match_token(TokenType::As) {
3389                    Some(self.expect_identifier_or_keyword_with_quoted()?)
3390                } else if !self.is_at_end()
3391                    && !self.check(TokenType::Comma)
3392                    && !self.check(TokenType::RParen)
3393                    && !self.check(TokenType::On)
3394                    && !self.check(TokenType::Cross)
3395                    && !self.check(TokenType::Inner)
3396                    && !self.check(TokenType::Left)
3397                    && !self.check(TokenType::Right)
3398                    && !self.check(TokenType::Full)
3399                    && !self.check(TokenType::Join)
3400                    && !self.check(TokenType::Where)
3401                    && !self.check(TokenType::Order)
3402                    && !self.check(TokenType::Limit)
3403                    && !self.check(TokenType::Semicolon)
3404                    && (self.check(TokenType::Identifier) || self.check(TokenType::Var))
3405                {
3406                    Some(self.expect_identifier_or_keyword_with_quoted()?)
3407                } else {
3408                    None
3409                };
3410                let alias_quoted = alias_ident.as_ref().map_or(false, |id| id.quoted);
3411                let alias = alias_ident.map(|id| id.name);
3412
3413                // Parse column aliases: (col1, col2, ...)
3414                let column_aliases = if alias.is_some() && self.match_token(TokenType::LParen) {
3415                    let mut cols = Vec::new();
3416                    loop {
3417                        cols.push(self.expect_identifier_or_keyword()?);
3418                        if !self.match_token(TokenType::Comma) {
3419                            break;
3420                        }
3421                    }
3422                    self.expect(TokenType::RParen)?;
3423                    cols
3424                } else {
3425                    Vec::new()
3426                };
3427
3428                Expression::Lateral(Box::new(Lateral {
3429                    this: Box::new(func_expr),
3430                    view: None,
3431                    outer: None,
3432                    alias,
3433                    alias_quoted,
3434                    cross_apply: None,
3435                    ordinality,
3436                    column_aliases,
3437                }))
3438            }
3439        } else if self.match_token(TokenType::LParen) {
3440            // Subquery or parenthesized set operation or (VALUES ...)
3441            if self.check(TokenType::Values) {
3442                // (VALUES (...), (...)) AS t(c1, c2) or (VALUES (0) foo(bar))
3443                let mut values = self.parse_values()?;
3444                self.expect(TokenType::RParen)?;
3445                // Extract alias from Values if present and move to Subquery
3446                let (alias, column_aliases) = if let Expression::Values(ref mut v) = values {
3447                    (v.alias.take(), std::mem::take(&mut v.column_aliases))
3448                } else {
3449                    (None, Vec::new())
3450                };
3451                Expression::Subquery(Box::new(Subquery {
3452                    this: values,
3453                    alias,
3454                    column_aliases,
3455                    order_by: None,
3456                    limit: None,
3457                    offset: None,
3458                    distribute_by: None,
3459                    sort_by: None,
3460                    cluster_by: None,
3461                    lateral: false,
3462                    modifiers_inside: false,
3463                    trailing_comments: self.previous_trailing_comments(),
3464                    inferred_type: None,
3465                }))
3466            } else if self.check(TokenType::Select)
3467                || self.check(TokenType::With)
3468                || self.check(TokenType::Pivot)
3469                || self.check(TokenType::Unpivot)
3470                || self.check(TokenType::From)
3471                || self.check(TokenType::Merge)
3472                || self.check(TokenType::Describe)
3473                || (self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("EXPLAIN"))
3474                || (self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("SUMMARIZE"))
3475            {
3476                let query = self.parse_statement()?;
3477                self.expect(TokenType::RParen)?;
3478                let trailing = self.previous_trailing_comments();
3479                // Check for set operations after parenthesized query
3480                // If there's a set operation, wrap query in Subquery first to preserve parens
3481                // e.g., (SELECT 1) UNION (SELECT 2) - the left operand needs Subquery wrapper
3482                let result = if self.check(TokenType::Union)
3483                    || self.check(TokenType::Intersect)
3484                    || self.check(TokenType::Except)
3485                {
3486                    let left = Expression::Subquery(Box::new(Subquery {
3487                        this: query,
3488                        alias: None,
3489                        column_aliases: Vec::new(),
3490                        order_by: None,
3491                        limit: None,
3492                        offset: None,
3493                        lateral: false,
3494                        modifiers_inside: false,
3495                        trailing_comments: Vec::new(),
3496                        distribute_by: None,
3497                        sort_by: None,
3498                        cluster_by: None,
3499                        inferred_type: None,
3500                    }));
3501                    self.parse_set_operation(left)?
3502                } else {
3503                    query
3504                };
3505                Expression::Subquery(Box::new(Subquery {
3506                    this: result,
3507                    alias: None,
3508                    column_aliases: Vec::new(),
3509                    order_by: None,
3510                    limit: None,
3511                    offset: None,
3512                    distribute_by: None,
3513                    sort_by: None,
3514                    cluster_by: None,
3515                    lateral: false,
3516                    modifiers_inside: false,
3517                    trailing_comments: trailing,
3518                    inferred_type: None,
3519                }))
3520            } else if self.check(TokenType::LParen) {
3521                // Nested parens like ((SELECT ...)) or ((x))
3522                // Also handles ((SELECT 1) UNION (SELECT 2)) - set operations inside parens
3523                let inner = self.parse_table_expression()?;
3524
3525                // Handle alias on subquery before set operation: ((SELECT 1) AS a UNION ALL (SELECT 2) AS b)
3526                let inner = if self.match_token(TokenType::As) {
3527                    let alias = self.expect_identifier()?;
3528                    if let Expression::Subquery(mut subq) = inner {
3529                        subq.alias = Some(Identifier::new(alias));
3530                        Expression::Subquery(subq)
3531                    } else {
3532                        Expression::Alias(Box::new(Alias::new(inner, Identifier::new(alias))))
3533                    }
3534                } else if self.is_identifier_token()
3535                    && !self.check(TokenType::Union)
3536                    && !self.check(TokenType::Intersect)
3537                    && !self.check(TokenType::Except)
3538                    && !self.check(TokenType::Cross)
3539                    && !self.check(TokenType::Inner)
3540                    && !self.check(TokenType::Left)
3541                    && !self.check(TokenType::Right)
3542                    && !self.check(TokenType::Full)
3543                    && !self.check(TokenType::Join)
3544                    && !self.check(TokenType::Order)
3545                    && !self.check(TokenType::Limit)
3546                    && !self.check(TokenType::Offset)
3547                    && !self.check(TokenType::Xor)
3548                {
3549                    // Implicit alias (no AS keyword)
3550                    let alias = self.expect_identifier()?;
3551                    if let Expression::Subquery(mut subq) = inner {
3552                        subq.alias = Some(Identifier::new(alias));
3553                        Expression::Subquery(subq)
3554                    } else {
3555                        Expression::Alias(Box::new(Alias::new(inner, Identifier::new(alias))))
3556                    }
3557                } else {
3558                    inner
3559                };
3560
3561                // ClickHouse: ((SELECT 1) AS x, (SELECT 2) AS y) — tuple of aliased subqueries
3562                if matches!(
3563                    self.config.dialect,
3564                    Some(crate::dialects::DialectType::ClickHouse)
3565                ) && self.check(TokenType::Comma)
3566                {
3567                    let mut exprs = vec![inner];
3568                    while self.match_token(TokenType::Comma) {
3569                        if self.check(TokenType::RParen) {
3570                            break;
3571                        }
3572                        let e = self.parse_expression()?;
3573                        exprs.push(e);
3574                    }
3575                    self.expect(TokenType::RParen)?;
3576                    return Ok(Expression::Tuple(Box::new(Tuple { expressions: exprs })));
3577                }
3578
3579                // Check for set operations after the first table expression
3580                let had_set_operation = self.check(TokenType::Union)
3581                    || self.check(TokenType::Intersect)
3582                    || self.check(TokenType::Except);
3583                let result = if had_set_operation {
3584                    // This is a set operation like ((SELECT 1) UNION (SELECT 2))
3585                    // Wrap inner in a subquery-like expression and parse set operation
3586                    let set_result = self.parse_set_operation(inner)?;
3587                    set_result
3588                } else if self.check(TokenType::Cross)
3589                    || self.check(TokenType::Inner)
3590                    || self.check(TokenType::Left)
3591                    || self.check(TokenType::Right)
3592                    || self.check(TokenType::Full)
3593                    || self.check(TokenType::Join)
3594                {
3595                    // This is a join: ((SELECT 1) CROSS JOIN (SELECT 2))
3596                    let joins = self.parse_joins()?;
3597                    let lateral_views = self.parse_lateral_views()?;
3598                    Expression::JoinedTable(Box::new(JoinedTable {
3599                        left: inner,
3600                        joins,
3601                        lateral_views,
3602                        alias: None,
3603                    }))
3604                } else {
3605                    inner
3606                };
3607
3608                // Handle ORDER BY, LIMIT, OFFSET after set operations inside parens
3609                let result = if self.check(TokenType::Order) {
3610                    // Wrap in a subquery with order/limit
3611                    self.expect(TokenType::Order)?;
3612                    self.expect(TokenType::By)?;
3613                    let order_by = self.parse_order_by()?;
3614                    let limit = if self.match_token(TokenType::Limit) {
3615                        Some(Limit {
3616                            this: self.parse_expression()?,
3617                            percent: false,
3618                            comments: Vec::new(),
3619                        })
3620                    } else {
3621                        None
3622                    };
3623                    let offset = if self.match_token(TokenType::Offset) {
3624                        Some(Offset {
3625                            this: self.parse_expression()?,
3626                            rows: None,
3627                        })
3628                    } else {
3629                        None
3630                    };
3631                    Expression::Subquery(Box::new(Subquery {
3632                        this: result,
3633                        alias: None,
3634                        column_aliases: Vec::new(),
3635                        order_by: Some(order_by),
3636                        limit,
3637                        offset,
3638                        distribute_by: None,
3639                        sort_by: None,
3640                        cluster_by: None,
3641                        lateral: false,
3642                        modifiers_inside: true, // ORDER BY was inside the parens
3643                        trailing_comments: Vec::new(),
3644                        inferred_type: None,
3645                    }))
3646                } else if self.check(TokenType::Limit) || self.check(TokenType::Offset) {
3647                    // LIMIT/OFFSET without ORDER BY
3648                    let limit = if self.match_token(TokenType::Limit) {
3649                        Some(Limit {
3650                            this: self.parse_expression()?,
3651                            percent: false,
3652                            comments: Vec::new(),
3653                        })
3654                    } else {
3655                        None
3656                    };
3657                    let offset = if self.match_token(TokenType::Offset) {
3658                        Some(Offset {
3659                            this: self.parse_expression()?,
3660                            rows: None,
3661                        })
3662                    } else {
3663                        None
3664                    };
3665                    Expression::Subquery(Box::new(Subquery {
3666                        this: result,
3667                        alias: None,
3668                        column_aliases: Vec::new(),
3669                        order_by: None,
3670                        limit,
3671                        offset,
3672                        distribute_by: None,
3673                        sort_by: None,
3674                        cluster_by: None,
3675                        lateral: false,
3676                        modifiers_inside: true, // LIMIT/OFFSET was inside the parens
3677                        trailing_comments: Vec::new(),
3678                        inferred_type: None,
3679                    }))
3680                } else {
3681                    result
3682                };
3683
3684                self.expect(TokenType::RParen)?;
3685                // Wrap result in Paren to preserve the outer parentheses when needed
3686                // Cases:
3687                // - ((SELECT 1)) -> Paren(Subquery(Select)) - inner was subquery of SELECT, wrap in Paren
3688                // - ((SELECT 1) UNION (SELECT 2)) -> Subquery(Union) - recursive call handled set op, don't add Paren
3689                // - ((SELECT 1) AS a UNION ALL ...) -> Union - we handled set op, need to add Paren
3690                // - (((SELECT 1) UNION SELECT 2) ORDER BY x) -> Subquery with modifiers_inside=true
3691                let had_modifiers = matches!(&result, Expression::Subquery(s) if s.order_by.is_some() || s.limit.is_some() || s.offset.is_some());
3692                let result_is_subquery_of_set_op = matches!(&result, Expression::Subquery(s) if matches!(&s.this, Expression::Union(_) | Expression::Intersect(_) | Expression::Except(_)));
3693                if had_modifiers || result_is_subquery_of_set_op {
3694                    // Subquery with modifiers or Subquery(Union) - already has proper structure
3695                    result
3696                } else {
3697                    // All other cases need Paren wrapper to preserve outer parentheses
3698                    Expression::Paren(Box::new(Paren {
3699                        this: result,
3700                        trailing_comments: Vec::new(),
3701                    }))
3702                }
3703            } else if self.is_identifier_token()
3704                || self.is_safe_keyword_as_identifier()
3705                || self.can_be_alias_keyword()
3706            {
3707                // Parenthesized join expression: (tbl1 CROSS JOIN tbl2) or just (x)
3708                // Also allow safe keywords and alias keywords (all, left, etc.) as table names
3709                let (left, joins) = self.parse_table_expression_with_joins()?;
3710                // Parse LATERAL VIEW after joins: (x CROSS JOIN foo LATERAL VIEW EXPLODE(y))
3711                let lateral_views = self.parse_lateral_views()?;
3712                self.expect(TokenType::RParen)?;
3713                if joins.is_empty() && lateral_views.is_empty() {
3714                    // Just a parenthesized table expression, wrap in Paren to preserve parens
3715                    Expression::Paren(Box::new(Paren {
3716                        this: left,
3717                        trailing_comments: Vec::new(),
3718                    }))
3719                } else {
3720                    // Create a JoinedTable
3721                    Expression::JoinedTable(Box::new(JoinedTable {
3722                        left,
3723                        joins,
3724                        lateral_views,
3725                        alias: None, // Alias is parsed separately after this
3726                    }))
3727                }
3728            } else {
3729                let query = self.parse_statement()?;
3730                self.expect(TokenType::RParen)?;
3731                Expression::Subquery(Box::new(Subquery {
3732                    this: query,
3733                    alias: None,
3734                    column_aliases: Vec::new(),
3735                    order_by: None,
3736                    limit: None,
3737                    offset: None,
3738                    distribute_by: None,
3739                    sort_by: None,
3740                    cluster_by: None,
3741                    lateral: false,
3742                    modifiers_inside: false,
3743                    trailing_comments: self.previous_trailing_comments(),
3744                    inferred_type: None,
3745                }))
3746            }
3747        } else if self.is_identifier_token() || self.is_safe_keyword_as_identifier() || self.can_be_alias_keyword()
3748            || (matches!(self.config.dialect, Some(crate::dialects::DialectType::BigQuery)) && self.check(TokenType::Number))
3749            || self.is_mysql_numeric_identifier()
3750            // PIVOT/UNPIVOT can be table names when not followed by (
3751            || (self.check(TokenType::Pivot) && !self.check_next(TokenType::LParen))
3752            || (self.check(TokenType::Unpivot) && !self.check_next(TokenType::LParen))
3753            // ClickHouse: braced query parameters as table names {db:Identifier}.table
3754            || (matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse)) && self.check(TokenType::LBrace))
3755            // ClickHouse: allow union/except/intersect as table names when not followed by ALL/DISTINCT/SELECT/(
3756            || (matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse))
3757                && (self.check(TokenType::Union) || self.check(TokenType::Except) || self.check(TokenType::Intersect))
3758                && !self.check_next(TokenType::All) && !self.check_next(TokenType::Distinct)
3759                && !self.check_next(TokenType::Select) && !self.check_next(TokenType::LParen))
3760        {
3761            // Table name - could be simple, qualified, or table function
3762            // Also allow safe keywords (like 'table', 'view', 'case', 'all', etc.) as table names
3763            // BigQuery: also allows numeric table parts and hyphenated identifiers
3764            // MySQL: allows numeric-starting identifiers (e.g., 00f, 1d)
3765
3766            // DuckDB prefix alias syntax: alias: table (e.g., "foo: bar" means "bar AS foo")
3767            // Check if next token is COLON (but not :: which is DCOLON for casts)
3768            if matches!(
3769                self.config.dialect,
3770                Some(crate::dialects::DialectType::DuckDB)
3771            ) && self.check_next(TokenType::Colon)
3772                && !(self.current + 2 < self.tokens.len()
3773                    && self.tokens[self.current + 2].token_type == TokenType::Colon)
3774            {
3775                // Parse the alias identifier
3776                let alias_ident = self.parse_bigquery_table_part()?;
3777                let pre_alias_comments = self.previous_trailing_comments();
3778                // Consume the colon
3779                self.expect(TokenType::Colon)?;
3780                let colon_comments = self.previous_trailing_comments();
3781                // Parse the actual table expression recursively
3782                let mut table_expr = self.parse_table_expression()?;
3783                // Merge comments
3784                let mut all_comments = pre_alias_comments;
3785                all_comments.extend(colon_comments);
3786                // Apply the alias to the table expression
3787                match &mut table_expr {
3788                    Expression::Table(ref mut t) => {
3789                        t.alias = Some(alias_ident);
3790                        t.alias_explicit_as = true; // Output AS keyword (required by expected format)
3791                                                    // Store prefix alias comments - they should come BEFORE the table's trailing comments
3792                                                    // For "foo /* bla */: bar /* baz */", output is "bar AS foo /* bla */ /* baz */"
3793                                                    // So alias comments (/* bla */) come first, then table comments (/* baz */)
3794                        if !all_comments.is_empty() {
3795                            let existing_comments = std::mem::take(&mut t.trailing_comments);
3796                            t.trailing_comments = all_comments;
3797                            t.trailing_comments.extend(existing_comments);
3798                        }
3799                    }
3800                    Expression::Subquery(ref mut s) => {
3801                        s.alias = Some(alias_ident);
3802                    }
3803                    Expression::Function(ref mut _f) => {
3804                        // Wrap function in alias
3805                        return Ok(Expression::Alias(Box::new(Alias {
3806                            this: table_expr,
3807                            alias: alias_ident,
3808                            column_aliases: Vec::new(),
3809                            pre_alias_comments: all_comments,
3810                            trailing_comments: Vec::new(),
3811                            inferred_type: None,
3812                        })));
3813                    }
3814                    _ => {
3815                        // For other expressions, wrap in Alias
3816                        return Ok(Expression::Alias(Box::new(Alias {
3817                            this: table_expr,
3818                            alias: alias_ident,
3819                            column_aliases: Vec::new(),
3820                            pre_alias_comments: all_comments,
3821                            trailing_comments: Vec::new(),
3822                            inferred_type: None,
3823                        })));
3824                    }
3825                }
3826                return Ok(table_expr);
3827            }
3828
3829            let first_ident = self.parse_bigquery_table_part()?;
3830            let first_name = first_ident.name.clone();
3831
3832            // Check for qualified name (schema.table) or table function
3833            if self.match_token(TokenType::Dot) {
3834                // Handle TSQL a..b syntax (database..table with empty schema)
3835                if self.check(TokenType::Dot) {
3836                    // Two consecutive dots: a..b means catalog..table (empty schema)
3837                    self.advance(); // consume second dot
3838                    let table_ident = self.parse_bigquery_table_part()?;
3839                    let trailing_comments = self.previous_trailing_comments();
3840                    return Ok(Expression::Table(TableRef {
3841                        catalog: Some(first_ident),
3842                        schema: Some(Identifier::new("")), // Empty schema represents ..
3843                        name: table_ident,
3844                        alias: None,
3845                        alias_explicit_as: false,
3846                        column_aliases: Vec::new(),
3847                        trailing_comments,
3848                        when: None,
3849                        only: false,
3850                        final_: false,
3851                        table_sample: None,
3852                        hints: Vec::new(),
3853                        system_time: None,
3854                        partitions: Vec::new(),
3855                        identifier_func: None,
3856                        changes: None,
3857                        version: None,
3858                        span: None,
3859                    }));
3860                }
3861
3862                // BigQuery: handle x.* wildcard table reference (e.g., SELECT * FROM x.*)
3863                // After the first dot, if we see a Star token, it's a wildcard table name
3864                if matches!(
3865                    self.config.dialect,
3866                    Some(crate::dialects::DialectType::BigQuery)
3867                ) && self.check(TokenType::Star)
3868                {
3869                    self.advance(); // consume *
3870                    let trailing_comments = self.previous_trailing_comments();
3871                    return Ok(Expression::Table(TableRef {
3872                        catalog: None,
3873                        schema: Some(first_ident),
3874                        name: Identifier::new("*"),
3875                        alias: None,
3876                        alias_explicit_as: false,
3877                        column_aliases: Vec::new(),
3878                        trailing_comments,
3879                        when: None,
3880                        only: false,
3881                        final_: false,
3882                        table_sample: None,
3883                        hints: Vec::new(),
3884                        system_time: None,
3885                        partitions: Vec::new(),
3886                        identifier_func: None,
3887                        changes: None,
3888                        version: None,
3889                        span: None,
3890                    }));
3891                }
3892
3893                // schema.table or schema.function()
3894                // Allow keywords as table/schema names (e.g., schema.table, catalog.view)
3895                let second_ident = self.parse_bigquery_table_part()?;
3896                let second_name = second_ident.name.clone();
3897
3898                if self.match_token(TokenType::Dot) {
3899                    // BigQuery: handle a.b.* wildcard table reference
3900                    if matches!(
3901                        self.config.dialect,
3902                        Some(crate::dialects::DialectType::BigQuery)
3903                    ) && self.check(TokenType::Star)
3904                    {
3905                        self.advance(); // consume *
3906                        let trailing_comments = self.previous_trailing_comments();
3907                        return Ok(Expression::Table(TableRef {
3908                            catalog: Some(first_ident),
3909                            schema: Some(second_ident),
3910                            name: Identifier::new("*"),
3911                            alias: None,
3912                            alias_explicit_as: false,
3913                            column_aliases: Vec::new(),
3914                            trailing_comments,
3915                            when: None,
3916                            only: false,
3917                            final_: false,
3918                            table_sample: None,
3919                            hints: Vec::new(),
3920                            system_time: None,
3921                            partitions: Vec::new(),
3922                            identifier_func: None,
3923                            changes: None,
3924                            version: None,
3925                            span: None,
3926                        }));
3927                    }
3928                    // catalog.schema.table or catalog.schema.function()
3929                    let third_ident = self.parse_bigquery_table_part()?;
3930                    let third_name = third_ident.name.clone();
3931
3932                    // Check for 4-part name (e.g., project.dataset.INFORMATION_SCHEMA.TABLES)
3933                    if self.match_token(TokenType::Dot) {
3934                        let fourth_ident = self.parse_bigquery_table_part()?;
3935                        // BigQuery wildcard table suffix: a.b.c.d* matches all tables starting with d
3936                        let mut table_name = fourth_ident;
3937                        if matches!(
3938                            self.config.dialect,
3939                            Some(crate::dialects::DialectType::BigQuery)
3940                        ) && self.check(TokenType::Star)
3941                            && self.is_connected()
3942                        {
3943                            self.advance(); // consume *
3944                            table_name.name.push('*');
3945                        }
3946                        let trailing_comments = self.previous_trailing_comments();
3947                        // For 4-part names, combine first two parts as catalog, third as schema
3948                        Expression::Table(TableRef {
3949                            catalog: Some(Identifier::new(format!(
3950                                "{}.{}",
3951                                first_name, second_name
3952                            ))),
3953                            schema: Some(third_ident),
3954                            name: table_name,
3955                            alias: None,
3956                            alias_explicit_as: false,
3957                            column_aliases: Vec::new(),
3958                            trailing_comments,
3959                            when: None,
3960                            only: false,
3961                            final_: false,
3962                            table_sample: None,
3963                            hints: Vec::new(),
3964                            system_time: None,
3965                            partitions: Vec::new(),
3966                            identifier_func: None,
3967                            changes: None,
3968                            version: None,
3969                            span: None,
3970                        })
3971                    } else if self.match_token(TokenType::LParen) {
3972                        // catalog.schema.function() - table-valued function
3973                        let args = if self.check(TokenType::RParen) {
3974                            Vec::new()
3975                        } else {
3976                            self.parse_function_arguments()?
3977                        };
3978                        self.expect(TokenType::RParen)?;
3979                        let trailing_comments = self.previous_trailing_comments();
3980                        Expression::Function(Box::new(Function {
3981                            name: format!("{}.{}.{}", first_name, second_name, third_name),
3982                            args,
3983                            distinct: false,
3984                            trailing_comments,
3985                            use_bracket_syntax: false,
3986                            no_parens: false,
3987                            quoted: false,
3988                            span: None,
3989                            inferred_type: None,
3990                        }))
3991                    } else {
3992                        // catalog.schema.table
3993                        // BigQuery wildcard table suffix: x.y.z* matches all tables starting with z
3994                        let mut table_name = third_ident;
3995                        if matches!(
3996                            self.config.dialect,
3997                            Some(crate::dialects::DialectType::BigQuery)
3998                        ) && self.check(TokenType::Star)
3999                            && self.is_connected()
4000                        {
4001                            self.advance(); // consume *
4002                            table_name.name.push('*');
4003                        }
4004                        let trailing_comments = self.previous_trailing_comments();
4005                        Expression::Table(TableRef {
4006                            catalog: Some(first_ident),
4007                            schema: Some(second_ident),
4008                            name: table_name,
4009                            alias: None,
4010                            alias_explicit_as: false,
4011                            column_aliases: Vec::new(),
4012                            trailing_comments,
4013                            when: None,
4014                            only: false,
4015                            final_: false,
4016                            table_sample: None,
4017                            hints: Vec::new(),
4018                            system_time: None,
4019                            partitions: Vec::new(),
4020                            identifier_func: None,
4021                            changes: None,
4022                            version: None,
4023                            span: None,
4024                        })
4025                    }
4026                } else if self.match_token(TokenType::LParen) {
4027                    // schema.function() - table-valued function
4028                    let args = if self.check(TokenType::RParen) {
4029                        Vec::new()
4030                    } else {
4031                        self.parse_function_arguments()?
4032                    };
4033                    self.expect(TokenType::RParen)?;
4034                    let trailing_comments = self.previous_trailing_comments();
4035                    Expression::Function(Box::new(Function {
4036                        name: format!("{}.{}", first_name, second_name),
4037                        args,
4038                        distinct: false,
4039                        trailing_comments,
4040                        use_bracket_syntax: false,
4041                        no_parens: false,
4042                        quoted: false,
4043                        span: None,
4044                        inferred_type: None,
4045                    }))
4046                } else {
4047                    // schema.table
4048                    // BigQuery wildcard table suffix: x.y* matches all tables starting with y
4049                    let mut table_name = second_ident;
4050                    if matches!(
4051                        self.config.dialect,
4052                        Some(crate::dialects::DialectType::BigQuery)
4053                    ) && self.check(TokenType::Star)
4054                        && self.is_connected()
4055                    {
4056                        self.advance(); // consume *
4057                        table_name.name.push('*');
4058                    }
4059                    let trailing_comments = self.previous_trailing_comments();
4060                    Expression::Table(TableRef {
4061                        catalog: None,
4062                        schema: Some(first_ident),
4063                        name: table_name,
4064                        alias: None,
4065                        alias_explicit_as: false,
4066                        column_aliases: Vec::new(),
4067                        trailing_comments,
4068                        when: None,
4069                        only: false,
4070                        final_: false,
4071                        table_sample: None,
4072                        hints: Vec::new(),
4073                        system_time: None,
4074                        partitions: Vec::new(),
4075                        identifier_func: None,
4076                        changes: None,
4077                        version: None,
4078                        span: None,
4079                    })
4080                }
4081            } else if self.match_token(TokenType::LParen) {
4082                // Handle JSON_TABLE specially - it has COLUMNS clause syntax
4083                if first_name.to_uppercase() == "JSON_TABLE" {
4084                    // Parse the JSON expression (use parse_bitwise to avoid consuming FORMAT)
4085                    let this = self
4086                        .parse_bitwise()?
4087                        .unwrap_or(Expression::Null(crate::expressions::Null));
4088
4089                    // Check for FORMAT JSON after the expression
4090                    let this_with_format = if self.match_text_seq(&["FORMAT", "JSON"]) {
4091                        Expression::JSONFormat(Box::new(crate::expressions::JSONFormat {
4092                            this: Some(Box::new(this)),
4093                            options: Vec::new(),
4094                            is_json: None,
4095                            to_json: None,
4096                        }))
4097                    } else {
4098                        this
4099                    };
4100
4101                    // Parse path (after comma)
4102                    let path = if self.match_token(TokenType::Comma) {
4103                        if let Some(s) = self.parse_string()? {
4104                            Some(Box::new(s))
4105                        } else {
4106                            None
4107                        }
4108                    } else {
4109                        None
4110                    };
4111
4112                    // Oracle uses "ERROR ON ERROR" (value then behavior) instead of "ON ERROR ERROR"
4113                    // Parse error handling: ERROR ON ERROR or NULL ON ERROR
4114                    let error_handling = if self.match_identifier("ERROR")
4115                        && self.match_text_seq(&["ON", "ERROR"])
4116                    {
4117                        Some(Box::new(Expression::Var(Box::new(Var {
4118                            this: "ERROR ON ERROR".to_string(),
4119                        }))))
4120                    } else if self.match_text_seq(&["NULL", "ON", "ERROR"]) {
4121                        Some(Box::new(Expression::Var(Box::new(Var {
4122                            this: "NULL ON ERROR".to_string(),
4123                        }))))
4124                    } else {
4125                        None
4126                    };
4127
4128                    // Parse empty handling: ERROR ON EMPTY or NULL ON EMPTY
4129                    let empty_handling = if self.match_identifier("ERROR")
4130                        && self.match_text_seq(&["ON", "EMPTY"])
4131                    {
4132                        Some(Box::new(Expression::Var(Box::new(Var {
4133                            this: "ERROR ON EMPTY".to_string(),
4134                        }))))
4135                    } else if self.match_text_seq(&["NULL", "ON", "EMPTY"]) {
4136                        Some(Box::new(Expression::Var(Box::new(Var {
4137                            this: "NULL ON EMPTY".to_string(),
4138                        }))))
4139                    } else {
4140                        None
4141                    };
4142
4143                    // Parse COLUMNS clause
4144                    let schema = self.parse_json_table_columns()?;
4145
4146                    self.expect(TokenType::RParen)?;
4147
4148                    Expression::JSONTable(Box::new(JSONTable {
4149                        this: Box::new(this_with_format),
4150                        schema: schema.map(Box::new),
4151                        path,
4152                        error_handling,
4153                        empty_handling,
4154                    }))
4155                } else if first_name.to_uppercase() == "XMLTABLE" {
4156                    // Handle XMLTABLE specially - it has COLUMNS clause syntax
4157                    // XMLTABLE([XMLNAMESPACES(...),] '/xpath' PASSING xml_doc COLUMNS ...)
4158                    if let Some(xml_table) = self.parse_xml_table()? {
4159                        self.expect(TokenType::RParen)?;
4160                        xml_table
4161                    } else {
4162                        return Err(self.parse_error("Failed to parse XMLTABLE"));
4163                    }
4164                } else if first_name.to_uppercase() == "OPENJSON" {
4165                    // Handle OPENJSON specially - it has WITH clause for column definitions
4166                    // OPENJSON(json[, path]) [WITH (col1 type1 'path' [AS JSON], ...)]
4167                    if let Some(openjson_expr) = self.parse_open_json()? {
4168                        openjson_expr
4169                    } else {
4170                        return Err(self.parse_error("Failed to parse OPENJSON"));
4171                    }
4172                } else if first_name.to_uppercase() == "SEMANTIC_VIEW" {
4173                    // Handle SEMANTIC_VIEW specially - it has METRICS/DIMENSIONS/FACTS/WHERE syntax
4174                    // SEMANTIC_VIEW(table METRICS a.b, a.c DIMENSIONS a.b, a.c WHERE expr)
4175                    let semantic_view = self.parse_semantic_view()?;
4176                    self.expect(TokenType::RParen)?;
4177                    semantic_view
4178                } else if (first_name.eq_ignore_ascii_case("view")
4179                    || first_name.eq_ignore_ascii_case("merge"))
4180                    && (self.check(TokenType::Select) || self.check(TokenType::With))
4181                {
4182                    // ClickHouse: view(SELECT ...) and merge(SELECT ...) table functions
4183                    // contain a subquery as the argument
4184                    let query = self.parse_statement()?;
4185                    self.expect(TokenType::RParen)?;
4186                    let trailing_comments = self.previous_trailing_comments();
4187                    Expression::Function(Box::new(Function {
4188                        name: first_name.to_string(),
4189                        args: vec![query],
4190                        distinct: false,
4191                        trailing_comments,
4192                        use_bracket_syntax: false,
4193                        no_parens: false,
4194                        quoted: false,
4195                        span: None,
4196                        inferred_type: None,
4197                    }))
4198                } else {
4199                    // Simple table function like UNNEST(), GAP_FILL(), etc.
4200                    let args = if self.check(TokenType::RParen) {
4201                        Vec::new()
4202                    } else {
4203                        self.parse_function_arguments()?
4204                    };
4205                    self.expect(TokenType::RParen)?;
4206                    let trailing_comments = self.previous_trailing_comments();
4207
4208                    // Handle UNNEST specially to create UnnestFunc expression
4209                    if first_name.to_uppercase() == "UNNEST" {
4210                        // Check for WITH ORDINALITY (Presto) or WITH OFFSET (BigQuery)
4211                        // Both are semantically the same - provide an ordinal/offset column
4212                        let with_ordinality = self
4213                            .match_keywords(&[TokenType::With, TokenType::Ordinality])
4214                            || self.match_text_seq(&["WITH", "OFFSET"]);
4215                        // If WITH OFFSET matched, check for optional offset alias: WITH OFFSET AS y or WITH OFFSET y
4216                        let offset_alias = if with_ordinality
4217                            && matches!(
4218                                self.config.dialect,
4219                                Some(crate::dialects::DialectType::BigQuery)
4220                            ) {
4221                            let has_as = self.match_token(TokenType::As);
4222                            if has_as
4223                                || (self.check(TokenType::Identifier) || self.check(TokenType::Var))
4224                            {
4225                                let alias_name = self.advance().text;
4226                                Some(crate::expressions::Identifier {
4227                                    name: alias_name,
4228                                    quoted: false,
4229                                    trailing_comments: Vec::new(),
4230                                    span: None,
4231                                })
4232                            } else {
4233                                None
4234                            }
4235                        } else {
4236                            None
4237                        };
4238                        let mut args_iter = args.into_iter();
4239                        let this = args_iter
4240                            .next()
4241                            .ok_or_else(|| self.parse_error("Expected expression in UNNEST"))?;
4242                        let expressions: Vec<Expression> = args_iter.collect();
4243                        Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
4244                            this,
4245                            expressions,
4246                            with_ordinality,
4247                            alias: None,
4248                            offset_alias,
4249                        }))
4250                    } else {
4251                        // Check for WITH ORDINALITY after any table-valued function
4252                        let with_ordinality =
4253                            self.match_keywords(&[TokenType::With, TokenType::Ordinality]);
4254                        let func_name = if with_ordinality {
4255                            format!("{} WITH ORDINALITY", first_name)
4256                        } else {
4257                            first_name.clone()
4258                        };
4259                        let func = Function {
4260                            name: func_name,
4261                            args,
4262                            distinct: false,
4263                            trailing_comments,
4264                            use_bracket_syntax: false,
4265                            no_parens: false,
4266                            quoted: false,
4267                            span: None,
4268                            inferred_type: None,
4269                        };
4270                        Expression::Function(Box::new(func))
4271                    }
4272                }
4273            } else {
4274                // Simple table name
4275                // BigQuery wildcard table suffix: x* matches all tables starting with x
4276                let mut table_name = first_ident;
4277                if matches!(
4278                    self.config.dialect,
4279                    Some(crate::dialects::DialectType::BigQuery)
4280                ) && self.check(TokenType::Star)
4281                    && self.is_connected()
4282                {
4283                    self.advance(); // consume *
4284                    table_name.name.push('*');
4285                }
4286                let trailing_comments = self.previous_trailing_comments();
4287                Expression::Table(TableRef {
4288                    catalog: None,
4289                    schema: None,
4290                    name: table_name,
4291                    alias: None,
4292                    alias_explicit_as: false,
4293                    column_aliases: Vec::new(),
4294                    trailing_comments,
4295                    when: None,
4296                    only: false,
4297                    final_: false,
4298                    table_sample: None,
4299                    hints: Vec::new(),
4300                    system_time: None,
4301                    partitions: Vec::new(),
4302                    identifier_func: None,
4303                    changes: None,
4304                    version: None,
4305                    span: None,
4306                })
4307            }
4308        } else if self.check(TokenType::LBrace) {
4309            // ClickHouse query parameter: {name: Type}
4310            if let Some(param) = self.parse_clickhouse_braced_parameter()? {
4311                param
4312            } else {
4313                // Spark/Databricks widget template variable: {name}
4314                self.advance(); // consume {
4315                if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
4316                    let name_token = self.advance();
4317                    self.expect(TokenType::RBrace)?;
4318                    Expression::Parameter(Box::new(Parameter {
4319                        name: Some(name_token.text.clone()),
4320                        index: None,
4321                        style: ParameterStyle::Brace,
4322                        quoted: false,
4323                        string_quoted: false,
4324                        expression: None,
4325                    }))
4326                } else {
4327                    return Err(self.parse_error("Expected identifier after {"));
4328                }
4329            }
4330        } else if self.check(TokenType::Dollar) && self.check_next(TokenType::LBrace) {
4331            // Template variable as table reference: ${variable_name} or ${kind:name}
4332            // This is used in Databricks/Hive for parameterized queries
4333            self.advance(); // consume $
4334            self.advance(); // consume {
4335            if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
4336                let name_token = self.advance();
4337                // Check for ${kind:name} syntax (e.g., ${hiveconf:some_var})
4338                let expression = if self.match_token(TokenType::Colon) {
4339                    if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
4340                        let expr_token = self.advance();
4341                        Some(expr_token.text.clone())
4342                    } else {
4343                        return Err(self.parse_error("Expected identifier after : in ${...}"));
4344                    }
4345                } else {
4346                    None
4347                };
4348                self.expect(TokenType::RBrace)?;
4349                Expression::Parameter(Box::new(Parameter {
4350                    name: Some(name_token.text.clone()),
4351                    index: None,
4352                    style: ParameterStyle::DollarBrace,
4353                    quoted: false,
4354                    string_quoted: false,
4355                    expression,
4356                }))
4357            } else {
4358                return Err(self.parse_error("Expected identifier after ${"));
4359            }
4360        } else if self.check(TokenType::String) {
4361            // DuckDB allows string literals as table names: SELECT * FROM 'x.y'
4362            // Convert to a quoted identifier
4363            let string_token = self.advance();
4364            let table_name = Identifier {
4365                name: string_token.text.clone(),
4366                quoted: true,
4367                trailing_comments: Vec::new(),
4368                span: None,
4369            };
4370            let trailing_comments = self.previous_trailing_comments();
4371            Expression::Table(TableRef {
4372                catalog: None,
4373                schema: None,
4374                name: table_name,
4375                alias: None,
4376                alias_explicit_as: false,
4377                column_aliases: Vec::new(),
4378                trailing_comments,
4379                when: None,
4380                only: false,
4381                final_: false,
4382                table_sample: None,
4383                hints: Vec::new(),
4384                system_time: None,
4385                partitions: Vec::new(),
4386                identifier_func: None,
4387                changes: None,
4388                version: None,
4389                span: None,
4390            })
4391        } else {
4392            return Err(self.parse_error(format!(
4393                "Expected table name or subquery, got {:?}",
4394                self.peek().token_type
4395            )));
4396        };
4397
4398        // Postgres supports a wildcard (table) suffix operator, which is a no-op in this context.
4399        // e.g., FROM t1* means "include inherited tables". Matches Python sqlglot behavior.
4400        self.match_token(TokenType::Star);
4401
4402        // Check for Snowflake CHANGES clause: CHANGES (INFORMATION => ...) AT|BEFORE (...) END (...)
4403        // Must be checked before time travel since CHANGES includes its own AT/BEFORE clauses
4404        if self.check_keyword_text("CHANGES") {
4405            if let Some(changes_expr) = self.parse_changes()? {
4406                if let Expression::Table(ref mut table) = expr {
4407                    if let Expression::Changes(changes_box) = changes_expr {
4408                        table.changes = Some(changes_box);
4409                    }
4410                }
4411            }
4412        }
4413
4414        // Check for Snowflake time travel: BEFORE (STATEMENT => ...) or AT (TIMESTAMP => ...)
4415        if self.check(TokenType::Before) || self.check_keyword_text("AT") {
4416            if let Some(historical_expr) = self.parse_historical_data()? {
4417                // Attach historical data to the table expression
4418                if let Expression::Table(ref mut table) = expr {
4419                    if let Expression::HistoricalData(hd) = historical_expr {
4420                        table.when = Some(hd);
4421                    }
4422                }
4423            }
4424        }
4425
4426        // Check for TSQL FOR SYSTEM_TIME temporal clause (not BigQuery - handled post-alias)
4427        // Syntax: FOR SYSTEM_TIME AS OF expr
4428        //         FOR SYSTEM_TIME FROM expr TO expr
4429        //         FOR SYSTEM_TIME BETWEEN expr AND expr
4430        //         FOR SYSTEM_TIME CONTAINED IN (expr, expr)
4431        //         FOR SYSTEM_TIME ALL
4432        if !matches!(self.config.dialect, Some(crate::dialects::DialectType::BigQuery))
4433            && self.check(TokenType::For)
4434            && self.current + 1 < self.tokens.len()
4435            && self.tokens[self.current + 1]
4436                .text
4437                .eq_ignore_ascii_case("SYSTEM_TIME")
4438        {
4439            self.advance(); // consume FOR
4440            self.advance(); // consume SYSTEM_TIME
4441            let system_time_str = if self.match_token(TokenType::As) {
4442                // AS OF expr
4443                if self.check_keyword_text("OF") {
4444                    self.advance(); // consume OF
4445                    let start = self.current;
4446                    // Collect expression tokens until we hit a clause boundary
4447                    while !self.is_at_end()
4448                        && !self.check(TokenType::Semicolon)
4449                        && !self.check(TokenType::Where)
4450                        && !self.check(TokenType::Join)
4451                        && !self.check(TokenType::Left)
4452                        && !self.check(TokenType::Right)
4453                        && !self.check(TokenType::Inner)
4454                        && !self.check(TokenType::Outer)
4455                        && !self.check(TokenType::Full)
4456                        && !self.check(TokenType::Cross)
4457                        && !self.check(TokenType::Order)
4458                        && !self.check(TokenType::Group)
4459                        && !self.check(TokenType::Having)
4460                        && !self.check(TokenType::Limit)
4461                        && !self.check(TokenType::Union)
4462                        && !self.check(TokenType::Except)
4463                        && !self.check(TokenType::Intersect)
4464                        && !self.check(TokenType::As)
4465                        && !self.check(TokenType::Comma)
4466                        && !self.check(TokenType::RParen)
4467                        && !self.check(TokenType::With)
4468                        && !self.check(TokenType::Pivot)
4469                        && !self.check(TokenType::Unpivot)
4470                    {
4471                        self.advance();
4472                    }
4473                    let expr_text = self.tokens_to_sql_uppercased(start, self.current);
4474                    format!("FOR SYSTEM_TIME AS OF {}", expr_text)
4475                } else {
4476                    "FOR SYSTEM_TIME AS".to_string()
4477                }
4478            } else if self.match_token(TokenType::Between) {
4479                // BETWEEN expr AND expr
4480                let start = self.current;
4481                while !self.is_at_end() && !self.check(TokenType::And) {
4482                    self.advance();
4483                }
4484                let expr1_text = self.tokens_to_sql_uppercased(start, self.current);
4485                self.advance(); // consume AND
4486                let start2 = self.current;
4487                while !self.is_at_end()
4488                    && !self.check(TokenType::Semicolon)
4489                    && !self.check(TokenType::Where)
4490                    && !self.check(TokenType::Join)
4491                    && !self.check(TokenType::Left)
4492                    && !self.check(TokenType::Right)
4493                    && !self.check(TokenType::Inner)
4494                    && !self.check(TokenType::Outer)
4495                    && !self.check(TokenType::Full)
4496                    && !self.check(TokenType::Cross)
4497                    && !self.check(TokenType::Order)
4498                    && !self.check(TokenType::Group)
4499                    && !self.check(TokenType::Having)
4500                    && !self.check(TokenType::Limit)
4501                    && !self.check(TokenType::Union)
4502                    && !self.check(TokenType::Except)
4503                    && !self.check(TokenType::Intersect)
4504                    && !self.check(TokenType::As)
4505                    && !self.check(TokenType::Comma)
4506                    && !self.check(TokenType::RParen)
4507                    && !self.check(TokenType::With)
4508                    && !self.check(TokenType::Pivot)
4509                    && !self.check(TokenType::Unpivot)
4510                {
4511                    self.advance();
4512                }
4513                let expr2_text = self.tokens_to_sql_uppercased(start2, self.current);
4514                format!("FOR SYSTEM_TIME BETWEEN {} AND {}", expr1_text, expr2_text)
4515            } else if self.match_token(TokenType::From) {
4516                // FROM expr TO expr
4517                let start = self.current;
4518                while !self.is_at_end() && !self.check(TokenType::To) {
4519                    self.advance();
4520                }
4521                let expr1_text = self.tokens_to_sql_uppercased(start, self.current);
4522                self.advance(); // consume TO
4523                let start2 = self.current;
4524                while !self.is_at_end()
4525                    && !self.check(TokenType::Semicolon)
4526                    && !self.check(TokenType::Where)
4527                    && !self.check(TokenType::As)
4528                    && !self.check(TokenType::Comma)
4529                    && !self.check(TokenType::RParen)
4530                {
4531                    self.advance();
4532                }
4533                let expr2_text = self.tokens_to_sql_uppercased(start2, self.current);
4534                format!("FOR SYSTEM_TIME FROM {} TO {}", expr1_text, expr2_text)
4535            } else if self.check_identifier("CONTAINED") {
4536                self.advance(); // consume CONTAINED
4537                self.expect(TokenType::In)?;
4538                self.expect(TokenType::LParen)?;
4539                let start = self.current;
4540                let mut depth = 1;
4541                while !self.is_at_end() && depth > 0 {
4542                    if self.check(TokenType::LParen) {
4543                        depth += 1;
4544                    }
4545                    if self.check(TokenType::RParen) {
4546                        depth -= 1;
4547                        if depth == 0 {
4548                            break;
4549                        }
4550                    }
4551                    self.advance();
4552                }
4553                let inner_text = self.tokens_to_sql_uppercased(start, self.current);
4554                self.expect(TokenType::RParen)?;
4555                format!("FOR SYSTEM_TIME CONTAINED IN ({})", inner_text)
4556            } else if self.match_token(TokenType::All) {
4557                "FOR SYSTEM_TIME ALL".to_string()
4558            } else {
4559                "FOR SYSTEM_TIME".to_string()
4560            };
4561            if let Expression::Table(ref mut table) = expr {
4562                table.system_time = Some(system_time_str);
4563            }
4564        }
4565
4566        // Check for Presto/Trino time travel: FOR VERSION AS OF / FOR TIMESTAMP AS OF
4567        // Syntax: FOR VERSION AS OF <snapshot_id>
4568        //         FOR TIMESTAMP AS OF <timestamp_expr>
4569        if self.check(TokenType::For) && self.current + 1 < self.tokens.len() {
4570            let next_text = self.tokens[self.current + 1].text.to_uppercase();
4571            if next_text == "VERSION" || next_text == "TIMESTAMP" {
4572                self.advance(); // consume FOR
4573                let version_kind = self.advance().text.to_uppercase(); // consume VERSION or TIMESTAMP
4574
4575                // Expect AS OF
4576                if self.match_token(TokenType::As) && self.check_keyword_text("OF") {
4577                    self.advance(); // consume OF
4578
4579                    // Parse the expression value
4580                    if let Some(value_expr) = self.parse_bitwise()? {
4581                        let version = crate::expressions::Version {
4582                            this: Box::new(Expression::Identifier(Identifier::new(&version_kind))),
4583                            kind: "AS OF".to_string(),
4584                            expression: Some(Box::new(value_expr)),
4585                        };
4586                        if let Expression::Table(ref mut table) = expr {
4587                            table.version = Some(Box::new(version));
4588                        }
4589                    }
4590                }
4591            }
4592        }
4593
4594        // Check for Hive-style time travel: TIMESTAMP AS OF / VERSION AS OF (without FOR)
4595        // Syntax: TIMESTAMP AS OF <timestamp_expr>
4596        //         VERSION AS OF <snapshot_id>
4597        if self.current < self.tokens.len() {
4598            let current_text = self.tokens[self.current].text.to_uppercase();
4599            if (current_text == "TIMESTAMP" || current_text == "VERSION")
4600                && self.current + 2 < self.tokens.len()
4601                && self.tokens[self.current + 1].token_type == TokenType::As
4602                && self.tokens[self.current + 2]
4603                    .text
4604                    .eq_ignore_ascii_case("OF")
4605            {
4606                let version_kind = self.advance().text.to_uppercase(); // consume TIMESTAMP or VERSION
4607                self.advance(); // consume AS
4608                self.advance(); // consume OF
4609
4610                // Parse the expression value
4611                if let Some(value_expr) = self.parse_bitwise()? {
4612                    let version = crate::expressions::Version {
4613                        this: Box::new(Expression::Identifier(Identifier::new(&version_kind))),
4614                        kind: "AS OF".to_string(),
4615                        expression: Some(Box::new(value_expr)),
4616                    };
4617                    if let Expression::Table(ref mut table) = expr {
4618                        table.version = Some(Box::new(version));
4619                    }
4620                }
4621            }
4622        }
4623
4624        // Check for MySQL PARTITION(p0, p1, ...) clause
4625        // Only supported by MySQL-compatible dialects (not generic dialect)
4626        let supports_partition_selection = matches!(
4627            self.config.dialect,
4628            Some(crate::dialects::DialectType::MySQL)
4629                | Some(crate::dialects::DialectType::SingleStore)
4630                | Some(crate::dialects::DialectType::Doris)
4631                | Some(crate::dialects::DialectType::StarRocks)
4632        );
4633        if supports_partition_selection && self.match_token(TokenType::Partition) {
4634            if self.match_token(TokenType::LParen) {
4635                let mut partitions = Vec::new();
4636                loop {
4637                    let partition_name = self.expect_identifier_or_keyword_with_quoted()?;
4638                    partitions.push(partition_name);
4639                    if !self.match_token(TokenType::Comma) {
4640                        break;
4641                    }
4642                }
4643                self.expect(TokenType::RParen)?;
4644                if let Expression::Table(ref mut table) = expr {
4645                    table.partitions = partitions;
4646                }
4647            }
4648        }
4649
4650        // Check for table-level TABLESAMPLE/SAMPLE: tbl TABLESAMPLE METHOD(size) or tbl SAMPLE ROW(0)
4651        // Snowflake supports both TABLESAMPLE and SAMPLE
4652        if self.check(TokenType::TableSample) || self.check(TokenType::Sample) {
4653            if let Some(sample) = self.parse_table_level_sample()? {
4654                if let Expression::Table(ref mut table) = expr {
4655                    table.table_sample = Some(Box::new(sample));
4656                } else {
4657                    // For non-Table expressions (subqueries, functions, etc.),
4658                    // wrap in TableSample expression node
4659                    expr = Expression::TableSample(Box::new(crate::expressions::TableSample {
4660                        this: Some(Box::new(expr)),
4661                        sample: Some(Box::new(sample)),
4662                        expressions: Vec::new(),
4663                        method: None,
4664                        bucket_numerator: None,
4665                        bucket_denominator: None,
4666                        bucket_field: None,
4667                        percent: None,
4668                        rows: None,
4669                        size: None,
4670                        seed: None,
4671                    }));
4672                }
4673            }
4674        }
4675
4676        // Check for TSQL table hints: WITH (TABLOCK, INDEX(myindex), ...)
4677        if self.check(TokenType::With) && self.check_next(TokenType::LParen) {
4678            if let Expression::Table(ref mut table) = expr {
4679                if let Some(hint_expr) = self.parse_table_hints()? {
4680                    // parse_table_hints returns a Tuple wrapping individual hint expressions.
4681                    // Extract the inner hints so we store them directly.
4682                    match hint_expr {
4683                        Expression::Tuple(tuple) => {
4684                            table.hints = tuple.expressions;
4685                        }
4686                        other => {
4687                            table.hints = vec![other];
4688                        }
4689                    }
4690                }
4691            }
4692        }
4693
4694        // Check for MySQL index hints: USE INDEX, IGNORE INDEX, FORCE INDEX
4695        if self.check_keyword_text("USE")
4696            || self.check(TokenType::Ignore)
4697            || self.check_keyword_text("FORCE")
4698        {
4699            // Peek ahead to see if next token after USE/IGNORE/FORCE is INDEX or KEY
4700            let next_idx = self.current + 1;
4701            let is_index_hint = next_idx < self.tokens.len() && {
4702                let next_text = self.tokens[next_idx].text.to_uppercase();
4703                next_text == "INDEX" || next_text == "KEY"
4704            };
4705            if is_index_hint {
4706                if let Expression::Table(ref mut table) = expr {
4707                    if let Some(hint_expr) = self.parse_table_hints()? {
4708                        match hint_expr {
4709                            Expression::Tuple(tuple) => {
4710                                table.hints = tuple.expressions;
4711                            }
4712                            other => {
4713                                table.hints = vec![other];
4714                            }
4715                        }
4716                    }
4717                }
4718            }
4719        }
4720
4721        // Check for SQLite INDEXED BY or NOT INDEXED table hints
4722        if self.check_identifier("INDEXED") {
4723            self.advance(); // consume INDEXED
4724            self.expect(TokenType::By)?;
4725            // Parse index name (can be qualified: schema.index)
4726            let first_part = self.expect_identifier_or_keyword()?;
4727            let index_name = if self.match_token(TokenType::Dot) {
4728                let second_part = self.expect_identifier_or_keyword()?;
4729                format!("{}.{}", first_part, second_part)
4730            } else {
4731                first_part
4732            };
4733            if let Expression::Table(ref mut table) = expr {
4734                table.hints.push(Expression::Identifier(Identifier {
4735                    name: format!("INDEXED BY {}", index_name),
4736                    quoted: false,
4737                    trailing_comments: Vec::new(),
4738                    span: None,
4739                }));
4740            }
4741        } else if self.check(TokenType::Not) && self.check_next_identifier("INDEXED") {
4742            self.advance(); // consume NOT
4743            self.advance(); // consume INDEXED
4744            if let Expression::Table(ref mut table) = expr {
4745                table.hints.push(Expression::Identifier(Identifier {
4746                    name: "NOT INDEXED".to_string(),
4747                    quoted: false,
4748                    trailing_comments: Vec::new(),
4749                    span: None,
4750                }));
4751            }
4752        }
4753
4754        // Check for PIVOT (can be followed by UNPIVOT)
4755        // Only treat as PIVOT clause when followed by ( — otherwise it's a table alias
4756        if self.check(TokenType::Pivot) && self.check_next(TokenType::LParen) {
4757            self.advance(); // consume PIVOT
4758            expr = self.parse_pivot(expr)?;
4759        }
4760        // Check for UNPIVOT (can follow PIVOT or be standalone)
4761        // Only treat as UNPIVOT clause when followed by (, INCLUDE, or EXCLUDE — otherwise it's a table alias
4762        if self.check(TokenType::Unpivot) && self.is_unpivot_clause_start() {
4763            self.advance(); // consume UNPIVOT
4764            expr = self.parse_unpivot(expr)?;
4765        }
4766        // Check for MATCH_RECOGNIZE
4767        else if self.check(TokenType::MatchRecognize)
4768            && !matches!(&expr, Expression::Pivot(_) | Expression::Unpivot(_))
4769        {
4770            self.advance();
4771            expr = self.parse_match_recognize(Some(expr))?;
4772        }
4773
4774        // Check for alias
4775        if self.match_token(TokenType::As) {
4776            // Handle AS (col1, col2) without alias name - used by POSEXPLODE etc.
4777            if self.check(TokenType::LParen) {
4778                self.advance(); // consume LParen
4779                let mut column_aliases = Vec::new();
4780                loop {
4781                    if self.check(TokenType::RParen) {
4782                        break;
4783                    }
4784                    column_aliases.push(Identifier::new(self.expect_identifier_or_keyword()?));
4785                    if !self.match_token(TokenType::Comma) {
4786                        break;
4787                    }
4788                }
4789                self.expect(TokenType::RParen)?;
4790                expr = Expression::Alias(Box::new(Alias {
4791                    this: expr,
4792                    alias: Identifier::new(String::new()),
4793                    column_aliases,
4794                    pre_alias_comments: Vec::new(),
4795                    trailing_comments: Vec::new(),
4796                    inferred_type: None,
4797                }));
4798            } else {
4799                let alias_ident_parsed = self.expect_identifier_or_alias_keyword_with_quoted()?;
4800                let alias = alias_ident_parsed.name;
4801                let alias_is_quoted = alias_ident_parsed.quoted;
4802                let make_alias_ident = |name: String| -> Identifier {
4803                    if alias_is_quoted {
4804                        Identifier::quoted(name)
4805                    } else {
4806                        Identifier::new(name)
4807                    }
4808                };
4809                // Check for column aliases: AS t(c1, c2) or AS t(c1 type1, c2 type2) for table functions
4810                if self.match_token(TokenType::LParen) {
4811                    // Check if this is typed column definitions (for table functions like JSON_TO_RECORDSET)
4812                    // by looking ahead: if we see identifier followed by another identifier/type (not comma/rparen),
4813                    // it's typed columns
4814                    let has_typed_columns = self.check_typed_column_list();
4815
4816                    if has_typed_columns {
4817                        // Parse typed column definitions like: (col1 type1, col2 type2)
4818                        let mut typed_cols = Vec::new();
4819                        loop {
4820                            if self.check(TokenType::RParen) {
4821                                break;
4822                            }
4823                            // Parse column name (can be quoted)
4824                            let col_name = self.expect_identifier_or_keyword_with_quoted()?;
4825                            // Parse column type
4826                            let col_type = self.parse_data_type()?;
4827                            // Create ColumnDef expression, preserving the quoted status
4828                            let mut col_def = ColumnDef::new(col_name.name.clone(), col_type);
4829                            col_def.name = col_name;
4830                            typed_cols.push(Expression::ColumnDef(Box::new(col_def)));
4831
4832                            if !self.match_token(TokenType::Comma) {
4833                                break;
4834                            }
4835                        }
4836                        self.expect(TokenType::RParen)?;
4837
4838                        // Create TableAlias with typed columns
4839                        let table_alias = Expression::TableAlias(Box::new(TableAlias {
4840                            this: Some(Box::new(Expression::Identifier(make_alias_ident(alias)))),
4841                            columns: typed_cols,
4842                        }));
4843
4844                        // Wrap function with TableAlias using Tuple pattern (like ROWS FROM)
4845                        expr = Expression::Tuple(Box::new(Tuple {
4846                            expressions: vec![expr, table_alias],
4847                        }));
4848                    } else {
4849                        // Parse simple column aliases: (c1, c2, ...)
4850                        // Use expect_identifier_or_keyword to allow keywords like KEY, INDEX, VALUE as column aliases
4851                        let mut aliases = Vec::new();
4852                        loop {
4853                            if self.check(TokenType::RParen) {
4854                                break;
4855                            }
4856                            aliases.push(Identifier::new(self.expect_identifier_or_keyword()?));
4857                            if !self.match_token(TokenType::Comma) {
4858                                break;
4859                            }
4860                        }
4861                        self.expect(TokenType::RParen)?;
4862
4863                        expr = match expr {
4864                            Expression::Table(mut t) => {
4865                                t.alias = Some(make_alias_ident(alias));
4866                                t.alias_explicit_as = true;
4867                                t.column_aliases = aliases;
4868                                Expression::Table(t)
4869                            }
4870                            Expression::Subquery(mut s) => {
4871                                s.alias = Some(make_alias_ident(alias));
4872                                s.column_aliases = aliases;
4873                                Expression::Subquery(s)
4874                            }
4875                            Expression::Pivot(mut p) => {
4876                                p.alias = Some(make_alias_ident(alias));
4877                                Expression::Pivot(p)
4878                            }
4879                            Expression::Unpivot(mut u) => {
4880                                u.alias = Some(make_alias_ident(alias));
4881                                Expression::Unpivot(u)
4882                            }
4883                            Expression::MatchRecognize(mut mr) => {
4884                                mr.alias = Some(make_alias_ident(alias));
4885                                mr.alias_explicit_as = true;
4886                                Expression::MatchRecognize(mr)
4887                            }
4888                            Expression::JoinedTable(mut jt) => {
4889                                jt.alias = Some(make_alias_ident(alias));
4890                                Expression::JoinedTable(jt)
4891                            }
4892                            _ => Expression::Alias(Box::new(Alias {
4893                                this: expr,
4894                                alias: make_alias_ident(alias),
4895                                column_aliases: aliases,
4896                                pre_alias_comments: Vec::new(),
4897                                trailing_comments: Vec::new(),
4898                                inferred_type: None,
4899                            })),
4900                        };
4901                    }
4902                } else {
4903                    // No column aliases, just simple alias
4904                    let default_column_aliases = if matches!(
4905                        self.config.dialect,
4906                        Some(crate::dialects::DialectType::ClickHouse)
4907                    ) && matches!(&expr, Expression::Function(func) if func.name.eq_ignore_ascii_case("generate_series"))
4908                    {
4909                        vec![Identifier::new("generate_series")]
4910                    } else {
4911                        Vec::new()
4912                    };
4913                    expr = match expr {
4914                        Expression::Table(mut t) => {
4915                            t.alias = Some(make_alias_ident(alias));
4916                            t.alias_explicit_as = true;
4917                            t.column_aliases = Vec::new();
4918                            Expression::Table(t)
4919                        }
4920                        Expression::Subquery(mut s) => {
4921                            s.alias = Some(make_alias_ident(alias));
4922                            s.column_aliases = Vec::new();
4923                            Expression::Subquery(s)
4924                        }
4925                        Expression::Pivot(mut p) => {
4926                            p.alias = Some(make_alias_ident(alias));
4927                            Expression::Pivot(p)
4928                        }
4929                        Expression::Unpivot(mut u) => {
4930                            u.alias = Some(make_alias_ident(alias));
4931                            Expression::Unpivot(u)
4932                        }
4933                        Expression::MatchRecognize(mut mr) => {
4934                            mr.alias = Some(make_alias_ident(alias));
4935                            mr.alias_explicit_as = true;
4936                            Expression::MatchRecognize(mr)
4937                        }
4938                        Expression::JoinedTable(mut jt) => {
4939                            jt.alias = Some(make_alias_ident(alias));
4940                            Expression::JoinedTable(jt)
4941                        }
4942                        _ => Expression::Alias(Box::new(Alias {
4943                            this: expr,
4944                            alias: make_alias_ident(alias),
4945                            column_aliases: default_column_aliases,
4946                            pre_alias_comments: Vec::new(),
4947                            trailing_comments: Vec::new(),
4948                            inferred_type: None,
4949                        })),
4950                    };
4951                }
4952            } // close the else for AS (col1, col2) handling
4953        } else if (self.check(TokenType::QuotedIdentifier)
4954            || (self.check(TokenType::Var) && !self.check_keyword() && !self.check_identifier("MATCH_CONDITION")
4955                && !(self.check_identifier("ARRAY") && self.check_next(TokenType::Join)
4956                     && matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse)))
4957                // TSQL: OPTION(LABEL = 'foo') is a query hint, not an alias
4958                && !(self.check_identifier("OPTION") && self.check_next(TokenType::LParen))
4959                // MySQL: LOCK IN SHARE MODE is a locking clause, not an alias
4960                && !(self.check_identifier("LOCK") && self.check_next(TokenType::In))
4961                // ClickHouse: PARALLEL WITH is a statement separator, not a table alias
4962                && !(self.check_identifier("PARALLEL") && self.check_next(TokenType::With)
4963                     && matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse)))
4964                // DuckDB: POSITIONAL JOIN is a join method, not a table alias
4965                && !(self.check_identifier("POSITIONAL") && self.check_next(TokenType::Join))))
4966            || self.is_command_keyword_as_alias()
4967            // ClickHouse: allow FIRST/LAST as implicit table aliases
4968            // (they're keywords used in NULLS FIRST/LAST but also valid as identifiers)
4969            || (matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse))
4970                && (self.check(TokenType::First) || self.check(TokenType::Last)))
4971            // PIVOT/UNPIVOT can be table aliases when not followed by clause-starting tokens
4972            || (self.check(TokenType::Pivot) && !self.check_next(TokenType::LParen))
4973            || (self.check(TokenType::Unpivot) && !self.is_unpivot_clause_start())
4974            // PARTITION can be a table alias when the dialect doesn't support partition selection
4975            || (self.check(TokenType::Partition) && !matches!(
4976                self.config.dialect,
4977                Some(crate::dialects::DialectType::MySQL)
4978                | Some(crate::dialects::DialectType::SingleStore)
4979                | Some(crate::dialects::DialectType::Doris)
4980                | Some(crate::dialects::DialectType::StarRocks)
4981            ))
4982            || (self.check(TokenType::Window) && {
4983                // WINDOW can be a table alias if NOT followed by an identifier (window definition)
4984                let next_pos = self.current + 1;
4985                next_pos >= self.tokens.len()
4986                    || (self.tokens[next_pos].token_type != TokenType::Var
4987                        && self.tokens[next_pos].token_type != TokenType::Identifier)
4988            })
4989        {
4990            // Implicit alias (but not MATCH_CONDITION which is a join condition keyword)
4991            // Also allow command keywords (GET, PUT, etc.) and WINDOW (when not a clause) as implicit table aliases
4992            let is_keyword_alias = self.peek().token_type.is_keyword();
4993            let is_quoted_alias = self.peek().token_type == TokenType::QuotedIdentifier;
4994            let alias = self.advance().text.clone();
4995            // Check for column aliases: t(c1, c2)
4996            // Use expect_identifier_or_keyword to allow keywords like KEY, INDEX, VALUE as column aliases
4997            let mut column_aliases = if self.match_token(TokenType::LParen) {
4998                let mut aliases = Vec::new();
4999                loop {
5000                    aliases.push(Identifier::new(self.expect_identifier_or_keyword()?));
5001                    if !self.match_token(TokenType::Comma) {
5002                        break;
5003                    }
5004                }
5005                self.expect(TokenType::RParen)?;
5006                aliases
5007            } else {
5008                Vec::new()
5009            };
5010            if column_aliases.is_empty()
5011                && matches!(
5012                    self.config.dialect,
5013                    Some(crate::dialects::DialectType::ClickHouse)
5014                )
5015                && matches!(&expr, Expression::Function(func) if func.name.eq_ignore_ascii_case("generate_series"))
5016            {
5017                column_aliases = vec![Identifier::new("generate_series")];
5018            }
5019            let make_alias_ident = |name: String| -> Identifier {
5020                if is_quoted_alias {
5021                    Identifier::quoted(name)
5022                } else {
5023                    Identifier::new(name)
5024                }
5025            };
5026            expr = match expr {
5027                Expression::Table(mut t) => {
5028                    t.alias = Some(make_alias_ident(alias));
5029                    t.alias_explicit_as = is_keyword_alias;
5030                    t.column_aliases = column_aliases;
5031                    Expression::Table(t)
5032                }
5033                Expression::Subquery(mut s) => {
5034                    s.alias = Some(make_alias_ident(alias));
5035                    s.column_aliases = column_aliases;
5036                    Expression::Subquery(s)
5037                }
5038                Expression::Pivot(mut p) => {
5039                    p.alias = Some(make_alias_ident(alias));
5040                    Expression::Pivot(p)
5041                }
5042                Expression::Unpivot(mut u) => {
5043                    u.alias = Some(make_alias_ident(alias));
5044                    Expression::Unpivot(u)
5045                }
5046                Expression::MatchRecognize(mut mr) => {
5047                    mr.alias = Some(make_alias_ident(alias));
5048                    Expression::MatchRecognize(mr)
5049                }
5050                Expression::JoinedTable(mut jt) => {
5051                    jt.alias = Some(make_alias_ident(alias));
5052                    Expression::JoinedTable(jt)
5053                }
5054                _ => Expression::Alias(Box::new(Alias {
5055                    this: expr,
5056                    alias: make_alias_ident(alias),
5057                    column_aliases,
5058                    pre_alias_comments: Vec::new(),
5059                    trailing_comments: Vec::new(),
5060                    inferred_type: None,
5061                })),
5062            };
5063        }
5064
5065        // ClickHouse: subquery column alias list without alias name: FROM (...) (c0, c1)
5066        if matches!(
5067            self.config.dialect,
5068            Some(crate::dialects::DialectType::ClickHouse)
5069        ) && self.check(TokenType::LParen)
5070            && matches!(&expr, Expression::Subquery(s) if s.alias.is_none())
5071        {
5072            // Lookahead: check if this is (identifier, identifier, ...) — column alias list
5073            let mut look = self.current + 1;
5074            let mut is_col_list = true;
5075            let mut col_count = 0;
5076            loop {
5077                if look >= self.tokens.len() {
5078                    is_col_list = false;
5079                    break;
5080                }
5081                let tt = self.tokens[look].token_type;
5082                if tt == TokenType::Identifier
5083                    || tt == TokenType::Var
5084                    || tt == TokenType::QuotedIdentifier
5085                    || tt.is_keyword()
5086                {
5087                    col_count += 1;
5088                    look += 1;
5089                } else {
5090                    is_col_list = false;
5091                    break;
5092                }
5093                if look >= self.tokens.len() {
5094                    is_col_list = false;
5095                    break;
5096                }
5097                if self.tokens[look].token_type == TokenType::Comma {
5098                    look += 1;
5099                } else if self.tokens[look].token_type == TokenType::RParen {
5100                    break;
5101                } else {
5102                    is_col_list = false;
5103                    break;
5104                }
5105            }
5106            if is_col_list && col_count >= 1 {
5107                self.advance(); // consume LParen
5108                let mut aliases = Vec::new();
5109                loop {
5110                    aliases.push(Identifier::new(self.advance().text.clone()));
5111                    if !self.match_token(TokenType::Comma) {
5112                        break;
5113                    }
5114                }
5115                self.expect(TokenType::RParen)?;
5116                if let Expression::Subquery(ref mut s) = expr {
5117                    s.column_aliases = aliases;
5118                }
5119            }
5120        }
5121
5122        // ClickHouse FINAL modifier: table [AS alias] FINAL
5123        if matches!(
5124            self.config.dialect,
5125            Some(crate::dialects::DialectType::ClickHouse)
5126        ) && self.match_token(TokenType::Final)
5127        {
5128            if let Expression::Table(ref mut table) = expr {
5129                table.final_ = true;
5130            }
5131        }
5132
5133        // Check for SQLite INDEXED BY after alias: t AS t INDEXED BY idx
5134        if self.check_identifier("INDEXED") {
5135            self.advance(); // consume INDEXED
5136            self.expect(TokenType::By)?;
5137            let first_part = self.expect_identifier_or_keyword()?;
5138            let index_name = if self.match_token(TokenType::Dot) {
5139                let second_part = self.expect_identifier_or_keyword()?;
5140                format!("{}.{}", first_part, second_part)
5141            } else {
5142                first_part
5143            };
5144            if let Expression::Table(ref mut table) = expr {
5145                table.hints.push(Expression::Identifier(Identifier {
5146                    name: format!("INDEXED BY {}", index_name),
5147                    quoted: false,
5148                    trailing_comments: Vec::new(),
5149                    span: None,
5150                }));
5151            }
5152        }
5153
5154        // Check for PIVOT/UNPIVOT after alias (some dialects allow this order)
5155        // Only treat as a PIVOT clause when followed by (, and as an UNPIVOT clause when followed by (, INCLUDE, or EXCLUDE — otherwise it's a table alias
5156        if self.check(TokenType::Pivot) && self.check_next(TokenType::LParen) {
5157            self.advance(); // consume PIVOT
5158            expr = self.parse_pivot(expr)?;
5159        } else if self.check(TokenType::Unpivot) && self.is_unpivot_clause_start() {
5160            self.advance(); // consume UNPIVOT
5161            expr = self.parse_unpivot(expr)?;
5162        }
5163
5164        // Check for Redshift AT index clause for array unnesting
5165        // Syntax: table_alias.array_column AS element_alias AT index_alias
5166        // e.g., c.c_orders AS orders AT index
5167        // https://docs.aws.amazon.com/redshift/latest/dg/query-super.html
5168        if self.match_identifier("AT") {
5169            let index_alias = self.expect_identifier_or_keyword()?;
5170            // Convert the table expression to a column for AtIndex
5171            let column_expr = match expr {
5172                Expression::Table(t) => {
5173                    // Convert Table to Column reference
5174                    // For c.c_orders, table=c, name=c_orders -> column name should be c.c_orders
5175                    let mut parts = Vec::new();
5176                    if let Some(cat) = t.catalog {
5177                        parts.push(cat.name);
5178                    }
5179                    if let Some(schema) = t.schema {
5180                        parts.push(schema.name);
5181                    }
5182                    parts.push(t.name.name);
5183                    let col_name = parts.join(".");
5184                    let alias_expr = if let Some(alias) = t.alias {
5185                        Expression::Alias(Box::new(Alias {
5186                            this: Expression::Column(Column {
5187                                name: Identifier::new(&col_name),
5188                                table: None,
5189                                join_mark: false,
5190                                trailing_comments: Vec::new(),
5191                                span: None,
5192                                inferred_type: None,
5193                            }),
5194                            alias,
5195                            column_aliases: t.column_aliases,
5196                            pre_alias_comments: Vec::new(),
5197                            trailing_comments: t.trailing_comments,
5198                            inferred_type: None,
5199                        }))
5200                    } else {
5201                        Expression::Column(Column {
5202                            name: Identifier::new(&col_name),
5203                            table: None,
5204                            join_mark: false,
5205                            trailing_comments: t.trailing_comments,
5206                            span: None,
5207                            inferred_type: None,
5208                        })
5209                    };
5210                    alias_expr
5211                }
5212                other => other, // Keep as is for non-table expressions
5213            };
5214            expr = Expression::AtIndex(Box::new(AtIndex {
5215                this: Box::new(column_expr),
5216                expression: Box::new(Expression::Identifier(Identifier::new(index_alias))),
5217            }));
5218        }
5219
5220        // Check for TABLESAMPLE/SAMPLE after alias (Snowflake ALIAS_POST_TABLESAMPLE)
5221        // e.g., table2 AS t2 TABLESAMPLE BERNOULLI (50), table2 AS t2 SAMPLE ROW (0)
5222        if self.check(TokenType::TableSample) || self.check(TokenType::Sample) {
5223            if let Some(sample) = self.parse_table_level_sample()? {
5224                // Capture trailing comments after the SAMPLE clause (e.g., -- 25% of rows in table1)
5225                let post_sample_comments = self.previous_trailing_comments();
5226                if let Expression::Table(ref mut table) = expr {
5227                    table.table_sample = Some(Box::new(sample));
5228                    if !post_sample_comments.is_empty() {
5229                        table.trailing_comments.extend(post_sample_comments);
5230                    }
5231                } else {
5232                    // For non-Table expressions, wrap in TableSample expression node
5233                    expr = Expression::TableSample(Box::new(crate::expressions::TableSample {
5234                        this: Some(Box::new(expr)),
5235                        sample: Some(Box::new(sample)),
5236                        expressions: Vec::new(),
5237                        method: None,
5238                        bucket_numerator: None,
5239                        bucket_denominator: None,
5240                        bucket_field: None,
5241                        percent: None,
5242                        rows: None,
5243                        size: None,
5244                        seed: None,
5245                    }));
5246                }
5247            }
5248        }
5249
5250        // Apply PostgreSQL ONLY modifier if present
5251        if has_only {
5252            if let Expression::Table(ref mut table) = expr {
5253                table.only = true;
5254            }
5255        }
5256
5257        // BigQuery: FOR SYSTEM_TIME AS OF after alias
5258        // e.g., FROM foo AS t0 FOR SYSTEM_TIME AS OF '2026-01-01'
5259        if self.check(TokenType::For)
5260            && self.current + 1 < self.tokens.len()
5261            && self.tokens[self.current + 1]
5262                .text
5263                .eq_ignore_ascii_case("SYSTEM_TIME")
5264        {
5265            self.advance(); // consume FOR
5266            self.advance(); // consume SYSTEM_TIME
5267            if self.match_token(TokenType::As) && self.check_keyword_text("OF") {
5268                self.advance(); // consume OF
5269                let start = self.current;
5270                // Collect expression tokens until clause boundary
5271                while !self.is_at_end()
5272                    && !self.check(TokenType::Semicolon)
5273                    && !self.check(TokenType::Where)
5274                    && !self.check(TokenType::Join)
5275                    && !self.check(TokenType::Left)
5276                    && !self.check(TokenType::Right)
5277                    && !self.check(TokenType::Inner)
5278                    && !self.check(TokenType::Outer)
5279                    && !self.check(TokenType::Full)
5280                    && !self.check(TokenType::Cross)
5281                    && !self.check(TokenType::Order)
5282                    && !self.check(TokenType::Group)
5283                    && !self.check(TokenType::Having)
5284                    && !self.check(TokenType::Limit)
5285                    && !self.check(TokenType::Union)
5286                    && !self.check(TokenType::Except)
5287                    && !self.check(TokenType::Intersect)
5288                    && !self.check(TokenType::Comma)
5289                    && !self.check(TokenType::RParen)
5290                {
5291                    self.advance();
5292                }
5293                let expr_text = self.tokens_to_sql(start, self.current);
5294                let system_time_str = format!("FOR SYSTEM_TIME AS OF {}", expr_text);
5295                if let Expression::Table(ref mut table) = expr {
5296                    table.system_time = Some(system_time_str);
5297                }
5298            }
5299        }
5300
5301        // BigQuery INFORMATION_SCHEMA handling
5302        // When INFORMATION_SCHEMA is part of a table reference, merge it with the table name
5303        // into a single quoted identifier and auto-add an alias if not present
5304        if matches!(
5305            self.config.dialect,
5306            Some(crate::dialects::DialectType::BigQuery)
5307        ) {
5308            if let Expression::Table(ref mut table) = expr {
5309                // Case 1: Single quoted identifier containing INFORMATION_SCHEMA (e.g., `proj.dataset.INFORMATION_SCHEMA.SOME_VIEW`)
5310                // Add an alias that is the same as the table name (only if no alias)
5311                if table.schema.is_none() && table.catalog.is_none() && table.alias.is_none() {
5312                    let name_upper = table.name.name.to_uppercase();
5313                    if name_upper.contains("INFORMATION_SCHEMA.") {
5314                        // Set alias to be the full quoted table name
5315                        table.alias = Some(table.name.clone());
5316                        table.alias_explicit_as = true;
5317                    }
5318                }
5319                // Case 2: Multi-part name where schema part is INFORMATION_SCHEMA
5320                // e.g., region_or_dataset.INFORMATION_SCHEMA.TABLES -> region_or_dataset.`INFORMATION_SCHEMA.TABLES` AS TABLES
5321                // e.g., proj.region_or_dataset.INFORMATION_SCHEMA.TABLES -> proj.region_or_dataset.`INFORMATION_SCHEMA.TABLES` AS TABLES
5322                // This applies even if an alias is already set (we still need to merge the parts)
5323                else if let Some(ref schema) = table.schema {
5324                    if schema.name.eq_ignore_ascii_case("INFORMATION_SCHEMA") {
5325                        // Merge schema (INFORMATION_SCHEMA) with table name into a single quoted identifier
5326                        let merged_name = format!("{}.{}", schema.name, table.name.name);
5327                        let original_table_name = table.name.name.clone();
5328
5329                        // Set alias to original table name (TABLES, VIEWS, etc.) only if no alias exists
5330                        if table.alias.is_none() {
5331                            table.alias = Some(Identifier::new(original_table_name));
5332                            table.alias_explicit_as = true;
5333                        }
5334
5335                        // Create new quoted identifier
5336                        table.name = Identifier {
5337                            name: merged_name,
5338                            quoted: true,
5339                            trailing_comments: Vec::new(),
5340                            span: None,
5341                        };
5342
5343                        // Shift: schema becomes catalog, catalog becomes None or stays
5344                        table.schema = table.catalog.take();
5345                        // catalog is now None
5346                    }
5347                }
5348            }
5349        }
5350
5351        Ok(expr)
5352    }
5353
5354    /// Parse standard PIVOT clause (in FROM clause)
5355    /// PIVOT(agg_func [AS alias], ... FOR column IN (value [AS alias], ...) [GROUP BY ...])
5356    fn parse_pivot(&mut self, source: Expression) -> Result<Expression> {
5357        self.expect(TokenType::LParen)?;
5358
5359        // Parse aggregation functions (comma-separated, may have aliases)
5360        // Stop when we see FOR keyword
5361        // Use parse_primary() to handle keyword function names like FIRST, LAST
5362        let mut expressions = Vec::new();
5363        loop {
5364            if self.check(TokenType::For) || self.check(TokenType::RParen) {
5365                break;
5366            }
5367            // Parse the aggregation expression using parse_primary (handles keyword functions)
5368            let func = self.parse_primary()?;
5369            // Check for alias (AS alias or just identifier after function)
5370            let expr = if self.match_token(TokenType::As) {
5371                // AS alias
5372                let alias_name = self.expect_identifier_or_keyword()?;
5373                Expression::Alias(Box::new(Alias::new(func, Identifier::new(alias_name))))
5374            } else if !self.check(TokenType::Comma)
5375                && !self.check(TokenType::For)
5376                && !self.check(TokenType::RParen)
5377            {
5378                // Implicit alias (no AS keyword): SUM(b) d
5379                if let Some(id) = self.parse_id_var()? {
5380                    let alias_name = match &id {
5381                        Expression::Identifier(ident) => ident.name.clone(),
5382                        Expression::Column(col) => col.name.name.clone(),
5383                        _ => String::new(),
5384                    };
5385                    if !alias_name.is_empty() {
5386                        Expression::Alias(Box::new(Alias::new(func, Identifier::new(alias_name))))
5387                    } else {
5388                        func
5389                    }
5390                } else {
5391                    func
5392                }
5393            } else {
5394                func
5395            };
5396            expressions.push(expr);
5397            if !self.match_token(TokenType::Comma) {
5398                break;
5399            }
5400            // After consuming comma, if next is FOR, break (comma before FOR is optional/dropped)
5401            if self.check(TokenType::For) {
5402                break;
5403            }
5404        }
5405
5406        // FOR column IN (values)
5407        self.expect(TokenType::For)?;
5408
5409        let mut fields = Vec::new();
5410        loop {
5411            let field = self.parse_standard_pivot_in()?;
5412            fields.push(field);
5413
5414            // Check for additional FOR clauses (rare but possible)
5415            if !self.match_token(TokenType::For) {
5416                break;
5417            }
5418        }
5419
5420        // Handle Snowflake's DEFAULT ON NULL (default_value) clause
5421        let default_on_null = if self.match_text_seq(&["DEFAULT", "ON", "NULL"]) {
5422            if self.match_token(TokenType::LParen) {
5423                let val = self.parse_expression()?;
5424                self.expect(TokenType::RParen)?;
5425                Some(Box::new(val))
5426            } else {
5427                None
5428            }
5429        } else {
5430            None
5431        };
5432
5433        // Parse optional GROUP BY inside PIVOT parens
5434        let group = self.parse_group()?;
5435
5436        self.expect(TokenType::RParen)?;
5437
5438        Ok(Expression::Pivot(Box::new(Pivot {
5439            this: source,
5440            expressions,
5441            fields,
5442            using: Vec::new(),
5443            group: group.map(Box::new),
5444            unpivot: false,
5445            into: None,
5446            alias: None,
5447            include_nulls: None,
5448            default_on_null,
5449            with: None,
5450        })))
5451    }
5452
5453    /// Parse FOR column IN (...) part of standard PIVOT
5454    fn parse_standard_pivot_in(&mut self) -> Result<Expression> {
5455        // Parse the column being pivoted
5456        let column = self.parse_primary()?;
5457
5458        // IN keyword
5459        self.expect(TokenType::In)?;
5460
5461        // IN values - can be parenthesized or bare identifier
5462        if self.match_token(TokenType::LParen) {
5463            // Check for ANY keyword
5464            let in_exprs = if self.match_text_seq(&["ANY"]) {
5465                let order = self.parse_order()?;
5466                vec![Expression::PivotAny(Box::new(PivotAny {
5467                    this: order.map(Box::new),
5468                }))]
5469            } else {
5470                // Parse comma-separated values with optional aliases
5471                let mut vals = Vec::new();
5472                loop {
5473                    if self.check(TokenType::RParen) {
5474                        break;
5475                    }
5476                    if let Some(val) = self.parse_select_or_expression()? {
5477                        // Check for alias - alias can be an identifier or an expression
5478                        // (e.g., 'PREFIX ' || CHR(38) || ' SUFFIX' in Oracle)
5479                        let val = if self.match_token(TokenType::As) {
5480                            // Parse the alias as an expression (not just an identifier)
5481                            // This allows for string concatenation aliases
5482                            let alias_expr = self.parse_bitwise()?.ok_or_else(|| {
5483                                self.parse_error(
5484                                    "Expected expression after AS in PIVOT/UNPIVOT IN clause",
5485                                )
5486                            })?;
5487                            Expression::PivotAlias(Box::new(PivotAlias {
5488                                this: val,
5489                                alias: alias_expr,
5490                            }))
5491                        } else {
5492                            val
5493                        };
5494                        vals.push(val);
5495                    }
5496                    if !self.match_token(TokenType::Comma) {
5497                        break;
5498                    }
5499                }
5500                vals
5501            };
5502            self.expect(TokenType::RParen)?;
5503            Ok(Expression::In(Box::new(In {
5504                this: column,
5505                expressions: in_exprs,
5506                query: None,
5507                not: false,
5508                global: false,
5509                unnest: None,
5510                is_field: false,
5511            })))
5512        } else {
5513            // Bare identifier: FOR foo IN y_enum (no parentheses)
5514            // Store in query field to distinguish from parenthesized IN
5515            let field_id = self.parse_id_var()?.unwrap_or(Expression::Null(Null));
5516            Ok(Expression::In(Box::new(In {
5517                this: column,
5518                expressions: Vec::new(),
5519                query: Some(field_id),
5520                not: false,
5521                global: false,
5522                unnest: None,
5523                is_field: true,
5524            })))
5525        }
5526    }
5527
5528    /// Parse UNPIVOT clause
5529    /// UNPIVOT (value_column FOR name_column IN (col1, col2, ...))
5530    /// UNPIVOT ((col1, col2) FOR name_column IN (col1, col2, ...))
5531    /// UNPIVOT INCLUDE NULLS (value_column FOR name_column IN (...))
5532    /// UNPIVOT EXCLUDE NULLS (value_column FOR name_column IN (...))
5533    fn parse_unpivot(&mut self, source: Expression) -> Result<Expression> {
5534        // Check for optional INCLUDE NULLS or EXCLUDE NULLS
5535        let include_nulls = if self.match_text_seq(&["INCLUDE", "NULLS"]) {
5536            Some(true)
5537        } else if self.match_text_seq(&["EXCLUDE", "NULLS"]) {
5538            Some(false)
5539        } else {
5540            None
5541        };
5542
5543        self.expect(TokenType::LParen)?;
5544
5545        // Value column(s) - can be identifier or (col1, col2, ...)
5546        // Allow keywords as identifiers (e.g., "values" is a common column name in UNPIVOT)
5547        let (value_column, value_column_parenthesized, extra_value_columns) =
5548            if self.match_token(TokenType::LParen) {
5549                // Parenthesized value column(s)
5550                let col = self.expect_identifier_or_keyword()?;
5551                let mut extra_cols = Vec::new();
5552                while self.match_token(TokenType::Comma) {
5553                    extra_cols.push(Identifier::new(self.expect_identifier_or_keyword()?));
5554                }
5555                self.expect(TokenType::RParen)?;
5556                (Identifier::new(col), true, extra_cols)
5557            } else {
5558                (
5559                    Identifier::new(self.expect_identifier_or_keyword()?),
5560                    false,
5561                    Vec::new(),
5562                )
5563            };
5564
5565        // FOR name_column
5566        self.expect(TokenType::For)?;
5567        let name_column = Identifier::new(self.expect_identifier_or_keyword()?);
5568
5569        // IN (columns with optional aliases)
5570        // Format: col1 [AS alias1], col2 [AS alias2], ...
5571        // Or tuple format: (col1, col2) [AS alias1], (col3, col4) [AS alias2], ...
5572        // Aliases can be expressions like 'PREFIX ' || CHR(38) || ' SUFFIX'
5573        self.expect(TokenType::In)?;
5574        self.expect(TokenType::LParen)?;
5575        let columns = {
5576            let mut cols = Vec::new();
5577            loop {
5578                if self.check(TokenType::RParen) {
5579                    break;
5580                }
5581                // Check if this is a tuple of columns: (col1, col2)
5582                let col_expr = if self.check(TokenType::LParen) {
5583                    // Could be a tuple of columns for multi-value unpivot
5584                    let saved = self.current;
5585                    self.advance(); // consume (
5586                                    // Try parsing as identifier list (tuple of columns)
5587                    let mut tuple_cols = Vec::new();
5588                    let first = self.expect_identifier_or_keyword();
5589                    if let Ok(first_id) = first {
5590                        tuple_cols.push(Expression::column(first_id));
5591                        while self.match_token(TokenType::Comma) {
5592                            if let Ok(id) = self.expect_identifier_or_keyword() {
5593                                tuple_cols.push(Expression::column(id));
5594                            } else {
5595                                break;
5596                            }
5597                        }
5598                        if self.match_token(TokenType::RParen) && tuple_cols.len() > 1 {
5599                            // Successful tuple parse
5600                            Some(Expression::Tuple(Box::new(Tuple {
5601                                expressions: tuple_cols,
5602                            })))
5603                        } else {
5604                            // Not a tuple, backtrack
5605                            self.current = saved;
5606                            self.parse_select_or_expression()?
5607                        }
5608                    } else {
5609                        // Not an identifier, backtrack
5610                        self.current = saved;
5611                        self.parse_select_or_expression()?
5612                    }
5613                } else {
5614                    self.parse_select_or_expression()?
5615                };
5616
5617                if let Some(col) = col_expr {
5618                    // Check for alias
5619                    let col = if self.match_token(TokenType::As) {
5620                        // Parse the alias as an expression (allows string concatenation)
5621                        let alias_expr = self.parse_bitwise()?.ok_or_else(|| {
5622                            self.parse_error("Expected expression after AS in UNPIVOT IN clause")
5623                        })?;
5624                        Expression::PivotAlias(Box::new(PivotAlias {
5625                            this: col,
5626                            alias: alias_expr,
5627                        }))
5628                    } else {
5629                        col
5630                    };
5631                    cols.push(col);
5632                }
5633                if !self.match_token(TokenType::Comma) {
5634                    break;
5635                }
5636            }
5637            cols
5638        };
5639        self.expect(TokenType::RParen)?;
5640
5641        self.expect(TokenType::RParen)?;
5642
5643        Ok(Expression::Unpivot(Box::new(Unpivot {
5644            this: source,
5645            value_column,
5646            name_column,
5647            columns,
5648            alias: None,
5649            value_column_parenthesized,
5650            include_nulls,
5651            extra_value_columns,
5652        })))
5653    }
5654
5655    /// Parse Redshift UNPIVOT in FROM clause for SUPER object traversal
5656    /// Syntax: UNPIVOT expr [AS val_alias AT attr_alias]
5657    /// Examples:
5658    ///   FROM t, UNPIVOT t.arr[0]
5659    ///   FROM t, UNPIVOT t.arr AS val AT attr
5660    fn parse_redshift_unpivot_table(&mut self) -> Result<Expression> {
5661        // Parse the expression (column reference with possible array subscript)
5662        // We need to parse a primary expression that can include:
5663        // - Simple column: c.c_orders
5664        // - Array subscript: c.c_orders[0]
5665        // - Multiple subscripts: c.c_orders[0].items[1]
5666        // Using parse_primary which handles column refs with subscripts
5667        let this = self.parse_primary()?;
5668
5669        // Check for optional AS val_alias AT attr_alias
5670        let alias = if self.match_token(TokenType::As) {
5671            let val_alias = self.expect_identifier_or_keyword()?;
5672            // Check for AT attr_alias
5673            if self.match_text_seq(&["AT"]) {
5674                let attr_alias = self.expect_identifier_or_keyword()?;
5675                // Create alias expression that captures both aliases
5676                // We'll use the val_alias as the main alias and store attr_alias in a way
5677                // the generator can reconstruct "AS val AT attr"
5678                Some(Identifier::new(format!("{} AT {}", val_alias, attr_alias)))
5679            } else {
5680                Some(Identifier::new(val_alias))
5681            }
5682        } else {
5683            None
5684        };
5685
5686        // Return a Pivot expression with unpivot=true
5687        // Use the simplified form pattern where:
5688        // - this: the expression being unpivoted
5689        // - expressions: empty (no ON expressions)
5690        // - unpivot: true
5691        // - alias: captured above
5692        Ok(Expression::Pivot(Box::new(Pivot {
5693            this,
5694            expressions: Vec::new(),
5695            fields: Vec::new(),
5696            using: Vec::new(),
5697            group: None,
5698            unpivot: true,
5699            into: None,
5700            alias,
5701            include_nulls: None,
5702            default_on_null: None,
5703            with: None,
5704        })))
5705    }
5706
5707    /// BigQuery: Parse a table part that may contain hyphens (e.g., project-id)
5708    /// Also handles numeric table parts (e.g., foo.bar.25 -> foo.bar.`25`)
5709    /// Returns the identifier, possibly with merged hyphenated parts and quoted flag set.
5710    fn parse_bigquery_table_part(&mut self) -> Result<Identifier> {
5711        use crate::dialects::DialectType;
5712
5713        // Try to parse a number for BigQuery numeric table parts (e.g., foo.bar.25)
5714        if matches!(self.config.dialect, Some(DialectType::BigQuery))
5715            && self.check(TokenType::Number)
5716        {
5717            let num_token = self.advance().clone();
5718            let mut name = num_token.text.clone();
5719
5720            // Check if followed by more connected tokens (e.g., 25x, 25_, 25ab)
5721            // Numbers followed immediately by identifiers without whitespace are merged
5722            while !self.is_at_end() && self.is_connected() {
5723                let tok = self.advance().clone();
5724                name.push_str(&tok.text);
5725            }
5726
5727            return Ok(Identifier {
5728                name,
5729                quoted: true,
5730                trailing_comments: Vec::new(),
5731                span: None,
5732            });
5733        }
5734
5735        // MySQL numeric-starting identifiers (e.g., 00f, 1d)
5736        if matches!(self.config.dialect, Some(DialectType::MySQL)) && self.check(TokenType::Number)
5737        {
5738            let num_token = self.advance().clone();
5739            let mut name = num_token.text.clone();
5740
5741            // Merge with connected identifier/var tokens only (not punctuation)
5742            while !self.is_at_end()
5743                && self.is_connected()
5744                && (self.check(TokenType::Var) || self.check(TokenType::Identifier))
5745            {
5746                let tok = self.advance().clone();
5747                name.push_str(&tok.text);
5748            }
5749
5750            return Ok(Identifier {
5751                name,
5752                quoted: true,
5753                trailing_comments: Vec::new(),
5754                span: None,
5755            });
5756        }
5757
5758        let mut ident = self.expect_identifier_or_keyword_with_quoted()?;
5759
5760        // BigQuery: merge hyphenated parts (e.g., pro-ject_id -> `pro-ject_id`)
5761        if matches!(self.config.dialect, Some(DialectType::BigQuery)) && !ident.quoted {
5762            // Check if next token is a dash and it looks connected (no space)
5763            if self.check(TokenType::Dash) && self.is_connected_dash() {
5764                let mut name = ident.name.clone();
5765
5766                while self.check(TokenType::Dash) && self.is_connected_dash() {
5767                    self.advance(); // consume dash
5768                    name.push('-');
5769                    // Consume the next part
5770                    let part = self.advance().clone();
5771                    name.push_str(&part.text);
5772                    // Continue consuming connected tokens (for things like a-b-c)
5773                    while !self.is_at_end()
5774                        && self.is_connected()
5775                        && !self.check(TokenType::Dot)
5776                        && !self.check(TokenType::Dash)
5777                        && !self.check(TokenType::LParen)
5778                        && !self.check(TokenType::RParen)
5779                    {
5780                        let tok = self.advance().clone();
5781                        name.push_str(&tok.text);
5782                    }
5783                }
5784
5785                ident = Identifier {
5786                    name,
5787                    quoted: false,
5788                    trailing_comments: Vec::new(),
5789                    span: None,
5790                };
5791            }
5792        }
5793
5794        Ok(ident)
5795    }
5796
5797    /// Check if the current dash token is "connected" to the next token
5798    /// (i.e., the dash and next token are part of a hyphenated identifier)
5799    fn is_connected_dash(&self) -> bool {
5800        if !self.check(TokenType::Dash) {
5801            return false;
5802        }
5803        if self.current + 1 >= self.tokens.len() {
5804            return false;
5805        }
5806        let dash_token = &self.tokens[self.current];
5807        let next_token = &self.tokens[self.current + 1];
5808
5809        // The next token after dash must be an identifier, number, or keyword
5810        // and it must be adjacent (no whitespace between dash and next token)
5811        let next_is_valid = matches!(
5812            next_token.token_type,
5813            TokenType::Identifier
5814                | TokenType::Var
5815                | TokenType::Number
5816                | TokenType::All
5817                | TokenType::Select
5818                | TokenType::From
5819                | TokenType::Where
5820        ) || next_token.token_type.is_keyword();
5821
5822        // Check adjacency: dash ends at dash.end, next starts at next.start
5823        let adjacent = dash_token.span.end + 1 == next_token.span.start
5824            || dash_token.span.end == next_token.span.start;
5825
5826        next_is_valid && adjacent
5827    }
5828
5829    /// Check if the current token is "connected" to the previous token (no whitespace)
5830    fn is_connected(&self) -> bool {
5831        if self.current == 0 || self.current >= self.tokens.len() {
5832            return false;
5833        }
5834        let prev_token = &self.tokens[self.current - 1];
5835        let curr_token = &self.tokens[self.current];
5836        // Tokens are connected if they are immediately adjacent (no characters between them)
5837        // span.end is exclusive, so if prev.end == curr.start, they are adjacent
5838        prev_token.span.end == curr_token.span.start
5839    }
5840
5841    /// Parse a table reference (schema.table format)
5842    fn parse_table_ref(&mut self) -> Result<TableRef> {
5843        // Check for Snowflake IDENTIFIER() function: IDENTIFIER('string') or IDENTIFIER($var)
5844        if self.check_identifier("IDENTIFIER") && self.check_next(TokenType::LParen) {
5845            self.advance(); // consume IDENTIFIER
5846            self.advance(); // consume (
5847                            // Parse the argument: either a string literal, a variable ($foo), or identifier
5848            let arg = if self.check(TokenType::String) {
5849                let s = self.advance().text.clone();
5850                Expression::Literal(Literal::String(s))
5851            } else if self.check(TokenType::Parameter) {
5852                // ?-style parameter
5853                let var = self.advance().text.clone();
5854                Expression::Var(Box::new(crate::expressions::Var { this: var }))
5855            } else if self.check(TokenType::Dollar) {
5856                // $foo style variable - Dollar followed by identifier
5857                self.advance(); // consume $
5858                let var_name = self.expect_identifier()?;
5859                Expression::Var(Box::new(crate::expressions::Var {
5860                    this: format!("${}", var_name),
5861                }))
5862            } else {
5863                // Could be an identifier too
5864                let ident = self.expect_identifier()?;
5865                Expression::Identifier(Identifier::new(ident))
5866            };
5867            self.expect(TokenType::RParen)?;
5868            let trailing_comments = self.previous_trailing_comments();
5869            // Create a Function expression to represent IDENTIFIER(arg)
5870            let identifier_func = Expression::Function(Box::new(crate::expressions::Function {
5871                name: "IDENTIFIER".to_string(),
5872                args: vec![arg],
5873                distinct: false,
5874                trailing_comments: Vec::new(),
5875                use_bracket_syntax: false,
5876                no_parens: false,
5877                quoted: false,
5878                span: None,
5879                inferred_type: None,
5880            }));
5881            return Ok(TableRef {
5882                catalog: None,
5883                schema: None,
5884                name: Identifier::empty(),
5885                alias: None,
5886                alias_explicit_as: false,
5887                column_aliases: Vec::new(),
5888                trailing_comments,
5889                when: None,
5890                only: false,
5891                final_: false,
5892                table_sample: None,
5893                hints: Vec::new(),
5894                system_time: None,
5895                partitions: Vec::new(),
5896                identifier_func: Some(Box::new(identifier_func)),
5897                changes: None,
5898                version: None,
5899                span: None,
5900            });
5901        }
5902
5903        let first = self.parse_bigquery_table_part()?;
5904
5905        // Check for schema.table format
5906        if self.match_token(TokenType::Dot) {
5907            // Handle TSQL a..b syntax (database..table with empty schema)
5908            if self.check(TokenType::Dot) {
5909                // Two consecutive dots: a..b means catalog..table (empty schema)
5910                self.advance(); // consume second dot
5911                let table = self.parse_bigquery_table_part()?;
5912                let trailing_comments = self.previous_trailing_comments();
5913                Ok(TableRef {
5914                    catalog: Some(first),
5915                    schema: Some(Identifier::new("")), // Empty schema represents ..
5916                    name: table,
5917                    alias: None,
5918                    alias_explicit_as: false,
5919                    column_aliases: Vec::new(),
5920                    trailing_comments,
5921                    when: None,
5922                    only: false,
5923                    final_: false,
5924                    table_sample: None,
5925                    hints: Vec::new(),
5926                    system_time: None,
5927                    partitions: Vec::new(),
5928                    identifier_func: None,
5929                    changes: None,
5930                    version: None,
5931                    span: None,
5932                })
5933            } else {
5934                // BigQuery: handle x.* wildcard table reference (e.g., SELECT * FROM x.*)
5935                // After the first dot, if we see a Star token, it's a wildcard table name
5936                if matches!(
5937                    self.config.dialect,
5938                    Some(crate::dialects::DialectType::BigQuery)
5939                ) && self.check(TokenType::Star)
5940                {
5941                    self.advance(); // consume *
5942                    let trailing_comments = self.previous_trailing_comments();
5943                    return Ok(TableRef {
5944                        catalog: None,
5945                        schema: Some(first),
5946                        name: Identifier::new("*"),
5947                        alias: None,
5948                        alias_explicit_as: false,
5949                        column_aliases: Vec::new(),
5950                        trailing_comments,
5951                        when: None,
5952                        only: false,
5953                        final_: false,
5954                        table_sample: None,
5955                        hints: Vec::new(),
5956                        system_time: None,
5957                        partitions: Vec::new(),
5958                        identifier_func: None,
5959                        changes: None,
5960                        version: None,
5961                        span: None,
5962                    });
5963                }
5964                let table = self.parse_bigquery_table_part()?;
5965                // Check for catalog.schema.table format
5966                if self.match_token(TokenType::Dot) {
5967                    // BigQuery: handle a.b.* wildcard table reference
5968                    if matches!(
5969                        self.config.dialect,
5970                        Some(crate::dialects::DialectType::BigQuery)
5971                    ) && self.check(TokenType::Star)
5972                    {
5973                        self.advance(); // consume *
5974                        let trailing_comments = self.previous_trailing_comments();
5975                        return Ok(TableRef {
5976                            catalog: Some(first),
5977                            schema: Some(table),
5978                            name: Identifier::new("*"),
5979                            alias: None,
5980                            alias_explicit_as: false,
5981                            column_aliases: Vec::new(),
5982                            trailing_comments,
5983                            when: None,
5984                            only: false,
5985                            final_: false,
5986                            table_sample: None,
5987                            hints: Vec::new(),
5988                            system_time: None,
5989                            partitions: Vec::new(),
5990                            identifier_func: None,
5991                            changes: None,
5992                            version: None,
5993                            span: None,
5994                        });
5995                    }
5996                    let actual_table = self.parse_bigquery_table_part()?;
5997                    let trailing_comments = self.previous_trailing_comments();
5998                    Ok(TableRef {
5999                        catalog: Some(first),
6000                        schema: Some(table),
6001                        name: actual_table,
6002                        alias: None,
6003                        alias_explicit_as: false,
6004                        column_aliases: Vec::new(),
6005                        trailing_comments,
6006                        when: None,
6007                        only: false,
6008                        final_: false,
6009                        table_sample: None,
6010                        hints: Vec::new(),
6011                        system_time: None,
6012                        partitions: Vec::new(),
6013                        identifier_func: None,
6014                        changes: None,
6015                        version: None,
6016                        span: None,
6017                    })
6018                } else {
6019                    let trailing_comments = self.previous_trailing_comments();
6020                    Ok(TableRef {
6021                        catalog: None,
6022                        schema: Some(first),
6023                        name: table,
6024                        alias: None,
6025                        alias_explicit_as: false,
6026                        column_aliases: Vec::new(),
6027                        trailing_comments,
6028                        when: None,
6029                        only: false,
6030                        final_: false,
6031                        table_sample: None,
6032                        hints: Vec::new(),
6033                        system_time: None,
6034                        partitions: Vec::new(),
6035                        identifier_func: None,
6036                        changes: None,
6037                        version: None,
6038                        span: None,
6039                    })
6040                }
6041            }
6042        } else {
6043            let trailing_comments = self.previous_trailing_comments();
6044            Ok(TableRef {
6045                catalog: None,
6046                schema: None,
6047                name: first,
6048                alias: None,
6049                alias_explicit_as: false,
6050                column_aliases: Vec::new(),
6051                trailing_comments,
6052                when: None,
6053                only: false,
6054                final_: false,
6055                table_sample: None,
6056                hints: Vec::new(),
6057                system_time: None,
6058                partitions: Vec::new(),
6059                identifier_func: None,
6060                changes: None,
6061                version: None,
6062                span: None,
6063            })
6064        }
6065    }
6066
6067    /// Parse a datetime field for EXTRACT function (YEAR, MONTH, DAY, etc.)
6068    fn parse_datetime_field(&mut self) -> Result<DateTimeField> {
6069        let token = self.advance();
6070        let original_name = token.text.clone();
6071        let name = original_name.to_uppercase();
6072        match name.as_str() {
6073            "YEAR" => Ok(DateTimeField::Year),
6074            "MONTH" => Ok(DateTimeField::Month),
6075            "DAY" => Ok(DateTimeField::Day),
6076            "HOUR" => Ok(DateTimeField::Hour),
6077            "MINUTE" => Ok(DateTimeField::Minute),
6078            "SECOND" => Ok(DateTimeField::Second),
6079            "MILLISECOND" => Ok(DateTimeField::Millisecond),
6080            "MICROSECOND" => Ok(DateTimeField::Microsecond),
6081            "DOW" | "DAYOFWEEK" => Ok(DateTimeField::DayOfWeek),
6082            "DOY" | "DAYOFYEAR" => Ok(DateTimeField::DayOfYear),
6083            "WEEK" => {
6084                // Check for modifier like WEEK(monday)
6085                if self.match_token(TokenType::LParen) {
6086                    let modifier = self.expect_identifier_or_keyword()?;
6087                    self.expect(TokenType::RParen)?;
6088                    Ok(DateTimeField::WeekWithModifier(modifier))
6089                } else {
6090                    Ok(DateTimeField::Week)
6091                }
6092            }
6093            "QUARTER" => Ok(DateTimeField::Quarter),
6094            "EPOCH" => Ok(DateTimeField::Epoch),
6095            "TIMEZONE" => Ok(DateTimeField::Timezone),
6096            "TIMEZONE_HOUR" => Ok(DateTimeField::TimezoneHour),
6097            "TIMEZONE_MINUTE" => Ok(DateTimeField::TimezoneMinute),
6098            "DATE" => Ok(DateTimeField::Date),
6099            "TIME" => Ok(DateTimeField::Time),
6100            // Allow arbitrary field names for dialect-specific functionality
6101            _ => Ok(DateTimeField::Custom(original_name)),
6102        }
6103    }
6104
6105    /// Parse a table expression followed by any joins
6106    /// Used for parenthesized join expressions like (tbl1 CROSS JOIN tbl2)
6107    fn parse_table_expression_with_joins(&mut self) -> Result<(Expression, Vec<Join>)> {
6108        // First parse the left table expression
6109        let left = self.parse_table_expression()?;
6110
6111        // Then parse any joins
6112        let joins = self.parse_joins()?;
6113
6114        Ok((left, joins))
6115    }
6116
6117    /// Parse JOIN clauses
6118    ///
6119    /// Supports right-associative chained JOINs where ON/USING clauses are assigned right-to-left:
6120    /// - `a JOIN b JOIN c ON cond1 ON cond2` means `a JOIN (b JOIN c ON cond1) ON cond2`
6121    /// - The rightmost ON applies to the rightmost unconditioned JOIN
6122    fn parse_joins(&mut self) -> Result<Vec<Join>> {
6123        let mut joins = Vec::new();
6124        let mut nesting_group: usize = 0;
6125
6126        // Loop: Phase 1 (parse JOINs) + Phase 2 (assign deferred conditions)
6127        // After phase 2, if there are more JOIN keywords, continue with another round
6128        loop {
6129            let joins_before = joins.len();
6130
6131            // Phase 1: Parse all JOINs with optional inline ON/USING conditions
6132            loop {
6133                let pos_before_join_kind = self.current;
6134                let join_kind_result = self.try_parse_join_kind();
6135                let (kind, needs_join_keyword, use_inner_keyword, use_outer_keyword, join_hint) =
6136                    match join_kind_result {
6137                        Some(r) => r,
6138                        None => break,
6139                    };
6140                // Collect comments from all tokens consumed by try_parse_join_kind:
6141                // - Leading comments on the first token (comments on a separate line before the join)
6142                // - Trailing comments between join keywords (e.g., INNER /* comment */ JOIN)
6143                let mut join_comments = Vec::new();
6144                // Capture leading comments from the first token of the join kind
6145                if pos_before_join_kind < self.tokens.len() {
6146                    join_comments
6147                        .extend(self.tokens[pos_before_join_kind].comments.iter().cloned());
6148                }
6149                for i in pos_before_join_kind..self.current {
6150                    if i < self.tokens.len() {
6151                        join_comments.extend(self.tokens[i].trailing_comments.iter().cloned());
6152                    }
6153                }
6154                // Snowflake: DIRECTED keyword before JOIN (e.g., CROSS DIRECTED JOIN)
6155                let directed = if needs_join_keyword && self.check_identifier("DIRECTED") {
6156                    self.advance();
6157                    true
6158                } else {
6159                    false
6160                };
6161                if needs_join_keyword {
6162                    self.expect(TokenType::Join)?;
6163                }
6164
6165                // ClickHouse: ARRAY JOIN uses expressions, not table references
6166                let table = if matches!(kind, JoinKind::Array | JoinKind::LeftArray) {
6167                    let mut items = Vec::new();
6168                    // Handle ARRAY JOIN with no arguments (intentional error test)
6169                    if !self.is_at_end()
6170                        && !self.check(TokenType::Semicolon)
6171                        && !self.check(TokenType::RParen)
6172                    {
6173                        loop {
6174                            let expr = self.parse_expression()?;
6175                            let item = if self.match_token(TokenType::As) {
6176                                let alias_name = self.expect_identifier_or_safe_keyword()?;
6177                                Expression::Alias(Box::new(Alias {
6178                                    this: expr,
6179                                    alias: Identifier::new(alias_name),
6180                                    column_aliases: Vec::new(),
6181                                    pre_alias_comments: Vec::new(),
6182                                    trailing_comments: Vec::new(),
6183                                    inferred_type: None,
6184                                }))
6185                            } else {
6186                                expr
6187                            };
6188                            items.push(item);
6189                            if !self.match_token(TokenType::Comma) {
6190                                break;
6191                            }
6192                        }
6193                    } // end if !is_at_end check
6194                    if items.len() == 1 {
6195                        items.pop().unwrap()
6196                    } else if items.is_empty() {
6197                        Expression::Null(Null)
6198                    } else {
6199                        Expression::Tuple(Box::new(Tuple { expressions: items }))
6200                    }
6201                } else {
6202                    self.parse_table_expression()?
6203                };
6204
6205                // Snowflake ASOF JOIN: OFFSET/LIMIT before MATCH_CONDITION are table aliases
6206                let table = if matches!(
6207                    kind,
6208                    JoinKind::AsOf | JoinKind::AsOfLeft | JoinKind::AsOfRight
6209                ) && (self.check(TokenType::Offset) || self.check(TokenType::Limit))
6210                    && self
6211                        .peek_nth(1)
6212                        .map(|t| t.text.eq_ignore_ascii_case("MATCH_CONDITION"))
6213                        == Some(true)
6214                {
6215                    let alias_name = self.advance().text.clone();
6216                    Expression::Alias(Box::new(Alias {
6217                        this: table,
6218                        alias: Identifier::new(alias_name),
6219                        column_aliases: Vec::new(),
6220                        pre_alias_comments: Vec::new(),
6221                        trailing_comments: Vec::new(),
6222                        inferred_type: None,
6223                    }))
6224                } else {
6225                    table
6226                };
6227
6228                // Try to parse inline MATCH_CONDITION/ON/USING (only if not followed by another JOIN)
6229                // We need to peek ahead to see if there's another JOIN keyword coming
6230                let has_match_condition = self.check_identifier("MATCH_CONDITION");
6231                let has_inline_condition = self.check(TokenType::On)
6232                    || self.check(TokenType::Using)
6233                    || has_match_condition;
6234                let next_is_join = self.check_join_keyword();
6235
6236                // Parse MATCH_CONDITION first (Snowflake ASOF JOIN can have MATCH_CONDITION before ON)
6237                let match_condition = if has_match_condition && !next_is_join {
6238                    if self.match_identifier("MATCH_CONDITION") {
6239                        self.expect(TokenType::LParen)?;
6240                        let condition = self.parse_expression()?;
6241                        self.expect(TokenType::RParen)?;
6242                        Some(condition)
6243                    } else {
6244                        None
6245                    }
6246                } else {
6247                    None
6248                };
6249
6250                let (on, using) = if (has_inline_condition || match_condition.is_some())
6251                    && !self.check_join_keyword()
6252                {
6253                    // Parse inline condition only if there's no more JOINs following
6254                    if self.match_token(TokenType::On) {
6255                        (Some(self.parse_expression()?), Vec::new())
6256                    } else if self.match_token(TokenType::Using) {
6257                        // ClickHouse allows USING without parentheses
6258                        let has_parens = self.match_token(TokenType::LParen);
6259                        // Use parse_using_column_list to handle qualified names like t1.col
6260                        let cols = self.parse_using_column_list()?;
6261                        if has_parens {
6262                            self.expect(TokenType::RParen)?;
6263                        }
6264                        (None, cols)
6265                    } else {
6266                        (None, Vec::new())
6267                    }
6268                } else {
6269                    (None, Vec::new())
6270                };
6271
6272                joins.push(Join {
6273                    this: table,
6274                    on,
6275                    using,
6276                    kind,
6277                    use_inner_keyword,
6278                    use_outer_keyword,
6279                    deferred_condition: false,
6280                    join_hint,
6281                    match_condition,
6282                    pivots: Vec::new(),
6283                    comments: join_comments,
6284                    nesting_group,
6285                    directed,
6286                });
6287            }
6288
6289            // Phase 2: Assign deferred ON/USING conditions to unconditioned joins (right-to-left)
6290            // Only consider joins from the current batch (joins_before..)
6291            let unconditioned: Vec<usize> = joins[joins_before..]
6292                .iter()
6293                .enumerate()
6294                .filter(|(_, j)| j.on.is_none() && j.using.is_empty())
6295                .map(|(i, _)| joins_before + i)
6296                .collect();
6297
6298            let mut idx = unconditioned.len();
6299            while idx > 0 {
6300                if self.match_token(TokenType::On) {
6301                    idx -= 1;
6302                    let join_idx = unconditioned[idx];
6303                    joins[join_idx].on = Some(self.parse_expression()?);
6304                    joins[join_idx].deferred_condition = true;
6305                } else if self.match_token(TokenType::Using) {
6306                    idx -= 1;
6307                    let join_idx = unconditioned[idx];
6308                    let has_parens = self.match_token(TokenType::LParen);
6309                    // Handle empty USING ()
6310                    let cols = if has_parens && self.check(TokenType::RParen) {
6311                        Vec::new()
6312                    } else {
6313                        // Use parse_using_column_list to handle qualified names like t1.col
6314                        self.parse_using_column_list()?
6315                    };
6316                    joins[join_idx].using = cols;
6317                    if has_parens {
6318                        self.expect(TokenType::RParen)?;
6319                    }
6320                    joins[join_idx].deferred_condition = true;
6321                } else {
6322                    break;
6323                }
6324            }
6325
6326            // If no new joins were parsed in this round, we're done
6327            if joins.len() == joins_before {
6328                break;
6329            }
6330
6331            // If there are more JOIN keywords after deferred conditions, continue with another round
6332            if !self.check_join_keyword() {
6333                break;
6334            }
6335            nesting_group += 1;
6336        }
6337
6338        Ok(joins)
6339    }
6340
6341    /// Check if the current token starts a JOIN clause
6342    fn check_join_keyword(&self) -> bool {
6343        self.check(TokenType::Join) ||
6344        self.check(TokenType::Inner) ||
6345        self.check(TokenType::Left) ||
6346        self.check(TokenType::Right) ||
6347        self.check(TokenType::Full) ||
6348        self.check(TokenType::Cross) ||
6349        self.check(TokenType::Natural) ||
6350        self.check(TokenType::Outer) ||
6351        // ClickHouse: ARRAY JOIN, GLOBAL JOIN, ALL JOIN, ANY JOIN, PASTE JOIN
6352        (matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse)) &&
6353            (self.check_identifier("ARRAY") || self.check_identifier("GLOBAL") || self.check(TokenType::All) || self.check(TokenType::Any) || self.check_identifier("PASTE")))
6354    }
6355
6356    /// Try to parse a JOIN kind
6357    /// Returns (JoinKind, needs_join_keyword, use_inner_keyword, use_outer_keyword, join_hint)
6358    fn try_parse_join_kind(&mut self) -> Option<(JoinKind, bool, bool, bool, Option<String>)> {
6359        if matches!(
6360            self.config.dialect,
6361            Some(crate::dialects::DialectType::ClickHouse)
6362        ) {
6363            let start = self.current;
6364            let mut global = false;
6365            let mut strictness: Option<String> = None;
6366            let mut kind: Option<JoinKind> = None;
6367            let mut use_outer = false;
6368            let mut use_inner = false;
6369
6370            if self.match_identifier("GLOBAL") {
6371                global = true;
6372            }
6373
6374            loop {
6375                if strictness.is_none() && self.match_token(TokenType::All) {
6376                    strictness = Some("ALL".to_string());
6377                    continue;
6378                }
6379                if strictness.is_none() && self.match_token(TokenType::Any) {
6380                    strictness = Some("ANY".to_string());
6381                    continue;
6382                }
6383                if strictness.is_none() && self.match_token(TokenType::AsOf) {
6384                    strictness = Some("ASOF".to_string());
6385                    continue;
6386                }
6387                if strictness.is_none() && self.match_token(TokenType::Semi) {
6388                    strictness = Some("SEMI".to_string());
6389                    continue;
6390                }
6391                if strictness.is_none() && self.match_token(TokenType::Anti) {
6392                    strictness = Some("ANTI".to_string());
6393                    continue;
6394                }
6395                if kind.is_none() && self.match_token(TokenType::Left) {
6396                    use_outer = self.match_token(TokenType::Outer);
6397                    use_inner = self.match_token(TokenType::Inner);
6398                    kind = Some(JoinKind::Left);
6399                    continue;
6400                }
6401                if kind.is_none() && self.match_token(TokenType::Right) {
6402                    use_outer = self.match_token(TokenType::Outer);
6403                    use_inner = self.match_token(TokenType::Inner);
6404                    kind = Some(JoinKind::Right);
6405                    continue;
6406                }
6407                if kind.is_none() && self.match_token(TokenType::Full) {
6408                    use_outer = self.match_token(TokenType::Outer);
6409                    kind = Some(JoinKind::Full);
6410                    continue;
6411                }
6412                if kind.is_none() && self.match_token(TokenType::Inner) {
6413                    use_inner = true;
6414                    kind = Some(JoinKind::Inner);
6415                    continue;
6416                }
6417                break;
6418            }
6419
6420            // ClickHouse: ARRAY JOIN or LEFT ARRAY JOIN
6421            if self.check_identifier("ARRAY") && self.check_next(TokenType::Join) {
6422                let array_kind = if matches!(kind, Some(JoinKind::Left)) {
6423                    JoinKind::LeftArray
6424                } else {
6425                    JoinKind::Array
6426                };
6427                self.advance(); // consume ARRAY
6428                                // JOIN will be consumed by caller
6429                return Some((array_kind, true, false, false, None));
6430            }
6431
6432            // ClickHouse: PASTE JOIN (positional join, no ON/USING)
6433            if self.check_identifier("PASTE") && self.check_next(TokenType::Join) {
6434                self.advance(); // consume PASTE
6435                                // JOIN will be consumed by caller
6436                return Some((JoinKind::Paste, true, false, false, None));
6437            }
6438
6439            if global || strictness.is_some() || kind.is_some() {
6440                if self.check(TokenType::Join) {
6441                    let join_kind = kind.unwrap_or(JoinKind::Inner);
6442                    let mut hints = Vec::new();
6443                    if global {
6444                        hints.push("GLOBAL".to_string());
6445                    }
6446                    if let Some(strict) = strictness {
6447                        hints.push(strict);
6448                    }
6449                    let join_hint = if hints.is_empty() {
6450                        None
6451                    } else {
6452                        Some(hints.join(" "))
6453                    };
6454                    return Some((join_kind, true, use_inner, use_outer, join_hint));
6455                } else {
6456                    self.current = start;
6457                }
6458            }
6459        }
6460
6461        // Check for ASOF first (DuckDB/Snowflake) - can be followed by LEFT/RIGHT/etc.
6462        if self.match_token(TokenType::AsOf) {
6463            // ASOF can be followed by LEFT, RIGHT, INNER, or standalone
6464            if self.match_token(TokenType::Left) {
6465                let use_outer = self.match_token(TokenType::Outer);
6466                Some((JoinKind::AsOfLeft, true, false, use_outer, None))
6467            } else if self.match_token(TokenType::Right) {
6468                let use_outer = self.match_token(TokenType::Outer);
6469                Some((JoinKind::AsOfRight, true, false, use_outer, None))
6470            } else if self.match_token(TokenType::Inner) {
6471                Some((JoinKind::AsOf, true, true, false, None))
6472            } else {
6473                // Standalone ASOF JOIN
6474                Some((JoinKind::AsOf, true, false, false, None))
6475            }
6476        } else if self.check(TokenType::Inner) {
6477            // Check if INNER is followed by a set operation (BigQuery INNER UNION/INTERSECT/EXCEPT)
6478            // In that case, don't treat it as a JOIN keyword
6479            let saved = self.current;
6480            self.advance(); // consume INNER
6481            if self.check(TokenType::Union)
6482                || self.check(TokenType::Intersect)
6483                || self.check(TokenType::Except)
6484            {
6485                self.current = saved; // backtrack
6486                return None;
6487            }
6488            // Check for TSQL join hints: INNER LOOP JOIN, INNER HASH JOIN, INNER MERGE JOIN
6489            let join_hint = self.parse_tsql_join_hint();
6490            Some((JoinKind::Inner, true, true, false, join_hint)) // INNER keyword was explicit
6491        } else if self.check(TokenType::Left) {
6492            // Check if LEFT is followed by a set operation (BigQuery LEFT UNION/INTERSECT/EXCEPT)
6493            let saved = self.current;
6494            self.advance(); // consume LEFT
6495                            // LEFT can be followed by OUTER/INNER then set op, or directly by set op
6496            let at_set_op = self.check(TokenType::Union)
6497                || self.check(TokenType::Intersect)
6498                || self.check(TokenType::Except);
6499            let at_inner_set_op = self.check(TokenType::Inner) && {
6500                let saved2 = self.current;
6501                self.advance();
6502                let is_setop = self.check(TokenType::Union)
6503                    || self.check(TokenType::Intersect)
6504                    || self.check(TokenType::Except);
6505                self.current = saved2;
6506                is_setop
6507            };
6508            if at_set_op || at_inner_set_op {
6509                self.current = saved; // backtrack
6510                return None;
6511            }
6512            // Continue with normal LEFT JOIN parsing
6513            self.current = saved;
6514            self.match_token(TokenType::Left); // re-consume LEFT
6515            let use_outer = self.match_token(TokenType::Outer);
6516            let use_inner = self.match_token(TokenType::Inner);
6517            let join_hint = self.parse_tsql_join_hint();
6518            // Check for SEMI, ANTI, or LATERAL
6519            if self.match_token(TokenType::Semi) {
6520                Some((JoinKind::LeftSemi, true, use_inner, use_outer, join_hint))
6521            } else if self.match_token(TokenType::Anti) {
6522                Some((JoinKind::LeftAnti, true, use_inner, use_outer, join_hint))
6523            } else if self.match_token(TokenType::Lateral) {
6524                Some((JoinKind::LeftLateral, true, use_inner, use_outer, join_hint))
6525            } else {
6526                Some((JoinKind::Left, true, use_inner, use_outer, join_hint))
6527            }
6528        } else if self.check(TokenType::Right) {
6529            // Check if RIGHT is followed by a set operation (BigQuery RIGHT UNION/INTERSECT/EXCEPT)
6530            let saved = self.current;
6531            self.advance(); // consume RIGHT
6532            let at_set_op = self.check(TokenType::Union)
6533                || self.check(TokenType::Intersect)
6534                || self.check(TokenType::Except);
6535            let at_inner_set_op = self.check(TokenType::Inner) && {
6536                let saved2 = self.current;
6537                self.advance();
6538                let is_setop = self.check(TokenType::Union)
6539                    || self.check(TokenType::Intersect)
6540                    || self.check(TokenType::Except);
6541                self.current = saved2;
6542                is_setop
6543            };
6544            if at_set_op || at_inner_set_op {
6545                self.current = saved; // backtrack
6546                return None;
6547            }
6548            // Continue with normal RIGHT JOIN parsing
6549            self.current = saved;
6550            self.match_token(TokenType::Right); // re-consume RIGHT
6551            let use_outer = self.match_token(TokenType::Outer);
6552            let use_inner = self.match_token(TokenType::Inner);
6553            let join_hint = self.parse_tsql_join_hint();
6554            // Check for SEMI or ANTI
6555            if self.match_token(TokenType::Semi) {
6556                Some((JoinKind::RightSemi, true, use_inner, use_outer, join_hint))
6557            } else if self.match_token(TokenType::Anti) {
6558                Some((JoinKind::RightAnti, true, use_inner, use_outer, join_hint))
6559            } else {
6560                Some((JoinKind::Right, true, use_inner, use_outer, join_hint))
6561            }
6562        } else if self.check(TokenType::Full) {
6563            // Check if FULL is followed by a set operation (BigQuery FULL UNION/INTERSECT/EXCEPT)
6564            let saved = self.current;
6565            self.advance(); // consume FULL
6566            let at_set_op = self.check(TokenType::Union)
6567                || self.check(TokenType::Intersect)
6568                || self.check(TokenType::Except);
6569            let at_inner_set_op = self.check(TokenType::Inner) && {
6570                let saved2 = self.current;
6571                self.advance();
6572                let is_setop = self.check(TokenType::Union)
6573                    || self.check(TokenType::Intersect)
6574                    || self.check(TokenType::Except);
6575                self.current = saved2;
6576                is_setop
6577            };
6578            if at_set_op || at_inner_set_op {
6579                self.current = saved; // backtrack
6580                return None;
6581            }
6582            // Continue with normal FULL JOIN parsing
6583            self.current = saved;
6584            self.match_token(TokenType::Full); // re-consume FULL
6585            let use_outer = self.match_token(TokenType::Outer);
6586            let join_hint = self.parse_tsql_join_hint();
6587            Some((JoinKind::Full, true, false, use_outer, join_hint))
6588        } else if self.match_token(TokenType::Cross) {
6589            // CROSS JOIN or CROSS APPLY
6590            if self.match_token(TokenType::Apply) {
6591                Some((JoinKind::CrossApply, false, false, false, None))
6592            } else {
6593                Some((JoinKind::Cross, true, false, false, None))
6594            }
6595        } else if self.match_token(TokenType::Natural) {
6596            // NATURAL can be followed by LEFT, RIGHT, INNER, FULL, or just JOIN
6597            if self.match_token(TokenType::Left) {
6598                let use_outer = self.match_token(TokenType::Outer);
6599                Some((JoinKind::NaturalLeft, true, false, use_outer, None))
6600            } else if self.match_token(TokenType::Right) {
6601                let use_outer = self.match_token(TokenType::Outer);
6602                Some((JoinKind::NaturalRight, true, false, use_outer, None))
6603            } else if self.match_token(TokenType::Full) {
6604                let use_outer = self.match_token(TokenType::Outer);
6605                Some((JoinKind::NaturalFull, true, false, use_outer, None))
6606            } else if self.match_token(TokenType::Inner) {
6607                Some((JoinKind::Natural, true, true, false, None))
6608            } else {
6609                Some((JoinKind::Natural, true, false, false, None))
6610            }
6611        } else if self.match_token(TokenType::Outer) {
6612            // OUTER APPLY or standalone OUTER JOIN
6613            if self.match_token(TokenType::Apply) {
6614                Some((JoinKind::OuterApply, false, false, true, None))
6615            } else {
6616                // Standalone OUTER JOIN (without LEFT/RIGHT/FULL)
6617                Some((JoinKind::Outer, true, false, true, None))
6618            }
6619        } else if self.check(TokenType::Lateral) {
6620            // Check if this is LATERAL VIEW (Hive/Spark syntax) vs LATERAL JOIN
6621            if self.current + 1 < self.tokens.len()
6622                && self.tokens[self.current + 1].token_type == TokenType::View
6623            {
6624                // LATERAL VIEW is not a JOIN type, return None
6625                None
6626            } else {
6627                self.advance(); // Consume LATERAL
6628                Some((JoinKind::Lateral, true, false, false, None))
6629            }
6630        } else if self.match_token(TokenType::Semi) {
6631            Some((JoinKind::Semi, true, false, false, None))
6632        } else if self.match_token(TokenType::Anti) {
6633            Some((JoinKind::Anti, true, false, false, None))
6634        } else if self.check_identifier("POSITIONAL") && self.check_next(TokenType::Join) {
6635            // DuckDB POSITIONAL JOIN
6636            self.advance(); // consume POSITIONAL
6637            Some((JoinKind::Positional, true, false, false, None))
6638        } else if self.match_token(TokenType::StraightJoin) {
6639            // STRAIGHT_JOIN in MySQL - doesn't need JOIN keyword after it
6640            Some((JoinKind::Straight, false, false, false, None))
6641        } else if self.check(TokenType::Join) {
6642            Some((JoinKind::Inner, true, false, false, None)) // Default JOIN is INNER (without explicit INNER keyword)
6643        } else if self.match_token(TokenType::Comma) {
6644            // Comma-separated tables: FROM a, b (old-style ANSI join syntax)
6645            Some((JoinKind::Implicit, false, false, false, None)) // No JOIN keyword needed
6646        } else {
6647            None
6648        }
6649    }
6650
6651    /// Parse TSQL join hints: LOOP, HASH, MERGE, REMOTE
6652    fn parse_tsql_join_hint(&mut self) -> Option<String> {
6653        if self.check_identifier("LOOP") {
6654            self.advance();
6655            Some("LOOP".to_string())
6656        } else if self.check_identifier("HASH") {
6657            self.advance();
6658            Some("HASH".to_string())
6659        } else if self.check_identifier("REMOTE") {
6660            self.advance();
6661            Some("REMOTE".to_string())
6662        } else if self.check(TokenType::Merge) && {
6663            // Be careful: MERGE is also a keyword for MERGE statement
6664            // Only treat as hint if followed by JOIN
6665            let next_pos = self.current + 1;
6666            next_pos < self.tokens.len() && self.tokens[next_pos].token_type == TokenType::Join
6667        } {
6668            self.advance();
6669            Some("MERGE".to_string())
6670        } else {
6671            None
6672        }
6673    }
6674
6675    /// Parse GROUP BY clause
6676    fn parse_group_by(&mut self) -> Result<GroupBy> {
6677        // Check for optional ALL/DISTINCT modifier
6678        // Some(true) = ALL, Some(false) = DISTINCT, None = no modifier
6679        let all = if self.match_token(TokenType::All) {
6680            Some(true)
6681        } else if self.match_token(TokenType::Distinct) {
6682            Some(false)
6683        } else {
6684            None
6685        };
6686
6687        let mut expressions = Vec::new();
6688
6689        // GROUP BY ALL / GROUP BY DISTINCT without following CUBE/ROLLUP/expressions
6690        // should return early (e.g., Snowflake's "GROUP BY ALL" without column list).
6691        // But in Presto/Trino, ALL/DISTINCT can be followed by CUBE/ROLLUP expressions.
6692        if all.is_some() && self.is_at_query_modifier_or_end() {
6693            return Ok(GroupBy {
6694                expressions,
6695                all,
6696                totals: false,
6697                comments: Vec::new(),
6698            });
6699        }
6700
6701        // GROUP BY ALL WITH ROLLUP/CUBE/TOTALS — skip expression parsing, go straight to modifiers
6702        if all.is_some()
6703            && self.check(TokenType::With)
6704            && (self.check_next(TokenType::Cube)
6705                || self.check_next(TokenType::Rollup)
6706                || self.check_next_identifier("TOTALS"))
6707        {
6708            let mut totals = false;
6709            // Process WITH ROLLUP/CUBE
6710            if self.check_next(TokenType::Cube) || self.check_next(TokenType::Rollup) {
6711                self.advance(); // consume WITH
6712                if self.match_token(TokenType::Cube) {
6713                    expressions.push(Expression::Cube(Box::new(Cube {
6714                        expressions: Vec::new(),
6715                    })));
6716                } else if self.match_token(TokenType::Rollup) {
6717                    expressions.push(Expression::Rollup(Box::new(Rollup {
6718                        expressions: Vec::new(),
6719                    })));
6720                }
6721            }
6722            // Check for WITH TOTALS (possibly chained after ROLLUP/CUBE)
6723            if self.check(TokenType::With) && self.check_next_identifier("TOTALS") {
6724                self.advance(); // WITH
6725                self.advance(); // TOTALS
6726                totals = true;
6727            }
6728            return Ok(GroupBy {
6729                expressions,
6730                all,
6731                totals,
6732                comments: Vec::new(),
6733            });
6734        }
6735
6736        loop {
6737            // Check for GROUPING SETS, CUBE, ROLLUP
6738            let expr = if self.check_identifier("GROUPING")
6739                && self
6740                    .peek_nth(1)
6741                    .map_or(false, |t| t.text.eq_ignore_ascii_case("SETS"))
6742                && {
6743                    self.advance();
6744                    self.advance();
6745                    true
6746                } {
6747                // GROUPING SETS (...)
6748                self.expect(TokenType::LParen)?;
6749                let args = self.parse_grouping_sets_args()?;
6750                self.expect(TokenType::RParen)?;
6751                Expression::Function(Box::new(Function {
6752                    name: "GROUPING SETS".to_string(),
6753                    args,
6754                    distinct: false,
6755                    trailing_comments: Vec::new(),
6756                    use_bracket_syntax: false,
6757                    no_parens: false,
6758                    quoted: false,
6759                    span: None,
6760                    inferred_type: None,
6761                }))
6762            } else if self.match_token(TokenType::Cube) {
6763                // CUBE (...)
6764                self.expect(TokenType::LParen)?;
6765                let args = self.parse_expression_list()?;
6766                self.expect(TokenType::RParen)?;
6767                Expression::Function(Box::new(Function {
6768                    name: "CUBE".to_string(),
6769                    args,
6770                    distinct: false,
6771                    trailing_comments: Vec::new(),
6772                    use_bracket_syntax: false,
6773                    no_parens: false,
6774                    quoted: false,
6775                    span: None,
6776                    inferred_type: None,
6777                }))
6778            } else if self.match_token(TokenType::Rollup) {
6779                // ROLLUP (...)
6780                self.expect(TokenType::LParen)?;
6781                let args = self.parse_expression_list()?;
6782                self.expect(TokenType::RParen)?;
6783                Expression::Function(Box::new(Function {
6784                    name: "ROLLUP".to_string(),
6785                    args,
6786                    distinct: false,
6787                    trailing_comments: Vec::new(),
6788                    use_bracket_syntax: false,
6789                    no_parens: false,
6790                    quoted: false,
6791                    span: None,
6792                    inferred_type: None,
6793                }))
6794            } else {
6795                self.parse_expression()?
6796            };
6797
6798            // ClickHouse: GROUP BY expr AS alias
6799            let expr = if matches!(
6800                self.config.dialect,
6801                Some(crate::dialects::DialectType::ClickHouse)
6802            ) && self.check(TokenType::As)
6803                && !self.check_next(TokenType::LParen)
6804            {
6805                self.advance(); // consume AS
6806                let alias = self.expect_identifier_or_keyword_with_quoted()?;
6807                Expression::Alias(Box::new(Alias::new(expr, alias)))
6808            } else {
6809                expr
6810            };
6811
6812            expressions.push(expr);
6813
6814            if !self.match_token(TokenType::Comma) {
6815                // Allow adjacent CUBE/ROLLUP/GROUPING SETS without comma separator
6816                // e.g., GROUP BY CUBE(a) ROLLUP(b), GROUPING SETS((c, d))
6817                if self.check(TokenType::Cube)
6818                    || self.check(TokenType::Rollup)
6819                    || (self.check_identifier("GROUPING")
6820                        && self
6821                            .peek_nth(1)
6822                            .map_or(false, |t| t.text.eq_ignore_ascii_case("SETS")))
6823                {
6824                    continue;
6825                }
6826                break;
6827            }
6828        }
6829
6830        // Check for trailing WITH CUBE or WITH ROLLUP (Hive/MySQL syntax)
6831        // This is different from CUBE(...) or ROLLUP(...) which are parsed inline above
6832        // Use lookahead to avoid consuming WITH if it's not followed by CUBE or ROLLUP
6833        // (e.g., Redshift's WITH NO SCHEMA BINDING should not be consumed here)
6834        if self.check(TokenType::With)
6835            && (self.check_next(TokenType::Cube) || self.check_next(TokenType::Rollup))
6836        {
6837            self.advance(); // consume WITH
6838            if self.match_token(TokenType::Cube) {
6839                // WITH CUBE - add Cube with empty expressions
6840                expressions.push(Expression::Cube(Box::new(Cube {
6841                    expressions: Vec::new(),
6842                })));
6843            } else if self.match_token(TokenType::Rollup) {
6844                // WITH ROLLUP - add Rollup with empty expressions
6845                expressions.push(Expression::Rollup(Box::new(Rollup {
6846                    expressions: Vec::new(),
6847                })));
6848            }
6849        }
6850
6851        // ClickHouse: WITH TOTALS
6852        let totals = if self.check(TokenType::With) && self.check_next_identifier("TOTALS") {
6853            self.advance(); // consume WITH
6854            self.advance(); // consume TOTALS
6855            true
6856        } else {
6857            false
6858        };
6859
6860        Ok(GroupBy {
6861            expressions,
6862            all,
6863            totals,
6864            comments: Vec::new(),
6865        })
6866    }
6867
6868    /// Parse GROUPING SETS arguments which can include tuples like (x, y), nested GROUPING SETS, CUBE, ROLLUP
6869    fn parse_grouping_sets_args(&mut self) -> Result<Vec<Expression>> {
6870        let mut args = Vec::new();
6871
6872        loop {
6873            // Check for nested GROUPING SETS, CUBE, ROLLUP
6874            let expr = if self.check_identifier("GROUPING")
6875                && self
6876                    .peek_nth(1)
6877                    .map_or(false, |t| t.text.eq_ignore_ascii_case("SETS"))
6878                && {
6879                    self.advance();
6880                    self.advance();
6881                    true
6882                } {
6883                // Nested GROUPING SETS (...)
6884                self.expect(TokenType::LParen)?;
6885                let inner_args = self.parse_grouping_sets_args()?;
6886                self.expect(TokenType::RParen)?;
6887                Expression::Function(Box::new(Function {
6888                    name: "GROUPING SETS".to_string(),
6889                    args: inner_args,
6890                    distinct: false,
6891                    trailing_comments: Vec::new(),
6892                    use_bracket_syntax: false,
6893                    no_parens: false,
6894                    quoted: false,
6895                    span: None,
6896                    inferred_type: None,
6897                }))
6898            } else if self.match_token(TokenType::Cube) {
6899                // CUBE (...)
6900                self.expect(TokenType::LParen)?;
6901                let inner_args = self.parse_expression_list()?;
6902                self.expect(TokenType::RParen)?;
6903                Expression::Function(Box::new(Function {
6904                    name: "CUBE".to_string(),
6905                    args: inner_args,
6906                    distinct: false,
6907                    trailing_comments: Vec::new(),
6908                    use_bracket_syntax: false,
6909                    no_parens: false,
6910                    quoted: false,
6911                    span: None,
6912                    inferred_type: None,
6913                }))
6914            } else if self.match_token(TokenType::Rollup) {
6915                // ROLLUP (...)
6916                self.expect(TokenType::LParen)?;
6917                let inner_args = self.parse_expression_list()?;
6918                self.expect(TokenType::RParen)?;
6919                Expression::Function(Box::new(Function {
6920                    name: "ROLLUP".to_string(),
6921                    args: inner_args,
6922                    distinct: false,
6923                    trailing_comments: Vec::new(),
6924                    use_bracket_syntax: false,
6925                    no_parens: false,
6926                    quoted: false,
6927                    span: None,
6928                    inferred_type: None,
6929                }))
6930            } else if self.check(TokenType::LParen) {
6931                // This could be a tuple like (x, y) or empty ()
6932                self.advance(); // consume (
6933                if self.check(TokenType::RParen) {
6934                    // Empty tuple ()
6935                    self.advance();
6936                    Expression::Tuple(Box::new(Tuple {
6937                        expressions: Vec::new(),
6938                    }))
6939                } else {
6940                    let inner = self.parse_expression_list()?;
6941                    self.expect(TokenType::RParen)?;
6942                    Expression::Tuple(Box::new(Tuple { expressions: inner }))
6943                }
6944            } else {
6945                self.parse_expression()?
6946            };
6947
6948            args.push(expr);
6949
6950            if !self.match_token(TokenType::Comma) {
6951                break;
6952            }
6953        }
6954
6955        Ok(args)
6956    }
6957
6958    /// Parse ORDER BY clause
6959    fn parse_order_by(&mut self) -> Result<OrderBy> {
6960        self.parse_order_by_with_siblings(false)
6961    }
6962
6963    /// Parse ORDER BY clause with optional siblings flag (Oracle ORDER SIBLINGS BY)
6964    fn parse_order_by_with_siblings(&mut self, siblings: bool) -> Result<OrderBy> {
6965        let mut expressions = Vec::new();
6966
6967        loop {
6968            let expr = self.parse_expression()?;
6969
6970            // ClickHouse: ORDER BY expr AS alias — allow AS alias before DESC/ASC
6971            // But NOT AS SELECT/WITH which would be CREATE TABLE ... AS SELECT
6972            let expr = if matches!(
6973                self.config.dialect,
6974                Some(crate::dialects::DialectType::ClickHouse)
6975            ) && self.check(TokenType::As)
6976                && !self.check_next(TokenType::LParen)
6977                && !self.check_next(TokenType::Select)
6978                && !self.check_next(TokenType::With)
6979            {
6980                self.advance(); // consume AS
6981                let alias = self.expect_identifier_or_keyword_with_quoted()?;
6982                Expression::Alias(Box::new(Alias::new(expr, alias)))
6983            } else {
6984                expr
6985            };
6986
6987            let (desc, explicit_asc) = if self.match_token(TokenType::Desc) {
6988                (true, false)
6989            } else if self.match_token(TokenType::Asc) {
6990                (false, true)
6991            } else {
6992                (false, false)
6993            };
6994
6995            let nulls_first = if self.match_token(TokenType::Nulls) {
6996                if self.match_token(TokenType::First) {
6997                    Some(true)
6998                } else if self.match_token(TokenType::Last) {
6999                    Some(false)
7000                } else {
7001                    return Err(self.parse_error("Expected FIRST or LAST after NULLS"));
7002                }
7003            } else {
7004                None
7005            };
7006
7007            // Parse optional WITH FILL clause (ClickHouse)
7008            let with_fill = if self.match_text_seq(&["WITH", "FILL"]) {
7009                let from_ = if self.match_token(TokenType::From) {
7010                    Some(Box::new(self.parse_or()?))
7011                } else {
7012                    None
7013                };
7014                let to = if self.match_text_seq(&["TO"]) {
7015                    Some(Box::new(self.parse_or()?))
7016                } else {
7017                    None
7018                };
7019                let step = if self.match_text_seq(&["STEP"]) {
7020                    Some(Box::new(self.parse_or()?))
7021                } else {
7022                    None
7023                };
7024                // ClickHouse: STALENESS [INTERVAL] expr
7025                let staleness = if self.match_text_seq(&["STALENESS"]) {
7026                    Some(Box::new(self.parse_or()?))
7027                } else {
7028                    None
7029                };
7030                let interpolate = if self.match_text_seq(&["INTERPOLATE"]) {
7031                    if self.match_token(TokenType::LParen) {
7032                        // Parse INTERPOLATE items: identifier [AS expression], ...
7033                        let mut items = Vec::new();
7034                        loop {
7035                            if self.check(TokenType::RParen) {
7036                                break;
7037                            }
7038                            let quoted = self.check(TokenType::QuotedIdentifier);
7039                            let name_text = self.expect_identifier_or_safe_keyword()?;
7040                            let name_id = Identifier {
7041                                name: name_text,
7042                                quoted,
7043                                trailing_comments: Vec::new(),
7044                                span: None,
7045                            };
7046                            let item = if self.match_token(TokenType::As) {
7047                                let expr = self.parse_expression()?;
7048                                // Store as Alias: this=expression, alias=name
7049                                Expression::Alias(Box::new(Alias {
7050                                    this: expr,
7051                                    alias: name_id,
7052                                    column_aliases: Vec::new(),
7053                                    pre_alias_comments: Vec::new(),
7054                                    trailing_comments: Vec::new(),
7055                                    inferred_type: None,
7056                                }))
7057                            } else {
7058                                Expression::Identifier(name_id)
7059                            };
7060                            items.push(item);
7061                            if !self.match_token(TokenType::Comma) {
7062                                break;
7063                            }
7064                        }
7065                        self.expect(TokenType::RParen)?;
7066                        if items.len() == 1 {
7067                            Some(Box::new(items.into_iter().next().unwrap()))
7068                        } else {
7069                            Some(Box::new(Expression::Tuple(Box::new(
7070                                crate::expressions::Tuple { expressions: items },
7071                            ))))
7072                        }
7073                    } else {
7074                        None
7075                    }
7076                } else {
7077                    None
7078                };
7079                Some(Box::new(WithFill {
7080                    from_,
7081                    to,
7082                    step,
7083                    staleness,
7084                    interpolate,
7085                }))
7086            } else {
7087                None
7088            };
7089
7090            expressions.push(Ordered {
7091                this: expr,
7092                desc,
7093                nulls_first,
7094                explicit_asc,
7095                with_fill,
7096            });
7097
7098            if !self.match_token(TokenType::Comma) {
7099                break;
7100            }
7101
7102            // Handle trailing comma: if at end of input or semicolon, break
7103            if self.is_at_end() || self.check(TokenType::Semicolon) {
7104                break;
7105            }
7106        }
7107
7108        Ok(OrderBy {
7109            expressions,
7110            siblings,
7111            comments: Vec::new(),
7112        })
7113    }
7114
7115    /// Parse query modifiers (ORDER BY, LIMIT, OFFSET, DISTRIBUTE BY, SORT BY, CLUSTER BY) for parenthesized queries
7116    /// e.g., (SELECT 1) ORDER BY x LIMIT 1 OFFSET 1
7117    /// e.g., (SELECT 1 UNION SELECT 2) DISTRIBUTE BY z SORT BY x
7118    fn parse_query_modifiers(&mut self, inner: Expression) -> Result<Expression> {
7119        // Parse DISTRIBUTE BY (Hive/Spark)
7120        let distribute_by = if self.match_keywords(&[TokenType::Distribute, TokenType::By]) {
7121            let exprs = self.parse_expression_list()?;
7122            Some(DistributeBy { expressions: exprs })
7123        } else {
7124            None
7125        };
7126
7127        // Parse SORT BY (Hive/Spark) or CLUSTER BY (Hive/Spark)
7128        let (sort_by, cluster_by) = if self.match_keywords(&[TokenType::Sort, TokenType::By]) {
7129            // SORT BY
7130            let mut orders = Vec::new();
7131            loop {
7132                if let Some(ordered) = self.parse_ordered_item()? {
7133                    orders.push(ordered);
7134                } else {
7135                    break;
7136                }
7137                if !self.match_token(TokenType::Comma) {
7138                    break;
7139                }
7140            }
7141            (
7142                Some(SortBy {
7143                    expressions: orders,
7144                }),
7145                None,
7146            )
7147        } else if self.match_keywords(&[TokenType::Cluster, TokenType::By]) {
7148            // CLUSTER BY
7149            let mut orders = Vec::new();
7150            loop {
7151                if let Some(ordered) = self.parse_ordered_item()? {
7152                    orders.push(ordered);
7153                } else {
7154                    break;
7155                }
7156                if !self.match_token(TokenType::Comma) {
7157                    break;
7158                }
7159            }
7160            (
7161                None,
7162                Some(ClusterBy {
7163                    expressions: orders,
7164                }),
7165            )
7166        } else {
7167            (None, None)
7168        };
7169
7170        // Parse ORDER BY
7171        let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
7172            Some(self.parse_order_by()?)
7173        } else {
7174            None
7175        };
7176
7177        // Parse LIMIT
7178        let limit = if self.match_token(TokenType::Limit) {
7179            Some(Limit {
7180                this: self.parse_expression()?,
7181                percent: false,
7182                comments: Vec::new(),
7183            })
7184        } else {
7185            None
7186        };
7187
7188        // Parse OFFSET
7189        let offset = if self.match_token(TokenType::Offset) {
7190            Some(Offset {
7191                this: self.parse_expression()?,
7192                rows: None,
7193            })
7194        } else {
7195            None
7196        };
7197
7198        // If we have any modifiers, wrap in a Subquery with the modifiers
7199        if order_by.is_some()
7200            || limit.is_some()
7201            || offset.is_some()
7202            || distribute_by.is_some()
7203            || sort_by.is_some()
7204            || cluster_by.is_some()
7205        {
7206            // If inner is already a Subquery, add modifiers to it instead of double-wrapping
7207            if let Expression::Subquery(mut subq) = inner {
7208                subq.order_by = order_by;
7209                subq.limit = limit;
7210                subq.offset = offset;
7211                subq.distribute_by = distribute_by;
7212                subq.sort_by = sort_by;
7213                subq.cluster_by = cluster_by;
7214                Ok(Expression::Subquery(subq))
7215            } else if let Expression::Paren(paren) = inner {
7216                // If inner is a Paren containing a Subquery or other query, unwrap it
7217                // and add modifiers to a new Subquery wrapping the Paren
7218                // This handles cases like ((SELECT 1)) LIMIT 1
7219                Ok(Expression::Subquery(Box::new(Subquery {
7220                    this: Expression::Paren(paren),
7221                    alias: None,
7222                    column_aliases: Vec::new(),
7223                    order_by,
7224                    limit,
7225                    offset,
7226                    distribute_by,
7227                    sort_by,
7228                    cluster_by,
7229                    lateral: false,
7230                    modifiers_inside: false,
7231                    trailing_comments: Vec::new(),
7232                    inferred_type: None,
7233                })))
7234            } else {
7235                Ok(Expression::Subquery(Box::new(Subquery {
7236                    this: inner,
7237                    alias: None,
7238                    column_aliases: Vec::new(),
7239                    order_by,
7240                    limit,
7241                    offset,
7242                    distribute_by,
7243                    sort_by,
7244                    cluster_by,
7245                    lateral: false,
7246                    modifiers_inside: false,
7247                    trailing_comments: Vec::new(),
7248                    inferred_type: None,
7249                })))
7250            }
7251        } else {
7252            // No modifiers - return inner as-is (don't double-wrap if already a Subquery)
7253            Ok(inner)
7254        }
7255    }
7256
7257    /// Parse ORDER BY expressions for use inside aggregate functions
7258    /// Returns Vec<Ordered> instead of OrderBy struct
7259    fn parse_order_by_list(&mut self) -> Result<Vec<Ordered>> {
7260        let mut expressions = Vec::new();
7261
7262        loop {
7263            let expr = self.parse_expression()?;
7264
7265            let (desc, explicit_asc) = if self.match_token(TokenType::Desc) {
7266                (true, false)
7267            } else if self.match_token(TokenType::Asc) {
7268                (false, true)
7269            } else {
7270                (false, false)
7271            };
7272
7273            let nulls_first = if self.match_token(TokenType::Nulls) {
7274                if self.match_token(TokenType::First) {
7275                    Some(true)
7276                } else if self.match_token(TokenType::Last) {
7277                    Some(false)
7278                } else {
7279                    return Err(self.parse_error("Expected FIRST or LAST after NULLS"));
7280                }
7281            } else {
7282                None
7283            };
7284
7285            expressions.push(Ordered {
7286                this: expr,
7287                desc,
7288                nulls_first,
7289                explicit_asc,
7290                with_fill: None,
7291            });
7292
7293            if !self.match_token(TokenType::Comma) {
7294                break;
7295            }
7296        }
7297
7298        Ok(expressions)
7299    }
7300
7301    /// Parse DISTRIBUTE BY clause (Hive/Spark)
7302    fn parse_distribute_by(&mut self) -> Result<DistributeBy> {
7303        let mut expressions = Vec::new();
7304
7305        loop {
7306            expressions.push(self.parse_expression()?);
7307            if !self.match_token(TokenType::Comma) {
7308                break;
7309            }
7310        }
7311
7312        Ok(DistributeBy { expressions })
7313    }
7314
7315    /// Parse CLUSTER BY clause (Hive/Spark)
7316    fn parse_cluster_by(&mut self) -> Result<ClusterBy> {
7317        let mut expressions = Vec::new();
7318
7319        loop {
7320            let expr = self.parse_expression()?;
7321
7322            let (desc, explicit_asc) = if self.match_token(TokenType::Desc) {
7323                (true, false)
7324            } else if self.match_token(TokenType::Asc) {
7325                (false, true)
7326            } else {
7327                (false, false)
7328            };
7329
7330            expressions.push(Ordered {
7331                this: expr,
7332                desc,
7333                nulls_first: None,
7334                explicit_asc,
7335                with_fill: None,
7336            });
7337
7338            if !self.match_token(TokenType::Comma) {
7339                break;
7340            }
7341        }
7342
7343        Ok(ClusterBy { expressions })
7344    }
7345
7346    /// Parse SORT BY clause (Hive/Spark)
7347    fn parse_sort_by(&mut self) -> Result<SortBy> {
7348        let mut expressions = Vec::new();
7349
7350        loop {
7351            let expr = self.parse_expression()?;
7352
7353            let (desc, explicit_asc) = if self.match_token(TokenType::Desc) {
7354                (true, false)
7355            } else if self.match_token(TokenType::Asc) {
7356                (false, true)
7357            } else {
7358                (false, false)
7359            };
7360
7361            let nulls_first = if self.match_token(TokenType::Nulls) {
7362                if self.match_token(TokenType::First) {
7363                    Some(true)
7364                } else if self.match_token(TokenType::Last) {
7365                    Some(false)
7366                } else {
7367                    return Err(self.parse_error("Expected FIRST or LAST after NULLS"));
7368                }
7369            } else {
7370                None
7371            };
7372
7373            expressions.push(Ordered {
7374                this: expr,
7375                desc,
7376                nulls_first,
7377                explicit_asc,
7378                with_fill: None,
7379            });
7380
7381            if !self.match_token(TokenType::Comma) {
7382                break;
7383            }
7384        }
7385
7386        Ok(SortBy { expressions })
7387    }
7388
7389    /// Parse FOR UPDATE/SHARE locking clauses or FOR XML (T-SQL)
7390    /// Syntax: FOR UPDATE|SHARE|NO KEY UPDATE|KEY SHARE [OF tables] [NOWAIT|WAIT n|SKIP LOCKED]
7391    /// Also handles: LOCK IN SHARE MODE (MySQL)
7392    /// Also handles: FOR XML PATH|RAW|AUTO|EXPLICIT [, options...] (T-SQL)
7393    fn parse_locks_and_for_xml(&mut self) -> Result<(Vec<Lock>, Vec<Expression>)> {
7394        let mut locks = Vec::new();
7395        let mut for_xml = Vec::new();
7396
7397        loop {
7398            let (update, key) = if self.match_keywords(&[TokenType::For, TokenType::Update]) {
7399                // FOR UPDATE
7400                (
7401                    Some(Box::new(Expression::Boolean(BooleanLiteral {
7402                        value: true,
7403                    }))),
7404                    None,
7405                )
7406            } else if self.check(TokenType::For) && self.check_next_identifier("XML") {
7407                // FOR XML (T-SQL) - parse XML options
7408                self.advance(); // consume FOR
7409                self.advance(); // consume XML
7410                for_xml = self.parse_for_xml_options()?;
7411                break; // FOR XML is always the last clause
7412            } else if self.check(TokenType::For) && self.check_next_identifier("SHARE") {
7413                // FOR SHARE
7414                self.advance(); // consume FOR
7415                self.advance(); // consume SHARE
7416                (None, None)
7417            } else if self.check_identifier("LOCK") && self.check_next(TokenType::In) {
7418                // LOCK IN SHARE MODE (MySQL) -> converted to FOR SHARE
7419                self.advance(); // consume LOCK
7420                self.advance(); // consume IN
7421                if self.match_identifier("SHARE") {
7422                    let _ = self.match_identifier("MODE");
7423                }
7424                (None, None)
7425            } else if self.check(TokenType::For) && self.check_next(TokenType::Key) {
7426                // FOR KEY SHARE (PostgreSQL)
7427                self.advance(); // consume FOR
7428                self.advance(); // consume KEY
7429                if !self.match_identifier("SHARE") {
7430                    break; // Not a valid lock clause
7431                }
7432                (
7433                    None,
7434                    Some(Box::new(Expression::Boolean(BooleanLiteral {
7435                        value: true,
7436                    }))),
7437                )
7438            } else if self.check(TokenType::For) && self.check_next(TokenType::No) {
7439                // FOR NO KEY UPDATE (PostgreSQL)
7440                self.advance(); // consume FOR
7441                self.advance(); // consume NO
7442                if !self.match_identifier("KEY") || !self.match_token(TokenType::Update) {
7443                    break; // Not a valid lock clause
7444                }
7445                (
7446                    Some(Box::new(Expression::Boolean(BooleanLiteral {
7447                        value: true,
7448                    }))),
7449                    Some(Box::new(Expression::Boolean(BooleanLiteral {
7450                        value: true,
7451                    }))),
7452                )
7453            } else {
7454                // No more lock clauses
7455                break;
7456            };
7457
7458            // Parse optional OF clause: OF table1, table2
7459            let expressions = if self.match_token(TokenType::Of) {
7460                let mut tables = Vec::new();
7461                loop {
7462                    // Parse table reference (can be schema.table or just table)
7463                    let table = self.parse_table_ref()?;
7464                    tables.push(Expression::Table(table));
7465                    if !self.match_token(TokenType::Comma) {
7466                        break;
7467                    }
7468                }
7469                tables
7470            } else {
7471                Vec::new()
7472            };
7473
7474            // Parse wait option: NOWAIT, WAIT n, or SKIP LOCKED
7475            // Following Python sqlglot convention:
7476            // - NOWAIT -> Boolean(true)
7477            // - SKIP LOCKED -> Boolean(false)
7478            // - WAIT n -> Literal (the number)
7479            let wait = if self.match_identifier("NOWAIT") {
7480                // NOWAIT -> represented as Boolean(true)
7481                Some(Box::new(Expression::Boolean(BooleanLiteral {
7482                    value: true,
7483                })))
7484            } else if self.match_identifier("WAIT") {
7485                // WAIT n -> wait = expression (the number/literal)
7486                Some(Box::new(self.parse_primary()?))
7487            } else if self.match_identifier("SKIP") && self.match_identifier("LOCKED") {
7488                // SKIP LOCKED -> represented as Boolean(false)
7489                Some(Box::new(Expression::Boolean(BooleanLiteral {
7490                    value: false,
7491                })))
7492            } else {
7493                None
7494            };
7495
7496            locks.push(Lock {
7497                update,
7498                expressions,
7499                wait,
7500                key,
7501            });
7502        }
7503
7504        Ok((locks, for_xml))
7505    }
7506
7507    /// Parse FOR XML options (T-SQL)
7508    /// Syntax: FOR XML PATH|RAW|AUTO|EXPLICIT [('element')] [, BINARY BASE64] [, ELEMENTS [XSINIL|ABSENT]] [, TYPE] [, ROOT('name')]
7509    fn parse_for_xml_options(&mut self) -> Result<Vec<Expression>> {
7510        let mut options = Vec::new();
7511
7512        loop {
7513            // Parse XML option: could be a known option (PATH, RAW, AUTO, EXPLICIT, BINARY, ELEMENTS, TYPE, ROOT)
7514            // or an XMLKeyValueOption like PATH('element')
7515            if let Some(opt) = self.parse_for_xml_single_option()? {
7516                options.push(opt);
7517            } else {
7518                break;
7519            }
7520
7521            // Check for comma to continue parsing more options
7522            if !self.match_token(TokenType::Comma) {
7523                break;
7524            }
7525        }
7526
7527        Ok(options)
7528    }
7529
7530    /// Parse a single FOR XML option
7531    fn parse_for_xml_single_option(&mut self) -> Result<Option<Expression>> {
7532        // Known XML modes: PATH, RAW, AUTO, EXPLICIT
7533        // Known options: BINARY BASE64, ELEMENTS [XSINIL|ABSENT], TYPE, ROOT('name')
7534
7535        // Try to match known patterns
7536        if self.match_identifier("PATH") {
7537            let expression = if self.match_token(TokenType::LParen) {
7538                let expr = self.parse_string()?;
7539                self.expect(TokenType::RParen)?;
7540                expr
7541            } else {
7542                None
7543            };
7544            return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
7545                this: Box::new(Expression::Var(Box::new(Var {
7546                    this: "PATH".to_string(),
7547                }))),
7548                expression: expression.map(|e| Box::new(e)),
7549            }))));
7550        }
7551
7552        if self.match_identifier("RAW") {
7553            let expression = if self.match_token(TokenType::LParen) {
7554                let expr = self.parse_string()?;
7555                self.expect(TokenType::RParen)?;
7556                expr
7557            } else {
7558                None
7559            };
7560            return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
7561                this: Box::new(Expression::Var(Box::new(Var {
7562                    this: "RAW".to_string(),
7563                }))),
7564                expression: expression.map(|e| Box::new(e)),
7565            }))));
7566        }
7567
7568        if self.match_identifier("AUTO") {
7569            return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
7570                this: Box::new(Expression::Var(Box::new(Var {
7571                    this: "AUTO".to_string(),
7572                }))),
7573                expression: None,
7574            }))));
7575        }
7576
7577        if self.match_identifier("EXPLICIT") {
7578            return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
7579                this: Box::new(Expression::Var(Box::new(Var {
7580                    this: "EXPLICIT".to_string(),
7581                }))),
7582                expression: None,
7583            }))));
7584        }
7585
7586        if self.match_identifier("TYPE") {
7587            return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
7588                this: Box::new(Expression::Var(Box::new(Var {
7589                    this: "TYPE".to_string(),
7590                }))),
7591                expression: None,
7592            }))));
7593        }
7594
7595        if self.match_identifier("BINARY") {
7596            // BINARY BASE64
7597            if self.match_identifier("BASE64") {
7598                return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
7599                    this: Box::new(Expression::Var(Box::new(Var {
7600                        this: "BINARY BASE64".to_string(),
7601                    }))),
7602                    expression: None,
7603                }))));
7604            } else {
7605                return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
7606                    this: Box::new(Expression::Var(Box::new(Var {
7607                        this: "BINARY".to_string(),
7608                    }))),
7609                    expression: None,
7610                }))));
7611            }
7612        }
7613
7614        if self.match_identifier("ELEMENTS") {
7615            // ELEMENTS [XSINIL|ABSENT]
7616            let suboption = if self.match_identifier("XSINIL") {
7617                Some("XSINIL".to_string())
7618            } else if self.match_identifier("ABSENT") {
7619                Some("ABSENT".to_string())
7620            } else {
7621                None
7622            };
7623            let option_name = match &suboption {
7624                Some(sub) => format!("ELEMENTS {}", sub),
7625                None => "ELEMENTS".to_string(),
7626            };
7627            return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
7628                this: Box::new(Expression::Var(Box::new(Var { this: option_name }))),
7629                expression: None,
7630            }))));
7631        }
7632
7633        if self.match_identifier("ROOT") {
7634            let expression = if self.match_token(TokenType::LParen) {
7635                let expr = self.parse_string()?;
7636                self.expect(TokenType::RParen)?;
7637                expr
7638            } else {
7639                None
7640            };
7641            return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
7642                this: Box::new(Expression::Var(Box::new(Var {
7643                    this: "ROOT".to_string(),
7644                }))),
7645                expression: expression.map(|e| Box::new(e)),
7646            }))));
7647        }
7648
7649        // No more options recognized
7650        Ok(None)
7651    }
7652
7653    /// Parse CONNECT BY clause (Oracle hierarchical queries)
7654    /// Syntax: [START WITH condition] CONNECT BY [NOCYCLE] condition [START WITH condition]
7655    /// START WITH can appear before or after CONNECT BY
7656    fn parse_connect(&mut self) -> Result<Option<Connect>> {
7657        // Check for START WITH first (can appear before CONNECT BY)
7658        let start_before = if self.match_keywords(&[TokenType::Start, TokenType::With]) {
7659            Some(self.parse_expression()?)
7660        } else {
7661            None
7662        };
7663
7664        // Check for CONNECT BY
7665        if !self.match_keywords(&[TokenType::Connect, TokenType::By]) {
7666            if start_before.is_some() {
7667                return Err(self.parse_error("START WITH without CONNECT BY"));
7668            }
7669            return Ok(None);
7670        }
7671
7672        // Check for NOCYCLE
7673        let nocycle = self.match_token(TokenType::NoCycle);
7674
7675        // Parse the CONNECT BY condition with PRIOR support
7676        let connect = self.parse_connect_expression()?;
7677
7678        // START WITH can also appear after CONNECT BY
7679        let start = if start_before.is_some() {
7680            start_before
7681        } else if self.match_keywords(&[TokenType::Start, TokenType::With]) {
7682            Some(self.parse_expression()?)
7683        } else {
7684            None
7685        };
7686
7687        Ok(Some(Connect {
7688            start,
7689            connect,
7690            nocycle,
7691        }))
7692    }
7693
7694    /// Parse expression in CONNECT BY context, treating PRIOR as prefix operator
7695    fn parse_connect_expression(&mut self) -> Result<Expression> {
7696        self.parse_connect_or()
7697    }
7698
7699    /// Parse OR expression in CONNECT BY context
7700    fn parse_connect_or(&mut self) -> Result<Expression> {
7701        let mut left = self.parse_connect_and()?;
7702
7703        while self.match_token(TokenType::Or) {
7704            let right = self.parse_connect_and()?;
7705            left = Expression::Or(Box::new(BinaryOp::new(left, right)));
7706        }
7707
7708        Ok(Self::maybe_rebalance_boolean_chain(left, false))
7709    }
7710
7711    /// Parse AND expression in CONNECT BY context
7712    fn parse_connect_and(&mut self) -> Result<Expression> {
7713        let mut left = self.parse_connect_comparison()?;
7714
7715        while self.match_token(TokenType::And) {
7716            let right = self.parse_connect_comparison()?;
7717            left = Expression::And(Box::new(BinaryOp::new(left, right)));
7718        }
7719
7720        Ok(Self::maybe_rebalance_boolean_chain(left, true))
7721    }
7722
7723    /// Parse comparison in CONNECT BY context
7724    fn parse_connect_comparison(&mut self) -> Result<Expression> {
7725        let left = self.parse_connect_primary()?;
7726
7727        if self.match_token(TokenType::Eq) {
7728            let right = self.parse_connect_primary()?;
7729            return Ok(Expression::Eq(Box::new(BinaryOp::new(left, right))));
7730        }
7731        if self.match_token(TokenType::Neq) {
7732            let right = self.parse_connect_primary()?;
7733            return Ok(Expression::Neq(Box::new(BinaryOp::new(left, right))));
7734        }
7735        if self.match_token(TokenType::Lt) {
7736            let right = self.parse_connect_primary()?;
7737            return Ok(Expression::Lt(Box::new(BinaryOp::new(left, right))));
7738        }
7739        if self.match_token(TokenType::Lte) {
7740            let right = self.parse_connect_primary()?;
7741            return Ok(Expression::Lte(Box::new(BinaryOp::new(left, right))));
7742        }
7743        if self.match_token(TokenType::Gt) {
7744            let right = self.parse_connect_primary()?;
7745            return Ok(Expression::Gt(Box::new(BinaryOp::new(left, right))));
7746        }
7747        if self.match_token(TokenType::Gte) {
7748            let right = self.parse_connect_primary()?;
7749            return Ok(Expression::Gte(Box::new(BinaryOp::new(left, right))));
7750        }
7751
7752        Ok(left)
7753    }
7754
7755    /// Parse primary in CONNECT BY context with PRIOR support
7756    fn parse_connect_primary(&mut self) -> Result<Expression> {
7757        // Handle PRIOR as prefix operator
7758        if self.match_token(TokenType::Prior) {
7759            let expr = self.parse_primary()?;
7760            return Ok(Expression::Prior(Box::new(Prior { this: expr })));
7761        }
7762
7763        if let Some(connect_by_root) = self.try_parse_connect_by_root_expression()? {
7764            return Ok(connect_by_root);
7765        }
7766
7767        self.parse_primary()
7768    }
7769
7770    /// Parse Oracle CONNECT_BY_ROOT in either supported form:
7771    /// CONNECT_BY_ROOT col
7772    /// CONNECT_BY_ROOT(col)
7773    fn try_parse_connect_by_root_expression(&mut self) -> Result<Option<Expression>> {
7774        if !(self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("CONNECT_BY_ROOT"))
7775        {
7776            return Ok(None);
7777        }
7778
7779        self.advance();
7780
7781        let this = if self.match_token(TokenType::LParen) {
7782            let expr = self.parse_expression()?;
7783            self.expect(TokenType::RParen)?;
7784            expr
7785        } else {
7786            self.parse_column()?.ok_or_else(|| {
7787                self.parse_error("Expected expression or column after CONNECT_BY_ROOT")
7788            })?
7789        };
7790
7791        Ok(Some(Expression::ConnectByRoot(Box::new(ConnectByRoot {
7792            this,
7793        }))))
7794    }
7795
7796    /// Parse MATCH_RECOGNIZE clause (Oracle/Snowflake/Presto/Trino pattern matching)
7797    /// MATCH_RECOGNIZE ( [PARTITION BY ...] [ORDER BY ...] [MEASURES ...] [rows] [after] PATTERN (...) DEFINE ... )
7798    fn parse_match_recognize(&mut self, source: Option<Expression>) -> Result<Expression> {
7799        self.expect(TokenType::LParen)?;
7800
7801        // PARTITION BY (optional)
7802        let partition_by = if self.match_keywords(&[TokenType::Partition, TokenType::By]) {
7803            Some(self.parse_expression_list()?)
7804        } else {
7805            None
7806        };
7807
7808        // ORDER BY (optional)
7809        let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
7810            Some(self.parse_order_by()?.expressions)
7811        } else {
7812            None
7813        };
7814
7815        // MEASURES (optional)
7816        let measures = if self.match_token(TokenType::Measures) {
7817            Some(self.parse_match_recognize_measures()?)
7818        } else {
7819            None
7820        };
7821
7822        // Row semantics: ONE ROW PER MATCH / ALL ROWS PER MATCH
7823        let rows = self.parse_match_recognize_rows()?;
7824
7825        // AFTER MATCH SKIP
7826        let after = self.parse_match_recognize_after()?;
7827
7828        // PATTERN
7829        let pattern = if self.match_token(TokenType::Pattern) {
7830            Some(self.parse_match_recognize_pattern()?)
7831        } else {
7832            None
7833        };
7834
7835        // DEFINE
7836        let define = if self.match_token(TokenType::Define) {
7837            Some(self.parse_match_recognize_define()?)
7838        } else {
7839            None
7840        };
7841
7842        self.expect(TokenType::RParen)?;
7843
7844        // Alias is handled by the caller
7845
7846        Ok(Expression::MatchRecognize(Box::new(MatchRecognize {
7847            this: source.map(Box::new),
7848            partition_by,
7849            order_by,
7850            measures,
7851            rows,
7852            after,
7853            pattern,
7854            define,
7855            alias: None,
7856            alias_explicit_as: false,
7857        })))
7858    }
7859
7860    /// Parse MEASURES clause in MATCH_RECOGNIZE
7861    fn parse_match_recognize_measures(&mut self) -> Result<Vec<MatchRecognizeMeasure>> {
7862        let mut measures = Vec::new();
7863
7864        loop {
7865            // Check for RUNNING or FINAL
7866            let window_frame = if self.match_token(TokenType::Running) {
7867                Some(MatchRecognizeSemantics::Running)
7868            } else if self.match_token(TokenType::Final) {
7869                Some(MatchRecognizeSemantics::Final)
7870            } else {
7871                None
7872            };
7873
7874            let mut expr = self.parse_expression()?;
7875
7876            // Handle AS alias for measures
7877            if self.match_token(TokenType::As) {
7878                let alias = Identifier::new(self.expect_identifier()?);
7879                expr = Expression::Alias(Box::new(Alias::new(expr, alias)));
7880            }
7881
7882            measures.push(MatchRecognizeMeasure {
7883                this: expr,
7884                window_frame,
7885            });
7886
7887            if !self.match_token(TokenType::Comma) {
7888                break;
7889            }
7890        }
7891
7892        Ok(measures)
7893    }
7894
7895    /// Parse row semantics in MATCH_RECOGNIZE
7896    fn parse_match_recognize_rows(&mut self) -> Result<Option<MatchRecognizeRows>> {
7897        // ONE ROW PER MATCH
7898        if self.check(TokenType::Var) && self.peek().text.to_uppercase() == "ONE" {
7899            self.advance(); // consume ONE
7900            if !self.match_token(TokenType::Row) {
7901                return Err(self.parse_error("Expected ROW after ONE"));
7902            }
7903            if !(self.check(TokenType::Var) && self.peek().text.to_uppercase() == "PER") {
7904                return Err(self.parse_error("Expected PER after ONE ROW"));
7905            }
7906            self.advance(); // consume PER
7907            if !self.match_token(TokenType::Match) {
7908                return Err(self.parse_error("Expected MATCH after ONE ROW PER"));
7909            }
7910            return Ok(Some(MatchRecognizeRows::OneRowPerMatch));
7911        }
7912
7913        // ALL ROWS PER MATCH [variants]
7914        if self.match_token(TokenType::All) {
7915            if !self.match_token(TokenType::Rows) {
7916                return Err(self.parse_error("Expected ROWS after ALL"));
7917            }
7918            if !(self.check(TokenType::Var) && self.peek().text.to_uppercase() == "PER") {
7919                return Err(self.parse_error("Expected PER after ALL ROWS"));
7920            }
7921            self.advance(); // consume PER
7922            if !self.match_token(TokenType::Match) {
7923                return Err(self.parse_error("Expected MATCH after ALL ROWS PER"));
7924            }
7925
7926            // Check for optional modifiers
7927            if self.check(TokenType::Var) && self.peek().text.to_uppercase() == "SHOW" {
7928                self.advance();
7929                if self.check(TokenType::Var) && self.peek().text.to_uppercase() == "EMPTY" {
7930                    self.advance();
7931                    if self.check(TokenType::Var) && self.peek().text.to_uppercase() == "MATCHES" {
7932                        self.advance();
7933                        return Ok(Some(MatchRecognizeRows::AllRowsPerMatchShowEmptyMatches));
7934                    }
7935                }
7936                return Err(self.parse_error("Expected EMPTY MATCHES after SHOW"));
7937            }
7938
7939            if self.check(TokenType::Var) && self.peek().text.to_uppercase() == "OMIT" {
7940                self.advance();
7941                if self.check(TokenType::Var) && self.peek().text.to_uppercase() == "EMPTY" {
7942                    self.advance();
7943                    if self.check(TokenType::Var) && self.peek().text.to_uppercase() == "MATCHES" {
7944                        self.advance();
7945                        return Ok(Some(MatchRecognizeRows::AllRowsPerMatchOmitEmptyMatches));
7946                    }
7947                }
7948                return Err(self.parse_error("Expected EMPTY MATCHES after OMIT"));
7949            }
7950
7951            if self.match_token(TokenType::With) {
7952                if self.check(TokenType::Var) && self.peek().text.to_uppercase() == "UNMATCHED" {
7953                    self.advance();
7954                    if self.match_token(TokenType::Rows) {
7955                        return Ok(Some(MatchRecognizeRows::AllRowsPerMatchWithUnmatchedRows));
7956                    }
7957                }
7958                return Err(self.parse_error("Expected UNMATCHED ROWS after WITH"));
7959            }
7960
7961            return Ok(Some(MatchRecognizeRows::AllRowsPerMatch));
7962        }
7963
7964        Ok(None)
7965    }
7966
7967    /// Parse AFTER MATCH SKIP clause in MATCH_RECOGNIZE
7968    fn parse_match_recognize_after(&mut self) -> Result<Option<MatchRecognizeAfter>> {
7969        if !self.match_token(TokenType::After) {
7970            return Ok(None);
7971        }
7972
7973        if !self.match_token(TokenType::Match) {
7974            return Err(self.parse_error("Expected MATCH after AFTER"));
7975        }
7976
7977        // Check for SKIP (it might be an identifier)
7978        if !(self.check(TokenType::Var) && self.peek().text.to_uppercase() == "SKIP") {
7979            return Err(self.parse_error("Expected SKIP after AFTER MATCH"));
7980        }
7981        self.advance(); // consume SKIP
7982
7983        // PAST LAST ROW
7984        if self.check(TokenType::Var) && self.peek().text.to_uppercase() == "PAST" {
7985            self.advance();
7986            if self.match_token(TokenType::Last) {
7987                if self.match_token(TokenType::Row) {
7988                    return Ok(Some(MatchRecognizeAfter::PastLastRow));
7989                }
7990            }
7991            return Err(self.parse_error("Expected LAST ROW after PAST"));
7992        }
7993
7994        // TO NEXT ROW / TO FIRST x / TO LAST x
7995        if self.match_token(TokenType::To) {
7996            if self.check(TokenType::Var) && self.peek().text.to_uppercase() == "NEXT" {
7997                self.advance();
7998                if self.match_token(TokenType::Row) {
7999                    return Ok(Some(MatchRecognizeAfter::ToNextRow));
8000                }
8001                return Err(self.parse_error("Expected ROW after NEXT"));
8002            }
8003
8004            if self.match_token(TokenType::First) {
8005                let name = self.expect_identifier()?;
8006                return Ok(Some(MatchRecognizeAfter::ToFirst(Identifier::new(name))));
8007            }
8008
8009            if self.match_token(TokenType::Last) {
8010                let name = self.expect_identifier()?;
8011                return Ok(Some(MatchRecognizeAfter::ToLast(Identifier::new(name))));
8012            }
8013
8014            return Err(self.parse_error("Expected NEXT ROW, FIRST x, or LAST x after TO"));
8015        }
8016
8017        Err(self.parse_error("Expected PAST LAST ROW or TO ... after AFTER MATCH SKIP"))
8018    }
8019
8020    /// Parse PATTERN clause in MATCH_RECOGNIZE using bracket counting
8021    fn parse_match_recognize_pattern(&mut self) -> Result<String> {
8022        self.expect(TokenType::LParen)?;
8023
8024        let mut depth = 1;
8025        let mut pattern = String::new();
8026
8027        while depth > 0 && !self.is_at_end() {
8028            let token = self.advance();
8029            match token.token_type {
8030                TokenType::LParen => {
8031                    depth += 1;
8032                    pattern.push('(');
8033                }
8034                TokenType::RParen => {
8035                    depth -= 1;
8036                    if depth > 0 {
8037                        pattern.push(')');
8038                    }
8039                }
8040                _ => {
8041                    // Pattern quantifiers (+, *, ?, {n,m}) should not have a space before them
8042                    let is_quantifier = matches!(token.text.as_str(), "+" | "*" | "?")
8043                        || token.text.starts_with('{');
8044
8045                    if !pattern.is_empty()
8046                        && !pattern.ends_with('(')
8047                        && !pattern.ends_with(' ')
8048                        && !is_quantifier
8049                    {
8050                        pattern.push(' ');
8051                    }
8052                    pattern.push_str(&token.text);
8053                }
8054            }
8055        }
8056
8057        if depth > 0 {
8058            return Err(self.parse_error("Unclosed parenthesis in PATTERN clause"));
8059        }
8060
8061        Ok(pattern.trim().to_string())
8062    }
8063
8064    /// Parse DEFINE clause in MATCH_RECOGNIZE
8065    fn parse_match_recognize_define(&mut self) -> Result<Vec<(Identifier, Expression)>> {
8066        let mut definitions = Vec::new();
8067
8068        loop {
8069            let name = Identifier::new(self.expect_identifier()?);
8070            self.expect(TokenType::As)?;
8071            let expr = self.parse_expression()?;
8072
8073            definitions.push((name, expr));
8074
8075            if !self.match_token(TokenType::Comma) {
8076                break;
8077            }
8078        }
8079
8080        Ok(definitions)
8081    }
8082
8083    /// Parse LATERAL VIEW clauses (Hive/Spark)
8084    /// Syntax: LATERAL VIEW [OUTER] generator_function(args) table_alias AS col1 [, col2, ...]
8085    fn parse_lateral_views(&mut self) -> Result<Vec<LateralView>> {
8086        let mut views = Vec::new();
8087
8088        while self.match_keywords(&[TokenType::Lateral, TokenType::View]) {
8089            // Check for OUTER keyword
8090            let outer = self.match_token(TokenType::Outer);
8091
8092            // Parse the generator function (EXPLODE, POSEXPLODE, INLINE, etc.)
8093            // This is a function call expression
8094            let this = self.parse_primary()?;
8095
8096            // Parse table alias (comes before AS)
8097            let table_alias = if self.check(TokenType::Var) && !self.check_keyword() {
8098                Some(Identifier::new(self.expect_identifier()?))
8099            } else {
8100                None
8101            };
8102
8103            // Parse column aliases after AS keyword
8104            // Supports both: AS a, b and AS (a, b)
8105            let column_aliases = if self.match_token(TokenType::As) {
8106                let mut aliases = Vec::new();
8107                // Check for parenthesized alias list: AS ("a", "b")
8108                if self.match_token(TokenType::LParen) {
8109                    loop {
8110                        aliases.push(Identifier::new(self.expect_identifier_or_keyword()?));
8111                        if !self.match_token(TokenType::Comma) {
8112                            break;
8113                        }
8114                    }
8115                    self.expect(TokenType::RParen)?;
8116                } else {
8117                    // Non-parenthesized aliases: AS a, b, c
8118                    // Use expect_identifier_or_keyword because aliases like "key", "value", "pos" may be keywords
8119                    loop {
8120                        aliases.push(Identifier::new(self.expect_identifier_or_keyword()?));
8121                        if !self.match_token(TokenType::Comma) {
8122                            break;
8123                        }
8124                        // Check if next token is still an identifier or keyword (column alias)
8125                        // vs starting a new LATERAL VIEW or other clause
8126                        if !self.is_identifier_or_keyword_token() {
8127                            break;
8128                        }
8129                        // Check for keywords that would end the column list
8130                        if self.peek().token_type == TokenType::Lateral
8131                            || self.peek().token_type == TokenType::Where
8132                            || self.peek().token_type == TokenType::Group
8133                            || self.peek().token_type == TokenType::Having
8134                            || self.peek().token_type == TokenType::Order
8135                            || self.peek().token_type == TokenType::Limit
8136                        {
8137                            break;
8138                        }
8139                    }
8140                }
8141                aliases
8142            } else {
8143                Vec::new()
8144            };
8145
8146            views.push(LateralView {
8147                this,
8148                table_alias,
8149                column_aliases,
8150                outer,
8151            });
8152        }
8153
8154        Ok(views)
8155    }
8156
8157    /// Parse named windows (WINDOW w AS (...), ...)
8158    fn parse_named_windows(&mut self) -> Result<Vec<NamedWindow>> {
8159        let mut windows = Vec::new();
8160
8161        loop {
8162            let name = self.expect_identifier()?;
8163            self.expect(TokenType::As)?;
8164            self.expect(TokenType::LParen)?;
8165
8166            // Parse optional base window name reference (e.g., w1 AS (w0 ORDER BY ...))
8167            let window_name = if (self.check(TokenType::Identifier)
8168                || self.check(TokenType::Var)
8169                || self.check(TokenType::QuotedIdentifier))
8170                && !self.check(TokenType::Partition)
8171                && !self.check(TokenType::Order)
8172                && self.peek_nth(1).map_or(true, |t| {
8173                    matches!(
8174                        t.token_type,
8175                        TokenType::Partition
8176                            | TokenType::Order
8177                            | TokenType::Rows
8178                            | TokenType::Range
8179                            | TokenType::Groups
8180                            | TokenType::RParen
8181                            | TokenType::Comma
8182                    )
8183                }) {
8184                Some(self.expect_identifier()?)
8185            } else {
8186                None
8187            };
8188
8189            // Parse window specification
8190            let partition_by = if self.match_keywords(&[TokenType::Partition, TokenType::By]) {
8191                Some(self.parse_expression_list()?)
8192            } else {
8193                None
8194            };
8195
8196            let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
8197                Some(self.parse_order_by()?)
8198            } else {
8199                None
8200            };
8201
8202            let frame = self.parse_window_frame()?;
8203
8204            self.expect(TokenType::RParen)?;
8205
8206            windows.push(NamedWindow {
8207                name: Identifier::new(name),
8208                spec: Over {
8209                    window_name: window_name.map(|n| Identifier::new(n)),
8210                    partition_by: partition_by.unwrap_or_default(),
8211                    order_by: order_by.map(|o| o.expressions).unwrap_or_default(),
8212                    frame,
8213                    alias: None,
8214                },
8215            });
8216
8217            if !self.match_token(TokenType::Comma) {
8218                break;
8219            }
8220        }
8221
8222        Ok(windows)
8223    }
8224
8225    /// Parse query hint /*+ ... */
8226    fn parse_hint(&mut self) -> Result<Hint> {
8227        let token = self.advance();
8228        let hint_text = token.text.clone();
8229
8230        // For now, parse as raw hint text
8231        // More sophisticated parsing can be added later
8232        let expressions = if hint_text.is_empty() {
8233            Vec::new()
8234        } else {
8235            vec![HintExpression::Raw(hint_text)]
8236        };
8237
8238        Ok(Hint { expressions })
8239    }
8240
8241    /// Parse SAMPLE / TABLESAMPLE / USING SAMPLE clause
8242    fn parse_sample_clause(&mut self) -> Result<Option<Sample>> {
8243        // Check for USING SAMPLE (DuckDB), SAMPLE, or TABLESAMPLE
8244        let is_using_sample = if self.check(TokenType::Using)
8245            && self.current + 1 < self.tokens.len()
8246            && self.tokens[self.current + 1].token_type == TokenType::Sample
8247        {
8248            self.advance(); // consume USING
8249            self.advance(); // consume SAMPLE
8250            true
8251        } else {
8252            false
8253        };
8254
8255        let use_sample_keyword = if is_using_sample {
8256            // USING SAMPLE acts like SAMPLE
8257            true
8258        } else if self.match_token(TokenType::Sample) {
8259            true
8260        } else if self.match_token(TokenType::TableSample) {
8261            false
8262        } else {
8263            return Ok(None);
8264        };
8265
8266        // Parse sampling method if specified (BERNOULLI, SYSTEM, BLOCK, ROW, RESERVOIR)
8267        let (method, method_before_size, explicit_method) =
8268            if self.match_token(TokenType::Bernoulli) {
8269                (SampleMethod::Bernoulli, true, true)
8270            } else if self.match_token(TokenType::System) {
8271                (SampleMethod::System, true, true)
8272            } else if self.match_token(TokenType::Block) {
8273                (SampleMethod::Block, true, true)
8274            } else if self.match_token(TokenType::Row) {
8275                (SampleMethod::Row, true, true)
8276            } else if self.check_identifier("RESERVOIR") {
8277                self.advance();
8278                (SampleMethod::Reservoir, true, true)
8279            } else {
8280                // Default to BERNOULLI for both SAMPLE and TABLESAMPLE
8281                // This matches Python SQLGlot's normalization behavior
8282                (SampleMethod::Bernoulli, false, false)
8283            };
8284
8285        // Parse size (can be in parentheses)
8286        let has_paren = self.match_token(TokenType::LParen);
8287
8288        // Check for BUCKET syntax: TABLESAMPLE (BUCKET 1 OUT OF 5 ON x)
8289        if self.match_identifier("BUCKET") {
8290            let bucket_numerator = self.parse_primary()?;
8291            self.match_identifier("OUT");
8292            self.match_token(TokenType::Of); // OF is a keyword token
8293            let bucket_denominator = self.parse_primary()?;
8294            let bucket_field = if self.match_token(TokenType::On) {
8295                Some(Box::new(self.parse_primary()?))
8296            } else {
8297                None
8298            };
8299            if has_paren {
8300                self.expect(TokenType::RParen)?;
8301            }
8302            return Ok(Some(Sample {
8303                method: SampleMethod::Bucket,
8304                size: bucket_numerator.clone(),
8305                seed: None,
8306                offset: None,
8307                unit_after_size: false,
8308                use_sample_keyword,
8309                explicit_method: true,     // BUCKET is always explicit
8310                method_before_size: false, // BUCKET appears inside parens
8311                use_seed_keyword: false,
8312                bucket_numerator: Some(Box::new(bucket_numerator)),
8313                bucket_denominator: Some(Box::new(bucket_denominator)),
8314                bucket_field,
8315                is_using_sample,
8316                is_percent: false,
8317                suppress_method_output: false,
8318            }));
8319        }
8320
8321        // Use parse_unary to avoid consuming PERCENT as modulo operator
8322        let size = self.parse_unary()?;
8323
8324        // Check for PERCENT/ROWS suffix after size (if not already part of the number)
8325        // Both "%" and "PERCENT" tokens map to TokenType::Percent - accept both as PERCENT modifier
8326        let (method, unit_after_size, is_percent) = if self.check(TokenType::Percent) {
8327            self.advance(); // consume PERCENT or %
8328                            // If method was already explicitly specified (e.g., SYSTEM), keep it
8329                            // PERCENT here is just the unit, not the sampling method
8330            if method_before_size {
8331                (method, true, true)
8332            } else {
8333                (SampleMethod::Percent, true, true)
8334            }
8335        } else if self.match_token(TokenType::Rows) {
8336            // If method was already explicitly specified, keep it
8337            if method_before_size {
8338                (method, true, false)
8339            } else {
8340                (SampleMethod::Row, true, false)
8341            }
8342        } else {
8343            // No explicit unit after size - preserve the original method
8344            (method, false, false)
8345        };
8346
8347        if has_paren {
8348            self.expect(TokenType::RParen)?;
8349        }
8350
8351        // DuckDB USING SAMPLE: method and optional seed can come in parens after size
8352        // e.g., "10 PERCENT (bernoulli)" or "10% (system, 377)"
8353        // DuckDB USING SAMPLE: method and optional seed can come in parens after size
8354        // e.g., "10 PERCENT (bernoulli)" or "10% (system, 377)"
8355        let (method, seed, use_seed_keyword, explicit_method) =
8356            if is_using_sample && self.check(TokenType::LParen) {
8357                self.advance(); // consume LParen
8358                                // Parse method name as identifier or keyword token
8359                                // BERNOULLI, SYSTEM, RESERVOIR can be tokenized as keywords, not identifiers
8360                let method_from_parens =
8361                    if self.check_identifier("BERNOULLI") || self.check(TokenType::Bernoulli) {
8362                        self.advance();
8363                        Some(SampleMethod::Bernoulli)
8364                    } else if self.check_identifier("SYSTEM") || self.check(TokenType::System) {
8365                        self.advance();
8366                        Some(SampleMethod::System)
8367                    } else if self.check_identifier("RESERVOIR") {
8368                        self.advance();
8369                        Some(SampleMethod::Reservoir)
8370                    } else {
8371                        None
8372                    };
8373                // Optional seed after comma
8374                let seed = if self.match_token(TokenType::Comma) {
8375                    Some(self.parse_expression()?)
8376                } else {
8377                    None
8378                };
8379                self.expect(TokenType::RParen)?;
8380                let final_method = method_from_parens.unwrap_or(method);
8381                (final_method, seed, false, true)
8382            } else {
8383                // Parse optional SEED / REPEATABLE
8384                let (seed, use_seed_keyword) = if self.match_token(TokenType::Seed) {
8385                    self.expect(TokenType::LParen)?;
8386                    let seed_value = self.parse_expression()?;
8387                    self.expect(TokenType::RParen)?;
8388                    (Some(seed_value), true)
8389                } else if self.match_token(TokenType::Repeatable) {
8390                    self.expect(TokenType::LParen)?;
8391                    let seed_value = self.parse_expression()?;
8392                    self.expect(TokenType::RParen)?;
8393                    (Some(seed_value), false)
8394                } else {
8395                    (None, false)
8396                };
8397                let explicit_method = explicit_method || unit_after_size;
8398                (method, seed, use_seed_keyword, explicit_method)
8399            };
8400
8401        // For DuckDB USING SAMPLE: apply default methods
8402        // - bare number -> RESERVOIR, ROWS
8403        // - percent -> SYSTEM, PERCENT
8404        let (method, unit_after_size) = if is_using_sample && !explicit_method {
8405            // No explicit method - apply defaults
8406            (SampleMethod::Reservoir, false) // default: RESERVOIR with ROWS
8407        } else if is_using_sample && unit_after_size && !method_before_size {
8408            // Unit was specified after size (e.g., "10 PERCENT") but no method before
8409            // Check if method was set in post-parens
8410            if matches!(method, SampleMethod::Percent) {
8411                // "10%" or "10 PERCENT" without method -> SYSTEM
8412                (SampleMethod::System, true)
8413            } else if matches!(method, SampleMethod::Row) {
8414                // "50 ROWS" without method -> RESERVOIR
8415                (SampleMethod::Reservoir, true)
8416            } else {
8417                (method, unit_after_size)
8418            }
8419        } else {
8420            (method, unit_after_size)
8421        };
8422
8423        // method_before_size: true for USING SAMPLE - we normalize to method-before-size format
8424        // e.g., "10 PERCENT (bernoulli)" becomes "BERNOULLI (10 PERCENT)"
8425        Ok(Some(Sample {
8426            method,
8427            size,
8428            seed,
8429            offset: None,
8430            unit_after_size,
8431            use_sample_keyword,
8432            explicit_method: true,    // For USING SAMPLE, always explicit
8433            method_before_size: true, // Normalize to method-before-size format
8434            use_seed_keyword,
8435            bucket_numerator: None,
8436            bucket_denominator: None,
8437            bucket_field: None,
8438            is_using_sample,
8439            is_percent,
8440            suppress_method_output: false,
8441        }))
8442    }
8443
8444    /// Parse table-level TABLESAMPLE/SAMPLE: TABLESAMPLE/SAMPLE METHOD(size [PERCENT|ROWS])
8445    /// e.g., TABLESAMPLE RESERVOIR(20%), SAMPLE BERNOULLI(10 PERCENT), SAMPLE ROW(0)
8446    fn parse_table_level_sample(&mut self) -> Result<Option<Sample>> {
8447        // Accept both TABLESAMPLE and SAMPLE (Snowflake supports both)
8448        let use_sample_keyword = if self.match_token(TokenType::Sample) {
8449            true
8450        } else if self.match_token(TokenType::TableSample) {
8451            false
8452        } else {
8453            return Ok(None);
8454        };
8455        // Track which keyword was used for identity output
8456        let _ = use_sample_keyword; // Used below for is_using_sample field
8457
8458        // Teradata: SAMPLE 5 or SAMPLE 0.33, .25, .1 (no parentheses)
8459        if matches!(
8460            self.config.dialect,
8461            Some(crate::dialects::DialectType::Teradata)
8462        ) && use_sample_keyword
8463            && !self.check(TokenType::LParen)
8464        {
8465            let mut expressions = vec![self.parse_unary()?];
8466            while self.match_token(TokenType::Comma) {
8467                expressions.push(self.parse_unary()?);
8468            }
8469            let size = if expressions.len() == 1 {
8470                expressions.into_iter().next().unwrap()
8471            } else {
8472                Expression::Tuple(Box::new(Tuple { expressions }))
8473            };
8474            return Ok(Some(Sample {
8475                method: SampleMethod::Percent,
8476                size,
8477                seed: None,
8478                offset: None,
8479                unit_after_size: false,
8480                use_sample_keyword,
8481                explicit_method: false,
8482                method_before_size: false,
8483                use_seed_keyword: false,
8484                bucket_numerator: None,
8485                bucket_denominator: None,
8486                bucket_field: None,
8487                is_using_sample: false,
8488                is_percent: false,
8489                suppress_method_output: false,
8490            }));
8491        }
8492
8493        // ClickHouse: SAMPLE 0.1 [OFFSET 0.2] (no parentheses)
8494        if matches!(
8495            self.config.dialect,
8496            Some(crate::dialects::DialectType::ClickHouse)
8497        ) && use_sample_keyword
8498            && !self.check(TokenType::LParen)
8499        {
8500            let size = self.parse_expression()?;
8501            let offset = if self.match_token(TokenType::Offset) {
8502                Some(self.parse_expression()?)
8503            } else {
8504                None
8505            };
8506            return Ok(Some(Sample {
8507                method: SampleMethod::Bernoulli,
8508                size,
8509                seed: None,
8510                offset,
8511                unit_after_size: false,
8512                use_sample_keyword,
8513                explicit_method: false,
8514                method_before_size: false,
8515                use_seed_keyword: false,
8516                bucket_numerator: None,
8517                bucket_denominator: None,
8518                bucket_field: None,
8519                is_using_sample: false,
8520                is_percent: false,
8521                suppress_method_output: false,
8522            }));
8523        }
8524
8525        // Parse method name (optional for table-level TABLESAMPLE)
8526        let (method, explicit_method, method_before_size) = if self.check_identifier("RESERVOIR") {
8527            self.advance();
8528            (SampleMethod::Reservoir, true, true)
8529        } else if self.match_token(TokenType::Bernoulli) {
8530            (SampleMethod::Bernoulli, true, true)
8531        } else if self.match_token(TokenType::System) {
8532            (SampleMethod::System, true, true)
8533        } else if self.match_token(TokenType::Block) {
8534            (SampleMethod::Block, true, true)
8535        } else if self.match_token(TokenType::Row) {
8536            (SampleMethod::Row, true, true)
8537        } else {
8538            // No explicit method - default to Bernoulli internally but track as not explicit
8539            (SampleMethod::Bernoulli, false, false)
8540        };
8541
8542        // Parse (size [PERCENT|ROWS])
8543        self.expect(TokenType::LParen)?;
8544
8545        // Check for BUCKET syntax: TABLESAMPLE (BUCKET 1 OUT OF 5 [ON col])
8546        if self.match_identifier("BUCKET") {
8547            let bucket_numerator = self.parse_primary()?;
8548            self.match_identifier("OUT");
8549            self.match_token(TokenType::Of);
8550            let bucket_denominator = self.parse_primary()?;
8551            let bucket_field = if self.match_token(TokenType::On) {
8552                Some(Box::new(self.parse_primary()?))
8553            } else {
8554                None
8555            };
8556            self.expect(TokenType::RParen)?;
8557            return Ok(Some(Sample {
8558                method: SampleMethod::Bucket,
8559                size: bucket_numerator.clone(),
8560                seed: None,
8561                offset: None,
8562                unit_after_size: false,
8563                use_sample_keyword,
8564                explicit_method: true,
8565                method_before_size: false,
8566                use_seed_keyword: false,
8567                bucket_numerator: Some(Box::new(bucket_numerator)),
8568                bucket_denominator: Some(Box::new(bucket_denominator)),
8569                bucket_field,
8570                is_using_sample: false,
8571                is_percent: false,
8572                suppress_method_output: false,
8573            }));
8574        }
8575
8576        let size = self.parse_unary()?;
8577
8578        // Check for PERCENT/ROWS suffix or % symbol
8579        let (method, unit_after_size, is_percent) =
8580            if self.check(TokenType::Percent) && self.peek().text.to_uppercase() == "PERCENT" {
8581                self.advance();
8582                // If no explicit method, use Percent to represent "PERCENT" unit
8583                if explicit_method {
8584                    (method, true, true)
8585                } else {
8586                    (SampleMethod::Percent, true, true)
8587                }
8588            } else if self.match_token(TokenType::Rows) {
8589                // If no explicit method, use Row to represent "ROWS" unit
8590                if explicit_method {
8591                    (method, true, false)
8592                } else {
8593                    (SampleMethod::Row, true, false)
8594                }
8595            } else if self.check(TokenType::Percent) && self.peek().text == "%" {
8596                // 20% -> consume the %, treat as PERCENT unit
8597                self.advance();
8598                if explicit_method {
8599                    (method, true, true)
8600                } else {
8601                    (SampleMethod::Percent, true, true)
8602                }
8603            } else {
8604                (method, false, false)
8605            };
8606
8607        self.expect(TokenType::RParen)?;
8608
8609        // Optional SEED/REPEATABLE
8610        let (seed, use_seed_keyword) = if self.match_token(TokenType::Seed) {
8611            self.expect(TokenType::LParen)?;
8612            let seed_value = self.parse_expression()?;
8613            self.expect(TokenType::RParen)?;
8614            (Some(seed_value), true)
8615        } else if self.match_token(TokenType::Repeatable) {
8616            self.expect(TokenType::LParen)?;
8617            let seed_value = self.parse_expression()?;
8618            self.expect(TokenType::RParen)?;
8619            (Some(seed_value), false)
8620        } else {
8621            (None, false)
8622        };
8623
8624        Ok(Some(Sample {
8625            method,
8626            size,
8627            seed,
8628            offset: None,
8629            unit_after_size,
8630            use_sample_keyword,
8631            explicit_method,
8632            method_before_size,
8633            use_seed_keyword,
8634            bucket_numerator: None,
8635            bucket_denominator: None,
8636            bucket_field: None,
8637            is_using_sample: false, // table-level uses TABLESAMPLE/SAMPLE keyword, not USING SAMPLE
8638            is_percent,
8639            suppress_method_output: false,
8640        }))
8641    }
8642
8643    /// Parse set operations (UNION, INTERSECT, EXCEPT)
8644    fn parse_set_operation(&mut self, left: Expression) -> Result<Expression> {
8645        // Check for BigQuery set operation modifiers BEFORE the set operation keyword
8646        // Pattern: SELECT ... [INNER|LEFT|RIGHT|FULL] UNION/INTERSECT/EXCEPT ...
8647        let (side, kind) = self.parse_set_operation_side_kind();
8648
8649        // Capture leading comments from the set operation keyword token (e.g., /*x*/ before UNION).
8650        // These comments appeared on a new line between the left SELECT and the set operation keyword.
8651        let set_op_leading_comments = if self.check(TokenType::Union)
8652            || self.check(TokenType::Intersect)
8653            || self.check(TokenType::Except)
8654        {
8655            self.current_leading_comments()
8656        } else {
8657            Vec::new()
8658        };
8659
8660        // Wrap left expression with comments if needed
8661        let left = if !set_op_leading_comments.is_empty() {
8662            Expression::Annotated(Box::new(Annotated {
8663                this: left,
8664                trailing_comments: set_op_leading_comments,
8665            }))
8666        } else {
8667            left
8668        };
8669
8670        if self.match_token(TokenType::Union) {
8671            let all = self.match_token(TokenType::All);
8672            let distinct = if !all {
8673                self.match_token(TokenType::Distinct)
8674            } else {
8675                false
8676            };
8677
8678            // Parse STRICT CORRESPONDING, CORRESPONDING, BY NAME modifiers
8679            let (by_name, strict, corresponding, on_columns) =
8680                self.parse_set_operation_corresponding()?;
8681
8682            // If CORRESPONDING (without STRICT) is present and no explicit side/kind, default kind to INNER
8683            // STRICT CORRESPONDING does NOT set kind to INNER
8684            let kind = if corresponding && !strict && side.is_none() && kind.is_none() {
8685                Some("INNER".to_string())
8686            } else {
8687                kind
8688            };
8689
8690            let right = self.parse_select_or_paren_select()?;
8691            // Check for chained set operations first
8692            let mut result = Expression::Union(Box::new(Union {
8693                left,
8694                right,
8695                all,
8696                distinct,
8697                with: None,
8698                order_by: None,
8699                limit: None,
8700                offset: None,
8701                distribute_by: None,
8702                sort_by: None,
8703                cluster_by: None,
8704                by_name,
8705                side,
8706                kind,
8707                corresponding,
8708                strict,
8709                on_columns,
8710            }));
8711            result = self.parse_set_operation(result)?;
8712            // Parse ORDER BY, LIMIT, OFFSET for the outermost set operation
8713            self.parse_set_operation_modifiers(&mut result)?;
8714            Ok(result)
8715        } else if self.match_token(TokenType::Intersect) {
8716            let all = self.match_token(TokenType::All);
8717            let distinct = if !all {
8718                self.match_token(TokenType::Distinct)
8719            } else {
8720                false
8721            };
8722
8723            // Parse STRICT CORRESPONDING, CORRESPONDING, BY NAME modifiers
8724            let (by_name, strict, corresponding, on_columns) =
8725                self.parse_set_operation_corresponding()?;
8726
8727            // If CORRESPONDING (without STRICT) is present and no explicit side/kind, default kind to INNER
8728            // STRICT CORRESPONDING does NOT set kind to INNER
8729            let kind = if corresponding && !strict && side.is_none() && kind.is_none() {
8730                Some("INNER".to_string())
8731            } else {
8732                kind
8733            };
8734
8735            let right = self.parse_select_or_paren_select()?;
8736            let mut result = Expression::Intersect(Box::new(Intersect {
8737                left,
8738                right,
8739                all,
8740                distinct,
8741                with: None,
8742                order_by: None,
8743                limit: None,
8744                offset: None,
8745                distribute_by: None,
8746                sort_by: None,
8747                cluster_by: None,
8748                by_name,
8749                side,
8750                kind,
8751                corresponding,
8752                strict,
8753                on_columns,
8754            }));
8755            result = self.parse_set_operation(result)?;
8756            self.parse_set_operation_modifiers(&mut result)?;
8757            Ok(result)
8758        } else if self.match_token(TokenType::Except) {
8759            let all = self.match_token(TokenType::All);
8760            let distinct = if !all {
8761                self.match_token(TokenType::Distinct)
8762            } else {
8763                false
8764            };
8765
8766            // Parse STRICT CORRESPONDING, CORRESPONDING, BY NAME modifiers
8767            let (by_name, strict, corresponding, on_columns) =
8768                self.parse_set_operation_corresponding()?;
8769
8770            // If CORRESPONDING (without STRICT) is present and no explicit side/kind, default kind to INNER
8771            // STRICT CORRESPONDING does NOT set kind to INNER
8772            let kind = if corresponding && !strict && side.is_none() && kind.is_none() {
8773                Some("INNER".to_string())
8774            } else {
8775                kind
8776            };
8777
8778            let right = self.parse_select_or_paren_select()?;
8779            let mut result = Expression::Except(Box::new(Except {
8780                left,
8781                right,
8782                all,
8783                distinct,
8784                with: None,
8785                order_by: None,
8786                limit: None,
8787                offset: None,
8788                distribute_by: None,
8789                sort_by: None,
8790                cluster_by: None,
8791                by_name,
8792                side,
8793                kind,
8794                corresponding,
8795                strict,
8796                on_columns,
8797            }));
8798            result = self.parse_set_operation(result)?;
8799            self.parse_set_operation_modifiers(&mut result)?;
8800            Ok(result)
8801        } else if side.is_some() || kind.is_some() {
8802            // We parsed side/kind but didn't find a set operation - this is an error
8803            Err(self
8804                .parse_error("Expected UNION, INTERSECT, or EXCEPT after set operation modifier"))
8805        } else {
8806            Ok(left)
8807        }
8808    }
8809
8810    /// Parse BigQuery set operation side (LEFT, RIGHT, FULL) and kind (INNER)
8811    /// These modifiers appear BEFORE the UNION/INTERSECT/EXCEPT keyword
8812    fn parse_set_operation_side_kind(&mut self) -> (Option<String>, Option<String>) {
8813        let mut side = None;
8814        let mut kind = None;
8815
8816        // Check for side: LEFT, RIGHT, FULL (reusing join side tokens)
8817        if self.check(TokenType::Left)
8818            || self.check(TokenType::Right)
8819            || self.check(TokenType::Full)
8820        {
8821            // Only consume if followed by UNION/INTERSECT/EXCEPT (or INNER which would be followed by them)
8822            let saved = self.current;
8823            let side_token = self.advance();
8824            let side_text = side_token.text.to_uppercase();
8825
8826            // Check if followed by set operation or INNER
8827            if self.check(TokenType::Union)
8828                || self.check(TokenType::Intersect)
8829                || self.check(TokenType::Except)
8830                || self.check(TokenType::Inner)
8831            {
8832                side = Some(side_text);
8833            } else {
8834                // Not a set operation modifier, backtrack
8835                self.current = saved;
8836                return (None, None);
8837            }
8838        }
8839
8840        // Check for kind: INNER
8841        if self.check(TokenType::Inner) {
8842            let saved = self.current;
8843            self.advance(); // consume INNER
8844
8845            // Check if followed by set operation
8846            if self.check(TokenType::Union)
8847                || self.check(TokenType::Intersect)
8848                || self.check(TokenType::Except)
8849            {
8850                kind = Some("INNER".to_string());
8851            } else {
8852                // Not a set operation modifier, backtrack
8853                self.current = saved;
8854                if side.is_some() {
8855                    // We already consumed a side token, need to backtrack that too
8856                    self.current = saved - 1;
8857                }
8858                return (None, None);
8859            }
8860        }
8861
8862        (side, kind)
8863    }
8864
8865    /// Parse CORRESPONDING/STRICT CORRESPONDING/BY NAME modifiers after ALL/DISTINCT
8866    /// Returns (by_name, strict, corresponding, on_columns)
8867    fn parse_set_operation_corresponding(&mut self) -> Result<(bool, bool, bool, Vec<Expression>)> {
8868        let mut by_name = false;
8869        let mut strict = false;
8870        let mut corresponding = false;
8871        let mut on_columns = Vec::new();
8872
8873        // Check for BY NAME (DuckDB style)
8874        if self.match_token(TokenType::By) && self.match_identifier("NAME") {
8875            by_name = true;
8876        }
8877        // Check for STRICT CORRESPONDING (BigQuery style)
8878        else if self.match_identifier("STRICT") {
8879            if self.match_identifier("CORRESPONDING") {
8880                strict = true;
8881                corresponding = true;
8882            } else {
8883                // STRICT without CORRESPONDING - backtrack
8884                self.current -= 1;
8885            }
8886        }
8887        // Check for CORRESPONDING (BigQuery style)
8888        else if self.match_identifier("CORRESPONDING") {
8889            corresponding = true;
8890        }
8891
8892        // If CORRESPONDING is set, check for BY (columns)
8893        if corresponding && self.match_token(TokenType::By) {
8894            self.expect(TokenType::LParen)?;
8895            on_columns = self
8896                .parse_identifier_list()?
8897                .into_iter()
8898                .map(|id| {
8899                    Expression::Column(Column {
8900                        name: id,
8901                        table: None,
8902                        join_mark: false,
8903                        trailing_comments: Vec::new(),
8904                        span: None,
8905                        inferred_type: None,
8906                    })
8907                })
8908                .collect();
8909            self.expect(TokenType::RParen)?;
8910        }
8911
8912        Ok((by_name, strict, corresponding, on_columns))
8913    }
8914
8915    /// Parse ORDER BY, LIMIT, OFFSET modifiers for set operations
8916    fn parse_set_operation_modifiers(&mut self, expr: &mut Expression) -> Result<()> {
8917        // Parse ORDER BY
8918        let order_by = if self.match_token(TokenType::Order) {
8919            self.expect(TokenType::By)?;
8920            Some(self.parse_order_by()?)
8921        } else {
8922            None
8923        };
8924
8925        // Parse LIMIT
8926        let limit = if self.match_token(TokenType::Limit) {
8927            Some(Box::new(self.parse_expression()?))
8928        } else {
8929            None
8930        };
8931
8932        // Parse OFFSET
8933        let offset = if self.match_token(TokenType::Offset) {
8934            Some(Box::new(self.parse_expression()?))
8935        } else {
8936            None
8937        };
8938
8939        // Apply modifiers to the outermost set operation
8940        match expr {
8941            Expression::Union(ref mut union) => {
8942                if order_by.is_some() {
8943                    union.order_by = order_by;
8944                }
8945                if limit.is_some() {
8946                    union.limit = limit;
8947                }
8948                if offset.is_some() {
8949                    union.offset = offset;
8950                }
8951            }
8952            Expression::Intersect(ref mut intersect) => {
8953                if order_by.is_some() {
8954                    intersect.order_by = order_by;
8955                }
8956                if limit.is_some() {
8957                    intersect.limit = limit;
8958                }
8959                if offset.is_some() {
8960                    intersect.offset = offset;
8961                }
8962            }
8963            Expression::Except(ref mut except) => {
8964                if order_by.is_some() {
8965                    except.order_by = order_by;
8966                }
8967                if limit.is_some() {
8968                    except.limit = limit;
8969                }
8970                if offset.is_some() {
8971                    except.offset = offset;
8972                }
8973            }
8974            _ => {}
8975        }
8976        Ok(())
8977    }
8978
8979    /// Parse either a SELECT statement or a parenthesized SELECT/set operation
8980    fn parse_select_or_paren_select(&mut self) -> Result<Expression> {
8981        if self.match_token(TokenType::LParen) {
8982            // Could be (SELECT ...) or ((SELECT ...) UNION ...) or (FROM ...) for DuckDB
8983            if self.check(TokenType::Select)
8984                || self.check(TokenType::With)
8985                || self.check(TokenType::From)
8986            {
8987                let query = self.parse_statement()?;
8988                self.expect(TokenType::RParen)?;
8989                // Handle optional alias after subquery: (SELECT 1) AS a
8990                let alias = if self.match_token(TokenType::As) {
8991                    Some(Identifier::new(self.expect_identifier()?))
8992                } else {
8993                    None
8994                };
8995                // Wrap in Subquery to preserve parentheses
8996                Ok(Expression::Subquery(Box::new(Subquery {
8997                    this: query,
8998                    alias,
8999                    column_aliases: Vec::new(),
9000                    order_by: None,
9001                    limit: None,
9002                    offset: None,
9003                    lateral: false,
9004                    modifiers_inside: false,
9005                    trailing_comments: Vec::new(),
9006                    distribute_by: None,
9007                    sort_by: None,
9008                    cluster_by: None,
9009                    inferred_type: None,
9010                })))
9011            } else if self.check(TokenType::LParen) {
9012                // Nested parentheses like ((SELECT ...))
9013                let inner = self.parse_select_or_paren_select()?;
9014                // Check for set operations inside the parens
9015                let result = self.parse_set_operation(inner)?;
9016                self.expect(TokenType::RParen)?;
9017                // Handle optional alias after subquery
9018                let alias = if self.match_token(TokenType::As) {
9019                    Some(Identifier::new(self.expect_identifier()?))
9020                } else {
9021                    None
9022                };
9023                // Wrap in Subquery to preserve parentheses
9024                Ok(Expression::Subquery(Box::new(Subquery {
9025                    this: result,
9026                    alias,
9027                    column_aliases: Vec::new(),
9028                    order_by: None,
9029                    limit: None,
9030                    offset: None,
9031                    lateral: false,
9032                    modifiers_inside: false,
9033                    trailing_comments: Vec::new(),
9034                    distribute_by: None,
9035                    sort_by: None,
9036                    cluster_by: None,
9037                    inferred_type: None,
9038                })))
9039            } else {
9040                Err(self.parse_error("Expected SELECT or ( after ("))
9041            }
9042        } else if self.check(TokenType::From) {
9043            // DuckDB FROM-first syntax without parentheses: ... UNION FROM t
9044            self.parse_from_first_query()
9045        } else if self.check(TokenType::With) {
9046            // WITH CTE as right-hand side of UNION/INTERSECT/EXCEPT
9047            self.parse_statement()
9048        } else {
9049            self.parse_select()
9050        }
9051    }
9052
9053    /// Parse INSERT statement
9054    fn parse_insert(&mut self) -> Result<Expression> {
9055        let insert_token = self.expect(TokenType::Insert)?;
9056        let leading_comments = insert_token.comments;
9057
9058        // Parse query hint /*+ ... */ if present (Oracle: INSERT /*+ APPEND */ INTO ...)
9059        let hint = if self.check(TokenType::Hint) {
9060            Some(self.parse_hint()?)
9061        } else {
9062            None
9063        };
9064
9065        // Handle SQLite conflict action: INSERT OR ABORT|FAIL|IGNORE|REPLACE|ROLLBACK INTO
9066        let conflict_action = if self.match_token(TokenType::Or) {
9067            if self.match_identifier("ABORT") {
9068                Some("ABORT".to_string())
9069            } else if self.match_identifier("FAIL") {
9070                Some("FAIL".to_string())
9071            } else if self.match_token(TokenType::Ignore) {
9072                Some("IGNORE".to_string())
9073            } else if self.match_token(TokenType::Replace) {
9074                Some("REPLACE".to_string())
9075            } else if self.match_token(TokenType::Rollback) {
9076                Some("ROLLBACK".to_string())
9077            } else {
9078                return Err(self.parse_error(
9079                    "Expected ABORT, FAIL, IGNORE, REPLACE, or ROLLBACK after INSERT OR",
9080                ));
9081            }
9082        } else {
9083            None
9084        };
9085
9086        // Handle INSERT IGNORE (MySQL)
9087        let ignore = conflict_action.is_none() && self.match_token(TokenType::Ignore);
9088
9089        // Handle OVERWRITE for Hive/Spark: INSERT OVERWRITE TABLE ...
9090        let overwrite = self.match_token(TokenType::Overwrite);
9091
9092        // Handle Oracle multi-table INSERT: INSERT ALL/FIRST ...
9093        // Must check before OVERWRITE handling since these are mutually exclusive
9094        if !overwrite && (self.match_token(TokenType::All) || self.match_token(TokenType::First)) {
9095            if let Some(multi_insert) = self.parse_multitable_inserts(leading_comments.clone())? {
9096                return Ok(multi_insert);
9097            }
9098        }
9099
9100        // Handle INTO or TABLE (OVERWRITE requires TABLE, INTO is standard)
9101        // Also handle INSERT OVERWRITE [LOCAL] DIRECTORY 'path'
9102        let local_directory = overwrite && self.match_token(TokenType::Local);
9103        let is_directory = (overwrite || local_directory) && self.match_identifier("DIRECTORY");
9104
9105        if is_directory {
9106            // INSERT OVERWRITE [LOCAL] DIRECTORY 'path' [ROW FORMAT ...] SELECT ...
9107            let path = self.expect_string()?;
9108            // Parse optional ROW FORMAT clause
9109            let row_format = if self.match_keywords(&[TokenType::Row, TokenType::Format]) {
9110                // ROW FORMAT DELIMITED ...
9111                let delimited = self.match_identifier("DELIMITED");
9112                let mut fields_terminated_by = None;
9113                let mut collection_items_terminated_by = None;
9114                let mut map_keys_terminated_by = None;
9115                let mut lines_terminated_by = None;
9116                let mut null_defined_as = None;
9117
9118                // Parse the various TERMINATED BY clauses
9119                loop {
9120                    if self.match_identifier("FIELDS") || self.match_identifier("FIELD") {
9121                        self.match_identifier("TERMINATED");
9122                        self.match_token(TokenType::By);
9123                        fields_terminated_by = Some(self.expect_string()?);
9124                    } else if self.match_identifier("COLLECTION") {
9125                        self.match_identifier("ITEMS");
9126                        self.match_identifier("TERMINATED");
9127                        self.match_token(TokenType::By);
9128                        collection_items_terminated_by = Some(self.expect_string()?);
9129                    } else if self.match_identifier("MAP") {
9130                        self.match_identifier("KEYS");
9131                        self.match_identifier("TERMINATED");
9132                        self.match_token(TokenType::By);
9133                        map_keys_terminated_by = Some(self.expect_string()?);
9134                    } else if self.match_identifier("LINES") {
9135                        self.match_identifier("TERMINATED");
9136                        self.match_token(TokenType::By);
9137                        lines_terminated_by = Some(self.expect_string()?);
9138                    } else if self.match_token(TokenType::Null) {
9139                        self.match_identifier("DEFINED");
9140                        self.match_token(TokenType::As);
9141                        null_defined_as = Some(self.expect_string()?);
9142                    } else {
9143                        break;
9144                    }
9145                }
9146
9147                Some(RowFormat {
9148                    delimited,
9149                    fields_terminated_by,
9150                    collection_items_terminated_by,
9151                    map_keys_terminated_by,
9152                    lines_terminated_by,
9153                    null_defined_as,
9154                })
9155            } else {
9156                None
9157            };
9158
9159            // Parse optional STORED AS clause
9160            let stored_as = if self.match_identifier("STORED") {
9161                self.expect(TokenType::As)?;
9162                Some(self.expect_identifier()?)
9163            } else {
9164                None
9165            };
9166
9167            // Parse the SELECT query
9168            let query = self.parse_statement()?;
9169
9170            return Ok(Expression::Insert(Box::new(Insert {
9171                table: TableRef::new(""),
9172                columns: Vec::new(),
9173                values: Vec::new(),
9174                query: Some(query),
9175                overwrite,
9176                partition: Vec::new(),
9177                directory: Some(DirectoryInsert {
9178                    local: local_directory,
9179                    path,
9180                    row_format,
9181                    stored_as,
9182                }),
9183                returning: Vec::new(),
9184                output: None,
9185                on_conflict: None,
9186                leading_comments,
9187                if_exists: false,
9188                with: None,
9189                ignore,
9190                source_alias: None,
9191                alias: None,
9192                alias_explicit_as: false,
9193                default_values: false,
9194                by_name: false,
9195                conflict_action: conflict_action.clone(),
9196                is_replace: false,
9197                replace_where: None,
9198                source: None,
9199                hint: hint.clone(),
9200                function_target: None,
9201                partition_by: None,
9202                settings: Vec::new(),
9203            })));
9204        }
9205
9206        if overwrite {
9207            // OVERWRITE is typically followed by TABLE
9208            self.match_token(TokenType::Table);
9209        } else {
9210            self.expect(TokenType::Into)?;
9211            // Optional TABLE keyword after INTO
9212            self.match_token(TokenType::Table);
9213        }
9214
9215        // ClickHouse: INSERT INTO [TABLE] FUNCTION func_name(args...)
9216        let mut function_target: Option<Box<Expression>> = None;
9217        if self.match_token(TokenType::Function) {
9218            // Parse function call: func_name(args...)
9219            let func_name = self.expect_identifier_or_keyword()?;
9220            self.expect(TokenType::LParen)?;
9221            let args = if self.check(TokenType::RParen) {
9222                Vec::new()
9223            } else {
9224                self.parse_expression_list()?
9225            };
9226            self.expect(TokenType::RParen)?;
9227            function_target = Some(Box::new(Expression::Function(Box::new(Function {
9228                name: func_name,
9229                args,
9230                distinct: false,
9231                trailing_comments: Vec::new(),
9232                use_bracket_syntax: false,
9233                no_parens: false,
9234                quoted: false,
9235                span: None,
9236                inferred_type: None,
9237            }))));
9238        }
9239
9240        let table_name = if function_target.is_some() {
9241            // For FUNCTION targets, use empty table name
9242            Identifier::new(String::new())
9243        } else {
9244            // Allow keywords (like TABLE) as table names in INSERT statements
9245            self.expect_identifier_or_keyword_with_quoted()?
9246        };
9247        // Handle qualified table names like a.b
9248        let table = if self.match_token(TokenType::Dot) {
9249            let schema = table_name;
9250            let name = self.expect_identifier_or_keyword_with_quoted()?;
9251            let trailing_comments = self.previous_trailing_comments();
9252            TableRef {
9253                name,
9254                schema: Some(schema),
9255                catalog: None,
9256                alias: None,
9257                alias_explicit_as: false,
9258                column_aliases: Vec::new(),
9259                trailing_comments,
9260                when: None,
9261                only: false,
9262                final_: false,
9263                table_sample: None,
9264                hints: Vec::new(),
9265                system_time: None,
9266                partitions: Vec::new(),
9267                identifier_func: None,
9268                changes: None,
9269                version: None,
9270                span: None,
9271            }
9272        } else {
9273            let trailing_comments = self.previous_trailing_comments();
9274            TableRef {
9275                name: table_name,
9276                schema: None,
9277                catalog: None,
9278                alias: None,
9279                alias_explicit_as: false,
9280                column_aliases: Vec::new(),
9281                when: None,
9282                only: false,
9283                final_: false,
9284                table_sample: None,
9285                hints: Vec::new(),
9286                system_time: None,
9287                trailing_comments,
9288                partitions: Vec::new(),
9289                identifier_func: None,
9290                changes: None,
9291                version: None,
9292                span: None,
9293            }
9294        };
9295
9296        // Optional alias (PostgreSQL: INSERT INTO table AS t(...), Oracle: INSERT INTO table t ...)
9297        let (alias, alias_explicit_as) = if self.match_token(TokenType::As) {
9298            (Some(Identifier::new(self.expect_identifier()?)), true)
9299        } else if self.is_identifier_token()
9300            && !self.check(TokenType::Values)
9301            && !self.check(TokenType::Select)
9302            && !self.check(TokenType::Default)
9303            && !self.check(TokenType::By)
9304            && !self.check(TokenType::Partition)
9305            && !self.check(TokenType::Output)
9306            && !self.check(TokenType::If)
9307            && !self.check(TokenType::Replace)
9308            && !self.check(TokenType::Table)
9309            && !self.check(TokenType::LParen)
9310        {
9311            // Implicit alias without AS (e.g., INSERT INTO dest d VALUES ...)
9312            (Some(Identifier::new(self.expect_identifier()?)), false)
9313        } else {
9314            (None, false)
9315        };
9316
9317        // Optional IF EXISTS (Hive)
9318        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
9319
9320        // Optional REPLACE WHERE clause (Databricks): INSERT INTO a REPLACE WHERE cond VALUES ...
9321        let replace_where =
9322            if self.match_token(TokenType::Replace) && self.match_token(TokenType::Where) {
9323                Some(Box::new(self.parse_or()?))
9324            } else {
9325                None
9326            };
9327
9328        // Optional PARTITION clause
9329        // ClickHouse: PARTITION BY expr (no parens)
9330        // Hive/Spark: PARTITION (col1 = val1, col2)
9331        let mut partition_by_expr: Option<Box<Expression>> = None;
9332        let partition = if self.check(TokenType::Partition) && self.check_next(TokenType::By) {
9333            // ClickHouse PARTITION BY expr
9334            self.advance(); // consume PARTITION
9335            self.advance(); // consume BY
9336            partition_by_expr = Some(Box::new(self.parse_expression()?));
9337            Vec::new()
9338        } else if self.match_token(TokenType::Partition) {
9339            self.expect(TokenType::LParen)?;
9340            let mut parts = Vec::new();
9341            loop {
9342                let col = Identifier::new(self.expect_identifier()?);
9343                let value = if self.match_token(TokenType::Eq) {
9344                    Some(self.parse_expression()?)
9345                } else {
9346                    None
9347                };
9348                parts.push((col, value));
9349                if !self.match_token(TokenType::Comma) {
9350                    break;
9351                }
9352            }
9353            self.expect(TokenType::RParen)?;
9354            parts
9355        } else {
9356            Vec::new()
9357        };
9358
9359        // ClickHouse: SETTINGS key = val, ...
9360        let insert_settings = if self.match_token(TokenType::Settings) {
9361            let mut settings = Vec::new();
9362            loop {
9363                settings.push(self.parse_expression()?);
9364                if !self.match_token(TokenType::Comma) {
9365                    break;
9366                }
9367            }
9368            settings
9369        } else {
9370            Vec::new()
9371        };
9372
9373        // Optional column list OR parenthesized subquery
9374        // We need to check if ( is followed by SELECT/WITH (subquery) or identifiers (column list)
9375        let columns = if self.check(TokenType::LParen) {
9376            // Look ahead to see if this is a subquery or column list
9377            if self
9378                .peek_nth(1)
9379                .map(|t| t.token_type == TokenType::Select || t.token_type == TokenType::With)
9380                .unwrap_or(false)
9381            {
9382                // This is a parenthesized subquery, not a column list
9383                Vec::new()
9384            } else if matches!(
9385                self.config.dialect,
9386                Some(crate::dialects::DialectType::ClickHouse)
9387            ) && {
9388                // ClickHouse: INSERT INTO t (*), t(* EXCEPT ...), t(table.* EXCEPT ...), t(COLUMNS('pattern') EXCEPT ...)
9389                let peek1 = self.peek_nth(1).map(|t| t.token_type);
9390                peek1 == Some(TokenType::Star)
9391                    || (peek1 == Some(TokenType::Var)
9392                        && self.peek_nth(2).map(|t| t.token_type) == Some(TokenType::Dot)
9393                        && self.peek_nth(3).map(|t| t.token_type) == Some(TokenType::Star))
9394                    || (peek1 == Some(TokenType::Var)
9395                        && self
9396                            .peek_nth(1)
9397                            .map(|t| t.text.to_uppercase() == "COLUMNS")
9398                            .unwrap_or(false))
9399            } {
9400                // Consume balanced parens and skip entire column specification
9401                self.advance(); // consume (
9402                let mut depth = 1i32;
9403                while !self.is_at_end() && depth > 0 {
9404                    if self.check(TokenType::LParen) {
9405                        depth += 1;
9406                    }
9407                    if self.check(TokenType::RParen) {
9408                        depth -= 1;
9409                        if depth == 0 {
9410                            break;
9411                        }
9412                    }
9413                    self.advance();
9414                }
9415                self.expect(TokenType::RParen)?;
9416                Vec::new() // Treat as "all columns"
9417            } else {
9418                self.advance(); // consume (
9419                let cols = self.parse_identifier_list()?;
9420                self.expect(TokenType::RParen)?;
9421                cols
9422            }
9423        } else {
9424            Vec::new()
9425        };
9426
9427        // Parse OUTPUT clause (TSQL)
9428        let output = if self.match_token(TokenType::Output) {
9429            Some(self.parse_output_clause()?)
9430        } else {
9431            None
9432        };
9433
9434        // Check for BY NAME (DuckDB): INSERT INTO x BY NAME SELECT ...
9435        let by_name = self.match_token(TokenType::By) && self.match_identifier("NAME");
9436
9437        // Check for DEFAULT VALUES (PostgreSQL)
9438        let default_values =
9439            self.match_token(TokenType::Default) && self.match_token(TokenType::Values);
9440
9441        // VALUES or SELECT or TABLE source (Hive/Spark) or DEFAULT VALUES (already consumed above)
9442        let (values, query) = if default_values {
9443            // DEFAULT VALUES: no values or query
9444            (Vec::new(), None)
9445        } else if matches!(
9446            self.config.dialect,
9447            Some(crate::dialects::DialectType::ClickHouse)
9448        ) && self.check(TokenType::Format)
9449            && self.peek_nth(1).is_some_and(|t| {
9450                let upper = t.text.to_uppercase();
9451                upper != "VALUES"
9452                    && (t.token_type == TokenType::Var || t.token_type == TokenType::Identifier)
9453            })
9454        {
9455            // ClickHouse: FORMAT <format_name> followed by raw data (CSV, JSON, TSV, etc.)
9456            // Skip everything to next semicolon or end — the data is not SQL
9457            self.advance(); // consume FORMAT
9458            let format_name = self.advance().text.clone(); // consume format name
9459                                                           // Consume all remaining tokens until semicolon (raw data)
9460            while !self.is_at_end() && !self.check(TokenType::Semicolon) {
9461                self.advance();
9462            }
9463            // Store as empty values with the format name in the query as a command
9464            (
9465                Vec::new(),
9466                Some(Expression::Command(Box::new(crate::expressions::Command {
9467                    this: format!("FORMAT {}", format_name),
9468                }))),
9469            )
9470        } else if matches!(
9471            self.config.dialect,
9472            Some(crate::dialects::DialectType::ClickHouse)
9473        ) && self.match_text_seq(&["FORMAT", "VALUES"])
9474        {
9475            let mut all_values = Vec::new();
9476
9477            loop {
9478                self.expect(TokenType::LParen)?;
9479                let row = self.parse_expression_list()?;
9480                self.expect(TokenType::RParen)?;
9481                all_values.push(row);
9482
9483                if !self.match_token(TokenType::Comma) {
9484                    break;
9485                }
9486            }
9487
9488            (all_values, None)
9489        } else if self.match_token(TokenType::Values) {
9490            let mut all_values = Vec::new();
9491
9492            // ClickHouse: INSERT INTO t VALUES; — empty VALUES (clientError expected)
9493            if matches!(
9494                self.config.dialect,
9495                Some(crate::dialects::DialectType::ClickHouse)
9496            ) && (self.check(TokenType::Semicolon) || self.is_at_end())
9497            {
9498                // Return empty INSERT as Command to avoid needing all Insert fields
9499                return Ok(Expression::Command(Box::new(crate::expressions::Command {
9500                    this: "INSERT INTO VALUES".to_string(),
9501                })));
9502            }
9503
9504            // ClickHouse: allow bare VALUES without parens: VALUES 1, 2, 3
9505            if matches!(
9506                self.config.dialect,
9507                Some(crate::dialects::DialectType::ClickHouse)
9508            ) && !self.check(TokenType::LParen)
9509            {
9510                loop {
9511                    let val = self.parse_expression()?;
9512                    all_values.push(vec![val]);
9513                    if !self.match_token(TokenType::Comma) {
9514                        break;
9515                    }
9516                }
9517            } else {
9518                loop {
9519                    self.expect(TokenType::LParen)?;
9520                    // ClickHouse: allow empty VALUES () — empty tuple
9521                    let row = if self.check(TokenType::RParen) {
9522                        Vec::new()
9523                    } else {
9524                        self.parse_values_expression_list()?
9525                    };
9526                    self.expect(TokenType::RParen)?;
9527                    all_values.push(row);
9528
9529                    if !self.match_token(TokenType::Comma) {
9530                        // ClickHouse: allow tuples without commas: VALUES (1) (2) (3)
9531                        if matches!(
9532                            self.config.dialect,
9533                            Some(crate::dialects::DialectType::ClickHouse)
9534                        ) && self.check(TokenType::LParen)
9535                        {
9536                            continue;
9537                        }
9538                        break;
9539                    }
9540                    // ClickHouse: allow trailing comma after last tuple
9541                    if matches!(
9542                        self.config.dialect,
9543                        Some(crate::dialects::DialectType::ClickHouse)
9544                    ) && !self.check(TokenType::LParen)
9545                    {
9546                        break;
9547                    }
9548                }
9549            } // close else (parenthesized values)
9550
9551            (all_values, None)
9552        } else if self.check(TokenType::Table) {
9553            // Hive/Spark: INSERT OVERWRITE TABLE target TABLE source
9554            // The TABLE keyword here indicates source table, not a subquery
9555            (Vec::new(), None)
9556        } else {
9557            (Vec::new(), Some(self.parse_statement()?))
9558        };
9559
9560        // Parse source table (Hive/Spark): INSERT OVERWRITE TABLE target TABLE source
9561        let source = if self.match_token(TokenType::Table) {
9562            // Parse source table reference (similar to target table parsing)
9563            let source_name = self.expect_identifier_with_quoted()?;
9564            let source_table = if self.match_token(TokenType::Dot) {
9565                let schema = source_name;
9566                let name = self.expect_identifier_with_quoted()?;
9567                let trailing_comments = self.previous_trailing_comments();
9568                TableRef {
9569                    name,
9570                    schema: Some(schema),
9571                    catalog: None,
9572                    alias: None,
9573                    alias_explicit_as: false,
9574                    column_aliases: Vec::new(),
9575                    trailing_comments,
9576                    when: None,
9577                    only: false,
9578                    final_: false,
9579                    table_sample: None,
9580                    hints: Vec::new(),
9581                    system_time: None,
9582                    partitions: Vec::new(),
9583                    identifier_func: None,
9584                    changes: None,
9585                    version: None,
9586                    span: None,
9587                }
9588            } else {
9589                let trailing_comments = self.previous_trailing_comments();
9590                TableRef {
9591                    name: source_name,
9592                    schema: None,
9593                    catalog: None,
9594                    alias: None,
9595                    alias_explicit_as: false,
9596                    column_aliases: Vec::new(),
9597                    trailing_comments,
9598                    when: None,
9599                    only: false,
9600                    final_: false,
9601                    table_sample: None,
9602                    hints: Vec::new(),
9603                    system_time: None,
9604                    partitions: Vec::new(),
9605                    identifier_func: None,
9606                    changes: None,
9607                    version: None,
9608                    span: None,
9609                }
9610            };
9611            Some(Expression::Table(source_table))
9612        } else {
9613            None
9614        };
9615
9616        // Parse optional AS alias after VALUES (MySQL: INSERT ... VALUES (...) AS new_data)
9617        let source_alias = if self.match_token(TokenType::As) {
9618            Some(Identifier::new(self.expect_identifier()?))
9619        } else {
9620            None
9621        };
9622
9623        // Parse ON CONFLICT clause (PostgreSQL, SQLite) or ON DUPLICATE KEY UPDATE (MySQL)
9624        let on_conflict = if self.match_token(TokenType::On) {
9625            if self.match_identifier("CONFLICT") {
9626                Some(Box::new(self.parse_on_conflict()?))
9627            } else if self.match_identifier("DUPLICATE") {
9628                // MySQL: ON DUPLICATE KEY UPDATE
9629                self.expect(TokenType::Key)?;
9630                self.expect(TokenType::Update)?;
9631
9632                // Parse the UPDATE SET expressions
9633                let mut sets = Vec::new();
9634                loop {
9635                    // Parse column = expression
9636                    let col_name = self.expect_identifier_with_quoted()?;
9637                    // Handle qualified column: table.column
9638                    let column = if self.match_token(TokenType::Dot) {
9639                        let col = self.expect_identifier_with_quoted()?;
9640                        Expression::Column(Column {
9641                            name: col,
9642                            table: Some(col_name),
9643                            join_mark: false,
9644                            trailing_comments: Vec::new(),
9645                            span: None,
9646                            inferred_type: None,
9647                        })
9648                    } else {
9649                        Expression::Identifier(col_name)
9650                    };
9651                    self.expect(TokenType::Eq)?;
9652                    let value = self.parse_expression()?;
9653                    sets.push(Expression::Eq(Box::new(BinaryOp {
9654                        left: column,
9655                        right: value,
9656                        left_comments: Vec::new(),
9657                        operator_comments: Vec::new(),
9658                        trailing_comments: Vec::new(),
9659                        inferred_type: None,
9660                    })));
9661                    if !self.match_token(TokenType::Comma) {
9662                        break;
9663                    }
9664                }
9665
9666                Some(Box::new(Expression::OnConflict(Box::new(OnConflict {
9667                    duplicate: Some(Box::new(Expression::Boolean(BooleanLiteral {
9668                        value: true,
9669                    }))),
9670                    expressions: sets,
9671                    action: None,
9672                    conflict_keys: None,
9673                    index_predicate: None,
9674                    constraint: None,
9675                    where_: None,
9676                }))))
9677            } else {
9678                // Unexpected token after ON
9679                return Err(self.parse_error("Expected CONFLICT or DUPLICATE after ON"));
9680            }
9681        } else {
9682            None
9683        };
9684
9685        // Parse RETURNING clause (PostgreSQL, SQLite)
9686        let returning = if self.match_token(TokenType::Returning) {
9687            self.parse_select_expressions()?
9688        } else {
9689            Vec::new()
9690        };
9691
9692        Ok(Expression::Insert(Box::new(Insert {
9693            table,
9694            columns,
9695            values,
9696            query,
9697            overwrite,
9698            partition,
9699            directory: None,
9700            returning,
9701            output,
9702            on_conflict,
9703            leading_comments,
9704            if_exists,
9705            with: None,
9706            ignore,
9707            source_alias,
9708            alias,
9709            alias_explicit_as,
9710            default_values,
9711            by_name,
9712            conflict_action,
9713            is_replace: false,
9714            replace_where,
9715            source: source.map(Box::new),
9716            hint,
9717            function_target,
9718            partition_by: partition_by_expr,
9719            settings: insert_settings,
9720        })))
9721    }
9722
9723    /// Parse ON CONFLICT clause for INSERT statements (PostgreSQL, SQLite)
9724    /// Syntax: ON CONFLICT [(conflict_target)] [WHERE predicate] DO NOTHING | DO UPDATE SET ...
9725    /// ON CONFLICT ON CONSTRAINT constraint_name DO ...
9726    fn parse_on_conflict(&mut self) -> Result<Expression> {
9727        // Check for ON CONSTRAINT variant
9728        let constraint =
9729            if self.match_token(TokenType::On) && self.match_token(TokenType::Constraint) {
9730                let name = self.expect_identifier()?;
9731                Some(Box::new(Expression::Identifier(Identifier::new(name))))
9732            } else {
9733                None
9734            };
9735
9736        // Parse optional conflict target (column list)
9737        let conflict_keys = if constraint.is_none() && self.match_token(TokenType::LParen) {
9738            let keys = self.parse_expression_list()?;
9739            self.expect(TokenType::RParen)?;
9740            Some(Box::new(Expression::Tuple(Box::new(Tuple {
9741                expressions: keys,
9742            }))))
9743        } else {
9744            None
9745        };
9746
9747        // Parse optional WHERE clause for conflict target
9748        let index_predicate = if self.match_token(TokenType::Where) {
9749            Some(Box::new(self.parse_expression()?))
9750        } else {
9751            None
9752        };
9753
9754        // Parse DO NOTHING or DO UPDATE
9755        if !self.match_identifier("DO") {
9756            return Err(self.parse_error("Expected DO after ON CONFLICT"));
9757        }
9758
9759        let action = if self.match_identifier("NOTHING") {
9760            // DO NOTHING
9761            Some(Box::new(Expression::Identifier(Identifier::new(
9762                "NOTHING".to_string(),
9763            ))))
9764        } else if self.match_token(TokenType::Update) {
9765            // DO UPDATE SET ...
9766            self.expect(TokenType::Set)?;
9767            let mut sets = Vec::new();
9768            loop {
9769                // Parse column = expression
9770                let col_name = self.expect_identifier_with_quoted()?;
9771                // Handle qualified column: table.column
9772                let column = if self.match_token(TokenType::Dot) {
9773                    let col = self.expect_identifier_with_quoted()?;
9774                    Expression::Column(Column {
9775                        name: col,
9776                        table: Some(col_name),
9777                        join_mark: false,
9778                        trailing_comments: Vec::new(),
9779                        span: None,
9780                        inferred_type: None,
9781                    })
9782                } else {
9783                    Expression::Identifier(col_name)
9784                };
9785                self.expect(TokenType::Eq)?;
9786                let value = self.parse_expression()?;
9787                sets.push(Expression::Eq(Box::new(BinaryOp {
9788                    left: column,
9789                    right: value,
9790                    left_comments: Vec::new(),
9791                    operator_comments: Vec::new(),
9792                    trailing_comments: Vec::new(),
9793                    inferred_type: None,
9794                })));
9795                if !self.match_token(TokenType::Comma) {
9796                    break;
9797                }
9798            }
9799            Some(Box::new(Expression::Tuple(Box::new(Tuple {
9800                expressions: sets,
9801            }))))
9802        } else {
9803            return Err(self.parse_error("Expected NOTHING or UPDATE after DO"));
9804        };
9805
9806        // Parse optional WHERE clause for the UPDATE action
9807        let where_ = if self.match_token(TokenType::Where) {
9808            Some(Box::new(self.parse_expression()?))
9809        } else {
9810            None
9811        };
9812
9813        Ok(Expression::OnConflict(Box::new(OnConflict {
9814            duplicate: None,
9815            expressions: Vec::new(),
9816            action,
9817            conflict_keys,
9818            index_predicate,
9819            constraint,
9820            where_,
9821        })))
9822    }
9823
9824    /// Parse MySQL REPLACE [INTO] statement or REPLACE() function call
9825    fn parse_replace(&mut self) -> Result<Expression> {
9826        // Check if this is REPLACE() function call (REPLACE followed by '(')
9827        // or MySQL REPLACE INTO statement
9828        let replace_token = self.expect(TokenType::Replace)?;
9829        let leading_comments = replace_token.comments;
9830
9831        if self.check(TokenType::LParen) {
9832            // This is a REPLACE() function call, parse as expression
9833            self.expect(TokenType::LParen)?;
9834            let args = self.parse_expression_list()?;
9835            self.expect(TokenType::RParen)?;
9836            return Ok(Expression::Function(Box::new(Function {
9837                name: "REPLACE".to_string(),
9838                args,
9839                distinct: false,
9840                trailing_comments: Vec::new(),
9841                use_bracket_syntax: false,
9842                no_parens: false,
9843                quoted: false,
9844                span: None,
9845                inferred_type: None,
9846            })));
9847        }
9848
9849        // Teradata: REPLACE VIEW -> CREATE OR REPLACE VIEW
9850        if matches!(
9851            self.config.dialect,
9852            Some(crate::dialects::DialectType::Teradata)
9853        ) && self.check(TokenType::View)
9854        {
9855            return self.parse_create_view(true, false, false, None, None, None, false);
9856        }
9857
9858        // ClickHouse: REPLACE TABLE -> treat like CREATE OR REPLACE TABLE
9859        // Also handle REPLACE TEMPORARY TABLE
9860        if matches!(
9861            self.config.dialect,
9862            Some(crate::dialects::DialectType::ClickHouse)
9863        ) && (self.check(TokenType::Table) || self.check(TokenType::Temporary))
9864        {
9865            let temporary = self.match_token(TokenType::Temporary);
9866            return self.parse_create_table(true, temporary, leading_comments.clone(), None);
9867        }
9868
9869        // ClickHouse: REPLACE DICTIONARY -> consume as Command
9870        if matches!(
9871            self.config.dialect,
9872            Some(crate::dialects::DialectType::ClickHouse)
9873        ) && (self.check(TokenType::Dictionary) || self.check_identifier("DICTIONARY"))
9874        {
9875            let mut parts = vec!["REPLACE".to_string()];
9876            let mut _paren_depth = 0i32;
9877            while !self.is_at_end() && !self.check(TokenType::Semicolon) {
9878                let token = self.advance();
9879                if token.token_type == TokenType::LParen {
9880                    _paren_depth += 1;
9881                }
9882                if token.token_type == TokenType::RParen {
9883                    _paren_depth -= 1;
9884                }
9885                let text = if token.token_type == TokenType::String {
9886                    format!("'{}'", token.text)
9887                } else if token.token_type == TokenType::QuotedIdentifier {
9888                    format!("\"{}\"", token.text)
9889                } else {
9890                    token.text.clone()
9891                };
9892                parts.push(text);
9893            }
9894            return Ok(Expression::Command(Box::new(crate::expressions::Command {
9895                this: parts.join(" "),
9896            })));
9897        }
9898
9899        // Otherwise, this is MySQL/SQLite REPLACE INTO statement - parse similarly to INSERT
9900        self.match_token(TokenType::Into);
9901
9902        let table_name = self.expect_identifier_or_safe_keyword_with_quoted()?;
9903        let table = if self.match_token(TokenType::Dot) {
9904            let second_name = self.expect_identifier_or_safe_keyword_with_quoted()?;
9905            TableRef {
9906                name: second_name,
9907                schema: Some(table_name),
9908                catalog: None,
9909                alias: None,
9910                alias_explicit_as: false,
9911                column_aliases: Vec::new(),
9912                trailing_comments: Vec::new(),
9913                when: None,
9914                only: false,
9915                final_: false,
9916                table_sample: None,
9917                hints: Vec::new(),
9918                system_time: None,
9919                partitions: Vec::new(),
9920                identifier_func: None,
9921                changes: None,
9922                version: None,
9923                span: None,
9924            }
9925        } else {
9926            TableRef::new(table_name.name)
9927        };
9928
9929        // Parse optional column list
9930        let columns = if self.match_token(TokenType::LParen) {
9931            let mut cols = Vec::new();
9932            loop {
9933                if self.check(TokenType::RParen) {
9934                    break;
9935                }
9936                let col = self.expect_identifier_with_quoted()?;
9937                cols.push(col);
9938                if !self.match_token(TokenType::Comma) {
9939                    break;
9940                }
9941            }
9942            self.expect(TokenType::RParen)?;
9943            cols
9944        } else {
9945            Vec::new()
9946        };
9947
9948        // Parse VALUES or SELECT query
9949        let mut values = Vec::new();
9950        let query = if self.match_token(TokenType::Values) {
9951            loop {
9952                self.expect(TokenType::LParen)?;
9953                let row = self.parse_expression_list()?;
9954                self.expect(TokenType::RParen)?;
9955                values.push(row);
9956                if !self.match_token(TokenType::Comma) {
9957                    break;
9958                }
9959            }
9960            None
9961        } else if !self.is_at_end() && !self.check(TokenType::Semicolon) {
9962            // SELECT or other statement as value source
9963            Some(self.parse_statement()?)
9964        } else {
9965            None
9966        };
9967
9968        Ok(Expression::Insert(Box::new(Insert {
9969            table,
9970            columns,
9971            values,
9972            query,
9973            overwrite: false,
9974            partition: Vec::new(),
9975            directory: None,
9976            returning: Vec::new(),
9977            output: None,
9978            on_conflict: None,
9979            leading_comments,
9980            if_exists: false,
9981            with: None,
9982            ignore: false,
9983            source_alias: None,
9984            alias: None,
9985            alias_explicit_as: false,
9986            default_values: false,
9987            by_name: false,
9988            conflict_action: None,
9989            is_replace: true,
9990            replace_where: None,
9991            source: None,
9992            hint: None,
9993            function_target: None,
9994            partition_by: None,
9995            settings: Vec::new(),
9996        })))
9997    }
9998
9999    /// Parse UPDATE statement
10000    fn parse_update(&mut self) -> Result<Expression> {
10001        let update_token = self.expect(TokenType::Update)?;
10002        let leading_comments = update_token.comments;
10003
10004        // TSQL: UPDATE STATISTICS table_name - parse as Command
10005        if self.check_identifier("STATISTICS") {
10006            let mut parts = vec!["UPDATE".to_string()];
10007            while !self.is_at_end() && !self.check(TokenType::Semicolon) {
10008                parts.push(self.advance().text);
10009            }
10010            return Ok(Expression::Command(Box::new(Command {
10011                this: parts.join(" "),
10012            })));
10013        }
10014
10015        // Parse table name (can be qualified: db.table_name)
10016        let first_name = self.expect_identifier_with_quoted()?;
10017        let mut table = if self.match_token(TokenType::Dot) {
10018            let second_name = self.expect_identifier_with_quoted()?;
10019            // Check for three-part name (catalog.schema.table)
10020            if self.match_token(TokenType::Dot) {
10021                let table_name = self.expect_identifier_with_quoted()?;
10022                TableRef {
10023                    name: table_name,
10024                    schema: Some(second_name),
10025                    catalog: Some(first_name),
10026                    alias: None,
10027                    alias_explicit_as: false,
10028                    column_aliases: Vec::new(),
10029                    trailing_comments: Vec::new(),
10030                    when: None,
10031                    only: false,
10032                    final_: false,
10033                    table_sample: None,
10034                    hints: Vec::new(),
10035                    system_time: None,
10036                    partitions: Vec::new(),
10037                    identifier_func: None,
10038                    changes: None,
10039                    version: None,
10040                    span: None,
10041                }
10042            } else {
10043                TableRef {
10044                    name: second_name,
10045                    schema: Some(first_name),
10046                    catalog: None,
10047                    alias: None,
10048                    alias_explicit_as: false,
10049                    column_aliases: Vec::new(),
10050                    trailing_comments: Vec::new(),
10051                    when: None,
10052                    only: false,
10053                    final_: false,
10054                    table_sample: None,
10055                    hints: Vec::new(),
10056                    system_time: None,
10057                    partitions: Vec::new(),
10058                    identifier_func: None,
10059                    changes: None,
10060                    version: None,
10061                    span: None,
10062                }
10063            }
10064        } else {
10065            TableRef::from_identifier(first_name)
10066        };
10067        table.trailing_comments = self.previous_trailing_comments();
10068
10069        // Optional alias (with or without AS)
10070        if self.match_token(TokenType::As) {
10071            table.alias = Some(self.expect_identifier_with_quoted()?);
10072            table.alias_explicit_as = true;
10073        } else if self.is_identifier_token() && !self.check(TokenType::Set) {
10074            // Implicit alias (table t SET ...)
10075            table.alias = Some(self.expect_identifier_with_quoted()?);
10076            table.alias_explicit_as = false;
10077        }
10078
10079        // Handle multi-table UPDATE syntax: UPDATE t1, t2, t3 LEFT JOIN t4 ON ... SET ...
10080        // Capture additional tables
10081        let mut extra_tables = Vec::new();
10082        while self.match_token(TokenType::Comma) {
10083            // Parse additional table name
10084            let first_name = self.expect_identifier_with_quoted()?;
10085            let mut extra_table = if self.match_token(TokenType::Dot) {
10086                let second_name = self.expect_identifier_with_quoted()?;
10087                if self.match_token(TokenType::Dot) {
10088                    let table_name = self.expect_identifier_with_quoted()?;
10089                    TableRef {
10090                        name: table_name,
10091                        schema: Some(second_name),
10092                        catalog: Some(first_name),
10093                        alias: None,
10094                        alias_explicit_as: false,
10095                        column_aliases: Vec::new(),
10096                        trailing_comments: Vec::new(),
10097                        when: None,
10098                        only: false,
10099                        final_: false,
10100                        table_sample: None,
10101                        hints: Vec::new(),
10102                        system_time: None,
10103                        partitions: Vec::new(),
10104                        identifier_func: None,
10105                        changes: None,
10106                        version: None,
10107                        span: None,
10108                    }
10109                } else {
10110                    TableRef {
10111                        name: second_name,
10112                        schema: Some(first_name),
10113                        catalog: None,
10114                        alias: None,
10115                        alias_explicit_as: false,
10116                        column_aliases: Vec::new(),
10117                        trailing_comments: Vec::new(),
10118                        when: None,
10119                        only: false,
10120                        final_: false,
10121                        table_sample: None,
10122                        hints: Vec::new(),
10123                        system_time: None,
10124                        partitions: Vec::new(),
10125                        identifier_func: None,
10126                        changes: None,
10127                        version: None,
10128                        span: None,
10129                    }
10130                }
10131            } else {
10132                TableRef::from_identifier(first_name)
10133            };
10134            // Optional alias
10135            if self.match_token(TokenType::As) {
10136                extra_table.alias = Some(self.expect_identifier_with_quoted()?);
10137                extra_table.alias_explicit_as = true;
10138            } else if self.is_identifier_token()
10139                && !self.check(TokenType::Set)
10140                && !self.check_keyword()
10141            {
10142                extra_table.alias = Some(self.expect_identifier_with_quoted()?);
10143                extra_table.alias_explicit_as = false;
10144            }
10145            extra_tables.push(extra_table);
10146        }
10147
10148        // Handle JOINs before SET
10149        let mut table_joins = Vec::new();
10150        while let Some((kind, _, use_inner_keyword, use_outer_keyword, _join_hint)) =
10151            self.try_parse_join_kind()
10152        {
10153            if self.check(TokenType::Join) {
10154                self.advance(); // consume JOIN
10155            }
10156            // Parse joined table
10157            let first_name = self.expect_identifier_with_quoted()?;
10158            let mut join_table = if self.match_token(TokenType::Dot) {
10159                let second_name = self.expect_identifier_with_quoted()?;
10160                if self.match_token(TokenType::Dot) {
10161                    let table_name = self.expect_identifier_with_quoted()?;
10162                    TableRef {
10163                        name: table_name,
10164                        schema: Some(second_name),
10165                        catalog: Some(first_name),
10166                        alias: None,
10167                        alias_explicit_as: false,
10168                        column_aliases: Vec::new(),
10169                        trailing_comments: Vec::new(),
10170                        when: None,
10171                        only: false,
10172                        final_: false,
10173                        table_sample: None,
10174                        hints: Vec::new(),
10175                        system_time: None,
10176                        partitions: Vec::new(),
10177                        identifier_func: None,
10178                        changes: None,
10179                        version: None,
10180                        span: None,
10181                    }
10182                } else {
10183                    TableRef {
10184                        name: second_name,
10185                        schema: Some(first_name),
10186                        catalog: None,
10187                        alias: None,
10188                        alias_explicit_as: false,
10189                        column_aliases: Vec::new(),
10190                        trailing_comments: Vec::new(),
10191                        when: None,
10192                        only: false,
10193                        final_: false,
10194                        table_sample: None,
10195                        hints: Vec::new(),
10196                        system_time: None,
10197                        partitions: Vec::new(),
10198                        changes: None,
10199                        version: None,
10200                        identifier_func: None,
10201                        span: None,
10202                    }
10203                }
10204            } else {
10205                TableRef::from_identifier(first_name)
10206            };
10207            // Optional alias
10208            if self.match_token(TokenType::As) {
10209                join_table.alias = Some(self.expect_identifier_with_quoted()?);
10210                join_table.alias_explicit_as = true;
10211            } else if self.is_identifier_token()
10212                && !self.check(TokenType::On)
10213                && !self.check(TokenType::Set)
10214            {
10215                join_table.alias = Some(self.expect_identifier_with_quoted()?);
10216                join_table.alias_explicit_as = false;
10217            }
10218            // ON clause
10219            let on_condition = if self.match_token(TokenType::On) {
10220                Some(self.parse_expression()?)
10221            } else {
10222                None
10223            };
10224            table_joins.push(Join {
10225                this: Expression::Table(join_table),
10226                on: on_condition,
10227                using: Vec::new(),
10228                kind,
10229                use_inner_keyword,
10230                use_outer_keyword,
10231                deferred_condition: false,
10232                join_hint: None,
10233                match_condition: None,
10234                pivots: Vec::new(),
10235                comments: Vec::new(),
10236                nesting_group: 0,
10237                directed: false,
10238            });
10239        }
10240
10241        // Snowflake syntax: UPDATE table FROM (source) SET ... WHERE ...
10242        // Check if FROM comes before SET
10243        let (from_before_set, early_from_clause, early_from_joins) =
10244            if self.match_token(TokenType::From) {
10245                let from_clause = self.parse_from()?;
10246                let from_joins = self.parse_joins()?;
10247                (true, Some(from_clause), from_joins)
10248            } else {
10249                (false, None, Vec::new())
10250            };
10251
10252        self.expect(TokenType::Set)?;
10253
10254        let mut set = Vec::new();
10255        loop {
10256            // Column can be qualified for multi-table UPDATE (e.g., a.id = 1)
10257            // Use safe keyword variant to allow keywords like 'exists' as column names (ClickHouse)
10258            let mut col_ident = self.expect_identifier_or_safe_keyword_with_quoted()?;
10259            while self.match_token(TokenType::Dot) {
10260                let part = self.expect_identifier_or_safe_keyword_with_quoted()?;
10261                // For qualified columns, preserve both parts
10262                col_ident = Identifier {
10263                    name: format!("{}.{}", col_ident.name, part.name),
10264                    quoted: col_ident.quoted || part.quoted,
10265                    trailing_comments: Vec::new(),
10266                    span: None,
10267                };
10268            }
10269            self.expect(TokenType::Eq)?;
10270            let value = self.parse_expression()?;
10271            set.push((col_ident, value));
10272
10273            if !self.match_token(TokenType::Comma) {
10274                break;
10275            }
10276        }
10277
10278        // Parse OUTPUT clause (TSQL)
10279        let output = if self.match_token(TokenType::Output) {
10280            Some(self.parse_output_clause()?)
10281        } else {
10282            None
10283        };
10284
10285        // Parse FROM clause (PostgreSQL, SQL Server, Snowflake) - only if not already parsed before SET
10286        let (from_clause, from_joins) = if from_before_set {
10287            (early_from_clause, early_from_joins)
10288        } else if self.match_token(TokenType::From) {
10289            let from_clause = Some(self.parse_from()?);
10290            let from_joins = self.parse_joins()?;
10291            (from_clause, from_joins)
10292        } else {
10293            (None, Vec::new())
10294        };
10295
10296        let where_clause = if self.match_token(TokenType::Where) {
10297            Some(Where {
10298                this: self.parse_expression()?,
10299            })
10300        } else {
10301            None
10302        };
10303
10304        // Parse RETURNING clause (PostgreSQL, SQLite)
10305        let returning = if self.match_token(TokenType::Returning) {
10306            self.parse_select_expressions()?
10307        } else {
10308            Vec::new()
10309        };
10310
10311        // Parse ORDER BY clause (MySQL)
10312        let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
10313            Some(self.parse_order_by()?)
10314        } else {
10315            None
10316        };
10317
10318        // Parse LIMIT clause (MySQL)
10319        let limit = if self.match_token(TokenType::Limit) {
10320            Some(self.parse_expression()?)
10321        } else {
10322            None
10323        };
10324
10325        Ok(Expression::Update(Box::new(Update {
10326            table,
10327            extra_tables,
10328            table_joins,
10329            set,
10330            from_clause,
10331            from_joins,
10332            where_clause,
10333            returning,
10334            output,
10335            with: None,
10336            leading_comments,
10337            limit,
10338            order_by,
10339            from_before_set,
10340        })))
10341    }
10342
10343    /// Parse DELETE statement
10344    /// Handles:
10345    /// - Standard: DELETE FROM t WHERE ...
10346    /// - PostgreSQL USING: DELETE FROM t USING s WHERE ... RETURNING a
10347    /// - DuckDB USING: DELETE FROM t USING (VALUES ...) AS t1 WHERE ...
10348    /// - MySQL multi-table: DELETE t1 FROM t1 JOIN t2 ON ... WHERE ...
10349    /// - MySQL multi-table: DELETE t1, t2 FROM t1 JOIN t2 JOIN t3 WHERE ...
10350    /// - MySQL USING: DELETE FROM t1, t2 USING t1 JOIN t2 JOIN t3 WHERE ...
10351    /// - MySQL FORCE INDEX: DELETE FROM t FORCE INDEX (idx) WHERE ...
10352    fn parse_delete(&mut self) -> Result<Expression> {
10353        let delete_token = self.expect(TokenType::Delete)?;
10354        let leading_comments = delete_token.comments;
10355
10356        // Check if FROM is present. If not, this is MySQL multi-table: DELETE t1, t2 FROM ...
10357        // or TSQL: DELETE x OUTPUT x.a FROM z
10358        let mut tables = Vec::new();
10359        let mut early_output = None;
10360        let _has_from = if self.check(TokenType::From) {
10361            self.advance(); // consume FROM
10362            true
10363        } else {
10364            // MySQL multi-table: DELETE t1[, t2, ...] FROM ...
10365            // or TSQL: DELETE x OUTPUT x.a FROM z
10366            // or BigQuery/generic: DELETE table WHERE ... (no FROM required)
10367            // Parse target table list (supporting dotted names)
10368            loop {
10369                let tref = self.parse_table_ref()?;
10370                tables.push(tref);
10371                if !self.match_token(TokenType::Comma) {
10372                    break;
10373                }
10374            }
10375            // TSQL: OUTPUT clause can appear before FROM
10376            if self.match_token(TokenType::Output) {
10377                early_output = Some(self.parse_output_clause()?);
10378            }
10379            if self.check(TokenType::From) {
10380                self.advance(); // consume FROM
10381                true
10382            } else {
10383                // BigQuery-style: DELETE table WHERE ... (no FROM)
10384                false
10385            }
10386        };
10387
10388        // Now parse the main table after FROM (or use from no-FROM path)
10389        let table = if _has_from {
10390            // Parse the main table(s) after FROM
10391            // Use parse_table_ref() to handle dotted names like db.table
10392            self.parse_table_ref()?
10393        } else {
10394            // BigQuery-style: table was already parsed into `tables`
10395            // Move it out to be the main table
10396            if !tables.is_empty() {
10397                tables.remove(0)
10398            } else {
10399                return Err(self.parse_error("Expected table name in DELETE statement"));
10400            }
10401        };
10402
10403        // ClickHouse: ON CLUSTER clause
10404        let on_cluster = self.parse_on_cluster_clause()?;
10405
10406        // Check for additional tables after the first: DELETE FROM t1, t2 USING ...
10407        let mut extra_from_tables = Vec::new();
10408        if _has_from
10409            && tables.is_empty()
10410            && self.check(TokenType::Comma)
10411            && !self.check(TokenType::Where)
10412        {
10413            // Could be multi-table: DELETE FROM t1, t2 USING ...
10414            // Check ahead if this is followed by USING or more tables
10415            while self.match_token(TokenType::Comma) {
10416                let extra_name = self.expect_identifier_with_quoted()?;
10417                let extra_ref = TableRef::from_identifier(extra_name);
10418                extra_from_tables.push(extra_ref);
10419            }
10420        }
10421
10422        // If we had DELETE FROM t1, t2 USING ..., the tables field stores t1, t2
10423        let mut tables_from_using = false;
10424        if !extra_from_tables.is_empty() {
10425            // The main table + extra tables form the multi-table target
10426            tables.push(table.clone());
10427            tables.append(&mut extra_from_tables);
10428            tables_from_using = true;
10429        }
10430
10431        // Check for FORCE INDEX hint (MySQL): DELETE FROM t FORCE INDEX (idx)
10432        let force_index = if self.match_text_seq(&["FORCE", "INDEX"]) {
10433            self.expect(TokenType::LParen)?;
10434            let idx_name = self.expect_identifier_with_quoted()?;
10435            self.expect(TokenType::RParen)?;
10436            Some(idx_name.name)
10437        } else {
10438            None
10439        };
10440
10441        // Check for optional alias (with or without AS)
10442        let (alias, alias_explicit_as) = if force_index.is_none() && self.match_token(TokenType::As)
10443        {
10444            (Some(self.expect_identifier_with_quoted()?), true)
10445        } else if force_index.is_none()
10446            && self.is_identifier_token()
10447            && !self.check(TokenType::Using)
10448            && !self.check(TokenType::Where)
10449            && !self.check(TokenType::Inner)
10450            && !self.check(TokenType::Left)
10451            && !self.check(TokenType::Right)
10452            && !self.check(TokenType::Cross)
10453            && !self.check(TokenType::Full)
10454            && !self.check(TokenType::Join)
10455            && !self.check_identifier("FORCE")
10456        {
10457            (Some(self.expect_identifier_with_quoted()?), false)
10458        } else {
10459            (None, false)
10460        };
10461
10462        // Parse JOINs for MySQL multi-table: DELETE t1 FROM t1 LEFT JOIN t2 ON ...
10463        let mut joins = self.parse_joins()?;
10464
10465        // Parse USING clause (PostgreSQL/DuckDB/MySQL)
10466        let mut using = Vec::new();
10467        if self.match_token(TokenType::Using) {
10468            loop {
10469                // Check for subquery: USING (SELECT ...) AS ... or (VALUES ...) AS ...
10470                if self.check(TokenType::LParen) {
10471                    // Check if next token after ( is VALUES
10472                    let is_values = self.current + 1 < self.tokens.len()
10473                        && self.tokens[self.current + 1].token_type == TokenType::Values;
10474                    let subquery = if is_values {
10475                        // Parse (VALUES ...) as parenthesized VALUES
10476                        self.advance(); // consume (
10477                        let values = self.parse_values()?;
10478                        self.expect(TokenType::RParen)?;
10479                        Expression::Paren(Box::new(Paren {
10480                            this: values,
10481                            trailing_comments: Vec::new(),
10482                        }))
10483                    } else {
10484                        // Parse as subquery (SELECT ...) or other expression
10485                        self.parse_primary()?
10486                    };
10487                    // Parse alias
10488                    let using_alias = if self.match_token(TokenType::As) {
10489                        let alias_name = self.expect_identifier_with_quoted()?;
10490                        // Check for column aliases: AS name(col1, col2)
10491                        let col_aliases = if self.match_token(TokenType::LParen) {
10492                            let aliases = self.parse_identifier_list()?;
10493                            self.expect(TokenType::RParen)?;
10494                            aliases
10495                        } else {
10496                            Vec::new()
10497                        };
10498                        Some((alias_name, col_aliases))
10499                    } else {
10500                        None
10501                    };
10502                    // Create a TableRef from the subquery with alias
10503                    let mut tref = TableRef::new("");
10504                    if let Some((alias_name, col_aliases)) = using_alias {
10505                        tref.alias = Some(alias_name);
10506                        tref.alias_explicit_as = true;
10507                        tref.column_aliases = col_aliases;
10508                    }
10509                    // Store the subquery in the table reference using hints (as a hack)
10510                    // Actually, we need a better approach - use the table ref hints to store the subquery
10511                    tref.hints = vec![subquery];
10512                    using.push(tref);
10513                } else {
10514                    let using_table = self.expect_identifier_with_quoted()?;
10515                    let mut using_ref = TableRef::from_identifier(using_table);
10516
10517                    // Check for JOINs: USING t1 INNER JOIN t2 INNER JOIN t3
10518                    if self.check_join_keyword() {
10519                        // Parse JOINs as part of USING
10520                        using.push(using_ref);
10521                        let mut using_joins = self.parse_joins()?;
10522                        joins.append(&mut using_joins);
10523                        break;
10524                    }
10525
10526                    // Optional alias for using table
10527                    if self.match_token(TokenType::As) {
10528                        using_ref.alias = Some(self.expect_identifier_with_quoted()?);
10529                        using_ref.alias_explicit_as = true;
10530                    } else if self.is_identifier_token()
10531                        && !self.check(TokenType::Comma)
10532                        && !self.check(TokenType::Where)
10533                    {
10534                        using_ref.alias = Some(self.expect_identifier_with_quoted()?);
10535                    }
10536                    using.push(using_ref);
10537                }
10538                if !self.match_token(TokenType::Comma) {
10539                    break;
10540                }
10541            }
10542        }
10543
10544        // ClickHouse: IN PARTITION 'partition_id' clause before WHERE
10545        if matches!(
10546            self.config.dialect,
10547            Some(crate::dialects::DialectType::ClickHouse)
10548        ) && self.check(TokenType::In)
10549            && self
10550                .peek_nth(1)
10551                .is_some_and(|t| t.text.eq_ignore_ascii_case("PARTITION"))
10552        {
10553            self.advance(); // consume IN
10554            self.advance(); // consume PARTITION
10555                            // Consume partition expression (string or identifier)
10556            let _partition = self.parse_primary()?;
10557        }
10558
10559        // Parse OUTPUT clause (TSQL) - may have been parsed early (before FROM)
10560        let output = if early_output.is_some() {
10561            early_output
10562        } else if self.match_token(TokenType::Output) {
10563            Some(self.parse_output_clause()?)
10564        } else {
10565            None
10566        };
10567
10568        let where_clause = if self.match_token(TokenType::Where) {
10569            Some(Where {
10570                this: self.parse_expression()?,
10571            })
10572        } else {
10573            None
10574        };
10575
10576        // Parse ORDER BY clause (MySQL)
10577        let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
10578            Some(self.parse_order_by()?)
10579        } else {
10580            None
10581        };
10582
10583        // Parse LIMIT clause (MySQL)
10584        let limit = if self.match_token(TokenType::Limit) {
10585            Some(self.parse_expression()?)
10586        } else {
10587            None
10588        };
10589
10590        // Parse RETURNING clause (PostgreSQL)
10591        let returning = if self.match_token(TokenType::Returning) {
10592            self.parse_select_expressions()?
10593        } else {
10594            Vec::new()
10595        };
10596
10597        Ok(Expression::Delete(Box::new(Delete {
10598            table,
10599            on_cluster,
10600            alias,
10601            alias_explicit_as,
10602            using,
10603            where_clause,
10604            output,
10605            leading_comments,
10606            with: None,
10607            limit,
10608            order_by,
10609            returning,
10610            tables,
10611            tables_from_using,
10612            joins,
10613            force_index,
10614            no_from: !_has_from,
10615        })))
10616    }
10617
10618    // ==================== DDL Parsing ====================
10619
10620    /// Parse a CREATE statement
10621    fn parse_create(&mut self) -> Result<Expression> {
10622        let create_pos = self.current; // position of CREATE token
10623        let create_token = self.expect(TokenType::Create)?;
10624        let leading_comments = create_token.comments;
10625
10626        // Handle OR REPLACE
10627        let or_replace = self.match_keywords(&[TokenType::Or, TokenType::Replace]);
10628
10629        // Handle TEMPORARY
10630        let temporary = self.match_token(TokenType::Temporary);
10631
10632        // Handle MATERIALIZED
10633        let materialized = self.match_token(TokenType::Materialized);
10634
10635        // Parse MySQL-specific CREATE VIEW options: ALGORITHM, DEFINER, SQL SECURITY
10636        // CREATE ALGORITHM=... DEFINER=... SQL SECURITY DEFINER VIEW ...
10637        let mut algorithm: Option<String> = None;
10638        let mut definer: Option<String> = None;
10639        let mut security: Option<FunctionSecurity> = None;
10640
10641        while self.match_identifier("ALGORITHM")
10642            || self.match_identifier("DEFINER")
10643            || self.match_identifier("SQL")
10644        {
10645            let option_name = self.previous().text.to_uppercase();
10646
10647            if option_name == "ALGORITHM" && self.match_token(TokenType::Eq) {
10648                // ALGORITHM=UNDEFINED|MERGE|TEMPTABLE
10649                let value = self.expect_identifier_or_keyword()?;
10650                algorithm = Some(value.to_uppercase());
10651            } else if option_name == "DEFINER" && self.match_token(TokenType::Eq) {
10652                // DEFINER=user@host (can include @ and %)
10653                let mut definer_value = String::new();
10654                while !self.is_at_end()
10655                    && !self.check(TokenType::View)
10656                    && !self.check_identifier("ALGORITHM")
10657                    && !self.check_identifier("DEFINER")
10658                    && !self.check_identifier("SQL")
10659                    && !self.check_identifier("SECURITY")
10660                {
10661                    definer_value.push_str(&self.advance().text);
10662                }
10663                definer = Some(definer_value);
10664            } else if option_name == "SQL" && self.match_identifier("SECURITY") {
10665                // SQL SECURITY DEFINER/INVOKER
10666                if self.match_identifier("DEFINER") {
10667                    security = Some(FunctionSecurity::Definer);
10668                } else if self.match_identifier("INVOKER") {
10669                    security = Some(FunctionSecurity::Invoker);
10670                }
10671            }
10672        }
10673
10674        // Handle SECURE modifier for VIEW (Snowflake)
10675        let secure = self.match_identifier("SECURE");
10676
10677        // Handle table modifiers: DYNAMIC, ICEBERG, EXTERNAL, HYBRID, TRANSIENT (Snowflake), UNLOGGED (PostgreSQL)
10678        let mut table_modifier: Option<String> = if self.check_identifier("DYNAMIC") {
10679            self.advance();
10680            Some("DYNAMIC".to_string())
10681        } else if self.check_identifier("ICEBERG") {
10682            self.advance();
10683            Some("ICEBERG".to_string())
10684        } else if self.check_identifier("EXTERNAL") {
10685            self.advance();
10686            Some("EXTERNAL".to_string())
10687        } else if self.check_identifier("HYBRID") {
10688            self.advance();
10689            Some("HYBRID".to_string())
10690        } else if self.check_identifier("TRANSIENT") {
10691            self.advance();
10692            Some("TRANSIENT".to_string())
10693        } else if self.check_identifier("UNLOGGED") {
10694            self.advance();
10695            Some("UNLOGGED".to_string())
10696        } else if self.check_identifier("DICTIONARY") {
10697            self.advance();
10698            Some("DICTIONARY".to_string())
10699        } else if self.check(TokenType::Dictionary) {
10700            self.advance();
10701            Some("DICTIONARY".to_string())
10702        } else {
10703            None
10704        };
10705
10706        // Teradata: SET/MULTISET/VOLATILE/GLOBAL TEMPORARY modifiers before TABLE
10707        if matches!(
10708            self.config.dialect,
10709            Some(crate::dialects::DialectType::Teradata)
10710        ) {
10711            let mut parts = Vec::new();
10712            loop {
10713                if self.match_token(TokenType::Set) {
10714                    parts.push(self.previous().text.to_uppercase());
10715                } else if self.match_identifier("MULTISET") {
10716                    parts.push(self.previous().text.to_uppercase());
10717                } else if self.match_identifier("VOLATILE") {
10718                    parts.push(self.previous().text.to_uppercase());
10719                } else if self.match_identifier("GLOBAL") {
10720                    parts.push(self.previous().text.to_uppercase());
10721                } else if self.match_token(TokenType::Temporary) {
10722                    parts.push(self.previous().text.to_uppercase());
10723                } else {
10724                    break;
10725                }
10726            }
10727            if !parts.is_empty() {
10728                table_modifier = Some(parts.join(" "));
10729            }
10730        }
10731
10732        if table_modifier.as_deref() == Some("DICTIONARY") {
10733            return self.parse_create_table(
10734                or_replace,
10735                temporary,
10736                leading_comments,
10737                table_modifier.as_deref(),
10738            );
10739        }
10740
10741        match self.peek().token_type {
10742            TokenType::Table => {
10743                // Check if this is CREATE TABLE FUNCTION (BigQuery)
10744                if self.current + 1 < self.tokens.len()
10745                    && self.tokens[self.current + 1].token_type == TokenType::Function
10746                {
10747                    self.advance(); // consume TABLE
10748                    return self.parse_create_function(or_replace, temporary, true);
10749                }
10750                let modifier = if materialized {
10751                    Some("MATERIALIZED")
10752                } else {
10753                    table_modifier.as_deref()
10754                };
10755                self.parse_create_table(or_replace, temporary, leading_comments, modifier)
10756            }
10757            TokenType::Dictionary => {
10758                self.parse_create_table(or_replace, temporary, leading_comments, Some("DICTIONARY"))
10759            }
10760            TokenType::View => self.parse_create_view(
10761                or_replace,
10762                materialized,
10763                temporary,
10764                algorithm,
10765                definer,
10766                security,
10767                secure,
10768            ),
10769            TokenType::Unique => {
10770                self.advance(); // consume UNIQUE
10771                                // Check for CLUSTERED/NONCLUSTERED after UNIQUE (TSQL)
10772                let clustered = if self.check_identifier("CLUSTERED") {
10773                    self.advance();
10774                    Some("CLUSTERED".to_string())
10775                } else if self.check_identifier("NONCLUSTERED") {
10776                    self.advance();
10777                    Some("NONCLUSTERED".to_string())
10778                } else {
10779                    None
10780                };
10781                // Check for COLUMNSTORE (TSQL: CREATE UNIQUE NONCLUSTERED COLUMNSTORE INDEX)
10782                if self.check_identifier("COLUMNSTORE") {
10783                    self.advance();
10784                    // Prepend COLUMNSTORE to clustered
10785                    let clustered = clustered
10786                        .map(|c| format!("{} COLUMNSTORE", c))
10787                        .or_else(|| Some("COLUMNSTORE".to_string()));
10788                    self.parse_create_index_with_clustered(true, clustered)
10789                } else {
10790                    self.parse_create_index_with_clustered(true, clustered)
10791                }
10792            }
10793            TokenType::Index => self.parse_create_index_with_clustered(false, None),
10794            TokenType::Schema => self.parse_create_schema(leading_comments),
10795            TokenType::Database => self.parse_create_database(),
10796            TokenType::Function => self.parse_create_function(or_replace, temporary, false),
10797            TokenType::Procedure => self.parse_create_procedure(or_replace),
10798            TokenType::Sequence => self.parse_create_sequence(temporary, or_replace),
10799            TokenType::Trigger => self.parse_create_trigger(or_replace, false, create_pos),
10800            TokenType::Constraint => {
10801                self.advance(); // consume CONSTRAINT
10802                self.parse_create_trigger(or_replace, true, create_pos)
10803            }
10804            TokenType::Type => self.parse_create_type(),
10805            TokenType::Domain => self.parse_create_domain(),
10806            _ => {
10807                // Handle TSQL CLUSTERED/NONCLUSTERED [COLUMNSTORE] INDEX
10808                if self.check_identifier("CLUSTERED") || self.check_identifier("NONCLUSTERED") {
10809                    let clustered_text = self.advance().text.to_uppercase();
10810                    // Check for COLUMNSTORE after CLUSTERED/NONCLUSTERED
10811                    let clustered = if self.check_identifier("COLUMNSTORE") {
10812                        self.advance();
10813                        Some(format!("{} COLUMNSTORE", clustered_text))
10814                    } else {
10815                        Some(clustered_text)
10816                    };
10817                    return self.parse_create_index_with_clustered(false, clustered);
10818                }
10819                // Handle TSQL COLUMNSTORE INDEX (without CLUSTERED/NONCLUSTERED prefix)
10820                if self.check_identifier("COLUMNSTORE") && {
10821                    let pos = self.current;
10822                    let result = pos + 1 < self.tokens.len()
10823                        && self.tokens[pos + 1].token_type == TokenType::Index;
10824                    result
10825                } {
10826                    self.advance(); // consume COLUMNSTORE
10827                                    // COLUMNSTORE without prefix implies NONCLUSTERED
10828                    return self.parse_create_index_with_clustered(
10829                        false,
10830                        Some("NONCLUSTERED COLUMNSTORE".to_string()),
10831                    );
10832                }
10833                // Handle identifiers that aren't keywords: TAG, STAGE, STREAM, etc.
10834                if self.check_identifier("TAG") {
10835                    return self.parse_create_tag(or_replace);
10836                }
10837                if self.check_identifier("STAGE") {
10838                    return self.parse_create_stage(or_replace, temporary);
10839                }
10840                if self.check_identifier("STREAM") {
10841                    return self.parse_create_stream(or_replace);
10842                }
10843                if (self.check_identifier("FILE") || self.check(TokenType::File)) && {
10844                    let next = self.current + 1;
10845                    next < self.tokens.len()
10846                        && (self.tokens[next].text.eq_ignore_ascii_case("FORMAT"))
10847                } {
10848                    return self.parse_create_file_format(or_replace, temporary);
10849                }
10850                // Fall back to Raw for unrecognized CREATE targets
10851                // (e.g., CREATE WAREHOUSE, CREATE STREAMLIT, CREATE STORAGE INTEGRATION, etc.)
10852                {
10853                    let start = self.current;
10854                    while !self.is_at_end() && !self.check(TokenType::Semicolon) {
10855                        self.advance();
10856                    }
10857                    let sql = self.tokens_to_sql(start, self.current);
10858                    let mut prefix = String::from("CREATE");
10859                    if or_replace {
10860                        prefix.push_str(" OR REPLACE");
10861                    }
10862                    if temporary {
10863                        prefix.push_str(" TEMPORARY");
10864                    }
10865                    if materialized {
10866                        prefix.push_str(" MATERIALIZED");
10867                    }
10868                    prefix.push(' ');
10869                    prefix.push_str(&sql);
10870                    Ok(Expression::Raw(Raw { sql: prefix }))
10871                }
10872            }
10873        }
10874    }
10875
10876    /// Parse CREATE TABLE
10877    fn parse_create_table(
10878        &mut self,
10879        or_replace: bool,
10880        temporary: bool,
10881        leading_comments: Vec<String>,
10882        table_modifier: Option<&str>,
10883    ) -> Result<Expression> {
10884        if table_modifier == Some("DICTIONARY") {
10885            let _ = self.match_token(TokenType::Dictionary);
10886        } else {
10887            self.expect(TokenType::Table)?;
10888        }
10889
10890        // Handle IF NOT EXISTS
10891        let if_not_exists =
10892            self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
10893
10894        let is_special_modifier = matches!(
10895            table_modifier,
10896            Some(
10897                "DYNAMIC"
10898                    | "ICEBERG"
10899                    | "EXTERNAL"
10900                    | "HYBRID"
10901                    | "UNLOGGED"
10902                    | "DICTIONARY"
10903                    | "MATERIALIZED"
10904            )
10905        ) || (table_modifier.is_some()
10906            && matches!(
10907                self.config.dialect,
10908                Some(crate::dialects::DialectType::Teradata)
10909            ));
10910        let is_clickhouse = matches!(
10911            self.config.dialect,
10912            Some(crate::dialects::DialectType::ClickHouse)
10913        );
10914
10915        // Parse table name
10916        let name = self.parse_table_ref()?;
10917
10918        // ClickHouse: UUID 'xxx' clause after table name
10919        if matches!(
10920            self.config.dialect,
10921            Some(crate::dialects::DialectType::ClickHouse)
10922        ) && self.check_identifier("UUID")
10923        {
10924            self.advance(); // consume UUID
10925            let _ = self.advance(); // consume UUID string value
10926        }
10927
10928        // ClickHouse: ON CLUSTER clause
10929        let on_cluster = self.parse_on_cluster_clause()?;
10930
10931        // Teradata: options after name before column list
10932        let teradata_post_name_options = if matches!(
10933            self.config.dialect,
10934            Some(crate::dialects::DialectType::Teradata)
10935        ) {
10936            self.parse_teradata_post_name_options()
10937        } else {
10938            Vec::new()
10939        };
10940
10941        // Handle PARTITION OF parent_table [(column_defs)] [FOR VALUES spec | DEFAULT] [PARTITION BY ...]
10942        if self.match_keywords(&[TokenType::Partition, TokenType::Of]) {
10943            return self.parse_create_table_partition_of(
10944                name,
10945                if_not_exists,
10946                temporary,
10947                or_replace,
10948                table_modifier,
10949                leading_comments,
10950            );
10951        }
10952
10953        // ClickHouse: EMPTY AS source_table — create empty table from source
10954        if matches!(
10955            self.config.dialect,
10956            Some(crate::dialects::DialectType::ClickHouse)
10957        ) && self.check_identifier("EMPTY")
10958        {
10959            if self.check_next(TokenType::As) {
10960                self.advance(); // consume EMPTY
10961                self.advance(); // consume AS
10962                                // Consume rest as Command
10963                let start = self.current;
10964                while !self.is_at_end() && !self.check(TokenType::Semicolon) {
10965                    self.advance();
10966                }
10967                let rest_sql = self.tokens_to_sql(start, self.current);
10968                let mut prefix = String::from("CREATE TABLE");
10969                if if_not_exists {
10970                    prefix.push_str(" IF NOT EXISTS");
10971                }
10972                prefix.push(' ');
10973                prefix.push_str(&name.name.name);
10974                prefix.push_str(" EMPTY AS ");
10975                prefix.push_str(&rest_sql);
10976                return Ok(Expression::Raw(Raw { sql: prefix }));
10977            }
10978        }
10979
10980        // Handle [SHALLOW | DEEP] CLONE source_table [AT(...) | BEFORE(...)]
10981        // Databricks/Delta Lake uses SHALLOW CLONE / DEEP CLONE
10982        // Snowflake uses just CLONE (which is equivalent to DEEP CLONE)
10983        let shallow_clone = self.check_identifier("SHALLOW");
10984        let deep_clone = self.check_identifier("DEEP");
10985        if shallow_clone || deep_clone {
10986            self.advance(); // consume SHALLOW or DEEP
10987        }
10988        // Also handle COPY (BigQuery: CREATE TABLE ... COPY source_table)
10989        // But NOT "COPY GRANTS" which is a Snowflake property
10990        let is_copy = self.check(TokenType::Copy) && !self.check_next_identifier("GRANTS");
10991        if self.check_identifier("CLONE") || is_copy {
10992            self.advance(); // consume CLONE or COPY
10993                            // ClickHouse: CLONE AS source_table (AS is part of the syntax, not an alias)
10994            if matches!(
10995                self.config.dialect,
10996                Some(crate::dialects::DialectType::ClickHouse)
10997            ) {
10998                let _ = self.match_token(TokenType::As);
10999            }
11000            let source = self.parse_table_ref()?;
11001            // Parse optional AT or BEFORE time travel clause
11002            // Note: BEFORE is a keyword token, AT is an identifier
11003            let at_clause = if self.match_identifier("AT") || self.match_token(TokenType::Before) {
11004                let keyword = self.previous().text.to_uppercase();
11005                self.expect(TokenType::LParen)?;
11006                // Parse the content: OFFSET => value or TIMESTAMP => value
11007                let mut result = format!("{} (", keyword);
11008                let mut prev_token_type: Option<TokenType> = None;
11009                let mut paren_depth = 1;
11010                while !self.is_at_end() && paren_depth > 0 {
11011                    let token = self.advance();
11012                    if token.token_type == TokenType::LParen {
11013                        paren_depth += 1;
11014                    } else if token.token_type == TokenType::RParen {
11015                        paren_depth -= 1;
11016                        if paren_depth == 0 {
11017                            break;
11018                        }
11019                    }
11020                    let needs_space = !result.ends_with('(')
11021                        && prev_token_type != Some(TokenType::Arrow)
11022                        && prev_token_type != Some(TokenType::Dash)
11023                        && prev_token_type != Some(TokenType::LParen)
11024                        && prev_token_type != Some(TokenType::Comma) // comma already adds trailing space
11025                        && token.token_type != TokenType::LParen; // no space before (
11026                    if needs_space
11027                        && token.token_type != TokenType::RParen
11028                        && token.token_type != TokenType::Comma
11029                    {
11030                        result.push(' ');
11031                    }
11032                    // Properly quote string literals
11033                    if token.token_type == TokenType::String {
11034                        result.push('\'');
11035                        result.push_str(&token.text.replace('\'', "''"));
11036                        result.push('\'');
11037                    } else {
11038                        result.push_str(&token.text);
11039                    }
11040                    if token.token_type == TokenType::Arrow || token.token_type == TokenType::Comma
11041                    {
11042                        result.push(' ');
11043                    }
11044                    prev_token_type = Some(token.token_type);
11045                }
11046                result.push(')');
11047                Some(Expression::Raw(Raw { sql: result }))
11048            } else {
11049                None
11050            };
11051            // Return the CLONE table immediately
11052            return Ok(Expression::CreateTable(Box::new(CreateTable {
11053                name,
11054                on_cluster: on_cluster.clone(),
11055                columns: Vec::new(),
11056                constraints: Vec::new(),
11057                if_not_exists,
11058                temporary,
11059                or_replace,
11060                table_modifier: table_modifier.map(|s| s.to_string()),
11061                as_select: None,
11062                as_select_parenthesized: false,
11063                on_commit: None,
11064                clone_source: Some(source),
11065                clone_at_clause: at_clause,
11066                shallow_clone,
11067                is_copy,
11068                leading_comments,
11069                with_properties: Vec::new(),
11070                teradata_post_name_options: teradata_post_name_options.clone(),
11071                with_data: None,
11072                with_statistics: None,
11073                teradata_indexes: Vec::new(),
11074                with_cte: None,
11075                properties: Vec::new(),
11076                partition_of: None,
11077                post_table_properties: Vec::new(),
11078                mysql_table_options: Vec::new(),
11079                inherits: Vec::new(),
11080                on_property: None,
11081                copy_grants: false,
11082                using_template: None,
11083                rollup: None,
11084            })));
11085        }
11086
11087        // Handle WITH properties before columns/AS (e.g., CREATE TABLE z WITH (FORMAT='parquet') AS SELECT 1)
11088        let with_properties = if self.match_token(TokenType::With) {
11089            self.parse_with_properties()?
11090        } else {
11091            Vec::new()
11092        };
11093
11094        // Snowflake: COPY GRANTS clause (before column list or AS)
11095        let copy_grants = self.match_text_seq(&["COPY", "GRANTS"]);
11096
11097        // Snowflake: USING TEMPLATE (expr) - allows schema inference from a query
11098        let using_template = if self.match_text_seq(&["USING", "TEMPLATE"]) {
11099            Some(Box::new(self.parse_primary()?))
11100        } else {
11101            None
11102        };
11103
11104        // If we have USING TEMPLATE, return early since it replaces AS SELECT
11105        if using_template.is_some() {
11106            return Ok(Expression::CreateTable(Box::new(CreateTable {
11107                name,
11108                on_cluster: on_cluster.clone(),
11109                columns: Vec::new(),
11110                constraints: Vec::new(),
11111                if_not_exists,
11112                temporary,
11113                or_replace,
11114                table_modifier: table_modifier.map(|s| s.to_string()),
11115                as_select: None,
11116                as_select_parenthesized: false,
11117                on_commit: None,
11118                clone_source: None,
11119                clone_at_clause: None,
11120                shallow_clone: false,
11121                is_copy: false,
11122                leading_comments,
11123                with_properties,
11124                teradata_post_name_options: teradata_post_name_options.clone(),
11125                with_data: None,
11126                with_statistics: None,
11127                teradata_indexes: Vec::new(),
11128                with_cte: None,
11129                properties: Vec::new(),
11130                partition_of: None,
11131                post_table_properties: Vec::new(),
11132                mysql_table_options: Vec::new(),
11133                inherits: Vec::new(),
11134                on_property: None,
11135                copy_grants,
11136                using_template,
11137                rollup: None,
11138            })));
11139        }
11140
11141        // Redshift: Parse DISTKEY, SORTKEY, DISTSTYLE, BACKUP before AS SELECT (CTAS without columns)
11142        // This handles: CREATE TABLE t BACKUP YES|NO AS SELECT ...
11143        let mut redshift_ctas_properties: Vec<Expression> = Vec::new();
11144        loop {
11145            if self.match_identifier("DISTKEY") {
11146                // DISTKEY(column)
11147                if self.match_token(TokenType::LParen) {
11148                    let col = self.expect_identifier()?;
11149                    self.expect(TokenType::RParen)?;
11150                    redshift_ctas_properties.push(Expression::DistKeyProperty(Box::new(
11151                        DistKeyProperty {
11152                            this: Box::new(Expression::Column(Column {
11153                                name: Identifier::new(col),
11154                                table: None,
11155                                join_mark: false,
11156                                trailing_comments: Vec::new(),
11157                                span: None,
11158                                inferred_type: None,
11159                            })),
11160                        },
11161                    )));
11162                }
11163            } else if self.check_identifier("COMPOUND") || self.check_identifier("INTERLEAVED") {
11164                // COMPOUND SORTKEY(col, ...) or INTERLEAVED SORTKEY(col, ...)
11165                let modifier = self.advance().text.to_uppercase();
11166                if self.match_identifier("SORTKEY") && self.match_token(TokenType::LParen) {
11167                    let mut cols = Vec::new();
11168                    loop {
11169                        let col = self.expect_identifier()?;
11170                        cols.push(Expression::Column(Column {
11171                            name: Identifier::new(col),
11172                            table: None,
11173                            join_mark: false,
11174                            trailing_comments: Vec::new(),
11175                            span: None,
11176                            inferred_type: None,
11177                        }));
11178                        if !self.match_token(TokenType::Comma) {
11179                            break;
11180                        }
11181                    }
11182                    self.expect(TokenType::RParen)?;
11183                    let compound_value = if modifier == "COMPOUND" {
11184                        Some(Box::new(Expression::Boolean(BooleanLiteral {
11185                            value: true,
11186                        })))
11187                    } else {
11188                        None
11189                    };
11190                    redshift_ctas_properties.push(Expression::SortKeyProperty(Box::new(
11191                        SortKeyProperty {
11192                            this: Box::new(Expression::Tuple(Box::new(Tuple {
11193                                expressions: cols,
11194                            }))),
11195                            compound: compound_value,
11196                        },
11197                    )));
11198                }
11199            } else if self.match_identifier("SORTKEY") {
11200                // SORTKEY(column, ...)
11201                if self.match_token(TokenType::LParen) {
11202                    let mut cols = Vec::new();
11203                    loop {
11204                        let col = self.expect_identifier()?;
11205                        cols.push(Expression::Column(Column {
11206                            name: Identifier::new(col),
11207                            table: None,
11208                            join_mark: false,
11209                            trailing_comments: Vec::new(),
11210                            span: None,
11211                            inferred_type: None,
11212                        }));
11213                        if !self.match_token(TokenType::Comma) {
11214                            break;
11215                        }
11216                    }
11217                    self.expect(TokenType::RParen)?;
11218                    redshift_ctas_properties.push(Expression::SortKeyProperty(Box::new(
11219                        SortKeyProperty {
11220                            this: Box::new(Expression::Tuple(Box::new(Tuple {
11221                                expressions: cols,
11222                            }))),
11223                            compound: None,
11224                        },
11225                    )));
11226                }
11227            } else if self.match_identifier("DISTSTYLE") {
11228                // DISTSTYLE ALL|EVEN|AUTO|KEY
11229                if self.match_texts(&["ALL", "EVEN", "AUTO", "KEY"]) {
11230                    let style = self.previous().text.to_uppercase();
11231                    redshift_ctas_properties.push(Expression::DistStyleProperty(Box::new(
11232                        DistStyleProperty {
11233                            this: Box::new(Expression::Var(Box::new(Var { this: style }))),
11234                        },
11235                    )));
11236                }
11237            } else if self.match_identifier("BACKUP") {
11238                // BACKUP YES|NO
11239                if self.match_texts(&["YES", "NO"]) {
11240                    let value = self.previous().text.to_uppercase();
11241                    redshift_ctas_properties.push(Expression::BackupProperty(Box::new(
11242                        BackupProperty {
11243                            this: Box::new(Expression::Var(Box::new(Var { this: value }))),
11244                        },
11245                    )));
11246                }
11247            } else {
11248                break;
11249            }
11250        }
11251
11252        // Check for AS SELECT (CTAS)
11253        if self.match_token(TokenType::As) {
11254            // ClickHouse: CREATE TABLE t AS other_table [ENGINE = ...] — copy structure from another table
11255            // Also: CREATE TABLE t AS func_name(args...) — table from function (e.g., remote, merge)
11256            // Detect when AS is followed by an identifier (not SELECT/WITH/LParen)
11257            if is_clickhouse
11258                && !self.check(TokenType::Select)
11259                && !self.check(TokenType::With)
11260                && !self.check(TokenType::LParen)
11261                && (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
11262            {
11263                // Check if this is AS func_name(...) — table function
11264                let is_table_func = self.current + 1 < self.tokens.len()
11265                    && self.tokens[self.current + 1].token_type == TokenType::LParen;
11266                let source = if is_table_func {
11267                    // Parse as expression to consume function call with arguments
11268                    self.parse_primary()?;
11269                    let mut table_properties: Vec<Expression> = Vec::new();
11270                    self.parse_clickhouse_table_properties(&mut table_properties)?;
11271                    return Ok(Expression::CreateTable(Box::new(CreateTable {
11272                        name,
11273                        on_cluster: on_cluster.clone(),
11274                        columns: Vec::new(),
11275                        constraints: Vec::new(),
11276                        if_not_exists,
11277                        temporary,
11278                        or_replace,
11279                        table_modifier: table_modifier.map(|s| s.to_string()),
11280                        as_select: None,
11281                        as_select_parenthesized: false,
11282                        on_commit: None,
11283                        clone_source: None,
11284                        clone_at_clause: None,
11285                        shallow_clone: false,
11286                        is_copy: false,
11287                        leading_comments,
11288                        with_properties,
11289                        teradata_post_name_options: teradata_post_name_options.clone(),
11290                        with_data: None,
11291                        with_statistics: None,
11292                        teradata_indexes: Vec::new(),
11293                        with_cte: None,
11294                        properties: table_properties,
11295                        partition_of: None,
11296                        post_table_properties: redshift_ctas_properties,
11297                        mysql_table_options: Vec::new(),
11298                        inherits: Vec::new(),
11299                        on_property: None,
11300                        copy_grants,
11301                        using_template: None,
11302                        rollup: None,
11303                    })));
11304                } else {
11305                    self.parse_table_ref()?
11306                };
11307                // Parse ClickHouse table properties after the source table
11308                let mut table_properties: Vec<Expression> = Vec::new();
11309                self.parse_clickhouse_table_properties(&mut table_properties)?;
11310                return Ok(Expression::CreateTable(Box::new(CreateTable {
11311                    name,
11312                    on_cluster: on_cluster.clone(),
11313                    columns: Vec::new(),
11314                    constraints: Vec::new(),
11315                    if_not_exists,
11316                    temporary,
11317                    or_replace,
11318                    table_modifier: table_modifier.map(|s| s.to_string()),
11319                    as_select: None,
11320                    as_select_parenthesized: false,
11321                    on_commit: None,
11322                    clone_source: Some(source),
11323                    clone_at_clause: None,
11324                    shallow_clone: false,
11325                    is_copy: false,
11326                    leading_comments,
11327                    with_properties,
11328                    teradata_post_name_options: teradata_post_name_options.clone(),
11329                    with_data: None,
11330                    with_statistics: None,
11331                    teradata_indexes: Vec::new(),
11332                    with_cte: None,
11333                    properties: table_properties,
11334                    partition_of: None,
11335                    post_table_properties: redshift_ctas_properties,
11336                    mysql_table_options: Vec::new(),
11337                    inherits: Vec::new(),
11338                    on_property: None,
11339                    copy_grants,
11340                    using_template: None,
11341                    rollup: None,
11342                })));
11343            }
11344
11345            // The query can be:
11346            // - SELECT ... (simple case)
11347            // - (SELECT 1) UNION ALL (SELECT 2) (set operations)
11348            // - (WITH cte AS (SELECT 1) SELECT * FROM cte) (CTE in parens)
11349            let mut as_select_parenthesized = self.check(TokenType::LParen);
11350            let query = if as_select_parenthesized {
11351                // Parenthesized query - parse as expression which handles subqueries
11352                // Note: parse_primary will consume set operations like UNION internally
11353                let subquery = self.parse_primary()?;
11354                // If parse_primary returned a set operation, the outer parens weren't wrapping
11355                // the entire expression - they were part of the operands
11356                if matches!(
11357                    &subquery,
11358                    Expression::Union(_) | Expression::Intersect(_) | Expression::Except(_)
11359                ) {
11360                    as_select_parenthesized = false;
11361                    subquery
11362                } else {
11363                    // Just a parenthesized query without set ops
11364                    // Keep the Subquery wrapper if it has limit/offset/order_by
11365                    if let Expression::Subquery(ref sq) = subquery {
11366                        if sq.limit.is_some() || sq.offset.is_some() || sq.order_by.is_some() {
11367                            // Keep the Subquery to preserve the modifiers
11368                            subquery
11369                        } else {
11370                            // Extract the inner query
11371                            if let Expression::Subquery(sq) = subquery {
11372                                sq.this
11373                            } else {
11374                                subquery
11375                            }
11376                        }
11377                    } else if let Expression::Paren(p) = subquery {
11378                        p.this
11379                    } else {
11380                        subquery
11381                    }
11382                }
11383            } else if self.check(TokenType::With) {
11384                // Handle WITH ... SELECT ...
11385                self.parse_statement()?
11386            } else {
11387                self.parse_select()?
11388            };
11389
11390            // Parse any trailing Teradata options like "WITH DATA", "NO PRIMARY INDEX", etc.
11391            let (with_data, with_statistics, teradata_indexes) =
11392                self.parse_teradata_table_options();
11393            let on_commit = if matches!(
11394                self.config.dialect,
11395                Some(crate::dialects::DialectType::Teradata)
11396            ) && self.check(TokenType::On)
11397                && self.check_next(TokenType::Commit)
11398            {
11399                self.advance(); // ON
11400                self.advance(); // COMMIT
11401                if self.match_keywords(&[TokenType::Preserve, TokenType::Rows]) {
11402                    Some(OnCommit::PreserveRows)
11403                } else if self.match_keywords(&[TokenType::Delete, TokenType::Rows]) {
11404                    Some(OnCommit::DeleteRows)
11405                } else {
11406                    return Err(
11407                        self.parse_error("Expected PRESERVE ROWS or DELETE ROWS after ON COMMIT")
11408                    );
11409                }
11410            } else {
11411                None
11412            };
11413
11414            return Ok(Expression::CreateTable(Box::new(CreateTable {
11415                name,
11416                on_cluster: on_cluster.clone(),
11417                columns: Vec::new(),
11418                constraints: Vec::new(),
11419                if_not_exists,
11420                temporary,
11421                or_replace,
11422                table_modifier: table_modifier.map(|s| s.to_string()),
11423                as_select: Some(query),
11424                as_select_parenthesized,
11425                on_commit,
11426                clone_source: None,
11427                clone_at_clause: None,
11428                shallow_clone: false,
11429                is_copy: false,
11430                leading_comments,
11431                with_properties,
11432                teradata_post_name_options: teradata_post_name_options.clone(),
11433                with_data,
11434                with_statistics,
11435                teradata_indexes,
11436                with_cte: None,
11437                properties: Vec::new(),
11438                partition_of: None,
11439                post_table_properties: redshift_ctas_properties,
11440                mysql_table_options: Vec::new(),
11441                inherits: Vec::new(),
11442                on_property: None,
11443                copy_grants,
11444                using_template: None,
11445                rollup: None,
11446            })));
11447        }
11448
11449        // ClickHouse: allow table properties/AS SELECT without a column list
11450        if is_clickhouse && !self.check(TokenType::LParen) {
11451            let starts_props = self.check_identifier("ENGINE")
11452                || self.check(TokenType::Order)
11453                || self.check(TokenType::Sample)
11454                || self.check(TokenType::Settings)
11455                || self.check(TokenType::Comment)
11456                || self.check(TokenType::As);
11457
11458            if starts_props {
11459                let mut table_properties: Vec<Expression> = Vec::new();
11460                self.parse_clickhouse_table_properties(&mut table_properties)?;
11461
11462                let as_select = if self.match_token(TokenType::As) {
11463                    Some(self.parse_statement()?)
11464                } else {
11465                    None
11466                };
11467                let as_select_parenthesized = as_select.is_some();
11468
11469                if as_select.is_some() {
11470                    self.parse_clickhouse_table_properties(&mut table_properties)?;
11471                }
11472
11473                return Ok(Expression::CreateTable(Box::new(CreateTable {
11474                    name,
11475                    on_cluster: on_cluster.clone(),
11476                    columns: Vec::new(),
11477                    constraints: Vec::new(),
11478                    if_not_exists,
11479                    temporary,
11480                    or_replace,
11481                    table_modifier: table_modifier.map(|s| s.to_string()),
11482                    as_select,
11483                    as_select_parenthesized,
11484                    on_commit: None,
11485                    clone_source: None,
11486                    clone_at_clause: None,
11487                    shallow_clone: false,
11488                    is_copy: false,
11489                    leading_comments,
11490                    with_properties,
11491                    teradata_post_name_options: teradata_post_name_options.clone(),
11492                    with_data: None,
11493                    with_statistics: None,
11494                    teradata_indexes: Vec::new(),
11495                    with_cte: None,
11496                    properties: table_properties,
11497                    partition_of: None,
11498                    post_table_properties: Vec::new(),
11499                    mysql_table_options: Vec::new(),
11500                    inherits: Vec::new(),
11501                    on_property: None,
11502                    copy_grants,
11503                    using_template: None,
11504                    rollup: None,
11505                })));
11506            }
11507        }
11508
11509        // For DYNAMIC/ICEBERG/EXTERNAL tables, columns might be optional (use AS SELECT or other syntax)
11510        // Check if we have a left paren for columns or if we're going straight to options
11511        if !self.check(TokenType::LParen) && is_special_modifier {
11512            // No columns - parse options and AS SELECT
11513            let mut extra_options = Vec::new();
11514            // Parse key=value options until AS or end
11515            // Note: WAREHOUSE is a keyword token type, so check for it explicitly
11516            while !self.is_at_end()
11517                && !self.check(TokenType::As)
11518                && !self.check(TokenType::Semicolon)
11519            {
11520                if self.is_identifier_token()
11521                    || self.is_safe_keyword_as_identifier()
11522                    || self.check(TokenType::Warehouse)
11523                {
11524                    let key = self.advance().text;
11525                    if self.match_token(TokenType::Eq) {
11526                        // Capture value
11527                        let value = if self.check(TokenType::String) {
11528                            let v = format!("'{}'", self.peek().text);
11529                            self.advance();
11530                            v
11531                        } else if self.is_identifier_token() || self.is_safe_keyword_as_identifier()
11532                        {
11533                            self.advance().text
11534                        } else {
11535                            break;
11536                        };
11537                        extra_options.push((key, value));
11538                    } else {
11539                        // Just a keyword without value (like WAREHOUSE mywh)
11540                        if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
11541                            let value = self.advance().text;
11542                            extra_options.push((key, value));
11543                        }
11544                    }
11545                } else {
11546                    break;
11547                }
11548            }
11549            // Check for AS SELECT
11550            let as_select = if self.match_token(TokenType::As) {
11551                Some(self.parse_statement()?)
11552            } else {
11553                None
11554            };
11555            return Ok(Expression::CreateTable(Box::new(CreateTable {
11556                name,
11557                on_cluster: on_cluster.clone(),
11558                columns: Vec::new(),
11559                constraints: Vec::new(),
11560                if_not_exists,
11561                temporary,
11562                or_replace,
11563                table_modifier: table_modifier.map(|s| s.to_string()),
11564                as_select,
11565                as_select_parenthesized: false,
11566                on_commit: None,
11567                clone_source: None,
11568                clone_at_clause: None,
11569                shallow_clone: false,
11570                is_copy: false,
11571                leading_comments,
11572                with_properties: extra_options,
11573                teradata_post_name_options: teradata_post_name_options.clone(),
11574                with_data: None,
11575                with_statistics: None,
11576                teradata_indexes: Vec::new(),
11577                with_cte: None,
11578                properties: Vec::new(),
11579                partition_of: None,
11580                post_table_properties: Vec::new(),
11581                mysql_table_options: Vec::new(),
11582                inherits: Vec::new(),
11583                on_property: None,
11584                copy_grants,
11585                using_template: None,
11586                rollup: None,
11587            })));
11588        }
11589
11590        // MySQL: CREATE TABLE A LIKE B (without parentheses)
11591        if self.check(TokenType::Like) {
11592            self.advance(); // consume LIKE
11593            let source_ref = self.parse_table_ref()?;
11594            return Ok(Expression::CreateTable(Box::new(CreateTable {
11595                name,
11596                on_cluster: on_cluster.clone(),
11597                columns: Vec::new(),
11598                constraints: vec![TableConstraint::Like {
11599                    source: source_ref,
11600                    options: Vec::new(),
11601                }],
11602                if_not_exists,
11603                temporary,
11604                or_replace,
11605                table_modifier: table_modifier.map(|s| s.to_string()),
11606                as_select: None,
11607                as_select_parenthesized: false,
11608                on_commit: None,
11609                clone_source: None,
11610                clone_at_clause: None,
11611                shallow_clone: false,
11612                is_copy: false,
11613                leading_comments,
11614                with_properties,
11615                teradata_post_name_options: teradata_post_name_options.clone(),
11616                with_data: None,
11617                with_statistics: None,
11618                teradata_indexes: Vec::new(),
11619                with_cte: None,
11620                properties: Vec::new(),
11621                partition_of: None,
11622                post_table_properties: Vec::new(),
11623                mysql_table_options: Vec::new(),
11624                inherits: Vec::new(),
11625                on_property: None,
11626                copy_grants,
11627                using_template: None,
11628                rollup: None,
11629            })));
11630        }
11631
11632        // Snowflake: CREATE TABLE a TAG (key='value', ...) without column definitions
11633        if self.match_keyword("TAG")
11634            || (self.match_token(TokenType::With) && self.match_keyword("TAG"))
11635        {
11636            let tags = self.parse_tags()?;
11637            return Ok(Expression::CreateTable(Box::new(CreateTable {
11638                name,
11639                on_cluster: on_cluster.clone(),
11640                columns: Vec::new(),
11641                constraints: vec![TableConstraint::Tags(tags)],
11642                if_not_exists,
11643                temporary,
11644                or_replace,
11645                table_modifier: table_modifier.map(|s| s.to_string()),
11646                as_select: None,
11647                as_select_parenthesized: false,
11648                on_commit: None,
11649                clone_source: None,
11650                clone_at_clause: None,
11651                shallow_clone: false,
11652                is_copy: false,
11653                leading_comments,
11654                with_properties,
11655                teradata_post_name_options: teradata_post_name_options.clone(),
11656                with_data: None,
11657                with_statistics: None,
11658                teradata_indexes: Vec::new(),
11659                with_cte: None,
11660                properties: Vec::new(),
11661                partition_of: None,
11662                post_table_properties: Vec::new(),
11663                mysql_table_options: Vec::new(),
11664                inherits: Vec::new(),
11665                on_property: None,
11666                copy_grants,
11667                using_template: None,
11668                rollup: None,
11669            })));
11670        }
11671
11672        // Hive/Spark/Databricks: CREATE TABLE t TBLPROPERTIES (...) without column definitions
11673        // Check for Hive-style table properties before expecting column definitions
11674        if self.check_identifier("TBLPROPERTIES")
11675            || self.check_identifier("LOCATION")
11676            || self.check_identifier("STORED")
11677            || self.check(TokenType::Row)
11678            || self.check(TokenType::Using)
11679            || self.check_identifier("CLUSTERED")
11680            || self.check_identifier("PARTITIONED")
11681            || self.check_identifier("COMMENT")
11682        {
11683            // Parse Hive table properties without column definitions
11684            let hive_properties = self.parse_hive_table_properties()?;
11685
11686            // Check for AS SELECT (CTAS) after properties
11687            let as_select = if self.match_token(TokenType::As) {
11688                Some(self.parse_statement()?)
11689            } else {
11690                None
11691            };
11692
11693            return Ok(Expression::CreateTable(Box::new(CreateTable {
11694                name,
11695                on_cluster: on_cluster.clone(),
11696                columns: Vec::new(),
11697                constraints: Vec::new(),
11698                if_not_exists,
11699                temporary,
11700                or_replace,
11701                table_modifier: table_modifier.map(|s| s.to_string()),
11702                as_select,
11703                as_select_parenthesized: false,
11704                on_commit: None,
11705                clone_source: None,
11706                clone_at_clause: None,
11707                shallow_clone: false,
11708                is_copy: false,
11709                leading_comments,
11710                with_properties,
11711                teradata_post_name_options: teradata_post_name_options.clone(),
11712                with_data: None,
11713                with_statistics: None,
11714                teradata_indexes: Vec::new(),
11715                with_cte: None,
11716                properties: hive_properties,
11717                partition_of: None,
11718                post_table_properties: Vec::new(),
11719                mysql_table_options: Vec::new(),
11720                inherits: Vec::new(),
11721                on_property: None,
11722                copy_grants,
11723                using_template: None,
11724                rollup: None,
11725            })));
11726        }
11727
11728        // Check if (SELECT ...) or (WITH ...) follows - this is CTAS without explicit AS keyword
11729        if self.check(TokenType::LParen) {
11730            let saved = self.current;
11731            self.advance(); // consume (
11732            let is_ctas = self.check(TokenType::Select) || self.check(TokenType::With);
11733            self.current = saved;
11734            if is_ctas {
11735                // Parse as subquery
11736                let subquery = self.parse_primary()?;
11737                let query = if let Expression::Subquery(sq) = subquery {
11738                    sq.this
11739                } else if let Expression::Paren(p) = subquery {
11740                    p.this
11741                } else {
11742                    subquery
11743                };
11744                return Ok(Expression::CreateTable(Box::new(CreateTable {
11745                    name,
11746                    on_cluster: on_cluster.clone(),
11747                    columns: Vec::new(),
11748                    constraints: Vec::new(),
11749                    if_not_exists,
11750                    temporary,
11751                    or_replace,
11752                    table_modifier: table_modifier.map(|s| s.to_string()),
11753                    as_select: Some(query),
11754                    as_select_parenthesized: true,
11755                    on_commit: None,
11756                    clone_source: None,
11757                    clone_at_clause: None,
11758                    shallow_clone: false,
11759                    is_copy: false,
11760                    leading_comments,
11761                    with_properties,
11762                    teradata_post_name_options: teradata_post_name_options.clone(),
11763                    with_data: None,
11764                    with_statistics: None,
11765                    teradata_indexes: Vec::new(),
11766                    with_cte: None,
11767                    properties: Vec::new(),
11768                    partition_of: None,
11769                    post_table_properties: Vec::new(),
11770                    mysql_table_options: Vec::new(),
11771                    inherits: Vec::new(),
11772                    on_property: None,
11773                    copy_grants,
11774                    using_template: None,
11775                    rollup: None,
11776                })));
11777            }
11778        }
11779
11780        // BigQuery (and others): CREATE TABLE t PARTITION BY ... CLUSTER BY ... OPTIONS(...) AS (SELECT ...)
11781        // When there are no column definitions, skip straight to property/AS parsing
11782        let no_column_defs = !self.check(TokenType::LParen)
11783            && (self.check(TokenType::Partition)
11784                || self.check(TokenType::PartitionBy)
11785                || self.check(TokenType::Cluster)
11786                || self.check_identifier("OPTIONS")
11787                || self.check(TokenType::As));
11788
11789        // Parse column definitions
11790        if !no_column_defs {
11791            self.expect(TokenType::LParen)?;
11792        }
11793
11794        // For DYNAMIC TABLE, column list contains only names without types
11795        // e.g., CREATE DYNAMIC TABLE t (col1, col2, col3) TARGET_LAG=... AS SELECT ...
11796        let (columns, constraints) = if no_column_defs {
11797            (Vec::new(), Vec::new())
11798        } else if table_modifier == Some("DYNAMIC") {
11799            // Check if this looks like a simple column name list (just identifiers separated by commas)
11800            // by peeking ahead - if next token after identifier is comma or rparen, it's a name-only list
11801            let saved = self.current;
11802            let is_name_only_list =
11803                if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
11804                    self.advance();
11805                    let result = self.check(TokenType::Comma) || self.check(TokenType::RParen);
11806                    self.current = saved;
11807                    result
11808                } else {
11809                    false
11810                };
11811
11812            if is_name_only_list {
11813                // Parse column names without types
11814                let mut cols = Vec::new();
11815                loop {
11816                    let name = self.expect_identifier_or_safe_keyword_with_quoted()?;
11817                    // Create a column def with an empty/placeholder type
11818                    let mut col_def = ColumnDef::new(
11819                        name.name.clone(),
11820                        DataType::Custom {
11821                            name: String::new(),
11822                        },
11823                    );
11824                    col_def.name = name;
11825                    cols.push(col_def);
11826                    if !self.match_token(TokenType::Comma) {
11827                        break;
11828                    }
11829                }
11830                (cols, Vec::new())
11831            } else {
11832                // Regular column definitions with types
11833                self.parse_column_definitions()?
11834            }
11835        } else {
11836            self.parse_column_definitions()?
11837        };
11838
11839        if !no_column_defs {
11840            self.expect(TokenType::RParen)?;
11841        }
11842
11843        // Parse COMMENT before WITH properties (Presto: CREATE TABLE x (...) COMMENT 'text' WITH (...))
11844        let pre_with_comment = if self.check(TokenType::Comment) {
11845            let saved = self.current;
11846            self.advance(); // consume COMMENT
11847            if self.check(TokenType::String) {
11848                let comment_text = self.advance().text.clone();
11849                Some(comment_text)
11850            } else {
11851                self.current = saved;
11852                None
11853            }
11854        } else {
11855            None
11856        };
11857
11858        // Handle WITH properties after columns (e.g., CREATE TABLE z (z INT) WITH (...))
11859        // But skip if this is WITH(SYSTEM_VERSIONING=...) which is handled by parse_post_table_properties
11860        let with_properties_after = if self.check(TokenType::With) {
11861            // Lookahead: check if this is WITH(SYSTEM_VERSIONING=...)
11862            let saved = self.current;
11863            self.advance(); // consume WITH
11864            let is_system_versioning = if self.check(TokenType::LParen) {
11865                let saved2 = self.current;
11866                self.advance(); // consume (
11867                let result = self.check_identifier("SYSTEM_VERSIONING");
11868                self.current = saved2; // retreat to before (
11869                result
11870            } else {
11871                false
11872            };
11873            if is_system_versioning {
11874                // Retreat back before WITH, let parse_post_table_properties handle it
11875                self.current = saved;
11876                Vec::new()
11877            } else {
11878                // Normal WITH properties parsing
11879                self.parse_with_properties()?
11880            }
11881        } else {
11882            Vec::new()
11883        };
11884
11885        // Combine properties from before and after columns
11886        let mut all_with_properties = with_properties;
11887        all_with_properties.extend(with_properties_after);
11888
11889        // For DYNAMIC/ICEBERG/EXTERNAL tables with columns, parse Snowflake-specific options
11890        // like TARGET_LAG, WAREHOUSE, CATALOG, EXTERNAL_VOLUME, LOCATION etc.
11891        if is_special_modifier {
11892            while !self.is_at_end()
11893                && !self.check(TokenType::As)
11894                && !self.check(TokenType::Semicolon)
11895            {
11896                // Check for known Snowflake table options (WAREHOUSE is a keyword, others are identifiers)
11897                // These are Snowflake-style options that use KEY=VALUE or KEY VALUE (without =)
11898                // Hive-style LOCATION/TBLPROPERTIES (without =) should NOT be matched here
11899                let is_snowflake_option = self.check(TokenType::Warehouse)
11900                    || self.check_identifier("TARGET_LAG")
11901                    || self.check_identifier("CATALOG")
11902                    || self.check_identifier("EXTERNAL_VOLUME")
11903                    || self.check_identifier("BASE_LOCATION")
11904                    || self.check_identifier("REFRESH_MODE")
11905                    || self.check_identifier("INITIALIZE")
11906                    || self.check_identifier("DATA_RETENTION_TIME_IN_DAYS")
11907                    || self.check_identifier("LOCATION")
11908                    || self.check_identifier("PARTITION")
11909                    || self.check_identifier("FILE_FORMAT")
11910                    || self.check_identifier("AUTO_REFRESH");
11911                if is_snowflake_option {
11912                    // Save position before consuming key - we might need to retreat for Hive-style syntax
11913                    let saved = self.current;
11914                    let key = self.advance().text;
11915                    if self.match_token(TokenType::Eq) {
11916                        // Capture value - could be string, identifier, stage path @..., keyword, or parenthesized options
11917                        let value = if self.check(TokenType::LParen) {
11918                            // Parenthesized option list like file_format = (type = parquet compression = gzip)
11919                            self.advance(); // consume (
11920                            let mut options = String::from("(");
11921                            let mut depth = 1;
11922                            while !self.is_at_end() && depth > 0 {
11923                                let tok = self.advance();
11924                                if tok.token_type == TokenType::LParen {
11925                                    depth += 1;
11926                                } else if tok.token_type == TokenType::RParen {
11927                                    depth -= 1;
11928                                }
11929                                // Add space before tokens that need it (not after open paren, not before close paren)
11930                                if !options.ends_with('(')
11931                                    && !options.ends_with(' ')
11932                                    && tok.token_type != TokenType::RParen
11933                                {
11934                                    options.push(' ');
11935                                }
11936                                options.push_str(&tok.text);
11937                            }
11938                            options
11939                        } else if self.check(TokenType::String) {
11940                            let v = format!("'{}'", self.peek().text);
11941                            self.advance();
11942                            v
11943                        } else if self.check(TokenType::DAt) {
11944                            // Stage path like @s1/logs/
11945                            self.advance(); // consume @
11946                            let mut path = String::from("@");
11947                            if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
11948                                path.push_str(&self.advance().text);
11949                            }
11950                            // Parse path segments, but stop before Snowflake option keywords
11951                            while self.check(TokenType::Slash) {
11952                                // Peek ahead to see if next identifier is a Snowflake option keyword
11953                                if self.current + 1 < self.tokens.len() {
11954                                    let next = &self.tokens[self.current + 1];
11955                                    let next_text_upper = next.text.to_uppercase();
11956                                    if next_text_upper == "FILE_FORMAT"
11957                                        || next_text_upper == "PARTITION_TYPE"
11958                                        || next_text_upper == "AUTO_REFRESH"
11959                                        || next_text_upper == "LOCATION"
11960                                        || next_text_upper == "PARTITION"
11961                                        || next_text_upper == "WAREHOUSE"
11962                                    {
11963                                        // Consume the trailing slash before the keyword
11964                                        self.advance();
11965                                        path.push('/');
11966                                        break;
11967                                    }
11968                                }
11969                                self.advance();
11970                                path.push('/');
11971                                if self.is_identifier_token()
11972                                    || self.is_safe_keyword_as_identifier()
11973                                {
11974                                    path.push_str(&self.advance().text);
11975                                }
11976                            }
11977                            path
11978                        } else if self.check(TokenType::Var) && self.peek().text.starts_with('@') {
11979                            // Stage path tokenized as Var (e.g., @s2/logs/)
11980                            // When @ is followed by alphanumeric, tokenizer creates a Var token
11981                            let mut path = self.advance().text;
11982                            // Parse path segments, but stop before Snowflake option keywords
11983                            while self.check(TokenType::Slash) {
11984                                // Peek ahead to see if next identifier is a Snowflake option keyword
11985                                if self.current + 1 < self.tokens.len() {
11986                                    let next = &self.tokens[self.current + 1];
11987                                    let next_text_upper = next.text.to_uppercase();
11988                                    if next_text_upper == "FILE_FORMAT"
11989                                        || next_text_upper == "PARTITION_TYPE"
11990                                        || next_text_upper == "AUTO_REFRESH"
11991                                        || next_text_upper == "LOCATION"
11992                                        || next_text_upper == "PARTITION"
11993                                        || next_text_upper == "WAREHOUSE"
11994                                    {
11995                                        // Consume the trailing slash before the keyword
11996                                        self.advance();
11997                                        path.push('/');
11998                                        break;
11999                                    }
12000                                }
12001                                self.advance();
12002                                path.push('/');
12003                                if self.is_identifier_token()
12004                                    || self.is_safe_keyword_as_identifier()
12005                                {
12006                                    path.push_str(&self.advance().text);
12007                                }
12008                            }
12009                            path
12010                        } else if self.check(TokenType::Warehouse) {
12011                            self.advance().text
12012                        } else if self.is_identifier_token() || self.is_safe_keyword_as_identifier()
12013                        {
12014                            self.advance().text
12015                        } else {
12016                            // No valid value after =, retreat and let Hive parsing try
12017                            self.current = saved;
12018                            break;
12019                        };
12020                        all_with_properties.push((key, value));
12021                    } else if self.is_identifier_token()
12022                        || self.is_safe_keyword_as_identifier()
12023                        || self.check(TokenType::Warehouse)
12024                    {
12025                        // WAREHOUSE mywh (without =)
12026                        let value = self.advance().text;
12027                        all_with_properties.push((key, value));
12028                    } else {
12029                        // Not a Snowflake-style option (e.g., Hive LOCATION 'path' without =)
12030                        // Retreat and let Hive parsing try
12031                        self.current = saved;
12032                        break;
12033                    }
12034                } else {
12035                    break;
12036                }
12037            }
12038        }
12039
12040        // Parse MySQL table options: ENGINE=val, AUTO_INCREMENT=val, DEFAULT CHARSET=val, etc.
12041        let mysql_table_options = if is_clickhouse {
12042            Vec::new()
12043        } else {
12044            self.parse_mysql_table_options()
12045        };
12046
12047        // Parse StarRocks ROLLUP property: ROLLUP (r1(col1, col2), r2(col1))
12048        let rollup = if self.match_token(TokenType::Rollup) {
12049            self.expect(TokenType::LParen)?;
12050            let mut indices = Vec::new();
12051            loop {
12052                let name = self.expect_identifier_or_keyword_with_quoted()?;
12053                let cols = if self.match_token(TokenType::LParen) {
12054                    let mut col_list = Vec::new();
12055                    loop {
12056                        col_list.push(self.expect_identifier_or_keyword_with_quoted()?);
12057                        if !self.match_token(TokenType::Comma) {
12058                            break;
12059                        }
12060                    }
12061                    self.expect(TokenType::RParen)?;
12062                    col_list
12063                } else {
12064                    Vec::new()
12065                };
12066                indices.push(crate::expressions::RollupIndex {
12067                    name,
12068                    expressions: cols,
12069                });
12070                if !self.match_token(TokenType::Comma) {
12071                    break;
12072                }
12073            }
12074            self.expect(TokenType::RParen)?;
12075            Some(crate::expressions::RollupProperty {
12076                expressions: indices,
12077            })
12078        } else {
12079            None
12080        };
12081
12082        // Parse Hive table properties: ROW FORMAT, STORED AS/BY, LOCATION, TBLPROPERTIES
12083        let hive_properties = self.parse_hive_table_properties()?;
12084        let is_teradata = matches!(
12085            self.config.dialect,
12086            Some(crate::dialects::DialectType::Teradata)
12087        );
12088
12089        // Handle ON COMMIT PRESERVE ROWS or ON COMMIT DELETE ROWS
12090        // Also handle TSQL ON filegroup or ON filegroup (partition_column)
12091        let (mut on_commit, on_property) = if is_teradata {
12092            (None, None)
12093        } else if self.match_token(TokenType::On) {
12094            if self.match_token(TokenType::Commit) {
12095                if self.match_keywords(&[TokenType::Preserve, TokenType::Rows]) {
12096                    (Some(OnCommit::PreserveRows), None)
12097                } else if self.match_keywords(&[TokenType::Delete, TokenType::Rows]) {
12098                    (Some(OnCommit::DeleteRows), None)
12099                } else {
12100                    return Err(
12101                        self.parse_error("Expected PRESERVE ROWS or DELETE ROWS after ON COMMIT")
12102                    );
12103                }
12104            } else {
12105                // TSQL: ON filegroup or ON filegroup (partition_column)
12106                // Parse filegroup name as schema which allows filegroup(column) syntax
12107                let filegroup = self.parse_schema_identifier()?;
12108                (
12109                    None,
12110                    Some(OnProperty {
12111                        this: Box::new(filegroup),
12112                    }),
12113                )
12114            }
12115        } else {
12116            (None, None)
12117        };
12118
12119        // Parse table properties like DEFAULT COLLATE (BigQuery)
12120        let mut table_properties = hive_properties;
12121
12122        // If COMMENT was found before WITH, add it to table_properties as SchemaCommentProperty
12123        if let Some(comment_text) = pre_with_comment {
12124            table_properties.push(Expression::SchemaCommentProperty(Box::new(
12125                SchemaCommentProperty {
12126                    this: Box::new(Expression::Literal(Literal::String(comment_text))),
12127                },
12128            )));
12129        }
12130
12131        if self.match_token(TokenType::Default) && self.match_token(TokenType::Collate) {
12132            let collation = self.parse_primary()?;
12133            table_properties.push(Expression::CollateProperty(Box::new(CollateProperty {
12134                this: Box::new(collation),
12135                default: Some(Box::new(Expression::Boolean(BooleanLiteral {
12136                    value: true,
12137                }))),
12138            })));
12139        }
12140
12141        // BigQuery: OPTIONS (key=value, ...) on table - comes after column definitions
12142        if matches!(
12143            self.config.dialect,
12144            Some(crate::dialects::DialectType::BigQuery)
12145        ) {
12146            if let Some(options_property) = self.parse_bigquery_options_property()? {
12147                table_properties.push(options_property);
12148            }
12149        } else if self.match_identifier("OPTIONS") {
12150            let options = self.parse_options_list()?;
12151            table_properties.push(Expression::Properties(Box::new(Properties {
12152                expressions: options,
12153            })));
12154        }
12155
12156        // Doris/StarRocks: PROPERTIES ('key'='value', ...) - comes after column definitions
12157        let is_doris_starrocks = matches!(
12158            self.config.dialect,
12159            Some(crate::dialects::DialectType::Doris)
12160                | Some(crate::dialects::DialectType::StarRocks)
12161        );
12162        if is_doris_starrocks && self.match_identifier("PROPERTIES") {
12163            // Use parse_options_list which handles 'key'='value' format
12164            let props = self.parse_options_list()?;
12165            if !props.is_empty() {
12166                table_properties.push(Expression::Properties(Box::new(Properties {
12167                    expressions: props,
12168                })));
12169            }
12170        }
12171
12172        // Redshift: Parse DISTKEY, SORTKEY, DISTSTYLE, BACKUP after column definitions
12173        // These can appear in any order and multiple times
12174        loop {
12175            if self.match_identifier("DISTKEY") {
12176                // DISTKEY(column)
12177                if let Some(distkey) = self.parse_distkey()? {
12178                    table_properties.push(distkey);
12179                }
12180            } else if self.match_text_seq(&["COMPOUND", "SORTKEY"]) {
12181                // COMPOUND SORTKEY(col1, col2, ...)
12182                if let Some(sortkey) = self.parse_sortkey()? {
12183                    // Set compound flag
12184                    if let Expression::SortKeyProperty(mut skp) = sortkey {
12185                        skp.compound = Some(Box::new(Expression::Boolean(BooleanLiteral {
12186                            value: true,
12187                        })));
12188                        table_properties.push(Expression::SortKeyProperty(skp));
12189                    }
12190                }
12191            } else if self.match_identifier("SORTKEY") {
12192                // SORTKEY(col1, col2, ...)
12193                if let Some(sortkey) = self.parse_sortkey()? {
12194                    table_properties.push(sortkey);
12195                }
12196            } else if self.match_identifier("DISTSTYLE") {
12197                // DISTSTYLE ALL|EVEN|AUTO|KEY
12198                if self.match_texts(&["ALL", "EVEN", "AUTO", "KEY"]) {
12199                    let style = self.previous().text.to_uppercase();
12200                    table_properties.push(Expression::DistStyleProperty(Box::new(
12201                        DistStyleProperty {
12202                            this: Box::new(Expression::Var(Box::new(Var { this: style }))),
12203                        },
12204                    )));
12205                }
12206            } else if self.match_identifier("BACKUP") {
12207                // BACKUP YES|NO
12208                if self.match_texts(&["YES", "NO"]) {
12209                    let value = self.previous().text.to_uppercase();
12210                    table_properties.push(Expression::BackupProperty(Box::new(BackupProperty {
12211                        this: Box::new(Expression::Var(Box::new(Var { this: value }))),
12212                    })));
12213                }
12214            } else {
12215                break;
12216            }
12217        }
12218
12219        // Teradata: PRIMARY/UNIQUE/INDEX and PARTITION BY clauses after columns
12220        if is_teradata {
12221            loop {
12222                // Consume optional comma separator between index specs (only if followed by an index keyword)
12223                if self.check(TokenType::Comma) {
12224                    let saved_comma = self.current;
12225                    self.advance(); // consume comma
12226                    let is_index_keyword = self.check(TokenType::Unique)
12227                        || self.check(TokenType::PrimaryKey)
12228                        || self.check(TokenType::Index)
12229                        || self.check(TokenType::No);
12230                    if !is_index_keyword {
12231                        self.current = saved_comma; // retreat
12232                    }
12233                }
12234                if self.match_token(TokenType::Unique) {
12235                    let primary = self.match_token(TokenType::PrimaryKey);
12236                    let amp = self.match_identifier("AMP");
12237                    self.match_token(TokenType::Index);
12238                    let params = if self.match_token(TokenType::LParen) {
12239                        let cols = self.parse_identifier_list()?;
12240                        self.expect(TokenType::RParen)?;
12241                        cols.into_iter()
12242                            .map(|id| {
12243                                Expression::Column(Column {
12244                                    name: id,
12245                                    table: None,
12246                                    join_mark: false,
12247                                    trailing_comments: Vec::new(),
12248                                    span: None,
12249                                    inferred_type: None,
12250                                })
12251                            })
12252                            .collect()
12253                    } else {
12254                        Vec::new()
12255                    };
12256                    table_properties.push(Expression::Index(Box::new(Index {
12257                        this: None,
12258                        table: None,
12259                        unique: true,
12260                        primary: if primary {
12261                            Some(Box::new(Expression::Boolean(BooleanLiteral {
12262                                value: true,
12263                            })))
12264                        } else {
12265                            None
12266                        },
12267                        amp: if amp {
12268                            Some(Box::new(Expression::Boolean(BooleanLiteral {
12269                                value: true,
12270                            })))
12271                        } else {
12272                            None
12273                        },
12274                        params,
12275                    })));
12276                    continue;
12277                }
12278                if self.match_token(TokenType::PrimaryKey) {
12279                    let amp = self.match_identifier("AMP");
12280                    self.match_token(TokenType::Index);
12281                    let params = if self.match_token(TokenType::LParen) {
12282                        let cols = self.parse_identifier_list()?;
12283                        self.expect(TokenType::RParen)?;
12284                        cols.into_iter()
12285                            .map(|id| {
12286                                Expression::Column(Column {
12287                                    name: id,
12288                                    table: None,
12289                                    join_mark: false,
12290                                    trailing_comments: Vec::new(),
12291                                    span: None,
12292                                    inferred_type: None,
12293                                })
12294                            })
12295                            .collect()
12296                    } else {
12297                        Vec::new()
12298                    };
12299                    table_properties.push(Expression::Index(Box::new(Index {
12300                        this: None,
12301                        table: None,
12302                        unique: false,
12303                        primary: Some(Box::new(Expression::Boolean(BooleanLiteral {
12304                            value: true,
12305                        }))),
12306                        amp: if amp {
12307                            Some(Box::new(Expression::Boolean(BooleanLiteral {
12308                                value: true,
12309                            })))
12310                        } else {
12311                            None
12312                        },
12313                        params,
12314                    })));
12315                    continue;
12316                }
12317                if self.match_token(TokenType::Index) {
12318                    let params = if self.match_token(TokenType::LParen) {
12319                        let cols = self.parse_identifier_list()?;
12320                        self.expect(TokenType::RParen)?;
12321                        cols.into_iter()
12322                            .map(|id| {
12323                                Expression::Column(Column {
12324                                    name: id,
12325                                    table: None,
12326                                    join_mark: false,
12327                                    trailing_comments: Vec::new(),
12328                                    span: None,
12329                                    inferred_type: None,
12330                                })
12331                            })
12332                            .collect()
12333                    } else {
12334                        Vec::new()
12335                    };
12336                    table_properties.push(Expression::Index(Box::new(Index {
12337                        this: None,
12338                        table: None,
12339                        unique: false,
12340                        primary: None,
12341                        amp: None,
12342                        params,
12343                    })));
12344                    continue;
12345                }
12346                if self.match_keywords(&[TokenType::Partition, TokenType::By]) {
12347                    let expr = self.parse_primary()?;
12348                    table_properties.push(Expression::PartitionedByProperty(Box::new(
12349                        PartitionedByProperty {
12350                            this: Box::new(expr),
12351                        },
12352                    )));
12353                    continue;
12354                }
12355                break;
12356            }
12357
12358            if on_commit.is_none()
12359                && self.check(TokenType::On)
12360                && self.check_next(TokenType::Commit)
12361            {
12362                self.advance(); // ON
12363                self.advance(); // COMMIT
12364                if self.match_keywords(&[TokenType::Preserve, TokenType::Rows]) {
12365                    on_commit = Some(OnCommit::PreserveRows);
12366                } else if self.match_keywords(&[TokenType::Delete, TokenType::Rows]) {
12367                    on_commit = Some(OnCommit::DeleteRows);
12368                } else {
12369                    return Err(
12370                        self.parse_error("Expected PRESERVE ROWS or DELETE ROWS after ON COMMIT")
12371                    );
12372                }
12373            }
12374        }
12375
12376        // ClickHouse: table properties after column definitions
12377        if is_clickhouse {
12378            self.parse_clickhouse_table_properties(&mut table_properties)?;
12379        }
12380
12381        // ClickHouse: EMPTY AS SELECT
12382        if matches!(
12383            self.config.dialect,
12384            Some(crate::dialects::DialectType::ClickHouse)
12385        ) && self.match_identifier("EMPTY")
12386        {
12387            table_properties.push(Expression::Var(Box::new(Var {
12388                this: "EMPTY".to_string(),
12389            })));
12390        }
12391
12392        // Handle AS SELECT after columns/WITH (CTAS with column definitions)
12393        // When there are no column definitions, AS comes after PARTITION BY/CLUSTER BY/OPTIONS
12394        let as_select = if !no_column_defs && self.match_token(TokenType::As) {
12395            Some(self.parse_statement()?)
12396        } else {
12397            None
12398        };
12399
12400        if is_clickhouse && as_select.is_some() {
12401            self.parse_clickhouse_table_properties(&mut table_properties)?;
12402        }
12403
12404        // Parse PARTITION BY RANGE/LIST/HASH(columns) for regular CREATE TABLE
12405        let is_bigquery = matches!(
12406            self.config.dialect,
12407            Some(crate::dialects::DialectType::BigQuery)
12408        );
12409        if !is_teradata && (self.check(TokenType::Partition) || self.check(TokenType::PartitionBy))
12410        {
12411            let parsed_bigquery_partition = if is_bigquery {
12412                if let Some(partition_property) = self.parse_bigquery_partition_by_property()? {
12413                    table_properties.push(partition_property);
12414                    true
12415                } else {
12416                    false
12417                }
12418            } else {
12419                false
12420            };
12421
12422            if !parsed_bigquery_partition {
12423                let saved = self.current;
12424                let is_partition_by = if self.match_token(TokenType::PartitionBy) {
12425                    true
12426                } else if self.match_token(TokenType::Partition) {
12427                    self.match_token(TokenType::By)
12428                } else {
12429                    false
12430                };
12431                if is_partition_by {
12432                    let partition_kind = if self.check(TokenType::Range) {
12433                        self.advance();
12434                        Some("RANGE".to_string())
12435                    } else if self.check(TokenType::List) {
12436                        self.advance();
12437                        Some("LIST".to_string())
12438                    } else if (self.check(TokenType::Identifier) || self.check(TokenType::Var))
12439                        && self.check_next(TokenType::LParen)
12440                    {
12441                        // Only treat identifier as partition method (like HASH) if followed by (
12442                        Some(self.advance().text.to_uppercase())
12443                    } else {
12444                        // No explicit partition method (RANGE/LIST/HASH), just PARTITION BY (cols)
12445                        None
12446                    };
12447
12448                    // StarRocks/Doris: PARTITION BY func(), col (bare expressions without RANGE/LIST)
12449                    // When the partition_kind was consumed as an identifier that's actually a function call
12450                    // and the content after the parenthesized args includes a comma, it's a bare expression list
12451                    if is_doris_starrocks
12452                        && partition_kind.is_some()
12453                        && !matches!(
12454                            partition_kind.as_deref(),
12455                            Some("RANGE") | Some("LIST") | Some("HASH") | Some("KEY")
12456                        )
12457                    {
12458                        // Backtrack: re-parse as bare PARTITION BY with comma-separated expressions
12459                        let func_name = partition_kind.unwrap();
12460                        let mut raw_sql = format!("PARTITION BY {}", func_name);
12461                        // Helper closure for consuming parenthesized content with proper spacing
12462                        fn consume_parens(parser: &mut Parser, raw_sql: &mut String) {
12463                            if !parser.check(TokenType::LParen) {
12464                                return;
12465                            }
12466                            parser.advance();
12467                            raw_sql.push('(');
12468                            let mut depth = 1;
12469                            let mut last_type: Option<TokenType> = None;
12470                            while !parser.is_at_end() && depth > 0 {
12471                                let tok = parser.advance();
12472                                if tok.token_type == TokenType::LParen {
12473                                    depth += 1;
12474                                } else if tok.token_type == TokenType::RParen {
12475                                    depth -= 1;
12476                                    if depth == 0 {
12477                                        break;
12478                                    }
12479                                }
12480                                // Add space after commas
12481                                if matches!(last_type, Some(TokenType::Comma)) {
12482                                    raw_sql.push(' ');
12483                                }
12484                                if tok.token_type == TokenType::String {
12485                                    raw_sql.push('\'');
12486                                    raw_sql.push_str(&tok.text);
12487                                    raw_sql.push('\'');
12488                                } else {
12489                                    raw_sql.push_str(&tok.text);
12490                                }
12491                                last_type = Some(tok.token_type.clone());
12492                            }
12493                            raw_sql.push(')');
12494                        }
12495                        consume_parens(self, &mut raw_sql);
12496                        // Consume more comma-separated expressions
12497                        while self.match_token(TokenType::Comma) {
12498                            raw_sql.push_str(", ");
12499                            let tok = self.advance();
12500                            raw_sql.push_str(&tok.text);
12501                            consume_parens(self, &mut raw_sql);
12502                        }
12503                        table_properties.push(Expression::Raw(Raw { sql: raw_sql }));
12504                    } else
12505                    // For Doris/StarRocks/MySQL RANGE/LIST, use structured parsing
12506                    if (is_doris_starrocks
12507                        || matches!(
12508                            self.config.dialect,
12509                            Some(crate::dialects::DialectType::MySQL)
12510                                | Some(crate::dialects::DialectType::SingleStore)
12511                                | Some(crate::dialects::DialectType::TiDB)
12512                        ))
12513                        && matches!(partition_kind.as_deref(), Some("RANGE") | Some("LIST"))
12514                    {
12515                        let partition_expr = self.parse_doris_partition_by_range_or_list(
12516                            partition_kind
12517                                .as_ref()
12518                                .map(|s| s.as_str())
12519                                .unwrap_or("RANGE"),
12520                        )?;
12521                        table_properties.push(partition_expr);
12522                    } else {
12523                        // Generic raw SQL parsing for other dialects
12524                        let no_partition_kind = partition_kind.is_none();
12525                        let mut raw_sql = match partition_kind {
12526                            Some(kind) => format!("PARTITION BY {}", kind),
12527                            None => "PARTITION BY ".to_string(),
12528                        };
12529                        if self.check(TokenType::LParen) {
12530                            self.advance();
12531                            raw_sql.push('(');
12532                            let mut depth = 1;
12533                            let mut last_tok_type: Option<TokenType> = None;
12534                            while !self.is_at_end() && depth > 0 {
12535                                let tok = self.advance();
12536                                if tok.token_type == TokenType::LParen {
12537                                    depth += 1;
12538                                } else if tok.token_type == TokenType::RParen {
12539                                    depth -= 1;
12540                                    if depth == 0 {
12541                                        break;
12542                                    }
12543                                }
12544                                // Add space before token if needed for proper formatting
12545                                let needs_space = match (&last_tok_type, &tok.token_type) {
12546                                    // Add space after comma
12547                                    (Some(TokenType::Comma), _) => true,
12548                                    // Add space after identifiers/keywords before other identifiers/keywords
12549                                    (Some(TokenType::Identifier), TokenType::Identifier) => true,
12550                                    _ => false,
12551                                };
12552                                if needs_space {
12553                                    raw_sql.push(' ');
12554                                }
12555                                // Handle string literals - preserve quotes
12556                                if tok.token_type == TokenType::String {
12557                                    raw_sql.push('\'');
12558                                    raw_sql.push_str(&tok.text);
12559                                    raw_sql.push('\'');
12560                                } else {
12561                                    raw_sql.push_str(&tok.text);
12562                                }
12563                                last_tok_type = Some(tok.token_type.clone());
12564                            }
12565                            raw_sql.push(')');
12566                        } else if no_partition_kind {
12567                            // Bare PARTITION BY expression list without a partition method
12568                            let mut first = true;
12569                            while !self.is_at_end()
12570                                && !self.check(TokenType::Cluster)
12571                                && !self.check(TokenType::As)
12572                                && !self.check(TokenType::Semicolon)
12573                                && !self.check(TokenType::RParen)
12574                                && !self.check_identifier("OPTIONS")
12575                            {
12576                                if !first {
12577                                    raw_sql.push_str(", ");
12578                                }
12579                                first = false;
12580                                let tok = self.advance();
12581                                raw_sql.push_str(&tok.text);
12582                                // Handle function calls: PARTITION BY DATE(col)
12583                                if self.check(TokenType::LParen) {
12584                                    self.advance();
12585                                    raw_sql.push('(');
12586                                    let mut depth = 1;
12587                                    while !self.is_at_end() && depth > 0 {
12588                                        let t = self.advance();
12589                                        if t.token_type == TokenType::LParen {
12590                                            depth += 1;
12591                                        } else if t.token_type == TokenType::RParen {
12592                                            depth -= 1;
12593                                            if depth == 0 {
12594                                                break;
12595                                            }
12596                                        }
12597                                        raw_sql.push_str(&t.text);
12598                                    }
12599                                    raw_sql.push(')');
12600                                }
12601                                if !self.match_token(TokenType::Comma) {
12602                                    break;
12603                                }
12604                            }
12605                        }
12606                        table_properties.push(Expression::Raw(Raw { sql: raw_sql }));
12607                    }
12608                } else {
12609                    self.current = saved;
12610                }
12611            }
12612        }
12613
12614        // Parse CLUSTER BY (BigQuery) after PARTITION BY
12615        if is_bigquery {
12616            if let Some(cluster_property) = self.parse_bigquery_cluster_by_property()? {
12617                table_properties.push(cluster_property);
12618            }
12619        } else if self.match_keywords(&[TokenType::Cluster, TokenType::By]) {
12620            let mut cluster_names = Vec::new();
12621            loop {
12622                let name = self.expect_identifier_or_keyword()?;
12623                cluster_names.push(name);
12624                if !self.match_token(TokenType::Comma) {
12625                    break;
12626                }
12627            }
12628            table_properties.push(Expression::Raw(Raw {
12629                sql: format!("CLUSTER BY {}", cluster_names.join(", ")),
12630            }));
12631        }
12632
12633        // No-column-defs path: OPTIONS and AS SELECT come after PARTITION BY / CLUSTER BY
12634        if no_column_defs {
12635            if matches!(
12636                self.config.dialect,
12637                Some(crate::dialects::DialectType::BigQuery)
12638            ) {
12639                if let Some(options_property) = self.parse_bigquery_options_property()? {
12640                    table_properties.push(options_property);
12641                }
12642            } else if self.match_identifier("OPTIONS") {
12643                let options = self.parse_options_list()?;
12644                table_properties.push(Expression::Properties(Box::new(Properties {
12645                    expressions: options,
12646                })));
12647            }
12648        }
12649
12650        let as_select = if no_column_defs && self.match_token(TokenType::As) {
12651            Some(self.parse_statement()?)
12652        } else {
12653            as_select
12654        };
12655
12656        // For EXTERNAL tables, parse additional Snowflake options that may come after PARTITION BY
12657        // (location=@s2/logs/, partition_type = user_specified, file_format = (...), etc.)
12658        if is_special_modifier {
12659            while !self.is_at_end()
12660                && !self.check(TokenType::As)
12661                && !self.check(TokenType::Semicolon)
12662            {
12663                let is_snowflake_option = self.check(TokenType::Warehouse)
12664                    || self.check_identifier("TARGET_LAG")
12665                    || self.check_identifier("CATALOG")
12666                    || self.check_identifier("EXTERNAL_VOLUME")
12667                    || self.check_identifier("BASE_LOCATION")
12668                    || self.check_identifier("REFRESH_MODE")
12669                    || self.check_identifier("INITIALIZE")
12670                    || self.check_identifier("DATA_RETENTION_TIME_IN_DAYS")
12671                    || self.check_identifier("LOCATION")
12672                    || self.check_identifier("PARTITION_TYPE")
12673                    || self.check_identifier("FILE_FORMAT")
12674                    || self.check_identifier("AUTO_REFRESH");
12675                if is_snowflake_option {
12676                    let key = self.advance().text;
12677                    if self.match_token(TokenType::Eq) {
12678                        let value = if self.check(TokenType::LParen) {
12679                            // Parenthesized option list
12680                            self.advance();
12681                            let mut options = String::from("(");
12682                            let mut depth = 1;
12683                            while !self.is_at_end() && depth > 0 {
12684                                let tok = self.advance();
12685                                if tok.token_type == TokenType::LParen {
12686                                    depth += 1;
12687                                } else if tok.token_type == TokenType::RParen {
12688                                    depth -= 1;
12689                                }
12690                                if !options.ends_with('(')
12691                                    && !options.ends_with(' ')
12692                                    && tok.token_type != TokenType::RParen
12693                                {
12694                                    options.push(' ');
12695                                }
12696                                options.push_str(&tok.text);
12697                            }
12698                            options
12699                        } else if self.check(TokenType::String) {
12700                            let v = format!("'{}'", self.peek().text);
12701                            self.advance();
12702                            v
12703                        } else if self.check(TokenType::DAt) {
12704                            // Stage path like @s1/logs/
12705                            self.advance();
12706                            let mut path = String::from("@");
12707                            if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
12708                                path.push_str(&self.advance().text);
12709                            }
12710                            while self.check(TokenType::Slash) {
12711                                if self.current + 1 < self.tokens.len() {
12712                                    let next = &self.tokens[self.current + 1];
12713                                    let next_text_upper = next.text.to_uppercase();
12714                                    if next_text_upper == "FILE_FORMAT"
12715                                        || next_text_upper == "PARTITION_TYPE"
12716                                        || next_text_upper == "AUTO_REFRESH"
12717                                        || next_text_upper == "LOCATION"
12718                                        || next_text_upper == "PARTITION"
12719                                        || next_text_upper == "WAREHOUSE"
12720                                    {
12721                                        self.advance();
12722                                        path.push('/');
12723                                        break;
12724                                    }
12725                                }
12726                                self.advance();
12727                                path.push('/');
12728                                if self.is_identifier_token()
12729                                    || self.is_safe_keyword_as_identifier()
12730                                {
12731                                    path.push_str(&self.advance().text);
12732                                }
12733                            }
12734                            path
12735                        } else if self.check(TokenType::Var) && self.peek().text.starts_with('@') {
12736                            let mut path = self.advance().text;
12737                            while self.check(TokenType::Slash) {
12738                                if self.current + 1 < self.tokens.len() {
12739                                    let next = &self.tokens[self.current + 1];
12740                                    let next_text_upper = next.text.to_uppercase();
12741                                    if next_text_upper == "FILE_FORMAT"
12742                                        || next_text_upper == "PARTITION_TYPE"
12743                                        || next_text_upper == "AUTO_REFRESH"
12744                                        || next_text_upper == "LOCATION"
12745                                        || next_text_upper == "PARTITION"
12746                                        || next_text_upper == "WAREHOUSE"
12747                                    {
12748                                        self.advance();
12749                                        path.push('/');
12750                                        break;
12751                                    }
12752                                }
12753                                self.advance();
12754                                path.push('/');
12755                                if self.is_identifier_token()
12756                                    || self.is_safe_keyword_as_identifier()
12757                                {
12758                                    path.push_str(&self.advance().text);
12759                                }
12760                            }
12761                            path
12762                        } else if self.check(TokenType::Warehouse) {
12763                            self.advance().text
12764                        } else if self.is_identifier_token() || self.is_safe_keyword_as_identifier()
12765                        {
12766                            self.advance().text
12767                        } else {
12768                            break;
12769                        };
12770                        all_with_properties.push((key, value));
12771                    } else if self.is_identifier_token()
12772                        || self.is_safe_keyword_as_identifier()
12773                        || self.check(TokenType::Warehouse)
12774                    {
12775                        let value = self.advance().text;
12776                        all_with_properties.push((key, value));
12777                    }
12778                } else {
12779                    break;
12780                }
12781            }
12782        }
12783
12784        // Parse TSQL table-level WITH(SYSTEM_VERSIONING=ON(...)) after columns
12785        // This is different from the earlier WITH properties parsing.
12786        // TSQL uses WITH(...) after columns for system versioning.
12787        let post_table_properties = self.parse_post_table_properties()?;
12788
12789        // PostgreSQL: INHERITS (parent1, parent2, ...)
12790        let inherits = if self.match_identifier("INHERITS") {
12791            self.expect(TokenType::LParen)?;
12792            let mut parents = Vec::new();
12793            loop {
12794                parents.push(self.parse_table_ref()?);
12795                if !self.match_token(TokenType::Comma) {
12796                    break;
12797                }
12798            }
12799            self.expect(TokenType::RParen)?;
12800            parents
12801        } else {
12802            Vec::new()
12803        };
12804
12805        Ok(Expression::CreateTable(Box::new(CreateTable {
12806            name,
12807            on_cluster,
12808            columns,
12809            constraints,
12810            if_not_exists,
12811            temporary,
12812            or_replace,
12813            table_modifier: table_modifier.map(|s| s.to_string()),
12814            as_select,
12815            as_select_parenthesized: false,
12816            on_commit,
12817            clone_source: None,
12818            clone_at_clause: None,
12819            shallow_clone: false,
12820            is_copy: false,
12821            leading_comments,
12822            with_properties: all_with_properties,
12823            teradata_post_name_options: teradata_post_name_options.clone(),
12824            with_data: None,
12825            with_statistics: None,
12826            teradata_indexes: Vec::new(),
12827            with_cte: None,
12828            properties: table_properties,
12829            partition_of: None,
12830            post_table_properties,
12831            mysql_table_options,
12832            inherits,
12833            on_property,
12834            copy_grants,
12835            using_template: None,
12836            rollup,
12837        })))
12838    }
12839
12840    /// Parse CREATE TABLE ... PARTITION OF parent_table [(cols)] [FOR VALUES spec | DEFAULT] [PARTITION BY ...]
12841    fn parse_create_table_partition_of(
12842        &mut self,
12843        name: TableRef,
12844        if_not_exists: bool,
12845        temporary: bool,
12846        or_replace: bool,
12847        table_modifier: Option<&str>,
12848        leading_comments: Vec<String>,
12849    ) -> Result<Expression> {
12850        // Parse parent table name
12851        let parent_table = self.parse_table_ref()?;
12852
12853        // Optionally parse column constraints in parens: (unitsales DEFAULT 0) or (CONSTRAINT ...)
12854        // This must come before FOR VALUES or DEFAULT. We distinguish from other uses
12855        // by checking if the first token after LParen is CONSTRAINT or an identifier
12856        // that is not a string literal.
12857        let (columns, constraints) = if self.check(TokenType::LParen) {
12858            // Peek ahead: current is LParen, current+1 is first token inside parens
12859            let first_inside = self.current + 1;
12860            // Check if this is a partition column specification: (colname DEFAULT value)
12861            // Column names tokenize as Var (unquoted) or QuotedIdentifier (quoted)
12862            let is_column_defs = first_inside < self.tokens.len()
12863                && (self.tokens[first_inside].token_type == TokenType::Constraint
12864                    || ((self.tokens[first_inside].token_type == TokenType::Var
12865                        || self.tokens[first_inside].token_type == TokenType::QuotedIdentifier
12866                        || self.tokens[first_inside].token_type == TokenType::Identifier)
12867                        && first_inside + 1 < self.tokens.len()
12868                        && self.tokens[first_inside + 1].token_type == TokenType::Default));
12869
12870            if is_column_defs {
12871                self.advance(); // consume LParen
12872                                // Use special parsing for partition column specs - they don't have data types,
12873                                // just column names with constraint overrides like DEFAULT
12874                let (cols, constrs) = self.parse_partition_column_specs()?;
12875                self.expect(TokenType::RParen)?;
12876                (cols, constrs)
12877            } else {
12878                (Vec::new(), Vec::new())
12879            }
12880        } else {
12881            (Vec::new(), Vec::new())
12882        };
12883
12884        // Parse DEFAULT or FOR VALUES spec
12885        let partition_bound: Expression = if self.match_token(TokenType::Default) {
12886            // DEFAULT partition
12887            Expression::Var(Box::new(Var {
12888                this: "DEFAULT".to_string(),
12889            }))
12890        } else if self.match_token(TokenType::For) {
12891            // FOR VALUES ...
12892            self.expect(TokenType::Values)?;
12893            self.parse_partition_bound_spec()?
12894        } else {
12895            // Neither DEFAULT nor FOR VALUES - could be an error
12896            // but we'll be lenient and just create a DEFAULT
12897            Expression::Var(Box::new(Var {
12898                this: "DEFAULT".to_string(),
12899            }))
12900        };
12901
12902        let partition_of_expr =
12903            Expression::PartitionedOfProperty(Box::new(PartitionedOfProperty {
12904                this: Box::new(Expression::Table(parent_table)),
12905                expression: Box::new(partition_bound),
12906            }));
12907
12908        // Optionally parse trailing PARTITION BY RANGE/LIST/HASH(columns)
12909        let mut table_properties: Vec<Expression> = Vec::new();
12910        if self.match_token(TokenType::Partition) || self.match_token(TokenType::PartitionBy) {
12911            // Could be PARTITION BY or just PartitionBy token
12912            if self.previous().token_type == TokenType::Partition {
12913                self.expect(TokenType::By)?;
12914            }
12915            // Parse RANGE/LIST/HASH(columns)
12916            let partition_kind = if self.check(TokenType::Identifier) || self.check(TokenType::Var)
12917            {
12918                let kind_text = self.advance().text.to_uppercase();
12919                kind_text
12920            } else if self.check(TokenType::Range) {
12921                self.advance();
12922                "RANGE".to_string()
12923            } else if self.check(TokenType::List) {
12924                self.advance();
12925                "LIST".to_string()
12926            } else {
12927                "RANGE".to_string()
12928            };
12929            // Parse (columns)
12930            let mut raw_sql = format!("PARTITION BY {}", partition_kind);
12931            if self.check(TokenType::LParen) {
12932                self.advance(); // consume LParen
12933                raw_sql.push('(');
12934                let mut depth = 1;
12935                while !self.is_at_end() && depth > 0 {
12936                    let tok = self.advance();
12937                    if tok.token_type == TokenType::LParen {
12938                        depth += 1;
12939                    } else if tok.token_type == TokenType::RParen {
12940                        depth -= 1;
12941                        if depth == 0 {
12942                            break;
12943                        }
12944                    }
12945                    raw_sql.push_str(&tok.text);
12946                }
12947                raw_sql.push(')');
12948            }
12949            table_properties.push(Expression::Raw(Raw { sql: raw_sql }));
12950        }
12951
12952        Ok(Expression::CreateTable(Box::new(CreateTable {
12953            name,
12954            on_cluster: None,
12955            columns,
12956            constraints,
12957            if_not_exists,
12958            temporary,
12959            or_replace,
12960            table_modifier: table_modifier.map(|s| s.to_string()),
12961            as_select: None,
12962            as_select_parenthesized: false,
12963            on_commit: None,
12964            clone_source: None,
12965            clone_at_clause: None,
12966            shallow_clone: false,
12967            is_copy: false,
12968            leading_comments,
12969            with_properties: Vec::new(),
12970            teradata_post_name_options: Vec::new(),
12971            with_data: None,
12972            with_statistics: None,
12973            teradata_indexes: Vec::new(),
12974            with_cte: None,
12975            properties: table_properties,
12976            partition_of: Some(partition_of_expr),
12977            post_table_properties: Vec::new(),
12978            mysql_table_options: Vec::new(),
12979            inherits: Vec::new(),
12980            on_property: None,
12981            copy_grants: false,
12982            using_template: None,
12983            rollup: None,
12984        })))
12985    }
12986
12987    /// Parse partition bound spec for PARTITION OF: IN (...), FROM (...) TO (...), or WITH (MODULUS n, REMAINDER n)
12988    fn parse_partition_bound_spec(&mut self) -> Result<Expression> {
12989        if self.match_token(TokenType::In) {
12990            // IN (val, val, ...)
12991            self.expect(TokenType::LParen)?;
12992            let mut values = Vec::new();
12993            loop {
12994                let val = self.parse_expression()?;
12995                values.push(val);
12996                if !self.match_token(TokenType::Comma) {
12997                    break;
12998                }
12999            }
13000            self.expect(TokenType::RParen)?;
13001            // Use Tuple for multiple values (generator strips parens for partition bounds)
13002            let this_expr = if values.len() == 1 {
13003                values.into_iter().next().unwrap()
13004            } else {
13005                Expression::Tuple(Box::new(Tuple {
13006                    expressions: values,
13007                }))
13008            };
13009            Ok(Expression::PartitionBoundSpec(Box::new(
13010                PartitionBoundSpec {
13011                    this: Some(Box::new(this_expr)),
13012                    expression: None,
13013                    from_expressions: None,
13014                    to_expressions: None,
13015                },
13016            )))
13017        } else if self.match_token(TokenType::From) {
13018            // FROM (val, ...) TO (val, ...)
13019            self.expect(TokenType::LParen)?;
13020            let mut from_vals = Vec::new();
13021            loop {
13022                let val = self.parse_partition_bound_value()?;
13023                from_vals.push(val);
13024                if !self.match_token(TokenType::Comma) {
13025                    break;
13026                }
13027            }
13028            self.expect(TokenType::RParen)?;
13029
13030            self.expect(TokenType::To)?;
13031            self.expect(TokenType::LParen)?;
13032            let mut to_vals = Vec::new();
13033            loop {
13034                let val = self.parse_partition_bound_value()?;
13035                to_vals.push(val);
13036                if !self.match_token(TokenType::Comma) {
13037                    break;
13038                }
13039            }
13040            self.expect(TokenType::RParen)?;
13041
13042            let from_expr = if from_vals.len() == 1 {
13043                from_vals.into_iter().next().unwrap()
13044            } else {
13045                Expression::Tuple(Box::new(Tuple {
13046                    expressions: from_vals,
13047                }))
13048            };
13049            let to_expr = if to_vals.len() == 1 {
13050                to_vals.into_iter().next().unwrap()
13051            } else {
13052                Expression::Tuple(Box::new(Tuple {
13053                    expressions: to_vals,
13054                }))
13055            };
13056
13057            Ok(Expression::PartitionBoundSpec(Box::new(
13058                PartitionBoundSpec {
13059                    this: None,
13060                    expression: None,
13061                    from_expressions: Some(Box::new(from_expr)),
13062                    to_expressions: Some(Box::new(to_expr)),
13063                },
13064            )))
13065        } else if self.match_token(TokenType::With) {
13066            // WITH (MODULUS n, REMAINDER n)
13067            self.expect(TokenType::LParen)?;
13068            self.match_text_seq(&["MODULUS"]);
13069            let modulus = self.parse_expression()?;
13070            self.expect(TokenType::Comma)?;
13071            self.match_text_seq(&["REMAINDER"]);
13072            let remainder = self.parse_expression()?;
13073            self.expect(TokenType::RParen)?;
13074
13075            Ok(Expression::PartitionBoundSpec(Box::new(
13076                PartitionBoundSpec {
13077                    this: Some(Box::new(modulus)),
13078                    expression: Some(Box::new(remainder)),
13079                    from_expressions: None,
13080                    to_expressions: None,
13081                },
13082            )))
13083        } else {
13084            Err(self.parse_error("Expected IN, FROM, or WITH after FOR VALUES in PARTITION OF"))
13085        }
13086    }
13087
13088    /// Parse a single partition bound value (number, string, MINVALUE, MAXVALUE)
13089    fn parse_partition_bound_value(&mut self) -> Result<Expression> {
13090        if self.match_token(TokenType::Minvalue) {
13091            Ok(Expression::Var(Box::new(Var {
13092                this: "MINVALUE".to_string(),
13093            })))
13094        } else if self.match_token(TokenType::Maxvalue) {
13095            Ok(Expression::Var(Box::new(Var {
13096                this: "MAXVALUE".to_string(),
13097            })))
13098        } else {
13099            self.parse_expression()
13100        }
13101    }
13102
13103    /// Parse column specifications for PostgreSQL PARTITION OF syntax.
13104    /// Unlike regular column definitions, these don't have data types - just column names
13105    /// with constraint overrides like DEFAULT, NOT NULL, or table-level CONSTRAINT clauses.
13106    /// Example: (unitsales DEFAULT 0) or (CONSTRAINT check_date CHECK (logdate >= '2016-07-01'))
13107    fn parse_partition_column_specs(&mut self) -> Result<(Vec<ColumnDef>, Vec<TableConstraint>)> {
13108        let mut columns = Vec::new();
13109        let mut constraints = Vec::new();
13110
13111        loop {
13112            // Check for table-level constraint (CONSTRAINT name ...)
13113            if self.check(TokenType::Constraint) {
13114                constraints.push(self.parse_table_constraint()?);
13115            } else if self.check(TokenType::PrimaryKey)
13116                || self.check(TokenType::ForeignKey)
13117                || self.check(TokenType::Unique)
13118                || self.check(TokenType::Check)
13119                || self.check(TokenType::Exclude)
13120            {
13121                constraints.push(self.parse_table_constraint()?);
13122            } else {
13123                // Parse column name with optional constraints (no data type)
13124                columns.push(self.parse_partition_column_spec()?);
13125            }
13126
13127            if !self.match_token(TokenType::Comma) {
13128                break;
13129            }
13130            // ClickHouse allows a trailing comma before the closing ')'
13131            if matches!(
13132                self.config.dialect,
13133                Some(crate::dialects::DialectType::ClickHouse)
13134            ) && self.check(TokenType::RParen)
13135            {
13136                break;
13137            }
13138        }
13139
13140        Ok((columns, constraints))
13141    }
13142
13143    /// Parse a single partition column specification: column_name [DEFAULT value] [NOT NULL] [NULL] [WITH OPTIONS ...]
13144    fn parse_partition_column_spec(&mut self) -> Result<ColumnDef> {
13145        // Parse column name
13146        let name = self.expect_identifier_or_safe_keyword_with_quoted()?;
13147
13148        // Create column def with Unknown data type (data type comes from parent table)
13149        let mut col_def = ColumnDef::new(name.name.clone(), DataType::Unknown);
13150        col_def.name = name;
13151
13152        // Parse column constraints (no data type expected)
13153        loop {
13154            if self.match_token(TokenType::Default) {
13155                // DEFAULT value
13156                let default_val = self.parse_expression()?;
13157                col_def.default = Some(default_val.clone());
13158                col_def
13159                    .constraints
13160                    .push(ColumnConstraint::Default(default_val));
13161                col_def.constraint_order.push(ConstraintType::Default);
13162            } else if self.match_keywords(&[TokenType::Not, TokenType::Null]) {
13163                col_def.nullable = Some(false);
13164                col_def.constraint_order.push(ConstraintType::NotNull);
13165            } else if self.match_token(TokenType::Null) {
13166                col_def.nullable = Some(true);
13167                col_def.constraint_order.push(ConstraintType::Null);
13168            } else if self.match_token(TokenType::Constraint) {
13169                // Inline CONSTRAINT name ... for this column
13170                let constraint_name = self.expect_identifier_or_safe_keyword()?;
13171                if self.match_keywords(&[TokenType::Not, TokenType::Null]) {
13172                    col_def.nullable = Some(false);
13173                    col_def.not_null_constraint_name = Some(constraint_name);
13174                    col_def.constraint_order.push(ConstraintType::NotNull);
13175                } else if self.match_token(TokenType::Check) {
13176                    col_def.check_constraint_name = Some(constraint_name);
13177                    if self.match_token(TokenType::LParen) {
13178                        let check_expr = self.parse_expression()?;
13179                        self.expect(TokenType::RParen)?;
13180                        col_def
13181                            .constraints
13182                            .push(ColumnConstraint::Check(check_expr));
13183                    }
13184                    col_def.constraint_order.push(ConstraintType::Check);
13185                } else if self.match_token(TokenType::Default) {
13186                    let default_val = self.parse_expression()?;
13187                    col_def.default = Some(default_val.clone());
13188                    col_def
13189                        .constraints
13190                        .push(ColumnConstraint::Default(default_val));
13191                    col_def.constraint_order.push(ConstraintType::Default);
13192                }
13193            } else if self.match_text_seq(&["WITH", "OPTIONS"]) {
13194                // PostgreSQL: WITH OPTIONS allows specifying more options
13195                // For now, just skip this - it's rarely used
13196                break;
13197            } else {
13198                break;
13199            }
13200        }
13201
13202        Ok(col_def)
13203    }
13204
13205    /// Parse WITH properties for CREATE TABLE (e.g., WITH (FORMAT='parquet', x='2'))
13206    /// Returns a list of (key, value) pairs
13207    fn parse_with_properties(&mut self) -> Result<Vec<(String, String)>> {
13208        self.expect(TokenType::LParen)?;
13209        let mut properties = Vec::new();
13210
13211        loop {
13212            if self.check(TokenType::RParen) {
13213                break;
13214            }
13215
13216            // Parse property name (can be keywords like FORMAT, TABLE_FORMAT)
13217            let mut key = self.expect_identifier_or_keyword()?;
13218
13219            // Handle multi-word keys like "PARTITIONED BY" -> "PARTITIONED_BY"
13220            if key.to_uppercase() == "PARTITIONED" && self.check(TokenType::By) {
13221                self.advance(); // consume BY
13222                key = "PARTITIONED_BY".to_string();
13223            }
13224
13225            // Expect = or special case for PARTITIONED_BY=(...)
13226            self.expect(TokenType::Eq)?;
13227
13228            // Parse property value - can be string, identifier, or parenthesized expression
13229            let value = if self.check(TokenType::String) {
13230                // Store string with quotes to preserve format
13231                let val = format!("'{}'", self.peek().text);
13232                self.advance();
13233                val
13234            } else if self.match_token(TokenType::LParen) {
13235                // Handle PARTITIONED_BY=(x INT, y INT) or similar
13236                let mut depth = 1;
13237                let mut result = String::from("(");
13238                let mut need_space = false;
13239                while !self.is_at_end() && depth > 0 {
13240                    if self.check(TokenType::LParen) {
13241                        depth += 1;
13242                    } else if self.check(TokenType::RParen) {
13243                        depth -= 1;
13244                        if depth == 0 {
13245                            break;
13246                        }
13247                    }
13248                    let token = self.peek();
13249                    let text = &token.text;
13250                    let token_type = token.token_type;
13251
13252                    // Determine if we need a space before this token
13253                    let is_punctuation = matches!(
13254                        token_type,
13255                        TokenType::Comma | TokenType::LParen | TokenType::RParen
13256                    );
13257                    if need_space && !is_punctuation {
13258                        result.push(' ');
13259                    }
13260
13261                    result.push_str(text);
13262
13263                    // Determine if we need a space after this token
13264                    need_space = token_type == TokenType::Comma
13265                        || (!is_punctuation
13266                            && !matches!(
13267                                token_type,
13268                                TokenType::LParen | TokenType::RParen | TokenType::Comma
13269                            ));
13270                    self.advance();
13271                }
13272                self.expect(TokenType::RParen)?;
13273                result.push(')');
13274                result
13275            } else if self.check_identifier("ARRAY")
13276                && self
13277                    .peek_nth(1)
13278                    .is_some_and(|t| t.token_type == TokenType::LBracket)
13279            {
13280                // Handle ARRAY['value', 'value', ...] syntax (Athena/Presto)
13281                let mut result = self.advance().text.clone(); // consume ARRAY
13282                self.expect(TokenType::LBracket)?;
13283                result.push('[');
13284                let mut first = true;
13285                while !self.is_at_end() && !self.check(TokenType::RBracket) {
13286                    if !first {
13287                        if self.match_token(TokenType::Comma) {
13288                            result.push_str(", ");
13289                        } else {
13290                            break;
13291                        }
13292                    }
13293                    first = false;
13294                    // Parse array element (usually a string)
13295                    if self.check(TokenType::String) {
13296                        result.push('\'');
13297                        result.push_str(&self.advance().text);
13298                        result.push('\'');
13299                    } else if self.is_identifier_token() {
13300                        result.push_str(&self.advance().text);
13301                    } else {
13302                        break;
13303                    }
13304                }
13305                self.expect(TokenType::RBracket)?;
13306                result.push(']');
13307                result
13308            } else if self.check(TokenType::Number) {
13309                // Numeric value (e.g., bucket_count=64)
13310                self.advance().text.clone()
13311            } else {
13312                // Just an identifier or keyword (e.g., allow_page_locks=on)
13313                self.expect_identifier_or_keyword()?
13314            };
13315
13316            properties.push((key, value));
13317
13318            if !self.match_token(TokenType::Comma) {
13319                break;
13320            }
13321        }
13322
13323        self.expect(TokenType::RParen)?;
13324        Ok(properties)
13325    }
13326
13327    /// Parse column definitions and table constraints
13328    fn parse_column_definitions(&mut self) -> Result<(Vec<ColumnDef>, Vec<TableConstraint>)> {
13329        let mut columns = Vec::new();
13330        let mut constraints = Vec::new();
13331
13332        loop {
13333            if self.check(TokenType::RParen) {
13334                break;
13335            }
13336            // Check for LIKE clause (PostgreSQL)
13337            if self.check(TokenType::Like) {
13338                constraints.push(self.parse_like_clause()?);
13339            }
13340            // Check for table-level constraint
13341            // For CHECK, only treat as constraint if followed by '(' (or in ClickHouse where parens are optional).
13342            // Otherwise, 'check' is a column name (e.g., CREATE TABLE t (check INT)).
13343            else if self.check(TokenType::Constraint)
13344                || self.check(TokenType::PrimaryKey)
13345                || self.check(TokenType::ForeignKey)
13346                || self.check(TokenType::Unique)
13347                || (self.check(TokenType::Check)
13348                    && (self
13349                        .peek_nth(1)
13350                        .map_or(false, |t| t.token_type == TokenType::LParen)
13351                        || matches!(
13352                            self.config.dialect,
13353                            Some(crate::dialects::DialectType::ClickHouse)
13354                        )))
13355                || self.check(TokenType::Exclude)
13356            {
13357                constraints.push(self.parse_table_constraint()?);
13358            } else if matches!(
13359                self.config.dialect,
13360                Some(crate::dialects::DialectType::ClickHouse)
13361            ) && self.check(TokenType::Index)
13362            {
13363                // ClickHouse: INDEX name expr TYPE type_func(args) GRANULARITY n
13364                self.advance(); // consume INDEX
13365                let name = self.expect_identifier_or_keyword_with_quoted()?;
13366                // Use parse_conjunction to handle comparisons like c0 < (SELECT _table)
13367                let expression = self.parse_conjunction()?.ok_or_else(|| {
13368                    self.parse_error("Expected expression in ClickHouse INDEX definition")
13369                })?;
13370                let index_type = if self.match_token(TokenType::Type) {
13371                    // Parse function or identifier for type (e.g., bloom_filter(0.001), set(100), minmax)
13372                    // Handle keywords like 'set' that are tokenized as TokenType::Set
13373                    if let Some(func) = self.parse_function()? {
13374                        Some(Box::new(func))
13375                    } else if !self.check(TokenType::Identifier)
13376                        && !self.check(TokenType::Var)
13377                        && !self.is_at_end()
13378                    {
13379                        // Handle keywords as index type names (e.g., set, minmax)
13380                        let type_name = self.advance().text.clone();
13381                        if self.check(TokenType::LParen) {
13382                            // It's a function call like set(100)
13383                            self.advance(); // consume (
13384                            let mut args = Vec::new();
13385                            if !self.check(TokenType::RParen) {
13386                                args.push(self.parse_expression()?);
13387                                while self.match_token(TokenType::Comma) {
13388                                    args.push(self.parse_expression()?);
13389                                }
13390                            }
13391                            self.expect(TokenType::RParen)?;
13392                            Some(Box::new(Expression::Function(Box::new(Function::new(
13393                                type_name, args,
13394                            )))))
13395                        } else {
13396                            // Just an identifier
13397                            Some(Box::new(Expression::Identifier(Identifier::new(type_name))))
13398                        }
13399                    } else if let Some(id) = self.parse_id_var()? {
13400                        Some(Box::new(id))
13401                    } else {
13402                        None
13403                    }
13404                } else {
13405                    None
13406                };
13407                let granularity = if self.match_identifier("GRANULARITY") {
13408                    let gran_val = self.parse_expression()?;
13409                    Some(Box::new(gran_val))
13410                } else {
13411                    None
13412                };
13413                constraints.push(TableConstraint::Index {
13414                    name: Some(name),
13415                    columns: Vec::new(),
13416                    kind: None,
13417                    modifiers: ConstraintModifiers::default(),
13418                    use_key_keyword: false,
13419                    expression: Some(Box::new(expression)),
13420                    index_type,
13421                    granularity,
13422                });
13423            } else if !matches!(
13424                self.config.dialect,
13425                Some(crate::dialects::DialectType::ClickHouse)
13426            ) && (self.check(TokenType::Index)
13427                || self.check(TokenType::Key)
13428                || self.check_identifier("FULLTEXT")
13429                || self.check_identifier("SPATIAL"))
13430            {
13431                // INDEX/KEY constraint (MySQL). Guard KEY <type> as a normal column definition
13432                // (e.g. ClickHouse: `key UInt64`).
13433                let looks_like_key_constraint = if self.check(TokenType::Key) {
13434                    self.check_next(TokenType::LParen)
13435                        || ((self.check_next(TokenType::Identifier)
13436                            || self.check_next(TokenType::Var)
13437                            || self.check_next(TokenType::QuotedIdentifier))
13438                            && self.current + 2 < self.tokens.len()
13439                            && self.tokens[self.current + 2].token_type == TokenType::LParen)
13440                } else {
13441                    true
13442                };
13443
13444                if looks_like_key_constraint {
13445                    constraints.push(self.parse_index_table_constraint()?);
13446                } else {
13447                    columns.push(self.parse_column_def()?);
13448                }
13449            } else if self.check_identifier("PERIOD") {
13450                // TSQL: PERIOD FOR SYSTEM_TIME (start_col, end_col)
13451                if let Some(period_constraint) =
13452                    self.parse_period_for_system_time_table_constraint()?
13453                {
13454                    constraints.push(period_constraint);
13455                } else {
13456                    // Not actually PERIOD FOR SYSTEM_TIME, treat as column definition
13457                    columns.push(self.parse_column_def()?);
13458                }
13459            } else if self.check_identifier("INITIALLY") {
13460                // PostgreSQL: INITIALLY DEFERRED / INITIALLY IMMEDIATE as table-level setting
13461                self.advance(); // consume INITIALLY
13462                if self.match_identifier("DEFERRED") {
13463                    constraints.push(TableConstraint::InitiallyDeferred { deferred: true });
13464                } else if self.match_identifier("IMMEDIATE") {
13465                    constraints.push(TableConstraint::InitiallyDeferred { deferred: false });
13466                } else {
13467                    return Err(self.parse_error("Expected DEFERRED or IMMEDIATE after INITIALLY"));
13468                }
13469            } else if matches!(
13470                self.config.dialect,
13471                Some(crate::dialects::DialectType::ClickHouse)
13472            ) && self.check_identifier("PROJECTION")
13473            {
13474                // ClickHouse: PROJECTION name (SELECT ...) or PROJECTION name INDEX expr TYPE type_name
13475                self.advance(); // consume PROJECTION
13476                let name = self.expect_identifier_or_keyword_with_quoted()?;
13477                if self.match_token(TokenType::LParen) {
13478                    let expression = self.parse_statement()?;
13479                    self.expect(TokenType::RParen)?;
13480                    // ClickHouse: PROJECTION name (SELECT ...) WITH SETTINGS (key=value, ...)
13481                    if self.check(TokenType::With)
13482                        && self.current + 1 < self.tokens.len()
13483                        && self.tokens[self.current + 1].token_type == TokenType::Settings
13484                    {
13485                        self.advance(); // consume WITH
13486                        self.advance(); // consume SETTINGS
13487                        if self.match_token(TokenType::LParen) {
13488                            // Consume key=value pairs
13489                            loop {
13490                                if self.check(TokenType::RParen) {
13491                                    break;
13492                                }
13493                                if self.is_identifier_token()
13494                                    || self.is_safe_keyword_as_identifier()
13495                                {
13496                                    self.advance(); // key
13497                                }
13498                                if self.match_token(TokenType::Eq) {
13499                                    let _ = self.parse_primary()?; // value
13500                                }
13501                                if !self.match_token(TokenType::Comma) {
13502                                    break;
13503                                }
13504                            }
13505                            self.expect(TokenType::RParen)?;
13506                        }
13507                    }
13508                    constraints.push(TableConstraint::Projection { name, expression });
13509                } else if self.match_token(TokenType::Index) {
13510                    // PROJECTION name INDEX expr TYPE type_name
13511                    let expr = self.parse_bitwise()?.ok_or_else(|| {
13512                        self.parse_error(
13513                            "Expected expression in ClickHouse PROJECTION INDEX definition",
13514                        )
13515                    })?;
13516                    let type_str = if self.match_token(TokenType::Type) {
13517                        if !self.is_at_end()
13518                            && !self.check(TokenType::Comma)
13519                            && !self.check(TokenType::RParen)
13520                        {
13521                            self.advance().text.clone()
13522                        } else {
13523                            String::new()
13524                        }
13525                    } else {
13526                        String::new()
13527                    };
13528                    let raw_sql = if type_str.is_empty() {
13529                        format!("INDEX {} ", expr)
13530                    } else {
13531                        format!("INDEX {} TYPE {}", expr, type_str)
13532                    };
13533                    constraints.push(TableConstraint::Projection {
13534                        name,
13535                        expression: Expression::Raw(Raw { sql: raw_sql }),
13536                    });
13537                } else {
13538                    constraints.push(TableConstraint::Projection {
13539                        name,
13540                        expression: Expression::Null(Null),
13541                    });
13542                }
13543            } else {
13544                // Parse column definition
13545                columns.push(self.parse_column_def()?);
13546            }
13547
13548            if !self.match_token(TokenType::Comma) {
13549                break;
13550            }
13551            // ClickHouse: allow trailing comma before closing paren
13552            if matches!(
13553                self.config.dialect,
13554                Some(crate::dialects::DialectType::ClickHouse)
13555            ) && self.check(TokenType::RParen)
13556            {
13557                break;
13558            }
13559        }
13560
13561        Ok((columns, constraints))
13562    }
13563
13564    /// Parse LIKE clause in CREATE TABLE: LIKE source_table [INCLUDING|EXCLUDING options]
13565    fn parse_like_clause(&mut self) -> Result<TableConstraint> {
13566        self.expect(TokenType::Like)?;
13567        let source = self.parse_table_ref()?;
13568        let mut options = Vec::new();
13569
13570        // Parse optional INCLUDING/EXCLUDING modifiers
13571        loop {
13572            if self.match_identifier("INCLUDING") {
13573                let prop = self.expect_identifier_or_keyword()?.to_uppercase();
13574                options.push((LikeOptionAction::Including, prop));
13575            } else if self.match_identifier("EXCLUDING") {
13576                let prop = self.expect_identifier_or_keyword()?.to_uppercase();
13577                options.push((LikeOptionAction::Excluding, prop));
13578            } else {
13579                break;
13580            }
13581        }
13582
13583        Ok(TableConstraint::Like { source, options })
13584    }
13585
13586    /// Parse a single column definition
13587    fn parse_column_def(&mut self) -> Result<ColumnDef> {
13588        // Column names can be keywords like 'end', 'truncate', 'view', etc.
13589        // ClickHouse allows any keyword as column name (from, select, etc.)
13590        let mut name = if matches!(
13591            self.config.dialect,
13592            Some(crate::dialects::DialectType::ClickHouse)
13593        ) {
13594            self.expect_identifier_or_keyword_with_quoted()?
13595        } else {
13596            self.expect_identifier_or_safe_keyword_with_quoted()?
13597        };
13598        // ClickHouse: Nested column names like n.b for Nested() columns
13599        if matches!(
13600            self.config.dialect,
13601            Some(crate::dialects::DialectType::ClickHouse)
13602        ) {
13603            while self.match_token(TokenType::Dot) {
13604                let sub = self.expect_identifier_or_safe_keyword_with_quoted()?;
13605                name = Identifier {
13606                    name: format!("{}.{}", name.name, sub.name),
13607                    quoted: name.quoted,
13608                    trailing_comments: sub.trailing_comments,
13609                    span: None,
13610                };
13611            }
13612        }
13613
13614        // TSQL computed columns have no data type: column_name AS (expression) [PERSISTED]
13615        // Check if AS follows immediately (no data type)
13616        if self.check(TokenType::As) {
13617            let mut col_def = ColumnDef::new(
13618                name.name.clone(),
13619                DataType::Custom {
13620                    name: String::new(),
13621                },
13622            );
13623            col_def.name = name;
13624            // Consume AS and parse computed column expression
13625            self.advance(); // consume AS
13626            if self.check(TokenType::LParen) {
13627                self.parse_as_computed_column(&mut col_def)?;
13628            }
13629            return Ok(col_def);
13630        }
13631
13632        // SQLite allows column definitions without types: CREATE TABLE t (x, y)
13633        // ClickHouse allows typeless columns with DEFAULT/MATERIALIZED/ALIAS/EPHEMERAL
13634        // Check if the next token indicates no type (comma, rparen, or constraint keyword)
13635        let no_type = self.check(TokenType::Comma)
13636            || self.check(TokenType::RParen)
13637            || (matches!(
13638                self.config.dialect,
13639                Some(crate::dialects::DialectType::ClickHouse)
13640            ) && (self.check(TokenType::Default)
13641                || self.check(TokenType::Materialized)
13642                || self.check_identifier("ALIAS")
13643                || self.check_identifier("EPHEMERAL")));
13644        let data_type = if no_type {
13645            // No type specified - use empty custom type
13646            DataType::Custom {
13647                name: String::new(),
13648            }
13649        } else {
13650            self.parse_data_type()?
13651        };
13652
13653        let mut col_def = ColumnDef::new(name.name.clone(), data_type);
13654        col_def.name = name;
13655        col_def.no_type = no_type;
13656
13657        // Parse MySQL type modifiers (UNSIGNED, ZEROFILL)
13658        // These come after the data type but before other constraints
13659        while self.match_identifier("UNSIGNED")
13660            || self.match_identifier("ZEROFILL")
13661            || self.match_identifier("SIGNED")
13662        {
13663            let modifier = self.previous().text.to_uppercase();
13664            if modifier == "UNSIGNED" {
13665                col_def.unsigned = true;
13666            } else if modifier == "ZEROFILL" {
13667                col_def.zerofill = true;
13668            }
13669            // SIGNED is the default, no action needed
13670        }
13671
13672        // BigQuery: OPTIONS (key=value, ...) on column - comes right after type
13673        if self.match_identifier("OPTIONS") {
13674            col_def.options = self.parse_options_list()?;
13675        }
13676
13677        // Parse column constraints
13678        loop {
13679            if self.match_keywords(&[TokenType::Not, TokenType::Null]) {
13680                col_def.nullable = Some(false);
13681                col_def.constraint_order.push(ConstraintType::NotNull);
13682            } else if self.match_token(TokenType::Null) {
13683                col_def.nullable = Some(true);
13684                col_def.constraint_order.push(ConstraintType::Null);
13685            } else if self.match_keywords(&[TokenType::PrimaryKey, TokenType::Key]) {
13686                // Handle PRIMARY KEY [ASC|DESC]
13687                col_def.primary_key = true;
13688                // Capture ASC/DESC after PRIMARY KEY
13689                if self.match_token(TokenType::Asc) {
13690                    col_def.primary_key_order = Some(SortOrder::Asc);
13691                } else if self.match_token(TokenType::Desc) {
13692                    col_def.primary_key_order = Some(SortOrder::Desc);
13693                }
13694                col_def.constraint_order.push(ConstraintType::PrimaryKey);
13695            } else if self.match_token(TokenType::Constraint) {
13696                // Inline CONSTRAINT name ... (e.g., CONSTRAINT fk_name REFERENCES ...)
13697                let constraint_name = self.expect_identifier()?;
13698                // After constraint name, expect REFERENCES, PRIMARY KEY, UNIQUE, CHECK, NOT NULL, NULL, etc.
13699                if self.match_token(TokenType::References) {
13700                    let mut fk_ref = self.parse_foreign_key_ref()?;
13701                    fk_ref.constraint_name = Some(constraint_name);
13702                    col_def
13703                        .constraints
13704                        .push(ColumnConstraint::References(fk_ref));
13705                    col_def.constraint_order.push(ConstraintType::References);
13706                } else if self.match_keywords(&[TokenType::PrimaryKey, TokenType::Key]) {
13707                    col_def.primary_key = true;
13708                    col_def.primary_key_constraint_name = Some(constraint_name);
13709                    col_def.constraint_order.push(ConstraintType::PrimaryKey);
13710                } else if self.match_token(TokenType::Unique) {
13711                    col_def.unique = true;
13712                    col_def.unique_constraint_name = Some(constraint_name);
13713                    // Check for NULLS NOT DISTINCT (PostgreSQL 15+ feature)
13714                    if self.match_text_seq(&["NULLS", "NOT", "DISTINCT"]) {
13715                        col_def.unique_nulls_not_distinct = true;
13716                    }
13717                    col_def.constraint_order.push(ConstraintType::Unique);
13718                } else if self.match_keywords(&[TokenType::Not, TokenType::Null]) {
13719                    col_def.nullable = Some(false);
13720                    col_def.not_null_constraint_name = Some(constraint_name);
13721                    col_def.constraint_order.push(ConstraintType::NotNull);
13722                } else if self.match_token(TokenType::Check) {
13723                    col_def.check_constraint_name = Some(constraint_name);
13724                    // Parse CHECK constraint expression
13725                    if self.match_token(TokenType::LParen) {
13726                        let check_expr = self.parse_expression()?;
13727                        self.expect(TokenType::RParen)?;
13728                        col_def
13729                            .constraints
13730                            .push(ColumnConstraint::Check(check_expr));
13731                    } else if matches!(
13732                        self.config.dialect,
13733                        Some(crate::dialects::DialectType::ClickHouse)
13734                    ) {
13735                        // ClickHouse: CHECK expr without parens
13736                        let check_expr = self.parse_or()?;
13737                        col_def
13738                            .constraints
13739                            .push(ColumnConstraint::Check(check_expr));
13740                    }
13741                    col_def.constraint_order.push(ConstraintType::Check);
13742                }
13743            } else if self.match_token(TokenType::Unique) {
13744                col_def.unique = true;
13745                // Check for NULLS NOT DISTINCT (PostgreSQL 15+ feature)
13746                if self.match_text_seq(&["NULLS", "NOT", "DISTINCT"]) {
13747                    col_def.unique_nulls_not_distinct = true;
13748                }
13749                col_def.constraint_order.push(ConstraintType::Unique);
13750            } else if self.match_token(TokenType::Check) {
13751                // Standalone CHECK (expr) constraint (without CONSTRAINT name)
13752                if self.match_token(TokenType::LParen) {
13753                    let check_expr = self.parse_expression()?;
13754                    self.expect(TokenType::RParen)?;
13755                    col_def
13756                        .constraints
13757                        .push(ColumnConstraint::Check(check_expr));
13758                    col_def.constraint_order.push(ConstraintType::Check);
13759                } else if matches!(
13760                    self.config.dialect,
13761                    Some(crate::dialects::DialectType::ClickHouse)
13762                ) {
13763                    // ClickHouse: CHECK expr without parens
13764                    let check_expr = self.parse_or()?;
13765                    col_def
13766                        .constraints
13767                        .push(ColumnConstraint::Check(check_expr));
13768                    col_def.constraint_order.push(ConstraintType::Check);
13769                }
13770            } else if self.match_token(TokenType::AutoIncrement) || self.match_keyword("IDENTITY") {
13771                col_def.auto_increment = true;
13772                col_def.constraint_order.push(ConstraintType::AutoIncrement);
13773                // Handle IDENTITY/AUTOINCREMENT options: START n INCREMENT m [ORDER|NOORDER] or (start, increment)
13774                if self.match_keyword("START") {
13775                    col_def.auto_increment_start = Some(Box::new(self.parse_primary()?));
13776                    if self.match_keyword("INCREMENT") {
13777                        col_def.auto_increment_increment = Some(Box::new(self.parse_primary()?));
13778                    }
13779                    // Snowflake: ORDER or NOORDER option
13780                    if self.match_token(TokenType::Order) {
13781                        col_def.auto_increment_order = Some(true);
13782                    } else if self.match_identifier("NOORDER") {
13783                        col_def.auto_increment_order = Some(false);
13784                    }
13785                } else if self.match_token(TokenType::LParen) {
13786                    // IDENTITY(start, increment) or AUTOINCREMENT(start, increment)
13787                    col_def.auto_increment_start = Some(Box::new(self.parse_primary()?));
13788                    if self.match_token(TokenType::Comma) {
13789                        col_def.auto_increment_increment = Some(Box::new(self.parse_primary()?));
13790                    }
13791                    self.expect(TokenType::RParen)?;
13792                }
13793            } else if self.match_token(TokenType::Default) {
13794                // ClickHouse: DEFAULT expressions can be complex (today(), a + 1, cond ? x : y, etc.)
13795                col_def.default = if matches!(
13796                    self.config.dialect,
13797                    Some(crate::dialects::DialectType::ClickHouse)
13798                ) {
13799                    Some(self.parse_expression()?)
13800                } else {
13801                    Some(self.parse_unary()?)
13802                };
13803                col_def.constraint_order.push(ConstraintType::Default);
13804            } else if self.match_keywords(&[TokenType::ForeignKey, TokenType::Key]) {
13805                // Snowflake/SQL Server: FOREIGN KEY REFERENCES table(columns)
13806                // The FOREIGN KEY keywords are followed by REFERENCES
13807                self.expect(TokenType::References)?;
13808                let mut fk_ref = self.parse_foreign_key_ref()?;
13809                fk_ref.has_foreign_key_keywords = true;
13810                col_def
13811                    .constraints
13812                    .push(ColumnConstraint::References(fk_ref));
13813                col_def.constraint_order.push(ConstraintType::References);
13814            } else if self.match_token(TokenType::References) {
13815                let fk_ref = self.parse_foreign_key_ref()?;
13816                col_def
13817                    .constraints
13818                    .push(ColumnConstraint::References(fk_ref));
13819                col_def.constraint_order.push(ConstraintType::References);
13820            } else if self.match_token(TokenType::Generated) {
13821                // GENERATED [BY DEFAULT [ON NULL] | ALWAYS] AS ...
13822                // Could be: AS IDENTITY, AS (expr) STORED|VIRTUAL, AS ROW START|END
13823                self.parse_generated_column_constraint(&mut col_def)?;
13824            } else if self.match_token(TokenType::Collate) {
13825                // COLLATE collation_name (may be quoted like "de_DE")
13826                // Also handle dotted names like pg_catalog."default"
13827                let mut collation = self.expect_identifier_or_keyword_with_quoted()?;
13828                // Check for dotted collation names: pg_catalog."default"
13829                while self.match_token(TokenType::Dot) {
13830                    let next = self.expect_identifier_or_keyword_with_quoted()?;
13831                    let sep = if next.quoted {
13832                        format!("{}.\"{}\"", collation.name, next.name)
13833                    } else {
13834                        format!("{}.{}", collation.name, next.name)
13835                    };
13836                    collation = Identifier {
13837                        name: sep,
13838                        quoted: false,
13839                        trailing_comments: Vec::new(),
13840                        span: None,
13841                    };
13842                }
13843                col_def
13844                    .constraints
13845                    .push(ColumnConstraint::Collate(collation));
13846                col_def.constraint_order.push(ConstraintType::Collate);
13847            } else if self.match_token(TokenType::Comment) {
13848                // COMMENT 'comment text'
13849                let comment_text = self.expect_string()?;
13850                col_def
13851                    .constraints
13852                    .push(ColumnConstraint::Comment(comment_text));
13853                col_def.constraint_order.push(ConstraintType::Comment);
13854            } else if self.match_keywords(&[TokenType::On, TokenType::Update]) {
13855                // MySQL: ON UPDATE expression (e.g., ON UPDATE CURRENT_TIMESTAMP)
13856                let expr = self.parse_unary()?;
13857                col_def.on_update = Some(expr);
13858                col_def.constraint_order.push(ConstraintType::OnUpdate);
13859            } else if self.match_identifier("ENCODE") {
13860                // Redshift: ENCODE encoding_type (e.g., ZSTD, DELTA, LZO, etc.)
13861                let encoding = self.expect_identifier_or_keyword()?;
13862                col_def.encoding = Some(encoding);
13863                col_def.constraint_order.push(ConstraintType::Encode);
13864            } else if !matches!(
13865                self.config.dialect,
13866                Some(crate::dialects::DialectType::ClickHouse)
13867            ) && self.match_token(TokenType::Format)
13868            {
13869                // Teradata: FORMAT 'pattern' (not ClickHouse — FORMAT there is statement-level)
13870                let format_str = self.expect_string()?;
13871                col_def.format = Some(format_str);
13872            } else if self.match_identifier("TITLE") {
13873                // Teradata: TITLE 'title'
13874                let title_str = self.expect_string()?;
13875                col_def.title = Some(title_str);
13876            } else if self.match_identifier("INLINE") {
13877                // Teradata: INLINE LENGTH n
13878                self.match_identifier("LENGTH");
13879                let length = self.expect_number()?;
13880                col_def.inline_length = Some(length as u64);
13881            } else if self.match_identifier("COMPRESS") {
13882                // Teradata: COMPRESS or COMPRESS (values) or COMPRESS 'value'
13883                if self.match_token(TokenType::LParen) {
13884                    let values = self.parse_expression_list()?;
13885                    self.expect(TokenType::RParen)?;
13886                    col_def.compress = Some(values);
13887                } else if self.check(TokenType::String) {
13888                    // COMPRESS 'value'
13889                    let value = self.parse_primary()?;
13890                    col_def.compress = Some(vec![value]);
13891                } else {
13892                    // COMPRESS without values
13893                    col_def.compress = Some(Vec::new());
13894                }
13895            } else if self.match_identifier("CHARACTER") {
13896                // Teradata: CHARACTER SET name
13897                self.match_token(TokenType::Set);
13898                let charset = self.expect_identifier_or_keyword()?;
13899                col_def.character_set = Some(charset);
13900            } else if self.match_identifier("UPPERCASE") {
13901                // Teradata: UPPERCASE
13902                col_def.uppercase = true;
13903            } else if self.match_identifier("CASESPECIFIC") {
13904                // Teradata: CASESPECIFIC
13905                col_def.casespecific = Some(true);
13906            } else if self.match_text_seq(&["NOT", "FOR", "REPLICATION"]) {
13907                // TSQL: NOT FOR REPLICATION - skip this modifier (not preserved in output for non-TSQL)
13908                col_def.not_for_replication = true;
13909            } else if self.match_token(TokenType::Not) && self.match_identifier("CASESPECIFIC") {
13910                // Teradata: NOT CASESPECIFIC
13911                col_def.casespecific = Some(false);
13912            } else if self.match_keyword("TAG")
13913                || (self.match_token(TokenType::With) && self.match_keyword("TAG"))
13914            {
13915                // Snowflake: TAG (key='value', ...) or WITH TAG (key='value', ...)
13916                let tags = self.parse_tags()?;
13917                col_def.constraints.push(ColumnConstraint::Tags(tags));
13918                col_def.constraint_order.push(ConstraintType::Tags);
13919            } else if self.match_token(TokenType::As) {
13920                // Computed column: AS (expression) [STORED|VIRTUAL|PERSISTED] [NOT NULL]
13921                // TSQL: AS (expression) [PERSISTED] [NOT NULL]
13922                // MySQL shorthand: AS (expression) [STORED|VIRTUAL]
13923                // Also: Snowflake External Table virtual column expression
13924                if self.check(TokenType::LParen) {
13925                    self.parse_as_computed_column(&mut col_def)?;
13926                }
13927            } else if self.match_identifier("CODEC") {
13928                // ClickHouse: CODEC(LZ4HC(9), ZSTD, DELTA)
13929                self.expect(TokenType::LParen)?;
13930                let start = self.current;
13931                let mut depth = 1;
13932                while !self.is_at_end() && depth > 0 {
13933                    if self.check(TokenType::LParen) {
13934                        depth += 1;
13935                    }
13936                    if self.check(TokenType::RParen) {
13937                        depth -= 1;
13938                        if depth == 0 {
13939                            break;
13940                        }
13941                    }
13942                    self.advance();
13943                }
13944                let codec_text = self.tokens_to_sql(start, self.current);
13945                self.expect(TokenType::RParen)?;
13946                col_def.codec = Some(codec_text);
13947            } else if self.match_identifier("STATISTICS") {
13948                // ClickHouse: STATISTICS(tdigest, minmax, uniq, ...)
13949                self.expect(TokenType::LParen)?;
13950                let mut depth = 1;
13951                while !self.is_at_end() && depth > 0 {
13952                    if self.check(TokenType::LParen) {
13953                        depth += 1;
13954                    }
13955                    if self.check(TokenType::RParen) {
13956                        depth -= 1;
13957                        if depth == 0 {
13958                            break;
13959                        }
13960                    }
13961                    self.advance();
13962                }
13963                self.expect(TokenType::RParen)?;
13964                // Statistics info is stored but we don't need it for transpilation
13965            } else if self.match_identifier("EPHEMERAL") {
13966                // ClickHouse: EPHEMERAL [expr] [type]
13967                // EPHEMERAL can optionally be followed by an expression, then optionally a data type
13968                if !self.check(TokenType::Comma)
13969                    && !self.check(TokenType::RParen)
13970                    && !self.is_at_end()
13971                    && !self.check_identifier("CODEC")
13972                    && !self.check_identifier("TTL")
13973                    && !self.check(TokenType::Comment)
13974                {
13975                    let expr = self.parse_bitwise()?.unwrap_or(Expression::Null(Null));
13976                    col_def.ephemeral = Some(Some(Box::new(expr)));
13977                    // ClickHouse: type can follow EPHEMERAL expression (e.g., b EPHEMERAL 'a' String)
13978                    if col_def.no_type
13979                        && !self.check(TokenType::Comma)
13980                        && !self.check(TokenType::RParen)
13981                        && !self.is_at_end()
13982                        && !self.check_identifier("CODEC")
13983                        && !self.check_identifier("TTL")
13984                        && !self.check(TokenType::Comment)
13985                    {
13986                        col_def.data_type = self.parse_data_type()?;
13987                        col_def.no_type = false;
13988                    }
13989                } else {
13990                    col_def.ephemeral = Some(None);
13991                }
13992            } else if self.check(TokenType::Materialized) && !self.check_next(TokenType::View) {
13993                // ClickHouse: MATERIALIZED expr (but not MATERIALIZED VIEW)
13994                self.advance(); // consume MATERIALIZED
13995                let expr = self.parse_or()?;
13996                col_def.materialized_expr = Some(Box::new(expr));
13997            } else if self.match_identifier("ALIAS") {
13998                // ClickHouse: ALIAS expr
13999                let expr = self.parse_or()?;
14000                col_def.alias_expr = Some(Box::new(expr));
14001            } else if matches!(
14002                self.config.dialect,
14003                Some(crate::dialects::DialectType::ClickHouse)
14004            ) && self.check_identifier("EXPRESSION")
14005            {
14006                // ClickHouse dictionary column: EXPRESSION expr
14007                self.advance(); // consume EXPRESSION
14008                let expr = self.parse_or()?;
14009                col_def.materialized_expr = Some(Box::new(expr));
14010            } else if matches!(
14011                self.config.dialect,
14012                Some(crate::dialects::DialectType::ClickHouse)
14013            ) && (self.match_identifier("HIERARCHICAL")
14014                || self.match_identifier("IS_OBJECT_ID")
14015                || self.match_identifier("INJECTIVE")
14016                || self.match_identifier("BIDIRECTIONAL"))
14017            {
14018                // ClickHouse dictionary column attributes: HIERARCHICAL, IS_OBJECT_ID, INJECTIVE, BIDIRECTIONAL
14019                // These are flag-like attributes with no value, just skip them
14020            } else if self.match_identifier("TTL") {
14021                // ClickHouse: TTL expr
14022                let expr = self.parse_expression()?;
14023                col_def.ttl_expr = Some(Box::new(expr));
14024            } else if matches!(
14025                self.config.dialect,
14026                Some(crate::dialects::DialectType::ClickHouse)
14027            ) && self.check(TokenType::Settings)
14028                && self.check_next(TokenType::LParen)
14029            {
14030                // ClickHouse: SETTINGS (key = value, ...) on column definition
14031                // Only match parenthesized form; non-parenthesized SETTINGS is statement-level
14032                self.advance(); // consume SETTINGS
14033                self.expect(TokenType::LParen)?;
14034                let mut depth = 1i32;
14035                while !self.is_at_end() && depth > 0 {
14036                    if self.check(TokenType::LParen) {
14037                        depth += 1;
14038                    }
14039                    if self.check(TokenType::RParen) {
14040                        depth -= 1;
14041                        if depth == 0 {
14042                            break;
14043                        }
14044                    }
14045                    self.advance();
14046                }
14047                self.expect(TokenType::RParen)?;
14048            } else {
14049                // Skip unknown column modifiers (DEFERRABLE, CHARACTER SET, etc.)
14050                // to allow parsing to continue
14051                if self.skip_column_modifier() {
14052                    continue;
14053                }
14054                break;
14055            }
14056        }
14057
14058        Ok(col_def)
14059    }
14060
14061    /// Skip optional column modifiers that we don't need to preserve
14062    fn skip_column_modifier(&mut self) -> bool {
14063        // NOT DEFERRABLE, NOT CASESPECIFIC - handle NOT followed by specific keywords
14064        // (NOT NULL is handled earlier in the constraint loop)
14065        if self.check(TokenType::Not) {
14066            // Check what follows NOT
14067            if self.check_next_identifier("DEFERRABLE")
14068                || self.check_next_identifier("CASESPECIFIC")
14069            {
14070                self.advance(); // consume NOT
14071                self.advance(); // consume DEFERRABLE/CASESPECIFIC
14072                return true;
14073            }
14074        }
14075        // DEFERRABLE / NOT DEFERRABLE / INITIALLY DEFERRED / INITIALLY IMMEDIATE
14076        if self.match_identifier("DEFERRABLE")
14077            || self.match_identifier("DEFERRED")
14078            || self.match_identifier("IMMEDIATE")
14079        {
14080            return true;
14081        }
14082        // CHARACTER SET name
14083        if self.match_identifier("CHARACTER") {
14084            self.match_token(TokenType::Set);
14085            // Consume charset name (can be multiple parts like LATIN, utf8_bin, etc.)
14086            let _ = self.match_token(TokenType::Var) || self.match_token(TokenType::Identifier);
14087            return true;
14088        }
14089        // UPPERCASE, CASESPECIFIC
14090        if self.match_identifier("UPPERCASE") || self.match_identifier("CASESPECIFIC") {
14091            return true;
14092        }
14093        // Note: COMPRESS, FORMAT, TITLE, and INLINE LENGTH are now properly parsed and stored in ColumnDef
14094        false
14095    }
14096
14097    /// Parse Teradata-specific table options after CREATE TABLE AS
14098    /// Returns (with_data, with_statistics, teradata_indexes)
14099    fn parse_teradata_table_options(&mut self) -> (Option<bool>, Option<bool>, Vec<TeradataIndex>) {
14100        let mut with_data = None;
14101        let mut with_statistics = None;
14102        let mut teradata_indexes = Vec::new();
14103
14104        loop {
14105            // WITH DATA [AND STATISTICS] / WITH NO DATA [AND NO STATISTICS]
14106            if self.match_token(TokenType::With) {
14107                let no = self.match_token(TokenType::No); // optional NO
14108                self.match_identifier("DATA");
14109                with_data = Some(!no); // WITH DATA = true, WITH NO DATA = false
14110                                       // Optional AND [NO] STATISTICS
14111                if self.match_token(TokenType::And) {
14112                    let no_stats = self.match_token(TokenType::No); // optional NO
14113                    self.match_identifier("STATISTICS");
14114                    with_statistics = Some(!no_stats); // AND STATISTICS = true, AND NO STATISTICS = false
14115                }
14116                continue;
14117            }
14118            // NO PRIMARY INDEX
14119            if self.match_token(TokenType::No) {
14120                self.match_token(TokenType::PrimaryKey);
14121                self.match_token(TokenType::Index);
14122                teradata_indexes.push(TeradataIndex {
14123                    kind: TeradataIndexKind::NoPrimary,
14124                    name: None,
14125                    columns: Vec::new(),
14126                });
14127                // Consume optional comma separator between index specs
14128                self.match_token(TokenType::Comma);
14129                continue;
14130            }
14131            // PRIMARY AMP INDEX / PRIMARY INDEX
14132            if self.match_token(TokenType::PrimaryKey) {
14133                let is_amp = self.match_identifier("AMP");
14134                self.match_token(TokenType::Index);
14135                // Optional index name
14136                let name = if self.is_identifier_token() && !self.check(TokenType::LParen) {
14137                    Some(self.advance().text)
14138                } else {
14139                    None
14140                };
14141                // Optional column list
14142                let columns = if self.match_token(TokenType::LParen) {
14143                    let cols = self.parse_identifier_list_raw();
14144                    self.match_token(TokenType::RParen);
14145                    cols
14146                } else {
14147                    Vec::new()
14148                };
14149                teradata_indexes.push(TeradataIndex {
14150                    kind: if is_amp {
14151                        TeradataIndexKind::PrimaryAmp
14152                    } else {
14153                        TeradataIndexKind::Primary
14154                    },
14155                    name,
14156                    columns,
14157                });
14158                // Consume optional comma separator between index specs
14159                self.match_token(TokenType::Comma);
14160                continue;
14161            }
14162            // UNIQUE [PRIMARY] INDEX
14163            if self.match_token(TokenType::Unique) {
14164                let is_primary = self.match_token(TokenType::PrimaryKey);
14165                self.match_token(TokenType::Index);
14166                // Optional index name
14167                let name = if self.is_identifier_token() {
14168                    Some(self.advance().text)
14169                } else {
14170                    None
14171                };
14172                // Optional column list
14173                let columns = if self.match_token(TokenType::LParen) {
14174                    let cols = self.parse_identifier_list_raw();
14175                    self.match_token(TokenType::RParen);
14176                    cols
14177                } else {
14178                    Vec::new()
14179                };
14180                teradata_indexes.push(TeradataIndex {
14181                    kind: if is_primary {
14182                        TeradataIndexKind::UniquePrimary
14183                    } else {
14184                        TeradataIndexKind::Unique
14185                    },
14186                    name,
14187                    columns,
14188                });
14189                // Consume optional comma separator between index specs
14190                self.match_token(TokenType::Comma);
14191                continue;
14192            }
14193            // Plain INDEX (non-primary, non-unique)
14194            if self.match_token(TokenType::Index) {
14195                // Optional index name
14196                let name = if self.is_identifier_token() && !self.check(TokenType::LParen) {
14197                    Some(self.advance().text)
14198                } else {
14199                    None
14200                };
14201                // Optional column list
14202                let columns = if self.match_token(TokenType::LParen) {
14203                    let cols = self.parse_identifier_list_raw();
14204                    self.match_token(TokenType::RParen);
14205                    cols
14206                } else {
14207                    Vec::new()
14208                };
14209                teradata_indexes.push(TeradataIndex {
14210                    kind: TeradataIndexKind::Secondary,
14211                    name,
14212                    columns,
14213                });
14214                // Consume optional comma separator between index specs
14215                self.match_token(TokenType::Comma);
14216                continue;
14217            }
14218            break;
14219        }
14220
14221        (with_data, with_statistics, teradata_indexes)
14222    }
14223
14224    /// Parse Teradata table options after name before column list (comma-separated)
14225    fn parse_teradata_post_name_options(&mut self) -> Vec<String> {
14226        // Options begin with a comma after the table name.
14227        if !self.match_token(TokenType::Comma) {
14228            return Vec::new();
14229        }
14230
14231        let mut options = Vec::new();
14232        let mut current_tokens: Vec<(String, TokenType)> = Vec::new();
14233        let mut paren_depth = 0;
14234        let mut in_value = false;
14235
14236        while !self.is_at_end() {
14237            if self.check(TokenType::LParen) && paren_depth == 0 {
14238                if !in_value {
14239                    // Column list begins
14240                    break;
14241                }
14242                let mut is_terminal = false;
14243                if let Some((last_text, last_type)) = current_tokens.last() {
14244                    let last_upper = last_text.to_uppercase();
14245                    is_terminal = matches!(last_type, TokenType::Number | TokenType::String)
14246                        || matches!(
14247                            last_upper.as_str(),
14248                            "ON" | "OFF"
14249                                | "DEFAULT"
14250                                | "NEVER"
14251                                | "ALWAYS"
14252                                | "MINIMUM"
14253                                | "MAXIMUM"
14254                                | "BYTES"
14255                                | "KBYTES"
14256                                | "KILOBYTES"
14257                                | "PERCENT"
14258                        );
14259                }
14260                if is_terminal {
14261                    break;
14262                }
14263            }
14264
14265            let token = self.advance();
14266
14267            match token.token_type {
14268                TokenType::LParen => {
14269                    paren_depth += 1;
14270                }
14271                TokenType::RParen => {
14272                    if paren_depth > 0 {
14273                        paren_depth -= 1;
14274                        if paren_depth == 0 && in_value {
14275                            in_value = false;
14276                        }
14277                    }
14278                }
14279                TokenType::Eq => {
14280                    if paren_depth == 0 {
14281                        in_value = true;
14282                    }
14283                }
14284                TokenType::Comma => {
14285                    if paren_depth == 0 {
14286                        let option = self.join_teradata_option_tokens(current_tokens);
14287                        if !option.is_empty() {
14288                            options.push(option);
14289                        }
14290                        current_tokens = Vec::new();
14291                        in_value = false;
14292                        continue;
14293                    }
14294                }
14295                _ => {}
14296            }
14297
14298            let text = if token.token_type == TokenType::QuotedIdentifier {
14299                let quote_char = if self.config.dialect == Some(crate::dialects::DialectType::MySQL)
14300                    || self.config.dialect == Some(crate::dialects::DialectType::SingleStore)
14301                    || self.config.dialect == Some(crate::dialects::DialectType::Doris)
14302                    || self.config.dialect == Some(crate::dialects::DialectType::StarRocks)
14303                {
14304                    '`'
14305                } else {
14306                    '"'
14307                };
14308                format!("{}{}{}", quote_char, token.text, quote_char)
14309            } else if token.token_type == TokenType::String {
14310                format!("'{}'", token.text)
14311            } else {
14312                token.text.clone()
14313            };
14314
14315            let mut join_type = token.token_type;
14316            if join_type == TokenType::Percent && token.text.eq_ignore_ascii_case("PERCENT") {
14317                // Treat PERCENT as an identifier to preserve spacing (e.g., "1 PERCENT")
14318                join_type = TokenType::Identifier;
14319            }
14320            current_tokens.push((text, join_type));
14321        }
14322
14323        if !current_tokens.is_empty() {
14324            let option = self.join_teradata_option_tokens(current_tokens);
14325            if !option.is_empty() {
14326                options.push(option);
14327            }
14328        }
14329
14330        options
14331    }
14332
14333    /// Parse identifier list for Teradata indexes, returning raw strings
14334    fn parse_identifier_list_raw(&mut self) -> Vec<String> {
14335        let mut identifiers = Vec::new();
14336        loop {
14337            if self.is_identifier_token() || self.is_identifier_or_keyword_token() {
14338                identifiers.push(self.advance().text);
14339            }
14340            if !self.match_token(TokenType::Comma) {
14341                break;
14342            }
14343        }
14344        identifiers
14345    }
14346
14347    /// Parse GENERATED column constraint after GENERATED token has been consumed.
14348    /// Handles three forms:
14349    /// 1. GENERATED [BY DEFAULT | ALWAYS] AS IDENTITY [...] -> GeneratedAsIdentity
14350    /// 2. GENERATED ALWAYS AS (expr) [STORED|VIRTUAL] -> ComputedColumn
14351    /// 3. GENERATED ALWAYS AS ROW START|END [HIDDEN] -> GeneratedAsRow
14352    fn parse_generated_column_constraint(&mut self, col_def: &mut ColumnDef) -> Result<()> {
14353        let always;
14354        let mut on_null = false;
14355
14356        // BY DEFAULT [ON NULL] | ALWAYS
14357        if self.match_token(TokenType::By) {
14358            self.expect(TokenType::Default)?;
14359            on_null = self.match_keywords(&[TokenType::On, TokenType::Null]);
14360            always = false;
14361        } else {
14362            self.expect(TokenType::Always)?;
14363            always = true;
14364        }
14365
14366        // Expect AS
14367        self.expect(TokenType::As)?;
14368
14369        // Check what follows AS
14370        if self.check(TokenType::Row) {
14371            // GENERATED ALWAYS AS ROW START|END [HIDDEN]
14372            self.advance(); // consume ROW
14373            let start = if self.match_token(TokenType::Start) {
14374                true
14375            } else {
14376                self.expect(TokenType::End)?;
14377                false
14378            };
14379            let hidden = self.match_identifier("HIDDEN");
14380            col_def
14381                .constraints
14382                .push(ColumnConstraint::GeneratedAsRow(GeneratedAsRow {
14383                    start,
14384                    hidden,
14385                }));
14386            col_def
14387                .constraint_order
14388                .push(ConstraintType::GeneratedAsRow);
14389        } else if self.check(TokenType::Identity) {
14390            // GENERATED [BY DEFAULT | ALWAYS] AS IDENTITY [(...)]
14391            self.advance(); // consume IDENTITY
14392
14393            let mut start = None;
14394            let mut increment = None;
14395            let mut minvalue = None;
14396            let mut maxvalue = None;
14397            let mut cycle = None;
14398
14399            // Optional sequence options in parentheses
14400            if self.match_token(TokenType::LParen) {
14401                loop {
14402                    if self.match_token(TokenType::Start) {
14403                        self.match_token(TokenType::With);
14404                        start = Some(Box::new(self.parse_unary()?));
14405                    } else if self.match_token(TokenType::Increment) {
14406                        self.match_token(TokenType::By);
14407                        increment = Some(Box::new(self.parse_unary()?));
14408                    } else if self.match_token(TokenType::Minvalue) {
14409                        minvalue = Some(Box::new(self.parse_unary()?));
14410                    } else if self.match_token(TokenType::Maxvalue) {
14411                        maxvalue = Some(Box::new(self.parse_unary()?));
14412                    } else if self.match_token(TokenType::Cycle) {
14413                        cycle = Some(true);
14414                    } else if self.match_keywords(&[TokenType::No, TokenType::Cycle]) {
14415                        cycle = Some(false);
14416                    } else if self.check(TokenType::RParen) {
14417                        break;
14418                    } else {
14419                        self.advance();
14420                    }
14421                }
14422                self.expect(TokenType::RParen)?;
14423            }
14424
14425            col_def
14426                .constraints
14427                .push(ColumnConstraint::GeneratedAsIdentity(GeneratedAsIdentity {
14428                    always,
14429                    on_null,
14430                    start,
14431                    increment,
14432                    minvalue,
14433                    maxvalue,
14434                    cycle,
14435                }));
14436            col_def
14437                .constraint_order
14438                .push(ConstraintType::GeneratedAsIdentity);
14439        } else if self.check(TokenType::LParen) {
14440            // GENERATED ALWAYS AS (expr) [STORED|VIRTUAL]
14441            self.advance(); // consume LParen
14442            let expr = self.parse_expression()?;
14443            self.expect(TokenType::RParen)?;
14444
14445            // Check for STORED or VIRTUAL
14446            let (persisted, persistence_kind) = if self.match_identifier("STORED") {
14447                (true, Some("STORED".to_string()))
14448            } else if self.match_identifier("VIRTUAL") {
14449                (false, Some("VIRTUAL".to_string()))
14450            } else {
14451                (false, None)
14452            };
14453
14454            col_def
14455                .constraints
14456                .push(ColumnConstraint::ComputedColumn(ComputedColumn {
14457                    expression: Box::new(expr),
14458                    persisted,
14459                    not_null: false,
14460                    persistence_kind,
14461                    data_type: None,
14462                }));
14463            col_def
14464                .constraint_order
14465                .push(ConstraintType::ComputedColumn);
14466        } else {
14467            // Fallback: treat as GENERATED AS IDENTITY without explicit IDENTITY keyword
14468            col_def
14469                .constraints
14470                .push(ColumnConstraint::GeneratedAsIdentity(GeneratedAsIdentity {
14471                    always,
14472                    on_null,
14473                    start: None,
14474                    increment: None,
14475                    minvalue: None,
14476                    maxvalue: None,
14477                    cycle: None,
14478                }));
14479            col_def
14480                .constraint_order
14481                .push(ConstraintType::GeneratedAsIdentity);
14482        }
14483        Ok(())
14484    }
14485
14486    /// Parse AS (expr) [STORED|VIRTUAL|PERSISTED] [TYPE] [NOT NULL] for computed columns.
14487    /// Called after AS token has been consumed and we've confirmed LParen follows.
14488    /// SingleStore: AS (expr) PERSISTED TYPE NOT NULL
14489    fn parse_as_computed_column(&mut self, col_def: &mut ColumnDef) -> Result<()> {
14490        self.expect(TokenType::LParen)?;
14491        let expr = self.parse_expression()?;
14492        self.expect(TokenType::RParen)?;
14493
14494        // Check for STORED, VIRTUAL, or PERSISTED
14495        let (persisted, persistence_kind) = if self.match_identifier("STORED") {
14496            (true, Some("STORED".to_string()))
14497        } else if self.match_identifier("VIRTUAL") {
14498            (false, Some("VIRTUAL".to_string()))
14499        } else if self.match_identifier("PERSISTED") {
14500            (true, Some("PERSISTED".to_string()))
14501        } else {
14502            (false, None)
14503        };
14504
14505        // For PERSISTED columns, check for optional data type (SingleStore: PERSISTED TYPE NOT NULL)
14506        // Also check for AUTO keyword for SingleStore: PERSISTED AUTO NOT NULL
14507        let data_type = if persistence_kind.as_deref() == Some("PERSISTED") {
14508            // Check if next token looks like a data type (not NOT, not end of input, not comma/rparen)
14509            if !self.is_at_end()
14510                && !self.check(TokenType::Not)
14511                && !self.check(TokenType::Comma)
14512                && !self.check(TokenType::RParen)
14513                && !self.check(TokenType::Semicolon)
14514            {
14515                let tok = self.peek();
14516                // Check for AUTO keyword (SingleStore: PERSISTED AUTO)
14517                if tok.text.to_uppercase() == "AUTO" {
14518                    self.advance(); // consume AUTO
14519                    None // AUTO is not a data type, just a modifier
14520                } else if tok.token_type.is_keyword()
14521                    || tok.token_type == TokenType::Identifier
14522                    || tok.token_type == TokenType::Var
14523                {
14524                    Some(self.parse_data_type()?)
14525                } else {
14526                    None
14527                }
14528            } else {
14529                None
14530            }
14531        } else {
14532            None
14533        };
14534
14535        // For PERSISTED columns, check for NOT NULL
14536        let not_null = if persistence_kind.as_deref() == Some("PERSISTED") {
14537            self.match_keywords(&[TokenType::Not, TokenType::Null])
14538        } else {
14539            false
14540        };
14541
14542        col_def
14543            .constraints
14544            .push(ColumnConstraint::ComputedColumn(ComputedColumn {
14545                expression: Box::new(expr),
14546                persisted,
14547                not_null,
14548                persistence_kind,
14549                data_type,
14550            }));
14551        col_def
14552            .constraint_order
14553            .push(ConstraintType::ComputedColumn);
14554        Ok(())
14555    }
14556
14557    /// Parse PERIOD FOR SYSTEM_TIME (start_col, end_col) as a table constraint.
14558    /// Returns None if this is not actually PERIOD FOR SYSTEM_TIME (e.g., just a column named PERIOD).
14559    fn parse_period_for_system_time_table_constraint(&mut self) -> Result<Option<TableConstraint>> {
14560        // Save position for possible retreat
14561        let saved = self.current;
14562
14563        if self.match_identifier("PERIOD") {
14564            // Check if followed by FOR SYSTEM_TIME
14565            if self.match_token(TokenType::For) {
14566                if self.match_identifier("SYSTEM_TIME") {
14567                    // Parse (start_col, end_col)
14568                    self.expect(TokenType::LParen)?;
14569                    let start_name = self.expect_identifier_or_safe_keyword_with_quoted()?;
14570                    self.expect(TokenType::Comma)?;
14571                    let end_name = self.expect_identifier_or_safe_keyword_with_quoted()?;
14572                    self.expect(TokenType::RParen)?;
14573                    return Ok(Some(TableConstraint::PeriodForSystemTime {
14574                        start_col: start_name,
14575                        end_col: end_name,
14576                    }));
14577                }
14578            }
14579        }
14580
14581        // Not PERIOD FOR SYSTEM_TIME, retreat
14582        self.current = saved;
14583        Ok(None)
14584    }
14585
14586    /// Parse MySQL table options that appear after the closing paren of column definitions.
14587    /// Handles ENGINE=val, AUTO_INCREMENT=val, DEFAULT CHARSET=val, ROW_FORMAT=val,
14588    /// COMMENT='val', COLLATE=val, etc.
14589    fn parse_mysql_table_options(&mut self) -> Vec<(String, String)> {
14590        let mut options = Vec::new();
14591        loop {
14592            // Skip optional commas between options
14593            self.match_token(TokenType::Comma);
14594
14595            // DEFAULT CHARSET=val or DEFAULT CHARACTER SET=val
14596            if self.check(TokenType::Default) {
14597                let saved = self.current;
14598                self.advance(); // consume DEFAULT
14599                if self.check_identifier("CHARSET") || self.check_identifier("CHARACTER") {
14600                    let is_character = self.check_identifier("CHARACTER");
14601                    let key_part = self.advance().text.to_uppercase();
14602                    if is_character {
14603                        // CHARACTER SET
14604                        self.match_token(TokenType::Set);
14605                    }
14606                    if self.match_token(TokenType::Eq) {
14607                        let value = if self.check(TokenType::String) {
14608                            let v = format!("'{}'", self.peek().text);
14609                            self.advance();
14610                            v
14611                        } else if self.is_identifier_token()
14612                            || self.is_safe_keyword_as_identifier()
14613                            || self.check(TokenType::Number)
14614                        {
14615                            self.advance().text
14616                        } else {
14617                            self.current = saved;
14618                            break;
14619                        };
14620                        // Normalize CHARSET -> CHARACTER SET
14621                        let key = if is_character || key_part == "CHARSET" {
14622                            "DEFAULT CHARACTER SET".to_string()
14623                        } else {
14624                            format!("DEFAULT {}", key_part)
14625                        };
14626                        options.push((key, value));
14627                        continue;
14628                    }
14629                }
14630                self.current = saved;
14631                break;
14632            }
14633
14634            // ENGINE=val, AUTO_INCREMENT=val, ROW_FORMAT=val, COLLATE=val, KEY_BLOCK_SIZE=val
14635            let is_known_option = self.check_identifier("ENGINE")
14636                || self.check(TokenType::AutoIncrement)
14637                || self.check_identifier("ROW_FORMAT")
14638                || self.check(TokenType::Collate)
14639                || self.check_identifier("KEY_BLOCK_SIZE")
14640                || self.check_identifier("PACK_KEYS")
14641                || self.check_identifier("STATS_AUTO_RECALC")
14642                || self.check_identifier("STATS_PERSISTENT")
14643                || self.check_identifier("STATS_SAMPLE_PAGES")
14644                || self.check_identifier("MAX_ROWS")
14645                || self.check_identifier("MIN_ROWS")
14646                || self.check_identifier("CHECKSUM")
14647                || self.check_identifier("DELAY_KEY_WRITE")
14648                || self.check_identifier("COMPRESSION")
14649                || self.check_identifier("CONNECTION")
14650                || self.check_identifier("TABLESPACE")
14651                || self.check_identifier("ENCRYPTION");
14652
14653            if is_known_option {
14654                let key = self.advance().text.to_uppercase();
14655                if self.match_token(TokenType::Eq) {
14656                    let value = if self.check(TokenType::String) {
14657                        let v = format!("'{}'", self.peek().text);
14658                        self.advance();
14659                        v
14660                    } else if self.check(TokenType::Number) {
14661                        self.advance().text
14662                    } else if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
14663                        self.advance().text
14664                    } else {
14665                        break;
14666                    };
14667                    options.push((key, value));
14668                    continue;
14669                }
14670                break;
14671            }
14672
14673            // COMMENT='val' (Comment is a keyword token type)
14674            if self.check(TokenType::Comment) {
14675                let saved = self.current;
14676                self.advance(); // consume COMMENT
14677                if self.match_token(TokenType::Eq) {
14678                    if self.check(TokenType::String) {
14679                        let v = format!("'{}'", self.peek().text);
14680                        self.advance();
14681                        options.push(("COMMENT".to_string(), v));
14682                        continue;
14683                    }
14684                } else if self.check(TokenType::String) {
14685                    let v = format!("'{}'", self.peek().text);
14686                    self.advance();
14687                    options.push(("COMMENT".to_string(), v));
14688                    continue;
14689                }
14690                self.current = saved;
14691                break;
14692            }
14693
14694            // CHARACTER SET=val or CHARSET=val (without DEFAULT prefix)
14695            if self.check_identifier("CHARACTER") || self.check_identifier("CHARSET") {
14696                let saved = self.current;
14697                let is_character = self.check_identifier("CHARACTER");
14698                self.advance(); // consume CHARACTER or CHARSET
14699                if is_character {
14700                    // CHARACTER SET
14701                    if !self.match_token(TokenType::Set) {
14702                        self.current = saved;
14703                        break;
14704                    }
14705                }
14706                if self.match_token(TokenType::Eq) {
14707                    let value = if self.check(TokenType::String) {
14708                        let v = format!("'{}'", self.peek().text);
14709                        self.advance();
14710                        v
14711                    } else if self.is_identifier_token()
14712                        || self.is_safe_keyword_as_identifier()
14713                        || self.check(TokenType::Number)
14714                    {
14715                        self.advance().text
14716                    } else {
14717                        self.current = saved;
14718                        break;
14719                    };
14720                    options.push(("CHARACTER SET".to_string(), value));
14721                    continue;
14722                }
14723                self.current = saved;
14724                break;
14725            }
14726
14727            break;
14728        }
14729        options
14730    }
14731
14732    /// Parse Hive-specific table properties that appear after column definitions.
14733    /// Handles: ROW FORMAT (SERDE/DELIMITED), STORED AS/BY, LOCATION, TBLPROPERTIES
14734    fn parse_hive_table_properties(&mut self) -> Result<Vec<Expression>> {
14735        let mut properties = Vec::new();
14736
14737        loop {
14738            // ROW FORMAT SERDE 'class' [WITH SERDEPROPERTIES (...)]
14739            // ROW FORMAT DELIMITED [FIELDS TERMINATED BY ...] [...]
14740            if self.match_token(TokenType::Row) {
14741                if let Some(row_format) = self.parse_row()? {
14742                    properties.push(row_format);
14743                    continue;
14744                }
14745            }
14746
14747            // STORED AS INPUTFORMAT 'input' OUTPUTFORMAT 'output'
14748            // STORED AS format_name
14749            // STORED BY 'storage_handler_class'
14750            if self.match_identifier("STORED") {
14751                if self.match_token(TokenType::By) {
14752                    // STORED BY 'storage_handler_class'
14753                    let handler = self.parse_string()?.unwrap_or(Expression::Null(Null));
14754                    properties.push(Expression::StorageHandlerProperty(Box::new(
14755                        StorageHandlerProperty {
14756                            this: Box::new(handler),
14757                        },
14758                    )));
14759                    continue;
14760                } else if self.match_token(TokenType::As) {
14761                    // STORED AS INPUTFORMAT 'x' OUTPUTFORMAT 'y' or STORED AS format
14762                    if self.match_token(TokenType::InputFormat) {
14763                        let input_format = self.parse_string()?;
14764                        let output_format = if self.match_identifier("OUTPUTFORMAT") {
14765                            self.parse_string()?
14766                        } else {
14767                            None
14768                        };
14769                        // Use InputOutputFormat inside FileFormatProperty.this
14770                        let io_format =
14771                            Expression::InputOutputFormat(Box::new(InputOutputFormat {
14772                                input_format: input_format.map(Box::new),
14773                                output_format: output_format.map(Box::new),
14774                            }));
14775                        properties.push(Expression::FileFormatProperty(Box::new(
14776                            FileFormatProperty {
14777                                this: Some(Box::new(io_format)),
14778                                expressions: vec![],
14779                                hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
14780                                    value: true,
14781                                }))),
14782                            },
14783                        )));
14784                        continue;
14785                    } else {
14786                        // STORED AS format_name (e.g., STORED AS TEXTFILE, STORED AS ORC)
14787                        let format = if self.check(TokenType::String) {
14788                            Expression::Literal(Literal::String(self.advance().text.clone()))
14789                        } else if self.is_identifier_token() || self.is_safe_keyword_as_identifier()
14790                        {
14791                            Expression::Identifier(Identifier::new(self.advance().text.clone()))
14792                        } else {
14793                            break;
14794                        };
14795                        properties.push(Expression::FileFormatProperty(Box::new(
14796                            FileFormatProperty {
14797                                this: Some(Box::new(format)),
14798                                expressions: vec![],
14799                                hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
14800                                    value: true,
14801                                }))),
14802                            },
14803                        )));
14804                        continue;
14805                    }
14806                }
14807            }
14808
14809            // USING format_name (Databricks/Spark) e.g., USING DELTA, USING PARQUET
14810            // This is similar to STORED AS but uses different syntax
14811            if self.match_token(TokenType::Using) {
14812                // Parse the format name (e.g., DELTA, PARQUET, ICEBERG, etc.)
14813                let format = if self.check(TokenType::String) {
14814                    Expression::Literal(Literal::String(self.advance().text.clone()))
14815                } else if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
14816                    Expression::Identifier(Identifier::new(self.advance().text.clone()))
14817                } else {
14818                    break;
14819                };
14820                // Create FileFormatProperty WITHOUT hive_format to signal USING syntax
14821                properties.push(Expression::FileFormatProperty(Box::new(
14822                    FileFormatProperty {
14823                        this: Some(Box::new(format)),
14824                        expressions: vec![],
14825                        hive_format: None, // None indicates USING syntax (not STORED AS)
14826                    },
14827                )));
14828                continue;
14829            }
14830
14831            // LOCATION 'path'
14832            if self.match_identifier("LOCATION") {
14833                let path = self.parse_string()?.unwrap_or(Expression::Null(Null));
14834                properties.push(Expression::LocationProperty(Box::new(LocationProperty {
14835                    this: Box::new(path),
14836                })));
14837                continue;
14838            }
14839
14840            // TBLPROPERTIES ('key'='value', ...)
14841            if self.match_identifier("TBLPROPERTIES") {
14842                // Parse the property list manually since parse_property doesn't handle key=value
14843                self.expect(TokenType::LParen)?;
14844                let mut prop_exprs = Vec::new();
14845                loop {
14846                    if self.check(TokenType::RParen) {
14847                        break;
14848                    }
14849                    // Parse 'key'='value' or key=value
14850                    let key = self.parse_primary()?;
14851                    if self.match_token(TokenType::Eq) {
14852                        let value = self.parse_primary()?;
14853                        prop_exprs.push(Expression::Eq(Box::new(BinaryOp::new(key, value))));
14854                    } else {
14855                        prop_exprs.push(key);
14856                    }
14857                    if !self.match_token(TokenType::Comma) {
14858                        break;
14859                    }
14860                }
14861                self.expect(TokenType::RParen)?;
14862                properties.push(Expression::Properties(Box::new(Properties {
14863                    expressions: prop_exprs,
14864                })));
14865                continue;
14866            }
14867
14868            // DISTRIBUTED BY HASH (col1, col2) [BUCKETS n] (StarRocks/Doris)
14869            if self.match_identifier("DISTRIBUTED") {
14870                if let Some(dist_prop) = self.parse_distributed_property()? {
14871                    properties.push(dist_prop);
14872                    continue;
14873                }
14874            }
14875
14876            // CLUSTERED BY (col, col, ...) [SORTED BY (col, col, ...)] INTO n BUCKETS (Hive/Athena)
14877            if self.match_identifier("CLUSTERED") {
14878                self.expect(TokenType::By)?;
14879                self.expect(TokenType::LParen)?;
14880                let expressions = self.parse_expression_list()?;
14881                self.expect(TokenType::RParen)?;
14882
14883                // Optional SORTED BY (col, col, ...)
14884                let sorted_by = if self.match_identifier("SORTED") {
14885                    self.expect(TokenType::By)?;
14886                    self.expect(TokenType::LParen)?;
14887                    let sorted_exprs = self.parse_expression_list()?;
14888                    self.expect(TokenType::RParen)?;
14889                    Some(Box::new(Expression::Tuple(Box::new(Tuple {
14890                        expressions: sorted_exprs,
14891                    }))))
14892                } else {
14893                    None
14894                };
14895
14896                // INTO n BUCKETS
14897                let buckets = if self.match_token(TokenType::Into) {
14898                    let num = self.parse_expression()?;
14899                    if !self.match_identifier("BUCKETS") {
14900                        return Err(self.parse_error("Expected BUCKETS after INTO <n>"));
14901                    }
14902                    Some(Box::new(num))
14903                } else {
14904                    None
14905                };
14906
14907                properties.push(Expression::ClusteredByProperty(Box::new(
14908                    ClusteredByProperty {
14909                        expressions,
14910                        sorted_by,
14911                        buckets,
14912                    },
14913                )));
14914                continue;
14915            }
14916
14917            // PARTITIONED BY (col, col, ...) or PARTITIONED BY (col, BUCKET(n, col), ...) (Hive/Athena/Iceberg)
14918            if self.match_identifier("PARTITIONED") {
14919                self.expect(TokenType::By)?;
14920                self.expect(TokenType::LParen)?;
14921
14922                let mut partition_exprs = Vec::new();
14923                loop {
14924                    if self.check(TokenType::RParen) {
14925                        break;
14926                    }
14927
14928                    // Check for transform functions like BUCKET(n, col), TRUNCATE(n, col), etc.
14929                    if self.check_identifier("BUCKET") || self.check_identifier("TRUNCATE") {
14930                        let func_name = self.advance().text.clone();
14931                        self.expect(TokenType::LParen)?;
14932                        let args = self.parse_expression_list()?;
14933                        self.expect(TokenType::RParen)?;
14934
14935                        // Create a Function expression for BUCKET/TRUNCATE
14936                        partition_exprs.push(Expression::Function(Box::new(Function {
14937                            name: func_name,
14938                            args,
14939                            distinct: false,
14940                            trailing_comments: Vec::new(),
14941                            use_bracket_syntax: false,
14942                            no_parens: false,
14943                            quoted: false,
14944                            span: None,
14945                            inferred_type: None,
14946                        })));
14947                    } else {
14948                        // Try to parse as column definition (name data_type) for Hive-style partitioned by
14949                        // e.g., PARTITIONED BY (y INT, z STRING)
14950                        let saved_pos = self.current;
14951                        let mut parsed_as_column = false;
14952                        // Allow type keywords (like DATE, TIMESTAMP) as column names in PARTITIONED BY
14953                        if self.check(TokenType::Var)
14954                            || self.check(TokenType::Identifier)
14955                            || self.check(TokenType::Date)
14956                            || self.check(TokenType::Timestamp)
14957                            || self.check(TokenType::Int)
14958                            || self.check(TokenType::BigInt)
14959                            || self.check(TokenType::SmallInt)
14960                            || self.check(TokenType::TinyInt)
14961                            || self.check(TokenType::Float)
14962                            || self.check(TokenType::Double)
14963                            || self.check(TokenType::Boolean)
14964                        {
14965                            let col_name = self.advance().text.clone();
14966                            // Check if next token looks like a data type
14967                            if self.check(TokenType::Var)
14968                                || self.check(TokenType::Identifier)
14969                                || self.check(TokenType::Int)
14970                                || self.check(TokenType::BigInt)
14971                                || self.check(TokenType::SmallInt)
14972                                || self.check(TokenType::TinyInt)
14973                                || self.check(TokenType::Float)
14974                                || self.check(TokenType::Double)
14975                                || self.check(TokenType::Boolean)
14976                                || self.check(TokenType::Date)
14977                                || self.check(TokenType::Timestamp)
14978                            {
14979                                let type_text = self.peek().text.to_uppercase();
14980                                let is_type = matches!(
14981                                    type_text.as_str(),
14982                                    "INT"
14983                                        | "INTEGER"
14984                                        | "BIGINT"
14985                                        | "SMALLINT"
14986                                        | "TINYINT"
14987                                        | "FLOAT"
14988                                        | "DOUBLE"
14989                                        | "DECIMAL"
14990                                        | "NUMERIC"
14991                                        | "STRING"
14992                                        | "VARCHAR"
14993                                        | "CHAR"
14994                                        | "BINARY"
14995                                        | "BOOLEAN"
14996                                        | "DATE"
14997                                        | "TIMESTAMP"
14998                                        | "DATETIME"
14999                                        | "ARRAY"
15000                                        | "MAP"
15001                                        | "STRUCT"
15002                                );
15003                                if is_type {
15004                                    // Parse as column definition
15005                                    let data_type = self.parse_data_type()?;
15006                                    // Store as ColumnDef expression
15007                                    partition_exprs.push(Expression::ColumnDef(Box::new(
15008                                        crate::expressions::ColumnDef::new(col_name, data_type),
15009                                    )));
15010                                    parsed_as_column = true;
15011                                }
15012                            }
15013                        }
15014                        if !parsed_as_column {
15015                            // Backtrack and parse as regular expression
15016                            self.current = saved_pos;
15017                            partition_exprs.push(self.parse_expression()?);
15018                        }
15019                    }
15020
15021                    if !self.match_token(TokenType::Comma) {
15022                        break;
15023                    }
15024                }
15025                self.expect(TokenType::RParen)?;
15026
15027                properties.push(Expression::PartitionedByProperty(Box::new(
15028                    PartitionedByProperty {
15029                        this: Box::new(Expression::Tuple(Box::new(Tuple {
15030                            expressions: partition_exprs,
15031                        }))),
15032                    },
15033                )));
15034                continue;
15035            }
15036
15037            // No more Hive properties
15038            break;
15039        }
15040
15041        Ok(properties)
15042    }
15043
15044    /// Parse table-level properties that appear after the closing paren of column definitions.
15045    /// Currently handles TSQL WITH(SYSTEM_VERSIONING=ON(...)).
15046    fn parse_post_table_properties(&mut self) -> Result<Vec<Expression>> {
15047        let mut properties = Vec::new();
15048
15049        // Doris/StarRocks: UNIQUE KEY (cols) or DUPLICATE KEY (cols) after column definitions
15050        // These are table key properties that define the distribution/sort key
15051        let is_doris_starrocks = matches!(
15052            self.config.dialect,
15053            Some(crate::dialects::DialectType::Doris)
15054                | Some(crate::dialects::DialectType::StarRocks)
15055        );
15056        if is_doris_starrocks {
15057            // UNIQUE KEY (c1, c2, ...) - defines unique key columns
15058            if self.match_text_seq(&["UNIQUE", "KEY"]) {
15059                let exprs = self.parse_composite_key_expressions()?;
15060                properties.push(Expression::UniqueKeyProperty(Box::new(
15061                    crate::expressions::UniqueKeyProperty { expressions: exprs },
15062                )));
15063            }
15064            // DUPLICATE KEY (c1, c2, ...) - defines duplicate key columns
15065            else if self.match_text_seq(&["DUPLICATE", "KEY"]) {
15066                let exprs = self.parse_composite_key_expressions()?;
15067                properties.push(Expression::DuplicateKeyProperty(Box::new(
15068                    crate::expressions::DuplicateKeyProperty { expressions: exprs },
15069                )));
15070            }
15071
15072            // DISTRIBUTED BY HASH (col1, col2) [BUCKETS n] - comes after UNIQUE KEY / DUPLICATE KEY
15073            if self.match_identifier("DISTRIBUTED") {
15074                if let Some(dist_prop) = self.parse_distributed_property()? {
15075                    properties.push(dist_prop);
15076                }
15077            }
15078
15079            // PROPERTIES ('key'='value', ...) - comes after DISTRIBUTED BY
15080            if self.match_identifier("PROPERTIES") {
15081                let props = self.parse_options_list()?;
15082                if !props.is_empty() {
15083                    properties.push(Expression::Properties(Box::new(Properties {
15084                        expressions: props,
15085                    })));
15086                }
15087            }
15088        }
15089
15090        // Check for WITH( that might contain SYSTEM_VERSIONING
15091        // We need to be careful not to consume a WITH that is meant for WITH properties
15092        // or other purposes. We only handle WITH(SYSTEM_VERSIONING=...) here.
15093        if self.check(TokenType::With) {
15094            // Look ahead: WITH followed by ( followed by SYSTEM_VERSIONING
15095            let saved = self.current;
15096            if self.match_token(TokenType::With) {
15097                if self.match_token(TokenType::LParen) {
15098                    if self.check_identifier("SYSTEM_VERSIONING") {
15099                        self.advance(); // consume SYSTEM_VERSIONING
15100                        self.expect(TokenType::Eq)?;
15101
15102                        let on = if self.match_token(TokenType::On) {
15103                            true
15104                        } else if self.match_identifier("OFF") {
15105                            false
15106                        } else {
15107                            return Err(
15108                                self.parse_error("Expected ON or OFF after SYSTEM_VERSIONING=")
15109                            );
15110                        };
15111
15112                        let mut history_table = None;
15113                        let mut data_consistency = None;
15114
15115                        // Optional parameters: ON(HISTORY_TABLE=..., DATA_CONSISTENCY_CHECK=...)
15116                        if on && self.match_token(TokenType::LParen) {
15117                            loop {
15118                                if self.check(TokenType::RParen) {
15119                                    break;
15120                                }
15121                                if self.match_identifier("HISTORY_TABLE") {
15122                                    self.expect(TokenType::Eq)?;
15123                                    // Parse table reference (could be [dbo].[table])
15124                                    let table_ref = self.parse_table_ref()?;
15125                                    history_table = Some(Expression::Table(table_ref));
15126                                } else if self.match_identifier("DATA_CONSISTENCY_CHECK") {
15127                                    self.expect(TokenType::Eq)?;
15128                                    let val = self.expect_identifier_or_keyword()?;
15129                                    data_consistency = Some(Expression::Identifier(
15130                                        crate::expressions::Identifier::new(val),
15131                                    ));
15132                                } else if self.check(TokenType::RParen) {
15133                                    break;
15134                                } else {
15135                                    self.advance();
15136                                }
15137                                self.match_token(TokenType::Comma);
15138                            }
15139                            self.expect(TokenType::RParen)?;
15140                        }
15141
15142                        self.expect(TokenType::RParen)?; // close WITH(...)
15143
15144                        properties.push(Expression::WithSystemVersioningProperty(Box::new(
15145                            WithSystemVersioningProperty {
15146                                on: if on {
15147                                    Some(Box::new(Expression::Boolean(
15148                                        crate::expressions::BooleanLiteral { value: true },
15149                                    )))
15150                                } else {
15151                                    None
15152                                },
15153                                this: history_table.map(Box::new),
15154                                data_consistency: data_consistency.map(Box::new),
15155                                retention_period: None,
15156                                with_: Some(Box::new(Expression::Boolean(
15157                                    crate::expressions::BooleanLiteral { value: true },
15158                                ))),
15159                            },
15160                        )));
15161                    } else {
15162                        // Not SYSTEM_VERSIONING, retreat
15163                        self.current = saved;
15164                    }
15165                } else {
15166                    // Not WITH(...), retreat
15167                    self.current = saved;
15168                }
15169            }
15170        }
15171
15172        Ok(properties)
15173    }
15174
15175    /// Parse composite key expressions for UNIQUE KEY (cols) or DUPLICATE KEY (cols)
15176    /// Returns a vector of column identifiers
15177    fn parse_composite_key_expressions(&mut self) -> Result<Vec<Expression>> {
15178        self.expect(TokenType::LParen)?;
15179        let mut expressions = Vec::new();
15180        loop {
15181            if let Some(id) = self.parse_id_var()? {
15182                expressions.push(id);
15183            } else {
15184                break;
15185            }
15186            if !self.match_token(TokenType::Comma) {
15187                break;
15188            }
15189        }
15190        self.expect(TokenType::RParen)?;
15191        Ok(expressions)
15192    }
15193
15194    /// Parse a table-level constraint
15195    fn parse_table_constraint(&mut self) -> Result<TableConstraint> {
15196        // Optional constraint name
15197        let name = if self.match_token(TokenType::Constraint) {
15198            // Use safe keyword version to accept keywords as constraint names (e.g., CONSTRAINT identity CHECK ...)
15199            Some(self.expect_identifier_or_safe_keyword_with_quoted()?)
15200        } else {
15201            None
15202        };
15203
15204        self.parse_constraint_definition(name)
15205    }
15206
15207    /// Parse constraint definition (after optional CONSTRAINT name)
15208    fn parse_constraint_definition(&mut self, name: Option<Identifier>) -> Result<TableConstraint> {
15209        if self.match_keywords(&[TokenType::PrimaryKey, TokenType::Key]) {
15210            // PRIMARY KEY [CLUSTERED|NONCLUSTERED] [name] (col1, col2) [INCLUDE (col3, col4)]
15211            // MySQL allows: PRIMARY KEY pk_name (col1, col2)
15212            // TSQL allows: PRIMARY KEY CLUSTERED (col1, col2)
15213
15214            // Check for TSQL CLUSTERED/NONCLUSTERED modifier
15215            let clustered = if self.check_identifier("CLUSTERED") {
15216                self.advance();
15217                Some("CLUSTERED".to_string())
15218            } else if self.check_identifier("NONCLUSTERED") {
15219                self.advance();
15220                Some("NONCLUSTERED".to_string())
15221            } else {
15222                None
15223            };
15224
15225            let actual_name = if name.is_none() && !self.check(TokenType::LParen) {
15226                if matches!(
15227                    self.config.dialect,
15228                    Some(crate::dialects::DialectType::ClickHouse)
15229                ) {
15230                    // ClickHouse: PRIMARY KEY col (without parentheses)
15231                    None
15232                } else if self.is_identifier_token() || self.check(TokenType::QuotedIdentifier) {
15233                    Some(self.expect_identifier_with_quoted()?)
15234                } else if self.check(TokenType::String)
15235                    && matches!(
15236                        self.config.dialect,
15237                        Some(crate::dialects::DialectType::MySQL)
15238                    )
15239                {
15240                    // MySQL: double-quoted strings can be used as constraint names
15241                    // e.g., PRIMARY KEY "pk_name" (id) -> PRIMARY KEY `pk_name` (id)
15242                    let s = self.advance().text.clone();
15243                    Some(Identifier {
15244                        name: s,
15245                        quoted: true,
15246                        trailing_comments: Vec::new(),
15247                        span: None,
15248                    })
15249                } else {
15250                    None
15251                }
15252            } else {
15253                name.clone()
15254            };
15255            // ClickHouse: PRIMARY KEY col without parens — parse single column
15256            let columns = if matches!(
15257                self.config.dialect,
15258                Some(crate::dialects::DialectType::ClickHouse)
15259            ) && !self.check(TokenType::LParen)
15260                && (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
15261            {
15262                let col_name = self.expect_identifier_or_keyword_with_quoted()?;
15263                vec![col_name]
15264            } else {
15265                self.expect(TokenType::LParen)?;
15266                // ClickHouse: allow empty PRIMARY KEY ()
15267                let cols = if self.check(TokenType::RParen) {
15268                    Vec::new()
15269                } else if matches!(
15270                    self.config.dialect,
15271                    Some(crate::dialects::DialectType::ClickHouse)
15272                ) {
15273                    // ClickHouse: PRIMARY KEY(v1, gcd(v1, v2)) - expressions allowed
15274                    let mut exprs = Vec::new();
15275                    loop {
15276                        let expr = self.parse_expression()?;
15277                        let name = self.expression_to_sql(&expr);
15278                        exprs.push(Identifier::new(name));
15279                        if !self.match_token(TokenType::Comma) {
15280                            break;
15281                        }
15282                    }
15283                    exprs
15284                } else {
15285                    self.parse_index_identifier_list()?
15286                };
15287                self.expect(TokenType::RParen)?;
15288                cols
15289            };
15290            // Parse optional INCLUDE (columns)
15291            let include_columns = if self.match_identifier("INCLUDE") {
15292                self.expect(TokenType::LParen)?;
15293                let cols = self.parse_identifier_list()?;
15294                self.expect(TokenType::RParen)?;
15295                cols
15296            } else {
15297                Vec::new()
15298            };
15299            // Parse optional constraint modifiers (ENFORCED, DEFERRABLE, etc.)
15300            let mut modifiers = self.parse_constraint_modifiers();
15301            modifiers.clustered = clustered;
15302            let has_constraint_keyword = name.is_some();
15303            Ok(TableConstraint::PrimaryKey {
15304                name: actual_name.or(name),
15305                columns,
15306                include_columns,
15307                modifiers,
15308                has_constraint_keyword,
15309            })
15310        } else if self.match_token(TokenType::Unique) {
15311            // UNIQUE [CLUSTERED|NONCLUSTERED] [KEY|INDEX] [NULLS NOT DISTINCT] [name] (col1, col2) or UNIQUE column_name
15312            // MySQL allows: UNIQUE KEY name (cols), UNIQUE INDEX name (cols), UNIQUE (cols)
15313            // TSQL allows: UNIQUE CLUSTERED (cols)
15314            // PostgreSQL 15+: UNIQUE NULLS NOT DISTINCT (cols)
15315
15316            // Check for TSQL CLUSTERED/NONCLUSTERED modifier
15317            let clustered = if self.check_identifier("CLUSTERED") {
15318                self.advance();
15319                Some("CLUSTERED".to_string())
15320            } else if self.check_identifier("NONCLUSTERED") {
15321                self.advance();
15322                Some("NONCLUSTERED".to_string())
15323            } else {
15324                None
15325            };
15326
15327            let use_key_keyword =
15328                self.match_token(TokenType::Key) || self.match_token(TokenType::Index);
15329
15330            // Check for NULLS NOT DISTINCT (PostgreSQL 15+ feature)
15331            let nulls_not_distinct = self.match_text_seq(&["NULLS", "NOT", "DISTINCT"]);
15332
15333            // Check for optional constraint name (before columns)
15334            let actual_name = if name.is_none()
15335                && self.is_identifier_token()
15336                && !self.check_next(TokenType::Comma)
15337            {
15338                // Name might be here: UNIQUE KEY idx_name (cols)
15339                if self.check_next(TokenType::LParen) {
15340                    Some(self.expect_identifier_with_quoted()?)
15341                } else {
15342                    None
15343                }
15344            } else {
15345                name.clone()
15346            };
15347
15348            if self.match_token(TokenType::LParen) {
15349                let columns = self.parse_index_identifier_list()?;
15350                self.expect(TokenType::RParen)?;
15351                let mut modifiers = self.parse_constraint_modifiers();
15352                modifiers.clustered = clustered;
15353                if use_key_keyword {
15354                    // UNIQUE KEY/INDEX - use Index constraint type with UNIQUE kind
15355                    Ok(TableConstraint::Index {
15356                        name: actual_name.or(name),
15357                        columns,
15358                        kind: Some("UNIQUE".to_string()),
15359                        modifiers,
15360                        use_key_keyword,
15361                        expression: None,
15362                        index_type: None,
15363                        granularity: None,
15364                    })
15365                } else {
15366                    let has_constraint_keyword = name.is_some();
15367                    Ok(TableConstraint::Unique {
15368                        name: actual_name.or(name),
15369                        columns,
15370                        columns_parenthesized: true,
15371                        modifiers,
15372                        has_constraint_keyword,
15373                        nulls_not_distinct,
15374                    })
15375                }
15376            } else {
15377                // Single column unique (for ALTER TABLE ADD CONSTRAINT name UNIQUE colname)
15378                let col_name = self.expect_identifier()?;
15379                let mut modifiers = self.parse_constraint_modifiers();
15380                modifiers.clustered = clustered;
15381                let has_constraint_keyword = name.is_some();
15382                Ok(TableConstraint::Unique {
15383                    name: actual_name.or(name),
15384                    columns: vec![Identifier::new(col_name)],
15385                    columns_parenthesized: false,
15386                    modifiers,
15387                    has_constraint_keyword,
15388                    nulls_not_distinct,
15389                })
15390            }
15391        } else if self.match_keywords(&[TokenType::ForeignKey, TokenType::Key]) {
15392            // FOREIGN KEY (col1) [REFERENCES other_table(col2)] [ON DELETE ...] [ON UPDATE ...]
15393            self.expect(TokenType::LParen)?;
15394            let columns = self.parse_identifier_list()?;
15395            self.expect(TokenType::RParen)?;
15396            if self.match_token(TokenType::References) {
15397                let references = self.parse_foreign_key_ref()?;
15398                let modifiers = self.parse_constraint_modifiers();
15399                Ok(TableConstraint::ForeignKey {
15400                    name,
15401                    columns,
15402                    references: Some(references),
15403                    on_delete: None,
15404                    on_update: None,
15405                    modifiers,
15406                })
15407            } else {
15408                // No REFERENCES - parse optional ON DELETE/ON UPDATE directly
15409                let mut on_delete = None;
15410                let mut on_update = None;
15411                loop {
15412                    if self.check(TokenType::On) {
15413                        let saved = self.current;
15414                        self.advance(); // consume ON
15415                        if self.match_token(TokenType::Delete) {
15416                            on_delete = Some(self.parse_referential_action()?);
15417                        } else if self.match_token(TokenType::Update) {
15418                            on_update = Some(self.parse_referential_action()?);
15419                        } else {
15420                            self.current = saved;
15421                            break;
15422                        }
15423                    } else {
15424                        break;
15425                    }
15426                }
15427                let modifiers = self.parse_constraint_modifiers();
15428                Ok(TableConstraint::ForeignKey {
15429                    name,
15430                    columns,
15431                    references: None,
15432                    on_delete,
15433                    on_update,
15434                    modifiers,
15435                })
15436            }
15437        } else if self.match_token(TokenType::Check) {
15438            // CHECK (expression) or CHECK (SELECT ...) or ClickHouse: CHECK expression (without parens)
15439            let expression = if self.match_token(TokenType::LParen) {
15440                let expr = if self.check(TokenType::Select) || self.check(TokenType::With) {
15441                    // Subquery in CHECK constraint
15442                    let stmt = self.parse_statement()?;
15443                    Expression::Subquery(Box::new(Subquery {
15444                        this: stmt,
15445                        alias: None,
15446                        column_aliases: Vec::new(),
15447                        order_by: None,
15448                        limit: None,
15449                        offset: None,
15450                        distribute_by: None,
15451                        sort_by: None,
15452                        cluster_by: None,
15453                        lateral: false,
15454                        modifiers_inside: false,
15455                        trailing_comments: Vec::new(),
15456                        inferred_type: None,
15457                    }))
15458                } else {
15459                    self.parse_expression()?
15460                };
15461                self.expect(TokenType::RParen)?;
15462                expr
15463            } else if matches!(
15464                self.config.dialect,
15465                Some(crate::dialects::DialectType::ClickHouse)
15466            ) {
15467                self.parse_or()?
15468            } else {
15469                self.expect(TokenType::LParen)?;
15470                unreachable!()
15471            };
15472            let modifiers = self.parse_constraint_modifiers();
15473            Ok(TableConstraint::Check {
15474                name,
15475                expression,
15476                modifiers,
15477            })
15478        } else if self.match_token(TokenType::Exclude) {
15479            // PostgreSQL EXCLUDE constraint
15480            // EXCLUDE [USING method] (element WITH operator, ...) [INCLUDE (cols)] [WHERE (expr)] [WITH (params)]
15481            let using = if self.match_token(TokenType::Using) {
15482                Some(self.expect_identifier()?)
15483            } else {
15484                None
15485            };
15486
15487            self.expect(TokenType::LParen)?;
15488            let mut elements = Vec::new();
15489            loop {
15490                // Parse element expression: may be a function call like INT4RANGE(vid, nid)
15491                // or column name possibly with operator class, ASC/DESC, NULLS FIRST/LAST
15492                let mut expr_parts = Vec::new();
15493                let mut paren_depth = 0;
15494                while !self.is_at_end() {
15495                    if self.check(TokenType::LParen) {
15496                        paren_depth += 1;
15497                        expr_parts.push(self.advance().text);
15498                    } else if self.check(TokenType::RParen) {
15499                        if paren_depth == 0 {
15500                            break;
15501                        }
15502                        paren_depth -= 1;
15503                        expr_parts.push(self.advance().text);
15504                    } else if paren_depth == 0 && self.check(TokenType::With) {
15505                        break;
15506                    } else if self.check(TokenType::String) {
15507                        // Preserve string literal quotes
15508                        let token = self.advance();
15509                        expr_parts.push(format!("'{}'", token.text));
15510                    } else {
15511                        expr_parts.push(self.advance().text);
15512                    }
15513                }
15514                let expression = expr_parts
15515                    .join(" ")
15516                    .replace(" (", "(")
15517                    .replace(" )", ")")
15518                    .replace("( ", "(")
15519                    .replace(" ,", ",");
15520
15521                // Parse WITH operator
15522                self.expect(TokenType::With)?;
15523                let operator = self.advance().text.clone();
15524
15525                elements.push(ExcludeElement {
15526                    expression,
15527                    operator,
15528                });
15529
15530                if !self.match_token(TokenType::Comma) {
15531                    break;
15532                }
15533            }
15534            self.expect(TokenType::RParen)?;
15535
15536            // Parse optional INCLUDE (columns)
15537            let include_columns = if self.match_identifier("INCLUDE") {
15538                self.expect(TokenType::LParen)?;
15539                let cols = self.parse_identifier_list()?;
15540                self.expect(TokenType::RParen)?;
15541                cols
15542            } else {
15543                Vec::new()
15544            };
15545
15546            // Parse optional WITH (storage_parameters)
15547            let with_params = if self.match_token(TokenType::With) {
15548                self.expect(TokenType::LParen)?;
15549                let mut params = Vec::new();
15550                loop {
15551                    let key = self.expect_identifier()?;
15552                    self.expect(TokenType::Eq)?;
15553                    let val = self.advance().text.clone();
15554                    params.push((key, val));
15555                    if !self.match_token(TokenType::Comma) {
15556                        break;
15557                    }
15558                }
15559                self.expect(TokenType::RParen)?;
15560                params
15561            } else {
15562                Vec::new()
15563            };
15564
15565            // Parse optional USING INDEX TABLESPACE tablespace_name
15566            let using_index_tablespace =
15567                if self.check(TokenType::Using) && self.check_next(TokenType::Index) {
15568                    self.advance(); // consume USING
15569                    self.advance(); // consume INDEX
15570                    if self.match_identifier("TABLESPACE") {
15571                        Some(self.expect_identifier()?)
15572                    } else {
15573                        None
15574                    }
15575                } else {
15576                    None
15577                };
15578
15579            // Parse optional WHERE clause
15580            let where_clause = if self.match_token(TokenType::Where) {
15581                self.expect(TokenType::LParen)?;
15582                let expr = self.parse_expression()?;
15583                self.expect(TokenType::RParen)?;
15584                Some(Box::new(expr))
15585            } else {
15586                None
15587            };
15588
15589            let modifiers = self.parse_constraint_modifiers();
15590            Ok(TableConstraint::Exclude {
15591                name,
15592                using,
15593                elements,
15594                include_columns,
15595                where_clause,
15596                with_params,
15597                using_index_tablespace,
15598                modifiers,
15599            })
15600        } else if matches!(
15601            self.config.dialect,
15602            Some(crate::dialects::DialectType::ClickHouse)
15603        ) && self.check_identifier("ASSUME")
15604        {
15605            // ClickHouse: CONSTRAINT name ASSUME expression
15606            // Used for query optimization assumptions — store as CHECK constraint
15607            self.advance(); // consume ASSUME
15608            let expr = self.parse_expression()?;
15609            Ok(TableConstraint::Check {
15610                name,
15611                expression: expr,
15612                modifiers: Default::default(),
15613            })
15614        } else {
15615            Err(self.parse_error("Expected PRIMARY KEY, UNIQUE, FOREIGN KEY, CHECK, or EXCLUDE"))
15616        }
15617    }
15618
15619    /// Parse INDEX/KEY table constraint for MySQL
15620    /// Syntax: [FULLTEXT|SPATIAL] {INDEX|KEY} [name] [USING {BTREE|HASH}] (columns)
15621    ///     or: [FULLTEXT|SPATIAL] {INDEX|KEY} [USING {BTREE|HASH}] (columns)  -- no name
15622    fn parse_index_table_constraint(&mut self) -> Result<TableConstraint> {
15623        // Check for FULLTEXT or SPATIAL prefix
15624        let kind = if self.match_identifier("FULLTEXT") {
15625            Some("FULLTEXT".to_string())
15626        } else if self.match_identifier("SPATIAL") {
15627            Some("SPATIAL".to_string())
15628        } else {
15629            None
15630        };
15631
15632        // Consume INDEX or KEY keyword, track which was used
15633        let use_key_keyword = if self.match_token(TokenType::Key) {
15634            true
15635        } else {
15636            self.match_token(TokenType::Index);
15637            false
15638        };
15639
15640        // Check for USING before index name (MySQL allows: INDEX USING BTREE (col))
15641        let early_using = if self.check(TokenType::Using) {
15642            self.match_token(TokenType::Using);
15643            if self.match_identifier("BTREE") {
15644                Some("BTREE".to_string())
15645            } else if self.match_identifier("HASH") {
15646                Some("HASH".to_string())
15647            } else {
15648                None
15649            }
15650        } else {
15651            None
15652        };
15653
15654        // Optional index name (only if next token is not LParen or Using)
15655        let name = if !self.check(TokenType::LParen)
15656            && !self.check(TokenType::Using)
15657            && self.is_identifier_token()
15658        {
15659            Some(Identifier::new(self.advance().text))
15660        } else {
15661            None
15662        };
15663
15664        // Check for USING after index name (if not already parsed)
15665        let late_using = if early_using.is_none() && self.match_token(TokenType::Using) {
15666            if self.match_identifier("BTREE") {
15667                Some("BTREE".to_string())
15668            } else if self.match_identifier("HASH") {
15669                Some("HASH".to_string())
15670            } else {
15671                None
15672            }
15673        } else {
15674            None
15675        };
15676
15677        // Parse columns (with optional prefix length and DESC)
15678        self.expect(TokenType::LParen)?;
15679        let columns = self.parse_index_identifier_list()?;
15680        self.expect(TokenType::RParen)?;
15681
15682        // Parse optional constraint modifiers (USING after columns, COMMENT, etc.)
15683        let mut modifiers = self.parse_constraint_modifiers();
15684
15685        // Set the using value from wherever we found it
15686        // Both early_using (before name) and late_using (after name, before columns) mean USING is before columns
15687        if early_using.is_some() {
15688            modifiers.using = early_using;
15689            modifiers.using_before_columns = true;
15690        } else if late_using.is_some() {
15691            modifiers.using = late_using;
15692            modifiers.using_before_columns = true; // USING was after name but before columns
15693        }
15694        // If using was found in parse_constraint_modifiers (after columns), using_before_columns stays false
15695
15696        Ok(TableConstraint::Index {
15697            name,
15698            columns,
15699            kind,
15700            modifiers,
15701            use_key_keyword,
15702            expression: None,
15703            index_type: None,
15704            granularity: None,
15705        })
15706    }
15707
15708    /// Parse constraint modifiers like ENFORCED, DEFERRABLE, NORELY, USING, etc.
15709    fn parse_constraint_modifiers(&mut self) -> ConstraintModifiers {
15710        let mut modifiers = ConstraintModifiers::default();
15711        loop {
15712            if self.match_token(TokenType::Not) {
15713                // NOT ENFORCED, NOT DEFERRABLE, NOT VALID
15714                if self.match_identifier("ENFORCED") {
15715                    modifiers.enforced = Some(false);
15716                } else if self.match_identifier("DEFERRABLE") {
15717                    modifiers.deferrable = Some(false);
15718                } else if self.match_identifier("VALID") {
15719                    modifiers.not_valid = true;
15720                }
15721            } else if self.match_identifier("ENFORCED") {
15722                modifiers.enforced = Some(true);
15723            } else if self.match_identifier("DEFERRABLE") {
15724                modifiers.deferrable = Some(true);
15725            } else if self.match_identifier("INITIALLY") {
15726                // INITIALLY DEFERRED or INITIALLY IMMEDIATE
15727                if self.match_identifier("DEFERRED") {
15728                    modifiers.initially_deferred = Some(true);
15729                } else if self.match_identifier("IMMEDIATE") {
15730                    modifiers.initially_deferred = Some(false);
15731                }
15732            } else if self.match_identifier("NORELY") {
15733                modifiers.norely = true;
15734            } else if self.match_identifier("RELY") {
15735                modifiers.rely = true;
15736            } else if self.match_token(TokenType::Using) {
15737                // USING BTREE or USING HASH (MySQL)
15738                if self.match_identifier("BTREE") {
15739                    modifiers.using = Some("BTREE".to_string());
15740                } else if self.match_identifier("HASH") {
15741                    modifiers.using = Some("HASH".to_string());
15742                }
15743            } else if self.match_token(TokenType::Comment) {
15744                // MySQL index COMMENT 'text'
15745                if self.check(TokenType::String) {
15746                    modifiers.comment = Some(self.advance().text);
15747                }
15748            } else if self.match_identifier("VISIBLE") {
15749                modifiers.visible = Some(true);
15750            } else if self.match_identifier("INVISIBLE") {
15751                modifiers.visible = Some(false);
15752            } else if self.match_identifier("ENGINE_ATTRIBUTE") {
15753                // MySQL ENGINE_ATTRIBUTE = 'value'
15754                self.match_token(TokenType::Eq);
15755                if self.check(TokenType::String) {
15756                    modifiers.engine_attribute = Some(self.advance().text);
15757                }
15758            } else if self.check(TokenType::With) {
15759                let saved_with = self.current;
15760                self.advance(); // consume WITH
15761                if self.match_identifier("PARSER") {
15762                    // MySQL WITH PARSER name
15763                    if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
15764                        modifiers.with_parser = Some(self.advance().text);
15765                    }
15766                } else if self.check(TokenType::LParen) {
15767                    // TSQL: WITH (PAD_INDEX=ON, STATISTICS_NORECOMPUTE=OFF, ...)
15768                    // Parse and store the options
15769                    self.advance(); // consume (
15770                    loop {
15771                        if self.check(TokenType::RParen) || self.is_at_end() {
15772                            break;
15773                        }
15774                        // Parse KEY=VALUE pair
15775                        let key = self.advance().text.clone();
15776                        if self.match_token(TokenType::Eq) {
15777                            let value = self.advance().text.clone();
15778                            modifiers.with_options.push((key, value));
15779                        }
15780                        if !self.match_token(TokenType::Comma) {
15781                            break;
15782                        }
15783                    }
15784                    let _ = self.match_token(TokenType::RParen);
15785                } else {
15786                    // Not WITH PARSER or WITH (...), backtrack
15787                    self.current = saved_with;
15788                    break;
15789                }
15790            } else if self.check(TokenType::On) {
15791                let saved_on = self.current;
15792                self.advance(); // consume ON
15793                if self.match_identifier("CONFLICT") {
15794                    // SQLite ON CONFLICT action: ROLLBACK, ABORT, FAIL, IGNORE, REPLACE
15795                    if self.match_token(TokenType::Rollback) {
15796                        modifiers.on_conflict = Some("ROLLBACK".to_string());
15797                    } else if self.match_identifier("ABORT") {
15798                        modifiers.on_conflict = Some("ABORT".to_string());
15799                    } else if self.match_identifier("FAIL") {
15800                        modifiers.on_conflict = Some("FAIL".to_string());
15801                    } else if self.match_token(TokenType::Ignore) {
15802                        modifiers.on_conflict = Some("IGNORE".to_string());
15803                    } else if self.match_token(TokenType::Replace) {
15804                        modifiers.on_conflict = Some("REPLACE".to_string());
15805                    }
15806                } else if self.is_identifier_token() || self.check(TokenType::QuotedIdentifier) {
15807                    // TSQL: ON [filegroup] - parse and store
15808                    let quoted = self.check(TokenType::QuotedIdentifier);
15809                    let name = self.advance().text.clone();
15810                    modifiers.on_filegroup = Some(Identifier {
15811                        name,
15812                        quoted,
15813                        trailing_comments: Vec::new(),
15814                        span: None,
15815                    });
15816                } else {
15817                    // Unknown ON clause, backtrack
15818                    self.current = saved_on;
15819                    break;
15820                }
15821            } else {
15822                break;
15823            }
15824        }
15825        modifiers
15826    }
15827
15828    /// Parse foreign key reference
15829    fn parse_foreign_key_ref(&mut self) -> Result<ForeignKeyRef> {
15830        let table = self.parse_table_ref()?;
15831
15832        let columns = if self.match_token(TokenType::LParen) {
15833            let cols = self.parse_identifier_list()?;
15834            self.expect(TokenType::RParen)?;
15835            cols
15836        } else {
15837            Vec::new()
15838        };
15839
15840        // Handle optional MATCH clause (MATCH FULL, MATCH PARTIAL, MATCH SIMPLE)
15841        // MATCH clause comes BEFORE ON DELETE/ON UPDATE in PostgreSQL
15842        let match_type = if self.match_token(TokenType::Match) {
15843            if self.check(TokenType::Full) {
15844                self.advance();
15845                Some(MatchType::Full)
15846            } else if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
15847                let text = self.advance().text.to_uppercase();
15848                match text.as_str() {
15849                    "PARTIAL" => Some(MatchType::Partial),
15850                    "SIMPLE" => Some(MatchType::Simple),
15851                    _ => None,
15852                }
15853            } else {
15854                None
15855            }
15856        } else {
15857            None
15858        };
15859
15860        // ON DELETE and ON UPDATE can appear in either order
15861        let mut on_delete = None;
15862        let mut on_update = None;
15863        let mut on_update_first = false;
15864        let mut first_clause = true;
15865
15866        // Try parsing up to 2 ON clauses
15867        for _ in 0..2 {
15868            if on_delete.is_none() && self.match_keywords(&[TokenType::On, TokenType::Delete]) {
15869                on_delete = Some(self.parse_referential_action()?);
15870            } else if on_update.is_none()
15871                && self.match_keywords(&[TokenType::On, TokenType::Update])
15872            {
15873                if first_clause {
15874                    on_update_first = true;
15875                }
15876                on_update = Some(self.parse_referential_action()?);
15877            } else {
15878                break;
15879            }
15880            first_clause = false;
15881        }
15882
15883        // MATCH clause can also appear after ON DELETE/ON UPDATE
15884        let mut match_after_actions = false;
15885        let match_type = if match_type.is_none() && self.match_token(TokenType::Match) {
15886            match_after_actions = on_delete.is_some() || on_update.is_some();
15887            if self.check(TokenType::Full) {
15888                self.advance();
15889                Some(MatchType::Full)
15890            } else if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
15891                let text = self.advance().text.to_uppercase();
15892                match text.as_str() {
15893                    "PARTIAL" => Some(MatchType::Partial),
15894                    "SIMPLE" => Some(MatchType::Simple),
15895                    _ => None,
15896                }
15897            } else {
15898                None
15899            }
15900        } else {
15901            match_type
15902        };
15903
15904        // Handle optional DEFERRABLE / NOT DEFERRABLE
15905        let deferrable = if self.match_identifier("DEFERRABLE") {
15906            Some(true)
15907        } else if self.match_token(TokenType::Not) && self.match_identifier("DEFERRABLE") {
15908            Some(false)
15909        } else {
15910            None
15911        };
15912
15913        Ok(ForeignKeyRef {
15914            table,
15915            columns,
15916            on_delete,
15917            on_update,
15918            on_update_first,
15919            match_type,
15920            match_after_actions,
15921            constraint_name: None, // Will be set by caller if CONSTRAINT was used
15922            deferrable,
15923            has_foreign_key_keywords: false, // Will be set by caller if FOREIGN KEY preceded REFERENCES
15924        })
15925    }
15926
15927    /// Parse referential action (CASCADE, SET NULL, etc.)
15928    fn parse_referential_action(&mut self) -> Result<ReferentialAction> {
15929        if self.match_token(TokenType::Cascade) {
15930            Ok(ReferentialAction::Cascade)
15931        } else if self.match_keywords(&[TokenType::Set, TokenType::Null]) {
15932            Ok(ReferentialAction::SetNull)
15933        } else if self.match_keywords(&[TokenType::Set, TokenType::Default]) {
15934            Ok(ReferentialAction::SetDefault)
15935        } else if self.match_token(TokenType::Restrict) {
15936            Ok(ReferentialAction::Restrict)
15937        } else if self.match_token(TokenType::No) {
15938            // NO ACTION - NO is a token, ACTION is an identifier
15939            if self.check(TokenType::Var) && self.peek().text.to_uppercase() == "ACTION" {
15940                self.advance();
15941            }
15942            Ok(ReferentialAction::NoAction)
15943        } else {
15944            Err(self.parse_error("Expected CASCADE, SET NULL, SET DEFAULT, RESTRICT, or NO ACTION"))
15945        }
15946    }
15947
15948    /// Parse Snowflake TAG clause: TAG (key='value', key2='value2')
15949    fn parse_tags(&mut self) -> Result<Tags> {
15950        self.expect(TokenType::LParen)?;
15951        let mut expressions = Vec::new();
15952
15953        loop {
15954            // Parse key = 'value' as a Property expression
15955            let key = self.expect_identifier_or_keyword()?;
15956            self.expect(TokenType::Eq)?;
15957            let value = self.parse_primary()?;
15958
15959            // Create a Property expression: key = value
15960            expressions.push(Expression::Property(Box::new(Property {
15961                this: Box::new(Expression::Identifier(Identifier::new(key))),
15962                value: Some(Box::new(value)),
15963            })));
15964
15965            if !self.match_token(TokenType::Comma) {
15966                break;
15967            }
15968        }
15969
15970        self.expect(TokenType::RParen)?;
15971
15972        Ok(Tags { expressions })
15973    }
15974
15975    /// Parse CREATE VIEW
15976    fn parse_create_view(
15977        &mut self,
15978        or_replace: bool,
15979        materialized: bool,
15980        temporary: bool,
15981        algorithm: Option<String>,
15982        definer: Option<String>,
15983        security: Option<FunctionSecurity>,
15984        secure: bool,
15985    ) -> Result<Expression> {
15986        self.expect(TokenType::View)?;
15987
15988        // Handle IF NOT EXISTS
15989        let if_not_exists =
15990            self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
15991
15992        let name = self.parse_table_ref()?;
15993
15994        // ClickHouse: UUID 'xxx' clause after view name
15995        if matches!(
15996            self.config.dialect,
15997            Some(crate::dialects::DialectType::ClickHouse)
15998        ) && self.check_identifier("UUID")
15999        {
16000            self.advance(); // consume UUID
16001            let _ = self.advance(); // consume UUID string value
16002        }
16003
16004        // ClickHouse: ON CLUSTER clause (after view name)
16005        let on_cluster = self.parse_on_cluster_clause()?;
16006
16007        // ClickHouse: TO destination_table clause
16008        let to_table = if self.match_token(TokenType::To) {
16009            Some(self.parse_table_ref()?)
16010        } else {
16011            None
16012        };
16013
16014        // Snowflake: COPY GRANTS (before column list)
16015        let copy_grants = self.match_text_seq(&["COPY", "GRANTS"]);
16016
16017        // For materialized views, column definitions can include data types: (c1 INT, c2 INT)
16018        // This applies to Doris, ClickHouse, and potentially other dialects
16019        // We need to parse this as a schema instead of simple column names
16020        // Track if we parsed a schema (with types) vs simple columns
16021        let mut schema: Option<Schema> = None;
16022        let mut unique_key: Option<UniqueKeyProperty> = None;
16023
16024        // Optional column list with optional COMMENT and OPTIONS per column
16025        let columns = if self.check(TokenType::LParen) {
16026            // For materialized views or ClickHouse views, try to parse as schema with typed columns
16027            if materialized
16028                || matches!(
16029                    self.config.dialect,
16030                    Some(crate::dialects::DialectType::ClickHouse)
16031                )
16032            {
16033                // Save position to backtrack if needed
16034                let saved_pos = self.current;
16035
16036                // Try to parse as schema (with typed columns)
16037                if let Some(Expression::Schema(parsed_schema)) = self.parse_schema()? {
16038                    schema = Some(*parsed_schema);
16039
16040                    // Doris: KEY (columns) after schema
16041                    if self.match_text_seq(&["KEY"]) {
16042                        let exprs = self.parse_composite_key_expressions()?;
16043                        unique_key = Some(UniqueKeyProperty { expressions: exprs });
16044                    }
16045
16046                    Vec::new() // Use schema instead of columns
16047                } else {
16048                    // Backtrack and parse as simple columns
16049                    self.current = saved_pos;
16050                    self.parse_view_columns()?
16051                }
16052            } else {
16053                self.parse_view_columns()?
16054            }
16055        } else {
16056            Vec::new()
16057        };
16058
16059        // Snowflake: COPY GRANTS can also appear after column list
16060        let copy_grants = copy_grants || self.match_text_seq(&["COPY", "GRANTS"]);
16061
16062        // Presto/Trino/StarRocks: SECURITY DEFINER/INVOKER/NONE (after view name, before AS)
16063        // This differs from MySQL's SQL SECURITY which comes before VIEW keyword
16064        let (security, security_sql_style) = if security.is_some() {
16065            // MySQL-style SQL SECURITY was parsed before VIEW keyword
16066            (security, true)
16067        } else if self.match_identifier("SECURITY") {
16068            // Presto-style SECURITY after view name
16069            let sec = if self.match_identifier("DEFINER") {
16070                Some(FunctionSecurity::Definer)
16071            } else if self.match_identifier("INVOKER") {
16072                Some(FunctionSecurity::Invoker)
16073            } else if self.match_identifier("NONE") {
16074                Some(FunctionSecurity::None)
16075            } else {
16076                None
16077            };
16078            (sec, false)
16079        } else {
16080            (None, true)
16081        };
16082
16083        // Snowflake: COMMENT = 'text'
16084        let view_comment = if self.match_token(TokenType::Comment) {
16085            // Match = or skip if not present (some dialects use COMMENT='text')
16086            let _ = self.match_token(TokenType::Eq);
16087            Some(self.expect_string()?)
16088        } else {
16089            None
16090        };
16091
16092        // Snowflake: TAG (name='value', ...)
16093        let tags = if self.match_identifier("TAG") {
16094            let mut tag_list = Vec::new();
16095            if self.match_token(TokenType::LParen) {
16096                loop {
16097                    let tag_name = self.expect_identifier()?;
16098                    let tag_value = if self.match_token(TokenType::Eq) {
16099                        self.expect_string()?
16100                    } else {
16101                        String::new()
16102                    };
16103                    tag_list.push((tag_name, tag_value));
16104                    if !self.match_token(TokenType::Comma) {
16105                        break;
16106                    }
16107                }
16108                self.expect(TokenType::RParen)?;
16109            }
16110            tag_list
16111        } else {
16112            Vec::new()
16113        };
16114
16115        // BigQuery: OPTIONS (key=value, ...)
16116        let options = if self.match_identifier("OPTIONS") {
16117            self.parse_options_list()?
16118        } else {
16119            Vec::new()
16120        };
16121
16122        // Doris: BUILD IMMEDIATE/DEFERRED for materialized views
16123        let build = if self.match_identifier("BUILD") {
16124            if self.match_identifier("IMMEDIATE") {
16125                Some("IMMEDIATE".to_string())
16126            } else if self.match_identifier("DEFERRED") {
16127                Some("DEFERRED".to_string())
16128            } else {
16129                // Unexpected token after BUILD - try to consume it
16130                let value = self.expect_identifier_or_keyword()?;
16131                Some(value.to_uppercase())
16132            }
16133        } else {
16134            None
16135        };
16136
16137        // Doris: REFRESH COMPLETE/AUTO ON MANUAL/COMMIT/SCHEDULE [EVERY n UNIT] [STARTS 'datetime']
16138        // ClickHouse: REFRESH AFTER interval / REFRESH EVERY interval [OFFSET interval] [RANDOMIZE FOR interval] [APPEND]
16139        let refresh = if self.match_token(TokenType::Refresh) {
16140            if matches!(
16141                self.config.dialect,
16142                Some(crate::dialects::DialectType::ClickHouse)
16143            ) {
16144                // ClickHouse REFRESH syntax: consume tokens until AS/POPULATE/TO/ENGINE or end
16145                while !self.is_at_end()
16146                    && !self.check(TokenType::As)
16147                    && !self.check_identifier("POPULATE")
16148                    && !self.check_identifier("TO")
16149                    && !self.check_identifier("APPEND")
16150                    && !self.check_identifier("ENGINE")
16151                    && !self.check(TokenType::Semicolon)
16152                {
16153                    self.advance();
16154                }
16155                // Consume APPEND if present (REFRESH ... APPEND TO target)
16156                let _ = self.match_identifier("APPEND");
16157                None
16158            } else {
16159                Some(Box::new(self.parse_refresh_trigger_property()?))
16160            }
16161        } else {
16162            None
16163        };
16164
16165        // ClickHouse: TO destination_table after REFRESH ... APPEND
16166        // e.g., CREATE MATERIALIZED VIEW v REFRESH AFTER 1 SECOND APPEND TO tab (cols) EMPTY AS ...
16167        let to_table = if to_table.is_none() && self.match_token(TokenType::To) {
16168            Some(self.parse_table_ref()?)
16169        } else {
16170            to_table
16171        };
16172
16173        // ClickHouse: column definitions after REFRESH ... APPEND TO tab (cols)
16174        if schema.is_none()
16175            && self.check(TokenType::LParen)
16176            && matches!(
16177                self.config.dialect,
16178                Some(crate::dialects::DialectType::ClickHouse)
16179            )
16180        {
16181            let saved_pos = self.current;
16182            if let Some(Expression::Schema(parsed_schema)) = self.parse_schema()? {
16183                schema = Some(*parsed_schema);
16184            } else {
16185                self.current = saved_pos;
16186            }
16187        }
16188
16189        // Redshift: AUTO REFRESH YES|NO for materialized views
16190        let auto_refresh = if self.match_text_seq(&["AUTO", "REFRESH"]) {
16191            if self.match_identifier("YES") {
16192                Some(true)
16193            } else if self.match_identifier("NO") {
16194                Some(false)
16195            } else {
16196                None
16197            }
16198        } else {
16199            None
16200        };
16201
16202        // ClickHouse: Parse table properties (ENGINE, ORDER BY, SAMPLE, SETTINGS, TTL, etc.)
16203        // These appear after column definitions but before AS clause for materialized views
16204        let mut table_properties = Vec::new();
16205        if materialized
16206            && matches!(
16207                self.config.dialect,
16208                Some(crate::dialects::DialectType::ClickHouse)
16209            )
16210        {
16211            self.parse_clickhouse_table_properties(&mut table_properties)?;
16212        }
16213
16214        // ClickHouse: POPULATE / EMPTY keywords before AS in materialized views
16215        if materialized
16216            && matches!(
16217                self.config.dialect,
16218                Some(crate::dialects::DialectType::ClickHouse)
16219            )
16220        {
16221            let _ = self.match_identifier("POPULATE");
16222            let _ = self.match_identifier("EMPTY");
16223        }
16224
16225        // AS is optional - some dialects (e.g., Presto) allow SELECT without AS
16226        let has_as = self.match_token(TokenType::As);
16227        if !has_as && !self.check(TokenType::Select) && !self.check(TokenType::With) {
16228            // No AS and no SELECT/WITH means no query - return empty view (for partial statements)
16229            return Ok(Expression::CreateView(Box::new(CreateView {
16230                name,
16231                columns,
16232                query: Expression::Null(Null), // Placeholder for incomplete VIEW
16233                or_replace,
16234                if_not_exists,
16235                materialized,
16236                temporary,
16237                secure,
16238                algorithm,
16239                definer,
16240                security,
16241                security_sql_style,
16242                query_parenthesized: false,
16243                locking_mode: None,
16244                locking_access: None,
16245                copy_grants,
16246                comment: view_comment,
16247                tags,
16248                options,
16249                build,
16250                refresh,
16251                schema: schema.map(Box::new),
16252                unique_key: unique_key.map(Box::new),
16253                no_schema_binding: false,
16254                auto_refresh,
16255                on_cluster,
16256                to_table,
16257                table_properties,
16258            })));
16259        }
16260
16261        // Parse Teradata LOCKING clause: LOCKING ROW|TABLE|DATABASE FOR ACCESS|READ|WRITE
16262        let mut locking_mode: Option<String> = None;
16263        let mut locking_access: Option<String> = None;
16264        if self.match_token(TokenType::Lock) || self.match_identifier("LOCKING") {
16265            // Capture: ROW, TABLE, DATABASE, etc.
16266            if self.match_token(TokenType::Row) {
16267                locking_mode = Some("ROW".to_string());
16268            } else if self.match_token(TokenType::Table) {
16269                locking_mode = Some("TABLE".to_string());
16270            } else if self.match_token(TokenType::Database) || self.match_identifier("DATABASE") {
16271                locking_mode = Some("DATABASE".to_string());
16272            }
16273            // Capture FOR ACCESS|READ|WRITE
16274            if self.match_token(TokenType::For) {
16275                if self.match_identifier("ACCESS") {
16276                    locking_access = Some("ACCESS".to_string());
16277                } else if self.match_identifier("READ") {
16278                    locking_access = Some("READ".to_string());
16279                } else if self.match_identifier("WRITE") {
16280                    locking_access = Some("WRITE".to_string());
16281                }
16282            }
16283        }
16284
16285        // Use parse_statement to handle SELECT, WITH...SELECT, or (SELECT...)
16286        let query_parenthesized = self.check(TokenType::LParen);
16287        let query = if self.check(TokenType::With) {
16288            self.parse_statement()?
16289        } else if query_parenthesized {
16290            // Handle (SELECT ...) or (WITH ... SELECT ...) - parenthesized query
16291            self.advance(); // consume (
16292            let inner = if self.check(TokenType::With) {
16293                self.parse_statement()?
16294            } else {
16295                self.parse_select()?
16296            };
16297            self.expect(TokenType::RParen)?;
16298            inner
16299        } else {
16300            self.parse_select()?
16301        };
16302
16303        // Redshift: WITH NO SCHEMA BINDING (after the query)
16304        let no_schema_binding = self.match_text_seq(&["WITH", "NO", "SCHEMA", "BINDING"]);
16305
16306        Ok(Expression::CreateView(Box::new(CreateView {
16307            name,
16308            columns,
16309            query,
16310            or_replace,
16311            if_not_exists,
16312            materialized,
16313            temporary,
16314            secure,
16315            algorithm,
16316            definer,
16317            security,
16318            security_sql_style,
16319            query_parenthesized,
16320            locking_mode,
16321            locking_access,
16322            copy_grants,
16323            comment: view_comment,
16324            tags,
16325            options,
16326            build,
16327            refresh,
16328            schema: schema.map(Box::new),
16329            unique_key: unique_key.map(Box::new),
16330            no_schema_binding,
16331            auto_refresh,
16332            on_cluster,
16333            to_table,
16334            table_properties,
16335        })))
16336    }
16337
16338    /// Parse view column list: (col1, col2 OPTIONS(...) COMMENT 'text', ...)
16339    /// For simple view definitions without data types
16340    fn parse_view_columns(&mut self) -> Result<Vec<ViewColumn>> {
16341        self.expect(TokenType::LParen)?;
16342        let mut cols = Vec::new();
16343        loop {
16344            let col_name = self.expect_identifier()?;
16345            // BigQuery: OPTIONS (key=value, ...) on view column
16346            let options = if self.match_identifier("OPTIONS") {
16347                self.parse_options_list()?
16348            } else {
16349                Vec::new()
16350            };
16351            // Optional COMMENT 'text'
16352            let comment = if self.match_token(TokenType::Comment) {
16353                Some(self.expect_string()?)
16354            } else {
16355                None
16356            };
16357            cols.push(ViewColumn {
16358                name: Identifier::new(col_name),
16359                comment,
16360                options,
16361            });
16362            if !self.match_token(TokenType::Comma) {
16363                break;
16364            }
16365        }
16366        self.expect(TokenType::RParen)?;
16367        Ok(cols)
16368    }
16369
16370    /// Parse CREATE [CLUSTERED|NONCLUSTERED] INDEX
16371    fn parse_create_index_with_clustered(
16372        &mut self,
16373        unique: bool,
16374        clustered: Option<String>,
16375    ) -> Result<Expression> {
16376        self.expect(TokenType::Index)?;
16377
16378        // PostgreSQL: CREATE INDEX CONCURRENTLY idx ON t(c)
16379        let concurrently = self.match_identifier("CONCURRENTLY");
16380
16381        // Handle IF NOT EXISTS
16382        let if_not_exists =
16383            self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
16384
16385        // Index name is optional when IF NOT EXISTS is specified (PostgreSQL)
16386        let name = if if_not_exists && self.check(TokenType::On) {
16387            Identifier::new("") // Empty name when omitted
16388        } else {
16389            self.expect_identifier_with_quoted()?
16390        };
16391        self.expect(TokenType::On)?;
16392        let table = self.parse_table_ref()?;
16393
16394        // Optional USING clause
16395        let using = if self.match_token(TokenType::Using) {
16396            Some(self.expect_identifier()?)
16397        } else {
16398            None
16399        };
16400
16401        // Parse index columns (optional for COLUMNSTORE indexes)
16402        let columns = if self.match_token(TokenType::LParen) {
16403            let cols = self.parse_index_columns()?;
16404            self.expect(TokenType::RParen)?;
16405            cols
16406        } else if clustered
16407            .as_ref()
16408            .is_some_and(|c| c.contains("COLUMNSTORE"))
16409        {
16410            // COLUMNSTORE indexes don't require a column list
16411            Vec::new()
16412        } else if matches!(
16413            self.config.dialect,
16414            Some(crate::dialects::DialectType::ClickHouse)
16415        ) {
16416            // ClickHouse: CREATE INDEX idx ON table expr TYPE minmax GRANULARITY 1
16417            // No parentheses around the expression — consume to semicolon as Command
16418            let mut parts = vec![
16419                "CREATE".to_string(),
16420                if unique {
16421                    "UNIQUE INDEX".to_string()
16422                } else {
16423                    "INDEX".to_string()
16424                },
16425                name.name.clone(),
16426                "ON".to_string(),
16427            ];
16428            // Rebuild table name
16429            if let Some(ref s) = table.schema {
16430                parts.push(format!("{}.{}", s.name, table.name.name));
16431            } else {
16432                parts.push(table.name.name.clone());
16433            }
16434            while !self.is_at_end() && !self.check(TokenType::Semicolon) {
16435                let token = self.advance();
16436                if token.token_type == TokenType::String {
16437                    parts.push(format!("'{}'", token.text));
16438                } else if token.token_type == TokenType::QuotedIdentifier {
16439                    parts.push(format!("\"{}\"", token.text));
16440                } else {
16441                    parts.push(token.text.clone());
16442                }
16443            }
16444            return Ok(Expression::Command(Box::new(crate::expressions::Command {
16445                this: parts.join(" "),
16446            })));
16447        } else {
16448            self.expect(TokenType::LParen)?;
16449            let cols = self.parse_index_columns()?;
16450            self.expect(TokenType::RParen)?;
16451            cols
16452        };
16453
16454        // PostgreSQL: INCLUDE (col1, col2) clause
16455        let include_columns = if self.match_identifier("INCLUDE") {
16456            self.expect(TokenType::LParen)?;
16457            let mut cols = Vec::new();
16458            loop {
16459                cols.push(self.expect_identifier_with_quoted()?);
16460                if !self.match_token(TokenType::Comma) {
16461                    break;
16462                }
16463            }
16464            self.expect(TokenType::RParen)?;
16465            cols
16466        } else {
16467            Vec::new()
16468        };
16469
16470        // TSQL: WITH (option=value, ...) clause for index options
16471        let with_options = if self.check(TokenType::With) {
16472            // parse_with_properties expects the WITH keyword to NOT be consumed
16473            // but we need to check if we have WITH followed by LParen
16474            if self
16475                .peek_nth(1)
16476                .is_some_and(|t| t.token_type == TokenType::LParen)
16477            {
16478                self.advance(); // consume WITH
16479                self.parse_with_properties()?
16480            } else {
16481                Vec::new()
16482            }
16483        } else {
16484            Vec::new()
16485        };
16486
16487        // PostgreSQL: WHERE clause for partial indexes
16488        let where_clause = if self.match_token(TokenType::Where) {
16489            Some(Box::new(self.parse_expression()?))
16490        } else {
16491            None
16492        };
16493
16494        // TSQL: ON filegroup or partition scheme clause
16495        // e.g., ON PRIMARY, ON X([y])
16496        let on_filegroup = if self.match_token(TokenType::On) {
16497            // Get the filegroup/partition scheme name
16498            let token = self.advance();
16499            let mut filegroup = token.text.clone();
16500            // Check for partition scheme with column: ON partition_scheme(column)
16501            if self.match_token(TokenType::LParen) {
16502                filegroup.push('(');
16503                // Parse the partition column(s)
16504                loop {
16505                    let col_token = self.advance();
16506                    // For TSQL, use bracket quoting for quoted identifiers
16507                    if col_token.token_type == TokenType::QuotedIdentifier {
16508                        filegroup.push('[');
16509                        filegroup.push_str(&col_token.text);
16510                        filegroup.push(']');
16511                    } else {
16512                        filegroup.push_str(&col_token.text);
16513                    }
16514                    if !self.match_token(TokenType::Comma) {
16515                        break;
16516                    }
16517                    filegroup.push_str(", ");
16518                }
16519                self.expect(TokenType::RParen)?;
16520                filegroup.push(')');
16521            }
16522            Some(filegroup)
16523        } else {
16524            None
16525        };
16526
16527        Ok(Expression::CreateIndex(Box::new(CreateIndex {
16528            name,
16529            table,
16530            columns,
16531            unique,
16532            if_not_exists,
16533            using,
16534            clustered,
16535            concurrently,
16536            where_clause,
16537            include_columns,
16538            with_options,
16539            on_filegroup,
16540        })))
16541    }
16542
16543    /// Parse index columns - can be identifiers or expressions (like function calls)
16544    fn parse_index_columns(&mut self) -> Result<Vec<IndexColumn>> {
16545        let mut columns = Vec::new();
16546        loop {
16547            // Parse as expression to handle function calls like BOX(location, location)
16548            let expr = self.parse_expression()?;
16549
16550            // Extract column name from expression
16551            let column = match &expr {
16552                Expression::Identifier(ident) => ident.clone(),
16553                Expression::Column(col) => {
16554                    // For column expressions (e.g., simple identifier like [Col]),
16555                    // extract the identifier directly to preserve quoting
16556                    col.name.clone()
16557                }
16558                Expression::Function(_func) => {
16559                    // For function expressions, create an identifier from the function call
16560                    Identifier::new(self.expression_to_sql(&expr))
16561                }
16562                _ => Identifier::new(self.expression_to_sql(&expr)),
16563            };
16564
16565            // Parse optional PostgreSQL operator class (e.g., varchar_pattern_ops, public.gin_trgm_ops)
16566            // An opclass is an identifier that appears before ASC/DESC/NULLS and is not a keyword
16567            let opclass = if self.is_identifier_token()
16568                && !self.check(TokenType::Asc)
16569                && !self.check(TokenType::Desc)
16570                && !self.check(TokenType::Nulls)
16571            {
16572                let mut opclass_name = self.advance().text;
16573                // Handle qualified opclass names like public.gin_trgm_ops
16574                while self.match_token(TokenType::Dot) {
16575                    opclass_name.push('.');
16576                    if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
16577                        opclass_name.push_str(&self.advance().text);
16578                    }
16579                }
16580                Some(opclass_name)
16581            } else {
16582                None
16583            };
16584
16585            let desc = self.match_token(TokenType::Desc);
16586            let asc = if !desc {
16587                self.match_token(TokenType::Asc)
16588            } else {
16589                false
16590            };
16591            let nulls_first = if self.match_token(TokenType::Nulls) {
16592                if self.match_token(TokenType::First) {
16593                    Some(true)
16594                } else if self.match_token(TokenType::Last) {
16595                    Some(false)
16596                } else {
16597                    None
16598                }
16599            } else {
16600                None
16601            };
16602            columns.push(IndexColumn {
16603                column,
16604                desc,
16605                asc,
16606                nulls_first,
16607                opclass,
16608            });
16609            if !self.match_token(TokenType::Comma) {
16610                break;
16611            }
16612        }
16613        Ok(columns)
16614    }
16615
16616    /// Convert an expression to its SQL string representation (simple version for index expressions)
16617    fn expression_to_sql(&self, expr: &Expression) -> String {
16618        match expr {
16619            Expression::Identifier(ident) => ident.name.clone(),
16620            Expression::Function(func) => {
16621                let args = func
16622                    .args
16623                    .iter()
16624                    .map(|a| self.expression_to_sql(a))
16625                    .collect::<Vec<_>>()
16626                    .join(", ");
16627                format!("{}({})", func.name, args)
16628            }
16629            Expression::Column(col) => {
16630                if let Some(ref table) = col.table {
16631                    format!("{}.{}", table, col.name)
16632                } else {
16633                    col.name.to_string()
16634                }
16635            }
16636            Expression::Literal(lit) => match lit {
16637                Literal::String(s) => format!("'{}'", s),
16638                Literal::Number(n) => n.clone(),
16639                _ => "?".to_string(),
16640            },
16641            Expression::Null(_) => "NULL".to_string(),
16642            Expression::Boolean(b) => {
16643                if b.value {
16644                    "TRUE".to_string()
16645                } else {
16646                    "FALSE".to_string()
16647                }
16648            }
16649            _ => "?".to_string(),
16650        }
16651    }
16652
16653    /// Parse DROP statement
16654    fn parse_drop(&mut self) -> Result<Expression> {
16655        // Capture leading comments from the DROP token (e.g., "-- comment\nDROP TABLE ...")
16656        let leading_comments = self.current_leading_comments();
16657        self.expect(TokenType::Drop)?;
16658
16659        // ClickHouse: DROP TEMPORARY TABLE / DROP TEMPORARY VIEW
16660        if self.check(TokenType::Temporary)
16661            && matches!(
16662                self.config.dialect,
16663                Some(crate::dialects::DialectType::ClickHouse)
16664            )
16665        {
16666            self.advance(); // consume TEMPORARY
16667            if self.check(TokenType::View) {
16668                return self.parse_drop_view(false);
16669            }
16670            return self.parse_drop_table(leading_comments.clone());
16671        }
16672
16673        match self.peek().token_type {
16674            TokenType::Table => self.parse_drop_table(leading_comments),
16675            TokenType::View => self.parse_drop_view(false),
16676            TokenType::Materialized => {
16677                self.advance(); // consume MATERIALIZED
16678                self.parse_drop_view(true)
16679            }
16680            TokenType::Index => self.parse_drop_index(),
16681            TokenType::Schema => self.parse_drop_schema(),
16682            TokenType::Database => self.parse_drop_database(),
16683            TokenType::Function => self.parse_drop_function(),
16684            TokenType::Procedure => self.parse_drop_procedure(),
16685            TokenType::Sequence => self.parse_drop_sequence(),
16686            TokenType::Trigger => self.parse_drop_trigger(),
16687            TokenType::Type => self.parse_drop_type(),
16688            TokenType::Domain => {
16689                // DROP DOMAIN is similar to DROP TYPE
16690                self.advance();
16691                let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
16692                let name = self.parse_table_ref()?;
16693                let cascade = self.match_token(TokenType::Cascade);
16694                if !cascade {
16695                    self.match_token(TokenType::Restrict);
16696                }
16697                Ok(Expression::DropType(Box::new(DropType {
16698                    name,
16699                    if_exists,
16700                    cascade,
16701                })))
16702            }
16703            TokenType::Namespace => {
16704                // DROP NAMESPACE is similar to DROP SCHEMA (Spark/Databricks)
16705                self.advance();
16706                let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
16707                // Parse potentially qualified namespace name (a.b.c)
16708                let mut name_parts = vec![self.expect_identifier()?];
16709                while self.match_token(TokenType::Dot) {
16710                    name_parts.push(self.expect_identifier()?);
16711                }
16712                let name = Identifier::new(name_parts.join("."));
16713                let cascade = self.match_token(TokenType::Cascade);
16714                if !cascade {
16715                    self.match_token(TokenType::Restrict);
16716                }
16717                Ok(Expression::DropNamespace(Box::new(DropNamespace {
16718                    name,
16719                    if_exists,
16720                    cascade,
16721                })))
16722            }
16723            _ => {
16724                // ClickHouse: DROP DICTIONARY, DROP USER, DROP QUOTA, DROP ROLE,
16725                // DROP ROW POLICY, DROP SETTINGS PROFILE, DROP NAMED COLLECTION
16726                if matches!(
16727                    self.config.dialect,
16728                    Some(crate::dialects::DialectType::ClickHouse)
16729                ) {
16730                    let text_upper = self.peek().text.to_uppercase();
16731                    if matches!(
16732                        text_upper.as_str(),
16733                        "DICTIONARY"
16734                            | "USER"
16735                            | "QUOTA"
16736                            | "ROLE"
16737                            | "ROW"
16738                            | "POLICY"
16739                            | "NAMED"
16740                            | "WORKLOAD"
16741                            | "RESOURCE"
16742                            | "PROFILE"
16743                    ) || self.check(TokenType::Settings)
16744                        || self.check(TokenType::Partition)
16745                    {
16746                        self.advance(); // consume keyword, previous() is now set
16747                        let mut tokens: Vec<(String, TokenType)> = vec![
16748                            ("DROP".to_string(), TokenType::Var),
16749                            (
16750                                self.previous().text.to_uppercase(),
16751                                self.previous().token_type,
16752                            ),
16753                        ];
16754                        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
16755                            let token = self.advance();
16756                            let text = if token.token_type == TokenType::QuotedIdentifier {
16757                                format!("\"{}\"", token.text)
16758                            } else if token.token_type == TokenType::String {
16759                                format!("'{}'", token.text)
16760                            } else {
16761                                token.text.clone()
16762                            };
16763                            tokens.push((text, token.token_type));
16764                        }
16765                        return Ok(Expression::Command(Box::new(Command {
16766                            this: self.join_command_tokens(tokens),
16767                        })));
16768                    }
16769                }
16770                Err(self.parse_error(format!(
16771                    "Expected TABLE, VIEW, INDEX, SCHEMA, DATABASE, FUNCTION, PROCEDURE, SEQUENCE, TRIGGER, TYPE, or NAMESPACE after DROP, got {:?}",
16772                    self.peek().token_type
16773                )))
16774            }
16775        }
16776    }
16777
16778    /// Parse DROP TABLE
16779    fn parse_drop_table(&mut self, leading_comments: Vec<String>) -> Result<Expression> {
16780        self.expect(TokenType::Table)?;
16781
16782        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
16783
16784        // ClickHouse: IF EMPTY
16785        if !if_exists
16786            && matches!(
16787                self.config.dialect,
16788                Some(crate::dialects::DialectType::ClickHouse)
16789            )
16790        {
16791            if self.check(TokenType::If)
16792                && self.current + 1 < self.tokens.len()
16793                && self.tokens[self.current + 1]
16794                    .text
16795                    .eq_ignore_ascii_case("EMPTY")
16796            {
16797                self.advance(); // consume IF
16798                self.advance(); // consume EMPTY
16799            }
16800        }
16801
16802        // Parse table names (can be multiple)
16803        let mut names = Vec::new();
16804        loop {
16805            names.push(self.parse_table_ref()?);
16806            if !self.match_token(TokenType::Comma) {
16807                break;
16808            }
16809        }
16810
16811        // Handle CASCADE [CONSTRAINTS] or RESTRICT
16812        let mut cascade = false;
16813        let mut cascade_constraints = false;
16814        if self.match_token(TokenType::Cascade) {
16815            if self.match_identifier("CONSTRAINTS") {
16816                cascade_constraints = true;
16817            } else {
16818                cascade = true;
16819            }
16820        } else {
16821            self.match_token(TokenType::Restrict); // consume optional RESTRICT
16822        }
16823
16824        // Handle PURGE (Oracle)
16825        let purge = self.match_identifier("PURGE");
16826
16827        // ClickHouse: ON CLUSTER clause
16828        if matches!(
16829            self.config.dialect,
16830            Some(crate::dialects::DialectType::ClickHouse)
16831        ) {
16832            let _ = self.parse_on_cluster_clause()?;
16833        }
16834
16835        // ClickHouse: SYNC keyword
16836        if matches!(
16837            self.config.dialect,
16838            Some(crate::dialects::DialectType::ClickHouse)
16839        ) {
16840            self.match_identifier("SYNC");
16841            self.match_identifier("NO");
16842            self.match_identifier("DELAY");
16843        }
16844
16845        Ok(Expression::DropTable(Box::new(DropTable {
16846            names,
16847            if_exists,
16848            cascade,
16849            cascade_constraints,
16850            purge,
16851            leading_comments,
16852            object_id_args: None,
16853        })))
16854    }
16855
16856    /// Parse DROP VIEW
16857    fn parse_drop_view(&mut self, materialized: bool) -> Result<Expression> {
16858        self.expect(TokenType::View)?;
16859
16860        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
16861        let name = self.parse_table_ref()?;
16862
16863        // ClickHouse: ON CLUSTER clause
16864        if matches!(
16865            self.config.dialect,
16866            Some(crate::dialects::DialectType::ClickHouse)
16867        ) {
16868            let _ = self.parse_on_cluster_clause()?;
16869            self.match_identifier("SYNC");
16870        }
16871
16872        Ok(Expression::DropView(Box::new(DropView {
16873            name,
16874            if_exists,
16875            materialized,
16876        })))
16877    }
16878
16879    /// Parse DROP INDEX
16880    fn parse_drop_index(&mut self) -> Result<Expression> {
16881        self.expect(TokenType::Index)?;
16882
16883        // PostgreSQL CONCURRENTLY modifier
16884        let concurrently = self.match_identifier("CONCURRENTLY");
16885
16886        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
16887
16888        // Parse potentially qualified index name (a.b.c)
16889        let mut name_parts = vec![self.expect_identifier()?];
16890        while self.match_token(TokenType::Dot) {
16891            name_parts.push(self.expect_identifier()?);
16892        }
16893        let name = Identifier::new(name_parts.join("."));
16894
16895        // Optional ON table
16896        let table = if self.match_token(TokenType::On) {
16897            Some(self.parse_table_ref()?)
16898        } else {
16899            None
16900        };
16901
16902        Ok(Expression::DropIndex(Box::new(DropIndex {
16903            name,
16904            table,
16905            if_exists,
16906            concurrently,
16907        })))
16908    }
16909
16910    /// Parse ALTER statement
16911    fn parse_alter(&mut self) -> Result<Expression> {
16912        self.expect(TokenType::Alter)?;
16913
16914        match self.peek().token_type {
16915            TokenType::Table => {
16916                self.advance();
16917                // Handle IF EXISTS after ALTER TABLE
16918                let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
16919                // Handle PostgreSQL ONLY modifier: ALTER TABLE ONLY "Album" ...
16920                let has_only = self.match_token(TokenType::Only);
16921                let mut name = self.parse_table_ref()?;
16922                if has_only {
16923                    name.only = true;
16924                }
16925
16926                // ClickHouse: ON CLUSTER clause
16927                let on_cluster = self.parse_on_cluster_clause()?;
16928
16929                // Hive: PARTITION(key=value, ...) clause before actions
16930                let partition = if self.match_token(TokenType::Partition) {
16931                    self.expect(TokenType::LParen)?;
16932                    let mut parts = Vec::new();
16933                    loop {
16934                        let key = self.expect_identifier()?;
16935                        self.expect(TokenType::Eq)?;
16936                        let value = self.parse_expression()?;
16937                        parts.push((Identifier::new(key), value));
16938                        if !self.match_token(TokenType::Comma) {
16939                            break;
16940                        }
16941                    }
16942                    self.expect(TokenType::RParen)?;
16943                    Some(parts)
16944                } else {
16945                    None
16946                };
16947
16948                let mut actions = Vec::new();
16949                let mut last_was_add_column = false;
16950                let mut with_check_modifier: Option<String> = None;
16951
16952                loop {
16953                    // Check for MySQL trailing options (ALGORITHM=val, LOCK=val)
16954                    // before trying to parse as a column def or action.
16955                    // The comma before ALGORITHM was consumed at the bottom of the previous iteration.
16956                    if self.check_identifier("ALGORITHM") || self.check_identifier("LOCK") {
16957                        break;
16958                    }
16959
16960                    // TSQL: WITH CHECK / WITH NOCHECK before ADD CONSTRAINT
16961                    if self.check(TokenType::With) {
16962                        let saved = self.current;
16963                        self.advance(); // consume WITH
16964                        if self.check(TokenType::Check) {
16965                            self.advance(); // consume CHECK
16966                            with_check_modifier = Some("WITH CHECK".to_string());
16967                            // Continue to parse the actual action (ADD CONSTRAINT, etc.)
16968                        } else if self.check_identifier("NOCHECK") {
16969                            self.advance(); // consume NOCHECK
16970                            with_check_modifier = Some("WITH NOCHECK".to_string());
16971                            // Continue to parse the actual action (ADD CONSTRAINT, etc.)
16972                        } else {
16973                            // Not WITH CHECK/NOCHECK, restore position
16974                            self.current = saved;
16975                        }
16976                    }
16977
16978                    // If last action was ADD COLUMN and we just saw a comma,
16979                    // check if this is another column definition (not a new action keyword)
16980                    if last_was_add_column
16981                        && !self.check(TokenType::Add)
16982                        && !self.check(TokenType::Drop)
16983                        && !self.check(TokenType::Alter)
16984                        && !self.check(TokenType::Rename)
16985                        && !self.check(TokenType::Set)
16986                        && !self.check_identifier("MODIFY")
16987                        && !self.check(TokenType::Delete)
16988                        && !self.check(TokenType::Update)
16989                        && !self.check_identifier("DETACH")
16990                        && !self.check_identifier("ATTACH")
16991                        && !self.check_identifier("FREEZE")
16992                        && !self.check_identifier("CLEAR")
16993                        && !self.check_identifier("MATERIALIZE")
16994                        && !self.check(TokenType::Comment)
16995                        && !self.check(TokenType::Replace)
16996                        && !self.check_identifier("MOVE")
16997                        && !self.check_identifier("REMOVE")
16998                        && !self.check_identifier("APPLY")
16999                    {
17000                        // Parse additional column definition
17001                        self.match_token(TokenType::Column); // optional COLUMN keyword
17002                        let if_not_exists = self.match_keywords(&[
17003                            TokenType::If,
17004                            TokenType::Not,
17005                            TokenType::Exists,
17006                        ]);
17007                        let col_def = self.parse_column_def()?;
17008                        let position = if self.match_token(TokenType::First) {
17009                            Some(ColumnPosition::First)
17010                        } else if self.match_token(TokenType::After) {
17011                            let after_col = self.expect_identifier()?;
17012                            // ClickHouse: AFTER n.a (dotted nested column name)
17013                            let after_name = if self.match_token(TokenType::Dot) {
17014                                let field = self.expect_identifier()?;
17015                                format!("{}.{}", after_col, field)
17016                            } else {
17017                                after_col
17018                            };
17019                            Some(ColumnPosition::After(Identifier::new(after_name)))
17020                        } else {
17021                            None
17022                        };
17023                        actions.push(AlterTableAction::AddColumn {
17024                            column: col_def,
17025                            if_not_exists,
17026                            position,
17027                        });
17028                        // last_was_add_column remains true
17029                    } else {
17030                        // Check for MySQL trailing options (ALGORITHM=val, LOCK=val)
17031                        // before trying to parse as an action
17032                        if self.check_identifier("ALGORITHM") || self.check_identifier("LOCK") {
17033                            // Retreat one to re-process the comma in the trailing options loop
17034                            self.current -= 1; // back up past the comma consumed in loop
17035                            break;
17036                        }
17037                        let action = self.parse_alter_action()?;
17038                        last_was_add_column = matches!(action, AlterTableAction::AddColumn { .. });
17039                        actions.push(action);
17040                    }
17041                    if !self.match_token(TokenType::Comma) {
17042                        break;
17043                    }
17044                }
17045
17046                // Parse trailing MySQL ALTER TABLE options: ALGORITHM=val, LOCK=val
17047                // These can appear after actions separated by commas (comma already consumed)
17048                // or directly if no actions were parsed
17049                let mut algorithm = None;
17050                let mut lock = None;
17051                loop {
17052                    // First check without consuming comma (comma may have been consumed by action loop)
17053                    if self.check_identifier("ALGORITHM") {
17054                        self.advance();
17055                        self.expect(TokenType::Eq)?;
17056                        algorithm = Some(self.expect_identifier_or_keyword()?.to_uppercase());
17057                        self.match_token(TokenType::Comma); // optional trailing comma
17058                    } else if self.check_identifier("LOCK") {
17059                        self.advance();
17060                        self.expect(TokenType::Eq)?;
17061                        lock = Some(self.expect_identifier_or_keyword()?.to_uppercase());
17062                        self.match_token(TokenType::Comma); // optional trailing comma
17063                    } else if self.match_token(TokenType::Comma) {
17064                        // Try after comma
17065                        if self.check_identifier("ALGORITHM") {
17066                            self.advance();
17067                            self.expect(TokenType::Eq)?;
17068                            algorithm = Some(self.expect_identifier_or_keyword()?.to_uppercase());
17069                        } else if self.check_identifier("LOCK") {
17070                            self.advance();
17071                            self.expect(TokenType::Eq)?;
17072                            lock = Some(self.expect_identifier_or_keyword()?.to_uppercase());
17073                        } else {
17074                            self.current -= 1;
17075                            break;
17076                        }
17077                    } else {
17078                        break;
17079                    }
17080                }
17081
17082                // ClickHouse: consume optional trailing SETTINGS clause
17083                // e.g., ALTER TABLE t ADD COLUMN c Int64 SETTINGS mutations_sync=2, alter_sync=2
17084                if matches!(
17085                    self.config.dialect,
17086                    Some(crate::dialects::DialectType::ClickHouse)
17087                ) && self.check(TokenType::Settings)
17088                {
17089                    self.advance(); // consume SETTINGS
17090                    let _ = self.parse_settings_property()?;
17091                }
17092
17093                Ok(Expression::AlterTable(Box::new(AlterTable {
17094                    name,
17095                    actions,
17096                    if_exists,
17097                    algorithm,
17098                    lock,
17099                    with_check: with_check_modifier,
17100                    partition,
17101                    on_cluster,
17102                })))
17103            }
17104            TokenType::View => self.parse_alter_view_with_modifiers(None, None, None),
17105            TokenType::Index => self.parse_alter_index(),
17106            TokenType::Sequence => self.parse_alter_sequence(),
17107            _ if self.check_identifier("SESSION") => {
17108                // ALTER SESSION SET/UNSET (Snowflake)
17109                self.advance(); // consume SESSION
17110                match self.parse_alter_session()? {
17111                    Some(expr) => Ok(expr),
17112                    None => {
17113                        // Fall back to command
17114                        Ok(Expression::Command(Box::new(Command {
17115                            this: "ALTER SESSION".to_string(),
17116                        })))
17117                    }
17118                }
17119            }
17120            _ => {
17121                // MySQL: ALTER ALGORITHM = val VIEW, ALTER DEFINER = val VIEW,
17122                // ALTER SQL SECURITY = val VIEW
17123                let mut view_algorithm = None;
17124                let mut view_definer = None;
17125                let mut view_sql_security = None;
17126
17127                loop {
17128                    if self.check_identifier("ALGORITHM") {
17129                        self.advance();
17130                        self.expect(TokenType::Eq)?;
17131                        view_algorithm = Some(self.expect_identifier_or_keyword()?.to_uppercase());
17132                    } else if self.check_identifier("DEFINER") {
17133                        self.advance();
17134                        self.expect(TokenType::Eq)?;
17135                        // Parse user@host format: 'admin'@'localhost'
17136                        let mut definer_str = String::new();
17137                        if self.check(TokenType::String) {
17138                            definer_str.push_str(&format!("'{}'", self.advance().text));
17139                        } else {
17140                            definer_str.push_str(&self.expect_identifier_or_keyword()?);
17141                        }
17142                        // Check for @ separator
17143                        if !self.is_at_end() && self.peek().text == "@" {
17144                            definer_str.push_str(&self.advance().text);
17145                            if self.check(TokenType::String) {
17146                                definer_str.push_str(&format!("'{}'", self.advance().text));
17147                            } else if !self.is_at_end() {
17148                                definer_str.push_str(&self.advance().text);
17149                            }
17150                        }
17151                        view_definer = Some(definer_str);
17152                    } else if self.check_identifier("SQL") {
17153                        self.advance();
17154                        if self.match_identifier("SECURITY") {
17155                            self.match_token(TokenType::Eq);
17156                            view_sql_security =
17157                                Some(self.expect_identifier_or_keyword()?.to_uppercase());
17158                        }
17159                    } else {
17160                        break;
17161                    }
17162                }
17163
17164                if self.check(TokenType::View) {
17165                    self.parse_alter_view_with_modifiers(
17166                        view_algorithm,
17167                        view_definer,
17168                        view_sql_security,
17169                    )
17170                } else {
17171                    // Fall back to Raw for unrecognized ALTER targets
17172                    let start = self.current;
17173                    while !self.is_at_end() && !self.check(TokenType::Semicolon) {
17174                        self.advance();
17175                    }
17176                    let sql = self.tokens_to_sql(start, self.current);
17177                    Ok(Expression::Raw(Raw {
17178                        sql: format!("ALTER {}", sql),
17179                    }))
17180                }
17181            }
17182        }
17183    }
17184
17185    /// Parse ALTER TABLE action
17186    fn parse_alter_action(&mut self) -> Result<AlterTableAction> {
17187        if self.match_token(TokenType::Add) {
17188            // ClickHouse: ADD INDEX idx expr TYPE minmax GRANULARITY 1
17189            // ClickHouse: ADD PROJECTION name (SELECT ...)
17190            // ClickHouse: ADD STATISTICS col1, col2 TYPE tdigest, uniq
17191            // These have different syntax from MySQL ADD INDEX, so consume as Raw
17192            if matches!(
17193                self.config.dialect,
17194                Some(crate::dialects::DialectType::ClickHouse)
17195            ) && (self.check(TokenType::Index)
17196                || self.check_identifier("PROJECTION")
17197                || self.check_identifier("STATISTICS"))
17198            {
17199                let is_statistics = self.check_identifier("STATISTICS");
17200                let mut tokens: Vec<(String, TokenType)> =
17201                    vec![("ADD".to_string(), TokenType::Add)];
17202                let mut paren_depth = 0i32;
17203                while !self.is_at_end() && !self.check(TokenType::Semicolon) {
17204                    // STATISTICS uses commas internally (col1, col2 TYPE t1, t2), don't break at comma
17205                    if self.check(TokenType::Comma) && paren_depth == 0 && !is_statistics {
17206                        break;
17207                    }
17208                    let token = self.advance();
17209                    if token.token_type == TokenType::LParen {
17210                        paren_depth += 1;
17211                    }
17212                    if token.token_type == TokenType::RParen {
17213                        paren_depth -= 1;
17214                    }
17215                    let text = if token.token_type == TokenType::QuotedIdentifier {
17216                        format!("\"{}\"", token.text)
17217                    } else if token.token_type == TokenType::String {
17218                        format!("'{}'", token.text)
17219                    } else {
17220                        token.text.clone()
17221                    };
17222                    tokens.push((text, token.token_type));
17223                }
17224                return Ok(AlterTableAction::Raw {
17225                    sql: self.join_command_tokens(tokens),
17226                });
17227            }
17228            // ADD CONSTRAINT or ADD COLUMN or ADD INDEX
17229            if self.match_token(TokenType::Constraint) {
17230                // ADD CONSTRAINT name ...
17231                let name = Some(self.expect_identifier_with_quoted()?);
17232                let constraint = self.parse_constraint_definition(name)?;
17233                Ok(AlterTableAction::AddConstraint(constraint))
17234            } else if self.check(TokenType::PrimaryKey)
17235                || self.check(TokenType::ForeignKey)
17236                || self.check(TokenType::Check)
17237            {
17238                // ADD PRIMARY KEY / FOREIGN KEY / CHECK (without CONSTRAINT keyword)
17239                let constraint = self.parse_table_constraint()?;
17240                Ok(AlterTableAction::AddConstraint(constraint))
17241            } else if self.check(TokenType::Index)
17242                || self.check(TokenType::Key)
17243                || self.check(TokenType::Unique)
17244                || self.check_identifier("FULLTEXT")
17245                || self.check_identifier("SPATIAL")
17246            {
17247                // ADD [UNIQUE|FULLTEXT|SPATIAL] [{INDEX|KEY}] [name] (columns) [USING {BTREE|HASH}]
17248                let kind = if self.match_token(TokenType::Unique) {
17249                    Some("UNIQUE".to_string())
17250                } else if self.match_identifier("FULLTEXT") {
17251                    Some("FULLTEXT".to_string())
17252                } else if self.match_identifier("SPATIAL") {
17253                    Some("SPATIAL".to_string())
17254                } else {
17255                    None
17256                };
17257                // Consume optional INDEX or KEY keyword, track which was used
17258                let use_key_keyword = if self.match_token(TokenType::Key) {
17259                    true
17260                } else {
17261                    self.match_token(TokenType::Index);
17262                    false
17263                };
17264
17265                // Optional index name (before the columns)
17266                let name = if !self.check(TokenType::LParen) && !self.check(TokenType::Using) {
17267                    Some(self.expect_identifier_with_quoted()?)
17268                } else {
17269                    None
17270                };
17271
17272                // Parse columns (with optional prefix length and DESC)
17273                self.expect(TokenType::LParen)?;
17274                let columns = self.parse_index_identifier_list()?;
17275                self.expect(TokenType::RParen)?;
17276
17277                // Parse optional USING BTREE|HASH
17278                let modifiers = self.parse_constraint_modifiers();
17279
17280                Ok(AlterTableAction::AddConstraint(TableConstraint::Index {
17281                    name,
17282                    columns,
17283                    kind,
17284                    modifiers,
17285                    use_key_keyword,
17286                    expression: None,
17287                    index_type: None,
17288                    granularity: None,
17289                }))
17290            } else if self.match_identifier("COLUMNS") {
17291                // ADD COLUMNS (col1 TYPE, col2 TYPE, ...) [CASCADE] - Hive/Spark syntax
17292                self.expect(TokenType::LParen)?;
17293                let mut columns = Vec::new();
17294                loop {
17295                    let col_def = self.parse_column_def()?;
17296                    columns.push(col_def);
17297                    if !self.match_token(TokenType::Comma) {
17298                        break;
17299                    }
17300                }
17301                self.expect(TokenType::RParen)?;
17302                let cascade = self.match_token(TokenType::Cascade);
17303                Ok(AlterTableAction::AddColumns { columns, cascade })
17304            } else if self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]) {
17305                // ADD IF NOT EXISTS PARTITION(key = value) - Hive/Spark syntax
17306                // ADD IF NOT EXISTS col1 INT, col2 INT - Snowflake syntax
17307                if self.match_token(TokenType::Partition) {
17308                    self.expect(TokenType::LParen)?;
17309                    let mut partition_exprs = Vec::new();
17310                    loop {
17311                        if let Some(expr) = self.parse_conjunction()? {
17312                            partition_exprs.push(expr);
17313                        }
17314                        if !self.match_token(TokenType::Comma) {
17315                            break;
17316                        }
17317                    }
17318                    self.expect(TokenType::RParen)?;
17319                    let partition =
17320                        Expression::Partition(Box::new(crate::expressions::Partition {
17321                            expressions: partition_exprs,
17322                            subpartition: false,
17323                        }));
17324                    let location = if self.match_text_seq(&["LOCATION"]) {
17325                        self.parse_property()?
17326                    } else {
17327                        None
17328                    };
17329                    return Ok(AlterTableAction::AddPartition {
17330                        partition,
17331                        if_not_exists: true,
17332                        location,
17333                    });
17334                } else {
17335                    // Snowflake: ADD IF NOT EXISTS col1 INT, [IF NOT EXISTS] col2 INT
17336                    // Parse just the first column; the caller's comma loop handles the rest
17337                    let col_def = self.parse_column_def()?;
17338                    return Ok(AlterTableAction::AddColumn {
17339                        column: col_def,
17340                        if_not_exists: true,
17341                        position: None,
17342                    });
17343                }
17344            } else if self.check(TokenType::Partition) {
17345                // ADD PARTITION(key = value) - Hive/Spark syntax
17346                self.advance(); // consume PARTITION
17347                self.expect(TokenType::LParen)?;
17348                let mut partition_exprs = Vec::new();
17349                loop {
17350                    if let Some(expr) = self.parse_conjunction()? {
17351                        partition_exprs.push(expr);
17352                    }
17353                    if !self.match_token(TokenType::Comma) {
17354                        break;
17355                    }
17356                }
17357                self.expect(TokenType::RParen)?;
17358                let partition = Expression::Partition(Box::new(crate::expressions::Partition {
17359                    expressions: partition_exprs,
17360                    subpartition: false,
17361                }));
17362                let location = if self.match_text_seq(&["LOCATION"]) {
17363                    // Parse the LOCATION value (typically a string literal like 'path')
17364                    Some(self.parse_primary()?)
17365                } else {
17366                    None
17367                };
17368                Ok(AlterTableAction::AddPartition {
17369                    partition,
17370                    if_not_exists: false,
17371                    location,
17372                })
17373            } else {
17374                // ADD COLUMN or ADD (col1 TYPE, col2 TYPE) for Oracle
17375                let has_column_keyword = self.match_token(TokenType::Column); // optional COLUMN keyword
17376
17377                // Check for Oracle-style ADD (col1 TYPE, col2 TYPE, ...) without COLUMN keyword
17378                if !has_column_keyword && self.check(TokenType::LParen) {
17379                    // Oracle multi-column ADD syntax: ADD (col1 TYPE, col2 TYPE, ...)
17380                    self.advance(); // consume '('
17381                    let mut columns = Vec::new();
17382                    loop {
17383                        let col_def = self.parse_column_def()?;
17384                        columns.push(col_def);
17385                        if !self.match_token(TokenType::Comma) {
17386                            break;
17387                        }
17388                    }
17389                    self.expect(TokenType::RParen)?;
17390                    // Use AddColumns with cascade=false for Oracle syntax
17391                    Ok(AlterTableAction::AddColumns {
17392                        columns,
17393                        cascade: false,
17394                    })
17395                } else {
17396                    // Handle IF NOT EXISTS for ADD COLUMN
17397                    let if_not_exists =
17398                        self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
17399                    let col_def = self.parse_column_def()?;
17400                    // Check for FIRST or AFTER position modifiers (MySQL/MariaDB)
17401                    let position = if self.match_token(TokenType::First) {
17402                        Some(ColumnPosition::First)
17403                    } else if self.match_token(TokenType::After) {
17404                        let after_col = self.expect_identifier()?;
17405                        // ClickHouse: AFTER n.a (dotted nested column name)
17406                        let after_name = if self.match_token(TokenType::Dot) {
17407                            let field = self.expect_identifier()?;
17408                            format!("{}.{}", after_col, field)
17409                        } else {
17410                            after_col
17411                        };
17412                        Some(ColumnPosition::After(Identifier::new(after_name)))
17413                    } else {
17414                        None
17415                    };
17416                    Ok(AlterTableAction::AddColumn {
17417                        column: col_def,
17418                        if_not_exists,
17419                        position,
17420                    })
17421                }
17422            }
17423        } else if self.match_token(TokenType::Drop) {
17424            // ClickHouse: DROP INDEX idx, DROP PROJECTION name, DROP STATISTICS, etc.
17425            // These have different syntax from MySQL, so consume as Raw
17426            if matches!(
17427                self.config.dialect,
17428                Some(crate::dialects::DialectType::ClickHouse)
17429            ) && (self.check(TokenType::Index)
17430                || self.check_identifier("PROJECTION")
17431                || self.check_identifier("STATISTICS")
17432                || self.check_identifier("DETACHED")
17433                || self.check_identifier("PART"))
17434            {
17435                let is_statistics = self.check_identifier("STATISTICS");
17436                let mut tokens: Vec<(String, TokenType)> =
17437                    vec![("DROP".to_string(), TokenType::Drop)];
17438                let mut paren_depth = 0i32;
17439                while !self.is_at_end() && !self.check(TokenType::Semicolon) {
17440                    if self.check(TokenType::Comma) && paren_depth == 0 && !is_statistics {
17441                        break;
17442                    }
17443                    let token = self.advance();
17444                    if token.token_type == TokenType::LParen {
17445                        paren_depth += 1;
17446                    }
17447                    if token.token_type == TokenType::RParen {
17448                        paren_depth -= 1;
17449                    }
17450                    let text = if token.token_type == TokenType::QuotedIdentifier {
17451                        format!("\"{}\"", token.text)
17452                    } else if token.token_type == TokenType::String {
17453                        format!("'{}'", token.text)
17454                    } else {
17455                        token.text.clone()
17456                    };
17457                    tokens.push((text, token.token_type));
17458                }
17459                return Ok(AlterTableAction::Raw {
17460                    sql: self.join_command_tokens(tokens),
17461                });
17462            }
17463            // Handle IF EXISTS before determining what to drop
17464            let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
17465
17466            if self.match_token(TokenType::Partition) {
17467                // DROP [IF EXISTS] PARTITION expr [, PARTITION expr ...]
17468                // ClickHouse supports: PARTITION 201901, PARTITION ALL,
17469                // PARTITION tuple(...), PARTITION ID '...'
17470                let mut partitions = Vec::new();
17471                loop {
17472                    if self.check(TokenType::LParen) {
17473                        // ClickHouse: PARTITION (expr) or PARTITION (expr, expr, ...)
17474                        // Standard SQL: PARTITION (key=value, ...)
17475                        // Peek ahead: if LParen is followed by String/Number (not identifier=),
17476                        // parse as expression
17477                        let is_ch_expr = matches!(
17478                            self.config.dialect,
17479                            Some(crate::dialects::DialectType::ClickHouse)
17480                        ) && self.current + 1 < self.tokens.len()
17481                            && (self.tokens[self.current + 1].token_type == TokenType::String
17482                                || self.tokens[self.current + 1].token_type == TokenType::Number
17483                                || self.tokens[self.current + 1].token_type == TokenType::LParen
17484                                || (self.current + 2 < self.tokens.len()
17485                                    && self.tokens[self.current + 2].token_type != TokenType::Eq));
17486                        if is_ch_expr {
17487                            // Parse as tuple expression
17488                            let expr = self.parse_expression()?;
17489                            partitions.push(vec![(Identifier::new("__expr__".to_string()), expr)]);
17490                        } else {
17491                            self.advance(); // consume (
17492                            let mut parts = Vec::new();
17493                            loop {
17494                                let key = self.expect_identifier()?;
17495                                self.expect(TokenType::Eq)?;
17496                                let value = self.parse_expression()?;
17497                                parts.push((Identifier::new(key), value));
17498                                if !self.match_token(TokenType::Comma) {
17499                                    break;
17500                                }
17501                            }
17502                            self.expect(TokenType::RParen)?;
17503                            partitions.push(parts);
17504                        }
17505                    } else if self.match_text_seq(&["ALL"]) {
17506                        // ClickHouse: PARTITION ALL
17507                        partitions.push(vec![(
17508                            Identifier::new("ALL".to_string()),
17509                            Expression::Boolean(BooleanLiteral { value: true }),
17510                        )]);
17511                    } else if self.match_text_seq(&["ID"]) {
17512                        // ClickHouse: PARTITION ID 'string'
17513                        let id_val = self.parse_expression()?;
17514                        partitions.push(vec![(Identifier::new("ID".to_string()), id_val)]);
17515                    } else {
17516                        // ClickHouse: PARTITION <expression> (number, tuple(...), etc.)
17517                        let expr = self.parse_expression()?;
17518                        partitions.push(vec![(Identifier::new("__expr__".to_string()), expr)]);
17519                    }
17520                    // Check for ", PARTITION" for multiple partitions
17521                    if self.match_token(TokenType::Comma) {
17522                        if !self.match_token(TokenType::Partition) {
17523                            break;
17524                        }
17525                    } else {
17526                        break;
17527                    }
17528                }
17529                Ok(AlterTableAction::DropPartition {
17530                    partitions,
17531                    if_exists,
17532                })
17533            } else if self.match_token(TokenType::Column) {
17534                // DROP [IF EXISTS] COLUMN [IF EXISTS] name [CASCADE]
17535                // Check for IF EXISTS after COLUMN as well
17536                let if_exists =
17537                    if_exists || self.match_keywords(&[TokenType::If, TokenType::Exists]);
17538                let mut name = self.expect_identifier_with_quoted()?;
17539                // ClickHouse: nested column names like n.ui8
17540                if matches!(
17541                    self.config.dialect,
17542                    Some(crate::dialects::DialectType::ClickHouse)
17543                ) && self.match_token(TokenType::Dot)
17544                {
17545                    let sub = self.expect_identifier_with_quoted()?;
17546                    name.name = format!("{}.{}", name.name, sub.name);
17547                }
17548                let cascade = self.match_token(TokenType::Cascade);
17549                Ok(AlterTableAction::DropColumn {
17550                    name,
17551                    if_exists,
17552                    cascade,
17553                })
17554            } else if self.match_token(TokenType::Constraint) {
17555                // DROP [IF EXISTS] CONSTRAINT name
17556                let name = self.expect_identifier_with_quoted()?;
17557                Ok(AlterTableAction::DropConstraint { name, if_exists })
17558            } else if self.match_keywords(&[TokenType::ForeignKey, TokenType::Key]) {
17559                // DROP FOREIGN KEY name (Oracle/MySQL)
17560                let name = self.expect_identifier_with_quoted()?;
17561                Ok(AlterTableAction::DropForeignKey { name })
17562            } else if self.check_identifier("COLUMNS") && self.check_next(TokenType::LParen) {
17563                // DROP COLUMNS (col1, col2, ...) - Spark/Databricks syntax
17564                self.advance(); // consume COLUMNS
17565                self.expect(TokenType::LParen)?;
17566                let mut names = Vec::new();
17567                loop {
17568                    let name = self.expect_identifier_with_quoted()?;
17569                    names.push(name);
17570                    if !self.match_token(TokenType::Comma) {
17571                        break;
17572                    }
17573                }
17574                self.expect(TokenType::RParen)?;
17575                Ok(AlterTableAction::DropColumns { names })
17576            } else {
17577                // DROP [IF EXISTS] name (implicit column) [CASCADE]
17578                let mut name = self.expect_identifier_with_quoted()?;
17579                // ClickHouse: nested column names like n.ui8
17580                if matches!(
17581                    self.config.dialect,
17582                    Some(crate::dialects::DialectType::ClickHouse)
17583                ) && self.match_token(TokenType::Dot)
17584                {
17585                    let sub = self.expect_identifier_with_quoted()?;
17586                    name.name = format!("{}.{}", name.name, sub.name);
17587                }
17588                let cascade = self.match_token(TokenType::Cascade);
17589                Ok(AlterTableAction::DropColumn {
17590                    name,
17591                    if_exists,
17592                    cascade,
17593                })
17594            }
17595        } else if self.match_token(TokenType::Rename) {
17596            if self.match_token(TokenType::Column) {
17597                // RENAME COLUMN [IF EXISTS] old TO new
17598                let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
17599                let mut old_name = self.expect_identifier_or_safe_keyword_with_quoted()?;
17600                // ClickHouse: nested column names like n.x
17601                if matches!(
17602                    self.config.dialect,
17603                    Some(crate::dialects::DialectType::ClickHouse)
17604                ) && self.match_token(TokenType::Dot)
17605                {
17606                    let field = self.expect_identifier_with_quoted()?;
17607                    old_name = Identifier {
17608                        name: format!("{}.{}", old_name.name, field.name),
17609                        quoted: false,
17610                        trailing_comments: Vec::new(),
17611                        span: None,
17612                    };
17613                }
17614                self.expect(TokenType::To)?;
17615                let mut new_name = self.expect_identifier_or_safe_keyword_with_quoted()?;
17616                // ClickHouse: nested column names like n.y
17617                if matches!(
17618                    self.config.dialect,
17619                    Some(crate::dialects::DialectType::ClickHouse)
17620                ) && self.match_token(TokenType::Dot)
17621                {
17622                    let field = self.expect_identifier_or_safe_keyword_with_quoted()?;
17623                    new_name = Identifier {
17624                        name: format!("{}.{}", new_name.name, field.name),
17625                        quoted: false,
17626                        trailing_comments: Vec::new(),
17627                        span: None,
17628                    };
17629                }
17630                Ok(AlterTableAction::RenameColumn {
17631                    old_name,
17632                    new_name,
17633                    if_exists,
17634                })
17635            } else if self.match_token(TokenType::To) {
17636                // RENAME TO new_table
17637                let new_name = self.parse_table_ref()?;
17638                Ok(AlterTableAction::RenameTable(new_name))
17639            } else if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
17640                // StarRocks/Doris: RENAME new_name (without TO)
17641                // SQLite: RENAME old_name TO new_name (without COLUMN keyword)
17642                let first_name = self.expect_identifier_with_quoted()?;
17643                if self.match_token(TokenType::To) {
17644                    let new_name = self.expect_identifier_with_quoted()?;
17645                    Ok(AlterTableAction::RenameColumn {
17646                        old_name: first_name,
17647                        new_name,
17648                        if_exists: false,
17649                    })
17650                } else {
17651                    // No TO keyword: treat as RENAME TABLE (StarRocks/Doris)
17652                    Ok(AlterTableAction::RenameTable(TableRef::new(
17653                        first_name.name,
17654                    )))
17655                }
17656            } else {
17657                Err(self.parse_error("Expected COLUMN or TO after RENAME"))
17658            }
17659        } else if self.match_token(TokenType::Alter) {
17660            // Check for ALTER INDEX (MySQL: ALTER TABLE t ALTER INDEX i VISIBLE/INVISIBLE)
17661            if self.match_token(TokenType::Index) {
17662                let name = self.expect_identifier_with_quoted()?;
17663                let visible = if self.match_identifier("VISIBLE") {
17664                    true
17665                } else if self.match_identifier("INVISIBLE") {
17666                    false
17667                } else {
17668                    return Err(
17669                        self.parse_error("Expected VISIBLE or INVISIBLE after ALTER INDEX name")
17670                    );
17671                };
17672                Ok(AlterTableAction::AlterIndex { name, visible })
17673            } else if self.check_identifier("SORTKEY") {
17674                // Redshift: ALTER TABLE t ALTER SORTKEY AUTO|NONE|(col1, col2)
17675                self.advance(); // consume SORTKEY
17676                if self.match_texts(&["AUTO", "NONE"]) {
17677                    let style = self.previous().text.to_uppercase();
17678                    Ok(AlterTableAction::AlterSortKey {
17679                        this: Some(style),
17680                        expressions: Vec::new(),
17681                        compound: false,
17682                    })
17683                } else if self.check(TokenType::LParen) {
17684                    // (col1, col2) syntax
17685                    let wrapped = self.parse_wrapped_id_vars()?;
17686                    let expressions = if let Some(Expression::Tuple(t)) = wrapped {
17687                        t.expressions
17688                    } else {
17689                        Vec::new()
17690                    };
17691                    Ok(AlterTableAction::AlterSortKey {
17692                        this: None,
17693                        expressions,
17694                        compound: false,
17695                    })
17696                } else {
17697                    Err(self.parse_error("Expected AUTO, NONE, or (columns) after SORTKEY"))
17698                }
17699            } else if self.check_identifier("COMPOUND") {
17700                // Redshift: ALTER TABLE t ALTER COMPOUND SORTKEY (col1, col2)
17701                self.advance(); // consume COMPOUND
17702                if !self.match_identifier("SORTKEY") {
17703                    return Err(self.parse_error("Expected SORTKEY after COMPOUND"));
17704                }
17705                if self.check(TokenType::LParen) {
17706                    let wrapped = self.parse_wrapped_id_vars()?;
17707                    let expressions = if let Some(Expression::Tuple(t)) = wrapped {
17708                        t.expressions
17709                    } else {
17710                        Vec::new()
17711                    };
17712                    Ok(AlterTableAction::AlterSortKey {
17713                        this: None,
17714                        expressions,
17715                        compound: true,
17716                    })
17717                } else {
17718                    Err(self.parse_error("Expected (columns) after COMPOUND SORTKEY"))
17719                }
17720            } else if self.check_identifier("DISTSTYLE") {
17721                // Redshift: ALTER TABLE t ALTER DISTSTYLE ALL|EVEN|AUTO|KEY [DISTKEY col]
17722                self.advance(); // consume DISTSTYLE
17723                if self.match_texts(&["ALL", "EVEN", "AUTO"]) {
17724                    let style = self.previous().text.to_uppercase();
17725                    Ok(AlterTableAction::AlterDistStyle {
17726                        style,
17727                        distkey: None,
17728                    })
17729                } else if self.match_token(TokenType::Key) || self.match_identifier("KEY") {
17730                    // DISTSTYLE KEY DISTKEY col
17731                    if !self.match_identifier("DISTKEY") {
17732                        return Err(self.parse_error("Expected DISTKEY after DISTSTYLE KEY"));
17733                    }
17734                    let col = self.expect_identifier_with_quoted()?;
17735                    Ok(AlterTableAction::AlterDistStyle {
17736                        style: "KEY".to_string(),
17737                        distkey: Some(col),
17738                    })
17739                } else {
17740                    Err(self.parse_error("Expected ALL, EVEN, AUTO, or KEY after DISTSTYLE"))
17741                }
17742            } else if self.check_identifier("DISTKEY") {
17743                // Redshift: ALTER TABLE t ALTER DISTKEY col (shorthand for DISTSTYLE KEY DISTKEY col)
17744                self.advance(); // consume DISTKEY
17745                let col = self.expect_identifier_with_quoted()?;
17746                Ok(AlterTableAction::AlterDistStyle {
17747                    style: "KEY".to_string(),
17748                    distkey: Some(col),
17749                })
17750            } else {
17751                // ALTER COLUMN
17752                self.match_token(TokenType::Column); // optional COLUMN keyword
17753                let name = self.expect_identifier_with_quoted()?;
17754                let action = self.parse_alter_column_action()?;
17755                Ok(AlterTableAction::AlterColumn {
17756                    name,
17757                    action,
17758                    use_modify_keyword: false,
17759                })
17760            }
17761        } else if self.match_identifier("MODIFY") {
17762            // ClickHouse: MODIFY ORDER BY, MODIFY SETTING, MODIFY TTL, MODIFY QUERY,
17763            // MODIFY COLUMN name type [DEFAULT|MATERIALIZED|ALIAS] [CODEC] [TTL] [COMMENT], etc.
17764            // These are ClickHouse-specific and have richer syntax than MySQL MODIFY COLUMN.
17765            // Consume all ClickHouse MODIFY actions as Raw.
17766            if matches!(
17767                self.config.dialect,
17768                Some(crate::dialects::DialectType::ClickHouse)
17769            ) {
17770                // MODIFY SETTING uses commas between settings (not action separators)
17771                let is_setting =
17772                    self.check(TokenType::Settings) || self.check_identifier("SETTING");
17773                let mut tokens: Vec<(String, TokenType)> =
17774                    vec![("MODIFY".to_string(), TokenType::Var)];
17775                let mut paren_depth = 0i32;
17776                while !self.is_at_end() && !self.check(TokenType::Semicolon) {
17777                    if self.check(TokenType::Comma) && paren_depth == 0 && !is_setting {
17778                        break;
17779                    }
17780                    let token = self.advance();
17781                    if token.token_type == TokenType::LParen {
17782                        paren_depth += 1;
17783                    }
17784                    if token.token_type == TokenType::RParen {
17785                        paren_depth -= 1;
17786                    }
17787                    let text = if token.token_type == TokenType::QuotedIdentifier {
17788                        format!("\"{}\"", token.text)
17789                    } else if token.token_type == TokenType::String {
17790                        format!("'{}'", token.text)
17791                    } else {
17792                        token.text.clone()
17793                    };
17794                    tokens.push((text, token.token_type));
17795                }
17796                return Ok(AlterTableAction::Raw {
17797                    sql: self.join_command_tokens(tokens),
17798                });
17799            }
17800            // MODIFY COLUMN (MySQL syntax for altering column type)
17801            self.match_token(TokenType::Column); // optional COLUMN keyword
17802            let name = Identifier::new(self.expect_identifier()?);
17803            // Parse the data type directly (MySQL MODIFY COLUMN col TYPE)
17804            let data_type = self.parse_data_type()?;
17805            // Parse optional COLLATE clause
17806            let collate = if self.match_token(TokenType::Collate) {
17807                if self.check(TokenType::String) {
17808                    Some(self.advance().text)
17809                } else if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
17810                    Some(self.advance().text)
17811                } else {
17812                    None
17813                }
17814            } else {
17815                None
17816            };
17817            Ok(AlterTableAction::AlterColumn {
17818                name,
17819                action: AlterColumnAction::SetDataType {
17820                    data_type,
17821                    using: None,
17822                    collate,
17823                },
17824                use_modify_keyword: true,
17825            })
17826        } else if self.match_identifier("CHANGE") {
17827            // CHANGE [COLUMN] old_name new_name [data_type] [COMMENT 'comment'] - Hive/MySQL/SingleStore syntax
17828            // In SingleStore, data_type can be omitted for simple renames
17829            self.match_token(TokenType::Column); // optional COLUMN keyword
17830            let old_name = Identifier::new(self.expect_identifier()?);
17831            let new_name = Identifier::new(self.expect_identifier()?);
17832            // Try to parse data type - it's optional in SingleStore
17833            let data_type = if !self.is_at_end()
17834                && !self.check(TokenType::Comment)
17835                && !self.check(TokenType::Comma)
17836                && !self.check(TokenType::Semicolon)
17837            {
17838                // Check if next token could start a data type
17839                let tok = self.peek();
17840                if tok.token_type.is_keyword()
17841                    || tok.token_type == TokenType::Identifier
17842                    || tok.token_type == TokenType::Var
17843                {
17844                    Some(self.parse_data_type()?)
17845                } else {
17846                    None
17847                }
17848            } else {
17849                None
17850            };
17851            let comment = if self.match_token(TokenType::Comment) {
17852                Some(self.expect_string()?)
17853            } else {
17854                None
17855            };
17856            let cascade = self.match_text_seq(&["CASCADE"]);
17857            // Also check for RESTRICT (the opposite, just consume it)
17858            if !cascade {
17859                self.match_text_seq(&["RESTRICT"]);
17860            }
17861            Ok(AlterTableAction::ChangeColumn {
17862                old_name,
17863                new_name,
17864                data_type,
17865                comment,
17866                cascade,
17867            })
17868        } else if self.match_token(TokenType::Constraint) {
17869            // CONSTRAINT name ... (implicit ADD, CONSTRAINT already consumed)
17870            // Parse the constraint name and then the constraint definition
17871            let name = Some(self.expect_identifier_with_quoted()?);
17872            let constraint = self.parse_constraint_definition(name)?;
17873            Ok(AlterTableAction::AddConstraint(constraint))
17874        } else if self.check(TokenType::PrimaryKey)
17875            || self.check(TokenType::ForeignKey)
17876            || self.check(TokenType::Unique)
17877        {
17878            // ADD CONSTRAINT (implicit ADD, no CONSTRAINT keyword)
17879            let constraint = self.parse_table_constraint()?;
17880            Ok(AlterTableAction::AddConstraint(constraint))
17881        } else if self.match_token(TokenType::Delete) {
17882            // ALTER TABLE t DELETE WHERE x = 1 (BigQuery syntax)
17883            self.expect(TokenType::Where)?;
17884            let where_clause = self.parse_expression()?;
17885            Ok(AlterTableAction::Delete { where_clause })
17886        } else if self.match_keyword("SWAP") {
17887            // Snowflake: ALTER TABLE a SWAP WITH b
17888            self.expect(TokenType::With)?;
17889            let target = self.parse_table_ref()?;
17890            Ok(AlterTableAction::SwapWith(target))
17891        } else if self.match_token(TokenType::Set) {
17892            // TSQL: ALTER TABLE t SET (SYSTEM_VERSIONING=ON, DATA_DELETION=ON, ...)
17893            if self.check(TokenType::LParen) {
17894                self.advance(); // consume (
17895                let mut expressions = Vec::new();
17896                loop {
17897                    if self.check(TokenType::RParen) {
17898                        break;
17899                    }
17900                    if self.check_identifier("SYSTEM_VERSIONING") {
17901                        let expr = self.parse_system_versioning_option()?;
17902                        expressions.push(expr);
17903                    } else if self.check_identifier("DATA_DELETION") {
17904                        let expr = self.parse_data_deletion_option()?;
17905                        expressions.push(expr);
17906                    } else {
17907                        // Generic key=value (e.g., FILESTREAM_ON = 'test')
17908                        let expr = self.parse_expression()?;
17909                        expressions.push(expr);
17910                    }
17911                    if !self.match_token(TokenType::Comma) {
17912                        break;
17913                    }
17914                }
17915                self.expect(TokenType::RParen)?;
17916                Ok(AlterTableAction::SetOptions { expressions })
17917            } else if self.match_keyword("TAG") {
17918                // Snowflake: SET TAG key='value', ... (key can be qualified like schema.tagname)
17919                let mut tags = Vec::new();
17920                loop {
17921                    // Parse qualified tag name (e.g., foo.bar or just bar)
17922                    let mut key = self.expect_identifier_or_keyword()?;
17923                    while self.match_token(TokenType::Dot) {
17924                        let next = self.expect_identifier_or_keyword()?;
17925                        key = format!("{}.{}", key, next);
17926                    }
17927                    self.expect(TokenType::Eq)?;
17928                    let value = self.parse_primary()?;
17929                    tags.push((key, value));
17930                    if !self.match_token(TokenType::Comma) {
17931                        break;
17932                    }
17933                }
17934                Ok(AlterTableAction::SetTag { expressions: tags })
17935            } else if self.check_identifier("LOGGED") {
17936                // PostgreSQL: ALTER TABLE t SET LOGGED
17937                self.advance();
17938                Ok(AlterTableAction::SetAttribute {
17939                    attribute: "LOGGED".to_string(),
17940                })
17941            } else if self.check_identifier("UNLOGGED") {
17942                // PostgreSQL: ALTER TABLE t SET UNLOGGED
17943                self.advance();
17944                Ok(AlterTableAction::SetAttribute {
17945                    attribute: "UNLOGGED".to_string(),
17946                })
17947            } else if self.match_identifier("WITHOUT") {
17948                // PostgreSQL: ALTER TABLE t SET WITHOUT CLUSTER/OIDS
17949                let what = self.expect_identifier_or_keyword()?;
17950                Ok(AlterTableAction::SetAttribute {
17951                    attribute: format!("WITHOUT {}", what),
17952                })
17953            } else if self.check_identifier("ACCESS") {
17954                // PostgreSQL: ALTER TABLE t SET ACCESS METHOD method
17955                self.advance();
17956                // Consume "METHOD"
17957                if !self.match_identifier("METHOD") {
17958                    return Err(self.parse_error("Expected METHOD after ACCESS"));
17959                }
17960                let method = self.expect_identifier_or_keyword()?;
17961                Ok(AlterTableAction::SetAttribute {
17962                    attribute: format!("ACCESS METHOD {}", method),
17963                })
17964            } else if self.check_identifier("TABLESPACE") {
17965                // PostgreSQL: ALTER TABLE t SET TABLESPACE tablespace
17966                self.advance();
17967                let name = self.expect_identifier_or_keyword()?;
17968                Ok(AlterTableAction::SetAttribute {
17969                    attribute: format!("TABLESPACE {}", name),
17970                })
17971            } else if self.check_identifier("STAGE_FILE_FORMAT") {
17972                // Snowflake: ALTER TABLE t SET STAGE_FILE_FORMAT = (options)
17973                self.advance();
17974                let options = self.parse_wrapped_options()?;
17975                Ok(AlterTableAction::SetStageFileFormat { options })
17976            } else if self.check_identifier("STAGE_COPY_OPTIONS") {
17977                // Snowflake: ALTER TABLE t SET STAGE_COPY_OPTIONS = (options)
17978                self.advance();
17979                let options = self.parse_wrapped_options()?;
17980                Ok(AlterTableAction::SetStageCopyOptions { options })
17981            } else if self.match_token(TokenType::Authorization) {
17982                // Trino: ALTER TABLE t SET AUTHORIZATION [ROLE] user
17983                let mut auth_text = String::new();
17984                if self.match_texts(&["ROLE"]) {
17985                    auth_text.push_str("ROLE ");
17986                }
17987                let user = self.expect_identifier_or_keyword()?;
17988                auth_text.push_str(&user);
17989                Ok(AlterTableAction::SetAttribute {
17990                    attribute: format!("AUTHORIZATION {}", auth_text),
17991                })
17992            } else if self.match_identifier("PROPERTIES") {
17993                // Trino: ALTER TABLE t SET PROPERTIES x = 'y', ...
17994                let mut properties = Vec::new();
17995                loop {
17996                    // Parse property name (could be identifier or string literal)
17997                    let key = if self.check(TokenType::String) {
17998                        self.expect_string()?
17999                    } else {
18000                        self.expect_identifier_or_keyword()?
18001                    };
18002                    self.expect(TokenType::Eq)?;
18003                    // Parse value (could be DEFAULT or an expression)
18004                    let value = if self.match_token(TokenType::Default) {
18005                        // Use Var instead of Identifier so it won't be quoted
18006                        Expression::Var(Box::new(crate::expressions::Var {
18007                            this: "DEFAULT".to_string(),
18008                        }))
18009                    } else {
18010                        self.parse_expression()?
18011                    };
18012                    properties.push((key, value));
18013                    if !self.match_token(TokenType::Comma) {
18014                        break;
18015                    }
18016                }
18017                Ok(AlterTableAction::SetProperty { properties })
18018            } else if self.match_text_seq(&["TABLE", "PROPERTIES"]) {
18019                // Redshift: ALTER TABLE t SET TABLE PROPERTIES ('a' = '5', 'b' = 'c')
18020                self.expect(TokenType::LParen)?;
18021                let mut properties = Vec::new();
18022                loop {
18023                    if self.check(TokenType::RParen) {
18024                        break;
18025                    }
18026                    // Parse key (string literal)
18027                    let key = self.parse_primary()?;
18028                    self.expect(TokenType::Eq)?;
18029                    // Parse value (string literal)
18030                    let value = self.parse_primary()?;
18031                    properties.push((key, value));
18032                    if !self.match_token(TokenType::Comma) {
18033                        break;
18034                    }
18035                }
18036                self.expect(TokenType::RParen)?;
18037                Ok(AlterTableAction::SetTableProperties { properties })
18038            } else if self.match_text_seq(&["LOCATION"]) {
18039                // Redshift: ALTER TABLE t SET LOCATION 's3://bucket/folder/'
18040                let location = self.expect_string()?;
18041                Ok(AlterTableAction::SetLocation { location })
18042            } else if self.match_text_seq(&["FILE", "FORMAT"]) {
18043                // Redshift: ALTER TABLE t SET FILE FORMAT AVRO
18044                let format = self.expect_identifier_or_keyword()?;
18045                Ok(AlterTableAction::SetFileFormat { format })
18046            } else {
18047                // Snowflake: SET property=value, ...
18048                let mut properties = Vec::new();
18049                loop {
18050                    let key = self.expect_identifier_or_keyword()?;
18051                    self.expect(TokenType::Eq)?;
18052                    let value = self.parse_expression()?;
18053                    properties.push((key, value));
18054                    if !self.match_token(TokenType::Comma) {
18055                        break;
18056                    }
18057                }
18058                Ok(AlterTableAction::SetProperty { properties })
18059            }
18060        } else if self.match_keyword("UNSET") {
18061            // Snowflake: ALTER TABLE t UNSET property or UNSET TAG key
18062            if self.match_keyword("TAG") {
18063                // UNSET TAG key1, key2 (keys can be qualified like schema.tagname)
18064                let mut names = Vec::new();
18065                loop {
18066                    let mut name = self.expect_identifier_or_keyword()?;
18067                    while self.match_token(TokenType::Dot) {
18068                        let next = self.expect_identifier_or_keyword()?;
18069                        name = format!("{}.{}", name, next);
18070                    }
18071                    names.push(name);
18072                    if !self.match_token(TokenType::Comma) {
18073                        break;
18074                    }
18075                }
18076                Ok(AlterTableAction::UnsetTag { names })
18077            } else {
18078                // UNSET property1, property2
18079                let mut properties = Vec::new();
18080                loop {
18081                    let name = self.expect_identifier_or_keyword()?;
18082                    properties.push(name);
18083                    if !self.match_token(TokenType::Comma) {
18084                        break;
18085                    }
18086                }
18087                Ok(AlterTableAction::UnsetProperty { properties })
18088            }
18089        } else if self.match_keyword("CLUSTER") {
18090            // Snowflake: ALTER TABLE t CLUSTER BY (col1, col2 DESC)
18091            self.expect(TokenType::By)?;
18092            self.expect(TokenType::LParen)?;
18093            // Parse ordered expressions (can have ASC/DESC modifiers)
18094            let ordered = self.parse_order_by_list()?;
18095            // Convert Ordered to Expression (wrapping in Ordered if it has ordering)
18096            let expressions: Vec<Expression> = ordered
18097                .into_iter()
18098                .map(|o| Expression::Ordered(Box::new(o)))
18099                .collect();
18100            self.expect(TokenType::RParen)?;
18101            Ok(AlterTableAction::ClusterBy { expressions })
18102        } else if self.match_token(TokenType::Replace) {
18103            // ClickHouse: REPLACE PARTITION expr FROM table
18104            if self.match_token(TokenType::Partition) {
18105                let partition_expr = if self.match_text_seq(&["ALL"]) {
18106                    Expression::Identifier(Identifier::new("ALL".to_string()))
18107                } else if self.match_text_seq(&["ID"]) {
18108                    let id_val = self.parse_expression()?;
18109                    // Store as Raw to preserve "ID <value>" format
18110                    let id_str = match &id_val {
18111                        Expression::Literal(Literal::String(s)) => format!("ID '{}'", s),
18112                        _ => format!("ID {}", "?"),
18113                    };
18114                    Expression::Raw(Raw { sql: id_str })
18115                } else {
18116                    self.parse_expression()?
18117                };
18118                let source = if self.match_token(TokenType::From) {
18119                    let tref = self.parse_table_ref()?;
18120                    Some(Box::new(Expression::Table(tref)))
18121                } else {
18122                    None
18123                };
18124                Ok(AlterTableAction::ReplacePartition {
18125                    partition: partition_expr,
18126                    source,
18127                })
18128            } else {
18129                Err(self.parse_error("Expected PARTITION after REPLACE in ALTER TABLE"))
18130            }
18131        } else if matches!(
18132            self.config.dialect,
18133            Some(crate::dialects::DialectType::ClickHouse)
18134        ) {
18135            // ClickHouse-specific ALTER TABLE mutations: UPDATE, DELETE, DETACH, ATTACH,
18136            // FREEZE, UNFREEZE, MATERIALIZE, CLEAR, COMMENT COLUMN, MODIFY ORDER BY,
18137            // MOVE PARTITION, FETCH PARTITION, ADD INDEX, DROP INDEX, CLEAR INDEX
18138            // For ClickHouse, consume any unrecognized ALTER TABLE action as Raw
18139            // (covers UPDATE, DELETE, DETACH, ATTACH, FREEZE, MOVE, FETCH, etc.)
18140            {
18141                let keyword = self.advance().text.clone();
18142                let mut tokens: Vec<(String, TokenType)> = vec![(keyword, TokenType::Var)];
18143                let mut paren_depth = 0i32;
18144                while !self.is_at_end() && !self.check(TokenType::Semicolon) {
18145                    // Stop at comma only when at top-level (not inside parens) — it separates ALTER actions
18146                    if self.check(TokenType::Comma) && paren_depth == 0 {
18147                        break;
18148                    }
18149                    let token = self.advance();
18150                    if token.token_type == TokenType::LParen {
18151                        paren_depth += 1;
18152                    }
18153                    if token.token_type == TokenType::RParen {
18154                        paren_depth -= 1;
18155                    }
18156                    let text = if token.token_type == TokenType::QuotedIdentifier {
18157                        format!("\"{}\"", token.text)
18158                    } else if token.token_type == TokenType::String {
18159                        format!("'{}'", token.text)
18160                    } else {
18161                        token.text.clone()
18162                    };
18163                    tokens.push((text, token.token_type));
18164                }
18165                Ok(AlterTableAction::Raw {
18166                    sql: self.join_command_tokens(tokens),
18167                })
18168            }
18169        } else {
18170            Err(self.parse_error(format!(
18171                "Expected ADD, DROP, RENAME, ALTER, SET, UNSET, SWAP, CLUSTER, or REPLACE in ALTER TABLE, got {:?}",
18172                self.peek().token_type
18173            )))
18174        }
18175    }
18176
18177    /// Parse TSQL SYSTEM_VERSIONING option in ALTER TABLE SET (...)
18178    /// Handles: SYSTEM_VERSIONING=OFF, SYSTEM_VERSIONING=ON, SYSTEM_VERSIONING=ON(HISTORY_TABLE=..., ...)
18179    fn parse_system_versioning_option(&mut self) -> Result<Expression> {
18180        self.advance(); // consume SYSTEM_VERSIONING
18181        self.expect(TokenType::Eq)?;
18182
18183        let mut prop = WithSystemVersioningProperty {
18184            on: None,
18185            this: None,
18186            data_consistency: None,
18187            retention_period: None,
18188            with_: None,
18189        };
18190
18191        if self.match_identifier("OFF") {
18192            // SYSTEM_VERSIONING=OFF
18193            // on is None => generates OFF
18194            return Ok(Expression::WithSystemVersioningProperty(Box::new(prop)));
18195        }
18196
18197        // SYSTEM_VERSIONING=ON or SYSTEM_VERSIONING=ON(...)
18198        if self.match_token(TokenType::On) || self.match_identifier("ON") {
18199            prop.on = Some(Box::new(Expression::Boolean(BooleanLiteral {
18200                value: true,
18201            })));
18202        }
18203
18204        if self.match_token(TokenType::LParen) {
18205            // Parse options inside ON(...)
18206            loop {
18207                if self.check(TokenType::RParen) {
18208                    break;
18209                }
18210                if self.match_identifier("HISTORY_TABLE") {
18211                    self.expect(TokenType::Eq)?;
18212                    let table = self.parse_table_ref()?;
18213                    prop.this = Some(Box::new(Expression::Table(table)));
18214                } else if self.match_identifier("DATA_CONSISTENCY_CHECK") {
18215                    self.expect(TokenType::Eq)?;
18216                    let val = self.expect_identifier_or_keyword()?;
18217                    prop.data_consistency = Some(Box::new(Expression::Identifier(
18218                        Identifier::new(val.to_uppercase()),
18219                    )));
18220                } else if self.match_identifier("HISTORY_RETENTION_PERIOD") {
18221                    self.expect(TokenType::Eq)?;
18222                    if let Some(rp) = self.parse_retention_period()? {
18223                        prop.retention_period = Some(Box::new(rp));
18224                    }
18225                } else {
18226                    // Skip unknown options
18227                    self.advance();
18228                }
18229                if !self.match_token(TokenType::Comma) {
18230                    break;
18231                }
18232            }
18233            self.expect(TokenType::RParen)?;
18234        }
18235
18236        Ok(Expression::WithSystemVersioningProperty(Box::new(prop)))
18237    }
18238
18239    /// Parse TSQL DATA_DELETION option in ALTER TABLE SET (...)
18240    /// Handles: DATA_DELETION=ON, DATA_DELETION=OFF, DATA_DELETION=ON(FILTER_COLUMN=..., RETENTION_PERIOD=...)
18241    fn parse_data_deletion_option(&mut self) -> Result<Expression> {
18242        self.advance(); // consume DATA_DELETION
18243        self.expect(TokenType::Eq)?;
18244
18245        let on = if self.match_identifier("ON") || self.match_token(TokenType::On) {
18246            true
18247        } else if self.match_identifier("OFF") {
18248            false
18249        } else {
18250            false
18251        };
18252
18253        let on_expr = Box::new(Expression::Boolean(BooleanLiteral { value: on }));
18254        let mut filter_column = None;
18255        let mut retention_period = None;
18256
18257        if self.match_token(TokenType::LParen) {
18258            loop {
18259                if self.check(TokenType::RParen) {
18260                    break;
18261                }
18262                if self.match_identifier("FILTER_COLUMN") {
18263                    self.expect(TokenType::Eq)?;
18264                    let col = self.expect_identifier_or_keyword()?;
18265                    filter_column = Some(Box::new(Expression::Column(Column {
18266                        name: Identifier::new(col),
18267                        table: None,
18268                        join_mark: false,
18269                        trailing_comments: Vec::new(),
18270                        span: None,
18271                        inferred_type: None,
18272                    })));
18273                } else if self.match_identifier("RETENTION_PERIOD") {
18274                    self.expect(TokenType::Eq)?;
18275                    if let Some(rp) = self.parse_retention_period()? {
18276                        retention_period = Some(Box::new(rp));
18277                    }
18278                } else {
18279                    self.advance();
18280                }
18281                if !self.match_token(TokenType::Comma) {
18282                    break;
18283                }
18284            }
18285            self.expect(TokenType::RParen)?;
18286        }
18287
18288        Ok(Expression::DataDeletionProperty(Box::new(
18289            DataDeletionProperty {
18290                on: on_expr,
18291                filter_column,
18292                retention_period,
18293            },
18294        )))
18295    }
18296
18297    /// Parse ALTER COLUMN action
18298    fn parse_alter_column_action(&mut self) -> Result<AlterColumnAction> {
18299        if self.match_token(TokenType::Set) {
18300            if self.match_keywords(&[TokenType::Not, TokenType::Null]) {
18301                Ok(AlterColumnAction::SetNotNull)
18302            } else if self.match_token(TokenType::Default) {
18303                let expr = self.parse_primary()?;
18304                Ok(AlterColumnAction::SetDefault(expr))
18305            } else if self.match_identifier("DATA") {
18306                // SET DATA TYPE
18307                // TYPE can be a keyword token or identifier
18308                let _ = self.match_token(TokenType::Type) || self.match_identifier("TYPE");
18309                let data_type = self.parse_data_type()?;
18310                // Optional COLLATE
18311                let collate = if self.match_token(TokenType::Collate) {
18312                    Some(self.expect_identifier_or_keyword()?)
18313                } else {
18314                    None
18315                };
18316                // Optional USING expression
18317                let using = if self.match_token(TokenType::Using) {
18318                    Some(self.parse_expression()?)
18319                } else {
18320                    None
18321                };
18322                Ok(AlterColumnAction::SetDataType {
18323                    data_type,
18324                    using,
18325                    collate,
18326                })
18327            } else if self.match_identifier("VISIBLE") {
18328                Ok(AlterColumnAction::SetVisible)
18329            } else if self.match_identifier("INVISIBLE") {
18330                Ok(AlterColumnAction::SetInvisible)
18331            } else {
18332                Err(self.parse_error("Expected NOT NULL, DEFAULT, VISIBLE, or INVISIBLE after SET"))
18333            }
18334        } else if self.match_token(TokenType::Drop) {
18335            if self.match_keywords(&[TokenType::Not, TokenType::Null]) {
18336                Ok(AlterColumnAction::DropNotNull)
18337            } else if self.match_token(TokenType::Default) {
18338                Ok(AlterColumnAction::DropDefault)
18339            } else {
18340                Err(self.parse_error("Expected NOT NULL or DEFAULT after DROP"))
18341            }
18342        } else if self.match_token(TokenType::Comment) {
18343            // ALTER COLUMN col COMMENT 'comment'
18344            let comment = self.expect_string()?;
18345            Ok(AlterColumnAction::Comment(comment))
18346        } else if self.match_token(TokenType::Type)
18347            || self.match_identifier("TYPE")
18348            || self.is_identifier_token()
18349        {
18350            // TYPE data_type or just data_type (PostgreSQL/Redshift: ALTER COLUMN col TYPE datatype)
18351            let data_type = self.parse_data_type()?;
18352            // Optional COLLATE
18353            let collate = if self.match_token(TokenType::Collate) {
18354                Some(self.expect_identifier_or_keyword()?)
18355            } else {
18356                None
18357            };
18358            // Optional USING expression
18359            let using = if self.match_token(TokenType::Using) {
18360                Some(self.parse_expression()?)
18361            } else {
18362                None
18363            };
18364            Ok(AlterColumnAction::SetDataType {
18365                data_type,
18366                using,
18367                collate,
18368            })
18369        } else {
18370            Err(self.parse_error("Expected SET, DROP, or TYPE in ALTER COLUMN"))
18371        }
18372    }
18373
18374    /// Parse TRUNCATE statement
18375    fn parse_truncate(&mut self) -> Result<Expression> {
18376        self.expect(TokenType::Truncate)?;
18377
18378        // ClickHouse: TRUNCATE ALL TABLES FROM [IF EXISTS] db
18379        if matches!(
18380            self.config.dialect,
18381            Some(crate::dialects::DialectType::ClickHouse)
18382        ) && self.check_identifier("ALL")
18383            && self.current + 1 < self.tokens.len()
18384            && self.tokens[self.current + 1]
18385                .text
18386                .eq_ignore_ascii_case("TABLES")
18387        {
18388            // Consume remaining tokens as Command
18389            let mut parts = vec!["TRUNCATE".to_string()];
18390            while !self.is_at_end() && !self.check(TokenType::Semicolon) {
18391                let token = self.advance();
18392                if token.token_type == TokenType::String {
18393                    parts.push(format!("'{}'", token.text));
18394                } else {
18395                    parts.push(token.text.clone());
18396                }
18397            }
18398            return Ok(Expression::Command(Box::new(crate::expressions::Command {
18399                this: parts.join(" "),
18400            })));
18401        }
18402
18403        let target = if self.match_token(TokenType::Database) {
18404            TruncateTarget::Database
18405        } else {
18406            // ClickHouse: TRUNCATE TEMPORARY TABLE t
18407            self.match_token(TokenType::Temporary);
18408            self.match_token(TokenType::Table); // optional TABLE keyword
18409            TruncateTarget::Table
18410        };
18411
18412        // Parse optional IF EXISTS
18413        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
18414
18415        // Parse first table with optional ONLY modifier
18416        let has_only = self.match_token(TokenType::Only);
18417        let mut table = self.parse_table_ref()?;
18418        if has_only {
18419            table.only = true;
18420        }
18421
18422        // Check for * suffix on table name (PostgreSQL: inherit children)
18423        let first_star = self.match_token(TokenType::Star);
18424
18425        // TSQL: WITH (PARTITIONS(1, 2 TO 5, 10 TO 20, 84))
18426        if self.check(TokenType::With) && self.check_next(TokenType::LParen) {
18427            if let Some(hint_expr) = self.parse_truncate_table_hints()? {
18428                match hint_expr {
18429                    Expression::Tuple(tuple) => {
18430                        table.hints = tuple.expressions;
18431                    }
18432                    other => {
18433                        table.hints = vec![other];
18434                    }
18435                }
18436            }
18437        }
18438
18439        // ClickHouse: ON CLUSTER clause
18440        let on_cluster = self.parse_on_cluster_clause()?;
18441
18442        // Parse additional tables for multi-table TRUNCATE
18443        let mut extra_tables = Vec::new();
18444        if first_star {
18445            // The first table has a * suffix, so create an entry for it
18446            extra_tables.push(TruncateTableEntry {
18447                table: table.clone(),
18448                star: true,
18449            });
18450        }
18451        while self.match_token(TokenType::Comma) {
18452            let extra_only = self.match_token(TokenType::Only);
18453            let mut extra_table = self.parse_table_ref()?;
18454            if extra_only {
18455                extra_table.only = true;
18456            }
18457            let extra_star = self.match_token(TokenType::Star);
18458            extra_tables.push(TruncateTableEntry {
18459                table: extra_table,
18460                star: extra_star,
18461            });
18462        }
18463
18464        // Parse RESTART IDENTITY / CONTINUE IDENTITY
18465        // RESTART is TokenType::Restart keyword, IDENTITY is TokenType::Identity keyword
18466        let identity = if self.match_token(TokenType::Restart) {
18467            self.match_token(TokenType::Identity);
18468            Some(TruncateIdentity::Restart)
18469        } else if self.match_identifier("CONTINUE") {
18470            self.match_token(TokenType::Identity);
18471            Some(TruncateIdentity::Continue)
18472        } else {
18473            None
18474        };
18475
18476        // Parse CASCADE or RESTRICT
18477        // CASCADE is TokenType::Cascade keyword, RESTRICT is TokenType::Restrict keyword
18478        let cascade = self.match_token(TokenType::Cascade);
18479        let restrict = if !cascade {
18480            self.match_token(TokenType::Restrict)
18481        } else {
18482            false
18483        };
18484
18485        // Parse Hive PARTITION clause: PARTITION(key = value, ...)
18486        // parse_partition consumes the PARTITION keyword itself
18487        let partition = self.parse_partition()?;
18488
18489        // ClickHouse: TRUNCATE TABLE t SETTINGS key=value, ...
18490        if matches!(
18491            self.config.dialect,
18492            Some(crate::dialects::DialectType::ClickHouse)
18493        ) && self.match_token(TokenType::Settings)
18494        {
18495            // Consume settings expressions (they're not stored in the AST for TRUNCATE)
18496            loop {
18497                let _ = self.parse_expression()?;
18498                if !self.match_token(TokenType::Comma) {
18499                    break;
18500                }
18501            }
18502        }
18503
18504        Ok(Expression::Truncate(Box::new(Truncate {
18505            target,
18506            if_exists,
18507            table,
18508            on_cluster,
18509            cascade,
18510            extra_tables,
18511            identity,
18512            restrict,
18513            partition: partition.map(Box::new),
18514        })))
18515    }
18516
18517    /// Parse VALUES table constructor: VALUES (1, 'a'), (2, 'b')
18518    fn parse_values(&mut self) -> Result<Expression> {
18519        self.expect(TokenType::Values)?;
18520
18521        let mut expressions = Vec::new();
18522
18523        // Handle bare VALUES without parentheses: VALUES 1, 2, 3 -> VALUES (1), (2), (3)
18524        if !self.check(TokenType::LParen) {
18525            loop {
18526                let val = self.parse_expression()?;
18527                expressions.push(Tuple {
18528                    expressions: vec![val],
18529                });
18530                if !self.match_token(TokenType::Comma) {
18531                    break;
18532                }
18533            }
18534        } else {
18535            loop {
18536                self.expect(TokenType::LParen)?;
18537                // Parse VALUES tuple elements with optional AS aliases (Hive syntax)
18538                let row_values = self.parse_values_expression_list()?;
18539                self.expect(TokenType::RParen)?;
18540
18541                expressions.push(Tuple {
18542                    expressions: row_values,
18543                });
18544
18545                if !self.match_token(TokenType::Comma) {
18546                    break;
18547                }
18548                // ClickHouse: allow trailing comma after last tuple
18549                if matches!(
18550                    self.config.dialect,
18551                    Some(crate::dialects::DialectType::ClickHouse)
18552                ) && !self.check(TokenType::LParen)
18553                {
18554                    break;
18555                }
18556            }
18557        }
18558
18559        // Check for alias: VALUES (1, 2) AS new_data or VALUES (1, 2) new_data
18560        let (alias, column_aliases) = if self.match_token(TokenType::As) {
18561            let alias_name = self.expect_identifier()?;
18562            let alias = Some(Identifier::new(alias_name));
18563
18564            // Check for column aliases: AS new_data(a, b)
18565            let col_aliases = if self.match_token(TokenType::LParen) {
18566                let aliases = self.parse_identifier_list()?;
18567                self.expect(TokenType::RParen)?;
18568                aliases
18569            } else {
18570                Vec::new()
18571            };
18572            (alias, col_aliases)
18573        } else if self.check(TokenType::Var) && !self.check_keyword() {
18574            // Implicit alias: VALUES (0) foo(bar)
18575            let alias_name = self.advance().text.clone();
18576            let alias = Some(Identifier::new(alias_name));
18577            let col_aliases = if self.match_token(TokenType::LParen) {
18578                let aliases = self.parse_identifier_list()?;
18579                self.expect(TokenType::RParen)?;
18580                aliases
18581            } else {
18582                Vec::new()
18583            };
18584            (alias, col_aliases)
18585        } else {
18586            (None, Vec::new())
18587        };
18588
18589        // VALUES can be followed by set operations (UNION, etc.)
18590        let values_expr = Expression::Values(Box::new(Values {
18591            expressions,
18592            alias,
18593            column_aliases,
18594        }));
18595
18596        // Check for set operations after VALUES
18597        self.parse_set_operation(values_expr)
18598    }
18599
18600    /// Parse USE statement: USE db, USE DATABASE x, USE SCHEMA x.y, USE ROLE x, etc.
18601    fn parse_use(&mut self) -> Result<Expression> {
18602        self.expect(TokenType::Use)?;
18603
18604        // Check for Snowflake: USE SECONDARY ROLES ALL|NONE|role1, role2, ...
18605        if self.check(TokenType::Var) && self.peek().text.to_uppercase() == "SECONDARY" {
18606            self.advance(); // consume SECONDARY
18607                            // Check for ROLES
18608            if self.check(TokenType::Var) && self.peek().text.to_uppercase() == "ROLES" {
18609                self.advance(); // consume ROLES
18610                                // Parse ALL, NONE, or comma-separated role list
18611                let mut roles = Vec::new();
18612                loop {
18613                    if self.check(TokenType::Var)
18614                        || self.check(TokenType::All)
18615                        || self.check(TokenType::Identifier)
18616                    {
18617                        let role = self.advance().text.clone();
18618                        roles.push(role);
18619                        if !self.match_token(TokenType::Comma) {
18620                            break;
18621                        }
18622                    } else {
18623                        break;
18624                    }
18625                }
18626                let name = if roles.is_empty() {
18627                    "ALL".to_string()
18628                } else {
18629                    roles.join(", ")
18630                };
18631                return Ok(Expression::Use(Box::new(Use {
18632                    kind: Some(UseKind::SecondaryRoles),
18633                    this: Identifier::new(name),
18634                })));
18635            }
18636        }
18637
18638        // Check for kind: DATABASE, SCHEMA, ROLE, WAREHOUSE, CATALOG
18639        // Note: ROLE and CATALOG are not keywords, so we check the text
18640        let kind = if self.match_token(TokenType::Database) {
18641            Some(UseKind::Database)
18642        } else if self.match_token(TokenType::Schema) {
18643            Some(UseKind::Schema)
18644        } else if self.match_token(TokenType::Warehouse) {
18645            Some(UseKind::Warehouse)
18646        } else if self.check(TokenType::Var) && self.peek().text.to_uppercase() == "ROLE" {
18647            self.advance();
18648            Some(UseKind::Role)
18649        } else if self.check(TokenType::Var) && self.peek().text.to_uppercase() == "CATALOG" {
18650            self.advance();
18651            Some(UseKind::Catalog)
18652        } else {
18653            None
18654        };
18655
18656        // Parse the name (can be qualified like x.y)
18657        // Use expect_identifier_or_keyword_with_quoted because names like "default", "system" are valid
18658        let mut ident = self.expect_identifier_or_keyword_with_quoted()?;
18659
18660        // Handle qualified names like schema.table for USE SCHEMA x.y
18661        if self.match_token(TokenType::Dot) {
18662            let second_part = self.expect_identifier_or_keyword_with_quoted()?;
18663            ident.name = format!("{}.{}", ident.name, second_part.name);
18664        }
18665
18666        Ok(Expression::Use(Box::new(Use { kind, this: ident })))
18667    }
18668
18669    /// Parse EXPORT DATA statement (BigQuery)
18670    /// EXPORT DATA [WITH CONNECTION connection] OPTIONS (...) AS SELECT ...
18671    fn parse_export_data(&mut self) -> Result<Expression> {
18672        self.advance(); // consume EXPORT
18673
18674        // Expect DATA
18675        if !self.match_identifier("DATA") {
18676            return Err(self.parse_error("Expected DATA after EXPORT"));
18677        }
18678
18679        // Optional: WITH CONNECTION connection
18680        let connection = if self.match_text_seq(&["WITH", "CONNECTION"]) {
18681            // Parse connection identifier (can be qualified: project.location.connection)
18682            let first = self.expect_identifier()?;
18683            let connection_name = if self.match_token(TokenType::Dot) {
18684                let second = self.expect_identifier()?;
18685                if self.match_token(TokenType::Dot) {
18686                    let third = self.expect_identifier()?;
18687                    format!("{}.{}.{}", first, second, third)
18688                } else {
18689                    format!("{}.{}", first, second)
18690                }
18691            } else {
18692                first
18693            };
18694            Some(Box::new(Expression::Identifier(Identifier::new(
18695                connection_name,
18696            ))))
18697        } else {
18698            None
18699        };
18700
18701        // Expect OPTIONS (...)
18702        let options = if self.match_identifier("OPTIONS") {
18703            self.parse_options_list()?
18704        } else {
18705            Vec::new()
18706        };
18707
18708        // Expect AS
18709        self.expect(TokenType::As)?;
18710
18711        // Parse the SELECT query
18712        let query = self.parse_statement()?;
18713
18714        Ok(Expression::Export(Box::new(Export {
18715            this: Box::new(query),
18716            connection,
18717            options,
18718        })))
18719    }
18720
18721    /// Parse CACHE TABLE statement (Spark)
18722    /// CACHE [LAZY] TABLE name [OPTIONS(...)] [AS query]
18723    fn parse_cache(&mut self) -> Result<Expression> {
18724        self.expect(TokenType::Cache)?;
18725
18726        // Check for LAZY keyword
18727        let lazy = self.check(TokenType::Var) && self.peek().text.to_uppercase() == "LAZY";
18728        if lazy {
18729            self.advance();
18730        }
18731
18732        self.expect(TokenType::Table)?;
18733        let table = Identifier::new(self.expect_identifier()?);
18734
18735        // Check for OPTIONS clause
18736        let options = if self.check(TokenType::Var) && self.peek().text.to_uppercase() == "OPTIONS"
18737        {
18738            self.advance();
18739            self.expect(TokenType::LParen)?;
18740            let mut opts = Vec::new();
18741            loop {
18742                // Parse key = value pairs (key can be string literal or identifier)
18743                let key = if self.check(TokenType::NationalString) {
18744                    let token = self.advance();
18745                    Expression::Literal(Literal::NationalString(token.text))
18746                } else if self.check(TokenType::String) {
18747                    let token = self.advance();
18748                    Expression::Literal(Literal::String(token.text))
18749                } else {
18750                    Expression::Identifier(Identifier::new(self.expect_identifier()?))
18751                };
18752                // Eq is optional - Spark allows space-separated key value pairs
18753                // e.g., OPTIONS ('storageLevel' 'DISK_ONLY') or OPTIONS ('key' = 'value')
18754                let _ = self.match_token(TokenType::Eq);
18755                let value = self.parse_expression()?;
18756                opts.push((key, value));
18757                if !self.match_token(TokenType::Comma) {
18758                    break;
18759                }
18760            }
18761            self.expect(TokenType::RParen)?;
18762            opts
18763        } else {
18764            Vec::new()
18765        };
18766
18767        // Check for AS clause or implicit query (SELECT without AS in Spark)
18768        let query = if self.match_token(TokenType::As) {
18769            Some(self.parse_statement()?)
18770        } else if self.check(TokenType::Select) || self.check(TokenType::With) {
18771            // Spark allows SELECT without AS keyword after CACHE TABLE
18772            Some(self.parse_statement()?)
18773        } else {
18774            None
18775        };
18776
18777        Ok(Expression::Cache(Box::new(Cache {
18778            table,
18779            lazy,
18780            options,
18781            query,
18782        })))
18783    }
18784
18785    /// Parse UNCACHE TABLE statement (Spark)
18786    /// UNCACHE TABLE [IF EXISTS] name
18787    fn parse_uncache(&mut self) -> Result<Expression> {
18788        self.expect(TokenType::Uncache)?;
18789        self.expect(TokenType::Table)?;
18790
18791        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
18792        let table = Identifier::new(self.expect_identifier()?);
18793
18794        Ok(Expression::Uncache(Box::new(Uncache { table, if_exists })))
18795    }
18796
18797    /// Parse LOAD DATA statement (Hive)
18798    /// LOAD DATA [LOCAL] INPATH 'path' [OVERWRITE] INTO TABLE table_name
18799    /// [PARTITION (col=val, ...)] [INPUTFORMAT 'format'] [SERDE 'serde']
18800    fn parse_load_data(&mut self) -> Result<Expression> {
18801        self.expect(TokenType::Load)?;
18802
18803        // Expect DATA keyword
18804        let data_token = self.advance();
18805        if data_token.text.to_uppercase() != "DATA" {
18806            return Err(self.parse_error("Expected DATA after LOAD"));
18807        }
18808
18809        // Check for LOCAL keyword
18810        let local = self.match_token(TokenType::Local);
18811
18812        // Expect INPATH
18813        self.expect(TokenType::Inpath)?;
18814
18815        // Parse the path (string literal)
18816        let inpath = if self.check(TokenType::String) {
18817            self.advance().text
18818        } else {
18819            return Err(self.parse_error("Expected string literal after INPATH"));
18820        };
18821
18822        // Check for OVERWRITE keyword
18823        let overwrite = self.match_token(TokenType::Overwrite);
18824
18825        // Expect INTO TABLE
18826        self.expect(TokenType::Into)?;
18827        self.expect(TokenType::Table)?;
18828
18829        // Parse table name (can be qualified)
18830        let table = Expression::Table(self.parse_table_ref()?);
18831
18832        // Check for PARTITION clause
18833        let partition = if self.match_token(TokenType::Partition) {
18834            self.expect(TokenType::LParen)?;
18835            let mut partitions = Vec::new();
18836            loop {
18837                let col = Identifier::new(self.expect_identifier_or_keyword()?);
18838                self.expect(TokenType::Eq)?;
18839                let val = self.parse_expression()?;
18840                partitions.push((col, val));
18841                if !self.match_token(TokenType::Comma) {
18842                    break;
18843                }
18844            }
18845            self.expect(TokenType::RParen)?;
18846            partitions
18847        } else {
18848            Vec::new()
18849        };
18850
18851        // Check for INPUTFORMAT clause
18852        let input_format = if self.match_token(TokenType::InputFormat) {
18853            if self.check(TokenType::String) {
18854                Some(self.advance().text)
18855            } else {
18856                return Err(self.parse_error("Expected string literal after INPUTFORMAT"));
18857            }
18858        } else {
18859            None
18860        };
18861
18862        // Check for SERDE clause
18863        let serde = if self.match_token(TokenType::Serde) {
18864            if self.check(TokenType::String) {
18865                Some(self.advance().text)
18866            } else {
18867                return Err(self.parse_error("Expected string literal after SERDE"));
18868            }
18869        } else {
18870            None
18871        };
18872
18873        Ok(Expression::LoadData(Box::new(LoadData {
18874            local,
18875            inpath,
18876            overwrite,
18877            table,
18878            partition,
18879            input_format,
18880            serde,
18881        })))
18882    }
18883
18884    /// Parse PRAGMA statement (SQLite)
18885    /// PRAGMA [schema.]name [= value | (args...)]
18886    fn parse_pragma(&mut self) -> Result<Expression> {
18887        self.expect(TokenType::Pragma)?;
18888
18889        // Parse schema.name or just name
18890        let first_name = self.expect_identifier_or_keyword()?;
18891
18892        let (schema, name) = if self.match_token(TokenType::Dot) {
18893            // First name was schema
18894            let pragma_name = self.expect_identifier_or_keyword()?;
18895            (
18896                Some(Identifier::new(first_name)),
18897                Identifier::new(pragma_name),
18898            )
18899        } else {
18900            (None, Identifier::new(first_name))
18901        };
18902
18903        // Check for assignment or function call
18904        let (value, args) = if self.match_token(TokenType::Eq) {
18905            // PRAGMA name = value
18906            let val = self.parse_expression()?;
18907            (Some(val), Vec::new())
18908        } else if self.match_token(TokenType::LParen) {
18909            // PRAGMA name(args...)
18910            let mut arguments = Vec::new();
18911            if !self.check(TokenType::RParen) {
18912                loop {
18913                    arguments.push(self.parse_expression()?);
18914                    if !self.match_token(TokenType::Comma) {
18915                        break;
18916                    }
18917                }
18918            }
18919            self.expect(TokenType::RParen)?;
18920            (None, arguments)
18921        } else {
18922            (None, Vec::new())
18923        };
18924
18925        Ok(Expression::Pragma(Box::new(Pragma {
18926            schema,
18927            name,
18928            value,
18929            args,
18930        })))
18931    }
18932
18933    /// Parse ROLLBACK statement
18934    /// ROLLBACK [TO [SAVEPOINT] <name>]
18935    fn parse_rollback(&mut self) -> Result<Expression> {
18936        self.expect(TokenType::Rollback)?;
18937
18938        // Check for optional TRANSACTION, TRAN, or WORK keyword
18939        let has_transaction = self.match_token(TokenType::Transaction)
18940            || self.match_identifier("TRAN")
18941            || self.match_identifier("WORK");
18942
18943        // Check for TO SAVEPOINT (standard SQL) or transaction name (TSQL)
18944        let (savepoint, this) = if self.match_token(TokenType::To) {
18945            // Optional SAVEPOINT keyword
18946            self.match_token(TokenType::Savepoint);
18947            // Savepoint name
18948            if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
18949                let name = self.advance().text;
18950                (
18951                    Some(Box::new(Expression::Identifier(Identifier::new(name)))),
18952                    None,
18953                )
18954            } else {
18955                (None, None)
18956            }
18957        } else if has_transaction
18958            && (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
18959        {
18960            // TSQL: ROLLBACK TRANSACTION transaction_name
18961            let name = self.advance().text;
18962            (
18963                None,
18964                Some(Box::new(Expression::Identifier(Identifier::new(name)))),
18965            )
18966        } else if has_transaction {
18967            // Just ROLLBACK TRANSACTION - store marker
18968            (
18969                None,
18970                Some(Box::new(Expression::Identifier(Identifier::new(
18971                    "TRANSACTION".to_string(),
18972                )))),
18973            )
18974        } else {
18975            (None, None)
18976        };
18977
18978        Ok(Expression::Rollback(Box::new(Rollback { savepoint, this })))
18979    }
18980
18981    /// Parse COMMIT statement
18982    /// COMMIT [TRANSACTION|TRAN|WORK] [transaction_name] [WITH (DELAYED_DURABILITY = ON|OFF)] [AND [NO] CHAIN]
18983    fn parse_commit(&mut self) -> Result<Expression> {
18984        self.expect(TokenType::Commit)?;
18985
18986        // Check for optional TRANSACTION, TRAN, or WORK keyword
18987        let has_transaction = self.match_token(TokenType::Transaction)
18988            || self.match_identifier("TRAN")
18989            || self.match_identifier("WORK");
18990
18991        // Parse optional transaction name (TSQL)
18992        let this = if has_transaction
18993            && (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
18994            && !self.check(TokenType::With)
18995            && !self.check(TokenType::And)
18996        {
18997            let name = self.advance().text;
18998            Some(Box::new(Expression::Identifier(Identifier::new(name))))
18999        } else if has_transaction {
19000            // Store marker that TRANSACTION keyword was present
19001            Some(Box::new(Expression::Identifier(Identifier::new(
19002                "TRANSACTION".to_string(),
19003            ))))
19004        } else {
19005            None
19006        };
19007
19008        // Parse WITH (DELAYED_DURABILITY = ON|OFF) for TSQL
19009        let durability = if self.match_token(TokenType::With) && self.match_token(TokenType::LParen)
19010        {
19011            // Check for DELAYED_DURABILITY
19012            if self.match_identifier("DELAYED_DURABILITY") && self.match_token(TokenType::Eq) {
19013                // ON is a keyword (TokenType::On), OFF is an identifier
19014                let on = self.match_token(TokenType::On) || self.match_identifier("ON");
19015                if !on {
19016                    self.match_identifier("OFF");
19017                }
19018                self.expect(TokenType::RParen)?;
19019                Some(Box::new(Expression::Boolean(BooleanLiteral { value: on })))
19020            } else {
19021                // Skip to RParen
19022                while !self.check(TokenType::RParen) && !self.is_at_end() {
19023                    self.advance();
19024                }
19025                self.match_token(TokenType::RParen);
19026                None
19027            }
19028        } else {
19029            None
19030        };
19031
19032        // Parse AND [NO] CHAIN
19033        let chain = if self.match_token(TokenType::And) {
19034            let no_chain = self.match_token(TokenType::No);
19035            self.match_identifier("CHAIN");
19036            if no_chain {
19037                // AND NO CHAIN - explicit false
19038                Some(Box::new(Expression::Boolean(BooleanLiteral {
19039                    value: false,
19040                })))
19041            } else {
19042                // AND CHAIN - explicit true
19043                Some(Box::new(Expression::Boolean(BooleanLiteral {
19044                    value: true,
19045                })))
19046            }
19047        } else {
19048            None
19049        };
19050
19051        Ok(Expression::Commit(Box::new(Commit {
19052            chain,
19053            this,
19054            durability,
19055        })))
19056    }
19057
19058    /// Parse END statement (PostgreSQL alias for COMMIT)
19059    /// END [WORK|TRANSACTION] [AND [NO] CHAIN]
19060    fn parse_end_transaction(&mut self) -> Result<Expression> {
19061        self.expect(TokenType::End)?;
19062
19063        // Check for optional WORK or TRANSACTION keyword
19064        let _has_work = self.match_identifier("WORK") || self.match_token(TokenType::Transaction);
19065
19066        // Parse AND [NO] CHAIN
19067        let chain = if self.match_token(TokenType::And) {
19068            let no_chain = self.match_token(TokenType::No);
19069            self.match_identifier("CHAIN");
19070            if no_chain {
19071                // AND NO CHAIN - explicit false
19072                Some(Box::new(Expression::Boolean(BooleanLiteral {
19073                    value: false,
19074                })))
19075            } else {
19076                // AND CHAIN - explicit true
19077                Some(Box::new(Expression::Boolean(BooleanLiteral {
19078                    value: true,
19079                })))
19080            }
19081        } else {
19082            None
19083        };
19084
19085        // Return as COMMIT since END is an alias
19086        Ok(Expression::Commit(Box::new(Commit {
19087            chain,
19088            this: None,
19089            durability: None,
19090        })))
19091    }
19092
19093    /// Parse BEGIN/START TRANSACTION statement
19094    /// BEGIN [DEFERRED|IMMEDIATE|EXCLUSIVE] [TRANSACTION|TRAN|WORK] [transaction_name] [WITH MARK 'description']
19095    /// Also handles procedural BEGIN blocks (BigQuery, etc.): BEGIN statement_list END
19096    fn parse_transaction(&mut self) -> Result<Expression> {
19097        self.expect(TokenType::Begin)?;
19098
19099        // Check if this is a procedural BEGIN block rather than a transaction
19100        // If next token is not a transaction keyword and we have more tokens, it's a procedural block
19101        let is_transaction = self.is_at_end()
19102            || self.check(TokenType::Semicolon)
19103            || self.check(TokenType::Transaction)
19104            || self.check_identifier("TRAN")
19105            || self.check_identifier("WORK")
19106            || self.check_identifier("DEFERRED")
19107            || self.check_identifier("IMMEDIATE")
19108            || self.check_identifier("EXCLUSIVE");
19109
19110        if !is_transaction {
19111            // This is a procedural BEGIN block - parse as Command
19112            // Collect remaining tokens until end of statement
19113            return self
19114                .parse_command()?
19115                .ok_or_else(|| self.parse_error("Failed to parse BEGIN block"));
19116        }
19117
19118        // Check for transaction kind: DEFERRED, IMMEDIATE, EXCLUSIVE (SQLite)
19119        let kind = if self.match_identifier("DEFERRED")
19120            || self.match_identifier("IMMEDIATE")
19121            || self.match_identifier("EXCLUSIVE")
19122        {
19123            Some(self.previous().text.clone())
19124        } else {
19125            None
19126        };
19127
19128        // Check for TRANSACTION, TRAN, or WORK keyword
19129        let has_transaction_keyword = self.match_token(TokenType::Transaction)
19130            || self.match_identifier("TRAN")
19131            || self.match_identifier("WORK");
19132
19133        // Parse optional transaction name (TSQL style: BEGIN TRANSACTION trans_name)
19134        let trans_name = if has_transaction_keyword
19135            && (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
19136            && !self.check(TokenType::With)
19137        {
19138            // Could be a transaction name or @variable
19139            let name = self.advance().text;
19140            Some(name)
19141        } else {
19142            None
19143        };
19144
19145        // Combine kind and trans_name into `this`
19146        let this = if let Some(name) = trans_name {
19147            Some(Box::new(Expression::Identifier(Identifier::new(name))))
19148        } else if let Some(k) = kind {
19149            Some(Box::new(Expression::Identifier(Identifier::new(k))))
19150        } else {
19151            None
19152        };
19153
19154        // Parse WITH MARK 'description' (TSQL)
19155        let mark = if self.match_token(TokenType::With) && self.match_identifier("MARK") {
19156            if self.check(TokenType::String) {
19157                let desc = self.advance().text;
19158                Some(Box::new(Expression::Literal(Literal::String(desc))))
19159            } else {
19160                Some(Box::new(Expression::Literal(Literal::String(
19161                    "".to_string(),
19162                ))))
19163            }
19164        } else if has_transaction_keyword {
19165            // Store "TRANSACTION" marker to preserve round-trip
19166            Some(Box::new(Expression::Identifier(Identifier::new(
19167                "TRANSACTION".to_string(),
19168            ))))
19169        } else {
19170            None
19171        };
19172
19173        // Parse any additional transaction modes (isolation levels, etc.)
19174        let mut mode_parts: Vec<String> = Vec::new();
19175        while self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
19176            let mut mode_tokens: Vec<String> = Vec::new();
19177            while (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
19178                && !self.check(TokenType::Comma)
19179            {
19180                mode_tokens.push(self.advance().text);
19181            }
19182            if !mode_tokens.is_empty() {
19183                mode_parts.push(mode_tokens.join(" "));
19184            }
19185            if !self.match_token(TokenType::Comma) {
19186                break;
19187            }
19188        }
19189
19190        let modes = if !mode_parts.is_empty() {
19191            Some(Box::new(Expression::Identifier(Identifier::new(
19192                mode_parts.join(", "),
19193            ))))
19194        } else {
19195            None
19196        };
19197
19198        Ok(Expression::Transaction(Box::new(Transaction {
19199            this,
19200            modes,
19201            mark,
19202        })))
19203    }
19204
19205    /// Parse START TRANSACTION statement
19206    /// START TRANSACTION [READ ONLY | READ WRITE] [, ISOLATION LEVEL ...]
19207    fn parse_start_transaction(&mut self) -> Result<Expression> {
19208        self.expect(TokenType::Start)?;
19209
19210        // Expect TRANSACTION keyword
19211        self.expect(TokenType::Transaction)?;
19212
19213        // Parse any transaction modes (READ ONLY, READ WRITE, ISOLATION LEVEL, etc.)
19214        let mut mode_parts: Vec<String> = Vec::new();
19215        while self.is_identifier_token()
19216            || self.is_safe_keyword_as_identifier()
19217            || self.match_identifier("READ")
19218        {
19219            // If we matched READ, add it to tokens
19220            let read_matched = if self.previous().text.eq_ignore_ascii_case("READ") {
19221                true
19222            } else {
19223                false
19224            };
19225            let mut mode_tokens: Vec<String> = Vec::new();
19226            if read_matched {
19227                mode_tokens.push("READ".to_string());
19228            }
19229            while (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
19230                && !self.check(TokenType::Comma)
19231            {
19232                mode_tokens.push(self.advance().text);
19233            }
19234            if !mode_tokens.is_empty() {
19235                mode_parts.push(mode_tokens.join(" "));
19236            }
19237            if !self.match_token(TokenType::Comma) {
19238                break;
19239            }
19240        }
19241
19242        let modes = if !mode_parts.is_empty() {
19243            Some(Box::new(Expression::Identifier(Identifier::new(
19244                mode_parts.join(", "),
19245            ))))
19246        } else {
19247            None
19248        };
19249
19250        Ok(Expression::Transaction(Box::new(Transaction {
19251            this: None, // START TRANSACTION doesn't have a kind like DEFERRED/IMMEDIATE
19252            modes,
19253            // Mark as START to differentiate from BEGIN
19254            mark: Some(Box::new(Expression::Identifier(Identifier::new(
19255                "START".to_string(),
19256            )))),
19257        })))
19258    }
19259
19260    /// Parse DESCRIBE statement
19261    /// DESCRIBE [EXTENDED|FORMATTED|ANALYZE] <table_or_query>
19262    /// Also handles EXPLAIN (parsed as Describe)
19263    fn parse_describe(&mut self) -> Result<Expression> {
19264        // Accept DESCRIBE, DESC, and EXPLAIN (Var token)
19265        // Capture leading comments from the first token
19266        let leading_comments = if self.check(TokenType::Describe) {
19267            let token = self.advance();
19268            token.comments
19269        } else if self.check(TokenType::Desc) {
19270            let token = self.advance();
19271            token.comments
19272        } else if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("EXPLAIN") {
19273            let token = self.advance(); // consume EXPLAIN
19274            token.comments
19275        } else {
19276            return Err(self.parse_error("Expected DESCRIBE, DESC, or EXPLAIN"));
19277        };
19278
19279        // Check for EXTENDED or FORMATTED keywords
19280        let extended = self.match_identifier("EXTENDED");
19281        let formatted = if !extended {
19282            self.match_identifier("FORMATTED")
19283        } else {
19284            false
19285        };
19286
19287        // Check for style keywords like ANALYZE, HISTORY
19288        // ClickHouse: EXPLAIN SYNTAX/AST/PLAN/PIPELINE/ESTIMATE/TABLE OVERRIDE/CURRENT TRANSACTION
19289        // For HISTORY, we need to look ahead to ensure it's not part of a schema-qualified
19290        // table name like "history.tbl". If the next token is a Dot, "history" is a schema name.
19291        let style = if !extended && !formatted && self.match_identifier("ANALYZE") {
19292            Some("ANALYZE".to_string())
19293        } else if !extended
19294            && !formatted
19295            && matches!(
19296                self.config.dialect,
19297                Some(crate::dialects::DialectType::ClickHouse)
19298            )
19299        {
19300            // ClickHouse EXPLAIN styles
19301            let text_upper = if !self.is_at_end() {
19302                self.peek().text.to_uppercase()
19303            } else {
19304                String::new()
19305            };
19306            match text_upper.as_str() {
19307                "SYNTAX" | "AST" | "PLAN" | "PIPELINE" | "ESTIMATE" | "QUERY" | "CURRENT" => {
19308                    self.advance();
19309                    let mut style_str = text_upper;
19310                    // Handle multi-word: TABLE OVERRIDE, CURRENT TRANSACTION, QUERY TREE
19311                    if style_str == "CURRENT" && self.check_identifier("TRANSACTION") {
19312                        style_str.push_str(" TRANSACTION");
19313                        self.advance();
19314                    }
19315                    if style_str == "QUERY" && self.check_identifier("TREE") {
19316                        style_str.push_str(" TREE");
19317                        self.advance();
19318                    }
19319                    Some(style_str)
19320                }
19321                _ if self.check(TokenType::Table) => {
19322                    // EXPLAIN TABLE OVERRIDE
19323                    self.advance(); // consume TABLE
19324                    if self.check_identifier("OVERRIDE") {
19325                        self.advance();
19326                        Some("TABLE OVERRIDE".to_string())
19327                    } else {
19328                        // Not TABLE OVERRIDE, backtrack
19329                        self.current -= 1;
19330                        None
19331                    }
19332                }
19333                _ => None,
19334            }
19335        } else if !extended
19336            && !formatted
19337            && (self.check(TokenType::Identifier)
19338                || self.check(TokenType::Var)
19339                || self.check(TokenType::QuotedIdentifier))
19340            && self.peek().text.to_uppercase() == "HISTORY"
19341            && self.peek_nth(1).map(|t| t.token_type) != Some(TokenType::Dot)
19342        {
19343            self.advance(); // consume HISTORY
19344            Some("HISTORY".to_string())
19345        } else {
19346            None
19347        };
19348
19349        // Check for object kind like SEMANTIC VIEW, TABLE, INPUT, OUTPUT, etc.
19350        let kind = if self.match_identifier("SEMANTIC") {
19351            if self.match_token(TokenType::View) {
19352                Some("SEMANTIC VIEW".to_string())
19353            } else {
19354                Some("SEMANTIC".to_string())
19355            }
19356        } else if self.match_token(TokenType::Table) {
19357            Some("TABLE".to_string())
19358        } else if self.match_token(TokenType::View) {
19359            Some("VIEW".to_string())
19360        } else if self.match_identifier("DATABASE") {
19361            Some("DATABASE".to_string())
19362        } else if self.match_identifier("SCHEMA") {
19363            Some("SCHEMA".to_string())
19364        } else if self.match_token(TokenType::Input) {
19365            Some("INPUT".to_string())
19366        } else if self.match_token(TokenType::Output) {
19367            Some("OUTPUT".to_string())
19368        } else {
19369            None
19370        };
19371
19372        // ClickHouse: parse EXPLAIN settings before the target statement
19373        // e.g., EXPLAIN actions=1, description=0 SELECT ...
19374        // e.g., EXPLAIN PLAN actions=1 SELECT ...
19375        let mut properties = Vec::new();
19376        if matches!(
19377            self.config.dialect,
19378            Some(crate::dialects::DialectType::ClickHouse)
19379        ) {
19380            while !self.is_at_end() && !self.check(TokenType::Semicolon) {
19381                // Look for key=value pairs before a statement keyword
19382                if (self.is_identifier_token()
19383                    || self.is_safe_keyword_as_identifier()
19384                    || self.check(TokenType::Type))
19385                    && self.current + 1 < self.tokens.len()
19386                    && self.tokens[self.current + 1].token_type == TokenType::Eq
19387                {
19388                    let name = self.advance().text.to_lowercase();
19389                    self.advance(); // consume =
19390                    let value = self.advance().text.clone();
19391                    properties.push((name, value));
19392                    self.match_token(TokenType::Comma); // optional comma between settings
19393                } else {
19394                    break;
19395                }
19396            }
19397        }
19398
19399        // Parse target - could be a table name or a SELECT/INSERT/other statement
19400        // ClickHouse: EXPLAIN/DESC can precede any statement or subquery
19401        let target = if self.check(TokenType::Select) || self.check(TokenType::With) {
19402            self.parse_statement()?
19403        } else if self.check(TokenType::LParen) && {
19404            // Look through nested parens for SELECT/WITH
19405            let mut depth = 0usize;
19406            let mut found_select = false;
19407            for i in 0..100 {
19408                match self.peek_nth(i).map(|t| t.token_type) {
19409                    Some(TokenType::LParen) => depth += 1,
19410                    Some(TokenType::Select) | Some(TokenType::With) if depth > 0 => {
19411                        found_select = true;
19412                        break;
19413                    }
19414                    _ => break,
19415                }
19416            }
19417            found_select
19418        } {
19419            // DESC (((SELECT ...))) — deeply nested parenthesized subquery
19420            self.parse_statement()?
19421        } else if matches!(
19422            self.config.dialect,
19423            Some(crate::dialects::DialectType::ClickHouse)
19424        ) && (self.check(TokenType::Insert)
19425            || self.check(TokenType::Create)
19426            || self.check(TokenType::Alter)
19427            || self.check(TokenType::Drop)
19428            || self.check(TokenType::Set)
19429            || self.check(TokenType::System))
19430        {
19431            self.parse_statement()?
19432        } else if matches!(
19433            self.config.dialect,
19434            Some(crate::dialects::DialectType::ClickHouse)
19435        ) && (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
19436            && self.peek_nth(1).map(|t| t.token_type) == Some(TokenType::LParen)
19437        {
19438            // ClickHouse: DESC format(Values, '(123)') — function call as target
19439            self.parse_expression()?
19440        } else {
19441            // Parse as table reference
19442            let table = self.parse_table_ref()?;
19443            Expression::Table(table)
19444        };
19445
19446        // Parse optional PARTITION clause (Spark/Hive)
19447        let partition = if self.match_token(TokenType::Partition) {
19448            // PARTITION(key = value, ...)
19449            self.expect(TokenType::LParen)?;
19450            // Parse partition expressions (e.g., ds = '2024-01-01')
19451            let mut partition_exprs = Vec::new();
19452            loop {
19453                if let Some(expr) = self.parse_conjunction()? {
19454                    partition_exprs.push(expr);
19455                }
19456                if !self.match_token(TokenType::Comma) {
19457                    break;
19458                }
19459            }
19460            self.expect(TokenType::RParen)?;
19461            let partition = Expression::Partition(Box::new(crate::expressions::Partition {
19462                expressions: partition_exprs,
19463                subpartition: false,
19464            }));
19465            Some(Box::new(partition))
19466        } else {
19467            None
19468        };
19469
19470        // ClickHouse: consume optional SETTINGS clause after target
19471        // e.g., DESC format(CSV, '...') SETTINGS key='val', key2='val2'
19472        if matches!(
19473            self.config.dialect,
19474            Some(crate::dialects::DialectType::ClickHouse)
19475        ) && self.check(TokenType::Settings)
19476        {
19477            self.advance(); // consume SETTINGS
19478            let _ = self.parse_settings_property()?;
19479        }
19480
19481        // Databricks: DESCRIBE ... AS JSON
19482        let as_json = if self.check(TokenType::As)
19483            && self
19484                .peek_nth(1)
19485                .map(|t| t.text.eq_ignore_ascii_case("JSON"))
19486                == Some(true)
19487        {
19488            self.advance(); // consume AS
19489            self.advance(); // consume JSON
19490            true
19491        } else {
19492            false
19493        };
19494
19495        // Parse optional post-target properties like type=stage (non-ClickHouse)
19496        if properties.is_empty() {
19497            while !self.is_at_end() && !self.check(TokenType::Semicolon) {
19498                // Check for identifier or keyword that could be a property name
19499                if self.check(TokenType::Var) || self.check(TokenType::Type) || self.check_keyword()
19500                {
19501                    let name = self.advance().text.to_lowercase();
19502                    if self.match_token(TokenType::Eq) {
19503                        let value = self.advance().text.clone();
19504                        properties.push((name, value));
19505                    } else {
19506                        // Not a property, put it back (can't easily undo, so break)
19507                        break;
19508                    }
19509                } else {
19510                    break;
19511                }
19512            }
19513        }
19514
19515        Ok(Expression::Describe(Box::new(Describe {
19516            target,
19517            extended,
19518            formatted,
19519            kind,
19520            properties,
19521            style,
19522            partition,
19523            leading_comments,
19524            as_json,
19525        })))
19526    }
19527
19528    /// Parse SHOW statement
19529    /// SHOW [TERSE] <object_type> [HISTORY] [LIKE pattern] [IN <scope>] [STARTS WITH pattern] [LIMIT n] [FROM object]
19530    fn parse_show(&mut self) -> Result<Expression> {
19531        self.expect(TokenType::Show)?;
19532
19533        // Check for TERSE
19534        let terse = self.match_identifier("TERSE");
19535
19536        // Parse the thing to show (DATABASES, TABLES, SCHEMAS, etc.)
19537        // This can be multiple words like "PRIMARY KEYS" or "IMPORTED KEYS"
19538        let mut this_parts = Vec::new();
19539        let mut target: Option<Expression> = None;
19540        let mut mutex: Option<bool> = None;
19541
19542        // Consume identifier tokens until we hit a keyword like LIKE, IN, FROM, LIMIT, HISTORY
19543        // Special handling for SingleStore SHOW variations
19544        while !self.is_at_end() {
19545            let current = self.peek();
19546            // Stop at keywords that start clauses
19547            if matches!(
19548                current.token_type,
19549                TokenType::Like
19550                    | TokenType::In
19551                    | TokenType::From
19552                    | TokenType::Limit
19553                    | TokenType::Semicolon
19554                    | TokenType::Eof
19555                    | TokenType::Where
19556                    | TokenType::For
19557                    | TokenType::Offset
19558                    | TokenType::Settings
19559            ) {
19560                // ClickHouse: SHOW CREATE SETTINGS PROFILE - don't stop at SETTINGS
19561                if current.token_type == TokenType::Settings
19562                    && matches!(
19563                        self.config.dialect,
19564                        Some(crate::dialects::DialectType::ClickHouse)
19565                    )
19566                    && this_parts.join(" ") == "CREATE"
19567                {
19568                    // Fall through to process SETTINGS as part of the type name
19569                } else {
19570                    break;
19571                }
19572            }
19573            // Handle comma-separated profile types (e.g., SHOW PROFILE BLOCK IO, PAGE FAULTS)
19574            // Append comma to the last part to preserve spacing
19575            if current.token_type == TokenType::Comma {
19576                if !this_parts.is_empty() {
19577                    let last = this_parts.pop().unwrap();
19578                    this_parts.push(format!("{},", last));
19579                }
19580                self.advance();
19581                continue;
19582            }
19583            // Stop at HISTORY keyword (but not as the first word)
19584            if !this_parts.is_empty() && current.text.to_uppercase() == "HISTORY" {
19585                break;
19586            }
19587            // Stop at STARTS keyword
19588            if current.text.to_uppercase() == "STARTS" {
19589                break;
19590            }
19591            // SingleStore: SHOW PLAN <id> - handle number directly (before Var/keyword check)
19592            // This is needed because numbers don't pass the Var/keyword check
19593            let joined_check = this_parts.join(" ");
19594            if joined_check == "PLAN" && current.token_type == TokenType::Number {
19595                let id = self.advance().text;
19596                target = Some(Expression::Literal(Literal::Number(id)));
19597                break;
19598            }
19599            // Accept identifiers and keywords as part of the object type
19600            if current.token_type == TokenType::Var || current.token_type.is_keyword() {
19601                let joined = this_parts.join(" ");
19602
19603                // SingleStore: SHOW CREATE <type> <name> - preserve case for name
19604                // Types: AGGREGATE, PIPELINE, PROJECTION
19605                if matches!(
19606                    joined.as_str(),
19607                    "CREATE AGGREGATE" | "CREATE PIPELINE" | "CREATE PROJECTION"
19608                ) {
19609                    let name = self.advance().text;
19610                    target = Some(Expression::Identifier(Identifier::new(name)));
19611                    break;
19612                }
19613
19614                // SingleStore: SHOW <type> ON <name> - preserve case for name after ON
19615                // Check if current token is "ON" (but not at start)
19616                if current.text.to_uppercase() == "ON" && !this_parts.is_empty() {
19617                    this_parts.push("ON".to_string());
19618                    self.advance();
19619                    // Parse the name after ON, preserving case
19620                    if !self.is_at_end() {
19621                        let next = self.peek();
19622                        // Handle "ON TABLE name" pattern
19623                        if next.text.to_uppercase() == "TABLE" {
19624                            this_parts.push("TABLE".to_string());
19625                            self.advance();
19626                        }
19627                        // Parse the actual name
19628                        if !self.is_at_end() {
19629                            let name_tok = self.peek();
19630                            if name_tok.token_type == TokenType::Var
19631                                || name_tok.token_type.is_keyword()
19632                            {
19633                                let name = self.advance().text;
19634                                target = Some(Expression::Identifier(Identifier::new(name)));
19635                            }
19636                        }
19637                    }
19638                    break;
19639                }
19640
19641                // SingleStore: SHOW REPRODUCTION INTO OUTFILE 'filename'
19642                if current.text.to_uppercase() == "INTO" && joined == "REPRODUCTION" {
19643                    this_parts.push("INTO".to_string());
19644                    self.advance();
19645                    if !self.is_at_end() && self.peek().text.to_uppercase() == "OUTFILE" {
19646                        this_parts.push("OUTFILE".to_string());
19647                        self.advance();
19648                        // Parse the filename
19649                        if !self.is_at_end() && self.check(TokenType::String) {
19650                            let filename = self.advance().text;
19651                            target = Some(Expression::Literal(Literal::String(filename)));
19652                        }
19653                    }
19654                    break;
19655                }
19656
19657                // SingleStore: SHOW PLAN [JSON] <id> - capture the numeric ID
19658                if joined == "PLAN" {
19659                    // Check if current is "JSON" - if so, push it and check for number
19660                    if current.text.to_uppercase() == "JSON" {
19661                        this_parts.push("JSON".to_string());
19662                        self.advance();
19663                        // Now check for number
19664                        if !self.is_at_end() && self.check(TokenType::Number) {
19665                            let id = self.advance().text;
19666                            target = Some(Expression::Literal(Literal::Number(id)));
19667                        }
19668                        break;
19669                    }
19670                    // Check if current is a number (plan ID)
19671                    if current.token_type == TokenType::Number {
19672                        let id = self.advance().text;
19673                        target = Some(Expression::Literal(Literal::Number(id)));
19674                        break;
19675                    }
19676                }
19677
19678                this_parts.push(current.text.to_uppercase());
19679                self.advance();
19680
19681                // ClickHouse: SHOW CREATE TABLE/VIEW/DICTIONARY <qualified_name>
19682                // After detecting CREATE TABLE/VIEW/DICTIONARY, parse the next as a table ref
19683                let joined = this_parts.join(" ");
19684                if matches!(
19685                    joined.as_str(),
19686                    "CREATE TABLE"
19687                        | "CREATE VIEW"
19688                        | "CREATE DICTIONARY"
19689                        | "CREATE DATABASE"
19690                        | "CREATE MATERIALIZED VIEW"
19691                        | "CREATE LIVE VIEW"
19692                ) {
19693                    if !self.is_at_end()
19694                        && (self.check(TokenType::Var)
19695                            || self.check(TokenType::QuotedIdentifier)
19696                            || self.is_safe_keyword_as_identifier())
19697                    {
19698                        let table = self.parse_table_ref()?;
19699                        target = Some(Expression::Table(table));
19700                    }
19701                    break;
19702                }
19703
19704                // ClickHouse: SHOW CREATE ROLE/PROFILE/QUOTA/ROW POLICY/POLICY with multi-name or ON clause
19705                // These have complex syntax (comma-separated names, ON db.table) - consume as raw text
19706                if matches!(
19707                    self.config.dialect,
19708                    Some(crate::dialects::DialectType::ClickHouse)
19709                ) && (matches!(
19710                    joined.as_str(),
19711                    "CREATE ROLE"
19712                        | "CREATE QUOTA"
19713                        | "CREATE SETTINGS PROFILE"
19714                        | "CREATE PROFILE"
19715                        | "CREATE ROW POLICY"
19716                        | "CREATE POLICY"
19717                        | "CREATE USER"
19718                ) || matches!(
19719                    joined.as_str(),
19720                    "SHOW CREATE ROLE"
19721                        | "SHOW CREATE QUOTA"
19722                        | "SHOW CREATE SETTINGS PROFILE"
19723                        | "SHOW CREATE PROFILE"
19724                        | "SHOW CREATE ROW POLICY"
19725                        | "SHOW CREATE POLICY"
19726                        | "SHOW CREATE USER"
19727                )) {
19728                    let mut parts = Vec::new();
19729                    while !self.is_at_end() && self.peek().token_type != TokenType::Semicolon {
19730                        parts.push(self.advance().text.clone());
19731                    }
19732                    target = Some(Expression::Identifier(Identifier::new(parts.join(" "))));
19733                    break;
19734                }
19735
19736                // ClickHouse: SHOW CREATE <qualified_name> (without TABLE/VIEW keyword)
19737                // e.g., SHOW CREATE INFORMATION_SCHEMA.COLUMNS
19738                if joined == "CREATE"
19739                    && matches!(
19740                        self.config.dialect,
19741                        Some(crate::dialects::DialectType::ClickHouse)
19742                    )
19743                    && !self.is_at_end()
19744                    && (self.check(TokenType::Var) || self.check(TokenType::QuotedIdentifier))
19745                    && !matches!(
19746                        self.peek().text.to_uppercase().as_str(),
19747                        "TABLE"
19748                            | "VIEW"
19749                            | "DICTIONARY"
19750                            | "DATABASE"
19751                            | "MATERIALIZED"
19752                            | "LIVE"
19753                            | "TEMPORARY"
19754                            | "ROLE"
19755                            | "QUOTA"
19756                            | "POLICY"
19757                            | "PROFILE"
19758                            | "USER"
19759                            | "ROW"
19760                            | "SETTINGS"
19761                    )
19762                {
19763                    let table = self.parse_table_ref()?;
19764                    target = Some(Expression::Table(table));
19765                    break;
19766                }
19767
19768                // Special handling for ENGINE: the next token is the engine name (case-preserved)
19769                // followed by STATUS or MUTEX
19770                if joined == "ENGINE" {
19771                    // Parse engine name (case-preserved)
19772                    if !self.is_at_end() {
19773                        let engine_tok = self.peek();
19774                        if engine_tok.token_type == TokenType::Var
19775                            || engine_tok.token_type.is_keyword()
19776                        {
19777                            let engine_name = self.advance().text;
19778                            target = Some(Expression::Identifier(Identifier::new(engine_name)));
19779                            // Parse STATUS or MUTEX
19780                            if !self.is_at_end() {
19781                                let next = self.peek();
19782                                let next_upper = next.text.to_uppercase();
19783                                if next_upper == "STATUS" {
19784                                    self.advance();
19785                                    mutex = Some(false);
19786                                } else if next_upper == "MUTEX" {
19787                                    self.advance();
19788                                    mutex = Some(true);
19789                                }
19790                            }
19791                        }
19792                    }
19793                    break;
19794                }
19795            } else {
19796                break;
19797            }
19798        }
19799
19800        let this = this_parts.join(" ");
19801
19802        // Check for HISTORY
19803        let history = self.match_identifier("HISTORY");
19804
19805        // Check for FOR target (MySQL: SHOW GRANTS FOR foo, SHOW PROFILE ... FOR QUERY 5)
19806        // SingleStore: SHOW GROUPS FOR ROLE 'role_name', SHOW GROUPS FOR USER 'username'
19807        let for_target = if self.match_token(TokenType::For) {
19808            // Parse the target (can be multi-word like QUERY 5, or ROLE 'name')
19809            let mut parts = Vec::new();
19810            while !self.is_at_end() {
19811                let tok = self.peek();
19812                if matches!(
19813                    tok.token_type,
19814                    TokenType::Like
19815                        | TokenType::In
19816                        | TokenType::From
19817                        | TokenType::Limit
19818                        | TokenType::Semicolon
19819                        | TokenType::Eof
19820                        | TokenType::Where
19821                ) {
19822                    break;
19823                }
19824                if tok.token_type == TokenType::Var
19825                    || tok.token_type.is_keyword()
19826                    || tok.token_type == TokenType::Number
19827                {
19828                    parts.push(self.advance().text);
19829                } else if tok.token_type == TokenType::String {
19830                    // Handle string literals (e.g., SHOW GROUPS FOR ROLE 'role_name')
19831                    let text = self.advance().text;
19832                    parts.push(format!("'{}'", text));
19833                } else {
19834                    break;
19835                }
19836            }
19837            if parts.is_empty() {
19838                None
19839            } else {
19840                Some(Expression::Identifier(Identifier::new(parts.join(" "))))
19841            }
19842        } else {
19843            None
19844        };
19845
19846        // Check for LIKE pattern
19847        let like = if self.match_token(TokenType::Like) {
19848            Some(self.parse_primary()?)
19849        } else {
19850            None
19851        };
19852
19853        // Check for IN scope
19854        let (scope_kind, scope) = if self.match_token(TokenType::In) {
19855            // Parse scope kind and optionally scope object
19856            // Check for keywords: ACCOUNT, DATABASE, SCHEMA, TABLE, CLASS, APPLICATION
19857            let (kind, scope_obj) = if self.match_keyword("ACCOUNT") {
19858                (Some("ACCOUNT".to_string()), None)
19859            } else if self.match_token(TokenType::Database) {
19860                // IN DATABASE [name]
19861                let scope_obj = if !self.is_at_end()
19862                    && !self.check(TokenType::Like)
19863                    && !self.check(TokenType::Limit)
19864                    && !self.check(TokenType::Semicolon)
19865                    && !self.check_keyword_text("STARTS")
19866                {
19867                    let table = self.parse_table_ref()?;
19868                    Some(Expression::Table(table))
19869                } else {
19870                    None
19871                };
19872                (Some("DATABASE".to_string()), scope_obj)
19873            } else if self.match_token(TokenType::Schema) {
19874                // IN SCHEMA [name]
19875                let scope_obj = if !self.is_at_end()
19876                    && !self.check(TokenType::Like)
19877                    && !self.check(TokenType::Limit)
19878                    && !self.check(TokenType::Semicolon)
19879                    && !self.check_keyword_text("STARTS")
19880                {
19881                    let table = self.parse_table_ref()?;
19882                    Some(Expression::Table(table))
19883                } else {
19884                    None
19885                };
19886                (Some("SCHEMA".to_string()), scope_obj)
19887            } else if self.match_token(TokenType::Table) {
19888                // IN TABLE [name]
19889                let scope_obj = if !self.is_at_end()
19890                    && !self.check(TokenType::Like)
19891                    && !self.check(TokenType::Limit)
19892                    && !self.check(TokenType::Semicolon)
19893                    && !self.check_keyword_text("STARTS")
19894                {
19895                    let table = self.parse_table_ref()?;
19896                    Some(Expression::Table(table))
19897                } else {
19898                    None
19899                };
19900                (Some("TABLE".to_string()), scope_obj)
19901            } else if self.match_token(TokenType::View) {
19902                // IN VIEW [name]
19903                let scope_obj = if !self.is_at_end()
19904                    && !self.check(TokenType::Like)
19905                    && !self.check(TokenType::Limit)
19906                    && !self.check(TokenType::Semicolon)
19907                    && !self.check_keyword_text("STARTS")
19908                {
19909                    let table = self.parse_table_ref()?;
19910                    Some(Expression::Table(table))
19911                } else {
19912                    None
19913                };
19914                (Some("VIEW".to_string()), scope_obj)
19915            } else if self.match_keyword("CLASS") {
19916                // IN CLASS name
19917                let scope_obj = if !self.is_at_end() {
19918                    let table = self.parse_table_ref()?;
19919                    Some(Expression::Table(table))
19920                } else {
19921                    None
19922                };
19923                (Some("CLASS".to_string()), scope_obj)
19924            } else if self.match_keyword("APPLICATION") {
19925                // IN APPLICATION [PACKAGE] name
19926                let kind = if self.match_keyword("PACKAGE") {
19927                    "APPLICATION PACKAGE".to_string()
19928                } else {
19929                    "APPLICATION".to_string()
19930                };
19931                let scope_obj = if !self.is_at_end() {
19932                    let table = self.parse_table_ref()?;
19933                    Some(Expression::Table(table))
19934                } else {
19935                    None
19936                };
19937                (Some(kind), scope_obj)
19938            } else {
19939                // Default - infer scope_kind based on what we're showing
19940                // Python SQLGlot: SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}
19941                let table = self.parse_table_ref()?;
19942                let inferred_kind = match this.as_str() {
19943                    "OBJECTS" | "TABLES" | "VIEWS" | "SEQUENCES" | "UNIQUE KEYS"
19944                    | "IMPORTED KEYS" => "SCHEMA",
19945                    "PRIMARY KEYS" => "TABLE",
19946                    _ => "SCHEMA", // Default to SCHEMA for unknown types
19947                };
19948                (
19949                    Some(inferred_kind.to_string()),
19950                    Some(Expression::Table(table)),
19951                )
19952            };
19953            (kind, scope_obj)
19954        } else {
19955            (None, None)
19956        };
19957
19958        // Check for STARTS WITH
19959        let starts_with = if self.match_keyword("STARTS") {
19960            self.match_token(TokenType::With); // WITH is a keyword token
19961            Some(self.parse_primary()?)
19962        } else {
19963            None
19964        };
19965
19966        // Check for LIMIT
19967        let limit = if self.match_token(TokenType::Limit) {
19968            Some(Box::new(Limit {
19969                this: self.parse_expression()?,
19970                percent: false,
19971                comments: Vec::new(),
19972            }))
19973        } else {
19974            None
19975        };
19976
19977        // Check for FROM (can be a string literal or identifier)
19978        // For MySQL SHOW COLUMNS/INDEX, the first FROM is the target table,
19979        // and the second FROM is the database
19980        let mut from = if self.match_token(TokenType::From) {
19981            Some(self.parse_primary()?)
19982        } else {
19983            None
19984        };
19985
19986        // Check for second FROM clause (MySQL: SHOW COLUMNS FROM tbl FROM db, SHOW INDEX FROM foo FROM bar)
19987        let mut db = if from.is_some() && self.match_token(TokenType::From) {
19988            Some(self.parse_primary()?)
19989        } else {
19990            None
19991        };
19992
19993        // Normalize MySQL SHOW INDEX/COLUMNS FROM db.tbl -> FROM tbl FROM db.
19994        if matches!(this.as_str(), "INDEX" | "COLUMNS") && db.is_none() {
19995            if let Some(from_expr) = from.take() {
19996                match from_expr {
19997                    Expression::Table(mut t) => {
19998                        if let Some(db_ident) = t.schema.take().or(t.catalog.take()) {
19999                            db = Some(Expression::Identifier(db_ident));
20000                            from = Some(Expression::Identifier(t.name));
20001                        } else {
20002                            from = Some(Expression::Table(t));
20003                        }
20004                    }
20005                    Expression::Column(c) => {
20006                        if let Some(table_ident) = c.table {
20007                            db = Some(Expression::Identifier(table_ident));
20008                            from = Some(Expression::Identifier(c.name));
20009                        } else {
20010                            from = Some(Expression::Column(c));
20011                        }
20012                    }
20013                    Expression::Identifier(id) => {
20014                        if let Some((db_name, table_name)) = id.name.split_once('.') {
20015                            db = Some(Expression::Identifier(Identifier::new(db_name)));
20016                            from = Some(Expression::Identifier(Identifier {
20017                                name: table_name.to_string(),
20018                                quoted: id.quoted,
20019                                trailing_comments: id.trailing_comments,
20020                                span: None,
20021                            }));
20022                        } else {
20023                            from = Some(Expression::Identifier(id));
20024                        }
20025                    }
20026                    other => {
20027                        from = Some(other);
20028                    }
20029                }
20030            }
20031        }
20032
20033        // MySQL: SHOW TABLES FROM db LIKE 'pattern' (LIKE can come after FROM)
20034        let like = if like.is_none() && self.match_token(TokenType::Like) {
20035            Some(self.parse_primary()?)
20036        } else {
20037            like
20038        };
20039
20040        // ClickHouse: SHOW ... NOT LIKE 'pattern' / NOT ILIKE 'pattern'
20041        if matches!(
20042            self.config.dialect,
20043            Some(crate::dialects::DialectType::ClickHouse)
20044        ) && self.check(TokenType::Not)
20045        {
20046            if self.current + 1 < self.tokens.len()
20047                && matches!(
20048                    self.tokens[self.current + 1].token_type,
20049                    TokenType::Like | TokenType::ILike
20050                )
20051            {
20052                self.advance(); // consume NOT
20053                self.advance(); // consume LIKE/ILIKE
20054                let _ = self.parse_primary()?; // consume pattern
20055            }
20056        }
20057
20058        // ClickHouse: SHOW ... ILIKE 'pattern'
20059        if matches!(
20060            self.config.dialect,
20061            Some(crate::dialects::DialectType::ClickHouse)
20062        ) && self.match_token(TokenType::ILike)
20063        {
20064            let _ = self.parse_primary()?; // consume pattern
20065        }
20066
20067        // Check for WHERE clause (MySQL: SHOW STATUS WHERE condition)
20068        let where_clause = if self.match_token(TokenType::Where) {
20069            Some(self.parse_expression()?)
20070        } else {
20071            None
20072        };
20073
20074        // Check for WITH PRIVILEGES clause (Snowflake: SHOW ... WITH PRIVILEGES USAGE, MODIFY)
20075        let privileges = if self.match_token(TokenType::With) && self.match_keyword("PRIVILEGES") {
20076            // Parse comma-separated list of privilege names (no parentheses)
20077            let mut privs = Vec::new();
20078            loop {
20079                if self.is_at_end() || self.check(TokenType::Semicolon) {
20080                    break;
20081                }
20082                let tok = self.peek();
20083                if tok.token_type == TokenType::Var || tok.token_type.is_keyword() {
20084                    privs.push(self.advance().text.to_uppercase());
20085                    // Check for comma to continue
20086                    if !self.match_token(TokenType::Comma) {
20087                        break;
20088                    }
20089                } else {
20090                    break;
20091                }
20092            }
20093            privs
20094        } else {
20095            Vec::new()
20096        };
20097
20098        // ClickHouse: SHOW ... SETTINGS key=val, key=val
20099        if matches!(
20100            self.config.dialect,
20101            Some(crate::dialects::DialectType::ClickHouse)
20102        ) {
20103            self.parse_clickhouse_settings_clause()?;
20104        }
20105
20106        Ok(Expression::Show(Box::new(Show {
20107            this,
20108            terse,
20109            history,
20110            like,
20111            scope_kind,
20112            scope,
20113            starts_with,
20114            limit,
20115            from,
20116            where_clause,
20117            for_target,
20118            db,
20119            target,
20120            mutex,
20121            privileges,
20122        })))
20123    }
20124
20125    /// Parse COPY statement (Snowflake, PostgreSQL)
20126    /// COPY INTO <table> FROM <source> [(<parameters>)]
20127    /// COPY INTO <location> FROM <table> [(<parameters>)]
20128    fn parse_copy(&mut self) -> Result<Expression> {
20129        self.expect(TokenType::Copy)?;
20130
20131        // Check for INTO (Snowflake/TSQL style: COPY INTO)
20132        let is_into = self.match_token(TokenType::Into);
20133
20134        // Parse target table or location (possibly with column list)
20135        let this = if self.check(TokenType::LParen) {
20136            // Subquery: COPY (SELECT ...) TO ...
20137            self.parse_primary()?
20138        } else if self.check(TokenType::DAt)
20139            || self.check(TokenType::String)
20140            || self.is_stage_reference()
20141        {
20142            // Stage or file destination (for exports): COPY INTO @stage or COPY INTO 's3://...'
20143            self.parse_file_location()?
20144        } else {
20145            // Table reference, possibly with column list: COPY table (col1, col2)
20146            let table = self.parse_table_ref()?;
20147            // Check for column list
20148            if self.check(TokenType::LParen) {
20149                // Peek ahead to see if this is a column list or a subquery
20150                // Column list won't start with SELECT
20151                let has_column_list = {
20152                    let start = self.current;
20153                    self.advance(); // consume (
20154                    let is_select = self.check(TokenType::Select);
20155                    self.current = start; // backtrack
20156                    !is_select
20157                };
20158                if has_column_list {
20159                    self.advance(); // consume (
20160                    let mut columns = Vec::new();
20161                    loop {
20162                        let col_name = self.expect_identifier_or_keyword()?;
20163                        columns.push(col_name);
20164                        if !self.match_token(TokenType::Comma) {
20165                            break;
20166                        }
20167                    }
20168                    self.expect(TokenType::RParen)?;
20169                    // Create a schema expression with the table and columns
20170                    Expression::Schema(Box::new(Schema {
20171                        this: Some(Box::new(Expression::Table(table))),
20172                        expressions: columns
20173                            .into_iter()
20174                            .map(|c| {
20175                                Expression::Column(Column {
20176                                    name: Identifier::new(c),
20177                                    table: None,
20178                                    join_mark: false,
20179                                    trailing_comments: Vec::new(),
20180                                    span: None,
20181                                    inferred_type: None,
20182                                })
20183                            })
20184                            .collect(),
20185                    }))
20186                } else {
20187                    Expression::Table(table)
20188                }
20189            } else {
20190                Expression::Table(table)
20191            }
20192        };
20193
20194        // Determine direction: FROM means loading into table, TO means exporting
20195        let kind = self.match_token(TokenType::From);
20196        let has_to = if !kind {
20197            // Try TO keyword for export (TO is a keyword token, not an identifier)
20198            self.match_token(TokenType::To)
20199        } else {
20200            false
20201        };
20202
20203        // Parse source/destination files or stage only if FROM/TO was found
20204        // and we're not at a parameter (which would start with identifier = ...)
20205        let mut files = Vec::new();
20206        if kind
20207            || has_to
20208            || self.check(TokenType::String)
20209            || self.is_stage_reference()
20210            || self.check(TokenType::LParen)
20211        {
20212            // Check for subquery: FROM (SELECT ...)
20213            if self.check(TokenType::LParen) {
20214                // Peek ahead to see if this is a subquery
20215                let start = self.current;
20216                self.advance(); // consume (
20217                let is_select = self.check(TokenType::Select);
20218                self.current = start; // backtrack
20219                if is_select {
20220                    // Parse the subquery
20221                    let subquery = self.parse_primary()?;
20222                    files.push(subquery);
20223                }
20224            }
20225            // Parse file location(s) until we hit a parameter or end
20226            while !self.is_at_end() && !self.check(TokenType::Semicolon) && files.is_empty()
20227                || (self.check(TokenType::Comma) && !files.is_empty())
20228            {
20229                // Consume comma if present (for multiple files)
20230                if !files.is_empty() && !self.match_token(TokenType::Comma) {
20231                    break;
20232                }
20233                // Check if this looks like a parameter (identifier followed by =)
20234                // But stage references (@stage) are not parameters
20235                if (self.check(TokenType::Var) || self.check_keyword())
20236                    && !self.is_stage_reference()
20237                {
20238                    let lookahead = self.current + 1;
20239                    if lookahead < self.tokens.len()
20240                        && self.tokens[lookahead].token_type == TokenType::Eq
20241                    {
20242                        break; // This is a parameter, stop parsing files
20243                    }
20244                }
20245                // Check for WITH keyword - stop parsing files
20246                if self.check(TokenType::With) {
20247                    break;
20248                }
20249                // Stop if we don't see a file location start
20250                // Include QuotedIdentifier for Databricks backtick-quoted paths like `s3://link`
20251                if !self.check(TokenType::String)
20252                    && !self.is_stage_reference()
20253                    && !self.check(TokenType::Var)
20254                    && !self.check_keyword()
20255                    && !self.check(TokenType::QuotedIdentifier)
20256                {
20257                    break;
20258                }
20259                // For COPY INTO ... FROM table_name, handle dotted table references
20260                // If the next token is a Var/Identifier and the one after is a Dot, parse as table reference
20261                if (self.check(TokenType::Var) || self.is_identifier_token())
20262                    && !self.is_stage_reference()
20263                {
20264                    let lookahead = self.current + 1;
20265                    let has_dot = lookahead < self.tokens.len()
20266                        && self.tokens[lookahead].token_type == TokenType::Dot;
20267                    if has_dot {
20268                        let table = self.parse_table_ref()?;
20269                        files.push(Expression::Table(table));
20270                        continue;
20271                    }
20272                }
20273                let location = self.parse_file_location()?;
20274                files.push(location);
20275            }
20276        }
20277
20278        // Parse credentials and parameters
20279        let mut params = Vec::new();
20280        let mut credentials = None;
20281        let mut with_wrapped = false;
20282
20283        // Parse Snowflake-style parameters: KEY = VALUE or KEY = (nested values)
20284        // or DuckDB/PostgreSQL WITH (KEY VALUE, ...) format
20285        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
20286            // Match WITH keyword if present (some dialects use WITH before params)
20287            let had_with = self.match_token(TokenType::With);
20288
20289            // Check for wrapped parameters in parentheses
20290            if self.match_token(TokenType::LParen) {
20291                if had_with {
20292                    with_wrapped = true;
20293                }
20294                while !self.check(TokenType::RParen) && !self.is_at_end() {
20295                    let param = self.parse_copy_parameter()?;
20296                    params.push(param);
20297                    // Consume optional comma between params
20298                    self.match_token(TokenType::Comma);
20299                }
20300                self.expect(TokenType::RParen)?;
20301                break;
20302            }
20303
20304            // Parse individual parameter: NAME = value
20305            if self.check(TokenType::Var) || self.check_keyword() {
20306                let param = self.parse_copy_parameter()?;
20307
20308                // Handle special CREDENTIALS parameter (case-insensitive)
20309                if param.name.eq_ignore_ascii_case("CREDENTIALS") {
20310                    // For Redshift-style CREDENTIALS 'string' (single string value)
20311                    // vs Snowflake-style CREDENTIALS = (KEY='value', KEY2='value')
20312                    if let Some(Expression::Literal(Literal::String(s))) = &param.value {
20313                        // Redshift style: store as a simple credentials string
20314                        let creds = Credentials {
20315                            credentials: vec![("".to_string(), s.clone())],
20316                            storage: None,
20317                            encryption: None,
20318                        };
20319                        credentials = Some(Box::new(creds));
20320                    } else {
20321                        // Snowflake style: key=value pairs
20322                        let creds = Credentials {
20323                            credentials: param
20324                                .values
20325                                .iter()
20326                                .filter_map(|v| {
20327                                    if let Expression::Eq(eq) = v {
20328                                        let key = if let Expression::Column(c) = &eq.left {
20329                                            c.name.name.clone()
20330                                        } else {
20331                                            return None;
20332                                        };
20333                                        let val = if let Expression::Literal(Literal::String(s)) =
20334                                            &eq.right
20335                                        {
20336                                            s.clone()
20337                                        } else {
20338                                            return None;
20339                                        };
20340                                        Some((key, val))
20341                                    } else {
20342                                        None
20343                                    }
20344                                })
20345                                .collect(),
20346                            storage: None,
20347                            encryption: None,
20348                        };
20349                        credentials = Some(Box::new(creds));
20350                    }
20351                } else if param.name.eq_ignore_ascii_case("STORAGE_INTEGRATION") {
20352                    // Store STORAGE_INTEGRATION as a regular parameter only
20353                    // Don't use the credentials.storage field for this
20354                    params.push(param);
20355                } else {
20356                    params.push(param);
20357                }
20358            } else {
20359                break;
20360            }
20361        }
20362
20363        Ok(Expression::Copy(Box::new(CopyStmt {
20364            this,
20365            kind,
20366            files,
20367            params,
20368            credentials,
20369            is_into,
20370            with_wrapped,
20371        })))
20372    }
20373
20374    /// Parse a single COPY parameter: NAME = value, NAME = (nested values), or NAME value (no =)
20375    fn parse_copy_parameter(&mut self) -> Result<CopyParameter> {
20376        // Preserve original case for parameter name (important for Redshift COPY options)
20377        let name = self.expect_identifier_or_keyword()?;
20378
20379        let mut value = None;
20380        let mut values = Vec::new();
20381
20382        let has_eq = self.match_token(TokenType::Eq);
20383
20384        if has_eq {
20385            if self.match_token(TokenType::LParen) {
20386                // Nested parameter list: KEY = (nested_key=value, ...) or KEY = (value1, value2)
20387                // Check if this is a list of simple values (like strings) or key=value pairs
20388                // If the first token is a string/number, it's a list of values
20389                if self.check(TokenType::String) || self.check(TokenType::Number) {
20390                    // Simple value list: FILES = ('test1.csv', 'test2.csv')
20391                    while !self.check(TokenType::RParen) && !self.is_at_end() {
20392                        values.push(self.parse_primary()?);
20393                        if !self.match_token(TokenType::Comma) {
20394                            break;
20395                        }
20396                    }
20397                } else {
20398                    // Key=value pairs: CREDENTIALS = (AWS_KEY_ID='id' AWS_SECRET_KEY='key')
20399                    while !self.check(TokenType::RParen) && !self.is_at_end() {
20400                        // Parse nested key=value pairs
20401                        let nested_key = self.expect_identifier_or_keyword()?.to_uppercase();
20402                        if self.match_token(TokenType::Eq) {
20403                            let nested_value = self.parse_copy_param_value()?;
20404                            // Create an Eq expression for the nested key=value
20405                            values.push(Expression::Eq(Box::new(BinaryOp {
20406                                left: Expression::Column(Column {
20407                                    name: Identifier::new(nested_key),
20408                                    table: None,
20409                                    join_mark: false,
20410                                    trailing_comments: Vec::new(),
20411                                    span: None,
20412                                    inferred_type: None,
20413                                }),
20414                                right: nested_value,
20415                                left_comments: Vec::new(),
20416                                operator_comments: Vec::new(),
20417                                trailing_comments: Vec::new(),
20418                                inferred_type: None,
20419                            })));
20420                        } else {
20421                            // Just a keyword/value without =
20422                            values.push(Expression::Column(Column {
20423                                name: Identifier::new(nested_key),
20424                                table: None,
20425                                join_mark: false,
20426                                trailing_comments: Vec::new(),
20427                                span: None,
20428                                inferred_type: None,
20429                            }));
20430                        }
20431                        // Consume optional comma between nested values
20432                        self.match_token(TokenType::Comma);
20433                    }
20434                }
20435                self.expect(TokenType::RParen)?;
20436            } else {
20437                // Simple value: KEY = value
20438                value = Some(self.parse_copy_param_value()?);
20439            }
20440        } else {
20441            // No = sign: DuckDB/PostgreSQL format (KEY value or KEY (col1, col2))
20442            // Check if followed by a value: string, number, boolean, identifier, or tuple
20443            if self.check(TokenType::LParen) {
20444                // Check if this is a COPY_INTO_VARLEN_OPTIONS parameter
20445                // These are Databricks/Snowflake options that contain key='value' pairs without = before (
20446                let is_varlen_option = matches!(
20447                    name.as_str(),
20448                    "FORMAT_OPTIONS" | "COPY_OPTIONS" | "FILE_FORMAT" | "CREDENTIAL"
20449                );
20450
20451                self.advance(); // consume (
20452
20453                if is_varlen_option {
20454                    // Parse as key='value' pairs: FORMAT_OPTIONS ('opt1'='true', 'opt2'='test')
20455                    while !self.check(TokenType::RParen) && !self.is_at_end() {
20456                        if self.check(TokenType::String) {
20457                            // Parse 'key'='value' pair
20458                            let key_token = self.advance();
20459                            let key = key_token.text.clone();
20460                            if self.match_token(TokenType::Eq) {
20461                                let val = self.parse_copy_param_value()?;
20462                                values.push(Expression::Eq(Box::new(BinaryOp {
20463                                    left: Expression::Literal(Literal::String(key)),
20464                                    right: val,
20465                                    left_comments: Vec::new(),
20466                                    operator_comments: Vec::new(),
20467                                    trailing_comments: Vec::new(),
20468                                    inferred_type: None,
20469                                })));
20470                            } else {
20471                                // Just a string without =
20472                                values.push(Expression::Literal(Literal::String(key)));
20473                            }
20474                        } else if self.check(TokenType::Var)
20475                            || self.check_keyword()
20476                            || self.is_identifier_token()
20477                        {
20478                            // Parse identifier='value' pair (unquoted key)
20479                            let key = self.advance().text.clone();
20480                            if self.match_token(TokenType::Eq) {
20481                                let val = self.parse_copy_param_value()?;
20482                                values.push(Expression::Eq(Box::new(BinaryOp {
20483                                    left: Expression::Column(Column {
20484                                        name: Identifier::new(key),
20485                                        table: None,
20486                                        join_mark: false,
20487                                        trailing_comments: Vec::new(),
20488                                        span: None,
20489                                        inferred_type: None,
20490                                    }),
20491                                    right: val,
20492                                    left_comments: Vec::new(),
20493                                    operator_comments: Vec::new(),
20494                                    trailing_comments: Vec::new(),
20495                                    inferred_type: None,
20496                                })));
20497                            } else {
20498                                // Just an identifier without =
20499                                values.push(Expression::Column(Column {
20500                                    name: Identifier::new(key),
20501                                    table: None,
20502                                    join_mark: false,
20503                                    trailing_comments: Vec::new(),
20504                                    span: None,
20505                                    inferred_type: None,
20506                                }));
20507                            }
20508                        } else {
20509                            break;
20510                        }
20511                        self.match_token(TokenType::Comma);
20512                    }
20513                } else {
20514                    // Tuple value: FORCE_NOT_NULL (col1, col2)
20515                    let mut items = Vec::new();
20516                    while !self.check(TokenType::RParen) && !self.is_at_end() {
20517                        items.push(self.parse_primary()?);
20518                        if !self.match_token(TokenType::Comma) {
20519                            break;
20520                        }
20521                    }
20522                    value = Some(Expression::Tuple(Box::new(Tuple { expressions: items })));
20523                }
20524                self.expect(TokenType::RParen)?;
20525            } else if self.check(TokenType::LBrace) {
20526                // Map literal: KV_METADATA {'key': 'value', ...}
20527                value = Some(self.parse_primary()?);
20528            } else if self.check(TokenType::String) || self.check(TokenType::Number) {
20529                // String or number value
20530                value = Some(self.parse_copy_param_value()?);
20531            } else if self.check(TokenType::True) || self.check(TokenType::False) {
20532                // Boolean value (TRUE/FALSE are keyword tokens)
20533                value = Some(self.parse_copy_param_value()?);
20534            } else if !self.check(TokenType::Comma)
20535                && !self.check(TokenType::RParen)
20536                && !self.is_at_end()
20537                && !self.check(TokenType::Semicolon)
20538            {
20539                // Identifier value: FORMAT JSON, HEADER MATCH, etc.
20540                // But skip if this is a known flag-only parameter (Redshift COPY options that take no value)
20541                let name_upper = name.to_uppercase();
20542                let is_flag_param = matches!(
20543                    name_upper.as_str(),
20544                    "EMPTYASNULL"
20545                        | "BLANKSASNULL"
20546                        | "ACCEPTINVCHARS"
20547                        | "COMPUPDATE"
20548                        | "STATUPDATE"
20549                        | "NOLOAD"
20550                        | "ESCAPE"
20551                        | "REMOVEQUOTES"
20552                        | "EXPLICIT_IDS"
20553                        | "FILLRECORD"
20554                        | "TRIMBLANKS"
20555                        | "TRUNCATECOLUMNS"
20556                        | "ROUNDEC"
20557                        | "IGNOREHEADER"
20558                        | "IGNOREBLANKLINES"
20559                        | "ACCEPTANYDATE"
20560                );
20561                if !is_flag_param && (self.check(TokenType::Var) || self.check_keyword()) {
20562                    value = Some(self.parse_copy_param_value()?);
20563                }
20564            }
20565            // If nothing matched, it's a bare flag parameter with no value (allowed)
20566        }
20567
20568        Ok(CopyParameter {
20569            name,
20570            value,
20571            values,
20572            eq: has_eq,
20573        })
20574    }
20575
20576    /// Parse a value for COPY parameters (handles strings, identifiers, numbers, lists)
20577    fn parse_copy_param_value(&mut self) -> Result<Expression> {
20578        // Handle lists like ('file1', 'file2')
20579        if self.match_token(TokenType::LParen) {
20580            let mut items = Vec::new();
20581            while !self.check(TokenType::RParen) && !self.is_at_end() {
20582                items.push(self.parse_primary()?);
20583                if !self.match_token(TokenType::Comma) {
20584                    break;
20585                }
20586            }
20587            self.expect(TokenType::RParen)?;
20588            return Ok(Expression::Tuple(Box::new(Tuple { expressions: items })));
20589        }
20590
20591        // Handle string, number, boolean, identifier
20592        if self.check(TokenType::String) {
20593            let token = self.advance();
20594            return Ok(Expression::Literal(Literal::String(token.text.clone())));
20595        }
20596        // Handle quoted identifier (e.g., STORAGE_INTEGRATION = "storage")
20597        if self.check(TokenType::QuotedIdentifier) {
20598            let token = self.advance();
20599            return Ok(Expression::Column(Column {
20600                name: Identifier::quoted(token.text.clone()),
20601                table: None,
20602                join_mark: false,
20603                trailing_comments: Vec::new(),
20604                span: None,
20605                inferred_type: None,
20606            }));
20607        }
20608        if self.check(TokenType::Number) {
20609            let token = self.advance();
20610            return Ok(Expression::Literal(Literal::Number(token.text.clone())));
20611        }
20612        if self.match_token(TokenType::True) {
20613            return Ok(Expression::Boolean(BooleanLiteral { value: true }));
20614        }
20615        if self.match_token(TokenType::False) {
20616            return Ok(Expression::Boolean(BooleanLiteral { value: false }));
20617        }
20618        // Identifier (e.g., FORMAT_NAME=my_format)
20619        if self.check(TokenType::Var) || self.check_keyword() {
20620            // Could be a qualified name like MY_DATABASE.MY_SCHEMA.MY_FORMAT
20621            let first = self.advance().text.clone();
20622            if self.match_token(TokenType::Dot) {
20623                let second = self.expect_identifier_or_keyword()?;
20624                if self.match_token(TokenType::Dot) {
20625                    let third = self.expect_identifier_or_keyword()?;
20626                    return Ok(Expression::Column(Column {
20627                        name: Identifier::new(format!("{}.{}.{}", first, second, third)),
20628                        table: None,
20629                        join_mark: false,
20630                        trailing_comments: Vec::new(),
20631                        span: None,
20632                        inferred_type: None,
20633                    }));
20634                }
20635                return Ok(Expression::Column(Column {
20636                    name: Identifier::new(format!("{}.{}", first, second)),
20637                    table: None,
20638                    join_mark: false,
20639                    trailing_comments: Vec::new(),
20640                    span: None,
20641                    inferred_type: None,
20642                }));
20643            }
20644            return Ok(Expression::Column(Column {
20645                name: Identifier::new(first),
20646                table: None,
20647                join_mark: false,
20648                trailing_comments: Vec::new(),
20649                span: None,
20650                inferred_type: None,
20651            }));
20652        }
20653
20654        Err(self.parse_error("Expected value for COPY parameter"))
20655    }
20656
20657    /// Parse Snowflake stage reference when tokenized as String (e.g., '@mystage', '@external/location')
20658    /// Handles: '@mystage', '@external/location'
20659    fn parse_stage_reference_from_string(&mut self) -> Result<Expression> {
20660        use crate::expressions::StageReference;
20661
20662        // The String token contains @ and the entire path
20663        let string_token = self.advance();
20664        let full_path = string_token.text.clone();
20665
20666        // Split on / to get stage name and path
20667        let parts: Vec<&str> = full_path.splitn(2, '/').collect();
20668        let name = parts[0].to_string();
20669        let path = if parts.len() > 1 {
20670            Some(format!("/{}", parts[1]))
20671        } else {
20672            None
20673        };
20674
20675        // Handle optional parameters: (FILE_FORMAT => 'fmt', PATTERN => '*.csv')
20676        let (file_format, pattern) = if self.match_token(TokenType::LParen) {
20677            let mut ff = None;
20678            let mut pat = None;
20679
20680            loop {
20681                if self.match_identifier("FILE_FORMAT") {
20682                    self.expect(TokenType::FArrow)?; // =>
20683                    ff = Some(self.parse_primary()?);
20684                } else if self.match_identifier("PATTERN") || self.match_token(TokenType::Pattern) {
20685                    // PATTERN can be tokenized as keyword or identifier
20686                    self.expect(TokenType::FArrow)?; // =>
20687                    if let Expression::Literal(Literal::String(s)) = self.parse_primary()? {
20688                        pat = Some(s);
20689                    }
20690                } else {
20691                    break;
20692                }
20693
20694                if !self.match_token(TokenType::Comma) {
20695                    break;
20696                }
20697            }
20698
20699            self.expect(TokenType::RParen)?;
20700            (ff, pat)
20701        } else {
20702            (None, None)
20703        };
20704
20705        Ok(Expression::StageReference(Box::new(StageReference {
20706            name,
20707            path,
20708            file_format,
20709            pattern,
20710            quoted: true, // Stage reference came from a quoted string
20711        })))
20712    }
20713
20714    /// Parse Snowflake stage reference when tokenized as Var (e.g., @mystage becomes Var token)
20715    /// Handles: @mystage, @mystage/path/to/file.csv
20716    fn parse_stage_reference_from_var(&mut self) -> Result<Expression> {
20717        use crate::expressions::StageReference;
20718
20719        // The Var token already contains @ and the stage name
20720        let var_token = self.advance();
20721        let mut name = var_token.text.clone();
20722
20723        // Handle qualified names: @namespace.stage
20724        while self.match_token(TokenType::Dot) {
20725            name.push('.');
20726            if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
20727                name.push_str(&self.advance().text);
20728            } else if self.check(TokenType::Percent) {
20729                // Handle table stage in qualified path: @namespace.%table_name
20730                self.advance();
20731                name.push('%');
20732                if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
20733                    name.push_str(&self.advance().text);
20734                }
20735            } else {
20736                break;
20737            }
20738        }
20739
20740        // Handle path after stage: @stage/path/to/file.csv
20741        let path = if self.match_token(TokenType::Slash) {
20742            let mut path_str = String::from("/");
20743            // Consume path components until we hit whitespace/paren/etc.
20744            while !self.is_at_end() {
20745                if self.check(TokenType::Identifier)
20746                    || self.check(TokenType::Var)
20747                    || self.check(TokenType::Number)
20748                    || self.check(TokenType::Dot)
20749                    || self.check(TokenType::Dash)
20750                    || self.check(TokenType::Star)
20751                    || self.check(TokenType::To)
20752                    || self.is_safe_keyword_as_identifier()
20753                {
20754                    path_str.push_str(&self.advance().text);
20755                } else if self.match_token(TokenType::Slash) {
20756                    path_str.push('/');
20757                } else {
20758                    break;
20759                }
20760            }
20761            Some(path_str)
20762        } else {
20763            None
20764        };
20765
20766        // Handle optional parameters: (FILE_FORMAT => 'fmt', PATTERN => '*.csv')
20767        let (file_format, pattern) = if self.match_token(TokenType::LParen) {
20768            let mut ff = None;
20769            let mut pat = None;
20770
20771            loop {
20772                if self.match_identifier("FILE_FORMAT") {
20773                    self.expect(TokenType::FArrow)?; // =>
20774                    ff = Some(self.parse_primary()?);
20775                } else if self.match_identifier("PATTERN") || self.match_token(TokenType::Pattern) {
20776                    // PATTERN can be tokenized as keyword or identifier
20777                    self.expect(TokenType::FArrow)?; // =>
20778                    if let Expression::Literal(Literal::String(s)) = self.parse_primary()? {
20779                        pat = Some(s);
20780                    }
20781                } else {
20782                    break;
20783                }
20784
20785                if !self.match_token(TokenType::Comma) {
20786                    break;
20787                }
20788            }
20789
20790            self.expect(TokenType::RParen)?;
20791            (ff, pat)
20792        } else {
20793            (None, None)
20794        };
20795
20796        Ok(Expression::StageReference(Box::new(StageReference {
20797            name,
20798            path,
20799            file_format,
20800            pattern,
20801            quoted: false,
20802        })))
20803    }
20804
20805    /// Parse Snowflake stage reference in FROM clause
20806    /// Handles: @stage, @"stage", @namespace.stage, @stage/path/file.csv, @~, @%table
20807    fn parse_stage_reference(&mut self) -> Result<Expression> {
20808        use crate::expressions::StageReference;
20809
20810        self.expect(TokenType::DAt)?; // consume @
20811
20812        // Build the stage name - can include dots, slashes, etc.
20813        let mut name = String::from("@");
20814
20815        // Handle special stage types:
20816        // @~ = user stage
20817        // @% = table stage (followed by table name)
20818        if self.check(TokenType::Tilde) {
20819            self.advance();
20820            name.push('~');
20821        } else if self.check(TokenType::Percent) {
20822            self.advance();
20823            name.push('%');
20824            // Table name follows (can be qualified: schema.table)
20825            loop {
20826                if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
20827                    name.push_str(&self.advance().text);
20828                } else {
20829                    break;
20830                }
20831                // Handle qualified table names: %db.schema.table
20832                if self.match_token(TokenType::Dot) {
20833                    name.push('.');
20834                } else {
20835                    break;
20836                }
20837            }
20838        } else {
20839            // Handle quoted or unquoted stage names
20840            loop {
20841                if self.check(TokenType::QuotedIdentifier) {
20842                    // Preserve quotes for quoted identifiers
20843                    let text = self.advance().text;
20844                    name.push('"');
20845                    name.push_str(&text);
20846                    name.push('"');
20847                } else if self.check(TokenType::Percent) {
20848                    // Handle table stage in qualified path: @namespace.%table_name
20849                    self.advance();
20850                    name.push('%');
20851                    if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
20852                        name.push_str(&self.advance().text);
20853                    }
20854                } else if self.check(TokenType::Identifier)
20855                    || self.check(TokenType::Var)
20856                    || self.is_safe_keyword_as_identifier()
20857                {
20858                    name.push_str(&self.advance().text);
20859                } else {
20860                    break;
20861                }
20862
20863                // Handle dots for qualified names: @namespace.stage or @"schema"."stage"
20864                if self.match_token(TokenType::Dot) {
20865                    name.push('.');
20866                } else {
20867                    break;
20868                }
20869            }
20870        }
20871
20872        // Handle path after stage: @stage/path/to/file.csv
20873        let path = if self.match_token(TokenType::Slash) {
20874            let mut path_str = String::from("/");
20875            // Consume path components until we hit whitespace/paren/etc.
20876            // Note: path can include keywords like 'to', 'data', etc.
20877            while !self.is_at_end() {
20878                if self.check(TokenType::Identifier)
20879                    || self.check(TokenType::Var)
20880                    || self.check(TokenType::Number)
20881                    || self.check(TokenType::Dot)
20882                    || self.check(TokenType::Dash)
20883                    || self.check(TokenType::Star)
20884                    || self.check(TokenType::To)
20885                    || self.is_safe_keyword_as_identifier()
20886                {
20887                    path_str.push_str(&self.advance().text);
20888                } else if self.match_token(TokenType::Slash) {
20889                    path_str.push('/');
20890                } else {
20891                    break;
20892                }
20893            }
20894            Some(path_str)
20895        } else {
20896            None
20897        };
20898
20899        // Handle optional parameters: (FILE_FORMAT => 'fmt', PATTERN => '*.csv')
20900        let (file_format, pattern) = if self.match_token(TokenType::LParen) {
20901            let mut ff = None;
20902            let mut pat = None;
20903
20904            loop {
20905                if self.match_identifier("FILE_FORMAT") {
20906                    self.expect(TokenType::FArrow)?; // =>
20907                    ff = Some(self.parse_primary()?);
20908                } else if self.match_identifier("PATTERN") || self.match_token(TokenType::Pattern) {
20909                    // PATTERN can be tokenized as keyword or identifier
20910                    self.expect(TokenType::FArrow)?; // =>
20911                    if let Expression::Literal(Literal::String(s)) = self.parse_primary()? {
20912                        pat = Some(s);
20913                    }
20914                } else {
20915                    break;
20916                }
20917
20918                if !self.match_token(TokenType::Comma) {
20919                    break;
20920                }
20921            }
20922
20923            self.expect(TokenType::RParen)?;
20924            (ff, pat)
20925        } else {
20926            (None, None)
20927        };
20928
20929        Ok(Expression::StageReference(Box::new(StageReference {
20930            name,
20931            path,
20932            file_format,
20933            pattern,
20934            quoted: false,
20935        })))
20936    }
20937
20938    /// Parse file location for COPY/PUT statements
20939    /// Handles: @stage, @db.schema.stage, @stage/path, 's3://bucket/path', file:///path
20940    fn parse_file_location(&mut self) -> Result<Expression> {
20941        // Stage reference starting with @ (tokenized as DAt or as a Var starting with @)
20942        if self.check(TokenType::DAt) {
20943            self.advance(); // consume @
20944            let mut stage_path = String::from("@");
20945
20946            // Handle table stage prefix: @%table
20947            if self.check(TokenType::Percent) || self.check(TokenType::Mod) {
20948                stage_path.push('%');
20949                self.advance(); // consume %
20950            }
20951            // Handle user stage: @~
20952            else if self.check(TokenType::Tilde) {
20953                stage_path.push('~');
20954                self.advance(); // consume ~
20955            }
20956
20957            // Get stage name
20958            if self.check(TokenType::Var) || self.check_keyword() || self.is_identifier_token() {
20959                stage_path.push_str(&self.advance().text);
20960            }
20961            // Parse qualified name parts: .schema.stage
20962            while self.check(TokenType::Dot) {
20963                self.advance(); // consume .
20964                stage_path.push('.');
20965                if self.check(TokenType::Var) || self.check_keyword() || self.is_identifier_token()
20966                {
20967                    stage_path.push_str(&self.advance().text);
20968                }
20969            }
20970            // Parse path segments: /path/to/file (slash is tokenized separately)
20971            while self.check(TokenType::Slash) {
20972                self.advance(); // consume /
20973                stage_path.push('/');
20974                // Get path segment (identifier, keyword, or special chars)
20975                // But don't consume if followed by = (that's a parameter, not path)
20976                if (self.check(TokenType::Var)
20977                    || self.check_keyword()
20978                    || self.is_identifier_token())
20979                    && !self.check_next(TokenType::Eq)
20980                {
20981                    stage_path.push_str(&self.advance().text);
20982                }
20983            }
20984            return Ok(Expression::Literal(Literal::String(stage_path)));
20985        }
20986
20987        // Stage reference tokenized as a Var starting with @ (e.g., @random_stage)
20988        // This happens when the tokenizer combines @ with the following identifier
20989        if self.check(TokenType::Var) && self.peek().text.starts_with('@') {
20990            let mut stage_path = self.advance().text.clone();
20991            // Parse qualified name parts: .schema.stage
20992            while self.check(TokenType::Dot) {
20993                self.advance(); // consume .
20994                stage_path.push('.');
20995                if self.check(TokenType::Var) || self.check_keyword() || self.is_identifier_token()
20996                {
20997                    stage_path.push_str(&self.advance().text);
20998                }
20999            }
21000            // Parse path segments: /path/to/file
21001            while self.check(TokenType::Slash) {
21002                self.advance(); // consume /
21003                stage_path.push('/');
21004                // Get path segment (identifier, keyword, or special chars)
21005                // But don't consume if followed by = (that's a parameter, not path)
21006                if (self.check(TokenType::Var)
21007                    || self.check_keyword()
21008                    || self.is_identifier_token())
21009                    && !self.check_next(TokenType::Eq)
21010                {
21011                    stage_path.push_str(&self.advance().text);
21012                }
21013            }
21014            return Ok(Expression::Literal(Literal::String(stage_path)));
21015        }
21016
21017        // String literal (file path or URL)
21018        if self.check(TokenType::String) {
21019            let token = self.advance();
21020            return Ok(Expression::Literal(Literal::String(token.text.clone())));
21021        }
21022
21023        // Backtick-quoted identifier (Databricks style: `s3://link`)
21024        if self.check(TokenType::QuotedIdentifier) {
21025            let token = self.advance();
21026            return Ok(Expression::Identifier(Identifier::quoted(
21027                token.text.clone(),
21028            )));
21029        }
21030
21031        // Identifier (could be a stage name without @)
21032        if self.check(TokenType::Var) || self.check_keyword() {
21033            let ident = self.advance().text.clone();
21034            return Ok(Expression::Column(Column {
21035                name: Identifier::new(ident),
21036                table: None,
21037                join_mark: false,
21038                trailing_comments: Vec::new(),
21039                span: None,
21040                inferred_type: None,
21041            }));
21042        }
21043
21044        Err(self.parse_error("Expected file location"))
21045    }
21046
21047    /// Parse Snowflake stage reference as a string for PUT/GET/COPY statements
21048    /// Handles: @stage, @%table, @~, @db.schema.stage, @"quoted"."stage", @stage/path
21049    /// Returns a Literal::String containing the stage path
21050    fn parse_stage_reference_as_string(&mut self) -> Result<Expression> {
21051        // Stage reference starting with @ (tokenized as DAt)
21052        if self.check(TokenType::DAt) {
21053            self.advance(); // consume @
21054            let mut stage_path = String::from("@");
21055
21056            // Handle table stage prefix: @%table
21057            if self.check(TokenType::Percent) || self.check(TokenType::Mod) {
21058                stage_path.push('%');
21059                self.advance(); // consume %
21060            }
21061            // Handle user stage: @~
21062            else if self.check(TokenType::Tilde) {
21063                stage_path.push('~');
21064                self.advance(); // consume ~
21065                                // After @~, parse any path segments
21066                while self.check(TokenType::Slash) {
21067                    self.advance(); // consume /
21068                    stage_path.push('/');
21069                    if (self.check(TokenType::Var)
21070                        || self.check_keyword()
21071                        || self.is_identifier_token())
21072                        && !self.check_next(TokenType::Eq)
21073                    {
21074                        stage_path.push_str(&self.advance().text);
21075                    }
21076                }
21077                return Ok(Expression::Literal(Literal::String(stage_path)));
21078            }
21079
21080            // Get stage name (could be quoted identifier)
21081            if self.check(TokenType::QuotedIdentifier) {
21082                // Preserve quoted identifier with quotes
21083                let text = &self.peek().text;
21084                stage_path.push('"');
21085                stage_path.push_str(text);
21086                stage_path.push('"');
21087                self.advance();
21088            } else if self.check(TokenType::Var)
21089                || self.check_keyword()
21090                || self.check(TokenType::Identifier)
21091            {
21092                stage_path.push_str(&self.advance().text);
21093            }
21094
21095            // Parse qualified name parts: .schema.stage (may include quoted identifiers)
21096            while self.check(TokenType::Dot) {
21097                self.advance(); // consume .
21098                stage_path.push('.');
21099                if self.check(TokenType::QuotedIdentifier) {
21100                    // Preserve quoted identifier with quotes
21101                    let text = &self.peek().text;
21102                    stage_path.push('"');
21103                    stage_path.push_str(text);
21104                    stage_path.push('"');
21105                    self.advance();
21106                } else if self.check(TokenType::Var)
21107                    || self.check_keyword()
21108                    || self.check(TokenType::Identifier)
21109                {
21110                    stage_path.push_str(&self.advance().text);
21111                }
21112            }
21113
21114            // Parse path segments: /path/to/file
21115            while self.check(TokenType::Slash) {
21116                self.advance(); // consume /
21117                stage_path.push('/');
21118                // Get path segment but don't consume if followed by = (that's a parameter)
21119                if (self.check(TokenType::Var)
21120                    || self.check_keyword()
21121                    || self.is_identifier_token())
21122                    && !self.check_next(TokenType::Eq)
21123                {
21124                    stage_path.push_str(&self.advance().text);
21125                }
21126            }
21127            return Ok(Expression::Literal(Literal::String(stage_path)));
21128        }
21129
21130        // Stage reference tokenized as a Var starting with @ (e.g., @s1)
21131        if self.check(TokenType::Var) && self.peek().text.starts_with('@') {
21132            let mut stage_path = self.advance().text.clone();
21133
21134            // Parse qualified name parts: .schema.stage (may include quoted identifiers)
21135            while self.check(TokenType::Dot) {
21136                self.advance(); // consume .
21137                stage_path.push('.');
21138                if self.check(TokenType::QuotedIdentifier) {
21139                    let text = &self.peek().text;
21140                    stage_path.push('"');
21141                    stage_path.push_str(text);
21142                    stage_path.push('"');
21143                    self.advance();
21144                } else if self.check(TokenType::Var)
21145                    || self.check_keyword()
21146                    || self.check(TokenType::Identifier)
21147                {
21148                    stage_path.push_str(&self.advance().text);
21149                }
21150            }
21151
21152            // Parse path segments: /path/to/file
21153            while self.check(TokenType::Slash) {
21154                self.advance(); // consume /
21155                stage_path.push('/');
21156                if (self.check(TokenType::Var)
21157                    || self.check_keyword()
21158                    || self.is_identifier_token())
21159                    && !self.check_next(TokenType::Eq)
21160                {
21161                    stage_path.push_str(&self.advance().text);
21162                }
21163            }
21164            return Ok(Expression::Literal(Literal::String(stage_path)));
21165        }
21166
21167        Err(self.parse_error("Expected stage reference starting with @"))
21168    }
21169
21170    /// Parse PUT statement (Snowflake)
21171    /// PUT file://<path> @<stage> [AUTO_COMPRESS = TRUE|FALSE] ...
21172    fn parse_put(&mut self) -> Result<Expression> {
21173        self.expect(TokenType::Put)?;
21174
21175        // Parse source file path (usually file:///path/to/file)
21176        let (source, source_quoted) = if self.check(TokenType::String) {
21177            (self.advance().text.clone(), true)
21178        } else {
21179            // Handle file://path syntax (parsed as identifier + colon + etc.)
21180            // Stop when we see @ (start of stage reference)
21181            let mut source_parts = Vec::new();
21182            while !self.is_at_end() {
21183                // Stop if we see @ (DAt token or Var starting with @)
21184                if self.check(TokenType::DAt) {
21185                    break;
21186                }
21187                if self.check(TokenType::Var) && self.peek().text.starts_with('@') {
21188                    break;
21189                }
21190                let token = self.advance();
21191                source_parts.push(token.text.clone());
21192            }
21193            (source_parts.join(""), false)
21194        };
21195
21196        // Parse target stage (@stage_name)
21197        let target = self.parse_stage_reference_as_string()?;
21198
21199        // Parse optional parameters
21200        // Note: Some parameter names like OVERWRITE are keywords, so we check for those explicitly
21201        // Preserve original casing for identity tests
21202        let mut params = Vec::new();
21203        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
21204            let is_param_name = self.check(TokenType::Var)
21205                || self.check_keyword()
21206                || self.check(TokenType::Overwrite);
21207            if is_param_name {
21208                let name = self.advance().text.clone();
21209                let value = if self.match_token(TokenType::Eq) {
21210                    Some(self.parse_primary()?)
21211                } else {
21212                    None
21213                };
21214                params.push(CopyParameter {
21215                    name,
21216                    value,
21217                    values: Vec::new(),
21218                    eq: true,
21219                });
21220            } else {
21221                break;
21222            }
21223        }
21224
21225        Ok(Expression::Put(Box::new(PutStmt {
21226            source,
21227            source_quoted,
21228            target,
21229            params,
21230        })))
21231    }
21232
21233    /// Helper to join command tokens with smart spacing
21234    /// Preserves the structure of file paths, stage references, etc.
21235    fn join_command_tokens(&self, tokens: Vec<(String, TokenType)>) -> String {
21236        let mut result = String::new();
21237        let mut prev_token_type: Option<TokenType> = None;
21238        let mut prev_prev_token_type: Option<TokenType> = None;
21239
21240        for (i, (text, token_type)) in tokens.iter().enumerate() {
21241            let needs_space = if result.is_empty() {
21242                false
21243            } else {
21244                match (prev_token_type, *token_type) {
21245                    // No space after @ (stage references: @stage, @%, @~)
21246                    (Some(TokenType::DAt), _) => false,
21247                    // No space around dots (identifiers: a.b.c)
21248                    (Some(TokenType::Dot), _) => false,
21249                    (_, TokenType::Dot) => false,
21250                    // No space around parentheses
21251                    (Some(TokenType::LParen), _) => false,
21252                    (_, TokenType::LParen) => false,
21253                    (_, TokenType::RParen) => false,
21254                    // No space around square brackets (array access: arr[i])
21255                    (Some(TokenType::LBracket), _) => false,
21256                    (_, TokenType::LBracket) => false,
21257                    (_, TokenType::RBracket) => false,
21258                    // No space before ,
21259                    (_, TokenType::Comma) => false,
21260                    // No space around / (paths: @s1/test)
21261                    (Some(TokenType::Slash), _) => false,
21262                    (_, TokenType::Slash) => false,
21263                    // No space around : (file://path)
21264                    (Some(TokenType::Colon), _) => false,
21265                    (_, TokenType::Colon) => false,
21266                    // No space around % (table stage: @%table)
21267                    (Some(TokenType::Mod), _) => false,
21268                    (_, TokenType::Mod) => false,
21269                    (Some(TokenType::Percent), _) => false,
21270                    (_, TokenType::Percent) => false,
21271                    // Handle = contextually:
21272                    // - No space around = in simple KEY=VALUE patterns where value is terminal
21273                    //   (PARALLEL=1, ENABLED=TRUE, FILE_FORMAT='csv')
21274                    // - Keep space for expressions like SET x = x + 1
21275                    (Some(TokenType::Var), TokenType::Eq) => {
21276                        // If the var starts with @ (parameter like @id = 123), always use spaces
21277                        if i >= 1 && tokens[i - 1].0.starts_with('@') {
21278                            true
21279                        } else if i + 1 < tokens.len() {
21280                            // Check what follows: Var=Number where number is terminal (end or followed by Var)
21281                            let next_type = tokens[i + 1].1;
21282                            // Is the value terminal (end of tokens, or followed by another Var=... pattern)?
21283                            let is_terminal_value =
21284                                i + 2 >= tokens.len() || tokens[i + 2].1 == TokenType::Var;
21285                            match next_type {
21286                                // No space for terminal numbers/bools: PARALLEL=1, ENABLED=TRUE
21287                                // Return false (no space) when terminal
21288                                TokenType::Number | TokenType::True | TokenType::False => {
21289                                    !is_terminal_value
21290                                }
21291                                // No space for terminal strings: FILE_FORMAT='csv'
21292                                TokenType::String => !is_terminal_value,
21293                                // Always space if followed by Var (SET x = y ...)
21294                                _ => true,
21295                            }
21296                        } else {
21297                            true
21298                        }
21299                    }
21300                    // No space after = in terminal KEY=VALUE patterns
21301                    (Some(TokenType::Eq), TokenType::Number)
21302                    | (Some(TokenType::Eq), TokenType::True)
21303                    | (Some(TokenType::Eq), TokenType::False)
21304                    | (Some(TokenType::Eq), TokenType::String) => {
21305                        // Is this a terminal value (end or followed by another Var=...)?
21306                        let is_terminal =
21307                            i + 1 >= tokens.len() || tokens[i + 1].1 == TokenType::Var;
21308                        match prev_prev_token_type {
21309                            // No space (return false) when terminal, space otherwise
21310                            // But always space if the var before = was preceded by @ (parameter)
21311                            Some(TokenType::Var) => {
21312                                // Always space if the var before = starts with @ (parameter)
21313                                if i >= 2 && tokens[i - 2].0.starts_with('@') {
21314                                    true
21315                                } else {
21316                                    !is_terminal
21317                                }
21318                            }
21319                            _ => true, // Space for other cases
21320                        }
21321                    }
21322                    // Always space after = when followed by Var (SET x = y, could be expression)
21323                    (Some(TokenType::Eq), TokenType::Var) => true,
21324                    // No space around :: (cast)
21325                    (Some(TokenType::DColon), _) => false,
21326                    (_, TokenType::DColon) => false,
21327                    // Default: add space
21328                    _ => true,
21329                }
21330            };
21331
21332            if needs_space {
21333                result.push(' ');
21334            }
21335            result.push_str(text);
21336            prev_prev_token_type = prev_token_type;
21337            prev_token_type = Some(*token_type);
21338        }
21339        result
21340    }
21341
21342    /// Join Teradata table option tokens with Teradata-specific spacing
21343    /// - No spaces around '='
21344    /// - No spaces around dots or parentheses
21345    /// - Space-separated words otherwise
21346    fn join_teradata_option_tokens(&self, tokens: Vec<(String, TokenType)>) -> String {
21347        let mut result = String::new();
21348        let mut prev_token_type: Option<TokenType> = None;
21349
21350        for (text, token_type) in tokens {
21351            let needs_space = if result.is_empty() {
21352                false
21353            } else {
21354                match (prev_token_type, token_type) {
21355                    (Some(TokenType::Dot), _) => false,
21356                    (_, TokenType::Dot) => false,
21357                    (Some(TokenType::LParen), _) => false,
21358                    (_, TokenType::LParen) => false,
21359                    (_, TokenType::RParen) => false,
21360                    (_, TokenType::Comma) => false,
21361                    (Some(TokenType::Eq), _) => false,
21362                    (_, TokenType::Eq) => false,
21363                    _ => true,
21364                }
21365            };
21366
21367            if needs_space {
21368                result.push(' ');
21369            }
21370            result.push_str(&text);
21371            prev_token_type = Some(token_type);
21372        }
21373
21374        result
21375    }
21376
21377    /// Parse RM or REMOVE command (Snowflake)
21378    /// RM @stage_name / REMOVE @stage_name
21379    fn parse_rm_command(&mut self) -> Result<Expression> {
21380        let command_token = self.advance(); // RM or REMOVE
21381        let command_name = command_token.text.to_uppercase();
21382
21383        // Collect remaining tokens with their types
21384        let mut tokens = vec![(command_name, command_token.token_type)];
21385        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
21386            let token = self.advance();
21387            tokens.push((token.text.clone(), token.token_type));
21388        }
21389
21390        Ok(Expression::Command(Box::new(Command {
21391            this: self.join_command_tokens(tokens),
21392        })))
21393    }
21394
21395    /// Parse GET command (Snowflake)
21396    /// GET @stage_name 'file:///path'
21397    fn parse_get_command(&mut self) -> Result<Expression> {
21398        let get_token = self.advance(); // consume GET (it's already matched)
21399
21400        // Collect remaining tokens with their types, preserving quotes
21401        let mut tokens = vec![("GET".to_string(), get_token.token_type)];
21402        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
21403            let token = self.advance();
21404            // Re-add quotes around string and quoted identifier tokens
21405            let text = match token.token_type {
21406                TokenType::String => format!("'{}'", token.text),
21407                TokenType::QuotedIdentifier => format!("\"{}\"", token.text),
21408                _ => token.text.clone(),
21409            };
21410            tokens.push((text, token.token_type));
21411        }
21412
21413        Ok(Expression::Command(Box::new(Command {
21414            this: self.join_command_tokens(tokens),
21415        })))
21416    }
21417
21418    /// Parse CALL statement (stored procedure call)
21419    /// CALL procedure_name(args, ...)
21420    fn parse_call(&mut self) -> Result<Expression> {
21421        let call_token = self.advance(); // consume CALL
21422
21423        // Collect remaining tokens with their types
21424        let mut tokens = vec![("CALL".to_string(), call_token.token_type)];
21425        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
21426            let token = self.advance();
21427            tokens.push((token.text.clone(), token.token_type));
21428        }
21429
21430        Ok(Expression::Command(Box::new(Command {
21431            this: self.join_command_tokens(tokens),
21432        })))
21433    }
21434
21435    /// Parse KILL statement (MySQL/MariaDB)
21436    /// KILL [CONNECTION | QUERY] <id>
21437    fn parse_kill(&mut self) -> Result<Expression> {
21438        self.expect(TokenType::Kill)?;
21439
21440        // Check for optional kind: CONNECTION or QUERY
21441        let kind = if self.match_identifier("CONNECTION") {
21442            Some("CONNECTION".to_string())
21443        } else if self.match_identifier("QUERY") {
21444            Some("QUERY".to_string())
21445        } else {
21446            None
21447        };
21448
21449        // Parse the target (process ID - usually a number or string)
21450        let this = self.parse_primary()?;
21451
21452        Ok(Expression::Kill(Box::new(Kill { this, kind })))
21453    }
21454
21455    /// Parse EXEC/EXECUTE statement (TSQL stored procedure call)
21456    /// EXEC [schema.]procedure_name [@param=value, ...]
21457    fn parse_execute(&mut self) -> Result<Expression> {
21458        self.expect(TokenType::Execute)?;
21459
21460        // Parse procedure name (can be qualified: schema.proc_name)
21461        let proc_name = self.parse_table_ref()?;
21462        let this = Expression::Table(proc_name);
21463
21464        // Parse optional parameters: @param=value, ...
21465        let mut parameters = Vec::new();
21466
21467        // Check if there are parameters (starts with @ or identifier)
21468        while self.check(TokenType::Var) || self.check(TokenType::Parameter) {
21469            // Get the parameter name (starts with @)
21470            let token = self.advance();
21471            let param_name = if token.text.starts_with('@') {
21472                token.text.clone()
21473            } else {
21474                format!("@{}", token.text)
21475            };
21476
21477            // Check for = (named parameter) or positional parameter
21478            if self.match_token(TokenType::Eq) {
21479                // Named parameter: @param = value
21480                let value = self.parse_primary()?;
21481                parameters.push(ExecuteParameter {
21482                    name: param_name,
21483                    value,
21484                    positional: false,
21485                });
21486            } else {
21487                // Positional parameter: @var (no = sign)
21488                // Positional parameter: @var (no = sign)
21489                parameters.push(ExecuteParameter {
21490                    name: param_name.clone(),
21491                    value: Expression::Column(Column {
21492                        name: Identifier::new(&param_name),
21493                        table: None,
21494                        join_mark: false,
21495                        trailing_comments: Vec::new(),
21496                        span: None,
21497                        inferred_type: None,
21498                    }),
21499                    positional: true,
21500                });
21501            }
21502
21503            // Check for comma to continue
21504            if !self.match_token(TokenType::Comma) {
21505                break;
21506            }
21507        }
21508
21509        Ok(Expression::Execute(Box::new(ExecuteStatement {
21510            this,
21511            parameters,
21512        })))
21513    }
21514
21515    /// Parse GRANT statement
21516    /// GRANT <privileges> ON [<kind>] <object> TO <principals> [WITH GRANT OPTION]
21517    fn parse_grant(&mut self) -> Result<Expression> {
21518        self.expect(TokenType::Grant)?;
21519
21520        // ClickHouse: GRANT can grant roles (no ON clause), grant privileges (has ON clause),
21521        // or use complex syntax. If we see TO before ON, treat as command.
21522        // Also: multi-privilege grants (multiple ON), wildcard grants (test*.*),
21523        // WITH REPLACE OPTION all parse as commands.
21524        if matches!(
21525            self.config.dialect,
21526            Some(crate::dialects::DialectType::ClickHouse)
21527        ) {
21528            // Save position after GRANT keyword
21529            let saved_pos = self.current;
21530            // Scan ahead to check grant structure
21531            let mut depth = 0i32;
21532            let mut on_count = 0;
21533            let mut found_to = false;
21534            let mut has_star_in_name = false;
21535            let mut has_replace_option = false;
21536            let mut i = self.current;
21537            while i < self.tokens.len() && self.tokens[i].token_type != TokenType::Semicolon {
21538                match self.tokens[i].token_type {
21539                    TokenType::LParen => depth += 1,
21540                    TokenType::RParen => depth -= 1,
21541                    TokenType::On if depth == 0 => on_count += 1,
21542                    TokenType::To if depth == 0 => {
21543                        found_to = true;
21544                    }
21545                    TokenType::Star if depth == 0 && on_count > 0 && !found_to => {
21546                        // Check if star is part of a wildcard name (e.g., test*.*)
21547                        if i > 0
21548                            && self.tokens[i - 1].token_type != TokenType::Dot
21549                            && self.tokens[i - 1].token_type != TokenType::On
21550                        {
21551                            has_star_in_name = true;
21552                        }
21553                    }
21554                    TokenType::Replace if depth == 0 && found_to => {
21555                        has_replace_option = true;
21556                    }
21557                    _ => {}
21558                }
21559                i += 1;
21560            }
21561            if (found_to && on_count == 0) || on_count > 1 || has_star_in_name || has_replace_option
21562            {
21563                // Role grant, multi-privilege grant, wildcard grant, or REPLACE OPTION — parse as command
21564                self.current = saved_pos;
21565                return self
21566                    .parse_command()?
21567                    .ok_or_else(|| self.parse_error("Failed to parse GRANT statement"));
21568            }
21569            self.current = saved_pos;
21570        }
21571
21572        // Parse privileges (e.g., SELECT, INSERT, UPDATE)
21573        let privileges = self.parse_privileges()?;
21574
21575        // Expect ON
21576        self.expect(TokenType::On)?;
21577
21578        // Parse optional kind (TABLE, SCHEMA, FUNCTION, etc.)
21579        let kind = self.parse_object_kind()?;
21580
21581        // Parse securable (the object) - may be dot-separated qualified name
21582        let securable = self.parse_securable_name()?;
21583
21584        // Parse optional function parameter types: func(type1, type2, ...)
21585        let function_params = if self.check(TokenType::LParen) {
21586            self.parse_function_param_types()?
21587        } else {
21588            Vec::new()
21589        };
21590
21591        // Expect TO
21592        self.expect(TokenType::To)?;
21593
21594        // Parse principals
21595        let principals = self.parse_principals()?;
21596
21597        // Check for WITH GRANT OPTION
21598        let grant_option = self.match_token(TokenType::With)
21599            && self.check(TokenType::Grant)
21600            && {
21601                self.advance();
21602                self.check(TokenType::Var) && self.peek().text.to_uppercase() == "OPTION"
21603            }
21604            && {
21605                self.advance();
21606                true
21607            };
21608
21609        // Check for TSQL AS principal clause
21610        let as_principal = if self.match_token(TokenType::As) {
21611            let name = self.expect_identifier_or_keyword()?;
21612            Some(Identifier::new(name))
21613        } else {
21614            None
21615        };
21616
21617        Ok(Expression::Grant(Box::new(Grant {
21618            privileges,
21619            kind,
21620            securable,
21621            function_params,
21622            principals,
21623            grant_option,
21624            as_principal,
21625        })))
21626    }
21627
21628    /// Parse REVOKE statement
21629    /// REVOKE [GRANT OPTION FOR] <privileges> ON [<kind>] <object> FROM <principals> [CASCADE]
21630    fn parse_revoke(&mut self) -> Result<Expression> {
21631        self.expect(TokenType::Revoke)?;
21632
21633        // ClickHouse: REVOKE role FROM user (no ON clause), multi-privilege, or wildcard — parse as command
21634        if matches!(
21635            self.config.dialect,
21636            Some(crate::dialects::DialectType::ClickHouse)
21637        ) {
21638            let saved_pos = self.current;
21639            let mut depth = 0i32;
21640            let mut on_count = 0;
21641            let mut found_from = false;
21642            let mut has_star_in_name = false;
21643            let mut i = self.current;
21644            while i < self.tokens.len() && self.tokens[i].token_type != TokenType::Semicolon {
21645                match self.tokens[i].token_type {
21646                    TokenType::LParen => depth += 1,
21647                    TokenType::RParen => depth -= 1,
21648                    TokenType::On if depth == 0 => on_count += 1,
21649                    TokenType::From if depth == 0 => {
21650                        found_from = true;
21651                    }
21652                    TokenType::Star if depth == 0 && on_count > 0 && !found_from => {
21653                        if i > 0
21654                            && self.tokens[i - 1].token_type != TokenType::Dot
21655                            && self.tokens[i - 1].token_type != TokenType::On
21656                        {
21657                            has_star_in_name = true;
21658                        }
21659                    }
21660                    _ => {}
21661                }
21662                i += 1;
21663            }
21664            if (found_from && on_count == 0) || on_count > 1 || has_star_in_name {
21665                self.current = saved_pos;
21666                return self
21667                    .parse_command()?
21668                    .ok_or_else(|| self.parse_error("Failed to parse REVOKE statement"));
21669            }
21670            self.current = saved_pos;
21671        }
21672
21673        // Check for GRANT OPTION FOR
21674        let grant_option = if self.check(TokenType::Grant) {
21675            self.advance();
21676            if self.check(TokenType::Var) && self.peek().text.to_uppercase() == "OPTION" {
21677                self.advance();
21678                self.expect(TokenType::For)?;
21679                true
21680            } else {
21681                return Err(self.parse_error("Expected OPTION after GRANT in REVOKE"));
21682            }
21683        } else {
21684            false
21685        };
21686
21687        // Parse privileges
21688        let privileges = self.parse_privileges()?;
21689
21690        // Expect ON
21691        self.expect(TokenType::On)?;
21692
21693        // Parse optional kind
21694        let kind = self.parse_object_kind()?;
21695
21696        // Parse securable - may be dot-separated qualified name
21697        let securable = self.parse_securable_name()?;
21698
21699        // Parse optional function parameter types: func(type1, type2, ...)
21700        let function_params = if self.check(TokenType::LParen) {
21701            self.parse_function_param_types()?
21702        } else {
21703            Vec::new()
21704        };
21705
21706        // Expect FROM
21707        self.expect(TokenType::From)?;
21708
21709        // Parse principals
21710        let principals = self.parse_principals()?;
21711
21712        // Check for CASCADE or RESTRICT
21713        let cascade = self.match_token(TokenType::Cascade);
21714        let restrict = if !cascade {
21715            self.match_token(TokenType::Restrict)
21716        } else {
21717            false
21718        };
21719
21720        Ok(Expression::Revoke(Box::new(Revoke {
21721            privileges,
21722            kind,
21723            securable,
21724            function_params,
21725            principals,
21726            grant_option,
21727            cascade,
21728            restrict,
21729        })))
21730    }
21731
21732    /// Parse privilege list for GRANT/REVOKE
21733    /// Handles multi-word privileges like "ALL PRIVILEGES" and column-level privileges like "SELECT(col1, col2)"
21734    fn parse_privileges(&mut self) -> Result<Vec<Privilege>> {
21735        let mut privileges = Vec::new();
21736        loop {
21737            let mut priv_parts = Vec::new();
21738            // Collect privilege words until we hit ON, comma, LParen, or similar terminator
21739            while !self.is_at_end() {
21740                if self.check(TokenType::On)
21741                    || self.check(TokenType::Comma)
21742                    || self.check(TokenType::LParen)
21743                {
21744                    break;
21745                }
21746                if self.is_identifier_or_keyword_token() {
21747                    priv_parts.push(self.advance().text.to_uppercase());
21748                } else {
21749                    break;
21750                }
21751            }
21752            if priv_parts.is_empty() {
21753                break;
21754            }
21755            let priv_name = priv_parts.join(" ");
21756
21757            // Check for column list in parentheses: SELECT(col1, col2)
21758            let columns = if self.match_token(TokenType::LParen) {
21759                let mut cols = Vec::new();
21760                loop {
21761                    // Parse column name (identifier)
21762                    if self.is_identifier_or_keyword_token() {
21763                        cols.push(self.advance().text.to_string());
21764                    } else if self.check(TokenType::RParen) {
21765                        break;
21766                    } else {
21767                        break;
21768                    }
21769                    if !self.match_token(TokenType::Comma) {
21770                        break;
21771                    }
21772                }
21773                self.expect(TokenType::RParen)?;
21774                cols
21775            } else {
21776                Vec::new()
21777            };
21778
21779            privileges.push(Privilege {
21780                name: priv_name,
21781                columns,
21782            });
21783            if !self.match_token(TokenType::Comma) {
21784                break;
21785            }
21786        }
21787        Ok(privileges)
21788    }
21789
21790    /// Parse object kind (TABLE, SCHEMA, FUNCTION, PROCEDURE, SEQUENCE, etc.)
21791    fn parse_object_kind(&mut self) -> Result<Option<String>> {
21792        if self.check(TokenType::Table) {
21793            self.advance();
21794            Ok(Some("TABLE".to_string()))
21795        } else if self.check(TokenType::Schema) {
21796            self.advance();
21797            Ok(Some("SCHEMA".to_string()))
21798        } else if self.check(TokenType::Database) {
21799            self.advance();
21800            Ok(Some("DATABASE".to_string()))
21801        } else if self.check(TokenType::Function) {
21802            self.advance();
21803            Ok(Some("FUNCTION".to_string()))
21804        } else if self.check(TokenType::View) {
21805            self.advance();
21806            Ok(Some("VIEW".to_string()))
21807        } else if self.check(TokenType::Procedure) {
21808            self.advance();
21809            Ok(Some("PROCEDURE".to_string()))
21810        } else if self.check(TokenType::Sequence) {
21811            self.advance();
21812            Ok(Some("SEQUENCE".to_string()))
21813        } else {
21814            Ok(None)
21815        }
21816    }
21817
21818    /// Parse principal list for GRANT/REVOKE
21819    fn parse_principals(&mut self) -> Result<Vec<GrantPrincipal>> {
21820        let mut principals = Vec::new();
21821        loop {
21822            // Check for ROLE keyword (TokenType::Var with text "ROLE")
21823            let is_role = if self.check(TokenType::Var) && self.peek().text.to_uppercase() == "ROLE"
21824            {
21825                self.advance();
21826                true
21827            } else {
21828                false
21829            };
21830            // Check for GROUP keyword (Redshift) - TokenType::Group
21831            let is_group = if !is_role && self.check(TokenType::Group) {
21832                self.advance();
21833                true
21834            } else {
21835                false
21836            };
21837            // Parse principal name (with quoted flag preserved for backtick-quoted identifiers)
21838            let name = self.expect_identifier_or_keyword_with_quoted()?;
21839            principals.push(GrantPrincipal {
21840                name,
21841                is_role,
21842                is_group,
21843            });
21844            if !self.match_token(TokenType::Comma) {
21845                break;
21846            }
21847        }
21848        Ok(principals)
21849    }
21850
21851    /// Parse a securable name (potentially dot-separated qualified name)
21852    /// e.g., "mydb.myschema.ADD5" -> Identifier("mydb.myschema.ADD5")
21853    fn parse_securable_name(&mut self) -> Result<Identifier> {
21854        // Accept * as a name part (e.g., GRANT ON *.* or GRANT ON db.*)
21855        let first = if self.match_token(TokenType::Star) {
21856            "*".to_string()
21857        } else {
21858            self.expect_identifier_or_keyword()?
21859        };
21860        let mut parts = vec![first];
21861
21862        while self.match_token(TokenType::Dot) {
21863            let next = if self.match_token(TokenType::Star) {
21864                "*".to_string()
21865            } else {
21866                self.expect_identifier_or_keyword()?
21867            };
21868            parts.push(next);
21869        }
21870
21871        Ok(Identifier::new(parts.join(".")))
21872    }
21873
21874    /// Parse function parameter types for GRANT/REVOKE ON FUNCTION
21875    /// e.g., "(number, varchar)" -> vec!["number", "varchar"]
21876    fn parse_function_param_types(&mut self) -> Result<Vec<String>> {
21877        self.expect(TokenType::LParen)?;
21878
21879        let mut params = Vec::new();
21880        if !self.check(TokenType::RParen) {
21881            loop {
21882                // Parse parameter type - can be a keyword (INT, VARCHAR) or identifier
21883                let param_type = self.expect_identifier_or_keyword()?;
21884                params.push(param_type);
21885                if !self.match_token(TokenType::Comma) {
21886                    break;
21887                }
21888            }
21889        }
21890
21891        self.expect(TokenType::RParen)?;
21892        Ok(params)
21893    }
21894
21895    /// Parse COMMENT ON statement
21896    fn parse_comment(&mut self) -> Result<Expression> {
21897        self.expect(TokenType::Comment)?;
21898
21899        // Check for IF EXISTS
21900        let exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
21901
21902        // Expect ON
21903        self.expect(TokenType::On)?;
21904
21905        // Check for MATERIALIZED (can be TokenType::Materialized or TokenType::Var)
21906        let materialized = if self.match_token(TokenType::Materialized) {
21907            true
21908        } else if self.check(TokenType::Var) && self.peek().text.to_uppercase() == "MATERIALIZED" {
21909            self.advance();
21910            true
21911        } else {
21912            false
21913        };
21914
21915        // Parse the object kind (COLUMN, TABLE, DATABASE, PROCEDURE, etc.)
21916        let kind = self.expect_identifier_or_keyword()?.to_uppercase();
21917
21918        // Parse the object name (can be qualified like schema.table.column)
21919        // For PROCEDURE/FUNCTION, we need to handle the parameter list like my_proc(integer, integer)
21920        let this = if kind == "PROCEDURE" || kind == "FUNCTION" {
21921            // Parse name possibly with parameter types, preserving original case
21922            let name_token = self.advance();
21923            let mut name_str = name_token.text.clone();
21924
21925            // Parse additional qualified parts
21926            while self.match_token(TokenType::Dot) {
21927                let next = self.advance();
21928                name_str.push('.');
21929                name_str.push_str(&next.text);
21930            }
21931
21932            // Check for parameter types in parentheses
21933            if self.match_token(TokenType::LParen) {
21934                name_str.push('(');
21935                let mut first = true;
21936                while !self.check(TokenType::RParen) && !self.is_at_end() {
21937                    if !first {
21938                        name_str.push_str(", ");
21939                    }
21940                    first = false;
21941                    let param_token = self.advance();
21942                    name_str.push_str(&param_token.text);
21943                    self.match_token(TokenType::Comma);
21944                }
21945                self.expect(TokenType::RParen)?;
21946                name_str.push(')');
21947            }
21948
21949            Expression::Identifier(Identifier::new(name_str))
21950        } else {
21951            self.parse_qualified_name()?
21952        };
21953
21954        // Expect IS
21955        if self.check(TokenType::Is) {
21956            self.advance();
21957        } else {
21958            return Err(self.parse_error("Expected IS in COMMENT ON statement"));
21959        }
21960
21961        // Parse the comment expression (usually a string literal)
21962        let expression = self.parse_primary()?;
21963
21964        Ok(Expression::Comment(Box::new(Comment {
21965            this,
21966            kind,
21967            expression,
21968            exists,
21969            materialized,
21970        })))
21971    }
21972
21973    /// Parse SET statement
21974    fn parse_set(&mut self) -> Result<Expression> {
21975        self.expect(TokenType::Set)?;
21976
21977        let mut items = Vec::new();
21978
21979        // ClickHouse: SET DEFAULT ROLE ... TO user - parse as command
21980        if matches!(
21981            self.config.dialect,
21982            Some(crate::dialects::DialectType::ClickHouse)
21983        ) && self.check(TokenType::Default)
21984        {
21985            let mut parts = vec!["SET".to_string()];
21986            while !self.is_at_end() && self.peek().token_type != TokenType::Semicolon {
21987                parts.push(self.advance().text.clone());
21988            }
21989            return Ok(Expression::Command(Box::new(crate::expressions::Command {
21990                this: parts.join(" "),
21991            })));
21992        }
21993
21994        // Teradata: SET QUERY_BAND = ... [UPDATE] [FOR scope]
21995        if matches!(
21996            self.config.dialect,
21997            Some(crate::dialects::DialectType::Teradata)
21998        ) && self.match_identifier("QUERY_BAND")
21999        {
22000            return self.parse_query_band();
22001        }
22002
22003        // Handle MySQL SET CHARACTER SET / SET NAMES
22004        if self.match_identifier("CHARACTER") {
22005            // SET CHARACTER SET <charset> | SET CHARACTER SET DEFAULT
22006            self.expect(TokenType::Set)?;
22007            let value = if self.match_token(TokenType::Default) {
22008                Expression::Identifier(Identifier::new("DEFAULT".to_string()))
22009            } else {
22010                self.parse_primary()?
22011            };
22012            items.push(SetItem {
22013                name: Expression::Identifier(Identifier::new("CHARACTER SET".to_string())),
22014                value,
22015                kind: None,
22016                no_equals: false,
22017            });
22018            return Ok(Expression::SetStatement(Box::new(SetStatement { items })));
22019        }
22020
22021        if self.match_identifier("NAMES") {
22022            // SET NAMES <charset> [COLLATE <collation>] | SET NAMES DEFAULT
22023            let value = if self.match_token(TokenType::Default) {
22024                Expression::Identifier(Identifier::new("DEFAULT".to_string()))
22025            } else {
22026                self.parse_primary()?
22027            };
22028            // Check for optional COLLATE clause
22029            let collation = if self.match_identifier("COLLATE") {
22030                Some(self.parse_primary()?)
22031            } else {
22032                None
22033            };
22034            items.push(SetItem {
22035                name: Expression::Identifier(Identifier::new("NAMES".to_string())),
22036                value,
22037                kind: None,
22038                no_equals: false,
22039            });
22040            if let Some(coll) = collation {
22041                items.push(SetItem {
22042                    name: Expression::Identifier(Identifier::new("COLLATE".to_string())),
22043                    value: coll,
22044                    kind: None,
22045                    no_equals: false,
22046                });
22047            }
22048            return Ok(Expression::SetStatement(Box::new(SetStatement { items })));
22049        }
22050
22051        loop {
22052            // Check for GLOBAL, LOCAL, SESSION, PERSIST, PERSIST_ONLY modifiers
22053            // LOCAL is a token type, others are identifiers
22054            let kind = if self.match_identifier("GLOBAL") {
22055                Some("GLOBAL".to_string())
22056            } else if self.match_token(TokenType::Local) {
22057                Some("LOCAL".to_string())
22058            } else if self.match_identifier("SESSION") {
22059                Some("SESSION".to_string())
22060            } else if self.match_identifier("PERSIST") {
22061                Some("PERSIST".to_string())
22062            } else if self.match_identifier("PERSIST_ONLY") {
22063                Some("PERSIST_ONLY".to_string())
22064            } else {
22065                None
22066            };
22067
22068            // Check for SET [GLOBAL|SESSION] TRANSACTION (MySQL)
22069            if self.match_token(TokenType::Transaction) {
22070                // Parse transaction characteristics (ISOLATION LEVEL, READ ONLY, READ WRITE)
22071                let mut characteristics = Vec::new();
22072                loop {
22073                    let mut char_tokens = Vec::new();
22074                    // Parse ISOLATION LEVEL ... or READ ONLY/WRITE
22075                    // Must handle keywords like ONLY, REPEATABLE, SERIALIZABLE, etc.
22076                    while !self.is_at_end()
22077                        && !self.check(TokenType::Comma)
22078                        && !self.check(TokenType::Semicolon)
22079                    {
22080                        // Allow identifiers and common transaction-related keywords
22081                        if self.is_identifier_token()
22082                            || self.is_safe_keyword_as_identifier()
22083                            || self.check(TokenType::Only)
22084                            || self.check(TokenType::Repeatable)
22085                        {
22086                            char_tokens.push(self.advance().text);
22087                        } else {
22088                            break;
22089                        }
22090                    }
22091                    if !char_tokens.is_empty() {
22092                        characteristics.push(char_tokens.join(" "));
22093                    }
22094                    if !self.match_token(TokenType::Comma) {
22095                        break;
22096                    }
22097                }
22098
22099                let name = Expression::Identifier(Identifier::new("TRANSACTION".to_string()));
22100                let value = if characteristics.is_empty() {
22101                    Expression::Identifier(Identifier::new("".to_string()))
22102                } else {
22103                    Expression::Identifier(Identifier::new(characteristics.join(", ")))
22104                };
22105
22106                items.push(SetItem {
22107                    name,
22108                    value,
22109                    kind,
22110                    no_equals: false,
22111                });
22112                break;
22113            }
22114
22115            // Handle DuckDB: SET VARIABLE var = value
22116            // Only match if "VARIABLE" is followed by another identifier (not by = or TO)
22117            let is_variable = if self.check(TokenType::Var)
22118                && self.peek().text.eq_ignore_ascii_case("VARIABLE")
22119            {
22120                // Look ahead: VARIABLE should be followed by another name, not by = or TO
22121                if let Some(next) = self.tokens.get(self.current + 1) {
22122                    if next.token_type != TokenType::Eq
22123                        && next.token_type != TokenType::To
22124                        && next.token_type != TokenType::ColonEq
22125                    {
22126                        self.advance(); // consume VARIABLE
22127                        true
22128                    } else {
22129                        false
22130                    }
22131                } else {
22132                    false
22133                }
22134            } else {
22135                false
22136            };
22137
22138            // Parse variable name - use a simple approach to avoid expression parsing issues
22139            // Variable names can be dotted identifiers or keywords used as names
22140            let name = {
22141                if self.check(TokenType::AtAt) {
22142                    // @@SCOPE.variable or @@variable syntax (MySQL system variables)
22143                    self.advance(); // consume @@
22144                    let mut name_str = "@@".to_string();
22145                    let first = self.advance().text.clone();
22146                    name_str.push_str(&first);
22147                    // Handle @@scope.variable (e.g., @@GLOBAL.max_connections)
22148                    while self.match_token(TokenType::Dot) {
22149                        let next = self.advance().text.clone();
22150                        name_str.push('.');
22151                        name_str.push_str(&next);
22152                    }
22153                    Expression::Identifier(Identifier::new(name_str))
22154                } else if self.check(TokenType::DAt) {
22155                    // @variable syntax (MySQL user variables)
22156                    self.advance(); // consume @
22157                    let mut name_str = "@".to_string();
22158                    let first = self.advance().text.clone();
22159                    name_str.push_str(&first);
22160                    Expression::Identifier(Identifier::new(name_str))
22161                } else {
22162                    let first = self.advance().text.clone();
22163                    let mut name_str = first;
22164                    // Handle dotted identifiers (e.g., schema.variable)
22165                    while self.match_token(TokenType::Dot) {
22166                        let next = self.advance().text.clone();
22167                        name_str.push('.');
22168                        name_str.push_str(&next);
22169                    }
22170                    // Handle Hive-style colon-separated names (e.g., hiveconf:some_var)
22171                    // But not := which is assignment
22172                    while self.check(TokenType::Colon) && !self.check_next(TokenType::Eq) {
22173                        self.advance(); // consume :
22174                        let next = self.advance().text.clone();
22175                        name_str.push(':');
22176                        name_str.push_str(&next);
22177                    }
22178                    Expression::Identifier(Identifier::new(name_str))
22179                }
22180            };
22181
22182            // Wrap name with VARIABLE marker if SET VARIABLE was used
22183            let name = if is_variable {
22184                // Store as "VARIABLE name" identifier
22185                let name_str = match &name {
22186                    Expression::Column(col) => col.name.name.clone(),
22187                    Expression::Identifier(id) => id.name.clone(),
22188                    _ => format!("{:?}", name),
22189                };
22190                Expression::Identifier(Identifier::new(format!("VARIABLE {}", name_str)))
22191            } else {
22192                name
22193            };
22194
22195            // Expect = or := or TO
22196            if self.match_token(TokenType::Eq) || self.match_token(TokenType::ColonEq) {
22197                // ok - standard assignment
22198            } else if self.match_token(TokenType::To) {
22199                // PostgreSQL uses SET var TO value
22200            } else if self.is_at_end()
22201                || self.check(TokenType::Semicolon)
22202                || self.check(TokenType::Comma)
22203            {
22204                // SET x ON/OFF without = (TSQL: SET XACT_ABORT ON)
22205                // The ON/OFF was already parsed as part of the name expression
22206                // Handle as a name-only set (value is empty)
22207                items.push(SetItem {
22208                    name,
22209                    value: Expression::Identifier(Identifier::new("".to_string())),
22210                    kind,
22211                    no_equals: false,
22212                });
22213                if !self.match_token(TokenType::Comma) {
22214                    break;
22215                }
22216                continue;
22217            } else {
22218                // Check if the next token looks like a value (ON/OFF without =)
22219                // TSQL: SET XACT_ABORT ON, SET NOCOUNT ON
22220                if self.check(TokenType::On) || self.check_keyword_text("OFF") {
22221                    let val = self.advance().text;
22222                    // Include ON/OFF in the name so generator doesn't add "="
22223                    let name_with_val = match &name {
22224                        Expression::Column(col) => format!("{} {}", col.name.name, val),
22225                        Expression::Identifier(id) => format!("{} {}", id.name, val),
22226                        _ => val.clone(),
22227                    };
22228                    items.push(SetItem {
22229                        name: Expression::Identifier(Identifier::new(name_with_val)),
22230                        value: Expression::Identifier(Identifier::new("".to_string())),
22231                        kind,
22232                        no_equals: false,
22233                    });
22234                    if !self.match_token(TokenType::Comma) {
22235                        break;
22236                    }
22237                    continue;
22238                }
22239                // TSQL/Generic: SET key value (without = or TO)
22240                // Parse the next token as the value
22241                if !self.is_at_end() && !self.check(TokenType::Semicolon) {
22242                    let value = self.parse_expression()?;
22243                    items.push(SetItem {
22244                        name,
22245                        value,
22246                        kind,
22247                        no_equals: true,
22248                    });
22249                    if !self.match_token(TokenType::Comma) {
22250                        break;
22251                    }
22252                    continue;
22253                }
22254                return Err(self.parse_error("Expected '=' or 'TO' in SET statement"));
22255            }
22256
22257            // Parse value - handle ON/OFF keywords as identifiers (MySQL: SET autocommit = ON)
22258            let value = if self.check(TokenType::On) || self.check_keyword_text("OFF") {
22259                Expression::Identifier(Identifier::new(self.advance().text.clone()))
22260            } else if self.match_token(TokenType::Default) {
22261                Expression::Identifier(Identifier::new("DEFAULT".to_string()))
22262            } else {
22263                self.parse_expression()?
22264            };
22265
22266            items.push(SetItem {
22267                name,
22268                value,
22269                kind,
22270                no_equals: false,
22271            });
22272
22273            if !self.match_token(TokenType::Comma) {
22274                break;
22275            }
22276        }
22277
22278        Ok(Expression::SetStatement(Box::new(SetStatement { items })))
22279    }
22280
22281    /// Parse Teradata SET QUERY_BAND statement
22282    fn parse_query_band(&mut self) -> Result<Expression> {
22283        self.expect(TokenType::Eq)?;
22284
22285        let value = if self.match_identifier("NONE") {
22286            Expression::Var(Box::new(Var {
22287                this: "NONE".to_string(),
22288            }))
22289        } else if self.check(TokenType::String) {
22290            Expression::Literal(Literal::String(self.expect_string()?))
22291        } else {
22292            self.parse_primary()?
22293        };
22294
22295        let update = if self.match_token(TokenType::Update) || self.match_identifier("UPDATE") {
22296            Some(Box::new(Expression::Boolean(BooleanLiteral {
22297                value: true,
22298            })))
22299        } else {
22300            None
22301        };
22302
22303        let _ = self.match_token(TokenType::For);
22304
22305        let scope = if self.match_token(TokenType::Session) || self.match_identifier("SESSION") {
22306            if self.match_identifier("VOLATILE") {
22307                Some("SESSION VOLATILE".to_string())
22308            } else {
22309                Some("SESSION".to_string())
22310            }
22311        } else if self.match_token(TokenType::Transaction) || self.match_identifier("TRANSACTION") {
22312            Some("TRANSACTION".to_string())
22313        } else if self.match_identifier("VOLATILE") {
22314            Some("VOLATILE".to_string())
22315        } else {
22316            None
22317        };
22318
22319        Ok(Expression::QueryBand(Box::new(QueryBand {
22320            this: Box::new(value),
22321            scope: scope.map(|s| Box::new(Expression::Var(Box::new(Var { this: s })))),
22322            update,
22323        })))
22324    }
22325
22326    /// Parse FETCH FIRST/NEXT clause
22327    fn parse_fetch(&mut self) -> Result<Fetch> {
22328        // FETCH [FIRST|NEXT] [count] [PERCENT] [ROW|ROWS] [ONLY|WITH TIES]
22329
22330        // FIRST or NEXT
22331        let direction = if self.match_token(TokenType::First) {
22332            "FIRST".to_string()
22333        } else if self.match_token(TokenType::Next) {
22334            "NEXT".to_string()
22335        } else {
22336            "FIRST".to_string() // Default
22337        };
22338
22339        // Optional count - but check if next token is ROW/ROWS/PERCENT/ONLY (no count)
22340        let count = if !self.check(TokenType::Row)
22341            && !self.check(TokenType::Rows)
22342            && !self.check(TokenType::Percent)
22343            && !self.check(TokenType::Only)
22344        {
22345            // Accept number, parenthesized expression, or TSQL @variable (Var token)
22346            if self.check(TokenType::Number)
22347                || self.check(TokenType::LParen)
22348                || self.check(TokenType::DAt)
22349                || self.check(TokenType::Var)
22350            {
22351                Some(self.parse_primary()?)
22352            } else {
22353                None
22354            }
22355        } else {
22356            None
22357        };
22358
22359        // PERCENT modifier
22360        let percent = self.match_token(TokenType::Percent);
22361
22362        // ROW or ROWS
22363        let rows = self.match_token(TokenType::Row) || self.match_token(TokenType::Rows);
22364
22365        // ONLY or WITH TIES
22366        self.match_token(TokenType::Only);
22367        let with_ties = self.match_keywords(&[TokenType::With, TokenType::Ties]);
22368
22369        Ok(Fetch {
22370            direction,
22371            count,
22372            percent,
22373            rows,
22374            with_ties,
22375        })
22376    }
22377
22378    /// Parse a qualified name (schema.table.column or just table)
22379    fn parse_qualified_name(&mut self) -> Result<Expression> {
22380        let first = self.expect_identifier_or_keyword()?;
22381        let mut parts = vec![first];
22382
22383        while self.match_token(TokenType::Dot) {
22384            let next = self.expect_identifier_or_keyword()?;
22385            parts.push(next);
22386        }
22387
22388        if parts.len() == 1 {
22389            Ok(Expression::Identifier(Identifier::new(parts.remove(0))))
22390        } else if parts.len() == 2 {
22391            Ok(Expression::Column(Column {
22392                table: Some(Identifier::new(parts[0].clone())),
22393                name: Identifier::new(parts[1].clone()),
22394                join_mark: false,
22395                trailing_comments: Vec::new(),
22396                span: None,
22397                inferred_type: None,
22398            }))
22399        } else {
22400            // For 3+ parts, create a Column with concatenated table parts
22401            let column_name = parts.pop().unwrap();
22402            let table_name = parts.join(".");
22403            Ok(Expression::Column(Column {
22404                table: Some(Identifier::new(table_name)),
22405                name: Identifier::new(column_name),
22406                join_mark: false,
22407                trailing_comments: Vec::new(),
22408                span: None,
22409                inferred_type: None,
22410            }))
22411        }
22412    }
22413
22414    // ==================== Phase 4: Additional DDL Parsing ====================
22415
22416    /// Parse CREATE SCHEMA statement
22417    fn parse_create_schema(&mut self, leading_comments: Vec<String>) -> Result<Expression> {
22418        self.expect(TokenType::Schema)?;
22419
22420        let if_not_exists =
22421            self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
22422        let name = self.expect_identifier_with_quoted()?;
22423
22424        // Parse CLONE clause (Snowflake)
22425        let clone_from = if self.match_identifier("CLONE") {
22426            Some(self.expect_identifier_with_quoted()?)
22427        } else {
22428            None
22429        };
22430
22431        // Parse AT/BEFORE clause for time travel (Snowflake)
22432        // Note: BEFORE is a keyword token, AT is an identifier
22433        let at_clause = if self.match_identifier("AT") || self.match_token(TokenType::Before) {
22434            let keyword = self.previous().text.to_uppercase();
22435            self.expect(TokenType::LParen)?;
22436            // Parse the content: OFFSET => value or TIMESTAMP => value
22437            let mut result = format!("{} (", keyword);
22438            let mut prev_token_type: Option<TokenType> = None;
22439            let mut paren_depth = 1; // Track nested parens
22440            while !self.is_at_end() && paren_depth > 0 {
22441                let token = self.advance();
22442                if token.token_type == TokenType::LParen {
22443                    paren_depth += 1;
22444                } else if token.token_type == TokenType::RParen {
22445                    paren_depth -= 1;
22446                    if paren_depth == 0 {
22447                        break; // Don't include the closing paren in result yet
22448                    }
22449                }
22450                // Smart spacing: no space after ( or => or - and no space before (
22451                let needs_space = !result.ends_with('(')
22452                    && prev_token_type != Some(TokenType::Arrow)
22453                    && prev_token_type != Some(TokenType::Dash)
22454                    && prev_token_type != Some(TokenType::LParen)
22455                    && token.token_type != TokenType::LParen; // no space before (
22456                if needs_space
22457                    && token.token_type != TokenType::RParen
22458                    && token.token_type != TokenType::Comma
22459                {
22460                    result.push(' ');
22461                }
22462                // Properly quote string literals
22463                if token.token_type == TokenType::String {
22464                    result.push('\'');
22465                    result.push_str(&token.text.replace('\'', "''"));
22466                    result.push('\'');
22467                } else {
22468                    result.push_str(&token.text);
22469                }
22470                if token.token_type == TokenType::Arrow || token.token_type == TokenType::Comma {
22471                    result.push(' ');
22472                }
22473                prev_token_type = Some(token.token_type);
22474            }
22475            result.push(')');
22476            Some(Expression::Raw(Raw { sql: result }))
22477        } else {
22478            None
22479        };
22480
22481        let authorization = if self.match_token(TokenType::Authorization) {
22482            Some(Identifier::new(self.expect_identifier()?))
22483        } else {
22484            None
22485        };
22486
22487        // Parse schema properties like DEFAULT COLLATE or WITH (properties)
22488        let mut properties = Vec::new();
22489
22490        // Parse WITH (prop1=val1, prop2=val2, ...) (Trino/Presto)
22491        if self.match_token(TokenType::With) {
22492            self.expect(TokenType::LParen)?;
22493            loop {
22494                // Parse property name (identifier or string)
22495                let prop_name = if self.check(TokenType::String) {
22496                    Expression::Literal(Literal::String(self.expect_string()?))
22497                } else {
22498                    Expression::Identifier(Identifier::new(self.expect_identifier_or_keyword()?))
22499                };
22500                self.expect(TokenType::Eq)?;
22501                // Parse property value
22502                let prop_value = self.parse_expression()?;
22503                // Create Property expression: key=value
22504                properties.push(Expression::Property(Box::new(Property {
22505                    this: Box::new(prop_name),
22506                    value: Some(Box::new(prop_value)),
22507                })));
22508                if !self.match_token(TokenType::Comma) {
22509                    break;
22510                }
22511            }
22512            self.expect(TokenType::RParen)?;
22513        }
22514
22515        // Parse DEFAULT COLLATE 'value' (BigQuery)
22516        if self.match_token(TokenType::Default) && self.match_token(TokenType::Collate) {
22517            // Parse the collation value (could be string literal or identifier)
22518            let collation = self.parse_primary()?;
22519            properties.push(Expression::CollateProperty(Box::new(CollateProperty {
22520                this: Box::new(collation),
22521                default: Some(Box::new(Expression::Boolean(BooleanLiteral {
22522                    value: true,
22523                }))),
22524            })));
22525        }
22526
22527        Ok(Expression::CreateSchema(Box::new(CreateSchema {
22528            name,
22529            if_not_exists,
22530            authorization,
22531            clone_from,
22532            at_clause,
22533            properties,
22534            leading_comments,
22535        })))
22536    }
22537
22538    /// Parse DROP SCHEMA statement
22539    fn parse_drop_schema(&mut self) -> Result<Expression> {
22540        self.expect(TokenType::Schema)?;
22541
22542        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
22543        let name = Identifier::new(self.expect_identifier()?);
22544
22545        let cascade = self.match_token(TokenType::Cascade);
22546        if !cascade {
22547            self.match_token(TokenType::Restrict);
22548        }
22549
22550        Ok(Expression::DropSchema(Box::new(DropSchema {
22551            name,
22552            if_exists,
22553            cascade,
22554        })))
22555    }
22556
22557    /// Parse CREATE DATABASE statement
22558    fn parse_create_database(&mut self) -> Result<Expression> {
22559        self.expect(TokenType::Database)?;
22560
22561        let if_not_exists =
22562            self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
22563        let name = Identifier::new(self.expect_identifier()?);
22564
22565        // Check for Snowflake CLONE clause
22566        let clone_from = if self.match_identifier("CLONE") {
22567            Some(Identifier::new(self.expect_identifier()?))
22568        } else {
22569            None
22570        };
22571
22572        // Parse AT/BEFORE clause for time travel (Snowflake)
22573        // Note: BEFORE is a keyword token, AT is an identifier
22574        let at_clause = if self.match_identifier("AT") || self.match_token(TokenType::Before) {
22575            let keyword = self.previous().text.to_uppercase();
22576            self.expect(TokenType::LParen)?;
22577            // Parse the content: OFFSET => value or TIMESTAMP => value
22578            let mut result = format!("{} (", keyword);
22579            let mut prev_token_type: Option<TokenType> = None;
22580            let mut paren_depth = 1; // Track nested parens
22581            while !self.is_at_end() && paren_depth > 0 {
22582                let token = self.advance();
22583                if token.token_type == TokenType::LParen {
22584                    paren_depth += 1;
22585                } else if token.token_type == TokenType::RParen {
22586                    paren_depth -= 1;
22587                    if paren_depth == 0 {
22588                        break; // Don't include the closing paren in result yet
22589                    }
22590                }
22591                // Smart spacing: no space after ( or => or - and no space before (
22592                let needs_space = !result.ends_with('(')
22593                    && prev_token_type != Some(TokenType::Arrow)
22594                    && prev_token_type != Some(TokenType::Dash)
22595                    && prev_token_type != Some(TokenType::LParen)
22596                    && token.token_type != TokenType::LParen; // no space before (
22597                if needs_space
22598                    && token.token_type != TokenType::RParen
22599                    && token.token_type != TokenType::Comma
22600                {
22601                    result.push(' ');
22602                }
22603                // Properly quote string literals
22604                if token.token_type == TokenType::String {
22605                    result.push('\'');
22606                    result.push_str(&token.text.replace('\'', "''"));
22607                    result.push('\'');
22608                } else {
22609                    result.push_str(&token.text);
22610                }
22611                if token.token_type == TokenType::Arrow || token.token_type == TokenType::Comma {
22612                    result.push(' ');
22613                }
22614                prev_token_type = Some(token.token_type);
22615            }
22616            result.push(')');
22617            Some(Expression::Raw(Raw { sql: result }))
22618        } else {
22619            None
22620        };
22621
22622        // ClickHouse: ON CLUSTER clause
22623        let _on_cluster = self.parse_on_cluster_clause()?;
22624
22625        let mut options = Vec::new();
22626
22627        // Parse database options
22628        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
22629            if self.match_identifier("OWNER") || self.match_token(TokenType::Eq) {
22630                self.match_token(TokenType::Eq);
22631                options.push(DatabaseOption::Owner(Identifier::new(
22632                    self.expect_identifier()?,
22633                )));
22634            } else if self.match_identifier("TEMPLATE") {
22635                self.match_token(TokenType::Eq);
22636                options.push(DatabaseOption::Template(Identifier::new(
22637                    self.expect_identifier()?,
22638                )));
22639            } else if self.match_identifier("ENCODING") {
22640                self.match_token(TokenType::Eq);
22641                let encoding = if self.check(TokenType::String) {
22642                    let tok = self.advance();
22643                    tok.text.trim_matches('\'').to_string()
22644                } else {
22645                    self.expect_identifier()?
22646                };
22647                options.push(DatabaseOption::Encoding(encoding));
22648            } else if self.match_identifier("CHARACTER") {
22649                self.match_token(TokenType::Set);
22650                self.match_token(TokenType::Eq);
22651                let charset = if self.check(TokenType::String) {
22652                    let tok = self.advance();
22653                    tok.text.trim_matches('\'').to_string()
22654                } else {
22655                    self.expect_identifier()?
22656                };
22657                options.push(DatabaseOption::CharacterSet(charset));
22658            } else if self.match_identifier("COLLATE") {
22659                self.match_token(TokenType::Eq);
22660                let collate = if self.check(TokenType::String) {
22661                    let tok = self.advance();
22662                    tok.text.trim_matches('\'').to_string()
22663                } else {
22664                    self.expect_identifier()?
22665                };
22666                options.push(DatabaseOption::Collate(collate));
22667            } else if self.match_identifier("LOCATION") {
22668                self.match_token(TokenType::Eq);
22669                let loc = if self.check(TokenType::String) {
22670                    let tok = self.advance();
22671                    tok.text.trim_matches('\'').to_string()
22672                } else {
22673                    self.expect_identifier()?
22674                };
22675                options.push(DatabaseOption::Location(loc));
22676            } else {
22677                break;
22678            }
22679        }
22680
22681        Ok(Expression::CreateDatabase(Box::new(CreateDatabase {
22682            name,
22683            if_not_exists,
22684            options,
22685            clone_from,
22686            at_clause,
22687        })))
22688    }
22689
22690    /// Parse DROP DATABASE statement
22691    fn parse_drop_database(&mut self) -> Result<Expression> {
22692        self.expect(TokenType::Database)?;
22693
22694        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
22695
22696        // ClickHouse: IF EMPTY
22697        if !if_exists
22698            && matches!(
22699                self.config.dialect,
22700                Some(crate::dialects::DialectType::ClickHouse)
22701            )
22702        {
22703            if self.check(TokenType::If)
22704                && self.current + 1 < self.tokens.len()
22705                && self.tokens[self.current + 1]
22706                    .text
22707                    .eq_ignore_ascii_case("EMPTY")
22708            {
22709                self.advance(); // consume IF
22710                self.advance(); // consume EMPTY
22711            }
22712        }
22713        let name = Identifier::new(self.expect_identifier()?);
22714
22715        // ClickHouse: ON CLUSTER clause
22716        if matches!(
22717            self.config.dialect,
22718            Some(crate::dialects::DialectType::ClickHouse)
22719        ) {
22720            let _ = self.parse_on_cluster_clause()?;
22721            self.match_identifier("SYNC");
22722        }
22723
22724        Ok(Expression::DropDatabase(Box::new(DropDatabase {
22725            name,
22726            if_exists,
22727        })))
22728    }
22729
22730    /// Parse CREATE FUNCTION statement
22731    fn parse_create_function(
22732        &mut self,
22733        or_replace: bool,
22734        temporary: bool,
22735        is_table_function: bool,
22736    ) -> Result<Expression> {
22737        self.expect(TokenType::Function)?;
22738
22739        let if_not_exists =
22740            self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
22741        let name = self.parse_table_ref()?;
22742
22743        // Parse parameters (optional - some dialects allow CREATE FUNCTION f AS 'body')
22744        let (parameters, has_parens) = if self.match_token(TokenType::LParen) {
22745            let params = self.parse_function_parameters()?;
22746            self.expect(TokenType::RParen)?;
22747            (params, true)
22748        } else {
22749            (Vec::new(), false)
22750        };
22751
22752        // Track if LANGUAGE appears before RETURNS
22753        let mut language_first = false;
22754        let mut return_type = None;
22755        let mut language = None;
22756        let mut sql_data_access = None;
22757
22758        // Check for LANGUAGE before RETURNS
22759        if self.match_token(TokenType::Language) {
22760            language = Some(self.expect_identifier_or_keyword()?);
22761            language_first = true;
22762        }
22763
22764        // Parse RETURNS clause (may come before or after LANGUAGE)
22765        let mut returns_table_body: Option<String> = None;
22766        if self.match_token(TokenType::Returns) {
22767            if self.check(TokenType::Var) && self.peek().text.starts_with('@') {
22768                // TSQL: RETURNS @var TABLE (col_defs)
22769                let var_name = self.advance().text.clone();
22770                if self.check(TokenType::Table) {
22771                    self.advance(); // consume TABLE
22772                    return_type = Some(DataType::Custom {
22773                        name: "TABLE".to_string(),
22774                    });
22775                    // Parse column definitions
22776                    if self.match_token(TokenType::LParen) {
22777                        let start = self.current;
22778                        let mut depth = 1;
22779                        while depth > 0 && !self.is_at_end() {
22780                            if self.check(TokenType::LParen) {
22781                                depth += 1;
22782                            }
22783                            if self.check(TokenType::RParen) {
22784                                depth -= 1;
22785                                if depth == 0 {
22786                                    break;
22787                                }
22788                            }
22789                            self.advance();
22790                        }
22791                        // Reconstruct the column definitions with proper spacing
22792                        let mut col_defs_str = String::new();
22793                        for (i, tok) in self.tokens[start..self.current].iter().enumerate() {
22794                            // Don't add space before comma, LParen, RParen
22795                            // Don't add space after LParen
22796                            let prev_tok = if i > 0 {
22797                                Some(&self.tokens[start + i - 1])
22798                            } else {
22799                                None
22800                            };
22801                            let needs_space = i > 0
22802                                && tok.token_type != TokenType::Comma
22803                                && tok.token_type != TokenType::RParen
22804                                && tok.token_type != TokenType::LParen
22805                                && prev_tok
22806                                    .map(|p| p.token_type != TokenType::LParen)
22807                                    .unwrap_or(true);
22808                            if needs_space {
22809                                col_defs_str.push(' ');
22810                            }
22811                            col_defs_str.push_str(&tok.text);
22812                        }
22813                        returns_table_body = Some(format!("{} TABLE ({})", var_name, col_defs_str));
22814                        self.expect(TokenType::RParen)?;
22815                    } else {
22816                        returns_table_body = Some(format!("{} TABLE", var_name));
22817                    }
22818                } else {
22819                    // Parse data type after var name
22820                    return_type = Some(self.parse_data_type()?);
22821                }
22822            } else if self.check(TokenType::Table) {
22823                // Could be:
22824                // - TSQL: RETURNS TABLE AS RETURN ...
22825                // - BigQuery: RETURNS TABLE <col1 TYPE, col2 TYPE>
22826                // - Snowflake: RETURNS TABLE(col1 TYPE, col2 TYPE)
22827                self.advance(); // consume TABLE
22828                if self.check(TokenType::Lt) {
22829                    // BigQuery: RETURNS TABLE <col1 TYPE, col2 TYPE>
22830                    self.advance(); // consume <
22831                    let mut cols = Vec::new();
22832                    loop {
22833                        let col_name = self.expect_identifier()?;
22834                        let col_type = self.parse_data_type()?;
22835                        cols.push(format!(
22836                            "{} {}",
22837                            col_name,
22838                            self.data_type_to_string(&col_type)
22839                        ));
22840                        if !self.match_token(TokenType::Comma) {
22841                            break;
22842                        }
22843                    }
22844                    if !self.match_token(TokenType::Gt) {
22845                        return Err(self.parse_error("Expected > after TABLE column definitions"));
22846                    }
22847                    returns_table_body = Some(format!("TABLE <{}>", cols.join(", ")));
22848                } else if self.check(TokenType::LParen) {
22849                    // Snowflake: RETURNS TABLE(col1 TYPE, col2 TYPE)
22850                    self.advance(); // consume (
22851                    let mut cols = Vec::new();
22852                    loop {
22853                        let col_name = self.expect_identifier()?;
22854                        let col_type = self.parse_data_type()?;
22855                        cols.push(format!(
22856                            "{} {}",
22857                            col_name,
22858                            self.data_type_to_string(&col_type)
22859                        ));
22860                        if !self.match_token(TokenType::Comma) {
22861                            break;
22862                        }
22863                    }
22864                    self.expect(TokenType::RParen)?;
22865                    returns_table_body = Some(format!("TABLE ({})", cols.join(", ")));
22866                } else {
22867                    // TSQL: RETURNS TABLE AS RETURN ...
22868                    return_type = Some(DataType::Custom {
22869                        name: "TABLE".to_string(),
22870                    });
22871                }
22872            } else {
22873                // Use parse_function_return_type to preserve original type names like 'integer'
22874                return_type = Some(self.parse_function_return_type()?);
22875            }
22876        }
22877
22878        let mut deterministic = None;
22879        let mut returns_null_on_null_input = None;
22880        let mut strict = false;
22881        let mut security = None;
22882        let mut body = None;
22883        let mut set_options: Vec<FunctionSetOption> = Vec::new();
22884        let mut property_order: Vec<FunctionPropertyKind> = Vec::new();
22885        let mut options: Vec<Expression> = Vec::new();
22886        let mut environment: Vec<Expression> = Vec::new();
22887
22888        // Parse function options
22889        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
22890            if self.check(TokenType::Returns)
22891                && self.current + 1 < self.tokens.len()
22892                && self.tokens[self.current + 1].token_type == TokenType::Null
22893            {
22894                // RETURNS NULL ON NULL INPUT
22895                self.advance(); // consume RETURNS
22896                self.advance(); // consume NULL
22897                self.match_token(TokenType::On);
22898                self.match_token(TokenType::Null);
22899                self.match_token(TokenType::Input);
22900                returns_null_on_null_input = Some(true);
22901                if !property_order.contains(&FunctionPropertyKind::NullInput) {
22902                    property_order.push(FunctionPropertyKind::NullInput);
22903                }
22904            } else if self.match_token(TokenType::Returns) {
22905                // RETURNS can come after LANGUAGE
22906                return_type = Some(self.parse_data_type()?);
22907            } else if self.match_token(TokenType::Language) {
22908                // Language can be SQL, PLPGSQL, PYTHON, etc.
22909                language = Some(self.expect_identifier_or_keyword()?);
22910                if !property_order.contains(&FunctionPropertyKind::Language) {
22911                    property_order.push(FunctionPropertyKind::Language);
22912                }
22913            } else if self.match_token(TokenType::Not) && self.match_identifier("DETERMINISTIC") {
22914                deterministic = Some(false);
22915                if !property_order.contains(&FunctionPropertyKind::Determinism) {
22916                    property_order.push(FunctionPropertyKind::Determinism);
22917                }
22918            } else if self.match_identifier("DETERMINISTIC") {
22919                deterministic = Some(true);
22920                if !property_order.contains(&FunctionPropertyKind::Determinism) {
22921                    property_order.push(FunctionPropertyKind::Determinism);
22922                }
22923            } else if self.match_identifier("IMMUTABLE") {
22924                deterministic = Some(true);
22925                if !property_order.contains(&FunctionPropertyKind::Determinism) {
22926                    property_order.push(FunctionPropertyKind::Determinism);
22927                }
22928            } else if self.match_identifier("STABLE") || self.match_identifier("VOLATILE") {
22929                deterministic = Some(false);
22930                if !property_order.contains(&FunctionPropertyKind::Determinism) {
22931                    property_order.push(FunctionPropertyKind::Determinism);
22932                }
22933            } else if self.match_identifier("STRICT") {
22934                returns_null_on_null_input = Some(true);
22935                strict = true;
22936                if !property_order.contains(&FunctionPropertyKind::NullInput) {
22937                    property_order.push(FunctionPropertyKind::NullInput);
22938                }
22939            } else if self.match_identifier("CALLED") {
22940                self.match_token(TokenType::On);
22941                self.match_token(TokenType::Null);
22942                self.match_token(TokenType::Input);
22943                returns_null_on_null_input = Some(false);
22944                if !property_order.contains(&FunctionPropertyKind::NullInput) {
22945                    property_order.push(FunctionPropertyKind::NullInput);
22946                }
22947            } else if self.match_identifier("SECURITY") {
22948                if self.match_identifier("DEFINER") {
22949                    security = Some(FunctionSecurity::Definer);
22950                } else if self.match_identifier("INVOKER") {
22951                    security = Some(FunctionSecurity::Invoker);
22952                }
22953                if !property_order.contains(&FunctionPropertyKind::Security) {
22954                    property_order.push(FunctionPropertyKind::Security);
22955                }
22956            } else if self.match_identifier("CONTAINS") {
22957                // CONTAINS SQL
22958                self.match_identifier("SQL");
22959                sql_data_access = Some(SqlDataAccess::ContainsSql);
22960                if !property_order.contains(&FunctionPropertyKind::SqlDataAccess) {
22961                    property_order.push(FunctionPropertyKind::SqlDataAccess);
22962                }
22963            } else if self.match_identifier("READS") {
22964                // READS SQL DATA
22965                self.match_identifier("SQL");
22966                self.match_identifier("DATA");
22967                sql_data_access = Some(SqlDataAccess::ReadsSqlData);
22968                if !property_order.contains(&FunctionPropertyKind::SqlDataAccess) {
22969                    property_order.push(FunctionPropertyKind::SqlDataAccess);
22970                }
22971            } else if self.match_identifier("MODIFIES") {
22972                // MODIFIES SQL DATA
22973                self.match_identifier("SQL");
22974                self.match_identifier("DATA");
22975                sql_data_access = Some(SqlDataAccess::ModifiesSqlData);
22976                if !property_order.contains(&FunctionPropertyKind::SqlDataAccess) {
22977                    property_order.push(FunctionPropertyKind::SqlDataAccess);
22978                }
22979            } else if self.match_token(TokenType::No) && self.match_identifier("SQL") {
22980                // NO SQL
22981                sql_data_access = Some(SqlDataAccess::NoSql);
22982                if !property_order.contains(&FunctionPropertyKind::SqlDataAccess) {
22983                    property_order.push(FunctionPropertyKind::SqlDataAccess);
22984                }
22985            } else if self.match_token(TokenType::Set) {
22986                // PostgreSQL: SET key = value / SET key TO value / SET key FROM CURRENT
22987                let opt_name = self.expect_identifier_or_keyword()?;
22988                let value = if self.match_token(TokenType::From) {
22989                    // SET key FROM CURRENT
22990                    if !self.match_token(TokenType::Current) {
22991                        return Err(self.parse_error("Expected CURRENT after FROM in SET option"));
22992                    }
22993                    FunctionSetValue::FromCurrent
22994                } else {
22995                    // SET key = value or SET key TO value
22996                    let use_to = self.match_token(TokenType::To);
22997                    if !use_to && !self.match_token(TokenType::Eq) {
22998                        return Err(self.parse_error("Expected = or TO after SET key"));
22999                    }
23000                    // Value can be a string literal or identifier
23001                    let val = if self.check(TokenType::String) {
23002                        let tok = self.advance();
23003                        format!("'{}'", tok.text)
23004                    } else {
23005                        self.expect_identifier_or_keyword()?
23006                    };
23007                    FunctionSetValue::Value { value: val, use_to }
23008                };
23009                set_options.push(FunctionSetOption {
23010                    name: opt_name,
23011                    value,
23012                });
23013                if !property_order.contains(&FunctionPropertyKind::Set) {
23014                    property_order.push(FunctionPropertyKind::Set);
23015                }
23016            } else if self.match_token(TokenType::As) {
23017                // Parse function body: AS RETURN x, AS $$ ... $$, AS BEGIN ... END, AS 'body'
23018                if !property_order.contains(&FunctionPropertyKind::As) {
23019                    property_order.push(FunctionPropertyKind::As);
23020                }
23021                if self.match_identifier("RETURN") {
23022                    // AS RETURN expression (or SELECT statement for TSQL TVFs)
23023                    let expr = if self.check(TokenType::Select) || self.check(TokenType::With) {
23024                        // TSQL: AS RETURN SELECT ... for table-valued functions
23025                        self.parse_statement()?
23026                    } else {
23027                        self.parse_expression()?
23028                    };
23029                    body = Some(FunctionBody::Return(expr));
23030                } else if self.check(TokenType::Select) || self.check(TokenType::With) {
23031                    // TSQL: AS SELECT ... for table-valued functions (without RETURN keyword)
23032                    let stmt = self.parse_statement()?;
23033                    body = Some(FunctionBody::Expression(stmt));
23034                } else if self.check(TokenType::DollarString) {
23035                    let tok = self.advance();
23036                    // Parse the dollar string token to extract tag and content
23037                    let (tag, content) = crate::tokens::parse_dollar_string_token(&tok.text);
23038                    body = Some(FunctionBody::DollarQuoted { content, tag });
23039                } else if self.check(TokenType::String) {
23040                    let tok = self.advance();
23041                    body = Some(FunctionBody::StringLiteral(tok.text.clone()));
23042                } else if self.match_token(TokenType::Begin) {
23043                    // Parse BEGIN...END block
23044                    let mut block_content = String::new();
23045                    let mut depth = 1;
23046                    while depth > 0 && !self.is_at_end() {
23047                        let tok = self.advance();
23048                        if tok.token_type == TokenType::Begin {
23049                            depth += 1;
23050                        } else if tok.token_type == TokenType::End {
23051                            depth -= 1;
23052                            if depth == 0 {
23053                                break;
23054                            }
23055                        }
23056                        block_content.push_str(&tok.text);
23057                        block_content.push(' ');
23058                    }
23059                    body = Some(FunctionBody::Block(block_content.trim().to_string()));
23060                } else {
23061                    // Expression-based body
23062                    let expr = self.parse_expression()?;
23063                    body = Some(FunctionBody::Expression(expr));
23064                }
23065            } else if self.match_identifier("RETURN") {
23066                // RETURN expression (or SELECT statement for TSQL TVFs)
23067                let expr = if self.check(TokenType::Select) || self.check(TokenType::With) {
23068                    self.parse_statement()?
23069                } else {
23070                    self.parse_expression()?
23071                };
23072                body = Some(FunctionBody::Return(expr));
23073            } else if self.match_identifier("EXTERNAL") {
23074                self.match_identifier("NAME");
23075                let ext_name = if self.check(TokenType::String) {
23076                    let tok = self.advance();
23077                    tok.text.trim_matches('\'').to_string()
23078                } else {
23079                    self.expect_identifier()?
23080                };
23081                body = Some(FunctionBody::External(ext_name));
23082            } else if self.match_identifier("OPTIONS") {
23083                // BigQuery: OPTIONS (key=value, ...) - track in property_order
23084                let parsed_options = self.parse_options_list()?;
23085                options.extend(parsed_options);
23086                if !property_order.contains(&FunctionPropertyKind::Options) {
23087                    property_order.push(FunctionPropertyKind::Options);
23088                }
23089            } else if self.match_identifier("ENVIRONMENT") {
23090                // Databricks: ENVIRONMENT (dependencies = '...', environment_version = '...')
23091                let parsed_env = self.parse_environment_list()?;
23092                environment.extend(parsed_env);
23093                if !property_order.contains(&FunctionPropertyKind::Environment) {
23094                    property_order.push(FunctionPropertyKind::Environment);
23095                }
23096            } else {
23097                break;
23098            }
23099        }
23100
23101        // BigQuery: OPTIONS (key=value, ...) can also appear after AS body (legacy position)
23102        if options.is_empty() && self.match_identifier("OPTIONS") {
23103            let parsed_options = self.parse_options_list()?;
23104            options.extend(parsed_options);
23105            if !property_order.contains(&FunctionPropertyKind::Options) {
23106                property_order.push(FunctionPropertyKind::Options);
23107            }
23108        }
23109
23110        Ok(Expression::CreateFunction(Box::new(CreateFunction {
23111            name,
23112            parameters,
23113            return_type,
23114            body,
23115            or_replace,
23116            if_not_exists,
23117            temporary,
23118            language,
23119            deterministic,
23120            returns_null_on_null_input,
23121            security,
23122            has_parens,
23123            sql_data_access,
23124            returns_table_body,
23125            language_first,
23126            set_options,
23127            strict,
23128            options,
23129            is_table_function,
23130            property_order,
23131            environment,
23132        })))
23133    }
23134
23135    /// Parse function parameters
23136    fn parse_function_parameters(&mut self) -> Result<Vec<FunctionParameter>> {
23137        let mut params = Vec::new();
23138
23139        if self.check(TokenType::RParen) {
23140            return Ok(params);
23141        }
23142
23143        loop {
23144            let mut mode = None;
23145            let mut mode_text: Option<String> = None;
23146
23147            // Check for parameter mode (IN, OUT, INOUT, VARIADIC)
23148            // Note: OUT, INOUT, VARIADIC are tokenized as Var, not as dedicated keywords
23149            if self.match_token(TokenType::In) {
23150                // IN or IN OUT
23151                if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("OUT") {
23152                    let out_text = self.advance().text.clone(); // consume OUT
23153                    mode_text = Some(format!("IN {}", out_text));
23154                    mode = Some(ParameterMode::InOut);
23155                } else {
23156                    mode_text = Some("IN".to_string());
23157                    mode = Some(ParameterMode::In);
23158                }
23159            } else if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("OUT") {
23160                let text = self.advance().text.clone();
23161                mode_text = Some(text);
23162                mode = Some(ParameterMode::Out);
23163            } else if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("INOUT") {
23164                let text = self.advance().text.clone();
23165                mode_text = Some(text);
23166                mode = Some(ParameterMode::InOut);
23167            } else if self.check(TokenType::Var)
23168                && self.peek().text.eq_ignore_ascii_case("VARIADIC")
23169            {
23170                let text = self.advance().text.clone();
23171                mode_text = Some(text);
23172                mode = Some(ParameterMode::Variadic);
23173            }
23174
23175            // Try to parse name and type
23176            // After a mode keyword (VARIADIC, OUT, etc.), the next thing could be:
23177            //   - a type directly (e.g., VARIADIC INT[], OUT INT)
23178            //   - a name then a type (e.g., VARIADIC a INT[], OUT result INT)
23179            //
23180            // Strategy: use backtracking. Save position, try parsing as data type.
23181            // If the result is followed by , or ) or DEFAULT, it was a type-only param.
23182            // Otherwise, restore position and parse as name + type.
23183            let (name, data_type) = if mode.is_some() {
23184                let saved = self.current;
23185                // Try parsing as a data type directly
23186                let type_result = self.parse_data_type();
23187                if let Ok(dt) = type_result {
23188                    if self.check(TokenType::Comma)
23189                        || self.check(TokenType::RParen)
23190                        || self.check(TokenType::Default)
23191                        || self.check(TokenType::Eq)
23192                    {
23193                        // Successfully parsed as a type-only parameter
23194                        (None, dt)
23195                    } else {
23196                        // Not followed by comma/rparen — restore and parse as name + type
23197                        self.current = saved;
23198                        let first_ident =
23199                            if self.check(TokenType::Input) || self.check(TokenType::Output) {
23200                                let token = self.advance();
23201                                Identifier {
23202                                    name: token.text,
23203                                    quoted: false,
23204                                    trailing_comments: Vec::new(),
23205                                    span: None,
23206                                }
23207                            } else {
23208                                self.expect_identifier_with_quoted()?
23209                            };
23210                        self.match_token(TokenType::As);
23211                        let dt = self.parse_data_type()?;
23212                        (Some(first_ident), dt)
23213                    }
23214                } else {
23215                    // Type parse failed — restore and try as name + type
23216                    self.current = saved;
23217                    let first_ident =
23218                        if self.check(TokenType::Input) || self.check(TokenType::Output) {
23219                            let token = self.advance();
23220                            Identifier {
23221                                name: token.text,
23222                                quoted: false,
23223                                trailing_comments: Vec::new(),
23224                                span: None,
23225                            }
23226                        } else {
23227                            self.expect_identifier_with_quoted()?
23228                        };
23229                    if self.check(TokenType::Comma)
23230                        || self.check(TokenType::RParen)
23231                        || self.check(TokenType::Default)
23232                    {
23233                        (None, self.identifier_to_datatype(&first_ident.name)?)
23234                    } else {
23235                        self.match_token(TokenType::As);
23236                        let dt = self.parse_data_type()?;
23237                        (Some(first_ident), dt)
23238                    }
23239                }
23240            } else {
23241                // No mode keyword — original logic
23242                // Handle keywords like INPUT that may be used as parameter names
23243                let first_ident = if self.check(TokenType::Input) || self.check(TokenType::Output) {
23244                    let token = self.advance();
23245                    Identifier {
23246                        name: token.text,
23247                        quoted: false,
23248                        trailing_comments: Vec::new(),
23249                        span: None,
23250                    }
23251                } else {
23252                    self.expect_identifier_with_quoted()?
23253                };
23254
23255                // Check if next token is a type or if this was the type
23256                if self.check(TokenType::Comma)
23257                    || self.check(TokenType::RParen)
23258                    || self.check(TokenType::Default)
23259                {
23260                    // This was the type, no name
23261                    (None, self.identifier_to_datatype(&first_ident.name)?)
23262                } else {
23263                    // This was the name, next is type
23264                    // TSQL allows: @param AS type (optional AS keyword)
23265                    self.match_token(TokenType::As);
23266                    let dt = self.parse_data_type()?;
23267                    (Some(first_ident), dt)
23268                }
23269            };
23270
23271            let default = if self.match_token(TokenType::Default) || self.match_token(TokenType::Eq)
23272            {
23273                Some(self.parse_expression()?)
23274            } else {
23275                None
23276            };
23277
23278            params.push(FunctionParameter {
23279                name,
23280                data_type,
23281                mode,
23282                default,
23283                mode_text: mode_text.clone(),
23284            });
23285
23286            if !self.match_token(TokenType::Comma) {
23287                break;
23288            }
23289        }
23290
23291        Ok(params)
23292    }
23293
23294    /// Parse TSQL-style unparenthesized procedure parameters
23295    /// Format: @param1 TYPE, @param2 TYPE, ... AS
23296    fn parse_tsql_procedure_params(&mut self) -> Result<Vec<FunctionParameter>> {
23297        let mut params = Vec::new();
23298        loop {
23299            if !self.check(TokenType::Var) {
23300                break;
23301            }
23302            let name = self.advance().text.clone();
23303            // Skip optional AS keyword between name and type
23304            self.match_token(TokenType::As);
23305            let data_type = self.parse_data_type()?;
23306            let default = if self.match_token(TokenType::Default) || self.match_token(TokenType::Eq)
23307            {
23308                Some(self.parse_expression()?)
23309            } else {
23310                None
23311            };
23312            params.push(FunctionParameter {
23313                name: Some(Identifier::new(name)),
23314                data_type,
23315                mode: None,
23316                default,
23317                mode_text: None,
23318            });
23319            if !self.match_token(TokenType::Comma) {
23320                break;
23321            }
23322        }
23323        Ok(params)
23324    }
23325
23326    /// Convert identifier to DataType for function parameters.
23327    /// Preserves the original identifier name to maintain exact type name as written.
23328    /// This matches Python sqlglot's behavior where function parameter types like 'integer'
23329    /// are stored as Identifiers rather than normalized DataTypes.
23330    fn identifier_to_datatype(&self, ident: &str) -> Result<DataType> {
23331        // Always use DataType::Custom to preserve the exact type name as written.
23332        // This is important for identity tests where e.g. 'integer' should not be normalized to 'INT'.
23333        Ok(DataType::Custom {
23334            name: ident.to_string(),
23335        })
23336    }
23337
23338    /// Parse a data type for function RETURNS clause, preserving original type names.
23339    /// For simple type names like 'integer', preserves the original name rather than
23340    /// normalizing to INT. This matches Python sqlglot's behavior.
23341    fn parse_function_return_type(&mut self) -> Result<DataType> {
23342        // Check if it's a simple identifier that could be a type name
23343        if (self.check(TokenType::Identifier) || self.check(TokenType::Var))
23344            && !self.check_next(TokenType::LParen)  // Not a parameterized type like VARCHAR(10)
23345            && !self.check_next(TokenType::LBracket)
23346        // Not an array type
23347        {
23348            let type_name = self.advance().text.clone();
23349            // Check if the next token indicates we should use parse_data_type instead
23350            // For complex types, fall through to parse_data_type
23351            return Ok(DataType::Custom { name: type_name });
23352        }
23353
23354        // For complex types, use standard parsing
23355        self.parse_data_type()
23356    }
23357
23358    /// Parse DROP FUNCTION statement
23359    fn parse_drop_function(&mut self) -> Result<Expression> {
23360        self.expect(TokenType::Function)?;
23361
23362        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
23363        let name = self.parse_table_ref()?;
23364
23365        // Optional parameter types for overloaded functions
23366        let parameters = if self.match_token(TokenType::LParen) {
23367            let mut types = Vec::new();
23368            if !self.check(TokenType::RParen) {
23369                loop {
23370                    types.push(self.parse_data_type()?);
23371                    if !self.match_token(TokenType::Comma) {
23372                        break;
23373                    }
23374                }
23375            }
23376            self.expect(TokenType::RParen)?;
23377            Some(types)
23378        } else {
23379            None
23380        };
23381
23382        let cascade = self.match_token(TokenType::Cascade);
23383        if !cascade {
23384            self.match_token(TokenType::Restrict);
23385        }
23386
23387        Ok(Expression::DropFunction(Box::new(DropFunction {
23388            name,
23389            parameters,
23390            if_exists,
23391            cascade,
23392        })))
23393    }
23394
23395    /// Parse CREATE PROCEDURE statement
23396    fn parse_create_procedure(&mut self, or_replace: bool) -> Result<Expression> {
23397        // Check if PROC shorthand was used before consuming the token
23398        let use_proc_keyword = self.peek().text.to_uppercase() == "PROC";
23399        self.expect(TokenType::Procedure)?;
23400
23401        let if_not_exists =
23402            self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
23403        let name = self.parse_table_ref()?;
23404
23405        // Parse parameters (optional parentheses for TSQL)
23406        let (parameters, has_parens) = if self.match_token(TokenType::LParen) {
23407            let params = self.parse_function_parameters()?;
23408            self.expect(TokenType::RParen)?;
23409            (params, true)
23410        } else if self.check(TokenType::Var) && !self.check(TokenType::As) {
23411            // TSQL: CREATE PROCEDURE foo @a INTEGER, @b INTEGER AS ...
23412            // Parameters without parentheses
23413            let params = self.parse_tsql_procedure_params()?;
23414            (params, false)
23415        } else {
23416            (Vec::new(), false)
23417        };
23418
23419        let mut language = None;
23420        let mut security = None;
23421        let mut body = None;
23422        let mut return_type = None;
23423        let mut execute_as = None;
23424        let mut with_options: Vec<String> = Vec::new();
23425
23426        // Parse procedure options
23427        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
23428            if self.match_token(TokenType::Returns) {
23429                // RETURNS type (Snowflake)
23430                return_type = Some(self.parse_data_type()?);
23431            } else if self.match_identifier("EXECUTE") || self.match_token(TokenType::Execute) {
23432                // EXECUTE AS CALLER/OWNER (Snowflake)
23433                if self.match_token(TokenType::As) {
23434                    if self.match_identifier("CALLER") {
23435                        execute_as = Some("CALLER".to_string());
23436                    } else if self.match_identifier("OWNER") {
23437                        execute_as = Some("OWNER".to_string());
23438                    } else if self.match_identifier("SELF") {
23439                        execute_as = Some("SELF".to_string());
23440                    }
23441                }
23442            } else if self.match_token(TokenType::Language) {
23443                // Language can be SQL, PLPGSQL, PYTHON, etc.
23444                language = Some(self.expect_identifier_or_keyword()?);
23445            } else if self.match_identifier("SECURITY") {
23446                if self.match_identifier("DEFINER") {
23447                    security = Some(FunctionSecurity::Definer);
23448                } else if self.match_identifier("INVOKER") {
23449                    security = Some(FunctionSecurity::Invoker);
23450                }
23451            } else if self.match_token(TokenType::With) {
23452                // TSQL: WITH option1, option2, ... AS body
23453                // Options: ENCRYPTION, RECOMPILE, SCHEMABINDING, NATIVE_COMPILATION,
23454                //          EXECUTE AS {OWNER|SELF|CALLER|'username'}
23455                loop {
23456                    if self.match_identifier("EXECUTE") || self.match_token(TokenType::Execute) {
23457                        // EXECUTE AS {OWNER|SELF|CALLER|'username'}
23458                        self.expect(TokenType::As)?;
23459                        if self.check(TokenType::String) {
23460                            let tok = self.advance();
23461                            with_options.push(format!("EXECUTE AS '{}'", tok.text));
23462                        } else {
23463                            let ident = self.expect_identifier_or_keyword()?;
23464                            with_options.push(format!("EXECUTE AS {}", ident.to_uppercase()));
23465                        }
23466                    } else {
23467                        let opt = self.expect_identifier_or_keyword()?;
23468                        with_options.push(opt.to_uppercase());
23469                    }
23470                    if !self.match_token(TokenType::Comma) {
23471                        break;
23472                    }
23473                }
23474            } else if self.match_token(TokenType::As) {
23475                // Parse procedure body
23476                if self.check(TokenType::String) {
23477                    // TokenType::String means single-quoted - tokenizer strips quotes
23478                    let tok = self.advance();
23479                    body = Some(FunctionBody::StringLiteral(tok.text.clone()));
23480                } else if self.match_token(TokenType::Begin) {
23481                    // Parse BEGIN ... END block as a list of statements
23482                    let mut statements = Vec::new();
23483                    while !self.check(TokenType::End) && !self.is_at_end() {
23484                        // Skip optional semicolons between statements
23485                        while self.match_token(TokenType::Semicolon) {}
23486                        if self.check(TokenType::End) {
23487                            break;
23488                        }
23489                        statements.push(self.parse_statement()?);
23490                        // Skip optional semicolon after statement
23491                        self.match_token(TokenType::Semicolon);
23492                    }
23493                    self.expect(TokenType::End)?;
23494                    body = Some(FunctionBody::Statements(statements));
23495                } else {
23496                    // TSQL: AS <statement> (e.g., AS SELECT 1)
23497                    let stmt = self.parse_statement()?;
23498                    body = Some(FunctionBody::Expression(stmt));
23499                }
23500            } else {
23501                break;
23502            }
23503        }
23504
23505        Ok(Expression::CreateProcedure(Box::new(CreateProcedure {
23506            name,
23507            parameters,
23508            body,
23509            or_replace,
23510            if_not_exists,
23511            language,
23512            security,
23513            return_type,
23514            execute_as,
23515            with_options,
23516            has_parens,
23517            use_proc_keyword,
23518        })))
23519    }
23520
23521    /// Parse DROP PROCEDURE statement
23522    fn parse_drop_procedure(&mut self) -> Result<Expression> {
23523        self.expect(TokenType::Procedure)?;
23524
23525        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
23526        let name = self.parse_table_ref()?;
23527
23528        let parameters = if self.match_token(TokenType::LParen) {
23529            let mut types = Vec::new();
23530            if !self.check(TokenType::RParen) {
23531                loop {
23532                    types.push(self.parse_data_type()?);
23533                    if !self.match_token(TokenType::Comma) {
23534                        break;
23535                    }
23536                }
23537            }
23538            self.expect(TokenType::RParen)?;
23539            Some(types)
23540        } else {
23541            None
23542        };
23543
23544        let cascade = self.match_token(TokenType::Cascade);
23545        if !cascade {
23546            self.match_token(TokenType::Restrict);
23547        }
23548
23549        Ok(Expression::DropProcedure(Box::new(DropProcedure {
23550            name,
23551            parameters,
23552            if_exists,
23553            cascade,
23554        })))
23555    }
23556
23557    /// Parse CREATE SEQUENCE statement
23558    fn parse_create_sequence(&mut self, temporary: bool, or_replace: bool) -> Result<Expression> {
23559        self.expect(TokenType::Sequence)?;
23560
23561        let if_not_exists =
23562            self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
23563        let name = self.parse_table_ref()?;
23564
23565        let mut seq = CreateSequence {
23566            name,
23567            if_not_exists,
23568            temporary,
23569            or_replace,
23570            as_type: None,
23571            increment: None,
23572            minvalue: None,
23573            maxvalue: None,
23574            start: None,
23575            cache: None,
23576            cycle: false,
23577            owned_by: None,
23578            owned_by_none: false,
23579            order: None,
23580            comment: None,
23581            sharing: None,
23582            scale_modifier: None,
23583            shard_modifier: None,
23584            property_order: Vec::new(),
23585        };
23586
23587        // Parse optional AS <type> clause (e.g., AS SMALLINT, AS BIGINT)
23588        if self.match_token(TokenType::As) {
23589            seq.as_type = Some(self.parse_data_type()?);
23590        }
23591
23592        // Parse sequence options
23593        // Handle optional WITH keyword before options (Snowflake: WITH START = n INCREMENT = n)
23594        self.match_token(TokenType::With);
23595
23596        loop {
23597            // Skip optional commas between options (Snowflake uses comma-separated options)
23598            self.match_token(TokenType::Comma);
23599
23600            if self.is_at_end() || self.check(TokenType::Semicolon) {
23601                break;
23602            }
23603
23604            if self.match_token(TokenType::Increment) || self.match_identifier("INCREMENT") {
23605                self.match_token(TokenType::By);
23606                self.match_token(TokenType::Eq); // Snowflake uses = instead of BY
23607                seq.increment = Some(self.parse_signed_integer()?);
23608                seq.property_order.push(SeqPropKind::Increment);
23609            } else if self.match_token(TokenType::Minvalue) {
23610                seq.minvalue = Some(SequenceBound::Value(self.parse_signed_integer()?));
23611                seq.property_order.push(SeqPropKind::Minvalue);
23612            } else if self.match_keywords(&[TokenType::No, TokenType::Minvalue]) {
23613                seq.minvalue = Some(SequenceBound::None);
23614                seq.property_order.push(SeqPropKind::Minvalue);
23615            } else if self.match_identifier("NOMINVALUE") {
23616                seq.minvalue = Some(SequenceBound::None);
23617                seq.property_order.push(SeqPropKind::NoMinvalueWord);
23618            } else if self.match_token(TokenType::Maxvalue) {
23619                seq.maxvalue = Some(SequenceBound::Value(self.parse_signed_integer()?));
23620                seq.property_order.push(SeqPropKind::Maxvalue);
23621            } else if self.match_keywords(&[TokenType::No, TokenType::Maxvalue]) {
23622                seq.maxvalue = Some(SequenceBound::None);
23623                seq.property_order.push(SeqPropKind::Maxvalue);
23624            } else if self.match_identifier("NOMAXVALUE") {
23625                seq.maxvalue = Some(SequenceBound::None);
23626                seq.property_order.push(SeqPropKind::NoMaxvalueWord);
23627            } else if self.match_token(TokenType::Start) {
23628                self.match_token(TokenType::With);
23629                self.match_token(TokenType::Eq); // Snowflake uses = instead of WITH
23630                seq.start = Some(self.parse_signed_integer()?);
23631                seq.property_order.push(SeqPropKind::Start);
23632            } else if self.match_token(TokenType::Cache) {
23633                seq.cache = Some(self.parse_signed_integer()?);
23634                seq.property_order.push(SeqPropKind::Cache);
23635            } else if self.match_identifier("NOCACHE") {
23636                // Oracle: NOCACHE (single word)
23637                seq.property_order.push(SeqPropKind::NoCacheWord);
23638            } else if self.match_token(TokenType::Cycle) {
23639                seq.cycle = true;
23640                seq.property_order.push(SeqPropKind::Cycle);
23641            } else if self.match_token(TokenType::NoCycle) {
23642                // NOCYCLE keyword token - preserve as single word
23643                seq.cycle = false;
23644                seq.property_order.push(SeqPropKind::NoCycleWord);
23645            } else if self.match_token(TokenType::No) {
23646                // Two-word NO forms
23647                if self.match_token(TokenType::Cycle) {
23648                    seq.cycle = false;
23649                    seq.property_order.push(SeqPropKind::NoCycle);
23650                } else if self.match_token(TokenType::Cache) || self.match_identifier("CACHE") {
23651                    seq.property_order.push(SeqPropKind::NoCache);
23652                } else if self.match_token(TokenType::Minvalue) {
23653                    seq.minvalue = Some(SequenceBound::None);
23654                    seq.property_order.push(SeqPropKind::Minvalue);
23655                } else if self.match_token(TokenType::Maxvalue) {
23656                    seq.maxvalue = Some(SequenceBound::None);
23657                    seq.property_order.push(SeqPropKind::Maxvalue);
23658                } else {
23659                    // Unexpected token after NO
23660                    break;
23661                }
23662            } else if self.match_token(TokenType::Owned) {
23663                self.expect(TokenType::By)?;
23664                if self.match_identifier("NONE") {
23665                    seq.owned_by = None;
23666                    seq.owned_by_none = true;
23667                } else {
23668                    seq.owned_by = Some(self.parse_table_ref()?);
23669                }
23670                seq.property_order.push(SeqPropKind::OwnedBy);
23671            } else if self.match_token(TokenType::Order) {
23672                // Snowflake/Oracle: ORDER option
23673                seq.order = Some(true);
23674                seq.property_order.push(SeqPropKind::Order);
23675            } else if self.match_identifier("NOORDER") {
23676                // Snowflake/Oracle: NOORDER option
23677                seq.order = Some(false);
23678                seq.property_order.push(SeqPropKind::NoOrder);
23679            } else if self.match_token(TokenType::Comment) || self.match_identifier("COMMENT") {
23680                // Snowflake: COMMENT = 'value'
23681                self.expect(TokenType::Eq)?;
23682                let comment_val = self.expect(TokenType::String)?;
23683                seq.comment = Some(comment_val.text.clone());
23684                seq.property_order.push(SeqPropKind::Comment);
23685            } else if self.match_identifier("SHARING") {
23686                // Oracle: SHARING=value
23687                self.expect(TokenType::Eq)?;
23688                let val = self.expect_identifier_or_keyword()?;
23689                seq.sharing = Some(val);
23690                seq.property_order.push(SeqPropKind::Sharing);
23691            } else if self.match_identifier("NOKEEP") {
23692                seq.property_order.push(SeqPropKind::NoKeep);
23693            } else if self.match_token(TokenType::Keep) || self.match_identifier("KEEP") {
23694                seq.property_order.push(SeqPropKind::Keep);
23695            } else if self.match_identifier("SCALE") {
23696                let modifier = if self.match_identifier("EXTEND") {
23697                    "EXTEND".to_string()
23698                } else if self.match_identifier("NOEXTEND") {
23699                    "NOEXTEND".to_string()
23700                } else {
23701                    String::new()
23702                };
23703                seq.scale_modifier = Some(modifier);
23704                seq.property_order.push(SeqPropKind::Scale);
23705            } else if self.match_identifier("NOSCALE") {
23706                seq.property_order.push(SeqPropKind::NoScale);
23707            } else if self.match_identifier("SHARD") {
23708                let modifier = if self.match_identifier("EXTEND") {
23709                    "EXTEND".to_string()
23710                } else if self.match_identifier("NOEXTEND") {
23711                    "NOEXTEND".to_string()
23712                } else {
23713                    String::new()
23714                };
23715                seq.shard_modifier = Some(modifier);
23716                seq.property_order.push(SeqPropKind::Shard);
23717            } else if self.match_identifier("NOSHARD") {
23718                seq.property_order.push(SeqPropKind::NoShard);
23719            } else if self.match_identifier("SESSION") {
23720                seq.property_order.push(SeqPropKind::Session);
23721            } else if self.match_identifier("GLOBAL") {
23722                seq.property_order.push(SeqPropKind::Global);
23723            } else {
23724                break;
23725            }
23726        }
23727
23728        Ok(Expression::CreateSequence(Box::new(seq)))
23729    }
23730
23731    /// Parse a signed integer (positive or negative)
23732    fn parse_signed_integer(&mut self) -> Result<i64> {
23733        let negative = self.match_token(TokenType::Dash);
23734        let tok = self.expect(TokenType::Number)?;
23735        let value: i64 = tok
23736            .text
23737            .parse()
23738            .map_err(|_| self.parse_error(format!("Invalid integer: {}", tok.text)))?;
23739        Ok(if negative { -value } else { value })
23740    }
23741
23742    /// Parse DROP SEQUENCE statement
23743    fn parse_drop_sequence(&mut self) -> Result<Expression> {
23744        self.expect(TokenType::Sequence)?;
23745
23746        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
23747        let name = self.parse_table_ref()?;
23748
23749        let cascade = self.match_token(TokenType::Cascade);
23750        if !cascade {
23751            self.match_token(TokenType::Restrict);
23752        }
23753
23754        Ok(Expression::DropSequence(Box::new(DropSequence {
23755            name,
23756            if_exists,
23757            cascade,
23758        })))
23759    }
23760
23761    /// Parse ALTER SEQUENCE statement
23762    fn parse_alter_sequence(&mut self) -> Result<Expression> {
23763        self.expect(TokenType::Sequence)?;
23764
23765        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
23766        let name = self.parse_table_ref()?;
23767
23768        let mut seq = AlterSequence {
23769            name,
23770            if_exists,
23771            increment: None,
23772            minvalue: None,
23773            maxvalue: None,
23774            start: None,
23775            restart: None,
23776            cache: None,
23777            cycle: None,
23778            owned_by: None,
23779        };
23780
23781        // Parse sequence options
23782        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
23783            if self.match_token(TokenType::Increment) || self.match_identifier("INCREMENT") {
23784                self.match_token(TokenType::By);
23785                seq.increment = Some(self.parse_signed_integer()?);
23786            } else if self.match_token(TokenType::Minvalue) {
23787                seq.minvalue = Some(SequenceBound::Value(self.parse_signed_integer()?));
23788            } else if self.match_keywords(&[TokenType::No, TokenType::Minvalue]) {
23789                seq.minvalue = Some(SequenceBound::None);
23790            } else if self.match_token(TokenType::Maxvalue) {
23791                seq.maxvalue = Some(SequenceBound::Value(self.parse_signed_integer()?));
23792            } else if self.match_keywords(&[TokenType::No, TokenType::Maxvalue]) {
23793                seq.maxvalue = Some(SequenceBound::None);
23794            } else if self.match_token(TokenType::Start) {
23795                self.match_token(TokenType::With);
23796                seq.start = Some(self.parse_signed_integer()?);
23797            } else if self.match_token(TokenType::Restart) {
23798                if self.match_token(TokenType::With)
23799                    || self.check(TokenType::Number)
23800                    || self.check(TokenType::Dash)
23801                {
23802                    seq.restart = Some(Some(self.parse_signed_integer()?));
23803                } else {
23804                    seq.restart = Some(None);
23805                }
23806            } else if self.match_token(TokenType::Cache) {
23807                seq.cache = Some(self.parse_signed_integer()?);
23808            } else if self.match_token(TokenType::Cycle) {
23809                seq.cycle = Some(true);
23810            } else if self.match_token(TokenType::NoCycle) {
23811                seq.cycle = Some(false);
23812            } else if self.match_token(TokenType::Owned) {
23813                self.expect(TokenType::By)?;
23814                if self.match_identifier("NONE") {
23815                    seq.owned_by = Some(None);
23816                } else {
23817                    seq.owned_by = Some(Some(self.parse_table_ref()?));
23818                }
23819            } else {
23820                break;
23821            }
23822        }
23823
23824        Ok(Expression::AlterSequence(Box::new(seq)))
23825    }
23826
23827    /// Parse CREATE TRIGGER statement
23828    fn parse_create_trigger(
23829        &mut self,
23830        or_replace: bool,
23831        constraint: bool,
23832        create_pos: usize,
23833    ) -> Result<Expression> {
23834        self.expect(TokenType::Trigger)?;
23835
23836        let name = self.expect_identifier_with_quoted()?;
23837
23838        // TSQL triggers: CREATE TRIGGER name ON table AFTER INSERT AS BEGIN...END
23839        // These have ON before timing, unlike standard triggers.
23840        // Fall back to Command for these (matches Python sqlglot behavior).
23841        if self.check(TokenType::On) && !constraint {
23842            self.current = create_pos;
23843            return self.fallback_to_command(create_pos);
23844        }
23845
23846        // Parse timing (BEFORE, AFTER, INSTEAD OF)
23847        let timing = if self.match_token(TokenType::Before) {
23848            TriggerTiming::Before
23849        } else if self.match_token(TokenType::After) {
23850            TriggerTiming::After
23851        } else if self.match_token(TokenType::Instead) {
23852            self.expect(TokenType::Of)?;
23853            TriggerTiming::InsteadOf
23854        } else {
23855            // Fall back to Command for unknown trigger syntax
23856            self.current = create_pos;
23857            return self.fallback_to_command(create_pos);
23858        };
23859
23860        // Parse events
23861        let mut events = Vec::new();
23862        loop {
23863            if self.match_token(TokenType::Insert) {
23864                events.push(TriggerEvent::Insert);
23865            } else if self.match_token(TokenType::Update) {
23866                if self.match_token(TokenType::Of) {
23867                    let mut cols = Vec::new();
23868                    loop {
23869                        cols.push(Identifier::new(self.expect_identifier()?));
23870                        if !self.match_token(TokenType::Comma) {
23871                            break;
23872                        }
23873                    }
23874                    events.push(TriggerEvent::Update(Some(cols)));
23875                } else {
23876                    events.push(TriggerEvent::Update(None));
23877                }
23878            } else if self.match_token(TokenType::Delete) {
23879                events.push(TriggerEvent::Delete);
23880            } else if self.match_token(TokenType::Truncate) {
23881                events.push(TriggerEvent::Truncate);
23882            } else {
23883                break;
23884            }
23885
23886            if !self.match_token(TokenType::Or) {
23887                break;
23888            }
23889        }
23890
23891        self.expect(TokenType::On)?;
23892        let table = self.parse_table_ref()?;
23893
23894        // Parse optional REFERENCING clause (for non-constraint triggers)
23895        let referencing = if !constraint && self.match_token(TokenType::Referencing) {
23896            let mut ref_clause = TriggerReferencing {
23897                old_table: None,
23898                new_table: None,
23899                old_row: None,
23900                new_row: None,
23901            };
23902            while self.match_token(TokenType::Old) || self.match_token(TokenType::New) {
23903                let is_old = self.previous().token_type == TokenType::Old;
23904                let is_table = self.match_token(TokenType::Table);
23905                let _is_row = !is_table && self.match_token(TokenType::Row);
23906                self.match_token(TokenType::As);
23907                let alias = Identifier::new(self.expect_identifier()?);
23908
23909                if is_old {
23910                    if is_table {
23911                        ref_clause.old_table = Some(alias);
23912                    } else {
23913                        ref_clause.old_row = Some(alias);
23914                    }
23915                } else {
23916                    if is_table {
23917                        ref_clause.new_table = Some(alias);
23918                    } else {
23919                        ref_clause.new_row = Some(alias);
23920                    }
23921                }
23922            }
23923            Some(ref_clause)
23924        } else {
23925            None
23926        };
23927
23928        // Parse deferrable options for constraint triggers (comes before FOR EACH ROW in PostgreSQL)
23929        let mut deferrable = None;
23930        let mut initially_deferred = None;
23931        if constraint {
23932            if self.match_identifier("DEFERRABLE") {
23933                deferrable = Some(true);
23934            } else if self.match_keywords(&[TokenType::Not, TokenType::Identifier]) {
23935                // NOT DEFERRABLE
23936                deferrable = Some(false);
23937            }
23938            if self.match_identifier("INITIALLY") {
23939                if self.match_identifier("DEFERRED") {
23940                    initially_deferred = Some(true);
23941                } else if self.match_identifier("IMMEDIATE") {
23942                    initially_deferred = Some(false);
23943                }
23944            }
23945        }
23946
23947        // Parse FOR EACH ROW/STATEMENT (optional)
23948        let for_each = if self.match_token(TokenType::For) {
23949            self.match_token(TokenType::Each);
23950            if self.match_token(TokenType::Row) {
23951                Some(TriggerForEach::Row)
23952            } else if self.match_token(TokenType::Statement) {
23953                Some(TriggerForEach::Statement)
23954            } else {
23955                Some(TriggerForEach::Row)
23956            }
23957        } else {
23958            None
23959        };
23960
23961        // Parse optional WHEN clause (parentheses are optional, e.g. SQLite)
23962        let (when, when_paren) = if self.match_token(TokenType::When) {
23963            let has_paren = self.match_token(TokenType::LParen);
23964            let expr = self.parse_expression()?;
23965            if has_paren {
23966                self.expect(TokenType::RParen)?;
23967            }
23968            (Some(expr), has_paren)
23969        } else {
23970            (None, false)
23971        };
23972
23973        // Parse trigger body
23974        let body = if self.match_token(TokenType::Execute) {
23975            self.match_token(TokenType::Function);
23976            self.match_token(TokenType::Procedure);
23977            let func_name = self.parse_table_ref()?;
23978            self.expect(TokenType::LParen)?;
23979            let mut args = Vec::new();
23980            if !self.check(TokenType::RParen) {
23981                loop {
23982                    args.push(self.parse_expression()?);
23983                    if !self.match_token(TokenType::Comma) {
23984                        break;
23985                    }
23986                }
23987            }
23988            self.expect(TokenType::RParen)?;
23989            TriggerBody::Execute {
23990                function: func_name,
23991                args,
23992            }
23993        } else if self.match_token(TokenType::Begin) {
23994            // Record start position (first token after BEGIN)
23995            let body_start = if !self.is_at_end() {
23996                self.tokens[self.current].span.start
23997            } else {
23998                0
23999            };
24000            let mut depth = 1;
24001            while depth > 0 && !self.is_at_end() {
24002                let tok = self.advance();
24003                if tok.token_type == TokenType::Begin {
24004                    depth += 1;
24005                } else if tok.token_type == TokenType::End {
24006                    depth -= 1;
24007                    if depth == 0 {
24008                        break;
24009                    }
24010                }
24011            }
24012            // Extract verbatim text from source if available
24013            let block_content = if let Some(ref source) = self.source {
24014                // End position is the start of the END token
24015                let body_end = if self.current > 0 {
24016                    self.tokens[self.current - 1].span.start
24017                } else {
24018                    body_start
24019                };
24020                source[body_start..body_end].trim().to_string()
24021            } else {
24022                // Fallback: no source available
24023                String::new()
24024            };
24025            TriggerBody::Block(block_content)
24026        } else {
24027            return Err(self.parse_error("Expected EXECUTE or BEGIN in trigger body"));
24028        };
24029
24030        Ok(Expression::CreateTrigger(Box::new(CreateTrigger {
24031            name,
24032            table,
24033            timing,
24034            events,
24035            for_each,
24036            when,
24037            when_paren,
24038            body,
24039            or_replace,
24040            constraint,
24041            deferrable,
24042            initially_deferred,
24043            referencing,
24044        })))
24045    }
24046
24047    /// Parse DROP TRIGGER statement
24048    fn parse_drop_trigger(&mut self) -> Result<Expression> {
24049        self.expect(TokenType::Trigger)?;
24050
24051        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
24052        let name = Identifier::new(self.expect_identifier()?);
24053
24054        let table = if self.match_token(TokenType::On) {
24055            Some(self.parse_table_ref()?)
24056        } else {
24057            None
24058        };
24059
24060        let cascade = self.match_token(TokenType::Cascade);
24061        if !cascade {
24062            self.match_token(TokenType::Restrict);
24063        }
24064
24065        Ok(Expression::DropTrigger(Box::new(DropTrigger {
24066            name,
24067            table,
24068            if_exists,
24069            cascade,
24070        })))
24071    }
24072
24073    /// Parse CREATE TYPE statement
24074    fn parse_create_type(&mut self) -> Result<Expression> {
24075        self.expect(TokenType::Type)?;
24076
24077        let if_not_exists =
24078            self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
24079        let name = self.parse_table_ref()?;
24080
24081        self.expect(TokenType::As)?;
24082
24083        let definition = if self.match_token(TokenType::Enum) {
24084            // ENUM type
24085            self.expect(TokenType::LParen)?;
24086            let mut values = Vec::new();
24087            loop {
24088                let tok = self.expect(TokenType::String)?;
24089                values.push(tok.text.trim_matches('\'').to_string());
24090                if !self.match_token(TokenType::Comma) {
24091                    break;
24092                }
24093            }
24094            self.expect(TokenType::RParen)?;
24095            TypeDefinition::Enum(values)
24096        } else if self.match_token(TokenType::LParen) {
24097            // Composite type
24098            let mut attrs = Vec::new();
24099            loop {
24100                let attr_name = Identifier::new(self.expect_identifier()?);
24101                let data_type = self.parse_data_type()?;
24102                let collate = if self.match_identifier("COLLATE") {
24103                    Some(Identifier::new(self.expect_identifier()?))
24104                } else {
24105                    None
24106                };
24107                attrs.push(TypeAttribute {
24108                    name: attr_name,
24109                    data_type,
24110                    collate,
24111                });
24112                if !self.match_token(TokenType::Comma) {
24113                    break;
24114                }
24115            }
24116            self.expect(TokenType::RParen)?;
24117            TypeDefinition::Composite(attrs)
24118        } else if self.match_token(TokenType::Range) {
24119            // Range type
24120            self.expect(TokenType::LParen)?;
24121            self.match_identifier("SUBTYPE");
24122            self.match_token(TokenType::Eq);
24123            let subtype = self.parse_data_type()?;
24124
24125            let mut subtype_diff = None;
24126            let mut canonical = None;
24127
24128            while self.match_token(TokenType::Comma) {
24129                if self.match_identifier("SUBTYPE_DIFF") {
24130                    self.match_token(TokenType::Eq);
24131                    subtype_diff = Some(self.expect_identifier()?);
24132                } else if self.match_identifier("CANONICAL") {
24133                    self.match_token(TokenType::Eq);
24134                    canonical = Some(self.expect_identifier()?);
24135                }
24136            }
24137            self.expect(TokenType::RParen)?;
24138
24139            TypeDefinition::Range {
24140                subtype,
24141                subtype_diff,
24142                canonical,
24143            }
24144        } else {
24145            return Err(
24146                self.parse_error("Expected ENUM, composite type definition, or RANGE after AS")
24147            );
24148        };
24149
24150        Ok(Expression::CreateType(Box::new(CreateType {
24151            name,
24152            definition,
24153            if_not_exists,
24154        })))
24155    }
24156
24157    /// Parse CREATE DOMAIN statement
24158    fn parse_create_domain(&mut self) -> Result<Expression> {
24159        self.expect(TokenType::Domain)?;
24160
24161        let if_not_exists =
24162            self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
24163        let name = self.parse_table_ref()?;
24164
24165        self.expect(TokenType::As)?;
24166        let base_type = self.parse_data_type()?;
24167
24168        let mut default = None;
24169        let mut constraints = Vec::new();
24170
24171        // Parse domain options
24172        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
24173            if self.match_token(TokenType::Default) {
24174                default = Some(self.parse_expression()?);
24175            } else if self.match_token(TokenType::Constraint) {
24176                let constr_name = Some(Identifier::new(self.expect_identifier()?));
24177                self.expect(TokenType::Check)?;
24178                self.expect(TokenType::LParen)?;
24179                let check_expr = self.parse_expression()?;
24180                self.expect(TokenType::RParen)?;
24181                constraints.push(DomainConstraint {
24182                    name: constr_name,
24183                    check: check_expr,
24184                });
24185            } else if self.match_token(TokenType::Check) {
24186                self.expect(TokenType::LParen)?;
24187                let check_expr = self.parse_expression()?;
24188                self.expect(TokenType::RParen)?;
24189                constraints.push(DomainConstraint {
24190                    name: None,
24191                    check: check_expr,
24192                });
24193            } else if self.match_keywords(&[TokenType::Not, TokenType::Null]) {
24194                // NOT NULL is a constraint - represented as VALUE IS NOT NULL
24195                constraints.push(DomainConstraint {
24196                    name: None,
24197                    check: Expression::IsNull(Box::new(IsNull {
24198                        this: Expression::Identifier(Identifier::new("VALUE")),
24199                        not: true,
24200                        postfix_form: false,
24201                    })),
24202                });
24203            } else {
24204                break;
24205            }
24206        }
24207
24208        Ok(Expression::CreateType(Box::new(CreateType {
24209            name,
24210            definition: TypeDefinition::Domain {
24211                base_type,
24212                default,
24213                constraints,
24214            },
24215            if_not_exists,
24216        })))
24217    }
24218
24219    /// Parse CREATE STAGE statement (Snowflake)
24220    fn parse_create_stage(&mut self, or_replace: bool, temporary: bool) -> Result<Expression> {
24221        self.advance(); // consume STAGE (identifier)
24222                        // Parse remaining tokens, normalizing FILE_FORMAT clause
24223        let start = self.current;
24224        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
24225            self.advance();
24226        }
24227        let sql = self.tokens_to_sql_stage_format(start, self.current);
24228
24229        // Build the CREATE prefix with modifiers
24230        let mut prefix = String::from("CREATE");
24231        if or_replace {
24232            prefix.push_str(" OR REPLACE");
24233        }
24234        if temporary {
24235            prefix.push_str(" TEMPORARY");
24236        }
24237        prefix.push_str(" STAGE");
24238
24239        Ok(Expression::Raw(Raw {
24240            sql: format!("{} {}", prefix, sql),
24241        }))
24242    }
24243
24244    /// Parse CREATE TAG statement (Snowflake)
24245    fn parse_create_tag(&mut self, or_replace: bool) -> Result<Expression> {
24246        self.advance(); // consume TAG
24247                        // Capture remaining tokens as raw SQL
24248        let start = self.current;
24249        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
24250            self.advance();
24251        }
24252        let sql = self.tokens_to_sql(start, self.current);
24253        let prefix = if or_replace {
24254            "CREATE OR REPLACE TAG"
24255        } else {
24256            "CREATE TAG"
24257        };
24258        Ok(Expression::Raw(Raw {
24259            sql: format!("{} {}", prefix, sql),
24260        }))
24261    }
24262
24263    /// Parse CREATE STREAM statement (Snowflake)
24264    fn parse_create_stream(&mut self, _or_replace: bool) -> Result<Expression> {
24265        self.advance(); // consume STREAM
24266                        // Capture remaining tokens as raw SQL
24267        let start = self.current;
24268        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
24269            self.advance();
24270        }
24271        let sql = self.tokens_to_sql(start, self.current);
24272        Ok(Expression::Raw(Raw {
24273            sql: format!("CREATE STREAM {}", sql),
24274        }))
24275    }
24276
24277    /// Parse CREATE FILE FORMAT statement (Snowflake)
24278    fn parse_create_file_format(
24279        &mut self,
24280        or_replace: bool,
24281        temporary: bool,
24282    ) -> Result<Expression> {
24283        self.advance(); // consume FILE
24284        self.advance(); // consume FORMAT
24285                        // Capture remaining tokens as raw SQL
24286        let start = self.current;
24287        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
24288            self.advance();
24289        }
24290        let sql = self.tokens_to_sql(start, self.current);
24291        let mut prefix = String::from("CREATE");
24292        if or_replace {
24293            prefix.push_str(" OR REPLACE");
24294        }
24295        if temporary {
24296            prefix.push_str(" TEMPORARY");
24297        }
24298        prefix.push_str(" FILE FORMAT ");
24299        prefix.push_str(&sql);
24300        Ok(Expression::Raw(Raw { sql: prefix }))
24301    }
24302
24303    /// Parse DROP TYPE statement
24304    fn parse_drop_type(&mut self) -> Result<Expression> {
24305        self.expect(TokenType::Type)?;
24306
24307        let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
24308        let name = self.parse_table_ref()?;
24309
24310        let cascade = self.match_token(TokenType::Cascade);
24311        if !cascade {
24312            self.match_token(TokenType::Restrict);
24313        }
24314
24315        Ok(Expression::DropType(Box::new(DropType {
24316            name,
24317            if_exists,
24318            cascade,
24319        })))
24320    }
24321
24322    fn parse_alter_view_with_modifiers(
24323        &mut self,
24324        algorithm: Option<String>,
24325        definer: Option<String>,
24326        sql_security: Option<String>,
24327    ) -> Result<Expression> {
24328        self.expect(TokenType::View)?;
24329
24330        let name = self.parse_table_ref()?;
24331        let mut actions = Vec::new();
24332
24333        // Hive: Optional column aliases with optional COMMENT: (c1, c2) or (c1 COMMENT 'text', c2)
24334        // Only parse if we see LParen followed by identifier (not SELECT for subquery)
24335        let columns = if self.check(TokenType::LParen) {
24336            // Peek ahead to see if this looks like column aliases
24337            let saved = self.current;
24338            self.advance(); // consume LParen
24339
24340            // Check if this is an identifier (column name) vs SELECT keyword
24341            let is_column_aliases = self.check(TokenType::Identifier)
24342                || self.check(TokenType::Var)
24343                || self.check(TokenType::QuotedIdentifier);
24344
24345            if is_column_aliases {
24346                // Parse column aliases
24347                let mut cols = Vec::new();
24348                loop {
24349                    let col_name = self.expect_identifier()?;
24350                    // Optional COMMENT 'text'
24351                    let comment = if self.match_token(TokenType::Comment) {
24352                        Some(self.expect_string()?)
24353                    } else {
24354                        None
24355                    };
24356                    cols.push(ViewColumn {
24357                        name: Identifier::new(col_name),
24358                        comment,
24359                        options: Vec::new(),
24360                    });
24361                    if !self.match_token(TokenType::Comma) {
24362                        break;
24363                    }
24364                }
24365                self.expect(TokenType::RParen)?;
24366                cols
24367            } else {
24368                self.current = saved; // retreat
24369                Vec::new()
24370            }
24371        } else {
24372            Vec::new()
24373        };
24374
24375        // TSQL: WITH option (SCHEMABINDING, ENCRYPTION, VIEW_METADATA) before AS
24376        let with_option = if self.match_token(TokenType::With) {
24377            let opt = self.expect_identifier_or_keyword()?;
24378            Some(opt.to_uppercase())
24379        } else {
24380            None
24381        };
24382
24383        // Parse actions
24384        if self.match_token(TokenType::Rename) {
24385            self.expect(TokenType::To)?;
24386            actions.push(AlterViewAction::Rename(self.parse_table_ref()?));
24387        } else if self.match_identifier("OWNER") {
24388            self.expect(TokenType::To)?;
24389            actions.push(AlterViewAction::OwnerTo(Identifier::new(
24390                self.expect_identifier()?,
24391            )));
24392        } else if self.match_token(TokenType::Set) {
24393            // Hive: SET TBLPROPERTIES ('key'='value', ...) or SET SCHEMA name
24394            // Trino: SET AUTHORIZATION [ROLE] user
24395            if self.match_identifier("TBLPROPERTIES") {
24396                let props = self.parse_tblproperties_key_value_list()?;
24397                actions.push(AlterViewAction::SetTblproperties(props));
24398            } else if self.match_token(TokenType::Authorization) {
24399                let mut auth_text = String::new();
24400                if self.match_texts(&["ROLE"]) {
24401                    auth_text.push_str("ROLE ");
24402                }
24403                let user = self.expect_identifier()?;
24404                auth_text.push_str(&user);
24405                actions.push(AlterViewAction::SetAuthorization(auth_text));
24406            } else {
24407                self.expect(TokenType::Schema)?;
24408                actions.push(AlterViewAction::SetSchema(Identifier::new(
24409                    self.expect_identifier()?,
24410                )));
24411            }
24412        } else if self.match_identifier("UNSET") {
24413            // Hive: UNSET TBLPROPERTIES ('key1', 'key2', ...)
24414            if !self.match_identifier("TBLPROPERTIES") {
24415                return Err(self.parse_error("Expected TBLPROPERTIES after UNSET"));
24416            }
24417            let keys = self.parse_tblproperties_key_list()?;
24418            actions.push(AlterViewAction::UnsetTblproperties(keys));
24419        } else if self.match_token(TokenType::Alter) {
24420            self.match_token(TokenType::Column);
24421            let col_name = Identifier::new(self.expect_identifier()?);
24422            let action = self.parse_alter_column_action()?;
24423            actions.push(AlterViewAction::AlterColumn {
24424                name: col_name,
24425                action,
24426            });
24427        } else if self.match_token(TokenType::As) {
24428            // AS SELECT ... or AS SELECT ... UNION ... (redefine view query)
24429            let query = self.parse_statement()?;
24430            actions.push(AlterViewAction::AsSelect(Box::new(query)));
24431        }
24432
24433        Ok(Expression::AlterView(Box::new(AlterView {
24434            name,
24435            actions,
24436            algorithm,
24437            definer,
24438            sql_security,
24439            with_option,
24440            columns,
24441        })))
24442    }
24443
24444    /// Parse TBLPROPERTIES key-value list: ('key1'='value1', 'key2'='value2', ...)
24445    fn parse_tblproperties_key_value_list(&mut self) -> Result<Vec<(String, String)>> {
24446        self.expect(TokenType::LParen)?;
24447        let mut props = Vec::new();
24448        loop {
24449            let key = self.expect_string()?;
24450            self.expect(TokenType::Eq)?;
24451            let value = self.expect_string()?;
24452            props.push((key, value));
24453            if !self.match_token(TokenType::Comma) {
24454                break;
24455            }
24456        }
24457        self.expect(TokenType::RParen)?;
24458        Ok(props)
24459    }
24460
24461    /// Parse TBLPROPERTIES key list (for UNSET): ('key1', 'key2', ...)
24462    fn parse_tblproperties_key_list(&mut self) -> Result<Vec<String>> {
24463        self.expect(TokenType::LParen)?;
24464        let mut keys = Vec::new();
24465        loop {
24466            let key = self.expect_string()?;
24467            keys.push(key);
24468            if !self.match_token(TokenType::Comma) {
24469                break;
24470            }
24471        }
24472        self.expect(TokenType::RParen)?;
24473        Ok(keys)
24474    }
24475
24476    /// Parse ALTER INDEX statement
24477    fn parse_alter_index(&mut self) -> Result<Expression> {
24478        self.expect(TokenType::Index)?;
24479
24480        // Use expect_identifier_or_keyword_with_quoted to preserve quoted flag
24481        let name = self.expect_identifier_or_keyword_with_quoted()?;
24482
24483        let table = if self.match_token(TokenType::On) {
24484            Some(self.parse_table_ref()?)
24485        } else {
24486            None
24487        };
24488
24489        let mut actions = Vec::new();
24490
24491        // Parse actions
24492        if self.match_token(TokenType::Rename) {
24493            self.expect(TokenType::To)?;
24494            // Also preserve quoted flag for the new name
24495            actions.push(AlterIndexAction::Rename(
24496                self.expect_identifier_or_keyword_with_quoted()?,
24497            ));
24498        } else if self.match_token(TokenType::Set) {
24499            self.match_identifier("TABLESPACE");
24500            actions.push(AlterIndexAction::SetTablespace(
24501                self.expect_identifier_or_keyword_with_quoted()?,
24502            ));
24503        } else if self.match_identifier("VISIBLE") {
24504            actions.push(AlterIndexAction::Visible(true));
24505        } else if self.match_identifier("INVISIBLE") {
24506            actions.push(AlterIndexAction::Visible(false));
24507        }
24508
24509        Ok(Expression::AlterIndex(Box::new(AlterIndex {
24510            name,
24511            table,
24512            actions,
24513        })))
24514    }
24515
24516    // ==================== End DDL Parsing ====================
24517
24518    /// Parse an expression (with precedence)
24519    /// Assignment (:=) has lower precedence than OR, matching Python sqlglot's
24520    /// _parse_expression -> _parse_assignment -> _parse_disjunction chain
24521    fn parse_expression(&mut self) -> Result<Expression> {
24522        let mut left = self.parse_or()?;
24523
24524        // Handle := assignment operator (MySQL @var := val, DuckDB named args/settings)
24525        // This has lower precedence than OR
24526        while self.match_token(TokenType::ColonEq) {
24527            let right = self.parse_or()?;
24528            left = Expression::PropertyEQ(Box::new(BinaryOp::new(left, right)));
24529        }
24530
24531        // ClickHouse ternary operator: condition ? true_value : false_value
24532        // Parsed as: CASE WHEN condition THEN true_value ELSE false_value END
24533        if matches!(
24534            self.config.dialect,
24535            Some(crate::dialects::DialectType::ClickHouse)
24536        ) && self.match_token(TokenType::Parameter)
24537        {
24538            if self.check(TokenType::Colon) {
24539                return Err(
24540                    self.parse_error("Expected true expression after ? in ClickHouse ternary")
24541                );
24542            }
24543            let true_value = self.parse_or()?;
24544            let false_value = if self.match_token(TokenType::Colon) {
24545                self.parse_or()?
24546            } else {
24547                Expression::Null(Null)
24548            };
24549            left = Expression::IfFunc(Box::new(IfFunc {
24550                original_name: None,
24551                condition: left,
24552                true_value,
24553                false_value: Some(false_value),
24554                inferred_type: None,
24555            }));
24556        }
24557
24558        // ClickHouse: APPLY(func) column transformer
24559        // e.g., COLUMNS('pattern') APPLY(toString) APPLY(length)
24560        // Also: APPLY func (no parens), APPLY(x -> expr) (lambda)
24561        // Only match APPLY when followed by ( — bare APPLY without ( is treated as an alias
24562        // by the select expression parser (e.g., SELECT col apply -> SELECT col AS apply)
24563        if matches!(
24564            self.config.dialect,
24565            Some(crate::dialects::DialectType::ClickHouse)
24566        ) {
24567            while self.check(TokenType::Apply) && self.check_next(TokenType::LParen) {
24568                self.advance(); // consume APPLY
24569                self.advance(); // consume (
24570                let expr = self.parse_expression()?;
24571                self.expect(TokenType::RParen)?;
24572                left = Expression::Apply(Box::new(crate::expressions::Apply {
24573                    this: Box::new(left),
24574                    expression: Box::new(expr),
24575                }));
24576            }
24577        }
24578
24579        Ok(left)
24580    }
24581
24582    /// Parse OR expressions
24583    fn parse_or(&mut self) -> Result<Expression> {
24584        let mut left = self.parse_xor()?;
24585
24586        while self.check(TokenType::Or)
24587            || (self.dpipe_is_logical_or() && self.check(TokenType::DPipe))
24588        {
24589            let mut all_comments = self.previous_trailing_comments();
24590            // Also capture leading comments on the OR token (comments on a separate line before OR)
24591            all_comments.extend(self.current_leading_comments());
24592            self.advance(); // consume OR
24593            all_comments.extend(self.previous_trailing_comments());
24594            // Clear trailing_comments from left expression to avoid duplication
24595            if !all_comments.is_empty() {
24596                Self::clear_rightmost_trailing_comments(&mut left);
24597            }
24598            // Filter out empty/whitespace-only comments
24599            all_comments.retain(|c| !c.trim().is_empty());
24600            // Split: block comments go before operator, line comments go after
24601            let mut left_comments = Vec::new();
24602            let mut operator_comments = Vec::new();
24603            for comment in all_comments {
24604                if comment.starts_with("/*") {
24605                    left_comments.push(comment);
24606                } else {
24607                    operator_comments.push(comment);
24608                }
24609            }
24610            let mut right = self.parse_xor()?;
24611            // If parse_comparison stored pending leading comments, attach them
24612            if !self.pending_leading_comments.is_empty() {
24613                let pending = self.pending_leading_comments.drain(..).collect::<Vec<_>>();
24614                right = Expression::Annotated(Box::new(Annotated {
24615                    this: right,
24616                    trailing_comments: pending,
24617                }));
24618            }
24619            left = Expression::Or(Box::new(BinaryOp {
24620                left,
24621                right,
24622                left_comments,
24623                operator_comments,
24624                trailing_comments: Vec::new(),
24625                inferred_type: None,
24626            }));
24627        }
24628
24629        Ok(Self::maybe_rebalance_boolean_chain(left, false))
24630    }
24631
24632    /// Whether `||` should be parsed as logical OR for the active dialect.
24633    fn dpipe_is_logical_or(&self) -> bool {
24634        matches!(
24635            self.config.dialect,
24636            Some(crate::dialects::DialectType::MySQL | crate::dialects::DialectType::Solr)
24637        )
24638    }
24639
24640    /// Parse XOR expressions (MySQL logical XOR)
24641    fn parse_xor(&mut self) -> Result<Expression> {
24642        let mut left = self.parse_and()?;
24643
24644        while self.match_token(TokenType::Xor) {
24645            let right = self.parse_and()?;
24646            left = Expression::Xor(Box::new(Xor {
24647                this: Some(Box::new(left)),
24648                expression: Some(Box::new(right)),
24649                expressions: Vec::new(),
24650            }));
24651        }
24652
24653        Ok(left)
24654    }
24655
24656    /// Parse AND expressions
24657    fn parse_and(&mut self) -> Result<Expression> {
24658        let mut left = self.parse_not()?;
24659
24660        while self.check(TokenType::And) {
24661            // Capture comments from the token before AND (left operand's last token)
24662            let mut all_comments = self.previous_trailing_comments();
24663            // Also capture leading comments on the AND token (comments on a separate line before AND)
24664            all_comments.extend(self.current_leading_comments());
24665            self.advance(); // consume AND
24666                            // Also capture any trailing comments on the AND token itself
24667            all_comments.extend(self.previous_trailing_comments());
24668            // Clear trailing_comments from left expression to avoid duplication
24669            if !all_comments.is_empty() {
24670                Self::clear_rightmost_trailing_comments(&mut left);
24671            }
24672            // Filter out empty/whitespace-only comments (e.g., bare "--" with no content)
24673            all_comments.retain(|c| !c.trim().is_empty());
24674            // Split comments: block comments (/*...*/) go BEFORE the operator (left_comments),
24675            // line comments (raw text from --) go AFTER the operator (operator_comments).
24676            // This matches Python sqlglot's behavior where inline block comments stay
24677            // in-place and line comments shift to after the operator.
24678            let mut left_comments = Vec::new();
24679            let mut operator_comments = Vec::new();
24680            for comment in all_comments {
24681                if comment.starts_with("/*") {
24682                    left_comments.push(comment);
24683                } else {
24684                    operator_comments.push(comment);
24685                }
24686            }
24687            let mut right = self.parse_not()?;
24688            // If parse_comparison stored pending leading comments (comments before
24689            // the right operand's first token with no comparison following),
24690            // attach them as trailing_comments on the right expression.
24691            if !self.pending_leading_comments.is_empty() {
24692                let pending = self.pending_leading_comments.drain(..).collect::<Vec<_>>();
24693                right = Expression::Annotated(Box::new(Annotated {
24694                    this: right,
24695                    trailing_comments: pending,
24696                }));
24697            }
24698            left = Expression::And(Box::new(BinaryOp {
24699                left,
24700                right,
24701                left_comments,
24702                operator_comments,
24703                trailing_comments: Vec::new(),
24704                inferred_type: None,
24705            }));
24706        }
24707
24708        Ok(Self::maybe_rebalance_boolean_chain(left, true))
24709    }
24710
24711    /// Rebalance AND/OR chains into a balanced tree when no connector comments are present.
24712    /// This keeps connector chain depth logarithmic for very large predicates.
24713    fn maybe_rebalance_boolean_chain(expr: Expression, is_and: bool) -> Expression {
24714        if !Self::should_rebalance_boolean_chain(&expr, is_and) {
24715            return expr;
24716        }
24717
24718        let terms = Self::flatten_boolean_terms_owned(expr, is_and);
24719        if terms.len() <= 2 {
24720            return Self::build_balanced_boolean_tree(terms, is_and);
24721        }
24722
24723        Self::build_balanced_boolean_tree(terms, is_and)
24724    }
24725
24726    fn should_rebalance_boolean_chain(expr: &Expression, is_and: bool) -> bool {
24727        let mut leaf_count = 0usize;
24728        let mut stack = vec![expr];
24729
24730        while let Some(node) = stack.pop() {
24731            match (is_and, node) {
24732                (true, Expression::And(op)) => {
24733                    if !op.left_comments.is_empty()
24734                        || !op.operator_comments.is_empty()
24735                        || !op.trailing_comments.is_empty()
24736                    {
24737                        return false;
24738                    }
24739                    stack.push(&op.right);
24740                    stack.push(&op.left);
24741                }
24742                (false, Expression::Or(op)) => {
24743                    if !op.left_comments.is_empty()
24744                        || !op.operator_comments.is_empty()
24745                        || !op.trailing_comments.is_empty()
24746                    {
24747                        return false;
24748                    }
24749                    stack.push(&op.right);
24750                    stack.push(&op.left);
24751                }
24752                _ => leaf_count += 1,
24753            }
24754        }
24755
24756        leaf_count > 2
24757    }
24758
24759    fn flatten_boolean_terms_owned(expr: Expression, is_and: bool) -> Vec<Expression> {
24760        let mut terms = Vec::new();
24761        let mut stack = vec![expr];
24762
24763        while let Some(node) = stack.pop() {
24764            match (is_and, node) {
24765                (true, Expression::And(op)) => {
24766                    stack.push(op.right);
24767                    stack.push(op.left);
24768                }
24769                (false, Expression::Or(op)) => {
24770                    stack.push(op.right);
24771                    stack.push(op.left);
24772                }
24773                (_, other) => terms.push(other),
24774            }
24775        }
24776
24777        terms
24778    }
24779
24780    fn build_balanced_boolean_tree(mut terms: Vec<Expression>, is_and: bool) -> Expression {
24781        if terms.is_empty() {
24782            return Expression::Null(Null);
24783        }
24784
24785        while terms.len() > 1 {
24786            let mut next = Vec::with_capacity((terms.len() + 1) / 2);
24787            let mut iter = terms.into_iter();
24788
24789            while let Some(left) = iter.next() {
24790                if let Some(right) = iter.next() {
24791                    let combined = if is_and {
24792                        Expression::And(Box::new(BinaryOp::new(left, right)))
24793                    } else {
24794                        Expression::Or(Box::new(BinaryOp::new(left, right)))
24795                    };
24796                    next.push(combined);
24797                } else {
24798                    next.push(left);
24799                }
24800            }
24801
24802            terms = next;
24803        }
24804
24805        terms.pop().unwrap_or(Expression::Null(Null))
24806    }
24807
24808    /// Parse NOT expressions
24809    fn parse_not(&mut self) -> Result<Expression> {
24810        if self.match_token(TokenType::Not) {
24811            let expr = self.parse_not()?;
24812            Ok(Expression::Not(Box::new(UnaryOp::new(expr))))
24813        } else {
24814            self.parse_comparison()
24815        }
24816    }
24817
24818    /// Parse comparison expressions
24819    fn parse_comparison(&mut self) -> Result<Expression> {
24820        // Capture leading comments from the first token before parsing the left side.
24821        // If a comparison operator follows, these are placed after the left operand.
24822        let pre_left_comments = self.current_leading_comments();
24823        let mut left = self.parse_bitwise_or()?;
24824
24825        // Only attach pre-left comments when a comparison operator follows.
24826        // When no comparison follows (e.g., in SELECT list expressions or AND operands),
24827        // the comments are returned to the caller by being accessible via the
24828        // `comparison_pre_left_comments` field, so they can be placed appropriately
24829        // (e.g., after an alias name, or after the expression in an AND chain).
24830        let has_comparison_op = self.check(TokenType::Eq)
24831            || self.check(TokenType::Neq)
24832            || self.check(TokenType::Lt)
24833            || self.check(TokenType::Gt)
24834            || self.check(TokenType::Lte)
24835            || self.check(TokenType::Gte)
24836            || self.check(TokenType::Is)
24837            || self.check(TokenType::In)
24838            || self.check(TokenType::Not)
24839            || self.check(TokenType::Between)
24840            || self.check(TokenType::Like)
24841            || self.check(TokenType::ILike)
24842            || self.check(TokenType::RLike)
24843            || self.check(TokenType::SimilarTo);
24844
24845        if !pre_left_comments.is_empty() {
24846            if has_comparison_op {
24847                // Comparison follows: attach comments between left operand and operator
24848                match &mut left {
24849                    Expression::Column(col) => {
24850                        col.trailing_comments.extend(pre_left_comments);
24851                    }
24852                    Expression::Identifier(id) => {
24853                        id.trailing_comments.extend(pre_left_comments);
24854                    }
24855                    _ => {
24856                        left = Expression::Annotated(Box::new(Annotated {
24857                            this: left,
24858                            trailing_comments: pre_left_comments,
24859                        }));
24860                    }
24861                }
24862            } else {
24863                // No comparison operator: store comments for the caller to use.
24864                // Save them as "pending" comments that the caller can retrieve.
24865                self.pending_leading_comments = pre_left_comments;
24866            }
24867        }
24868
24869        loop {
24870            let mut global_in = false;
24871            if matches!(
24872                self.config.dialect,
24873                Some(crate::dialects::DialectType::ClickHouse)
24874            ) && self.check_identifier("GLOBAL")
24875                && (self.check_next(TokenType::Not) || self.check_next(TokenType::In))
24876            {
24877                self.advance();
24878                global_in = true;
24879            }
24880
24881            let expr = if self.match_token(TokenType::Eq) {
24882                // Check for ANY/ALL subquery
24883                if self.match_token(TokenType::Any) || self.match_token(TokenType::Some) {
24884                    let was_any = self.previous_token_type() == Some(TokenType::Any);
24885                    self.expect(TokenType::LParen)?;
24886                    let inner = self.parse_statement()?;
24887                    self.expect(TokenType::RParen)?;
24888                    let subquery = if was_any {
24889                        self.maybe_wrap_in_subquery(inner)
24890                    } else {
24891                        inner
24892                    };
24893                    Expression::Any(Box::new(QuantifiedExpr {
24894                        this: left,
24895                        subquery,
24896                        op: Some(QuantifiedOp::Eq),
24897                    }))
24898                } else if self.match_token(TokenType::All) {
24899                    self.expect(TokenType::LParen)?;
24900                    let inner = self.parse_statement()?;
24901                    self.expect(TokenType::RParen)?;
24902                    let subquery = self.maybe_wrap_in_subquery(inner);
24903                    Expression::All(Box::new(QuantifiedExpr {
24904                        this: left,
24905                        subquery,
24906                        op: Some(QuantifiedOp::Eq),
24907                    }))
24908                } else {
24909                    let right = self.parse_bitwise_or()?;
24910                    let trailing_comments = self.previous_trailing_comments();
24911                    Expression::Eq(Box::new(BinaryOp {
24912                        left,
24913                        right,
24914                        left_comments: Vec::new(),
24915                        operator_comments: Vec::new(),
24916                        trailing_comments,
24917                        inferred_type: None,
24918                    }))
24919                }
24920            } else if self.match_token(TokenType::Neq) {
24921                // Check for ANY/ALL subquery
24922                if self.match_token(TokenType::Any) || self.match_token(TokenType::Some) {
24923                    let was_any = self.previous_token_type() == Some(TokenType::Any);
24924                    self.expect(TokenType::LParen)?;
24925                    let inner = self.parse_statement()?;
24926                    self.expect(TokenType::RParen)?;
24927                    let subquery = if was_any {
24928                        self.maybe_wrap_in_subquery(inner)
24929                    } else {
24930                        inner
24931                    };
24932                    Expression::Any(Box::new(QuantifiedExpr {
24933                        this: left,
24934                        subquery,
24935                        op: Some(QuantifiedOp::Neq),
24936                    }))
24937                } else if self.match_token(TokenType::All) {
24938                    self.expect(TokenType::LParen)?;
24939                    let inner = self.parse_statement()?;
24940                    self.expect(TokenType::RParen)?;
24941                    let subquery = self.maybe_wrap_in_subquery(inner);
24942                    Expression::All(Box::new(QuantifiedExpr {
24943                        this: left,
24944                        subquery,
24945                        op: Some(QuantifiedOp::Neq),
24946                    }))
24947                } else {
24948                    let right = self.parse_bitwise_or()?;
24949                    let trailing_comments = self.previous_trailing_comments();
24950                    Expression::Neq(Box::new(BinaryOp {
24951                        left,
24952                        right,
24953                        left_comments: Vec::new(),
24954                        operator_comments: Vec::new(),
24955                        trailing_comments,
24956                        inferred_type: None,
24957                    }))
24958                }
24959            } else if self.match_token(TokenType::Lt) {
24960                // Check for ANY/ALL subquery
24961                if self.match_token(TokenType::Any) || self.match_token(TokenType::Some) {
24962                    let was_any = self.previous_token_type() == Some(TokenType::Any);
24963                    self.expect(TokenType::LParen)?;
24964                    let inner = self.parse_statement()?;
24965                    self.expect(TokenType::RParen)?;
24966                    let subquery = if was_any {
24967                        self.maybe_wrap_in_subquery(inner)
24968                    } else {
24969                        inner
24970                    };
24971                    Expression::Any(Box::new(QuantifiedExpr {
24972                        this: left,
24973                        subquery,
24974                        op: Some(QuantifiedOp::Lt),
24975                    }))
24976                } else if self.match_token(TokenType::All) {
24977                    self.expect(TokenType::LParen)?;
24978                    let inner = self.parse_statement()?;
24979                    self.expect(TokenType::RParen)?;
24980                    let subquery = self.maybe_wrap_in_subquery(inner);
24981                    Expression::All(Box::new(QuantifiedExpr {
24982                        this: left,
24983                        subquery,
24984                        op: Some(QuantifiedOp::Lt),
24985                    }))
24986                } else {
24987                    let right = self.parse_bitwise_or()?;
24988                    let trailing_comments = self.previous_trailing_comments();
24989                    Expression::Lt(Box::new(BinaryOp {
24990                        left,
24991                        right,
24992                        left_comments: Vec::new(),
24993                        operator_comments: Vec::new(),
24994                        trailing_comments,
24995                        inferred_type: None,
24996                    }))
24997                }
24998            } else if self.match_token(TokenType::Lte) {
24999                // Check for ANY/ALL subquery
25000                if self.match_token(TokenType::Any) || self.match_token(TokenType::Some) {
25001                    let was_any = self.previous_token_type() == Some(TokenType::Any);
25002                    self.expect(TokenType::LParen)?;
25003                    let inner = self.parse_statement()?;
25004                    self.expect(TokenType::RParen)?;
25005                    let subquery = if was_any {
25006                        self.maybe_wrap_in_subquery(inner)
25007                    } else {
25008                        inner
25009                    };
25010                    Expression::Any(Box::new(QuantifiedExpr {
25011                        this: left,
25012                        subquery,
25013                        op: Some(QuantifiedOp::Lte),
25014                    }))
25015                } else if self.match_token(TokenType::All) {
25016                    self.expect(TokenType::LParen)?;
25017                    let inner = self.parse_statement()?;
25018                    self.expect(TokenType::RParen)?;
25019                    let subquery = self.maybe_wrap_in_subquery(inner);
25020                    Expression::All(Box::new(QuantifiedExpr {
25021                        this: left,
25022                        subquery,
25023                        op: Some(QuantifiedOp::Lte),
25024                    }))
25025                } else {
25026                    let right = self.parse_bitwise_or()?;
25027                    let trailing_comments = self.previous_trailing_comments();
25028                    Expression::Lte(Box::new(BinaryOp {
25029                        left,
25030                        right,
25031                        left_comments: Vec::new(),
25032                        operator_comments: Vec::new(),
25033                        trailing_comments,
25034                        inferred_type: None,
25035                    }))
25036                }
25037            } else if self.match_token(TokenType::Gt) {
25038                // Check for ANY/ALL subquery
25039                if self.match_token(TokenType::Any) || self.match_token(TokenType::Some) {
25040                    let was_any = self.previous_token_type() == Some(TokenType::Any);
25041                    self.expect(TokenType::LParen)?;
25042                    let inner = self.parse_statement()?;
25043                    self.expect(TokenType::RParen)?;
25044                    let subquery = if was_any {
25045                        self.maybe_wrap_in_subquery(inner)
25046                    } else {
25047                        inner
25048                    };
25049                    Expression::Any(Box::new(QuantifiedExpr {
25050                        this: left,
25051                        subquery,
25052                        op: Some(QuantifiedOp::Gt),
25053                    }))
25054                } else if self.match_token(TokenType::All) {
25055                    self.expect(TokenType::LParen)?;
25056                    let inner = self.parse_statement()?;
25057                    self.expect(TokenType::RParen)?;
25058                    let subquery = self.maybe_wrap_in_subquery(inner);
25059                    Expression::All(Box::new(QuantifiedExpr {
25060                        this: left,
25061                        subquery,
25062                        op: Some(QuantifiedOp::Gt),
25063                    }))
25064                } else {
25065                    let right = self.parse_bitwise_or()?;
25066                    let trailing_comments = self.previous_trailing_comments();
25067                    Expression::Gt(Box::new(BinaryOp {
25068                        left,
25069                        right,
25070                        left_comments: Vec::new(),
25071                        operator_comments: Vec::new(),
25072                        trailing_comments,
25073                        inferred_type: None,
25074                    }))
25075                }
25076            } else if self.match_token(TokenType::Gte) {
25077                // Check for ANY/ALL subquery
25078                if self.match_token(TokenType::Any) || self.match_token(TokenType::Some) {
25079                    let was_any = self.previous_token_type() == Some(TokenType::Any);
25080                    self.expect(TokenType::LParen)?;
25081                    let inner = self.parse_statement()?;
25082                    self.expect(TokenType::RParen)?;
25083                    let subquery = if was_any {
25084                        self.maybe_wrap_in_subquery(inner)
25085                    } else {
25086                        inner
25087                    };
25088                    Expression::Any(Box::new(QuantifiedExpr {
25089                        this: left,
25090                        subquery,
25091                        op: Some(QuantifiedOp::Gte),
25092                    }))
25093                } else if self.match_token(TokenType::All) {
25094                    self.expect(TokenType::LParen)?;
25095                    let inner = self.parse_statement()?;
25096                    self.expect(TokenType::RParen)?;
25097                    let subquery = self.maybe_wrap_in_subquery(inner);
25098                    Expression::All(Box::new(QuantifiedExpr {
25099                        this: left,
25100                        subquery,
25101                        op: Some(QuantifiedOp::Gte),
25102                    }))
25103                } else {
25104                    let right = self.parse_bitwise_or()?;
25105                    let trailing_comments = self.previous_trailing_comments();
25106                    Expression::Gte(Box::new(BinaryOp {
25107                        left,
25108                        right,
25109                        left_comments: Vec::new(),
25110                        operator_comments: Vec::new(),
25111                        trailing_comments,
25112                        inferred_type: None,
25113                    }))
25114                }
25115            } else if self.match_token(TokenType::NullsafeEq) {
25116                // <=> (MySQL NULL-safe equality)
25117                let right = self.parse_bitwise_or()?;
25118                let trailing_comments = self.previous_trailing_comments();
25119                Expression::NullSafeEq(Box::new(BinaryOp {
25120                    left,
25121                    right,
25122                    left_comments: Vec::new(),
25123                    operator_comments: Vec::new(),
25124                    trailing_comments,
25125                    inferred_type: None,
25126                }))
25127            } else if self.check_identifier("SOUNDS") && self.check_next(TokenType::Like) {
25128                // MySQL SOUNDS LIKE: expr SOUNDS LIKE expr -> SOUNDEX(expr) = SOUNDEX(expr)
25129                self.advance(); // consume SOUNDS
25130                self.advance(); // consume LIKE
25131                let right = self.parse_bitwise_or()?;
25132                // Transform: SOUNDEX(left) = SOUNDEX(right)
25133                let soundex_left = Expression::Function(Box::new(Function::new(
25134                    "SOUNDEX".to_string(),
25135                    vec![left],
25136                )));
25137                let soundex_right = Expression::Function(Box::new(Function::new(
25138                    "SOUNDEX".to_string(),
25139                    vec![right],
25140                )));
25141                Expression::Eq(Box::new(BinaryOp::new(soundex_left, soundex_right)))
25142            } else if self.match_token(TokenType::Like) {
25143                // Check for ANY/ALL/SOME quantifier
25144                let quantifier = if self.match_token(TokenType::Any) {
25145                    Some("ANY".to_string())
25146                } else if self.match_token(TokenType::All) {
25147                    Some("ALL".to_string())
25148                } else if self.match_token(TokenType::Some) {
25149                    Some("SOME".to_string())
25150                } else {
25151                    None
25152                };
25153                let right = self.parse_bitwise_or()?;
25154                let escape = if self.match_token(TokenType::Escape) {
25155                    Some(self.parse_primary()?)
25156                } else {
25157                    None
25158                };
25159                Expression::Like(Box::new(LikeOp {
25160                    left,
25161                    right,
25162                    escape,
25163                    quantifier,
25164                    inferred_type: None,
25165                }))
25166            } else if self.match_token(TokenType::ILike) {
25167                // Check for ANY/ALL/SOME quantifier
25168                let quantifier = if self.match_token(TokenType::Any) {
25169                    Some("ANY".to_string())
25170                } else if self.match_token(TokenType::All) {
25171                    Some("ALL".to_string())
25172                } else if self.match_token(TokenType::Some) {
25173                    Some("SOME".to_string())
25174                } else {
25175                    None
25176                };
25177                let right = self.parse_bitwise_or()?;
25178                let escape = if self.match_token(TokenType::Escape) {
25179                    Some(self.parse_primary()?)
25180                } else {
25181                    None
25182                };
25183                Expression::ILike(Box::new(LikeOp {
25184                    left,
25185                    right,
25186                    escape,
25187                    quantifier,
25188                    inferred_type: None,
25189                }))
25190            } else if self.check_identifier("SIMILAR") && self.check_next(TokenType::To) {
25191                // SIMILAR TO operator (PostgreSQL/Redshift regex-like pattern matching)
25192                self.advance(); // consume SIMILAR
25193                self.advance(); // consume TO
25194                let pattern = self.parse_bitwise_or()?;
25195                let escape = if self.match_token(TokenType::Escape) {
25196                    Some(self.parse_primary()?)
25197                } else {
25198                    None
25199                };
25200                Expression::SimilarTo(Box::new(SimilarToExpr {
25201                    this: left,
25202                    pattern,
25203                    escape,
25204                    not: false,
25205                }))
25206            } else if self.match_token(TokenType::Glob) {
25207                let right = self.parse_bitwise_or()?;
25208                Expression::Glob(Box::new(BinaryOp::new(left, right)))
25209            } else if self.match_token(TokenType::Match) {
25210                // SQLite MATCH operator (FTS full-text search)
25211                let right = self.parse_bitwise_or()?;
25212                Expression::Match(Box::new(BinaryOp::new(left, right)))
25213            } else if self.match_token(TokenType::RLike) || self.match_token(TokenType::Tilde) {
25214                // PostgreSQL ~ (regexp match) operator
25215                let right = self.parse_bitwise_or()?;
25216                Expression::RegexpLike(Box::new(RegexpFunc {
25217                    this: left,
25218                    pattern: right,
25219                    flags: None,
25220                }))
25221            } else if self.match_token(TokenType::IRLike) {
25222                // PostgreSQL ~* (case-insensitive regexp match) operator
25223                let right = self.parse_bitwise_or()?;
25224                Expression::RegexpILike(Box::new(RegexpILike {
25225                    this: Box::new(left),
25226                    expression: Box::new(right),
25227                    flag: None,
25228                }))
25229            } else if self.match_token(TokenType::NotLike) {
25230                // PostgreSQL !~~ (NOT LIKE) operator
25231                let right = self.parse_bitwise_or()?;
25232                let escape = if self.match_token(TokenType::Escape) {
25233                    Some(self.parse_primary()?)
25234                } else {
25235                    None
25236                };
25237                let like_expr = Expression::Like(Box::new(LikeOp {
25238                    left,
25239                    right,
25240                    escape,
25241                    quantifier: None,
25242                    inferred_type: None,
25243                }));
25244                Expression::Not(Box::new(UnaryOp::new(like_expr)))
25245            } else if self.match_token(TokenType::NotILike) {
25246                // PostgreSQL !~~* (NOT ILIKE) operator
25247                let right = self.parse_bitwise_or()?;
25248                let escape = if self.match_token(TokenType::Escape) {
25249                    Some(self.parse_primary()?)
25250                } else {
25251                    None
25252                };
25253                let ilike_expr = Expression::ILike(Box::new(LikeOp {
25254                    left,
25255                    right,
25256                    escape,
25257                    quantifier: None,
25258                    inferred_type: None,
25259                }));
25260                Expression::Not(Box::new(UnaryOp::new(ilike_expr)))
25261            } else if self.match_token(TokenType::NotRLike) {
25262                // PostgreSQL !~ (NOT regexp match) operator
25263                let right = self.parse_bitwise_or()?;
25264                let regexp_expr = Expression::RegexpLike(Box::new(RegexpFunc {
25265                    this: left,
25266                    pattern: right,
25267                    flags: None,
25268                }));
25269                Expression::Not(Box::new(UnaryOp::new(regexp_expr)))
25270            } else if self.match_token(TokenType::NotIRLike) {
25271                // PostgreSQL !~* (NOT case-insensitive regexp match) operator
25272                let right = self.parse_bitwise_or()?;
25273                let regexp_expr = Expression::RegexpILike(Box::new(RegexpILike {
25274                    this: Box::new(left),
25275                    expression: Box::new(right),
25276                    flag: None,
25277                }));
25278                Expression::Not(Box::new(UnaryOp::new(regexp_expr)))
25279            } else if self.check(TokenType::Is)
25280                && !self.is_last_expression_token(TokenType::Is)
25281                && self.match_token(TokenType::Is)
25282            {
25283                let not = self.match_token(TokenType::Not);
25284                if self.match_token(TokenType::Null) {
25285                    let expr = Expression::IsNull(Box::new(IsNull {
25286                        this: left,
25287                        not,
25288                        postfix_form: false,
25289                    }));
25290                    // ClickHouse: IS NULL :: Type — handle :: cast after IS NULL
25291                    if matches!(
25292                        self.config.dialect,
25293                        Some(crate::dialects::DialectType::ClickHouse)
25294                    ) && self.check(TokenType::DColon)
25295                    {
25296                        self.advance(); // consume ::
25297                        let data_type = self.parse_data_type_for_cast()?;
25298                        Expression::Cast(Box::new(Cast {
25299                            this: expr,
25300                            to: data_type,
25301                            trailing_comments: Vec::new(),
25302                            double_colon_syntax: true,
25303                            format: None,
25304                            default: None,
25305                            inferred_type: None,
25306                        }))
25307                    } else {
25308                        expr
25309                    }
25310                } else if self.match_token(TokenType::True) {
25311                    // IS TRUE / IS NOT TRUE
25312                    Expression::IsTrue(Box::new(IsTrueFalse { this: left, not }))
25313                } else if self.match_token(TokenType::False) {
25314                    // IS FALSE / IS NOT FALSE
25315                    Expression::IsFalse(Box::new(IsTrueFalse { this: left, not }))
25316                } else if self.match_token(TokenType::Distinct) {
25317                    // IS DISTINCT FROM / IS NOT DISTINCT FROM
25318                    self.expect(TokenType::From)?;
25319                    let right = self.parse_bitwise_or()?;
25320                    if not {
25321                        // IS NOT DISTINCT FROM → null-safe equality
25322                        Expression::NullSafeEq(Box::new(BinaryOp::new(left, right)))
25323                    } else {
25324                        // IS DISTINCT FROM → null-safe inequality
25325                        Expression::NullSafeNeq(Box::new(BinaryOp::new(left, right)))
25326                    }
25327                } else if self.match_identifier("UNKNOWN") {
25328                    // IS UNKNOWN
25329                    Expression::IsNull(Box::new(IsNull {
25330                        this: left,
25331                        not,
25332                        postfix_form: false,
25333                    }))
25334                } else if self.match_texts(&["JSON"]) {
25335                    // IS JSON [VALUE|SCALAR|OBJECT|ARRAY] [WITH UNIQUE KEYS|WITHOUT UNIQUE KEYS|UNIQUE KEYS]
25336                    let json_type = if self.match_texts(&["VALUE"]) {
25337                        Some("VALUE".to_string())
25338                    } else if self.match_texts(&["SCALAR"]) {
25339                        Some("SCALAR".to_string())
25340                    } else if self.match_texts(&["OBJECT"]) {
25341                        Some("OBJECT".to_string())
25342                    } else if self.match_texts(&["ARRAY"]) {
25343                        Some("ARRAY".to_string())
25344                    } else {
25345                        None
25346                    };
25347
25348                    // Parse optional key uniqueness constraint
25349                    let unique_keys = if self.match_text_seq(&["WITH", "UNIQUE", "KEYS"]) {
25350                        Some(JsonUniqueKeys::With)
25351                    } else if self.match_text_seq(&["WITHOUT", "UNIQUE", "KEYS"]) {
25352                        Some(JsonUniqueKeys::Without)
25353                    } else if self.match_text_seq(&["UNIQUE", "KEYS"]) {
25354                        // Shorthand for WITH UNIQUE KEYS
25355                        Some(JsonUniqueKeys::Shorthand)
25356                    } else {
25357                        None
25358                    };
25359
25360                    Expression::IsJson(Box::new(IsJson {
25361                        this: left,
25362                        json_type,
25363                        unique_keys,
25364                        negated: not,
25365                    }))
25366                } else {
25367                    // IS followed by an expression (e.g., IS ?)
25368                    // If we matched NOT, wrap the IS expression in NOT
25369                    let right = self.parse_primary()?;
25370                    let is_expr = Expression::Is(Box::new(BinaryOp::new(left, right)));
25371                    if not {
25372                        Expression::Not(Box::new(UnaryOp::new(is_expr)))
25373                    } else {
25374                        is_expr
25375                    }
25376                }
25377            } else if self.match_token(TokenType::Not) {
25378                // Handle NOT IN, NOT BETWEEN, NOT LIKE, NOT ILIKE, etc.
25379                if self.match_token(TokenType::In) {
25380                    // BigQuery: NOT IN UNNEST(expr)
25381                    if self.check_identifier("UNNEST") {
25382                        self.advance(); // consume UNNEST
25383                        self.expect(TokenType::LParen)?;
25384                        let unnest_expr = self.parse_expression()?;
25385                        self.expect(TokenType::RParen)?;
25386                        Expression::In(Box::new(In {
25387                            this: left,
25388                            expressions: Vec::new(),
25389                            query: None,
25390                            not: true,
25391                            global: global_in,
25392                            unnest: Some(Box::new(unnest_expr)),
25393                            is_field: false,
25394                        }))
25395                    } else if self.match_token(TokenType::LParen) {
25396                        if self.check(TokenType::Select) || self.check(TokenType::With) {
25397                            let subquery = self.parse_statement()?;
25398                            self.expect(TokenType::RParen)?;
25399                            Expression::In(Box::new(In {
25400                                this: left,
25401                                expressions: Vec::new(),
25402                                query: Some(subquery),
25403                                not: true,
25404                                global: global_in,
25405                                unnest: None,
25406                                is_field: false,
25407                            }))
25408                        } else if self.check(TokenType::RParen) {
25409                            // Empty NOT IN set: NOT IN ()
25410                            self.advance();
25411                            Expression::In(Box::new(In {
25412                                this: left,
25413                                expressions: Vec::new(),
25414                                query: None,
25415                                not: true,
25416                                global: global_in,
25417                                unnest: None,
25418                                is_field: false,
25419                            }))
25420                        } else {
25421                            let expressions = self.parse_expression_list()?;
25422                            self.expect(TokenType::RParen)?;
25423                            Expression::In(Box::new(In {
25424                                this: left,
25425                                expressions,
25426                                query: None,
25427                                not: true,
25428                                global: global_in,
25429                                unnest: None,
25430                                is_field: false,
25431                            }))
25432                        }
25433                    } else {
25434                        // ClickHouse/DuckDB: IN without parentheses: expr NOT IN table_name
25435                        let table_expr = self.parse_primary()?;
25436                        Expression::In(Box::new(In {
25437                            this: left,
25438                            expressions: vec![table_expr],
25439                            query: None,
25440                            not: true,
25441                            global: global_in,
25442                            unnest: None,
25443                            is_field: true,
25444                        }))
25445                    }
25446                } else if self.match_token(TokenType::Between) {
25447                    // Check for SYMMETRIC/ASYMMETRIC qualifier
25448                    let symmetric = if self.match_texts(&["SYMMETRIC"]) {
25449                        Some(true)
25450                    } else if self.match_texts(&["ASYMMETRIC"]) {
25451                        Some(false)
25452                    } else {
25453                        None
25454                    };
25455                    let low = self.parse_bitwise_or()?;
25456                    self.expect(TokenType::And)?;
25457                    let high = self.parse_bitwise_or()?;
25458                    Expression::Between(Box::new(Between {
25459                        this: left,
25460                        low,
25461                        high,
25462                        not: true,
25463                        symmetric,
25464                    }))
25465                } else if self.check_identifier("SOUNDS") && self.check_next(TokenType::Like) {
25466                    // MySQL NOT SOUNDS LIKE: expr NOT SOUNDS LIKE expr -> NOT SOUNDEX(expr) = SOUNDEX(expr)
25467                    self.advance(); // consume SOUNDS
25468                    self.advance(); // consume LIKE
25469                    let right = self.parse_bitwise_or()?;
25470                    let soundex_left = Expression::Function(Box::new(Function::new(
25471                        "SOUNDEX".to_string(),
25472                        vec![left],
25473                    )));
25474                    let soundex_right = Expression::Function(Box::new(Function::new(
25475                        "SOUNDEX".to_string(),
25476                        vec![right],
25477                    )));
25478                    let eq_expr =
25479                        Expression::Eq(Box::new(BinaryOp::new(soundex_left, soundex_right)));
25480                    Expression::Not(Box::new(UnaryOp::new(eq_expr)))
25481                } else if self.match_token(TokenType::Like) {
25482                    let right = self.parse_bitwise_or()?;
25483                    let escape = if self.match_token(TokenType::Escape) {
25484                        Some(self.parse_primary()?)
25485                    } else {
25486                        None
25487                    };
25488                    let like_expr = Expression::Like(Box::new(LikeOp {
25489                        left,
25490                        right,
25491                        escape,
25492                        quantifier: None,
25493                        inferred_type: None,
25494                    }));
25495                    Expression::Not(Box::new(UnaryOp::new(like_expr)))
25496                } else if self.match_token(TokenType::ILike) {
25497                    let right = self.parse_bitwise_or()?;
25498                    let escape = if self.match_token(TokenType::Escape) {
25499                        Some(self.parse_primary()?)
25500                    } else {
25501                        None
25502                    };
25503                    let ilike_expr = Expression::ILike(Box::new(LikeOp {
25504                        left,
25505                        right,
25506                        escape,
25507                        quantifier: None,
25508                        inferred_type: None,
25509                    }));
25510                    Expression::Not(Box::new(UnaryOp::new(ilike_expr)))
25511                } else if self.check_identifier("SIMILAR") && self.check_next(TokenType::To) {
25512                    // NOT SIMILAR TO
25513                    self.advance(); // consume SIMILAR
25514                    self.advance(); // consume TO
25515                    let pattern = self.parse_bitwise_or()?;
25516                    let escape = if self.match_token(TokenType::Escape) {
25517                        Some(self.parse_primary()?)
25518                    } else {
25519                        None
25520                    };
25521                    Expression::SimilarTo(Box::new(SimilarToExpr {
25522                        this: left,
25523                        pattern,
25524                        escape,
25525                        not: true,
25526                    }))
25527                } else if self.match_token(TokenType::RLike) {
25528                    let right = self.parse_bitwise_or()?;
25529                    let regexp_expr = Expression::RegexpLike(Box::new(RegexpFunc {
25530                        this: left,
25531                        pattern: right,
25532                        flags: None,
25533                    }));
25534                    Expression::Not(Box::new(UnaryOp::new(regexp_expr)))
25535                } else if self.match_token(TokenType::Null) {
25536                    // SQLite: a NOT NULL (postfix form, two separate tokens)
25537                    // Creates NOT(a IS NULL) which is semantically equivalent
25538                    let is_null =
25539                        Expression::Is(Box::new(BinaryOp::new(left, Expression::Null(Null))));
25540                    Expression::Not(Box::new(UnaryOp::new(is_null)))
25541                } else {
25542                    // NOT followed by something else - revert
25543                    return Ok(left);
25544                }
25545            } else if self.match_token(TokenType::In) {
25546                // BigQuery: IN UNNEST(expr)
25547                if self.check_identifier("UNNEST") {
25548                    self.advance(); // consume UNNEST
25549                    self.expect(TokenType::LParen)?;
25550                    let unnest_expr = self.parse_expression()?;
25551                    self.expect(TokenType::RParen)?;
25552                    Expression::In(Box::new(In {
25553                        this: left,
25554                        expressions: Vec::new(),
25555                        query: None,
25556                        not: false,
25557                        global: global_in,
25558                        unnest: Some(Box::new(unnest_expr)),
25559                        is_field: false,
25560                    }))
25561                } else if self.match_token(TokenType::LParen) {
25562                    // Standard IN (list) or IN (subquery)
25563                    // Check if this is a subquery (IN (SELECT ...) or IN (WITH ... SELECT ...))
25564                    if self.check(TokenType::Select) || self.check(TokenType::With) {
25565                        // Use parse_statement to handle both SELECT and WITH...SELECT
25566                        let subquery = self.parse_statement()?;
25567                        self.expect(TokenType::RParen)?;
25568                        Expression::In(Box::new(In {
25569                            this: left,
25570                            expressions: Vec::new(),
25571                            query: Some(subquery),
25572                            not: false,
25573                            global: global_in,
25574                            unnest: None,
25575                            is_field: false,
25576                        }))
25577                    } else if self.check(TokenType::RParen) {
25578                        // Empty IN set: IN ()
25579                        self.advance();
25580                        Expression::In(Box::new(In {
25581                            this: left,
25582                            expressions: Vec::new(),
25583                            query: None,
25584                            not: false,
25585                            global: global_in,
25586                            unnest: None,
25587                            is_field: false,
25588                        }))
25589                    } else {
25590                        let expressions = self.parse_expression_list()?;
25591                        self.expect(TokenType::RParen)?;
25592                        Expression::In(Box::new(In {
25593                            this: left,
25594                            expressions,
25595                            query: None,
25596                            not: false,
25597                            global: global_in,
25598                            unnest: None,
25599                            is_field: false,
25600                        }))
25601                    }
25602                } else {
25603                    // DuckDB: IN without parentheses for array/list membership: 'red' IN tbl.flags
25604                    let expr = self.parse_bitwise_or()?;
25605                    Expression::In(Box::new(In {
25606                        this: left,
25607                        expressions: vec![expr],
25608                        query: None,
25609                        not: false,
25610                        global: global_in,
25611                        unnest: None,
25612                        is_field: true,
25613                    }))
25614                }
25615            } else if self.match_token(TokenType::Between) {
25616                // Check for SYMMETRIC/ASYMMETRIC qualifier
25617                let symmetric = if self.match_texts(&["SYMMETRIC"]) {
25618                    Some(true)
25619                } else if self.match_texts(&["ASYMMETRIC"]) {
25620                    Some(false)
25621                } else {
25622                    None
25623                };
25624                let low = self.parse_bitwise_or()?;
25625                self.expect(TokenType::And)?;
25626                let high = self.parse_bitwise_or()?;
25627                Expression::Between(Box::new(Between {
25628                    this: left,
25629                    low,
25630                    high,
25631                    not: false,
25632                    symmetric,
25633                }))
25634            } else if self.match_token(TokenType::Adjacent) {
25635                let right = self.parse_bitwise_or()?;
25636                Expression::Adjacent(Box::new(BinaryOp::new(left, right)))
25637            } else if self.check(TokenType::Overlaps)
25638                && self.current + 1 < self.tokens.len()
25639                && !matches!(
25640                    self.tokens[self.current + 1].token_type,
25641                    TokenType::Semicolon
25642                        | TokenType::Comma
25643                        | TokenType::From
25644                        | TokenType::Where
25645                        | TokenType::RParen
25646                        | TokenType::As
25647                        | TokenType::Join
25648                        | TokenType::On
25649                        | TokenType::OrderBy
25650                        | TokenType::GroupBy
25651                        | TokenType::Having
25652                        | TokenType::Limit
25653                        | TokenType::Union
25654                        | TokenType::Except
25655                        | TokenType::Intersect
25656                        | TokenType::Eof
25657                )
25658            {
25659                self.advance(); // consume OVERLAPS
25660                let right = self.parse_bitwise_or()?;
25661                Expression::Overlaps(Box::new(OverlapsExpr {
25662                    this: Some(left),
25663                    expression: Some(right),
25664                    left_start: None,
25665                    left_end: None,
25666                    right_start: None,
25667                    right_end: None,
25668                }))
25669            } else if self.match_token(TokenType::IsNull) {
25670                // ISNULL postfix operator (PostgreSQL/SQLite)
25671                Expression::IsNull(Box::new(IsNull {
25672                    this: left,
25673                    not: false,
25674                    postfix_form: true,
25675                }))
25676            } else if self.match_token(TokenType::NotNull) {
25677                // NOTNULL postfix operator (PostgreSQL/SQLite)
25678                Expression::IsNull(Box::new(IsNull {
25679                    this: left,
25680                    not: true,
25681                    postfix_form: true,
25682                }))
25683            } else if self.match_token(TokenType::AtAt) {
25684                // PostgreSQL text search match operator (@@)
25685                let right = self.parse_bitwise_or()?;
25686                Expression::TsMatch(Box::new(BinaryOp::new(left, right)))
25687            } else if self.match_token(TokenType::AtGt) {
25688                // PostgreSQL array contains all operator (@>)
25689                let right = self.parse_bitwise_or()?;
25690                Expression::ArrayContainsAll(Box::new(BinaryOp::new(left, right)))
25691            } else if self.match_token(TokenType::LtAt) {
25692                // PostgreSQL array contained by operator (<@)
25693                let right = self.parse_bitwise_or()?;
25694                Expression::ArrayContainedBy(Box::new(BinaryOp::new(left, right)))
25695            } else if self.match_token(TokenType::DAmp) {
25696                // PostgreSQL array overlaps operator (&&)
25697                let right = self.parse_bitwise_or()?;
25698                Expression::ArrayOverlaps(Box::new(BinaryOp::new(left, right)))
25699            } else if self.match_token(TokenType::QMarkAmp) {
25700                // PostgreSQL JSONB contains all top keys operator (?&)
25701                let right = self.parse_bitwise_or()?;
25702                Expression::JSONBContainsAllTopKeys(Box::new(BinaryOp::new(left, right)))
25703            } else if self.match_token(TokenType::QMarkPipe) {
25704                // PostgreSQL JSONB contains any top key operator (?|)
25705                let right = self.parse_bitwise_or()?;
25706                Expression::JSONBContainsAnyTopKeys(Box::new(BinaryOp::new(left, right)))
25707            } else if !matches!(
25708                self.config.dialect,
25709                Some(crate::dialects::DialectType::ClickHouse)
25710            ) && self.match_token(TokenType::Parameter)
25711            {
25712                // PostgreSQL JSONB contains key operator (?)
25713                // Note: ? is tokenized as Parameter, but when used between expressions
25714                // it's the JSONB key existence operator
25715                // ClickHouse uses ? as ternary operator instead, handled in parse_assignment()
25716                let right = self.parse_bitwise_or()?;
25717                Expression::JSONBContains(Box::new(BinaryFunc {
25718                    original_name: Some("?".to_string()),
25719                    this: left,
25720                    expression: right,
25721                    inferred_type: None,
25722                }))
25723            } else if self.match_token(TokenType::HashDash) {
25724                // PostgreSQL JSONB delete at path operator (#-)
25725                let right = self.parse_bitwise_or()?;
25726                Expression::JSONBDeleteAtPath(Box::new(BinaryOp::new(left, right)))
25727            } else if self.match_token(TokenType::AmpLt) {
25728                // PostgreSQL range extends left operator (&<)
25729                let right = self.parse_bitwise_or()?;
25730                Expression::ExtendsLeft(Box::new(BinaryOp::new(left, right)))
25731            } else if self.match_token(TokenType::AmpGt) {
25732                // PostgreSQL range extends right operator (&>)
25733                let right = self.parse_bitwise_or()?;
25734                Expression::ExtendsRight(Box::new(BinaryOp::new(left, right)))
25735            } else if self.match_identifier("MEMBER") {
25736                // MySQL MEMBER OF(expr) operator - JSON membership test
25737                self.expect(TokenType::Of)?;
25738                self.expect(TokenType::LParen)?;
25739                let right = self.parse_expression()?;
25740                self.expect(TokenType::RParen)?;
25741                Expression::MemberOf(Box::new(BinaryOp::new(left, right)))
25742            } else if self.match_token(TokenType::CaretAt) {
25743                // DuckDB/PostgreSQL starts-with operator (^@)
25744                let right = self.parse_bitwise_or()?;
25745                Expression::StartsWith(Box::new(BinaryFunc {
25746                    original_name: Some("^@".to_string()),
25747                    this: left,
25748                    expression: right,
25749                    inferred_type: None,
25750                }))
25751            } else if self.match_token(TokenType::LrArrow) {
25752                // PostgreSQL distance operator (<->)
25753                let right = self.parse_bitwise_or()?;
25754                Expression::EuclideanDistance(Box::new(EuclideanDistance {
25755                    this: Box::new(left),
25756                    expression: Box::new(right),
25757                }))
25758            } else if self.match_token(TokenType::Operator) {
25759                // PostgreSQL OPERATOR(schema.op) syntax for schema-qualified operators
25760                // Example: col1 OPERATOR(pg_catalog.~) col2
25761                self.expect(TokenType::LParen)?;
25762
25763                // Collect all tokens between parentheses as the operator text
25764                // This can include schema names, dots, and operator symbols like ~
25765                let mut op_text = String::new();
25766                while !self.check(TokenType::RParen) && !self.is_at_end() {
25767                    op_text.push_str(&self.peek().text);
25768                    self.advance();
25769                }
25770                self.expect(TokenType::RParen)?;
25771
25772                // Collect any inline comments (e.g., /* foo */) between OPERATOR() and the RHS
25773                // Try trailing comments of the RParen (previous token) first,
25774                // then leading comments of the next token
25775                let mut comments = if self.current > 0 {
25776                    std::mem::take(&mut self.tokens[self.current - 1].trailing_comments)
25777                } else {
25778                    Vec::new()
25779                };
25780                if comments.is_empty() && !self.is_at_end() {
25781                    comments = std::mem::take(&mut self.tokens[self.current].comments);
25782                }
25783
25784                // Parse the right-hand side expression
25785                let right = self.parse_bitwise_or()?;
25786
25787                Expression::Operator(Box::new(Operator {
25788                    this: Box::new(left),
25789                    operator: Some(Box::new(Expression::Identifier(Identifier::new(op_text)))),
25790                    expression: Box::new(right),
25791                    comments,
25792                }))
25793            } else {
25794                return Ok(left);
25795            };
25796
25797            left = expr;
25798        }
25799    }
25800
25801    /// Parse bitwise OR expressions (|)
25802    fn parse_bitwise_or(&mut self) -> Result<Expression> {
25803        let mut left = self.parse_bitwise_xor()?;
25804
25805        loop {
25806            if self.match_token(TokenType::Pipe) {
25807                let right = self.parse_bitwise_xor()?;
25808                left = Expression::BitwiseOr(Box::new(BinaryOp::new(left, right)));
25809            } else {
25810                return Ok(left);
25811            }
25812        }
25813    }
25814
25815    /// Parse bitwise operators with an existing left expression
25816    /// Used for DuckDB's @ operator when @col is tokenized as a single Var token
25817    /// We already have the column, now need to continue parsing any binary operators
25818    /// Follows the same precedence chain: bitwise -> shift -> addition -> multiplication
25819    fn parse_bitwise_continuation(&mut self, left: Expression) -> Result<Expression> {
25820        // Start from multiplication level since we have a primary expression (col)
25821        // Then work up through addition, shift, bitwise AND/XOR/OR
25822        let mult_result = self.parse_multiplication_continuation(left)?;
25823        let add_result = self.parse_addition_continuation(mult_result)?;
25824        self.parse_bitwise_or_continuation(add_result)
25825    }
25826
25827    /// Parse bitwise OR with an existing left expression
25828    fn parse_bitwise_or_continuation(&mut self, mut left: Expression) -> Result<Expression> {
25829        loop {
25830            if self.match_token(TokenType::Pipe) {
25831                let right = self.parse_bitwise_xor()?;
25832                left = Expression::BitwiseOr(Box::new(BinaryOp::new(left, right)));
25833            } else {
25834                return Ok(left);
25835            }
25836        }
25837    }
25838
25839    /// Parse multiplication/division with an existing left expression
25840    fn parse_multiplication_continuation(&mut self, mut left: Expression) -> Result<Expression> {
25841        loop {
25842            let expr = if self.match_token(TokenType::Star) {
25843                let right = self.parse_power()?;
25844                Expression::Mul(Box::new(BinaryOp::new(left, right)))
25845            } else if self.match_token(TokenType::Slash) {
25846                let right = self.parse_power()?;
25847                Expression::Div(Box::new(BinaryOp::new(left, right)))
25848            } else if self.match_token(TokenType::Percent) {
25849                let right = self.parse_power()?;
25850                Expression::Mod(Box::new(BinaryOp::new(left, right)))
25851            } else if !self.check(TokenType::QuotedIdentifier)
25852                && (self.match_identifier("DIV") || self.match_token(TokenType::Div))
25853            {
25854                // DIV keyword for integer division (Hive/Spark/MySQL/ClickHouse)
25855                // Don't match QuotedIdentifier — `DIV` is an identifier alias, not an operator
25856                // If DIV was matched as a Var (not keyword Div token), verify it's actually
25857                // an operator by checking that a right operand follows. Otherwise it's an alias.
25858                let matched_as_var = self.previous().token_type == TokenType::Var;
25859                if matched_as_var
25860                    && (self.is_at_end()
25861                        || self.check(TokenType::Semicolon)
25862                        || self.check(TokenType::From)
25863                        || self.check(TokenType::Where)
25864                        || self.check(TokenType::Comma)
25865                        || self.check(TokenType::RParen))
25866                {
25867                    // Backtrack: DIV is being used as an alias, not an operator
25868                    self.current -= 1;
25869                    return Ok(left);
25870                }
25871                let right = self.parse_power()?;
25872                Expression::IntDiv(Box::new(crate::expressions::BinaryFunc {
25873                    this: left,
25874                    expression: right,
25875                    original_name: None,
25876                    inferred_type: None,
25877                }))
25878            } else {
25879                return Ok(left);
25880            };
25881            left = expr;
25882        }
25883    }
25884
25885    /// Parse addition/subtraction with an existing left expression
25886    fn parse_addition_continuation(&mut self, mut left: Expression) -> Result<Expression> {
25887        loop {
25888            let left_comments = self.previous_trailing_comments();
25889
25890            let expr = if self.match_token(TokenType::Plus) {
25891                let operator_comments = self.previous_trailing_comments();
25892                let right = self.parse_at_time_zone()?;
25893                let trailing_comments = self.previous_trailing_comments();
25894                Expression::Add(Box::new(BinaryOp {
25895                    left,
25896                    right,
25897                    left_comments,
25898                    operator_comments,
25899                    trailing_comments,
25900                    inferred_type: None,
25901                }))
25902            } else if self.match_token(TokenType::Dash) {
25903                let operator_comments = self.previous_trailing_comments();
25904                let right = self.parse_at_time_zone()?;
25905                let trailing_comments = self.previous_trailing_comments();
25906                Expression::Sub(Box::new(BinaryOp {
25907                    left,
25908                    right,
25909                    left_comments,
25910                    operator_comments,
25911                    trailing_comments,
25912                    inferred_type: None,
25913                }))
25914            } else if !self.dpipe_is_logical_or() && self.match_token(TokenType::DPipe) {
25915                let operator_comments = self.previous_trailing_comments();
25916                let right = self.parse_at_time_zone()?;
25917                let trailing_comments = self.previous_trailing_comments();
25918                Expression::Concat(Box::new(BinaryOp {
25919                    left,
25920                    right,
25921                    left_comments,
25922                    operator_comments,
25923                    trailing_comments,
25924                    inferred_type: None,
25925                }))
25926            } else if self.match_token(TokenType::DQMark) {
25927                let right = self.parse_at_time_zone()?;
25928                Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
25929                    expressions: vec![left, right],
25930                    original_name: None,
25931                    inferred_type: None,
25932                }))
25933            } else {
25934                return Ok(left);
25935            };
25936
25937            left = expr;
25938        }
25939    }
25940
25941    /// Parse bitwise XOR expressions (^)
25942    fn parse_bitwise_xor(&mut self) -> Result<Expression> {
25943        let mut left = self.parse_bitwise_and()?;
25944
25945        loop {
25946            // In PostgreSQL, ^ is POWER (handled at parse_power level), and # is BitwiseXor
25947            if matches!(
25948                self.config.dialect,
25949                Some(crate::dialects::DialectType::PostgreSQL)
25950                    | Some(crate::dialects::DialectType::Redshift)
25951            ) {
25952                if self.match_token(TokenType::Hash) {
25953                    let right = self.parse_bitwise_and()?;
25954                    left = Expression::BitwiseXor(Box::new(BinaryOp::new(left, right)));
25955                } else {
25956                    return Ok(left);
25957                }
25958            } else if self.match_token(TokenType::Caret) {
25959                let right = self.parse_bitwise_and()?;
25960                left = Expression::BitwiseXor(Box::new(BinaryOp::new(left, right)));
25961            } else {
25962                return Ok(left);
25963            }
25964        }
25965    }
25966
25967    /// Parse bitwise AND expressions (&)
25968    fn parse_bitwise_and(&mut self) -> Result<Expression> {
25969        let mut left = self.parse_shift()?;
25970
25971        loop {
25972            if self.match_token(TokenType::Amp) {
25973                let right = self.parse_shift()?;
25974                left = Expression::BitwiseAnd(Box::new(BinaryOp::new(left, right)));
25975            } else {
25976                return Ok(left);
25977            }
25978        }
25979    }
25980
25981    /// Parse shift expressions (<< and >>)
25982    fn parse_shift(&mut self) -> Result<Expression> {
25983        let mut left = self.parse_addition()?;
25984
25985        loop {
25986            if self.match_token(TokenType::LtLt) {
25987                let right = self.parse_addition()?;
25988                left = Expression::BitwiseLeftShift(Box::new(BinaryOp::new(left, right)));
25989            } else if self.match_token(TokenType::GtGt) {
25990                let right = self.parse_addition()?;
25991                left = Expression::BitwiseRightShift(Box::new(BinaryOp::new(left, right)));
25992            } else {
25993                return Ok(left);
25994            }
25995        }
25996    }
25997
25998    /// Parse addition/subtraction
25999    fn parse_addition(&mut self) -> Result<Expression> {
26000        let mut left = self.parse_at_time_zone()?;
26001
26002        loop {
26003            // Capture comments after left operand before consuming operator
26004            let left_comments = self.previous_trailing_comments();
26005
26006            let expr = if self.match_token(TokenType::Plus) {
26007                // Capture comments after operator (before right operand)
26008                let operator_comments = self.previous_trailing_comments();
26009                let right = self.parse_at_time_zone()?;
26010                let trailing_comments = self.previous_trailing_comments();
26011                Expression::Add(Box::new(BinaryOp {
26012                    left,
26013                    right,
26014                    left_comments,
26015                    operator_comments,
26016                    trailing_comments,
26017                    inferred_type: None,
26018                }))
26019            } else if self.match_token(TokenType::Dash) {
26020                let operator_comments = self.previous_trailing_comments();
26021                let right = self.parse_at_time_zone()?;
26022                let trailing_comments = self.previous_trailing_comments();
26023                Expression::Sub(Box::new(BinaryOp {
26024                    left,
26025                    right,
26026                    left_comments,
26027                    operator_comments,
26028                    trailing_comments,
26029                    inferred_type: None,
26030                }))
26031            } else if !self.dpipe_is_logical_or() && self.match_token(TokenType::DPipe) {
26032                let operator_comments = self.previous_trailing_comments();
26033                let right = self.parse_at_time_zone()?;
26034                let trailing_comments = self.previous_trailing_comments();
26035                Expression::Concat(Box::new(BinaryOp {
26036                    left,
26037                    right,
26038                    left_comments,
26039                    operator_comments,
26040                    trailing_comments,
26041                    inferred_type: None,
26042                }))
26043            } else if self.match_token(TokenType::DQMark) {
26044                let right = self.parse_at_time_zone()?;
26045                Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
26046                    expressions: vec![left, right],
26047                    original_name: None,
26048                    inferred_type: None,
26049                }))
26050            } else {
26051                return Ok(left);
26052            };
26053
26054            left = expr;
26055        }
26056    }
26057
26058    /// Parse AT TIME ZONE expression
26059    fn parse_at_time_zone(&mut self) -> Result<Expression> {
26060        let mut expr = self.parse_multiplication()?;
26061
26062        // Check for AT TIME ZONE (can be chained)
26063        while self.check(TokenType::Var) && self.peek().text.to_uppercase() == "AT" {
26064            self.advance(); // consume AT
26065                            // Check for TIME ZONE
26066            if self.check(TokenType::Time) {
26067                self.advance(); // consume TIME
26068                if self.check(TokenType::Var) && self.peek().text.to_uppercase() == "ZONE" {
26069                    self.advance(); // consume ZONE
26070                    let zone = self.parse_unary()?;
26071                    expr = Expression::AtTimeZone(Box::new(AtTimeZone { this: expr, zone }));
26072                } else {
26073                    return Err(self.parse_error("Expected ZONE after AT TIME"));
26074                }
26075            } else {
26076                return Err(self.parse_error("Expected TIME after AT"));
26077            }
26078        }
26079
26080        Ok(expr)
26081    }
26082
26083    /// Parse multiplication/division
26084    fn parse_multiplication(&mut self) -> Result<Expression> {
26085        let mut left = self.parse_power()?;
26086
26087        loop {
26088            let expr = if self.match_token(TokenType::Star) {
26089                let right = self.parse_power()?;
26090                Expression::Mul(Box::new(BinaryOp::new(left, right)))
26091            } else if self.match_token(TokenType::Slash) {
26092                let right = self.parse_power()?;
26093                Expression::Div(Box::new(BinaryOp::new(left, right)))
26094            } else if self.match_token(TokenType::Percent) {
26095                let right = self.parse_power()?;
26096                Expression::Mod(Box::new(BinaryOp::new(left, right)))
26097            } else if !self.check(TokenType::QuotedIdentifier)
26098                && (self.match_identifier("MOD") || self.match_token(TokenType::Mod))
26099            {
26100                // MySQL/Teradata: x MOD y (infix modulo operator)
26101                // Don't match QuotedIdentifier — `MOD` is an identifier alias, not an operator
26102                let right = self.parse_power()?;
26103                Expression::Mod(Box::new(BinaryOp::new(left, right)))
26104            } else if !self.check(TokenType::QuotedIdentifier)
26105                && (self.match_identifier("DIV") || self.match_token(TokenType::Div))
26106            {
26107                // DIV keyword for integer division (Hive/Spark/MySQL/ClickHouse)
26108                // Don't match QuotedIdentifier — `DIV` is an identifier alias, not an operator
26109                // If DIV was matched as a Var (not keyword Div token), verify it's actually
26110                // an operator by checking that a right operand follows. Otherwise it's an alias.
26111                let matched_as_var = self.previous().token_type == TokenType::Var;
26112                if matched_as_var
26113                    && (self.is_at_end()
26114                        || self.check(TokenType::Semicolon)
26115                        || self.check(TokenType::From)
26116                        || self.check(TokenType::Where)
26117                        || self.check(TokenType::Comma)
26118                        || self.check(TokenType::RParen))
26119                {
26120                    // Backtrack: DIV is being used as an alias, not an operator
26121                    self.current -= 1;
26122                    return Ok(left);
26123                }
26124                let right = self.parse_power()?;
26125                Expression::IntDiv(Box::new(crate::expressions::BinaryFunc {
26126                    this: left,
26127                    expression: right,
26128                    original_name: None,
26129                    inferred_type: None,
26130                }))
26131            } else {
26132                return Ok(left);
26133            };
26134
26135            left = expr;
26136        }
26137    }
26138
26139    /// Parse power/exponentiation (**) operator
26140    /// In PostgreSQL/Redshift, ^ (Caret) is POWER, not BitwiseXor
26141    fn parse_power(&mut self) -> Result<Expression> {
26142        let mut left = self.parse_unary()?;
26143
26144        loop {
26145            if self.match_token(TokenType::DStar) {
26146                let right = self.parse_unary()?;
26147                left = Expression::Power(Box::new(BinaryFunc {
26148                    original_name: Some("**".to_string()),
26149                    this: left,
26150                    expression: right,
26151                    inferred_type: None,
26152                }));
26153            } else if matches!(
26154                self.config.dialect,
26155                Some(crate::dialects::DialectType::PostgreSQL)
26156                    | Some(crate::dialects::DialectType::Redshift)
26157                    | Some(crate::dialects::DialectType::DuckDB)
26158            ) && self.match_token(TokenType::Caret)
26159            {
26160                let right = self.parse_unary()?;
26161                left = Expression::Power(Box::new(BinaryFunc {
26162                    original_name: None,
26163                    this: left,
26164                    expression: right,
26165                    inferred_type: None,
26166                }));
26167            } else {
26168                return Ok(left);
26169            }
26170        }
26171    }
26172
26173    /// Try to parse a type literal expression like: point '(4,4)', timestamp '2024-01-01'
26174    /// PostgreSQL allows type name followed by string literal as a cast shorthand.
26175    /// Returns None if not a type literal pattern, so caller can fall through to parse_primary.
26176    fn try_parse_type_literal(&mut self) -> Result<Option<Expression>> {
26177        // Save position for backtracking
26178        let start_pos = self.current;
26179
26180        // Check if we're at an identifier or Var token that could be a type name
26181        if !self.check(TokenType::Identifier) && !self.check(TokenType::Var) {
26182            return Ok(None);
26183        }
26184
26185        // Get the potential type name without consuming
26186        let type_name = self.peek().text.to_uppercase();
26187
26188        // Check if this looks like a known data type that supports literal syntax
26189        // These are types where PostgreSQL allows TYPE 'value' syntax
26190        // NOTE: DATE, TIME, TIMESTAMP, INTERVAL are NOT here because they have their own
26191        // token types and are handled specially in parse_primary
26192        let is_type_literal_type = matches!(
26193            type_name.as_str(),
26194            // Geometric types (PostgreSQL)
26195            "POINT" | "LINE" | "LSEG" | "BOX" | "PATH" | "POLYGON" | "CIRCLE" |
26196            // Network types (PostgreSQL)
26197            "INET" | "CIDR" | "MACADDR" | "MACADDR8" |
26198            // Other types that support literal syntax
26199            "UUID" | "JSON" | "JSONB" | "XML" | "BIT" | "VARBIT" |
26200            // Range types (PostgreSQL)
26201            "INT4RANGE" | "INT8RANGE" | "NUMRANGE" | "TSRANGE" | "TSTZRANGE" | "DATERANGE"
26202        );
26203
26204        if !is_type_literal_type {
26205            return Ok(None);
26206        }
26207
26208        // Check if the next token (after type name) is a string literal
26209        if self.current + 1 >= self.tokens.len() {
26210            return Ok(None);
26211        }
26212
26213        if self.tokens[self.current + 1].token_type != TokenType::String {
26214            return Ok(None);
26215        }
26216
26217        // This looks like a type literal! Parse it.
26218        // Consume the type name
26219        self.advance();
26220
26221        // Try to parse the data type from the name
26222        let data_type = match self.parse_data_type_from_name(&type_name) {
26223            Ok(dt) => dt,
26224            Err(_) => {
26225                // If we can't parse the type, backtrack
26226                self.current = start_pos;
26227                return Ok(None);
26228            }
26229        };
26230
26231        // Parse the string literal
26232        if !self.check(TokenType::String) {
26233            // Backtrack - something went wrong
26234            self.current = start_pos;
26235            return Ok(None);
26236        }
26237
26238        let string_token = self.advance();
26239        let value = Expression::Literal(Literal::String(string_token.text.clone()));
26240
26241        // JSON literal: JSON '"foo"' -> ParseJson expression (matches Python sqlglot)
26242        if matches!(data_type, DataType::Json | DataType::JsonB)
26243            || matches!(type_name.as_str(), "JSON" | "JSONB")
26244        {
26245            return Ok(Some(Expression::ParseJson(Box::new(UnaryFunc {
26246                this: value,
26247                original_name: None,
26248                inferred_type: None,
26249            }))));
26250        }
26251
26252        // Create the Cast expression
26253        Ok(Some(Expression::Cast(Box::new(Cast {
26254            this: value,
26255            to: data_type,
26256            trailing_comments: Vec::new(),
26257            double_colon_syntax: false,
26258            format: None,
26259            default: None,
26260            inferred_type: None,
26261        }))))
26262    }
26263
26264    /// Try to parse type shorthand CAST: INT 1, VARCHAR 'x', STRING 'x', TEXT 'y', etc.
26265    /// In generic mode (no dialect), a type keyword followed by a literal becomes CAST(literal AS type).
26266    /// This matches Python sqlglot's `_parse_types()` behavior.
26267    fn try_parse_type_shorthand_cast(&mut self) -> Result<Option<Expression>> {
26268        // Only apply in generic mode
26269        let is_generic = self.config.dialect.is_none()
26270            || matches!(
26271                self.config.dialect,
26272                Some(crate::dialects::DialectType::Generic)
26273            );
26274        if !is_generic {
26275            return Ok(None);
26276        }
26277
26278        let start_pos = self.current;
26279
26280        // Check if current token is a type keyword
26281        if !self.is_type_keyword() {
26282            return Ok(None);
26283        }
26284
26285        // Don't apply if the type keyword is followed by a left paren (function call)
26286        // or is not followed by a literal
26287        if self.current + 1 >= self.tokens.len() {
26288            return Ok(None);
26289        }
26290
26291        let next_type = self.tokens[self.current + 1].token_type;
26292        // The value after the type keyword must be a literal (number or string)
26293        if !matches!(next_type, TokenType::Number | TokenType::String) {
26294            return Ok(None);
26295        }
26296
26297        // Get the type name
26298        let type_token = self.advance();
26299        let type_name = type_token.text.to_uppercase();
26300
26301        // Parse the data type
26302        let data_type = match type_name.as_str() {
26303            "INT" | "INTEGER" => DataType::Int {
26304                length: None,
26305                integer_spelling: type_name == "INTEGER",
26306            },
26307            "BIGINT" => DataType::BigInt { length: None },
26308            "SMALLINT" => DataType::SmallInt { length: None },
26309            "TINYINT" => DataType::TinyInt { length: None },
26310            "FLOAT" => DataType::Float {
26311                precision: None,
26312                scale: None,
26313                real_spelling: false,
26314            },
26315            "DOUBLE" => DataType::Double {
26316                precision: None,
26317                scale: None,
26318            },
26319            "DECIMAL" | "NUMERIC" => DataType::Decimal {
26320                precision: None,
26321                scale: None,
26322            },
26323            "REAL" => DataType::Float {
26324                precision: None,
26325                scale: None,
26326                real_spelling: true,
26327            },
26328            "VARCHAR" => DataType::VarChar {
26329                length: None,
26330                parenthesized_length: false,
26331            },
26332            "CHAR" => DataType::Char { length: None },
26333            "TEXT" | "STRING" => DataType::Text,
26334            "BOOLEAN" | "BOOL" => DataType::Boolean,
26335            "BINARY" => DataType::Binary { length: None },
26336            "VARBINARY" => DataType::VarBinary { length: None },
26337            _ => {
26338                // Unknown type, backtrack
26339                self.current = start_pos;
26340                return Ok(None);
26341            }
26342        };
26343
26344        // Parse the literal value
26345        let value = if self.check(TokenType::String) {
26346            let tok = self.advance();
26347            Expression::Literal(Literal::String(tok.text.clone()))
26348        } else if self.check(TokenType::Number) {
26349            let tok = self.advance();
26350            Expression::Literal(Literal::Number(tok.text.clone()))
26351        } else {
26352            self.current = start_pos;
26353            return Ok(None);
26354        };
26355
26356        // Create the Cast expression
26357        Ok(Some(Expression::Cast(Box::new(Cast {
26358            this: value,
26359            to: data_type,
26360            trailing_comments: Vec::new(),
26361            double_colon_syntax: false,
26362            format: None,
26363            default: None,
26364            inferred_type: None,
26365        }))))
26366    }
26367
26368    /// Parse unary expressions
26369    fn parse_unary(&mut self) -> Result<Expression> {
26370        if self.match_token(TokenType::Plus) {
26371            // Unary plus is a no-op - just parse the inner expression
26372            // This handles +++1 -> 1, +-1 -> -1, etc.
26373            self.parse_unary()
26374        } else if self.match_token(TokenType::Dash) {
26375            let expr = self.parse_unary()?;
26376            Ok(Expression::Neg(Box::new(UnaryOp::new(expr))))
26377        } else if self.match_token(TokenType::Plus) {
26378            // Unary plus: +1, +expr — just return the inner expression (no-op)
26379            self.parse_unary()
26380        } else if self.match_token(TokenType::Tilde) {
26381            let expr = self.parse_unary()?;
26382            Ok(Expression::BitwiseNot(Box::new(UnaryOp::new(expr))))
26383        } else if self.match_token(TokenType::DPipeSlash) {
26384            // ||/ (Cube root - PostgreSQL)
26385            let expr = self.parse_unary()?;
26386            Ok(Expression::Cbrt(Box::new(UnaryFunc::with_name(
26387                expr,
26388                "||/".to_string(),
26389            ))))
26390        } else if self.match_token(TokenType::PipeSlash) {
26391            // |/ (Square root - PostgreSQL)
26392            let expr = self.parse_unary()?;
26393            Ok(Expression::Sqrt(Box::new(UnaryFunc::with_name(
26394                expr,
26395                "|/".to_string(),
26396            ))))
26397        } else if self.check(TokenType::DAt)
26398            && matches!(
26399                self.config.dialect,
26400                Some(crate::dialects::DialectType::DuckDB)
26401            )
26402        {
26403            // DuckDB @ operator: @(-1), @(expr), @-1
26404            // @ is the ABS operator in DuckDB with low precedence
26405            // Python sqlglot: "@": lambda self: exp.Abs(this=self._parse_bitwise())
26406            // This means @col + 1 parses as ABS(col + 1), not ABS(col) + 1
26407            self.advance(); // consume @
26408                            // Parse at bitwise level for correct precedence (matches Python sqlglot)
26409            let expr = self.parse_bitwise_or()?;
26410            Ok(Expression::Abs(Box::new(UnaryFunc::new(expr))))
26411        } else if self.check(TokenType::Var)
26412            && self.peek().text.starts_with('@')
26413            && matches!(
26414                self.config.dialect,
26415                Some(crate::dialects::DialectType::DuckDB)
26416            )
26417        {
26418            // DuckDB @ operator with identifier: @col, @col + 1
26419            // Tokenizer creates "@col" as a single Var token, so we need to handle it here
26420            // Python sqlglot: "@": lambda self: exp.Abs(this=self._parse_bitwise())
26421            let token = self.advance(); // consume @col token
26422            let col_name = &token.text[1..]; // strip leading @
26423
26424            // Create column expression for the identifier part
26425            let col_expr = Expression::Column(Column {
26426                name: Identifier::new(col_name),
26427                table: None,
26428                join_mark: false,
26429                trailing_comments: Vec::new(),
26430                span: None,
26431                inferred_type: None,
26432            });
26433
26434            // Check if followed by operators that should be included in the ABS
26435            // We need to parse any remaining operators at bitwise level
26436            // First, check if there's a binary operator after this column
26437            if self.check(TokenType::Plus)
26438                || self.check(TokenType::Dash)
26439                || self.check(TokenType::Star)
26440                || self.check(TokenType::Slash)
26441                || self.check(TokenType::Percent)
26442                || self.check(TokenType::Amp)
26443                || self.check(TokenType::Pipe)
26444                || self.check(TokenType::Caret)
26445                || self.check(TokenType::LtLt)
26446                || self.check(TokenType::GtGt)
26447            {
26448                // There are more operators - we need to continue parsing at bitwise level
26449                // But parse_bitwise_or expects to start fresh, not continue with existing left
26450                // So we use a helper approach: parse_bitwise_continuation
26451                let full_expr = self.parse_bitwise_continuation(col_expr)?;
26452                Ok(Expression::Abs(Box::new(UnaryFunc::new(full_expr))))
26453            } else {
26454                // Just the column, no more operators
26455                Ok(Expression::Abs(Box::new(UnaryFunc::new(col_expr))))
26456            }
26457        } else if self.check(TokenType::DAt)
26458            && (self.check_next(TokenType::LParen) || self.check_next(TokenType::Dash))
26459        {
26460            // Non-DuckDB dialects: only handle @(expr) and @-expr as ABS
26461            self.advance(); // consume @
26462            let expr = self.parse_bitwise_or()?;
26463            Ok(Expression::Abs(Box::new(UnaryFunc::new(expr))))
26464        } else if self.check(TokenType::Prior)
26465            && !self.check_next(TokenType::As)
26466            && !self.check_next(TokenType::Comma)
26467            && !self.check_next(TokenType::RParen)
26468            && !self.check_next(TokenType::Semicolon)
26469            && self.current + 1 < self.tokens.len()
26470        {
26471            // Oracle PRIOR expression - references parent row's value in hierarchical queries
26472            // Can appear in SELECT list, CONNECT BY, or other expression contexts
26473            // Python sqlglot: "PRIOR": lambda self: self.expression(exp.Prior, this=self._parse_bitwise())
26474            // When followed by AS/comma/rparen/end, treat PRIOR as an identifier (column name)
26475            self.advance(); // consume PRIOR
26476            let expr = self.parse_bitwise_or()?;
26477            Ok(Expression::Prior(Box::new(Prior { this: expr })))
26478        } else {
26479            // Try to parse type literals like: point '(4,4)', timestamp '2024-01-01', interval '1 day'
26480            // PostgreSQL allows type name followed by string literal as a cast shorthand
26481            if let Some(type_literal) = self.try_parse_type_literal()? {
26482                return self.parse_postfix_operators(type_literal);
26483            }
26484            // Try to parse type shorthand CAST: INT 1, VARCHAR 'x', STRING 'x', TEXT 'y', etc.
26485            // In generic mode, type keyword followed by literal -> CAST(literal AS type)
26486            if let Some(type_cast) = self.try_parse_type_shorthand_cast()? {
26487                return self.parse_postfix_operators(type_cast);
26488            }
26489            let expr = self.parse_primary()?;
26490            // Handle postfix exclamation mark for Snowflake model attribute syntax: model!PREDICT(...)
26491            self.parse_postfix_operators(expr)
26492        }
26493    }
26494
26495    /// Parse postfix operators like ! (model attribute in Snowflake) and : (JSON path in Snowflake)
26496    fn parse_postfix_operators(&mut self, mut expr: Expression) -> Result<Expression> {
26497        // Handle Oracle/Redshift outer join marker (+) after column reference
26498        // Syntax: column_ref (+) indicates optional side of join
26499        if self.check(TokenType::LParen) && self.check_next(TokenType::Plus) {
26500            // Look ahead to verify it's ( + )
26501            let saved_pos = self.current;
26502            if self.match_token(TokenType::LParen)
26503                && self.match_token(TokenType::Plus)
26504                && self.match_token(TokenType::RParen)
26505            {
26506                // Set join_mark on the column expression
26507                if let Expression::Column(ref mut col) = expr {
26508                    col.join_mark = true;
26509                }
26510            } else {
26511                self.current = saved_pos;
26512            }
26513        }
26514
26515        // Handle EXCLAMATION for Snowflake model attribute syntax: model!PREDICT(...)
26516        while self.match_token(TokenType::Exclamation) {
26517            // Parse the attribute/function after the exclamation mark
26518            // This can be either a simple identifier (model!admin) or a function call (model!PREDICT(1))
26519            let attr = self.parse_primary()?;
26520            expr = Expression::ModelAttribute(Box::new(ModelAttribute {
26521                this: Box::new(expr),
26522                expression: Box::new(attr),
26523            }));
26524        }
26525
26526        // Handle COLON for Snowflake JSON path extraction: a:field or a:field.subfield
26527        // This creates JSONExtract expressions that transform to GET_PATH(a, 'field') in Snowflake
26528        expr = self.parse_colon_json_path(expr)?;
26529
26530        // Handle DCOLON (::) - in SingleStore it's JSON extraction, in other dialects it's cast
26531        // SingleStore JSON path syntax:
26532        //   a::b -> JSON_EXTRACT_JSON(a, 'b')
26533        //   a::$b -> JSON_EXTRACT_STRING(a, 'b')
26534        //   a::%b -> JSON_EXTRACT_DOUBLE(a, 'b')
26535        //   a::?names -> JSON match syntax
26536        if matches!(
26537            self.config.dialect,
26538            Some(crate::dialects::DialectType::SingleStore)
26539        ) {
26540            expr = self.parse_singlestore_json_path(expr)?;
26541        } else {
26542            // For other dialects, :: is cast syntax
26543            // IMPORTANT: Use parse_data_type_for_cast to avoid consuming subscripts as array dimensions
26544            // e.g., ::VARIANT[0] should be cast to VARIANT followed by subscript [0]
26545            while self.match_token(TokenType::DColon) {
26546                let data_type = self.parse_data_type_for_cast()?;
26547                expr = Expression::Cast(Box::new(Cast {
26548                    this: expr,
26549                    to: data_type,
26550                    trailing_comments: Vec::new(),
26551                    double_colon_syntax: true,
26552                    format: None,
26553                    default: None,
26554                    inferred_type: None,
26555                }));
26556            }
26557        }
26558
26559        // Teradata: (FORMAT '...') phrase after an expression
26560        if matches!(
26561            self.config.dialect,
26562            Some(crate::dialects::DialectType::Teradata)
26563        ) && self.check(TokenType::LParen)
26564            && self.check_next(TokenType::Format)
26565        {
26566            self.advance(); // consume (
26567            self.advance(); // consume FORMAT
26568            let format = self.expect_string()?;
26569            self.expect(TokenType::RParen)?;
26570            expr = Expression::FormatPhrase(Box::new(FormatPhrase {
26571                this: Box::new(expr),
26572                format,
26573            }));
26574        }
26575
26576        Ok(expr)
26577    }
26578
26579    /// Parse SingleStore JSON path extraction syntax
26580    /// Examples:
26581    ///   a::b -> JSON_EXTRACT_JSON(a, 'b')
26582    ///   a::$b -> JSON_EXTRACT_STRING(a, 'b')
26583    ///   a::%b -> JSON_EXTRACT_DOUBLE(a, 'b')
26584    ///   a::`b`::`2` -> nested JSON extraction
26585    fn parse_singlestore_json_path(&mut self, mut expr: Expression) -> Result<Expression> {
26586        loop {
26587            if self.match_token(TokenType::DColon) {
26588                // :: followed by identifier -> JSON_EXTRACT_JSON
26589                // Check if next is a backtick-quoted identifier or regular identifier
26590                let path_key = if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
26591                    self.advance().text
26592                } else if self.check(TokenType::Number) {
26593                    // a::2 -> JSON_EXTRACT_JSON(a, '2')
26594                    self.advance().text
26595                } else {
26596                    return Err(self.parse_error("Expected identifier after ::"));
26597                };
26598
26599                expr = Expression::Function(Box::new(Function::new(
26600                    "JSON_EXTRACT_JSON".to_string(),
26601                    vec![expr, Expression::string(&path_key)],
26602                )));
26603            } else if self.match_token(TokenType::DColonDollar) {
26604                // ::$ followed by identifier -> JSON_EXTRACT_STRING
26605                let path_key = if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
26606                    self.advance().text
26607                } else {
26608                    return Err(self.parse_error("Expected identifier after ::$"));
26609                };
26610
26611                expr = Expression::Function(Box::new(Function::new(
26612                    "JSON_EXTRACT_STRING".to_string(),
26613                    vec![expr, Expression::string(&path_key)],
26614                )));
26615            } else if self.match_token(TokenType::DColonPercent) {
26616                // ::% followed by identifier -> JSON_EXTRACT_DOUBLE
26617                let path_key = if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
26618                    self.advance().text
26619                } else {
26620                    return Err(self.parse_error("Expected identifier after ::%"));
26621                };
26622
26623                expr = Expression::Function(Box::new(Function::new(
26624                    "JSON_EXTRACT_DOUBLE".to_string(),
26625                    vec![expr, Expression::string(&path_key)],
26626                )));
26627            } else if self.match_token(TokenType::DColonQMark) {
26628                // ::? followed by identifier -> Keep as JSONMatchAny expression for now
26629                let path_key = if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
26630                    self.advance().text
26631                } else {
26632                    return Err(self.parse_error("Expected identifier after ::?"));
26633                };
26634
26635                // For now, create a function that will be handled specially
26636                expr = Expression::Function(Box::new(Function::new(
26637                    "JSON_EXTRACT_JSON".to_string(), // placeholder
26638                    vec![expr, Expression::string(&format!("?{}", path_key))],
26639                )));
26640            } else {
26641                break;
26642            }
26643        }
26644        Ok(expr)
26645    }
26646
26647    /// Parse colon-separated JSON path syntax (Snowflake variant extraction)
26648    /// Examples:
26649    ///   a:from -> GET_PATH(a, 'from')
26650    ///   a:b.c.d -> GET_PATH(a, 'b.c.d')
26651    ///   a:from::STRING -> CAST(GET_PATH(a, 'from') AS VARCHAR)
26652    ///   a:b:c.d -> GET_PATH(a, 'b.c.d') (multiple colons joined into single path)
26653    fn parse_colon_json_path(&mut self, this: Expression) -> Result<Expression> {
26654        // DuckDB uses colon for prefix alias syntax (e.g., "alias: expr" means "expr AS alias")
26655        // Skip JSON path extraction for DuckDB - it's handled separately in parse_select_expressions
26656        if matches!(
26657            self.config.dialect,
26658            Some(crate::dialects::DialectType::DuckDB)
26659        ) {
26660            return Ok(this);
26661        }
26662
26663        // ClickHouse uses : as part of the ternary operator (condition ? true : false)
26664        // Skip JSON path extraction for ClickHouse to avoid consuming the ternary separator
26665        if matches!(
26666            self.config.dialect,
26667            Some(crate::dialects::DialectType::ClickHouse)
26668        ) {
26669            return Ok(this);
26670        }
26671
26672        // Only apply colon JSON path parsing to identifiers, columns, and function results
26673        // This prevents {'key': 'value'} object literals from being misinterpreted
26674        let is_valid_json_path_base = matches!(
26675            &this,
26676            Expression::Column(_) |
26677            Expression::Identifier(_) |
26678            Expression::Dot(_) |
26679            Expression::JSONExtract(_) |  // Allow chained paths like a:b:c
26680            Expression::Function(_) |     // Allow function results like PARSE_JSON(...):x
26681            Expression::ParseJson(_) |    // Allow PARSE_JSON specifically
26682            Expression::Parameter(_) // Allow positional params like $1:name
26683        );
26684
26685        if !is_valid_json_path_base {
26686            return Ok(this);
26687        }
26688
26689        // Check if we have a colon (but NOT double-colon which is cast syntax)
26690        if !self.check(TokenType::Colon) {
26691            return Ok(this);
26692        }
26693
26694        // Make sure this is not a double-colon (::) which is cast syntax
26695        if self.check_next(TokenType::Colon) {
26696            // This is :: (DColon should have been tokenized, but just in case)
26697            return Ok(this);
26698        }
26699
26700        // Collect ALL the JSON path parts across multiple colons
26701        // a:b.c:d.e -> GET_PATH(a, 'b.c.d.e')
26702        // a:b[0].c -> GET_PATH(a, 'b[0].c')
26703        let mut path_string = String::new();
26704
26705        // Parse all colon-separated path segments
26706        while self.check(TokenType::Colon) && !self.check_next(TokenType::Colon) {
26707            // Save position before consuming colon so we can backtrack
26708            // if what follows isn't a valid JSON path component (e.g., DuckDB's "foo: 1" label syntax)
26709            let saved_pos = self.current;
26710            let saved_path_len = path_string.len();
26711
26712            // Consume the colon
26713            self.advance();
26714
26715            // Parse first path component (required) - can be any identifier including keywords
26716            // Also handle backtick-quoted identifiers like `zip code` or `fb:testid`
26717            // Also handle bracket notation directly after colon: c1:['price'] or c1:["foo bar"]
26718            // IMPORTANT: Check QuotedIdentifier FIRST since is_identifier_token() includes QuotedIdentifier
26719            let mut had_initial_component = false;
26720            if self.check(TokenType::QuotedIdentifier) {
26721                // Quoted field name in variant access
26722                // Snowflake: v:"fruit" → double-quoted key → stored as plain text 'fruit'
26723                // Databricks: raw:`zip code` → backtick-quoted key → stored as bracket notation '["zip code"]'
26724                let quoted_name = self.advance().text.clone();
26725                let is_snowflake = matches!(
26726                    self.config.dialect,
26727                    Some(crate::dialects::DialectType::Snowflake)
26728                );
26729                let needs_bracket = quoted_name.contains(' ') || quoted_name.contains('\'');
26730                if is_snowflake && !needs_bracket {
26731                    // Snowflake double-quoted keys without special chars are stored as plain text
26732                    // Add dot separator for plain segments
26733                    if !path_string.is_empty() {
26734                        path_string.push('.');
26735                    }
26736                    path_string.push_str(&quoted_name);
26737                } else if is_snowflake && needs_bracket {
26738                    // Snowflake keys with spaces/apostrophes use bracket notation: ["key with spaces"]
26739                    // No dot before bracket notation
26740                    path_string.push_str("[\"");
26741                    // Don't escape single quotes here - the generator will handle escaping
26742                    // when outputting the string literal
26743                    path_string.push_str(&quoted_name);
26744                    path_string.push_str("\"]");
26745                } else {
26746                    // Other dialects (Databricks): wrap in bracket notation
26747                    // No dot before bracket notation
26748                    path_string.push_str("[\"");
26749                    for c in quoted_name.chars() {
26750                        if c == '"' {
26751                            path_string.push_str("\\\"");
26752                        } else {
26753                            path_string.push(c);
26754                        }
26755                    }
26756                    path_string.push_str("\"]");
26757                }
26758                had_initial_component = true;
26759            } else if self.is_identifier_token()
26760                || self.is_safe_keyword_as_identifier()
26761                || self.is_reserved_keyword_as_identifier()
26762            {
26763                // Add a dot separator for plain identifier segments
26764                if !path_string.is_empty() {
26765                    path_string.push('.');
26766                }
26767                let first_part = self.advance().text;
26768                path_string.push_str(&first_part);
26769                had_initial_component = true;
26770            } else if self.check(TokenType::LBracket) {
26771                // Bracket notation directly after colon: c1:['price'] or c1:["foo bar"]
26772                // Mark that we have a valid path start - the bracket will be parsed in the loop below
26773                had_initial_component = true;
26774            }
26775
26776            if !had_initial_component {
26777                // Not a valid JSON path component - backtrack and stop
26778                // This handles cases like DuckDB's "foo: 1" label/alias syntax
26779                // where the colon is followed by a non-identifier (e.g., a number)
26780                self.current = saved_pos;
26781                path_string.truncate(saved_path_len);
26782                break;
26783            }
26784
26785            // Parse optional array indices and additional path components
26786            loop {
26787                // Handle array index: [0], [1], [*], ['key'], ["key"], etc.
26788                if self.match_token(TokenType::LBracket) {
26789                    // Parse the index expression (typically a number, identifier, * for wildcard, or string key)
26790                    if self.check(TokenType::Number) {
26791                        path_string.push('[');
26792                        let idx = self.advance().text;
26793                        path_string.push_str(&idx);
26794                        self.expect(TokenType::RBracket)?;
26795                        path_string.push(']');
26796                    } else if self.check(TokenType::Star) {
26797                        // Wildcard array access: [*] matches all array elements
26798                        path_string.push('[');
26799                        self.advance();
26800                        path_string.push('*');
26801                        self.expect(TokenType::RBracket)?;
26802                        path_string.push(']');
26803                    } else if self.check(TokenType::String) {
26804                        // Single-quoted string key access: ['bicycle']
26805                        // Convert to dot notation for simple keys, keep bracket notation for keys with spaces
26806                        let key = self.advance().text;
26807                        self.expect(TokenType::RBracket)?;
26808                        // Check if the key contains spaces or special characters that require bracket notation
26809                        let needs_brackets =
26810                            key.contains(' ') || key.contains('"') || key.contains('\'');
26811                        if needs_brackets {
26812                            // Keep bracket notation with double quotes: ["zip code"]
26813                            path_string.push_str("[\"");
26814                            for c in key.chars() {
26815                                if c == '"' {
26816                                    path_string.push_str("\\\"");
26817                                } else {
26818                                    path_string.push(c);
26819                                }
26820                            }
26821                            path_string.push_str("\"]");
26822                        } else {
26823                            // Convert to dot notation: store['bicycle'] -> store.bicycle
26824                            // But only add dot if path_string is not empty (handles c1:['price'] -> c1:price)
26825                            if !path_string.is_empty() {
26826                                path_string.push('.');
26827                            }
26828                            path_string.push_str(&key);
26829                        }
26830                    } else if self.check(TokenType::QuotedIdentifier) {
26831                        // Double-quoted string key access: ["zip code"]
26832                        // These are tokenized as QuotedIdentifier, not String
26833                        // Must be checked BEFORE is_identifier_token() since it includes QuotedIdentifier
26834                        let key = self.advance().text;
26835                        self.expect(TokenType::RBracket)?;
26836                        // Always use bracket notation with double quotes for quoted identifiers
26837                        path_string.push_str("[\"");
26838                        for c in key.chars() {
26839                            if c == '"' {
26840                                path_string.push_str("\\\"");
26841                            } else {
26842                                path_string.push(c);
26843                            }
26844                        }
26845                        path_string.push_str("\"]");
26846                    } else if self.is_identifier_token() {
26847                        path_string.push('[');
26848                        let idx = self.advance().text;
26849                        path_string.push_str(&idx);
26850                        self.expect(TokenType::RBracket)?;
26851                        path_string.push(']');
26852                    } else {
26853                        // Empty brackets or unexpected token - just close the bracket
26854                        path_string.push('[');
26855                        self.expect(TokenType::RBracket)?;
26856                        path_string.push(']');
26857                    }
26858                } else if self.match_token(TokenType::Dot) {
26859                    // Handle dot access
26860                    path_string.push('.');
26861                    if self.is_identifier_token()
26862                        || self.is_safe_keyword_as_identifier()
26863                        || self.is_reserved_keyword_as_identifier()
26864                    {
26865                        let part = self.advance().text;
26866                        path_string.push_str(&part);
26867                    } else {
26868                        return Err(self.parse_error("Expected identifier after . in JSON path"));
26869                    }
26870                } else {
26871                    break;
26872                }
26873            }
26874        }
26875
26876        // If no path was parsed (e.g., backtracked on first colon), return the original expression
26877        if path_string.is_empty() {
26878            return Ok(this);
26879        }
26880
26881        // Create the JSONExtract expression with variant_extract marker
26882        let path_expr = Expression::Literal(Literal::String(path_string));
26883        let json_extract = Expression::JSONExtract(Box::new(JSONExtract {
26884            this: Box::new(this),
26885            expression: Box::new(path_expr),
26886            only_json_types: None,
26887            expressions: Vec::new(),
26888            variant_extract: Some(Box::new(Expression::Boolean(BooleanLiteral {
26889                value: true,
26890            }))),
26891            json_query: None,
26892            option: None,
26893            quote: None,
26894            on_condition: None,
26895            requires_json: None,
26896        }));
26897
26898        Ok(json_extract)
26899    }
26900
26901    /// Check if the current token is a reserved keyword that can be used as identifier in JSON path
26902    fn is_reserved_keyword_as_identifier(&self) -> bool {
26903        if self.is_at_end() {
26904            return false;
26905        }
26906        let token = self.peek();
26907        // Allow reserved keywords like FROM, SELECT, etc. as JSON path components
26908        matches!(
26909            token.token_type,
26910            TokenType::From
26911                | TokenType::Select
26912                | TokenType::Where
26913                | TokenType::And
26914                | TokenType::Or
26915                | TokenType::Not
26916                | TokenType::In
26917                | TokenType::As
26918                | TokenType::On
26919                | TokenType::Join
26920                | TokenType::Left
26921                | TokenType::Right
26922                | TokenType::Inner
26923                | TokenType::Outer
26924                | TokenType::Cross
26925                | TokenType::Full
26926                | TokenType::Group
26927                | TokenType::Order
26928                | TokenType::By
26929                | TokenType::Having
26930                | TokenType::Limit
26931                | TokenType::Offset
26932                | TokenType::Union
26933                | TokenType::Except
26934                | TokenType::Intersect
26935                | TokenType::All
26936                | TokenType::Distinct
26937                | TokenType::Case
26938                | TokenType::When
26939                | TokenType::Then
26940                | TokenType::Else
26941                | TokenType::End
26942                | TokenType::Null
26943                | TokenType::True
26944                | TokenType::False
26945                | TokenType::Between
26946                | TokenType::Like
26947                | TokenType::Is
26948                | TokenType::Exists
26949                | TokenType::Insert
26950                | TokenType::Update
26951                | TokenType::Delete
26952                | TokenType::Create
26953                | TokenType::Alter
26954                | TokenType::Drop
26955                | TokenType::Table
26956                | TokenType::View
26957                | TokenType::Index
26958                | TokenType::Set
26959                | TokenType::Values
26960                | TokenType::Into
26961                | TokenType::Default
26962                | TokenType::Key
26963                | TokenType::Unique
26964                | TokenType::Check
26965                | TokenType::Constraint
26966                | TokenType::References
26967        )
26968    }
26969
26970    /// Parse primary expressions
26971    fn parse_primary(&mut self) -> Result<Expression> {
26972        // Handle APPROXIMATE COUNT(DISTINCT expr) - Redshift syntax
26973        // Parses as ApproxDistinct expression
26974        if self.check(TokenType::Var) && self.peek().text.to_uppercase() == "APPROXIMATE" {
26975            let saved_pos = self.current;
26976            self.advance(); // consume APPROXIMATE
26977                            // Parse the COUNT(DISTINCT ...) that follows
26978            let func = self.parse_primary()?;
26979            // Check if it's COUNT with DISTINCT
26980            if let Expression::Count(ref count_expr) = func {
26981                if count_expr.distinct {
26982                    let this_expr = count_expr.this.clone().unwrap_or_else(|| {
26983                        Expression::Star(crate::expressions::Star {
26984                            table: None,
26985                            except: None,
26986                            replace: None,
26987                            rename: None,
26988                            trailing_comments: Vec::new(),
26989                            span: None,
26990                        })
26991                    });
26992                    return Ok(Expression::ApproxDistinct(Box::new(
26993                        crate::expressions::AggFunc {
26994                            this: this_expr,
26995                            distinct: false,
26996                            filter: None,
26997                            order_by: Vec::new(),
26998                            name: Some("APPROX_DISTINCT".to_string()),
26999                            ignore_nulls: None,
27000                            having_max: None,
27001                            limit: None,
27002                            inferred_type: None,
27003                        },
27004                    )));
27005                }
27006            }
27007            // Not COUNT(DISTINCT ...) - backtrack
27008            self.current = saved_pos;
27009        }
27010
27011        if let Some(connect_by_root) = self.try_parse_connect_by_root_expression()? {
27012            return Ok(connect_by_root);
27013        }
27014
27015        // PostgreSQL VARIADIC prefix in function call arguments
27016        // e.g., SELECT MLEAST(VARIADIC ARRAY[10, -1, 5, 4.4])
27017        if matches!(
27018            self.config.dialect,
27019            Some(crate::dialects::DialectType::PostgreSQL)
27020                | Some(crate::dialects::DialectType::Redshift)
27021        ) {
27022            if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("VARIADIC") {
27023                self.advance(); // consume VARIADIC
27024                let expr = self.parse_bitwise_or()?;
27025                return Ok(Expression::Variadic(Box::new(
27026                    crate::expressions::Variadic {
27027                        this: Box::new(expr),
27028                    },
27029                )));
27030            }
27031        }
27032
27033        // MySQL charset introducer: _utf8mb4 'string', _latin1 x'hex', etc.
27034        if matches!(
27035            self.config.dialect,
27036            Some(crate::dialects::DialectType::MySQL)
27037                | Some(crate::dialects::DialectType::SingleStore)
27038                | Some(crate::dialects::DialectType::Doris)
27039                | Some(crate::dialects::DialectType::StarRocks)
27040        ) {
27041            if self.check(TokenType::Var) || self.check(TokenType::Identifier) {
27042                let text = self.peek().text.to_uppercase();
27043                if text.starts_with('_') && Self::is_mysql_charset_introducer(&text) {
27044                    // Check if next token is a string literal or hex string
27045                    if self.current + 1 < self.tokens.len() {
27046                        let next_tt = self.tokens[self.current + 1].token_type;
27047                        if matches!(
27048                            next_tt,
27049                            TokenType::String | TokenType::HexString | TokenType::BitString
27050                        ) {
27051                            let charset_token = self.advance(); // consume charset name
27052                            let charset_name = charset_token.text.clone();
27053                            let literal = self.parse_primary()?; // parse the string/hex literal
27054                            return Ok(Expression::Introducer(Box::new(
27055                                crate::expressions::Introducer {
27056                                    this: Box::new(Expression::Column(
27057                                        crate::expressions::Column {
27058                                            name: crate::expressions::Identifier {
27059                                                name: charset_name,
27060                                                quoted: false,
27061                                                trailing_comments: Vec::new(),
27062                                                span: None,
27063                                            },
27064                                            table: None,
27065                                            join_mark: false,
27066                                            trailing_comments: Vec::new(),
27067                                            span: None,
27068                                            inferred_type: None,
27069                                        },
27070                                    )),
27071                                    expression: Box::new(literal),
27072                                },
27073                            )));
27074                        }
27075                    }
27076                }
27077            }
27078        }
27079
27080        // Array literal: [1, 2, 3] or comprehension: [expr FOR var IN iterator]
27081        if self.match_token(TokenType::LBracket) {
27082            // Parse empty array: []
27083            if self.match_token(TokenType::RBracket) {
27084                return Ok(Expression::ArrayFunc(Box::new(ArrayConstructor {
27085                    expressions: Vec::new(),
27086                    bracket_notation: true,
27087                    use_list_keyword: false,
27088                })));
27089            }
27090
27091            // Parse first expression
27092            let first_expr = self.parse_expression()?;
27093
27094            // Check for comprehension syntax: [expr FOR var IN iterator [IF condition]]
27095            if self.match_token(TokenType::For) {
27096                // Parse loop variable - typically a simple identifier like 'x'
27097                let loop_var = self.parse_primary()?;
27098
27099                // Parse optional position (second variable after comma)
27100                let position = if self.match_token(TokenType::Comma) {
27101                    Some(self.parse_primary()?)
27102                } else {
27103                    None
27104                };
27105
27106                // Expect IN keyword
27107                if !self.match_token(TokenType::In) {
27108                    return Err(self.parse_error("Expected IN in comprehension"));
27109                }
27110
27111                // Parse iterator expression
27112                let iterator = self.parse_expression()?;
27113
27114                // Parse optional condition after IF
27115                let condition = if self.match_token(TokenType::If) {
27116                    Some(self.parse_expression()?)
27117                } else {
27118                    None
27119                };
27120
27121                // Expect closing bracket
27122                self.expect(TokenType::RBracket)?;
27123
27124                // Return Comprehension
27125                return Ok(Expression::Comprehension(Box::new(Comprehension {
27126                    this: Box::new(first_expr),
27127                    expression: Box::new(loop_var),
27128                    position: position.map(Box::new),
27129                    iterator: Some(Box::new(iterator)),
27130                    condition: condition.map(Box::new),
27131                })));
27132            }
27133
27134            // Regular array - continue parsing elements
27135            // ClickHouse allows AS aliases in array: [1 AS a, 2 AS b]
27136            let first_expr = if matches!(
27137                self.config.dialect,
27138                Some(crate::dialects::DialectType::ClickHouse)
27139            ) && self.check(TokenType::As)
27140                && !self.check_next(TokenType::RBracket)
27141            {
27142                self.advance(); // consume AS
27143                let alias = self.expect_identifier()?;
27144                Expression::Alias(Box::new(Alias::new(first_expr, Identifier::new(alias))))
27145            } else {
27146                first_expr
27147            };
27148            let mut expressions = vec![first_expr];
27149            while self.match_token(TokenType::Comma) {
27150                // Handle trailing comma
27151                if self.check(TokenType::RBracket) {
27152                    break;
27153                }
27154                let expr = self.parse_expression()?;
27155                // ClickHouse: handle AS alias on array elements
27156                let expr = if matches!(
27157                    self.config.dialect,
27158                    Some(crate::dialects::DialectType::ClickHouse)
27159                ) && self.check(TokenType::As)
27160                    && !self.check_next(TokenType::RBracket)
27161                {
27162                    self.advance(); // consume AS
27163                    let alias = self.expect_identifier()?;
27164                    Expression::Alias(Box::new(Alias::new(expr, Identifier::new(alias))))
27165                } else {
27166                    expr
27167                };
27168                expressions.push(expr);
27169            }
27170            self.expect(TokenType::RBracket)?;
27171            return self.maybe_parse_subscript(Expression::ArrayFunc(Box::new(ArrayConstructor {
27172                expressions,
27173                bracket_notation: true,
27174                use_list_keyword: false,
27175            })));
27176        }
27177
27178        // Map/Struct literal with curly braces: {'a': 1, 'b': 2}
27179        // Or Snowflake wildcard syntax: {*}, {tbl.*}, {* EXCLUDE (...)}, {* ILIKE '...'}
27180        if self.match_token(TokenType::LBrace) {
27181            // ClickHouse query parameter: {name: Type}
27182            // We consumed `{` above, so rewind and let the dedicated parser consume it.
27183            if matches!(
27184                self.config.dialect,
27185                Some(crate::dialects::DialectType::ClickHouse)
27186            ) {
27187                self.current -= 1;
27188                if let Some(param) = self.parse_clickhouse_braced_parameter()? {
27189                    return self.maybe_parse_subscript(param);
27190                }
27191                // Not a ClickHouse query parameter, restore position after `{` for map/wildcard parsing.
27192                self.current += 1;
27193            }
27194
27195            // Parse empty map: {}
27196            if self.match_token(TokenType::RBrace) {
27197                return self.maybe_parse_subscript(Expression::MapFunc(Box::new(MapConstructor {
27198                    keys: Vec::new(),
27199                    values: Vec::new(),
27200                    curly_brace_syntax: true,
27201                    with_map_keyword: false,
27202                })));
27203            }
27204
27205            // Check for ODBC escape syntax: {fn function_name(args)}
27206            // This must be checked before wildcards and map literals
27207            if self.check_identifier("fn") {
27208                self.advance(); // consume 'fn'
27209                                // Parse function call
27210                let func_name = self.expect_identifier_or_keyword_with_quoted()?;
27211                self.expect(TokenType::LParen)?;
27212
27213                // Parse function arguments
27214                let mut args = Vec::new();
27215                if !self.check(TokenType::RParen) {
27216                    loop {
27217                        args.push(self.parse_expression()?);
27218                        if !self.match_token(TokenType::Comma) {
27219                            break;
27220                        }
27221                    }
27222                }
27223                self.expect(TokenType::RParen)?;
27224                self.expect(TokenType::RBrace)?;
27225
27226                // Return as a regular function call (the ODBC escape is just syntax sugar)
27227                return Ok(Expression::Function(Box::new(Function::new(
27228                    func_name.name,
27229                    args,
27230                ))));
27231            }
27232
27233            // Check for ODBC datetime literals: {d'2024-01-01'}, {t'12:00:00'}, {ts'2024-01-01 12:00:00'}
27234            if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
27235                let type_text = self.peek().text.to_lowercase();
27236                if (type_text == "d" || type_text == "t" || type_text == "ts")
27237                    && self.check_next(TokenType::String)
27238                {
27239                    self.advance(); // consume type indicator (d, t, or ts)
27240                    let value = self.expect_string()?;
27241                    self.expect(TokenType::RBrace)?;
27242
27243                    // Return appropriate expression based on type
27244                    return match type_text.as_str() {
27245                        "d" => Ok(Expression::Date(Box::new(
27246                            crate::expressions::UnaryFunc::new(Expression::Literal(
27247                                crate::expressions::Literal::String(value),
27248                            )),
27249                        ))),
27250                        "t" => Ok(Expression::Time(Box::new(
27251                            crate::expressions::UnaryFunc::new(Expression::Literal(
27252                                crate::expressions::Literal::String(value),
27253                            )),
27254                        ))),
27255                        "ts" => Ok(Expression::Timestamp(Box::new(
27256                            crate::expressions::TimestampFunc {
27257                                this: Some(Box::new(Expression::Literal(
27258                                    crate::expressions::Literal::String(value),
27259                                ))),
27260                                zone: None,
27261                                with_tz: None,
27262                                safe: None,
27263                            },
27264                        ))),
27265                        _ => {
27266                            Err(self
27267                                .parse_error(format!("Unknown ODBC datetime type: {}", type_text)))
27268                        }
27269                    };
27270                }
27271            }
27272
27273            // Check for Snowflake wildcard syntax: {*}, {tbl.*}, {* EXCLUDE (...)}, {* ILIKE '...'}
27274            // Pattern: either {*...} or {identifier/var followed by .*}
27275            // Note: Identifiers may be tokenized as Var or Identifier
27276            let is_table_star = (self.check(TokenType::Identifier) || self.check(TokenType::Var))
27277                && self.check_next(TokenType::Dot)
27278                && self
27279                    .tokens
27280                    .get(self.current + 2)
27281                    .map(|t| t.token_type == TokenType::Star)
27282                    .unwrap_or(false);
27283            let is_wildcard = self.check(TokenType::Star) || is_table_star;
27284
27285            if is_wildcard {
27286                // Parse the wildcard expression
27287                let wildcard_expr = if self.match_token(TokenType::Star) {
27288                    // {*} or {* EXCLUDE ...} or {* ILIKE ...}
27289                    // Check for ILIKE first since it's different from standard star modifiers
27290                    if self.check_keyword_text("ILIKE") {
27291                        self.advance();
27292                        let pattern = self.parse_expression()?;
27293                        // Create an ILike expression with Star as left side
27294                        Expression::ILike(Box::new(LikeOp {
27295                            left: Expression::Star(Star {
27296                                table: None,
27297                                except: None,
27298                                replace: None,
27299                                rename: None,
27300                                trailing_comments: Vec::new(),
27301                                span: None,
27302                            }),
27303                            right: pattern,
27304                            escape: None,
27305                            quantifier: None,
27306                            inferred_type: None,
27307                        }))
27308                    } else {
27309                        // {*} or {* EXCLUDE ...}
27310                        let star = self.parse_star_modifiers(None)?;
27311                        Expression::Star(star)
27312                    }
27313                } else {
27314                    // {tbl.*} - table qualified wildcard
27315                    let table_name = self.expect_identifier_or_keyword_with_quoted()?;
27316                    self.expect(TokenType::Dot)?;
27317                    self.expect(TokenType::Star)?;
27318                    let star = self.parse_star_modifiers(Some(table_name))?;
27319                    Expression::Star(star)
27320                };
27321
27322                self.expect(TokenType::RBrace)?;
27323
27324                // Wrap in BracedWildcard for generation
27325                return Ok(Expression::BracedWildcard(Box::new(wildcard_expr)));
27326            }
27327
27328            // Parse key-value pairs: key: value, ...
27329            let mut keys = Vec::new();
27330            let mut values = Vec::new();
27331            loop {
27332                let key = self.parse_expression()?;
27333                self.expect(TokenType::Colon)?;
27334                let value = self.parse_expression()?;
27335                keys.push(key);
27336                values.push(value);
27337                if !self.match_token(TokenType::Comma) {
27338                    break;
27339                }
27340                // Handle trailing comma
27341                if self.check(TokenType::RBrace) {
27342                    break;
27343                }
27344            }
27345            self.expect(TokenType::RBrace)?;
27346            return self.maybe_parse_subscript(Expression::MapFunc(Box::new(MapConstructor {
27347                keys,
27348                values,
27349                curly_brace_syntax: true,
27350                with_map_keyword: false,
27351            })));
27352        }
27353
27354        // Parenthesized expression or subquery
27355        if self.match_token(TokenType::LParen) {
27356            // Capture comments from the ( token (e.g., "(/* comment */ 1)")
27357            let lparen_comments = self.previous_trailing_comments();
27358
27359            // Empty parens () — could be empty tuple or zero-param lambda () -> body
27360            if self.check(TokenType::RParen) {
27361                self.advance(); // consume )
27362                                // Check for lambda: () -> body
27363                if self.match_token(TokenType::Arrow) || self.match_token(TokenType::FArrow) {
27364                    let body = self.parse_expression()?;
27365                    return Ok(Expression::Lambda(Box::new(LambdaExpr {
27366                        parameters: Vec::new(),
27367                        body,
27368                        colon: false,
27369                        parameter_types: Vec::new(),
27370                    })));
27371                }
27372                // Otherwise empty tuple
27373                return self.maybe_parse_subscript(Expression::Tuple(Box::new(Tuple {
27374                    expressions: Vec::new(),
27375                })));
27376            }
27377
27378            // Check if this is a VALUES expression inside parens: (VALUES ...)
27379            if self.check(TokenType::Values) {
27380                let values = self.parse_values()?;
27381                self.expect(TokenType::RParen)?;
27382                return Ok(Expression::Subquery(Box::new(Subquery {
27383                    this: values,
27384                    alias: None,
27385                    column_aliases: Vec::new(),
27386                    order_by: None,
27387                    limit: None,
27388                    offset: None,
27389                    distribute_by: None,
27390                    sort_by: None,
27391                    cluster_by: None,
27392                    lateral: false,
27393                    modifiers_inside: false,
27394                    trailing_comments: self.previous_trailing_comments(),
27395                    inferred_type: None,
27396                })));
27397            }
27398
27399            // Check if this is a subquery (SELECT, WITH, DuckDB FROM-first, or ClickHouse EXPLAIN)
27400            let is_explain_subquery = self.check(TokenType::Var)
27401                && self.peek().text.eq_ignore_ascii_case("EXPLAIN")
27402                && self.peek_nth(1).map_or(false, |t| {
27403                    // EXPLAIN followed by statement/style keywords is a subquery
27404                    matches!(
27405                        t.token_type,
27406                        TokenType::Select
27407                            | TokenType::Insert
27408                            | TokenType::Create
27409                            | TokenType::Alter
27410                            | TokenType::Drop
27411                            | TokenType::Set
27412                            | TokenType::System
27413                            | TokenType::Table
27414                    ) || matches!(
27415                        t.text.to_uppercase().as_str(),
27416                        "SYNTAX" | "AST" | "PLAN" | "PIPELINE" | "ESTIMATE" | "CURRENT" | "QUERY"
27417                    ) || (t.token_type == TokenType::Var
27418                        && self
27419                            .peek_nth(2)
27420                            .map_or(false, |t2| t2.token_type == TokenType::Eq))
27421                });
27422            // ClickHouse: (from, to, ...) -> body is a tuple-lambda with keyword params
27423            // Detect pattern: (keyword/ident, keyword/ident, ...) ->
27424            if matches!(
27425                self.config.dialect,
27426                Some(crate::dialects::DialectType::ClickHouse)
27427            ) {
27428                let mut look = self.current;
27429                let mut is_tuple_lambda = true;
27430                let mut param_count = 0;
27431                loop {
27432                    if look >= self.tokens.len() {
27433                        is_tuple_lambda = false;
27434                        break;
27435                    }
27436                    let tt = self.tokens[look].token_type;
27437                    if tt == TokenType::Identifier
27438                        || tt == TokenType::Var
27439                        || tt == TokenType::QuotedIdentifier
27440                        || tt.is_keyword()
27441                    {
27442                        param_count += 1;
27443                        look += 1;
27444                    } else {
27445                        is_tuple_lambda = false;
27446                        break;
27447                    }
27448                    if look >= self.tokens.len() {
27449                        is_tuple_lambda = false;
27450                        break;
27451                    }
27452                    if self.tokens[look].token_type == TokenType::Comma {
27453                        look += 1;
27454                    } else if self.tokens[look].token_type == TokenType::RParen {
27455                        look += 1;
27456                        break;
27457                    } else {
27458                        is_tuple_lambda = false;
27459                        break;
27460                    }
27461                }
27462                if is_tuple_lambda
27463                    && param_count >= 1
27464                    && look < self.tokens.len()
27465                    && self.tokens[look].token_type == TokenType::Arrow
27466                {
27467                    // Parse as lambda: consume params
27468                    let mut params = Vec::new();
27469                    loop {
27470                        let tok = self.advance();
27471                        params.push(Identifier::new(tok.text));
27472                        if self.match_token(TokenType::Comma) {
27473                            continue;
27474                        }
27475                        break;
27476                    }
27477                    self.expect(TokenType::RParen)?;
27478                    self.expect(TokenType::Arrow)?;
27479                    let body = self.parse_expression()?;
27480                    return Ok(Expression::Lambda(Box::new(LambdaExpr {
27481                        parameters: params,
27482                        body,
27483                        colon: false,
27484                        parameter_types: Vec::new(),
27485                    })));
27486                }
27487            }
27488            if self.check(TokenType::Select)
27489                || self.check(TokenType::With)
27490                || self.check(TokenType::From)
27491                || is_explain_subquery
27492            {
27493                let query = self.parse_statement()?;
27494
27495                // Parse LIMIT/OFFSET that may appear after set operations INSIDE the parentheses
27496                // e.g., (SELECT 1 EXCEPT (SELECT 2) LIMIT 1)
27497                let limit = if self.match_token(TokenType::Limit) {
27498                    Some(Limit {
27499                        this: self.parse_expression()?,
27500                        percent: false,
27501                        comments: Vec::new(),
27502                    })
27503                } else {
27504                    None
27505                };
27506                let offset = if self.match_token(TokenType::Offset) {
27507                    Some(Offset {
27508                        this: self.parse_expression()?,
27509                        rows: None,
27510                    })
27511                } else {
27512                    None
27513                };
27514
27515                self.expect(TokenType::RParen)?;
27516
27517                // Wrap in Subquery to preserve parentheses in set operations
27518                let subquery = if limit.is_some() || offset.is_some() {
27519                    // If we have limit/offset INSIDE the parens, set modifiers_inside = true
27520                    Expression::Subquery(Box::new(Subquery {
27521                        this: query,
27522                        alias: None,
27523                        column_aliases: Vec::new(),
27524                        order_by: None,
27525                        limit,
27526                        offset,
27527                        distribute_by: None,
27528                        sort_by: None,
27529                        cluster_by: None,
27530                        lateral: false,
27531                        modifiers_inside: true,
27532                        trailing_comments: self.previous_trailing_comments(),
27533                        inferred_type: None,
27534                    }))
27535                } else {
27536                    Expression::Subquery(Box::new(Subquery {
27537                        this: query,
27538                        alias: None,
27539                        column_aliases: Vec::new(),
27540                        order_by: None,
27541                        limit: None,
27542                        offset: None,
27543                        distribute_by: None,
27544                        sort_by: None,
27545                        cluster_by: None,
27546                        lateral: false,
27547                        modifiers_inside: false,
27548                        trailing_comments: self.previous_trailing_comments(),
27549                        inferred_type: None,
27550                    }))
27551                };
27552
27553                // Check for set operations after the subquery (e.g., (SELECT 1) UNION (SELECT 2))
27554                let set_result = self.parse_set_operation(subquery)?;
27555
27556                // Only parse ORDER BY/LIMIT/OFFSET after set operations if there WAS a set operation
27557                // (for cases like ((SELECT 0) UNION (SELECT 1) ORDER BY 1 OFFSET 1))
27558                // If there's no set operation, we should NOT consume these - they belong to outer context
27559                let had_set_operation = matches!(
27560                    &set_result,
27561                    Expression::Union(_) | Expression::Intersect(_) | Expression::Except(_)
27562                );
27563
27564                let result = if had_set_operation {
27565                    let order_by = if self.check(TokenType::Order) {
27566                        self.expect(TokenType::Order)?;
27567                        self.expect(TokenType::By)?;
27568                        Some(self.parse_order_by()?)
27569                    } else {
27570                        None
27571                    };
27572                    let limit_after = if self.match_token(TokenType::Limit) {
27573                        Some(Limit {
27574                            this: self.parse_expression()?,
27575                            percent: false,
27576                            comments: Vec::new(),
27577                        })
27578                    } else {
27579                        None
27580                    };
27581                    let offset_after = if self.match_token(TokenType::Offset) {
27582                        Some(Offset {
27583                            this: self.parse_expression()?,
27584                            rows: None,
27585                        })
27586                    } else {
27587                        None
27588                    };
27589
27590                    // If we have any modifiers, wrap in a Subquery with the modifiers OUTSIDE the paren
27591                    if order_by.is_some() || limit_after.is_some() || offset_after.is_some() {
27592                        Expression::Subquery(Box::new(Subquery {
27593                            this: set_result,
27594                            alias: None,
27595                            column_aliases: Vec::new(),
27596                            order_by,
27597                            limit: limit_after,
27598                            offset: offset_after,
27599                            lateral: false,
27600                            modifiers_inside: false,
27601                            trailing_comments: Vec::new(),
27602                            distribute_by: None,
27603                            sort_by: None,
27604                            cluster_by: None,
27605                            inferred_type: None,
27606                        }))
27607                    } else {
27608                        set_result
27609                    }
27610                } else {
27611                    set_result
27612                };
27613                // Allow postfix operators on subquery expressions (e.g., (SELECT 1, 2).1 for tuple element access)
27614                return self.maybe_parse_subscript(result);
27615            }
27616
27617            // Check if this starts with another paren that might be a subquery
27618            // e.g., ((SELECT 1))
27619            if self.check(TokenType::LParen) {
27620                let expr = self.parse_expression()?;
27621
27622                // Handle aliasing of expression inside outer parens (e.g., ((a, b) AS c))
27623                let first_expr = if self.match_token(TokenType::As) {
27624                    let alias = self.expect_identifier_or_alias_keyword_with_quoted()?;
27625                    Expression::Alias(Box::new(Alias::new(expr, alias)))
27626                } else {
27627                    expr
27628                };
27629
27630                // Check for tuple of tuples: ((1, 2), (3, 4))
27631                // Also handles ClickHouse: ((SELECT 1) AS x, (SELECT 2) AS y)
27632                if self.match_token(TokenType::Comma) {
27633                    let mut expressions = vec![first_expr];
27634                    loop {
27635                        if self.check(TokenType::RParen) {
27636                            break;
27637                        } // trailing comma
27638                        let elem = self.parse_expression()?;
27639                        // Handle AS alias after each element (ClickHouse tuple CTE pattern)
27640                        let elem = if self.match_token(TokenType::As) {
27641                            let alias = self.expect_identifier_or_keyword()?;
27642                            Expression::Alias(Box::new(Alias::new(elem, Identifier::new(alias))))
27643                        } else {
27644                            elem
27645                        };
27646                        expressions.push(elem);
27647                        if !self.match_token(TokenType::Comma) {
27648                            break;
27649                        }
27650                    }
27651                    self.expect(TokenType::RParen)?;
27652                    let tuple_expr = Expression::Tuple(Box::new(Tuple { expressions }));
27653                    return self.maybe_parse_subscript(tuple_expr);
27654                }
27655
27656                let result = first_expr;
27657
27658                self.expect(TokenType::RParen)?;
27659                let mut nested_paren_comments = lparen_comments.clone();
27660                nested_paren_comments.extend(self.previous_trailing_comments());
27661                // Check for set operations after parenthesized expression
27662                if self.check(TokenType::Union)
27663                    || self.check(TokenType::Intersect)
27664                    || self.check(TokenType::Except)
27665                {
27666                    // This is a set operation - need to handle specially
27667                    if let Expression::Subquery(subq) = &result {
27668                        let set_result = self.parse_set_operation(subq.this.clone())?;
27669
27670                        // Parse ORDER BY/LIMIT/OFFSET after set operations
27671                        let order_by = if self.check(TokenType::Order) {
27672                            self.expect(TokenType::Order)?;
27673                            self.expect(TokenType::By)?;
27674                            Some(self.parse_order_by()?)
27675                        } else {
27676                            None
27677                        };
27678                        let limit = if self.match_token(TokenType::Limit) {
27679                            Some(Limit {
27680                                this: self.parse_expression()?,
27681                                percent: false,
27682                                comments: Vec::new(),
27683                            })
27684                        } else {
27685                            None
27686                        };
27687                        let offset = if self.match_token(TokenType::Offset) {
27688                            Some(Offset {
27689                                this: self.parse_expression()?,
27690                                rows: None,
27691                            })
27692                        } else {
27693                            None
27694                        };
27695
27696                        return Ok(Expression::Subquery(Box::new(Subquery {
27697                            this: set_result,
27698                            alias: None,
27699                            column_aliases: Vec::new(),
27700                            order_by,
27701                            limit,
27702                            offset,
27703                            lateral: false,
27704                            modifiers_inside: false,
27705                            trailing_comments: Vec::new(),
27706                            distribute_by: None,
27707                            sort_by: None,
27708                            cluster_by: None,
27709                            inferred_type: None,
27710                        })));
27711                    }
27712                }
27713                return self.maybe_parse_over(Expression::Paren(Box::new(Paren {
27714                    this: result,
27715                    trailing_comments: nested_paren_comments,
27716                })));
27717            }
27718
27719            let expr = self.parse_expression()?;
27720
27721            // Check for AS alias on the first element (e.g., (x AS y, ...))
27722            let first_expr = if self.match_token(TokenType::As) {
27723                let alias = self.expect_identifier_or_keyword_with_quoted()?;
27724                Expression::Alias(Box::new(Alias::new(expr, alias)))
27725            } else {
27726                expr
27727            };
27728
27729            // Check for tuple (multiple expressions separated by commas)
27730            if self.match_token(TokenType::Comma) {
27731                let mut expressions = vec![first_expr];
27732                // ClickHouse: trailing comma creates single-element tuple, e.g., (1,)
27733                if self.check(TokenType::RParen) {
27734                    self.advance(); // consume )
27735                    let tuple_expr = Expression::Tuple(Box::new(Tuple { expressions }));
27736                    return self.maybe_parse_subscript(tuple_expr);
27737                }
27738                // Parse remaining tuple elements, each can have AS alias
27739                loop {
27740                    let elem = self.parse_expression()?;
27741                    let elem_with_alias = if self.match_token(TokenType::As) {
27742                        let alias = self.expect_identifier_or_keyword_with_quoted()?;
27743                        Expression::Alias(Box::new(Alias::new(elem, alias)))
27744                    } else {
27745                        elem
27746                    };
27747                    expressions.push(elem_with_alias);
27748                    if !self.match_token(TokenType::Comma) {
27749                        break;
27750                    }
27751                    // ClickHouse: trailing comma in multi-element tuple, e.g., (1, 2,)
27752                    if self.check(TokenType::RParen) {
27753                        break;
27754                    }
27755                }
27756
27757                self.expect(TokenType::RParen)?;
27758
27759                // Check for lambda expression: (a, b) -> body
27760                if self.match_token(TokenType::Arrow) {
27761                    let parameters = expressions
27762                        .into_iter()
27763                        .filter_map(|e| {
27764                            if let Expression::Column(c) = e {
27765                                Some(c.name)
27766                            } else if let Expression::Identifier(id) = e {
27767                                Some(id)
27768                            } else {
27769                                None
27770                            }
27771                        })
27772                        .collect();
27773                    let body = self.parse_expression()?;
27774                    return Ok(Expression::Lambda(Box::new(LambdaExpr {
27775                        parameters,
27776                        body,
27777                        colon: false,
27778                        parameter_types: Vec::new(),
27779                    })));
27780                }
27781
27782                // Check for optional alias on the whole tuple
27783                // But NOT when AS is followed by a type constructor like Tuple(a Int8, ...)
27784                // or STRUCT<a TINYINT, ...> which would be part of a CAST expression: CAST((1, 2) AS Tuple(a Int8, b Int16))
27785                // Also NOT when AS is followed by a type name then ) like: CAST((1, 2) AS String)
27786                let tuple_expr = Expression::Tuple(Box::new(Tuple { expressions }));
27787                let result = if self.check(TokenType::As) {
27788                    // Look ahead: AS + type_keyword + ( or < → likely a type, not an alias
27789                    let after_as = self.current + 1;
27790                    let after_ident = self.current + 2;
27791                    let is_type_constructor = after_ident < self.tokens.len()
27792                        && (self.tokens[after_as].token_type == TokenType::Identifier
27793                            || self.tokens[after_as].token_type == TokenType::Var
27794                            || self.tokens[after_as].token_type == TokenType::Nullable
27795                            || self.tokens[after_as].token_type == TokenType::Struct
27796                            || self.tokens[after_as].token_type == TokenType::Array)
27797                        && (self.tokens[after_ident].token_type == TokenType::LParen
27798                            || self.tokens[after_ident].token_type == TokenType::Lt);
27799                    // Check if AS is followed by identifier/keyword then ), indicating CAST(tuple AS Type)
27800                    let is_cast_type = after_ident < self.tokens.len()
27801                        && (self.tokens[after_as].token_type == TokenType::Identifier
27802                            || self.tokens[after_as].token_type == TokenType::Var
27803                            || self.tokens[after_as].token_type.is_keyword())
27804                        && self.tokens[after_ident].token_type == TokenType::RParen;
27805                    if is_type_constructor || is_cast_type {
27806                        tuple_expr
27807                    } else {
27808                        self.advance(); // consume AS
27809                        let alias = self.expect_identifier()?;
27810                        Expression::Alias(Box::new(Alias::new(tuple_expr, Identifier::new(alias))))
27811                    }
27812                } else {
27813                    tuple_expr
27814                };
27815
27816                // Allow postfix operators on tuple expressions (e.g., ('a', 'b').1 for tuple element access)
27817                return self.maybe_parse_subscript(result);
27818            }
27819
27820            // ClickHouse: (x -> body) — lambda inside parentheses
27821            if matches!(
27822                self.config.dialect,
27823                Some(crate::dialects::DialectType::ClickHouse)
27824            ) && self.match_token(TokenType::Arrow)
27825            {
27826                let parameters = if let Expression::Column(c) = first_expr {
27827                    vec![c.name]
27828                } else if let Expression::Identifier(id) = first_expr {
27829                    vec![id]
27830                } else {
27831                    return Err(self.parse_error("Expected identifier as lambda parameter"));
27832                };
27833                let body = self.parse_expression()?;
27834                self.expect(TokenType::RParen)?;
27835                return Ok(Expression::Paren(Box::new(Paren {
27836                    this: Expression::Lambda(Box::new(LambdaExpr {
27837                        parameters,
27838                        body,
27839                        colon: false,
27840                        parameter_types: Vec::new(),
27841                    })),
27842                    trailing_comments: Vec::new(),
27843                })));
27844            }
27845
27846            self.expect(TokenType::RParen)?;
27847            // Combine comments from ( and ) tokens
27848            let mut paren_comments = lparen_comments.clone();
27849            paren_comments.extend(self.previous_trailing_comments());
27850
27851            // Check for lambda expression: (x) -> body or single identifier case
27852            if self.match_token(TokenType::Arrow) {
27853                // first_expr should be a single identifier for the parameter
27854                let parameters = if let Expression::Column(c) = first_expr {
27855                    vec![c.name]
27856                } else if let Expression::Identifier(id) = first_expr {
27857                    vec![id]
27858                } else {
27859                    return Err(self.parse_error("Expected identifier as lambda parameter"));
27860                };
27861                let body = self.parse_expression()?;
27862                return Ok(Expression::Lambda(Box::new(LambdaExpr {
27863                    parameters,
27864                    body,
27865                    colon: false,
27866                    parameter_types: Vec::new(),
27867                })));
27868            }
27869
27870            return self.maybe_parse_over(Expression::Paren(Box::new(Paren {
27871                this: first_expr,
27872                trailing_comments: paren_comments,
27873            })));
27874        }
27875
27876        // NULL
27877        if self.match_token(TokenType::Null) {
27878            return Ok(Expression::Null(Null));
27879        }
27880
27881        // TRUE
27882        if self.match_token(TokenType::True) {
27883            return Ok(Expression::Boolean(BooleanLiteral { value: true }));
27884        }
27885
27886        // FALSE
27887        if self.match_token(TokenType::False) {
27888            return Ok(Expression::Boolean(BooleanLiteral { value: false }));
27889        }
27890
27891        // LAMBDA expression (DuckDB syntax: LAMBDA x : expr)
27892        if self.check(TokenType::Lambda) {
27893            if let Some(lambda) = self.parse_lambda()? {
27894                return Ok(lambda);
27895            }
27896        }
27897
27898        // CASE expression - but not if followed by DOT (then it's an identifier like case.column)
27899        if self.check(TokenType::Case) && !self.check_next(TokenType::Dot) {
27900            let case_expr = self.parse_case()?;
27901            return self.maybe_parse_over(case_expr);
27902        }
27903
27904        // CAST expression
27905        if self.check(TokenType::Cast) {
27906            let cast_expr = self.parse_cast()?;
27907            return self.maybe_parse_subscript(cast_expr);
27908        }
27909
27910        // TRY_CAST expression
27911        if self.check(TokenType::TryCast) {
27912            let cast_expr = self.parse_try_cast()?;
27913            return self.maybe_parse_subscript(cast_expr);
27914        }
27915
27916        // SAFE_CAST expression (BigQuery)
27917        if self.check(TokenType::SafeCast) {
27918            let cast_expr = self.parse_safe_cast()?;
27919            return self.maybe_parse_subscript(cast_expr);
27920        }
27921
27922        // EXISTS - either subquery predicate EXISTS(SELECT ...) or Hive array function EXISTS(array, lambda)
27923        // ClickHouse: EXISTS without ( is a column name/identifier
27924        if self.check(TokenType::Exists)
27925            && matches!(
27926                self.config.dialect,
27927                Some(crate::dialects::DialectType::ClickHouse)
27928            )
27929            && !self.check_next(TokenType::LParen)
27930        {
27931            let tok = self.advance();
27932            return Ok(Expression::Identifier(Identifier::new(tok.text)));
27933        }
27934        if self.match_token(TokenType::Exists) {
27935            self.expect(TokenType::LParen)?;
27936
27937            // Check if this is a subquery EXISTS (SELECT, WITH, or FROM for DuckDB)
27938            // ClickHouse: also handle EXISTS((SELECT ...)) with double parens
27939            if self.check(TokenType::Select)
27940                || self.check(TokenType::With)
27941                || self.check(TokenType::From)
27942                || (self.check(TokenType::LParen)
27943                    && self
27944                        .peek_nth(1)
27945                        .map(|t| {
27946                            matches!(
27947                                t.token_type,
27948                                TokenType::Select | TokenType::With | TokenType::From
27949                            )
27950                        })
27951                        .unwrap_or(false))
27952            {
27953                let query = self.parse_statement()?;
27954                self.expect(TokenType::RParen)?;
27955                return Ok(Expression::Exists(Box::new(Exists {
27956                    this: query,
27957                    not: false,
27958                })));
27959            }
27960
27961            // Otherwise it's Hive's array EXISTS function: EXISTS(array, lambda_predicate)
27962            // This function checks if any element in the array matches the predicate
27963            let array_expr = self.parse_expression()?;
27964            self.expect(TokenType::Comma)?;
27965            let predicate = self.parse_expression()?;
27966            self.expect(TokenType::RParen)?;
27967            return Ok(Expression::Function(Box::new(Function {
27968                name: "EXISTS".to_string(),
27969                args: vec![array_expr, predicate],
27970                distinct: false,
27971                trailing_comments: Vec::new(),
27972                use_bracket_syntax: false,
27973                no_parens: false,
27974                quoted: false,
27975                span: None,
27976                inferred_type: None,
27977            })));
27978        }
27979
27980        // INTERVAL expression or identifier
27981        if self.check(TokenType::Interval) {
27982            if let Some(interval_expr) = self.try_parse_interval()? {
27983                return Ok(interval_expr);
27984            }
27985            // INTERVAL is used as an identifier
27986            let token = self.advance();
27987            return Ok(Expression::Identifier(Identifier::new(token.text)));
27988        }
27989
27990        // DATE literal: DATE '2024-01-15' or DATE function: DATE(expr)
27991        if self.check(TokenType::Date) {
27992            let token = self.advance();
27993            let original_text = token.text.clone();
27994            if self.check(TokenType::String) {
27995                let str_token = self.advance();
27996                if self.config.dialect.is_none() {
27997                    // Generic (no dialect): DATE 'literal' -> CAST('literal' AS DATE)
27998                    return Ok(Expression::Cast(Box::new(Cast {
27999                        this: Expression::Literal(Literal::String(str_token.text)),
28000                        to: DataType::Date,
28001                        trailing_comments: Vec::new(),
28002                        double_colon_syntax: false,
28003                        format: None,
28004                        default: None,
28005                        inferred_type: None,
28006                    })));
28007                }
28008                return Ok(Expression::Literal(Literal::Date(str_token.text)));
28009            }
28010            // Check for DATE() function call
28011            if self.match_token(TokenType::LParen) {
28012                let func_expr = self.parse_typed_function(&original_text, "DATE", false)?;
28013                return self.maybe_parse_over(func_expr);
28014            }
28015            // Fallback to DATE as identifier/type - preserve original case
28016            return Ok(Expression::Identifier(Identifier::new(original_text)));
28017        }
28018
28019        // TIME literal: TIME '10:30:00' or TIME function: TIME(expr)
28020        if self.check(TokenType::Time) {
28021            let token = self.advance();
28022            let original_text = token.text.clone();
28023            if self.check(TokenType::String) {
28024                let str_token = self.advance();
28025                return Ok(Expression::Literal(Literal::Time(str_token.text)));
28026            }
28027            // Check for TIME() function call
28028            if self.match_token(TokenType::LParen) {
28029                let func_expr = self.parse_typed_function(&original_text, "TIME", false)?;
28030                return self.maybe_parse_over(func_expr);
28031            }
28032            // Fallback to TIME as identifier/type - preserve original case
28033            return self
28034                .maybe_parse_subscript(Expression::Identifier(Identifier::new(original_text)));
28035        }
28036
28037        // TIMESTAMP literal: TIMESTAMP '2024-01-15 10:30:00' or TIMESTAMP function: TIMESTAMP(expr)
28038        // Also handles TIMESTAMP(n) WITH TIME ZONE as a data type expression
28039        if self.check(TokenType::Timestamp) {
28040            let token = self.advance();
28041            let original_text = token.text.clone();
28042            if self.check(TokenType::String) {
28043                let str_token = self.advance();
28044                if self.config.dialect.is_none() {
28045                    // Generic (no dialect): TIMESTAMP 'literal' -> CAST('literal' AS TIMESTAMP)
28046                    return Ok(Expression::Cast(Box::new(Cast {
28047                        this: Expression::Literal(Literal::String(str_token.text)),
28048                        to: DataType::Timestamp {
28049                            precision: None,
28050                            timezone: false,
28051                        },
28052                        trailing_comments: Vec::new(),
28053                        double_colon_syntax: false,
28054                        format: None,
28055                        default: None,
28056                        inferred_type: None,
28057                    })));
28058                }
28059                // Dialect-specific: keep as Literal::Timestamp for dialect transforms
28060                return Ok(Expression::Literal(Literal::Timestamp(str_token.text)));
28061            }
28062            // Check for TIMESTAMP(n) WITH/WITHOUT TIME ZONE or TIMESTAMP(n) 'literal' as data type
28063            // This is a data type, not a function call
28064            if self.check(TokenType::LParen) {
28065                // Look ahead to see if this is TIMESTAMP(number) WITH/WITHOUT/String (data type)
28066                // vs TIMESTAMP(expr) (function call)
28067                let is_data_type = self.check_next(TokenType::Number) && {
28068                    // Check if after (number) there's WITH, WITHOUT, or String literal
28069                    let mut lookahead = self.current + 2;
28070                    // The number was already skipped by the `+ 2` above; step past the closing paren
28071                    while lookahead < self.tokens.len()
28072                        && self.tokens[lookahead].token_type == TokenType::RParen
28073                    {
28074                        lookahead += 1;
28075                        break;
28076                    }
28077                    // Check for WITH, WITHOUT, or String after the closing paren
28078                    lookahead < self.tokens.len()
28079                        && (self.tokens[lookahead].token_type == TokenType::With
28080                            || self.tokens[lookahead].text.to_uppercase() == "WITHOUT"
28081                            || self.tokens[lookahead].token_type == TokenType::String)
28082                };
28083
28084                if is_data_type {
28085                    // Parse as data type: TIMESTAMP(precision) [WITH/WITHOUT TIME ZONE] ['literal']
28086                    self.advance(); // consume (
28087                    let precision = Some(self.expect_number()? as u32);
28088                    self.expect(TokenType::RParen)?;
28089
28090                    let data_type = if self.match_token(TokenType::With) {
28091                        if self.match_token(TokenType::Local) {
28092                            // WITH LOCAL TIME ZONE -> TIMESTAMPLTZ
28093                            self.match_keyword("TIME");
28094                            self.match_keyword("ZONE");
28095                            DataType::Custom {
28096                                name: format!("TIMESTAMPLTZ({})", precision.unwrap()),
28097                            }
28098                        } else {
28099                            self.match_keyword("TIME");
28100                            self.match_keyword("ZONE");
28101                            DataType::Timestamp {
28102                                precision,
28103                                timezone: true,
28104                            }
28105                        }
28106                    } else if self.match_keyword("WITHOUT") {
28107                        self.match_keyword("TIME");
28108                        self.match_keyword("ZONE");
28109                        DataType::Timestamp {
28110                            precision,
28111                            timezone: false,
28112                        }
28113                    } else {
28114                        DataType::Timestamp {
28115                            precision,
28116                            timezone: false,
28117                        }
28118                    };
28119
28120                    // Check for following string literal -> wrap in CAST
28121                    if self.check(TokenType::String) {
28122                        let str_token = self.advance();
28123                        return Ok(Expression::Cast(Box::new(Cast {
28124                            this: Expression::Literal(Literal::String(str_token.text)),
28125                            to: data_type,
28126                            trailing_comments: Vec::new(),
28127                            double_colon_syntax: false,
28128                            format: None,
28129                            default: None,
28130                            inferred_type: None,
28131                        })));
28132                    }
28133
28134                    return Ok(Expression::DataType(data_type));
28135                }
28136
28137                // Otherwise parse as function call
28138                self.advance(); // consume (
28139                let func_expr = self.parse_typed_function(&original_text, "TIMESTAMP", false)?;
28140                return self.maybe_parse_over(func_expr);
28141            }
28142            // Check for TIMESTAMP WITH/WITHOUT TIME ZONE (no precision) as data type
28143            // Use lookahead to verify WITH is followed by TIME (not WITH FILL, WITH TOTALS, etc.)
28144            if (self.check(TokenType::With)
28145                && self.peek_nth(1).map_or(false, |t| {
28146                    t.text.eq_ignore_ascii_case("TIME") || t.text.eq_ignore_ascii_case("LOCAL")
28147                }))
28148                || self.check_keyword_text("WITHOUT")
28149            {
28150                let data_type = if self.match_token(TokenType::With) {
28151                    if self.match_token(TokenType::Local) {
28152                        // WITH LOCAL TIME ZONE -> TIMESTAMPLTZ
28153                        self.match_keyword("TIME");
28154                        self.match_keyword("ZONE");
28155                        DataType::Custom {
28156                            name: "TIMESTAMPLTZ".to_string(),
28157                        }
28158                    } else {
28159                        self.match_keyword("TIME");
28160                        self.match_keyword("ZONE");
28161                        DataType::Timestamp {
28162                            precision: None,
28163                            timezone: true,
28164                        }
28165                    }
28166                } else if self.match_keyword("WITHOUT") {
28167                    self.match_keyword("TIME");
28168                    self.match_keyword("ZONE");
28169                    DataType::Timestamp {
28170                        precision: None,
28171                        timezone: false,
28172                    }
28173                } else {
28174                    DataType::Timestamp {
28175                        precision: None,
28176                        timezone: false,
28177                    }
28178                };
28179
28180                // Check for following string literal -> wrap in CAST
28181                if self.check(TokenType::String) {
28182                    let str_token = self.advance();
28183                    return Ok(Expression::Cast(Box::new(Cast {
28184                        this: Expression::Literal(Literal::String(str_token.text)),
28185                        to: data_type,
28186                        trailing_comments: Vec::new(),
28187                        double_colon_syntax: false,
28188                        format: None,
28189                        default: None,
28190                        inferred_type: None,
28191                    })));
28192                }
28193
28194                return Ok(Expression::DataType(data_type));
28195            }
28196            // Fallback to TIMESTAMP as identifier/type - preserve original case
28197            return Ok(Expression::Identifier(Identifier::new(original_text)));
28198        }
28199
28200        // DATETIME literal: DATETIME '2024-01-15 10:30:00' or DATETIME function: DATETIME(expr)
28201        if self.check(TokenType::DateTime) {
28202            let token = self.advance();
28203            let original_text = token.text.clone();
28204            if self.check(TokenType::String) {
28205                let str_token = self.advance();
28206                return Ok(Expression::Literal(Literal::Datetime(str_token.text)));
28207            }
28208            // Check for DATETIME() function call
28209            if self.match_token(TokenType::LParen) {
28210                let func_expr = self.parse_typed_function(&original_text, "DATETIME", false)?;
28211                return self.maybe_parse_over(func_expr);
28212            }
28213            // Fallback to DATETIME as identifier/type - preserve original case
28214            return Ok(Expression::Identifier(Identifier::new(original_text)));
28215        }
28216
28217        // ROW() function (window function for row number)
28218        if self.check(TokenType::Row) && self.check_next(TokenType::LParen) {
28219            self.advance(); // consume ROW
28220            self.expect(TokenType::LParen)?;
28221            // ROW() typically takes no arguments
28222            let args = if !self.check(TokenType::RParen) {
28223                self.parse_expression_list()?
28224            } else {
28225                Vec::new()
28226            };
28227            self.expect(TokenType::RParen)?;
28228            let func_expr = Expression::Function(Box::new(Function {
28229                name: "ROW".to_string(),
28230                args,
28231                distinct: false,
28232                trailing_comments: Vec::new(),
28233                use_bracket_syntax: false,
28234                no_parens: false,
28235                quoted: false,
28236                span: None,
28237                inferred_type: None,
28238            }));
28239            return self.maybe_parse_over(func_expr);
28240        }
28241
28242        // Number - support postfix operators like ::type
28243        if self.check(TokenType::Number) {
28244            let token = self.advance();
28245            if matches!(
28246                self.config.dialect,
28247                Some(crate::dialects::DialectType::MySQL)
28248            ) {
28249                let text = token.text.as_str();
28250                if text.len() > 2
28251                    && (text.starts_with("0x") || text.starts_with("0X"))
28252                    && !text[2..].chars().all(|c| c.is_ascii_hexdigit())
28253                {
28254                    let ident = Expression::Identifier(Identifier {
28255                        name: token.text,
28256                        quoted: true,
28257                        trailing_comments: Vec::new(),
28258                        span: None,
28259                    });
28260                    return self.maybe_parse_subscript(ident);
28261                }
28262            }
28263            if matches!(
28264                self.config.dialect,
28265                Some(crate::dialects::DialectType::Teradata)
28266            ) && token.text == "0"
28267            {
28268                if let Some(next) = self.tokens.get(self.current) {
28269                    let is_adjacent = token.span.end == next.span.start;
28270                    let next_text = next.text.as_str();
28271                    let is_hex_prefix = next_text.starts_with('x') || next_text.starts_with('X');
28272                    if is_adjacent
28273                        && matches!(next.token_type, TokenType::Identifier | TokenType::Var)
28274                        && is_hex_prefix
28275                        && next_text.len() > 1
28276                        && next_text[1..].chars().all(|c| c.is_ascii_hexdigit())
28277                    {
28278                        // Consume the hex suffix token and emit a HexString literal
28279                        let hex_token = self.advance();
28280                        let hex = hex_token.text[1..].to_string();
28281                        let literal = Expression::Literal(Literal::HexString(hex));
28282                        return self.maybe_parse_subscript(literal);
28283                    }
28284                }
28285            }
28286            if matches!(
28287                self.config.dialect,
28288                Some(crate::dialects::DialectType::ClickHouse)
28289            ) {
28290                if let Some(next) = self.tokens.get(self.current) {
28291                    let is_adjacent = token.span.end == next.span.start;
28292                    if is_adjacent
28293                        && matches!(next.token_type, TokenType::Identifier | TokenType::Var)
28294                        && next.text.starts_with('_')
28295                    {
28296                        let suffix = next.text.clone();
28297                        self.advance(); // consume suffix token
28298                        let combined = format!("{}{}", token.text, suffix);
28299                        let literal = Expression::Literal(Literal::Number(combined));
28300                        return self.maybe_parse_subscript(literal);
28301                    }
28302                }
28303            }
28304            // Check for numeric literal suffix encoded as "number::TYPE" by tokenizer
28305            let literal = if let Some(sep_pos) = token.text.find("::") {
28306                let num_part = &token.text[..sep_pos];
28307                let type_name = &token.text[sep_pos + 2..];
28308                let num_expr = Expression::Literal(Literal::Number(num_part.to_string()));
28309                let data_type = match type_name {
28310                    "BIGINT" => crate::expressions::DataType::BigInt { length: None },
28311                    "SMALLINT" => crate::expressions::DataType::SmallInt { length: None },
28312                    "TINYINT" => crate::expressions::DataType::TinyInt { length: None },
28313                    "DOUBLE" => crate::expressions::DataType::Double {
28314                        precision: None,
28315                        scale: None,
28316                    },
28317                    "FLOAT" => crate::expressions::DataType::Float {
28318                        precision: None,
28319                        scale: None,
28320                        real_spelling: false,
28321                    },
28322                    "DECIMAL" => crate::expressions::DataType::Decimal {
28323                        precision: None,
28324                        scale: None,
28325                    },
28326                    _ => crate::expressions::DataType::Custom {
28327                        name: type_name.to_string(),
28328                    },
28329                };
28330                Expression::Cast(Box::new(crate::expressions::Cast {
28331                    this: num_expr,
28332                    to: data_type,
28333                    trailing_comments: Vec::new(),
28334                    double_colon_syntax: false,
28335                    format: None,
28336                    default: None,
28337                    inferred_type: None,
28338                }))
28339            } else {
28340                Expression::Literal(Literal::Number(token.text))
28341            };
28342            return self.maybe_parse_subscript(literal);
28343        }
28344
28345        // String - support postfix operators like ::type, ->, ->>
28346        // Also handle adjacent string literals (SQL standard) which concatenate: 'x' 'y' 'z' -> CONCAT('x', 'y', 'z')
28347        if self.check(TokenType::String) {
28348            let token = self.advance();
28349            let first_literal = Expression::Literal(Literal::String(token.text));
28350
28351            // Check for adjacent string literals (PostgreSQL and SQL standard feature)
28352            // 'x' 'y' 'z' should be treated as string concatenation
28353            if self.check(TokenType::String) {
28354                let mut expressions = vec![first_literal];
28355                while self.check(TokenType::String) {
28356                    let next_token = self.advance();
28357                    expressions.push(Expression::Literal(Literal::String(next_token.text)));
28358                }
28359                // Create CONCAT function call with all adjacent strings
28360                let concat_func =
28361                    Expression::Function(Box::new(Function::new("CONCAT", expressions)));
28362                return self.maybe_parse_subscript(concat_func);
28363            }
28364
28365            return self.maybe_parse_subscript(first_literal);
28366        }
28367
28368        // Dollar-quoted string: $$...$$ or $tag$...$tag$ -- preserve as DollarString
28369        // so the generator can handle dialect-specific conversion
28370        if self.check(TokenType::DollarString) {
28371            let token = self.advance();
28372            let literal = Expression::Literal(Literal::DollarString(token.text));
28373            return self.maybe_parse_subscript(literal);
28374        }
28375
28376        // Triple-quoted string with double quotes: """..."""
28377        if self.check(TokenType::TripleDoubleQuotedString) {
28378            let token = self.advance();
28379            let literal = Expression::Literal(Literal::TripleQuotedString(token.text, '"'));
28380            return self.maybe_parse_subscript(literal);
28381        }
28382
28383        // Triple-quoted string with single quotes: '''...'''
28384        if self.check(TokenType::TripleSingleQuotedString) {
28385            let token = self.advance();
28386            let literal = Expression::Literal(Literal::TripleQuotedString(token.text, '\''));
28387            return self.maybe_parse_subscript(literal);
28388        }
28389
28390        // National String (N'...')
28391        if self.check(TokenType::NationalString) {
28392            let token = self.advance();
28393            let literal = Expression::Literal(Literal::NationalString(token.text));
28394            return self.maybe_parse_subscript(literal);
28395        }
28396
28397        // Hex String (X'...')
28398        if self.check(TokenType::HexString) {
28399            let token = self.advance();
28400            let literal = Expression::Literal(Literal::HexString(token.text));
28401            return self.maybe_parse_subscript(literal);
28402        }
28403
28404        // Hex Number (0xA from BigQuery/SQLite) - integer in hex notation
28405        if self.check(TokenType::HexNumber) {
28406            let token = self.advance();
28407            if matches!(
28408                self.config.dialect,
28409                Some(crate::dialects::DialectType::MySQL)
28410            ) {
28411                let text = token.text.as_str();
28412                if text.len() > 2
28413                    && (text.starts_with("0x") || text.starts_with("0X"))
28414                    && !text[2..].chars().all(|c| c.is_ascii_hexdigit())
28415                {
28416                    let ident = Expression::Identifier(Identifier {
28417                        name: token.text,
28418                        quoted: true,
28419                        trailing_comments: Vec::new(),
28420                        span: None,
28421                    });
28422                    return self.maybe_parse_subscript(ident);
28423                }
28424            }
28425            let literal = Expression::Literal(Literal::HexNumber(token.text));
28426            return self.maybe_parse_subscript(literal);
28427        }
28428
28429        // Bit String (B'...')
28430        if self.check(TokenType::BitString) {
28431            let token = self.advance();
28432            let literal = Expression::Literal(Literal::BitString(token.text));
28433            return self.maybe_parse_subscript(literal);
28434        }
28435
28436        // Byte String (b"..." - BigQuery style)
28437        if self.check(TokenType::ByteString) {
28438            let token = self.advance();
28439            let literal = Expression::Literal(Literal::ByteString(token.text));
28440            return self.maybe_parse_subscript(literal);
28441        }
28442
28443        // Raw String (r"..." - BigQuery style, backslashes are literal)
28444        if self.check(TokenType::RawString) {
28445            let token = self.advance();
28446            // Raw strings preserve backslashes as literal characters.
28447            // The generator will handle escaping when converting to a regular string.
28448            let literal = Expression::Literal(Literal::RawString(token.text));
28449            return self.maybe_parse_subscript(literal);
28450        }
28451
28452        // Escape String (E'...' - PostgreSQL)
28453        if self.check(TokenType::EscapeString) {
28454            let token = self.advance();
28455            // EscapeString is reportedly stored as "E'content'"; the token text is passed through unchanged here -- NOTE(review): confirm where the content is extracted
28456            let literal = Expression::Literal(Literal::EscapeString(token.text));
28457            return self.maybe_parse_subscript(literal);
28458        }
28459
28460        // Star - check for DuckDB *COLUMNS(...) syntax first
28461        if self.check(TokenType::Star) {
28462            // DuckDB *COLUMNS(...) syntax: *COLUMNS(*), *COLUMNS('regex'), *COLUMNS(['col1', 'col2'])
28463            // Check if * is followed by COLUMNS and (
28464            if self.check_next_identifier("COLUMNS") {
28465                // Check if there's a ( after COLUMNS
28466                if self
28467                    .tokens
28468                    .get(self.current + 2)
28469                    .map(|t| t.token_type == TokenType::LParen)
28470                    .unwrap_or(false)
28471                {
28472                    self.advance(); // consume *
28473                    self.advance(); // consume COLUMNS
28474                    self.advance(); // consume (
28475
28476                    // Parse the argument: can be *, a regex string, or an array of column names
28477                    let arg = if self.check(TokenType::Star) {
28478                        self.advance(); // consume *
28479                        Expression::Star(Star {
28480                            table: None,
28481                            except: None,
28482                            replace: None,
28483                            rename: None,
28484                            trailing_comments: Vec::new(),
28485                            span: None,
28486                        })
28487                    } else {
28488                        self.parse_expression()?
28489                    };
28490
28491                    self.expect(TokenType::RParen)?;
28492
28493                    // Create Columns expression with unpack=true
28494                    return Ok(Expression::Columns(Box::new(Columns {
28495                        this: Box::new(arg),
28496                        unpack: Some(Box::new(Expression::Boolean(BooleanLiteral {
28497                            value: true,
28498                        }))),
28499                    })));
28500                }
28501            }
28502
28503            // Regular star
28504            self.advance(); // consume *
28505            let star = self.parse_star_modifiers(None)?;
28506            return Ok(Expression::Star(star));
28507        }
28508
28509        // Generic type expressions: ARRAY<T>, MAP<K,V>, STRUCT<...>
28510        // These are standalone type expressions (not in CAST context)
28511        // But also handle STRUCT<TYPE>(args) which becomes CAST(STRUCT(args) AS STRUCT<TYPE>)
28512        if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
28513            let name_upper = self.peek().text.to_uppercase();
28514            if (name_upper == "ARRAY" || name_upper == "MAP" || name_upper == "STRUCT")
28515                && self.check_next(TokenType::Lt)
28516            {
28517                self.advance(); // consume ARRAY/MAP/STRUCT
28518                let data_type = self.parse_data_type_from_name(&name_upper)?;
28519
28520                // Check for typed constructor: STRUCT<TYPE>(args) or ARRAY<TYPE>(args)
28521                // These become CAST(STRUCT(args) AS TYPE) or CAST(ARRAY(args) AS TYPE)
28522                if self.match_token(TokenType::LParen) {
28523                    if name_upper == "STRUCT" {
28524                        // Parse struct constructor arguments
28525                        let args = if self.check(TokenType::RParen) {
28526                            Vec::new()
28527                        } else {
28528                            self.parse_struct_args()?
28529                        };
28530                        self.expect(TokenType::RParen)?;
28531
28532                        // Convert args to Struct fields (all unnamed)
28533                        let fields: Vec<(Option<String>, Expression)> =
28534                            args.into_iter().map(|e| (None, e)).collect();
28535
28536                        // Create CAST(STRUCT(args) AS STRUCT<TYPE>)
28537                        let struct_expr = Expression::Struct(Box::new(Struct { fields }));
28538                        let cast_expr = Expression::Cast(Box::new(Cast {
28539                            this: struct_expr,
28540                            to: data_type,
28541                            trailing_comments: Vec::new(),
28542                            double_colon_syntax: false,
28543                            format: None,
28544                            default: None,
28545                            inferred_type: None,
28546                        }));
28547                        return self.maybe_parse_subscript(cast_expr);
28548                    } else if name_upper == "ARRAY" {
28549                        // Parse array constructor arguments
28550                        let mut expressions = Vec::new();
28551                        if !self.check(TokenType::RParen) {
28552                            loop {
28553                                expressions.push(self.parse_expression()?);
28554                                if !self.match_token(TokenType::Comma) {
28555                                    break;
28556                                }
28557                            }
28558                        }
28559                        self.expect(TokenType::RParen)?;
28560
28561                        // Create CAST(ARRAY[args] AS ARRAY<TYPE>)
28562                        let array_expr = Expression::Array(Box::new(Array { expressions }));
28563                        let cast_expr = Expression::Cast(Box::new(Cast {
28564                            this: array_expr,
28565                            to: data_type,
28566                            trailing_comments: Vec::new(),
28567                            double_colon_syntax: false,
28568                            format: None,
28569                            default: None,
28570                            inferred_type: None,
28571                        }));
28572                        return self.maybe_parse_subscript(cast_expr);
28573                    }
28574                } else if self.match_token(TokenType::LBracket) {
28575                    // ARRAY<TYPE>[values] or ARRAY<TYPE>[] - bracket-style array constructor
28576                    let expressions = if self.check(TokenType::RBracket) {
28577                        Vec::new()
28578                    } else {
28579                        self.parse_expression_list()?
28580                    };
28581                    self.expect(TokenType::RBracket)?;
28582                    // Create CAST(Array(values) AS DataType)
28583                    let array_expr = Expression::Array(Box::new(Array { expressions }));
28584                    let cast_expr = Expression::Cast(Box::new(Cast {
28585                        this: array_expr,
28586                        to: data_type,
28587                        trailing_comments: Vec::new(),
28588                        double_colon_syntax: false,
28589                        format: None,
28590                        default: None,
28591                        inferred_type: None,
28592                    }));
28593                    return self.maybe_parse_subscript(cast_expr);
28594                }
28595
28596                return Ok(Expression::DataType(data_type));
28597            }
28598            // DuckDB-style MAP with curly brace literals: MAP {'key': value}
28599            if name_upper == "MAP" && self.check_next(TokenType::LBrace) {
28600                self.advance(); // consume MAP
28601                self.expect(TokenType::LBrace)?;
28602
28603                // Handle empty: MAP {}
28604                if self.match_token(TokenType::RBrace) {
28605                    return self.maybe_parse_subscript(Expression::MapFunc(Box::new(
28606                        MapConstructor {
28607                            keys: Vec::new(),
28608                            values: Vec::new(),
28609                            curly_brace_syntax: true,
28610                            with_map_keyword: true,
28611                        },
28612                    )));
28613                }
28614
28615                // Parse key-value pairs
28616                let mut keys = Vec::new();
28617                let mut values = Vec::new();
28618                loop {
28619                    let key = self.parse_primary()?;
28620                    self.expect(TokenType::Colon)?;
28621                    let value = self.parse_expression()?;
28622                    keys.push(key);
28623                    values.push(value);
28624                    if !self.match_token(TokenType::Comma) {
28625                        break;
28626                    }
28627                    // Handle trailing comma
28628                    if self.check(TokenType::RBrace) {
28629                        break;
28630                    }
28631                }
28632                self.expect(TokenType::RBrace)?;
28633
28634                return self.maybe_parse_subscript(Expression::MapFunc(Box::new(MapConstructor {
28635                    keys,
28636                    values,
28637                    curly_brace_syntax: true,
28638                    with_map_keyword: true,
28639                })));
28640            }
28641        }
28642
28643        // Keywords as identifiers when followed by DOT (e.g., case.x, top.y)
28644        // These keywords can be table/column names when used with dot notation
28645        if (self.check(TokenType::Case) || self.check(TokenType::Top))
28646            && self.check_next(TokenType::Dot)
28647        {
28648            let token = self.advance();
28649            let ident = Identifier::new(token.text);
28650            self.expect(TokenType::Dot)?;
28651            if self.match_token(TokenType::Star) {
28652                // case.* or top.*
28653                let star = self.parse_star_modifiers(Some(ident))?;
28654                return Ok(Expression::Star(star));
28655            }
28656            // case.column or top.column
28657            let col_ident = self.expect_identifier_or_keyword_with_quoted()?;
28658            // Capture trailing comments from the column name token
28659            let trailing_comments = self.previous_trailing_comments();
28660            let mut col = Expression::Column(Column {
28661                name: col_ident,
28662                table: Some(ident),
28663                join_mark: false,
28664                trailing_comments,
28665                span: None,
28666                inferred_type: None,
28667            });
28668            // Handle Oracle/Redshift outer join marker (+) after column reference
28669            if self.check(TokenType::LParen) && self.check_next(TokenType::Plus) {
28670                let saved_pos = self.current;
28671                if self.match_token(TokenType::LParen)
28672                    && self.match_token(TokenType::Plus)
28673                    && self.match_token(TokenType::RParen)
28674                {
28675                    if let Expression::Column(ref mut c) = col {
28676                        c.join_mark = true;
28677                    }
28678                } else {
28679                    self.current = saved_pos;
28680                }
28681            }
28682            return self.maybe_parse_subscript(col);
28683        }
28684
28685        // MySQL BINARY prefix operator: BINARY expr -> CAST(expr AS BINARY)
28686        // Only treat as prefix operator when followed by an expression (not ( which would be BINARY() function,
28687        // and not when it would be a data type like BINARY in column definitions)
28688        if self.check(TokenType::Var)
28689            && self.peek().text.eq_ignore_ascii_case("BINARY")
28690            && !self.check_next(TokenType::LParen)
28691            && !self.check_next(TokenType::Dot)
28692            && !self.check_next(TokenType::RParen)
28693            && !self.check_next(TokenType::Comma)
28694            && !self.is_at_end()
28695        {
28696            // Check if this is actually followed by an expression token (not end of statement)
28697            let next_idx = self.current + 1;
28698            let has_expr = next_idx < self.tokens.len()
28699                && !matches!(
28700                    self.tokens[next_idx].token_type,
28701                    TokenType::Semicolon | TokenType::Eof | TokenType::RParen | TokenType::Comma
28702                );
28703            if has_expr {
28704                self.advance(); // consume BINARY
28705                let expr = self.parse_unary()?;
28706                return Ok(Expression::Cast(Box::new(Cast {
28707                    this: expr,
28708                    to: DataType::Binary { length: None },
28709                    trailing_comments: Vec::new(),
28710                    double_colon_syntax: false,
28711                    format: None,
28712                    default: None,
28713                    inferred_type: None,
28714                })));
28715            }
28716        }
28717
28718        // RLIKE/REGEXP as function call: RLIKE(expr, pattern, flags)
28719        // Normally RLIKE is an operator, but Snowflake allows function syntax
28720        if self.check(TokenType::RLike) && self.check_next(TokenType::LParen) {
28721            let token = self.advance(); // consume RLIKE
28722            self.advance(); // consume LParen
28723            let args = if self.check(TokenType::RParen) {
28724                Vec::new()
28725            } else {
28726                self.parse_function_arguments()?
28727            };
28728            self.expect(TokenType::RParen)?;
28729            let func = Expression::Function(Box::new(Function {
28730                name: token.text.clone(), // Preserve original case; generator handles normalization
28731                args,
28732                distinct: false,
28733                trailing_comments: Vec::new(),
28734                use_bracket_syntax: false,
28735                no_parens: false,
28736                quoted: false,
28737                span: None,
28738                inferred_type: None,
28739            }));
28740            return self.maybe_parse_over(func);
28741        }
28742
28743        // INSERT as function call: INSERT(str, pos, len, newstr)
28744        // Snowflake/MySQL have INSERT as a string function, but INSERT is also a DML keyword.
28745        // When followed by ( in expression context, treat as function call.
28746        if self.check(TokenType::Insert) && self.check_next(TokenType::LParen) {
28747            let token = self.advance(); // consume INSERT
28748            self.advance(); // consume LParen
28749            let args = if self.check(TokenType::RParen) {
28750                Vec::new()
28751            } else {
28752                self.parse_function_arguments()?
28753            };
28754            self.expect(TokenType::RParen)?;
28755            let func = Expression::Function(Box::new(Function {
28756                name: token.text.clone(),
28757                args,
28758                distinct: false,
28759                trailing_comments: Vec::new(),
28760                use_bracket_syntax: false,
28761                no_parens: false,
28762                quoted: false,
28763                span: None,
28764                inferred_type: None,
28765            }));
28766            return self.maybe_parse_over(func);
28767        }
28768
28769        // ClickHouse: MINUS/EXCEPT/INTERSECT/REGEXP as function names (e.g., minus(a, b), REGEXP('^db'))
28770        // MINUS is tokenized as TokenType::Except (Oracle alias), REGEXP as TokenType::RLike
28771        if matches!(
28772            self.config.dialect,
28773            Some(crate::dialects::DialectType::ClickHouse)
28774        ) && (self.check(TokenType::Except)
28775            || self.check(TokenType::Intersect)
28776            || self.check(TokenType::RLike))
28777            && self.check_next(TokenType::LParen)
28778        {
28779            let token = self.advance(); // consume keyword
28780            self.advance(); // consume LParen
28781            let args = if self.check(TokenType::RParen) {
28782                Vec::new()
28783            } else {
28784                self.parse_function_arguments()?
28785            };
28786            self.expect(TokenType::RParen)?;
28787            let func = Expression::Function(Box::new(Function {
28788                name: token.text.clone(),
28789                args,
28790                distinct: false,
28791                trailing_comments: Vec::new(),
28792                use_bracket_syntax: false,
28793                no_parens: false,
28794                quoted: false,
28795                span: None,
28796                inferred_type: None,
28797            }));
28798            return self.maybe_parse_over(func);
28799        }
28800
28801        // Handle CURRENT_DATE/CURRENT_TIMESTAMP/CURRENT_TIME/CURRENT_DATETIME with parentheses
28802        // These have special token types but BigQuery and others use them as function calls with args
28803        if matches!(
28804            self.peek().token_type,
28805            TokenType::CurrentDate
28806                | TokenType::CurrentTimestamp
28807                | TokenType::CurrentTime
28808                | TokenType::CurrentDateTime
28809        ) {
28810            // Snowflake: CURRENT_TIME / CURRENT_TIME(n) -> Localtime (so DuckDB can output LOCALTIME)
28811            if matches!(
28812                self.config.dialect,
28813                Some(crate::dialects::DialectType::Snowflake)
28814            ) && self.peek().token_type == TokenType::CurrentTime
28815            {
28816                self.advance(); // consume CURRENT_TIME
28817                if self.match_token(TokenType::LParen) {
28818                    // CURRENT_TIME(n) - consume args but ignore precision
28819                    if !self.check(TokenType::RParen) {
28820                        let _ = self.parse_function_arguments()?;
28821                    }
28822                    self.expect(TokenType::RParen)?;
28823                }
28824                return self.maybe_parse_subscript(Expression::Localtime(Box::new(
28825                    crate::expressions::Localtime { this: None },
28826                )));
28827            }
28828            if self.check_next(TokenType::LParen) {
28829                // Parse as function call: CURRENT_DATE('UTC'), CURRENT_TIMESTAMP(), etc.
28830                let token = self.advance(); // consume CURRENT_DATE etc.
28831                self.advance(); // consume LParen
28832                let args = if self.check(TokenType::RParen) {
28833                    Vec::new()
28834                } else {
28835                    self.parse_function_arguments()?
28836                };
28837                self.expect(TokenType::RParen)?;
28838                let func = Expression::Function(Box::new(Function {
28839                    name: token.text.clone(),
28840                    args,
28841                    distinct: false,
28842                    trailing_comments: Vec::new(),
28843                    use_bracket_syntax: false,
28844                    no_parens: false,
28845                    quoted: false,
28846                    span: None,
28847                    inferred_type: None,
28848                }));
28849                return self.maybe_parse_subscript(func);
28850            } else {
28851                // No parens - parse as no-paren function
28852                let token = self.advance();
28853                let func = Expression::Function(Box::new(Function {
28854                    name: token.text.clone(),
28855                    args: Vec::new(),
28856                    distinct: false,
28857                    trailing_comments: Vec::new(),
28858                    use_bracket_syntax: false,
28859                    no_parens: true,
28860                    quoted: false,
28861                    span: None,
28862                    inferred_type: None,
28863                }));
28864                return self.maybe_parse_subscript(func);
28865            }
28866        }
28867
28868        // Type keyword followed by string literal -> CAST('value' AS TYPE)
28869        // E.g., NUMERIC '2.25' -> CAST('2.25' AS NUMERIC)
28870        if self.is_identifier_token() && self.check_next(TokenType::String) {
28871            let upper_name = self.peek().text.to_uppercase();
28872            if matches!(
28873                upper_name.as_str(),
28874                "NUMERIC" | "DECIMAL" | "BIGNUMERIC" | "BIGDECIMAL"
28875            ) {
28876                self.advance(); // consume the type keyword
28877                let str_token = self.advance(); // consume the string literal
28878                let data_type = match upper_name.as_str() {
28879                    "NUMERIC" | "DECIMAL" | "BIGNUMERIC" | "BIGDECIMAL" => {
28880                        crate::expressions::DataType::Decimal {
28881                            precision: None,
28882                            scale: None,
28883                        }
28884                    }
28885                    _ => unreachable!("type keyword already matched in outer if-condition"),
28886                };
28887                return Ok(Expression::Cast(Box::new(crate::expressions::Cast {
28888                    this: Expression::Literal(Literal::String(str_token.text)),
28889                    to: data_type,
28890                    trailing_comments: Vec::new(),
28891                    double_colon_syntax: false,
28892                    format: None,
28893                    default: None,
28894                    inferred_type: None,
28895                })));
28896            }
28897        }
28898
28899        // Identifier, Column, or Function
28900        if self.is_identifier_token() {
28901            // Check for no-paren functions like CURRENT_TIMESTAMP, CURRENT_DATE, etc.
28902            // These should be parsed as functions even without parentheses
28903            let upper_name = self.peek().text.to_uppercase();
28904            if !self.check_next(TokenType::LParen)
28905                && !self.check_next(TokenType::Dot)
28906                && crate::function_registry::is_no_paren_function_name_upper(upper_name.as_str())
28907                && !(matches!(
28908                    self.config.dialect,
28909                    Some(crate::dialects::DialectType::ClickHouse)
28910                ) && upper_name.as_str() == "CURRENT_TIMESTAMP")
28911            {
28912                let token = self.advance();
28913                let func = Expression::Function(Box::new(Function {
28914                    name: token.text.clone(), // Preserve original case; generator handles normalization
28915                    args: Vec::new(),
28916                    distinct: false,
28917                    trailing_comments: Vec::new(),
28918                    use_bracket_syntax: false,
28919                    no_parens: true, // These functions were called without parentheses
28920                    quoted: false,
28921                    span: None,
28922                    inferred_type: None,
28923                }));
28924                return self.maybe_parse_subscript(func);
28925            }
28926
28927            let ident = self.expect_identifier_with_quoted()?;
28928            let name = ident.name.clone();
28929            let quoted = ident.quoted;
28930
28931            // Check for function call (skip Teradata FORMAT phrase)
28932            let is_teradata_format_phrase = matches!(
28933                self.config.dialect,
28934                Some(crate::dialects::DialectType::Teradata)
28935            ) && self.check(TokenType::LParen)
28936                && self.check_next(TokenType::Format);
28937            if !is_teradata_format_phrase && self.match_token(TokenType::LParen) {
28938                let upper_name = name.to_uppercase();
28939                let func_expr = self.parse_typed_function(&name, &upper_name, quoted)?;
28940                let func_expr = self.maybe_parse_clickhouse_parameterized_agg(func_expr)?;
28941                // Check for OVER clause (window function)
28942                return self.maybe_parse_over(func_expr);
28943            }
28944
28945            // Check for qualified name (table.column or table.method())
28946            if self.match_token(TokenType::Dot) {
28947                if self.match_token(TokenType::Star) {
28948                    // table.* with potential modifiers
28949                    let star = self.parse_star_modifiers(Some(ident))?;
28950                    let mut star_expr = Expression::Star(star);
28951                    // ClickHouse: a.* APPLY(func) EXCEPT(col) REPLACE(expr AS col) in any order
28952                    if matches!(
28953                        self.config.dialect,
28954                        Some(crate::dialects::DialectType::ClickHouse)
28955                    ) {
28956                        loop {
28957                            if self.check(TokenType::Apply) {
28958                                self.advance();
28959                                let apply_expr = if self.match_token(TokenType::LParen) {
28960                                    let e = self.parse_expression()?;
28961                                    self.expect(TokenType::RParen)?;
28962                                    e
28963                                } else {
28964                                    self.parse_expression()?
28965                                };
28966                                star_expr =
28967                                    Expression::Apply(Box::new(crate::expressions::Apply {
28968                                        this: Box::new(star_expr),
28969                                        expression: Box::new(apply_expr),
28970                                    }));
28971                            } else if self.check(TokenType::Except)
28972                                || self.check(TokenType::Exclude)
28973                            {
28974                                self.advance();
28975                                self.match_identifier("STRICT");
28976                                if self.match_token(TokenType::LParen) {
28977                                    loop {
28978                                        if self.check(TokenType::RParen) {
28979                                            break;
28980                                        }
28981                                        let _ = self.parse_expression()?;
28982                                        if !self.match_token(TokenType::Comma) {
28983                                            break;
28984                                        }
28985                                    }
28986                                    self.expect(TokenType::RParen)?;
28987                                } else if self.is_identifier_token()
28988                                    || self.is_safe_keyword_as_identifier()
28989                                {
28990                                    let _ = self.parse_expression()?;
28991                                }
28992                            } else if self.check(TokenType::Replace) {
28993                                self.advance();
28994                                self.match_identifier("STRICT");
28995                                if self.match_token(TokenType::LParen) {
28996                                    loop {
28997                                        if self.check(TokenType::RParen) {
28998                                            break;
28999                                        }
29000                                        let _ = self.parse_expression()?;
29001                                        if self.match_token(TokenType::As) {
29002                                            if self.is_identifier_token()
29003                                                || self.is_safe_keyword_as_identifier()
29004                                            {
29005                                                self.advance();
29006                                            }
29007                                        }
29008                                        if !self.match_token(TokenType::Comma) {
29009                                            break;
29010                                        }
29011                                    }
29012                                    self.expect(TokenType::RParen)?;
29013                                } else {
29014                                    let _ = self.parse_expression()?;
29015                                    if self.match_token(TokenType::As) {
29016                                        if self.is_identifier_token()
29017                                            || self.is_safe_keyword_as_identifier()
29018                                        {
29019                                            self.advance();
29020                                        }
29021                                    }
29022                                }
29023                            } else {
29024                                break;
29025                            }
29026                        }
29027                    }
29028                    return Ok(star_expr);
29029                }
29030                // Handle numeric field access: a.1, t.2 (ClickHouse tuple field access)
29031                // Also handle negative: a.-1 (ClickHouse negative tuple index)
29032                if self.check(TokenType::Number) {
29033                    let field_name = self.advance().text;
29034                    let col_expr = Expression::Dot(Box::new(DotAccess {
29035                        this: Expression::Column(Column {
29036                            name: ident,
29037                            table: None,
29038                            join_mark: false,
29039                            trailing_comments: Vec::new(),
29040                            span: None,
29041                            inferred_type: None,
29042                        }),
29043                        field: Identifier::new(field_name),
29044                    }));
29045                    return self.maybe_parse_subscript(col_expr);
29046                }
29047                if matches!(
29048                    self.config.dialect,
29049                    Some(crate::dialects::DialectType::ClickHouse)
29050                ) && self.check(TokenType::Dash)
29051                    && self.current + 1 < self.tokens.len()
29052                    && self.tokens[self.current + 1].token_type == TokenType::Number
29053                {
29054                    self.advance(); // consume -
29055                    let num = self.advance().text;
29056                    let field_name = format!("-{}", num);
29057                    let col_expr = Expression::Dot(Box::new(DotAccess {
29058                        this: Expression::Column(Column {
29059                            name: ident,
29060                            table: None,
29061                            join_mark: false,
29062                            trailing_comments: Vec::new(),
29063                            span: None,
29064                            inferred_type: None,
29065                        }),
29066                        field: Identifier::new(field_name),
29067                    }));
29068                    return self.maybe_parse_subscript(col_expr);
29069                }
29070                // ClickHouse: json.^path — the ^ prefix means "get all nested subcolumns"
29071                if matches!(
29072                    self.config.dialect,
29073                    Some(crate::dialects::DialectType::ClickHouse)
29074                ) && self.check(TokenType::Caret)
29075                {
29076                    self.advance(); // consume ^
29077                    let mut field_name = "^".to_string();
29078                    if self.check(TokenType::Identifier)
29079                        || self.check(TokenType::Var)
29080                        || self.check_keyword()
29081                    {
29082                        field_name.push_str(&self.advance().text);
29083                    }
29084                    let col_expr = Expression::Dot(Box::new(DotAccess {
29085                        this: Expression::Column(Column {
29086                            name: ident,
29087                            table: None,
29088                            join_mark: false,
29089                            trailing_comments: Vec::new(),
29090                            span: None,
29091                            inferred_type: None,
29092                        }),
29093                        field: Identifier::new(field_name),
29094                    }));
29095                    return self.maybe_parse_subscript(col_expr);
29096                }
29097                // Allow keywords as column names (e.g., a.filter, x.update)
29098                let col_ident = self.expect_identifier_or_keyword_with_quoted()?;
29099
29100                // Handle Oracle/Redshift outer join marker (+) BEFORE checking for method call
29101                // This is critical: (+) looks like a method call but is actually a join marker
29102                if self.check(TokenType::LParen) && self.check_next(TokenType::Plus) {
29103                    let saved_pos = self.current;
29104                    if self.match_token(TokenType::LParen)
29105                        && self.match_token(TokenType::Plus)
29106                        && self.match_token(TokenType::RParen)
29107                    {
29108                        let trailing_comments = self.previous_trailing_comments();
29109                        let col = Expression::Column(Column {
29110                            name: col_ident,
29111                            table: Some(ident),
29112                            join_mark: true,
29113                            trailing_comments,
29114                            span: None,
29115                            inferred_type: None,
29116                        });
29117                        return self.maybe_parse_subscript(col);
29118                    } else {
29119                        self.current = saved_pos;
29120                    }
29121                }
29122
29123                // Check if this is a method call (column followed by parentheses)
29124                if self.check(TokenType::LParen) {
29125                    // This is a method call like table.EXTRACT() or obj.INT()
29126                    self.advance(); // consume (
29127                    let args = if self.check(TokenType::RParen) {
29128                        Vec::new()
29129                    } else {
29130                        self.parse_expression_list()?
29131                    };
29132                    self.expect(TokenType::RParen)?;
29133                    let method_call = Expression::MethodCall(Box::new(MethodCall {
29134                        this: Expression::Column(Column {
29135                            name: ident.clone(),
29136                            table: None,
29137                            join_mark: false,
29138                            trailing_comments: Vec::new(),
29139                            span: None,
29140                            inferred_type: None,
29141                        }),
29142                        method: col_ident,
29143                        args,
29144                    }));
29145                    return self.maybe_parse_subscript(method_call);
29146                }
29147
29148                // Capture trailing comments from the column name token
29149                let trailing_comments = self.previous_trailing_comments();
29150                let col = Expression::Column(Column {
29151                    name: col_ident,
29152                    table: Some(ident),
29153                    join_mark: false,
29154                    trailing_comments,
29155                    span: None,
29156                    inferred_type: None,
29157                });
29158                return self.maybe_parse_subscript(col);
29159            }
29160
29161            // Check for Oracle pseudocolumns (ROWNUM, ROWID, LEVEL, SYSDATE, etc.)
29162            // Note: SQLite treats rowid as a regular column name, not a pseudocolumn
29163            // ClickHouse: skip pseudocolumn parsing as these are regular identifiers
29164            if !quoted
29165                && !matches!(
29166                    self.config.dialect,
29167                    Some(crate::dialects::DialectType::SQLite)
29168                        | Some(crate::dialects::DialectType::ClickHouse)
29169                )
29170            {
29171                if let Some(pseudocolumn_type) = PseudocolumnType::from_str(&name) {
29172                    return Ok(Expression::Pseudocolumn(Pseudocolumn {
29173                        kind: pseudocolumn_type,
29174                    }));
29175                }
29176            }
29177
29178            // Check for lambda expression: x -> body
29179            // But NOT if followed by a string literal (that's JSON extract: col -> '$.path')
29180            if self.check(TokenType::Arrow)
29181                && !self
29182                    .peek_nth(1)
29183                    .map_or(false, |t| t.token_type == TokenType::String)
29184            {
29185                self.advance(); // consume the Arrow token
29186                let body = self.parse_expression()?;
29187                return Ok(Expression::Lambda(Box::new(LambdaExpr {
29188                    parameters: vec![ident],
29189                    body,
29190                    colon: false,
29191                    parameter_types: Vec::new(),
29192                })));
29193            }
29194
29195            // Capture trailing comments from the identifier token
29196            let trailing_comments = self.previous_trailing_comments();
29197            let col = Expression::Column(Column {
29198                name: ident,
29199                table: None,
29200                join_mark: false,
29201                trailing_comments,
29202                span: None,
29203                inferred_type: None,
29204            });
29205            return self.maybe_parse_subscript(col);
29206        }
29207
29208        // Exasol-style IF expression: IF condition THEN true_value ELSE false_value ENDIF
29209        // Check for IF not followed by ( (which would be IF function call handled elsewhere)
29210        // This handles: IF age < 18 THEN 'minor' ELSE 'adult' ENDIF
29211        // IMPORTANT: This must be checked BEFORE is_safe_keyword_as_identifier() which would
29212        // treat IF as a column name when not followed by ( or .
29213        // For TSQL/Fabric: IF (cond) BEGIN ... END is an IF statement, not function
29214        if self.check(TokenType::If)
29215            && !self.check_next(TokenType::Dot)
29216            && (!self.check_next(TokenType::LParen) || matches!(
29217                self.config.dialect,
29218                Some(crate::dialects::DialectType::TSQL) | Some(crate::dialects::DialectType::Fabric)
29219            ))
29220        {
29221            let saved_pos = self.current;
29222            self.advance(); // consume IF
29223            if let Some(if_expr) = self.parse_if()? {
29224                return Ok(if_expr);
29225            }
29226            // parse_if() returned None — IF is not an IF expression here,
29227            // restore position so it can be treated as an identifier
29228            self.current = saved_pos;
29229        }
29230
29231        // NEXT VALUE FOR sequence_name [OVER (ORDER BY ...)]
29232        // Must check before treating NEXT as a standalone identifier via is_safe_keyword_as_identifier
29233        if self.check(TokenType::Next)
29234            && self.current + 2 < self.tokens.len()
29235            && self.tokens[self.current + 1]
29236                .text
29237                .eq_ignore_ascii_case("VALUE")
29238            && self.tokens[self.current + 2]
29239                .text
29240                .eq_ignore_ascii_case("FOR")
29241        {
29242            self.advance(); // consume NEXT
29243            if let Some(expr) = self.parse_next_value_for()? {
29244                return Ok(expr);
29245            }
29246        }
29247
29248        // ClickHouse: `from` can be a column name when followed by comma or dot
29249        if matches!(
29250            self.config.dialect,
29251            Some(crate::dialects::DialectType::ClickHouse)
29252        ) && self.check(TokenType::From)
29253            && (self.check_next(TokenType::Comma) || self.check_next(TokenType::Dot))
29254        {
29255            let token = self.advance();
29256            let name = token.text.clone();
29257            if self.match_token(TokenType::Dot) {
29258                // from.col qualified reference
29259                let col_name = self.expect_identifier_or_keyword()?;
29260                return Ok(Expression::Column(crate::expressions::Column {
29261                    name: Identifier::new(col_name),
29262                    table: Some(Identifier::new(name)),
29263                    join_mark: false,
29264                    trailing_comments: Vec::new(),
29265                    span: None,
29266                    inferred_type: None,
29267                }));
29268            }
29269            return Ok(Expression::Column(crate::expressions::Column {
29270                name: Identifier::new(name),
29271                table: None,
29272                join_mark: false,
29273                trailing_comments: Vec::new(),
29274                span: None,
29275                inferred_type: None,
29276            }));
29277        }
29278
29279        // ClickHouse: `except` as identifier in expression context (set operations are handled at statement level)
29280        // except(args) is already handled above in the MINUS/EXCEPT/INTERSECT function block
29281        if matches!(
29282            self.config.dialect,
29283            Some(crate::dialects::DialectType::ClickHouse)
29284        ) && self.check(TokenType::Except)
29285            && !self.check_next(TokenType::LParen)
29286        {
29287            let token = self.advance();
29288            let name = token.text.clone();
29289            if self.match_token(TokenType::Dot) {
29290                let col_name = self.expect_identifier_or_keyword()?;
29291                return Ok(Expression::Column(crate::expressions::Column {
29292                    name: Identifier::new(col_name),
29293                    table: Some(Identifier::new(name)),
29294                    join_mark: false,
29295                    trailing_comments: Vec::new(),
29296                    span: None,
29297                    inferred_type: None,
29298                }));
29299            }
29300            return Ok(Expression::Column(crate::expressions::Column {
29301                name: Identifier::new(name),
29302                table: None,
29303                join_mark: false,
29304                trailing_comments: Vec::new(),
29305                span: None,
29306                inferred_type: None,
29307            }));
29308        }
29309
29310        // ClickHouse: structural keywords like FROM, ON, JOIN can be used as identifiers
29311        // in expression context when followed by an operator (e.g., from + 1, on.col)
29312        if matches!(
29313            self.config.dialect,
29314            Some(crate::dialects::DialectType::ClickHouse)
29315        ) && self.peek().token_type.is_keyword()
29316            && !self.is_safe_keyword_as_identifier()
29317        {
29318            let next_tt = self
29319                .peek_nth(1)
29320                .map(|t| t.token_type)
29321                .unwrap_or(TokenType::Semicolon);
29322            // A structural keyword can be used as an identifier when it appears
29323            // in expression context. We detect this by checking what follows.
29324            // Essentially: it's NOT an identifier only if the keyword itself starts
29325            // a clause (e.g., FROM followed by a table name). But when it's followed
29326            // by an operator, comma, close-paren, or even another clause keyword
29327            // (meaning it's the last token in an expression), it's an identifier.
29328            let is_expr_context = !matches!(
29329                next_tt,
29330                TokenType::Identifier
29331                    | TokenType::Var
29332                    | TokenType::QuotedIdentifier
29333                    | TokenType::LParen
29334                    | TokenType::Number
29335                    | TokenType::String
29336            );
29337            if is_expr_context {
29338                let token = self.advance();
29339                return Ok(Expression::Column(Column {
29340                    name: Identifier::new(token.text),
29341                    table: None,
29342                    join_mark: false,
29343                    trailing_comments: Vec::new(),
29344                    span: None,
29345                    inferred_type: None,
29346                }));
29347            }
29348        }
29349        // %s or %(name)s percent parameter (PostgreSQL psycopg2 style)
29350        // Must be checked BEFORE the keyword-as-identifier handler below, since
29351        // Percent is in is_keyword() and is_safe_keyword_as_identifier() returns true for it.
29352        if self.check(TokenType::Percent)
29353            && (
29354                self.check_next(TokenType::Var)  // %s
29355            || self.check_next(TokenType::LParen)
29356                // %(name)s
29357            )
29358        {
29359            self.advance(); // consume %
29360                            // Check for %(name)s - named parameter
29361            if self.match_token(TokenType::LParen) {
29362                // Get the parameter name
29363                if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
29364                    let name = self.advance().text;
29365                    self.expect(TokenType::RParen)?;
29366                    // Expect 's' after the closing paren
29367                    if self.check(TokenType::Var) && self.peek().text == "s" {
29368                        self.advance(); // consume 's'
29369                    }
29370                    return Ok(Expression::Parameter(Box::new(Parameter {
29371                        name: Some(name),
29372                        index: None,
29373                        style: ParameterStyle::Percent,
29374                        quoted: false,
29375                        string_quoted: false,
29376                        expression: None,
29377                    })));
29378                } else {
29379                    return Err(self.parse_error("Expected parameter name after %("));
29380                }
29381            }
29382            // Check for %s - anonymous parameter
29383            if self.check(TokenType::Var) && self.peek().text == "s" {
29384                self.advance(); // consume 's'
29385                return Ok(Expression::Parameter(Box::new(Parameter {
29386                    name: None,
29387                    index: None,
29388                    style: ParameterStyle::Percent,
29389                    quoted: false,
29390                    string_quoted: false,
29391                    expression: None,
29392                })));
29393            }
29394            // Not a parameter - backtrack
29395            self.current -= 1;
29396        }
29397
29398        // Some keywords can be used as identifiers (column names, table names, etc.)
29399        // when they are "safe" keywords that don't affect query structure.
29400        // Structural keywords like FROM, WHERE, JOIN should NOT be usable as identifiers.
29401        if self.is_safe_keyword_as_identifier() {
29402            let token = self.advance();
29403            let name = token.text.clone();
29404
29405            // Check for function call (keyword followed by paren) - skip Teradata FORMAT phrase
29406            let is_teradata_format_phrase = matches!(
29407                self.config.dialect,
29408                Some(crate::dialects::DialectType::Teradata)
29409            ) && self.check(TokenType::LParen)
29410                && self.check_next(TokenType::Format);
29411            if !is_teradata_format_phrase && self.match_token(TokenType::LParen) {
29412                let upper_name = name.to_uppercase();
29413                let func_expr = self.parse_typed_function(&name, &upper_name, false)?;
29414                let func_expr = self.maybe_parse_clickhouse_parameterized_agg(func_expr)?;
29415                return self.maybe_parse_over(func_expr);
29416            }
29417
29418            // Check for qualified name (keyword.column or keyword.method())
29419            if self.match_token(TokenType::Dot) {
29420                if self.match_token(TokenType::Star) {
29421                    // keyword.* with potential modifiers
29422                    let ident = Identifier::new(name);
29423                    let star = self.parse_star_modifiers(Some(ident))?;
29424                    return Ok(Expression::Star(star));
29425                }
29426                // ClickHouse: json.^path — the ^ prefix means "get all nested subcolumns"
29427                if matches!(
29428                    self.config.dialect,
29429                    Some(crate::dialects::DialectType::ClickHouse)
29430                ) && self.check(TokenType::Caret)
29431                {
29432                    self.advance(); // consume ^
29433                    let mut field_name = "^".to_string();
29434                    if self.check(TokenType::Identifier)
29435                        || self.check(TokenType::Var)
29436                        || self.check_keyword()
29437                    {
29438                        field_name.push_str(&self.advance().text);
29439                    }
29440                    let col = Expression::Dot(Box::new(DotAccess {
29441                        this: Expression::Column(Column {
29442                            name: Identifier::new(name),
29443                            table: None,
29444                            join_mark: false,
29445                            trailing_comments: Vec::new(),
29446                            span: None,
29447                            inferred_type: None,
29448                        }),
29449                        field: Identifier::new(field_name),
29450                    }));
29451                    return self.maybe_parse_subscript(col);
29452                }
29453
29454                // Handle numeric field access: keyword.1, keyword.2 (ClickHouse tuple field access)
29455                if self.check(TokenType::Number) {
29456                    let field_name = self.advance().text;
29457                    let col_expr = Expression::Dot(Box::new(DotAccess {
29458                        this: Expression::Column(Column {
29459                            name: Identifier::new(name),
29460                            table: None,
29461                            join_mark: false,
29462                            trailing_comments: Vec::new(),
29463                            span: None,
29464                            inferred_type: None,
29465                        }),
29466                        field: Identifier::new(field_name),
29467                    }));
29468                    return self.maybe_parse_subscript(col_expr);
29469                }
29470
29471                // Allow keywords as column names
29472                let col_ident = self.expect_identifier_or_keyword_with_quoted()?;
29473
29474                // Check if this is a method call
29475                if self.check(TokenType::LParen) {
29476                    self.advance(); // consume (
29477                    let args = if self.check(TokenType::RParen) {
29478                        Vec::new()
29479                    } else {
29480                        self.parse_expression_list()?
29481                    };
29482                    self.expect(TokenType::RParen)?;
29483                    let method_call = Expression::MethodCall(Box::new(MethodCall {
29484                        this: Expression::Identifier(Identifier::new(name)),
29485                        method: col_ident,
29486                        args,
29487                    }));
29488                    return self.maybe_parse_subscript(method_call);
29489                }
29490
29491                // Capture trailing comments from the column name token
29492                let trailing_comments = self.previous_trailing_comments();
29493                let mut col = Expression::Column(Column {
29494                    name: col_ident,
29495                    table: Some(Identifier::new(name)),
29496                    join_mark: false,
29497                    trailing_comments,
29498                    span: None,
29499                    inferred_type: None,
29500                });
29501                // Handle Oracle/Redshift outer join marker (+) after column reference
29502                if self.check(TokenType::LParen) && self.check_next(TokenType::Plus) {
29503                    let saved_pos = self.current;
29504                    if self.match_token(TokenType::LParen)
29505                        && self.match_token(TokenType::Plus)
29506                        && self.match_token(TokenType::RParen)
29507                    {
29508                        if let Expression::Column(ref mut c) = col {
29509                            c.join_mark = true;
29510                        }
29511                    } else {
29512                        self.current = saved_pos;
29513                    }
29514                }
29515                return self.maybe_parse_subscript(col);
29516            }
29517
29518            // Simple identifier (keyword used as column name)
29519            // Capture trailing comments from the keyword token
29520            let trailing_comments = self.previous_trailing_comments();
29521            let ident = Identifier::new(name);
29522            let col = Expression::Column(Column {
29523                name: ident,
29524                table: None,
29525                join_mark: false,
29526                trailing_comments,
29527                span: None,
29528                inferred_type: None,
29529            });
29530            return self.maybe_parse_subscript(col);
29531        }
29532
29533        // @@ system variable (MySQL/SQL Server): @@version, @@IDENTITY, @@GLOBAL.var
29534        if self.match_token(TokenType::AtAt) {
29535            // Get the variable name
29536            let name = if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
29537                let mut n = self.advance().text;
29538                // Handle @@scope.variable (e.g., @@GLOBAL.max_connections, @@SESSION.sql_mode)
29539                if self.match_token(TokenType::Dot) {
29540                    if self.check(TokenType::Identifier)
29541                        || self.check(TokenType::Var)
29542                        || self.is_safe_keyword_as_identifier()
29543                    {
29544                        n.push('.');
29545                        n.push_str(&self.advance().text);
29546                    }
29547                }
29548                n
29549            } else if self.check_keyword() {
29550                // Handle @@keyword (e.g., @@sql_mode when sql_mode is a keyword)
29551                self.advance().text
29552            } else {
29553                return Err(self.parse_error("Expected variable name after @@"));
29554            };
29555            return Ok(Expression::Parameter(Box::new(Parameter {
29556                name: Some(name),
29557                index: None,
29558                style: ParameterStyle::DoubleAt,
29559                quoted: false,
29560                string_quoted: false,
29561                expression: None,
29562            })));
29563        }
29564
29565        // @ user variable/parameter: @x, @"x", @JOIN, @'foo'
29566        if self.match_token(TokenType::DAt) {
29567            // Get the variable name - can be identifier, quoted identifier, keyword, or string
29568            let (name, quoted, string_quoted) =
29569                if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
29570                    (self.advance().text, false, false)
29571                } else if self.check(TokenType::QuotedIdentifier) {
29572                    // Quoted identifier like @"x"
29573                    let token = self.advance();
29574                    (token.text, true, false)
29575                } else if self.check(TokenType::String) {
29576                    // String-quoted like @'foo'
29577                    let token = self.advance();
29578                    (token.text, false, true)
29579                } else if self.check(TokenType::Number) {
29580                    // Numeric like @1
29581                    let token = self.advance();
29582                    (token.text, false, false)
29583                } else if self.peek().token_type.is_keyword() {
29584                    // Keyword used as variable name like @JOIN
29585                    let token = self.advance();
29586                    (token.text, false, false)
29587                } else {
29588                    return Err(self.parse_error("Expected variable name after @"));
29589                };
29590            return Ok(Expression::Parameter(Box::new(Parameter {
29591                name: Some(name),
29592                index: None,
29593                style: ParameterStyle::At,
29594                quoted,
29595                string_quoted,
29596                expression: None,
29597            })));
29598        }
29599
29600        // Parameter: ? placeholder or $n positional parameter
29601        if self.check(TokenType::Parameter) {
29602            let token = self.advance();
29603            // Check if this is a positional parameter ($1, $2, etc.) or a plain ? placeholder
29604            if let Ok(index) = token.text.parse::<u32>() {
29605                // Positional parameter like $1, $2 (token text is just the number)
29606                let param = Expression::Parameter(Box::new(Parameter {
29607                    name: None,
29608                    index: Some(index),
29609                    style: ParameterStyle::Dollar,
29610                    quoted: false,
29611                    string_quoted: false,
29612                    expression: None,
29613                }));
29614                // Check for JSON path access: $1:name or dot access: $1.c1
29615                let result = self.parse_colon_json_path(param)?;
29616                return self.maybe_parse_subscript(result);
29617            } else {
29618                // Plain ? placeholder
29619                return Ok(Expression::Placeholder(Placeholder { index: None }));
29620            }
29621        }
29622
29623        // :name or :1 colon parameter
29624        if self.match_token(TokenType::Colon) {
29625            // Check for numeric parameter :1, :2, etc.
29626            if self.check(TokenType::Number) {
29627                let num_token = self.advance();
29628                if let Ok(index) = num_token.text.parse::<u32>() {
29629                    return Ok(Expression::Parameter(Box::new(Parameter {
29630                        name: None,
29631                        index: Some(index),
29632                        style: ParameterStyle::Colon,
29633                        quoted: false,
29634                        string_quoted: false,
29635                        expression: None,
29636                    })));
29637                }
29638                return Err(
29639                    self.parse_error(format!("Invalid colon parameter: :{}", num_token.text))
29640                );
29641            }
29642            // Get the parameter name
29643            if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
29644                let name = self.advance().text;
29645                return Ok(Expression::Parameter(Box::new(Parameter {
29646                    name: Some(name),
29647                    index: None,
29648                    style: ParameterStyle::Colon,
29649                    quoted: false,
29650                    string_quoted: false,
29651                    expression: None,
29652                })));
29653            } else {
29654                return Err(self.parse_error("Expected parameter name after :"));
29655            }
29656        }
29657
29658        // $n dollar parameter: $1, $2, etc.
29659        if self.match_token(TokenType::Dollar) {
29660            // Check for ${identifier} or ${kind:name} template variable syntax (Databricks, Hive)
29661            // Hive supports ${hiveconf:variable_name} syntax
29662            if self.match_token(TokenType::LBrace) {
29663                // Parse the variable name - can be identifier or keyword
29664                if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
29665                    let name_token = self.advance();
29666                    // Check for ${kind:name} syntax (e.g., ${hiveconf:some_var})
29667                    let expression = if self.match_token(TokenType::Colon) {
29668                        if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
29669                            let expr_token = self.advance();
29670                            Some(expr_token.text.clone())
29671                        } else {
29672                            return Err(self.parse_error("Expected identifier after : in ${...}"));
29673                        }
29674                    } else {
29675                        None
29676                    };
29677                    self.expect(TokenType::RBrace)?;
29678                    return Ok(Expression::Parameter(Box::new(Parameter {
29679                        name: Some(name_token.text.clone()),
29680                        index: None,
29681                        style: ParameterStyle::DollarBrace,
29682                        quoted: false,
29683                        string_quoted: false,
29684                        expression,
29685                    })));
29686                } else {
29687                    return Err(self.parse_error("Expected identifier after ${"));
29688                }
29689            }
29690            // Check for number following the dollar sign → positional parameter ($1, $2, etc.)
29691            if self.check(TokenType::Number) {
29692                let num_token = self.advance();
29693                // Parse the number as an index
29694                if let Ok(index) = num_token.text.parse::<u32>() {
29695                    let param_expr = Expression::Parameter(Box::new(Parameter {
29696                        name: None,
29697                        index: Some(index),
29698                        style: ParameterStyle::Dollar,
29699                        quoted: false,
29700                        string_quoted: false,
29701                        expression: None,
29702                    }));
29703                    // Check for JSON path access: $1:name or $1:name:subname
29704                    let result = self.parse_colon_json_path(param_expr)?;
29705                    // Also check for dot access: $1.c1 or $1:name.field
29706                    return self.maybe_parse_subscript(result);
29707                }
29708                // If it's not a valid integer, treat as error
29709                return Err(
29710                    self.parse_error(format!("Invalid dollar parameter: ${}", num_token.text))
29711                );
29712            }
29713            // Check for identifier following the dollar sign → session variable ($x, $query_id, etc.)
29714            if self.check(TokenType::Identifier)
29715                || self.check(TokenType::Var)
29716                || self.is_safe_keyword_as_identifier()
29717            {
29718                let name_token = self.advance();
29719                return Ok(Expression::Parameter(Box::new(Parameter {
29720                    name: Some(name_token.text.clone()),
29721                    index: None,
29722                    style: ParameterStyle::Dollar,
29723                    quoted: false,
29724                    string_quoted: false,
29725                    expression: None,
29726                })));
29727            }
29728            // Just a $ by itself - treat as error
29729            return Err(self.parse_error("Expected number or identifier after $"));
29730        }
29731
29732        // %s or %(name)s percent parameter (PostgreSQL psycopg2 style)
29733        if self.match_token(TokenType::Percent) {
29734            // Check for %(name)s - named parameter
29735            if self.match_token(TokenType::LParen) {
29736                // Get the parameter name
29737                if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
29738                    let name = self.advance().text;
29739                    self.expect(TokenType::RParen)?;
29740                    // Expect 's' after the closing paren
29741                    if self.check(TokenType::Var) && self.peek().text == "s" {
29742                        self.advance(); // consume 's'
29743                    }
29744                    return Ok(Expression::Parameter(Box::new(Parameter {
29745                        name: Some(name),
29746                        index: None,
29747                        style: ParameterStyle::Percent,
29748                        quoted: false,
29749                        string_quoted: false,
29750                        expression: None,
29751                    })));
29752                } else {
29753                    return Err(self.parse_error("Expected parameter name after %("));
29754                }
29755            }
29756            // Check for %s - anonymous parameter
29757            if self.check(TokenType::Var) && self.peek().text == "s" {
29758                self.advance(); // consume 's'
29759                return Ok(Expression::Parameter(Box::new(Parameter {
29760                    name: None,
29761                    index: None,
29762                    style: ParameterStyle::Percent,
29763                    quoted: false,
29764                    string_quoted: false,
29765                    expression: None,
29766                })));
29767            }
29768            // If not followed by 's' or '(', it's not a parameter - error
29769            return Err(self.parse_error("Expected 's' or '(' after % for parameter"));
29770        }
29771
29772        // LEFT, RIGHT, OUTER, FULL, ALL etc. keywords as identifiers when followed by DOT
29773        // e.g., SELECT LEFT.FOO FROM ... or SELECT all.count FROM ...
29774        if (self.check(TokenType::Left)
29775            || self.check(TokenType::Right)
29776            || self.check(TokenType::Outer)
29777            || self.check(TokenType::Full)
29778            || self.check(TokenType::All)
29779            || self.check(TokenType::Only)
29780            || self.check(TokenType::Next)
29781            || self.check(TokenType::If))
29782            && self.check_next(TokenType::Dot)
29783        {
29784            let token = self.advance();
29785            let ident = Identifier::new(token.text);
29786            self.expect(TokenType::Dot)?;
29787            if self.match_token(TokenType::Star) {
29788                let star = self.parse_star_modifiers(Some(ident))?;
29789                return Ok(Expression::Star(star));
29790            }
29791            let col_ident = self.expect_identifier_or_keyword_with_quoted()?;
29792            let trailing_comments = self.previous_trailing_comments();
29793            let mut col = Expression::Column(Column {
29794                name: col_ident,
29795                table: Some(ident),
29796                join_mark: false,
29797                trailing_comments,
29798                span: None,
29799                inferred_type: None,
29800            });
29801            // Handle Oracle/Redshift outer join marker (+) after column reference
29802            if self.check(TokenType::LParen) && self.check_next(TokenType::Plus) {
29803                let saved_pos = self.current;
29804                if self.match_token(TokenType::LParen)
29805                    && self.match_token(TokenType::Plus)
29806                    && self.match_token(TokenType::RParen)
29807                {
29808                    if let Expression::Column(ref mut c) = col {
29809                        c.join_mark = true;
29810                    }
29811                } else {
29812                    self.current = saved_pos;
29813                }
29814            }
29815            return self.maybe_parse_subscript(col);
29816        }
29817
29818        // NEXT VALUE FOR sequence_name [OVER (ORDER BY ...)]
29819        // Must check before treating NEXT as a standalone identifier
29820        if self.check(TokenType::Next) {
29821            // NEXT(arg) - pattern navigation function in MATCH_RECOGNIZE
29822            if self.check_next(TokenType::LParen) {
29823                let token = self.advance();
29824                self.advance(); // consume LParen
29825                let args = self.parse_function_args_list()?;
29826                self.expect(TokenType::RParen)?;
29827                return Ok(Expression::Function(Box::new(Function {
29828                    name: token.text,
29829                    args,
29830                    distinct: false,
29831                    trailing_comments: Vec::new(),
29832                    use_bracket_syntax: false,
29833                    no_parens: false,
29834                    quoted: false,
29835                    span: None,
29836                    inferred_type: None,
29837                })));
29838            }
29839        }
29840
29841        // LEFT, RIGHT, OUTER, FULL, ONLY, NEXT as standalone identifiers (not followed by JOIN or LParen)
29842        // e.g., SELECT LEFT FROM ... or SELECT only FROM ...
29843        // If followed by LParen, it's a function call (e.g., NEXT(bar) in MATCH_RECOGNIZE)
29844        if self.can_be_alias_keyword()
29845            && !self.check_next(TokenType::Join)
29846            && !self.check_next(TokenType::LParen)
29847        {
29848            let token = self.advance();
29849            let trailing_comments = self.previous_trailing_comments();
29850            let col = Expression::Column(Column {
29851                name: Identifier::new(token.text),
29852                table: None,
29853                join_mark: false,
29854                trailing_comments,
29855                span: None,
29856                inferred_type: None,
29857            });
29858            return self.maybe_parse_subscript(col);
29859        }
29860
29861        Err(self.parse_error(format!("Unexpected token: {:?}", self.peek().token_type)))
29862    }
29863
29864    /// Check if function name is a known aggregate function
29865    fn is_aggregate_function(name: &str) -> bool {
29866        crate::function_registry::is_aggregate_function_name(name)
29867    }
29868
29869    /// Whether the source dialect uses LOG(base, value) order (base first).
29870    /// Default is true. BigQuery, TSQL, Tableau, Fabric use LOG(value, base).
29871    fn log_base_first(&self) -> bool {
29872        !matches!(
29873            self.config.dialect,
29874            Some(crate::dialects::DialectType::BigQuery)
29875                | Some(crate::dialects::DialectType::TSQL)
29876                | Some(crate::dialects::DialectType::Tableau)
29877                | Some(crate::dialects::DialectType::Fabric)
29878        )
29879    }
29880
29881    /// Whether the source dialect treats single-arg LOG(x) as LN(x).
29882    /// These dialects have LOG_DEFAULTS_TO_LN = True in Python sqlglot.
29883    fn log_defaults_to_ln(&self) -> bool {
29884        matches!(
29885            self.config.dialect,
29886            Some(crate::dialects::DialectType::MySQL)
29887                | Some(crate::dialects::DialectType::BigQuery)
29888                | Some(crate::dialects::DialectType::TSQL)
29889                | Some(crate::dialects::DialectType::ClickHouse)
29890                | Some(crate::dialects::DialectType::Hive)
29891                | Some(crate::dialects::DialectType::Spark)
29892                | Some(crate::dialects::DialectType::Databricks)
29893                | Some(crate::dialects::DialectType::Drill)
29894                | Some(crate::dialects::DialectType::Dremio)
29895        )
29896    }
29897
29898    /// Parse the subset of typed functions that are handled via function-registry metadata.
29899    fn try_parse_registry_typed_function(
29900        &mut self,
29901        name: &str,
29902        upper_name: &str,
29903        canonical_upper_name: &str,
29904        quoted: bool,
29905    ) -> Result<Option<Expression>> {
29906        let Some(spec) =
29907            crate::function_registry::typed_function_spec_by_canonical_upper(canonical_upper_name)
29908        else {
29909            return Ok(None);
29910        };
29911
29912        match (spec.parse_kind, spec.canonical_name) {
29913            (crate::function_registry::TypedParseKind::AggregateLike, "COUNT_IF") => {
29914                let distinct = self.match_token(TokenType::Distinct);
29915                let this = self.parse_expression()?;
29916                // ClickHouse: handle AS alias inside countIf args: countIf(expr AS d, pred)
29917                let this = if matches!(
29918                    self.config.dialect,
29919                    Some(crate::dialects::DialectType::ClickHouse)
29920                ) && self.check(TokenType::As)
29921                {
29922                    let next_idx = self.current + 1;
29923                    let after_alias_idx = self.current + 2;
29924                    let is_alias = next_idx < self.tokens.len()
29925                        && (matches!(
29926                            self.tokens[next_idx].token_type,
29927                            TokenType::Identifier | TokenType::Var | TokenType::QuotedIdentifier
29928                        ) || self.tokens[next_idx].token_type.is_keyword())
29929                        && after_alias_idx < self.tokens.len()
29930                        && matches!(
29931                            self.tokens[after_alias_idx].token_type,
29932                            TokenType::RParen | TokenType::Comma
29933                        );
29934                    if is_alias {
29935                        self.advance(); // consume AS
29936                        let alias_token = self.advance();
29937                        Expression::Alias(Box::new(crate::expressions::Alias {
29938                            this,
29939                            alias: Identifier::new(alias_token.text.clone()),
29940                            column_aliases: Vec::new(),
29941                            pre_alias_comments: Vec::new(),
29942                            trailing_comments: Vec::new(),
29943                            inferred_type: None,
29944                        }))
29945                    } else {
29946                        this
29947                    }
29948                } else {
29949                    this
29950                };
29951                if matches!(
29952                    self.config.dialect,
29953                    Some(crate::dialects::DialectType::ClickHouse)
29954                ) && self.match_token(TokenType::Comma)
29955                {
29956                    let mut args = vec![this];
29957                    let arg = self.parse_expression()?;
29958                    // Handle AS alias on subsequent args too
29959                    let arg = if self.check(TokenType::As) {
29960                        let next_idx = self.current + 1;
29961                        let after_alias_idx = self.current + 2;
29962                        let is_alias = next_idx < self.tokens.len()
29963                            && (matches!(
29964                                self.tokens[next_idx].token_type,
29965                                TokenType::Identifier
29966                                    | TokenType::Var
29967                                    | TokenType::QuotedIdentifier
29968                            ) || self.tokens[next_idx].token_type.is_keyword())
29969                            && after_alias_idx < self.tokens.len()
29970                            && matches!(
29971                                self.tokens[after_alias_idx].token_type,
29972                                TokenType::RParen | TokenType::Comma
29973                            );
29974                        if is_alias {
29975                            self.advance(); // consume AS
29976                            let alias_token = self.advance();
29977                            Expression::Alias(Box::new(crate::expressions::Alias {
29978                                this: arg,
29979                                alias: Identifier::new(alias_token.text.clone()),
29980                                column_aliases: Vec::new(),
29981                                pre_alias_comments: Vec::new(),
29982                                trailing_comments: Vec::new(),
29983                                inferred_type: None,
29984                            }))
29985                        } else {
29986                            arg
29987                        }
29988                    } else {
29989                        arg
29990                    };
29991                    args.push(arg);
29992                    while self.match_token(TokenType::Comma) {
29993                        args.push(self.parse_expression()?);
29994                    }
29995                    self.expect(TokenType::RParen)?;
29996                    return Ok(Some(Expression::CombinedAggFunc(Box::new(
29997                        CombinedAggFunc {
29998                            this: Box::new(Expression::Identifier(Identifier::new("countIf"))),
29999                            expressions: args,
30000                        },
30001                    ))));
30002                }
30003                self.expect(TokenType::RParen)?;
30004                let filter = self.parse_filter_clause()?;
30005                Ok(Some(Expression::CountIf(Box::new(AggFunc {
30006                    ignore_nulls: None,
30007                    this,
30008                    distinct,
30009                    filter,
30010                    order_by: Vec::new(),
30011                    having_max: None,
30012                    name: Some(name.to_string()),
30013                    limit: None,
30014                    inferred_type: None,
30015                }))))
30016            }
30017            (crate::function_registry::TypedParseKind::Binary, "STARTS_WITH")
30018            | (crate::function_registry::TypedParseKind::Binary, "ENDS_WITH") => {
30019                let this = self.parse_expression()?;
30020                self.expect(TokenType::Comma)?;
30021                let expression = self.parse_expression()?;
30022                self.expect(TokenType::RParen)?;
30023                let func = BinaryFunc {
30024                    original_name: None,
30025                    this,
30026                    expression,
30027                    inferred_type: None,
30028                };
30029                let expr = match spec.canonical_name {
30030                    "STARTS_WITH" => Expression::StartsWith(Box::new(func)),
30031                    "ENDS_WITH" => Expression::EndsWith(Box::new(func)),
30032                    _ => unreachable!("binary typed parse kind already matched in caller"),
30033                };
30034                Ok(Some(expr))
30035            }
30036            (crate::function_registry::TypedParseKind::Binary, "ATAN2") => {
30037                let this = self.parse_expression()?;
30038                self.expect(TokenType::Comma)?;
30039                let expression = self.parse_expression()?;
30040                self.expect(TokenType::RParen)?;
30041                Ok(Some(Expression::Atan2(Box::new(BinaryFunc {
30042                    original_name: None,
30043                    this,
30044                    expression,
30045                    inferred_type: None,
30046                }))))
30047            }
30048            (crate::function_registry::TypedParseKind::Binary, "MAP_FROM_ARRAYS")
30049            | (crate::function_registry::TypedParseKind::Binary, "MAP_CONTAINS_KEY")
30050            | (crate::function_registry::TypedParseKind::Binary, "ELEMENT_AT") => {
30051                let this = self.parse_expression()?;
30052                self.expect(TokenType::Comma)?;
30053                let expression = self.parse_expression()?;
30054                self.expect(TokenType::RParen)?;
30055                let func = BinaryFunc {
30056                    original_name: None,
30057                    this,
30058                    expression,
30059                    inferred_type: None,
30060                };
30061                let expr = match spec.canonical_name {
30062                    "MAP_FROM_ARRAYS" => Expression::MapFromArrays(Box::new(func)),
30063                    "MAP_CONTAINS_KEY" => Expression::MapContainsKey(Box::new(func)),
30064                    "ELEMENT_AT" => Expression::ElementAt(Box::new(func)),
30065                    _ => unreachable!("binary map parse kind already matched in caller"),
30066                };
30067                Ok(Some(expr))
30068            }
30069            (crate::function_registry::TypedParseKind::Binary, "CONTAINS")
30070            | (crate::function_registry::TypedParseKind::Binary, "MOD")
30071            | (crate::function_registry::TypedParseKind::Binary, "POW") => {
30072                let this = self.parse_expression()?;
30073                self.expect(TokenType::Comma)?;
30074                let expression = self.parse_expression()?;
30075                self.expect(TokenType::RParen)?;
30076                let expr = match spec.canonical_name {
30077                    "CONTAINS" => Expression::Contains(Box::new(BinaryFunc {
30078                        original_name: None,
30079                        this,
30080                        expression,
30081                        inferred_type: None,
30082                    })),
30083                    "MOD" => Expression::ModFunc(Box::new(BinaryFunc {
30084                        original_name: None,
30085                        this,
30086                        expression,
30087                        inferred_type: None,
30088                    })),
30089                    "POW" => Expression::Power(Box::new(BinaryFunc {
30090                        original_name: None,
30091                        this,
30092                        expression,
30093                        inferred_type: None,
30094                    })),
30095                    _ => unreachable!("binary scalar parse kind already matched in caller"),
30096                };
30097                Ok(Some(expr))
30098            }
30099            (crate::function_registry::TypedParseKind::Binary, "ADD_MONTHS")
30100            | (crate::function_registry::TypedParseKind::Binary, "MONTHS_BETWEEN")
30101            | (crate::function_registry::TypedParseKind::Binary, "NEXT_DAY") => {
30102                let this = self.parse_expression()?;
30103                self.expect(TokenType::Comma)?;
30104                let expression = self.parse_expression()?;
30105                if spec.canonical_name == "MONTHS_BETWEEN" && self.match_token(TokenType::Comma) {
30106                    let round_off = self.parse_expression()?;
30107                    self.expect(TokenType::RParen)?;
30108                    return Ok(Some(Expression::Function(Box::new(
30109                        crate::expressions::Function::new(
30110                            "MONTHS_BETWEEN".to_string(),
30111                            vec![this, expression, round_off],
30112                        ),
30113                    ))));
30114                }
30115                self.expect(TokenType::RParen)?;
30116                let func = BinaryFunc {
30117                    original_name: None,
30118                    this,
30119                    expression,
30120                    inferred_type: None,
30121                };
30122                let expr = match spec.canonical_name {
30123                    "ADD_MONTHS" => Expression::AddMonths(Box::new(func)),
30124                    "MONTHS_BETWEEN" => Expression::MonthsBetween(Box::new(func)),
30125                    "NEXT_DAY" => Expression::NextDay(Box::new(func)),
30126                    _ => unreachable!("date binary parse kind already matched in caller"),
30127                };
30128                Ok(Some(expr))
30129            }
30130            (crate::function_registry::TypedParseKind::Binary, "ARRAY_CONTAINS")
30131            | (crate::function_registry::TypedParseKind::Binary, "ARRAY_POSITION")
30132            | (crate::function_registry::TypedParseKind::Binary, "ARRAY_APPEND")
30133            | (crate::function_registry::TypedParseKind::Binary, "ARRAY_PREPEND")
30134            | (crate::function_registry::TypedParseKind::Binary, "ARRAY_UNION")
30135            | (crate::function_registry::TypedParseKind::Binary, "ARRAY_EXCEPT")
30136            | (crate::function_registry::TypedParseKind::Binary, "ARRAY_REMOVE") => {
30137                let this = self.parse_expression()?;
30138                self.expect(TokenType::Comma)?;
30139                let expression = self.parse_expression()?;
30140                self.expect(TokenType::RParen)?;
30141                let func = BinaryFunc {
30142                    original_name: None,
30143                    this,
30144                    expression,
30145                    inferred_type: None,
30146                };
30147                let expr = match spec.canonical_name {
30148                    "ARRAY_CONTAINS" => Expression::ArrayContains(Box::new(func)),
30149                    "ARRAY_POSITION" => Expression::ArrayPosition(Box::new(func)),
30150                    "ARRAY_APPEND" => Expression::ArrayAppend(Box::new(func)),
30151                    "ARRAY_PREPEND" => Expression::ArrayPrepend(Box::new(func)),
30152                    "ARRAY_UNION" => Expression::ArrayUnion(Box::new(func)),
30153                    "ARRAY_EXCEPT" => Expression::ArrayExcept(Box::new(func)),
30154                    "ARRAY_REMOVE" => Expression::ArrayRemove(Box::new(func)),
30155                    _ => unreachable!("array binary parse kind already matched in caller"),
30156                };
30157                Ok(Some(expr))
30158            }
30159            (crate::function_registry::TypedParseKind::Unary, "LENGTH") => {
30160                let this = self.parse_expression()?;
30161                // PostgreSQL: LENGTH(string, encoding) accepts optional second argument
30162                if self.match_token(TokenType::Comma) {
30163                    let encoding = self.parse_expression()?;
30164                    self.expect(TokenType::RParen)?;
30165                    // Store as a regular function to preserve both arguments
30166                    Ok(Some(Expression::Function(Box::new(Function::new(
30167                        upper_name,
30168                        vec![this, encoding],
30169                    )))))
30170                } else {
30171                    self.expect(TokenType::RParen)?;
30172                    Ok(Some(Expression::Length(Box::new(UnaryFunc::new(this)))))
30173                }
30174            }
30175            (crate::function_registry::TypedParseKind::Unary, "LOWER") => {
30176                let this = self.parse_expression_with_clickhouse_alias()?;
30177                self.expect(TokenType::RParen)?;
30178                Ok(Some(Expression::Lower(Box::new(UnaryFunc::new(this)))))
30179            }
30180            (crate::function_registry::TypedParseKind::Unary, "UPPER") => {
30181                let this = self.parse_expression_with_clickhouse_alias()?;
30182                self.expect(TokenType::RParen)?;
30183                Ok(Some(Expression::Upper(Box::new(UnaryFunc::new(this)))))
30184            }
30185            (crate::function_registry::TypedParseKind::Unary, "TYPEOF") => {
30186                let this = self.parse_expression()?;
30187                // ClickHouse: expr AS alias inside function args
30188                let this = self.maybe_clickhouse_alias(this);
30189                if self.match_token(TokenType::Comma) {
30190                    // Preserve additional args via generic function form
30191                    let mut all_args = vec![this];
30192                    let remaining = self.parse_function_arguments()?;
30193                    all_args.extend(remaining);
30194                    self.expect(TokenType::RParen)?;
30195                    Ok(Some(Expression::Function(Box::new(Function {
30196                        name: name.to_string(),
30197                        args: all_args,
30198                        distinct: false,
30199                        trailing_comments: Vec::new(),
30200                        use_bracket_syntax: false,
30201                        no_parens: false,
30202                        quoted: false,
30203                        span: None,
30204                        inferred_type: None,
30205                    }))))
30206                } else {
30207                    self.expect(TokenType::RParen)?;
30208                    Ok(Some(Expression::Typeof(Box::new(UnaryFunc::new(this)))))
30209                }
30210            }
30211            (crate::function_registry::TypedParseKind::Unary, "DAYOFWEEK")
30212            | (crate::function_registry::TypedParseKind::Unary, "DAYOFYEAR")
30213            | (crate::function_registry::TypedParseKind::Unary, "DAYOFMONTH")
30214            | (crate::function_registry::TypedParseKind::Unary, "WEEKOFYEAR") => {
30215                let this = self.parse_expression()?;
30216                self.expect(TokenType::RParen)?;
30217                let func = UnaryFunc::new(this);
30218                let expr = match spec.canonical_name {
30219                    "DAYOFWEEK" => Expression::DayOfWeek(Box::new(func)),
30220                    "DAYOFYEAR" => Expression::DayOfYear(Box::new(func)),
30221                    "DAYOFMONTH" => Expression::DayOfMonth(Box::new(func)),
30222                    "WEEKOFYEAR" => Expression::WeekOfYear(Box::new(func)),
30223                    _ => unreachable!("date-part unary parse kind already matched in caller"),
30224                };
30225                Ok(Some(expr))
30226            }
30227            (crate::function_registry::TypedParseKind::Unary, "SIN")
30228            | (crate::function_registry::TypedParseKind::Unary, "COS")
30229            | (crate::function_registry::TypedParseKind::Unary, "TAN")
30230            | (crate::function_registry::TypedParseKind::Unary, "ASIN")
30231            | (crate::function_registry::TypedParseKind::Unary, "ACOS")
30232            | (crate::function_registry::TypedParseKind::Unary, "ATAN")
30233            | (crate::function_registry::TypedParseKind::Unary, "RADIANS")
30234            | (crate::function_registry::TypedParseKind::Unary, "DEGREES") => {
30235                let this = self.parse_expression()?;
30236                // MySQL: ATAN(y, x) with 2 args is equivalent to ATAN2(y, x)
30237                if spec.canonical_name == "ATAN" && self.match_token(TokenType::Comma) {
30238                    let expression = self.parse_expression()?;
30239                    self.expect(TokenType::RParen)?;
30240                    return Ok(Some(Expression::Atan2(Box::new(BinaryFunc {
30241                        original_name: Some("ATAN".to_string()),
30242                        this,
30243                        expression,
30244                        inferred_type: None,
30245                    }))));
30246                }
30247                self.expect(TokenType::RParen)?;
30248                let func = UnaryFunc::new(this);
30249                let expr = match spec.canonical_name {
30250                    "SIN" => Expression::Sin(Box::new(func)),
30251                    "COS" => Expression::Cos(Box::new(func)),
30252                    "TAN" => Expression::Tan(Box::new(func)),
30253                    "ASIN" => Expression::Asin(Box::new(func)),
30254                    "ACOS" => Expression::Acos(Box::new(func)),
30255                    "ATAN" => Expression::Atan(Box::new(func)),
30256                    "RADIANS" => Expression::Radians(Box::new(func)),
30257                    "DEGREES" => Expression::Degrees(Box::new(func)),
30258                    _ => unreachable!("trig unary parse kind already matched in caller"),
30259                };
30260                Ok(Some(expr))
30261            }
30262            (crate::function_registry::TypedParseKind::Unary, "YEAR")
30263            | (crate::function_registry::TypedParseKind::Unary, "MONTH")
30264            | (crate::function_registry::TypedParseKind::Unary, "DAY")
30265            | (crate::function_registry::TypedParseKind::Unary, "HOUR")
30266            | (crate::function_registry::TypedParseKind::Unary, "MINUTE")
30267            | (crate::function_registry::TypedParseKind::Unary, "SECOND")
30268            | (crate::function_registry::TypedParseKind::Unary, "DAYOFWEEK_ISO")
30269            | (crate::function_registry::TypedParseKind::Unary, "QUARTER")
30270            | (crate::function_registry::TypedParseKind::Unary, "EPOCH")
30271            | (crate::function_registry::TypedParseKind::Unary, "EPOCH_MS") => {
30272                let this = self.parse_expression()?;
30273                self.expect(TokenType::RParen)?;
30274                let func = UnaryFunc::new(this);
30275                let expr = match spec.canonical_name {
30276                    "YEAR" => Expression::Year(Box::new(func)),
30277                    "MONTH" => Expression::Month(Box::new(func)),
30278                    "DAY" => Expression::Day(Box::new(func)),
30279                    "HOUR" => Expression::Hour(Box::new(func)),
30280                    "MINUTE" => Expression::Minute(Box::new(func)),
30281                    "SECOND" => Expression::Second(Box::new(func)),
30282                    "DAYOFWEEK_ISO" => Expression::DayOfWeekIso(Box::new(func)),
30283                    "QUARTER" => Expression::Quarter(Box::new(func)),
30284                    "EPOCH" => Expression::Epoch(Box::new(func)),
30285                    "EPOCH_MS" => Expression::EpochMs(Box::new(func)),
30286                    _ => unreachable!("date unary parse kind already matched in caller"),
30287                };
30288                Ok(Some(expr))
30289            }
30290            (crate::function_registry::TypedParseKind::Unary, "ARRAY_LENGTH")
30291            | (crate::function_registry::TypedParseKind::Unary, "ARRAY_SIZE")
30292            | (crate::function_registry::TypedParseKind::Unary, "CARDINALITY")
30293            | (crate::function_registry::TypedParseKind::Unary, "ARRAY_REVERSE")
30294            | (crate::function_registry::TypedParseKind::Unary, "ARRAY_DISTINCT")
30295            | (crate::function_registry::TypedParseKind::Unary, "ARRAY_COMPACT")
30296            | (crate::function_registry::TypedParseKind::Unary, "EXPLODE")
30297            | (crate::function_registry::TypedParseKind::Unary, "EXPLODE_OUTER") => {
30298                let this = self.parse_expression()?;
30299                // PostgreSQL ARRAY_LENGTH and ARRAY_SIZE can take a second dimension arg.
30300                // Preserve that by falling back to generic function form for 2-arg usage.
30301                if (spec.canonical_name == "ARRAY_LENGTH" || spec.canonical_name == "ARRAY_SIZE")
30302                    && self.match_token(TokenType::Comma)
30303                {
30304                    let dimension = self.parse_expression()?;
30305                    self.expect(TokenType::RParen)?;
30306                    return Ok(Some(Expression::Function(Box::new(Function {
30307                        name: name.to_string(),
30308                        args: vec![this, dimension],
30309                        distinct: false,
30310                        trailing_comments: Vec::new(),
30311                        use_bracket_syntax: false,
30312                        no_parens: false,
30313                        quoted: false,
30314                        span: None,
30315                        inferred_type: None,
30316                    }))));
30317                }
30318                self.expect(TokenType::RParen)?;
30319                let func = UnaryFunc::new(this);
30320                let expr = match spec.canonical_name {
30321                    "ARRAY_LENGTH" => Expression::ArrayLength(Box::new(func)),
30322                    "ARRAY_SIZE" => Expression::ArraySize(Box::new(func)),
30323                    "CARDINALITY" => Expression::Cardinality(Box::new(func)),
30324                    "ARRAY_REVERSE" => Expression::ArrayReverse(Box::new(func)),
30325                    "ARRAY_DISTINCT" => Expression::ArrayDistinct(Box::new(func)),
30326                    "ARRAY_COMPACT" => Expression::ArrayCompact(Box::new(func)),
30327                    "EXPLODE" => Expression::Explode(Box::new(func)),
30328                    "EXPLODE_OUTER" => Expression::ExplodeOuter(Box::new(func)),
30329                    _ => unreachable!("array unary parse kind already matched in caller"),
30330                };
30331                Ok(Some(expr))
30332            }
30333            (crate::function_registry::TypedParseKind::Unary, "MAP_FROM_ENTRIES")
30334            | (crate::function_registry::TypedParseKind::Unary, "MAP_KEYS")
30335            | (crate::function_registry::TypedParseKind::Unary, "MAP_VALUES") => {
30336                let this = self.parse_expression()?;
30337                self.expect(TokenType::RParen)?;
30338                let func = UnaryFunc::new(this);
30339                let expr = match spec.canonical_name {
30340                    "MAP_FROM_ENTRIES" => Expression::MapFromEntries(Box::new(func)),
30341                    "MAP_KEYS" => Expression::MapKeys(Box::new(func)),
30342                    "MAP_VALUES" => Expression::MapValues(Box::new(func)),
30343                    _ => unreachable!("map unary parse kind already matched in caller"),
30344                };
30345                Ok(Some(expr))
30346            }
30347            (crate::function_registry::TypedParseKind::Unary, "ABS") => {
30348                let this = self.parse_expression_with_clickhouse_alias()?;
30349                self.expect(TokenType::RParen)?;
30350                Ok(Some(Expression::Abs(Box::new(UnaryFunc::new(this)))))
30351            }
30352            (crate::function_registry::TypedParseKind::Unary, "SQRT")
30353            | (crate::function_registry::TypedParseKind::Unary, "EXP")
30354            | (crate::function_registry::TypedParseKind::Unary, "LN") => {
30355                let this = self.parse_expression()?;
30356                self.expect(TokenType::RParen)?;
30357                let expr = match spec.canonical_name {
30358                    "SQRT" => Expression::Sqrt(Box::new(UnaryFunc::new(this))),
30359                    "EXP" => Expression::Exp(Box::new(UnaryFunc::new(this))),
30360                    "LN" => Expression::Ln(Box::new(UnaryFunc::new(this))),
30361                    _ => unreachable!("math unary parse kind already matched in caller"),
30362                };
30363                Ok(Some(expr))
30364            }
30365            (crate::function_registry::TypedParseKind::Variadic, "TO_NUMBER")
30366            | (crate::function_registry::TypedParseKind::Variadic, "TRY_TO_NUMBER") => {
30367                let args = self.parse_expression_list()?;
30368                self.expect(TokenType::RParen)?;
30369                let this = args.get(0).cloned().unwrap_or(Expression::Null(Null {}));
30370                let format = args.get(1).cloned().map(Box::new);
30371                let precision = args.get(2).cloned().map(Box::new);
30372                let scale = args.get(3).cloned().map(Box::new);
30373                let safe = if spec.canonical_name == "TRY_TO_NUMBER" {
30374                    Some(Box::new(Expression::Boolean(BooleanLiteral {
30375                        value: true,
30376                    })))
30377                } else {
30378                    None
30379                };
30380                Ok(Some(Expression::ToNumber(Box::new(ToNumber {
30381                    this: Box::new(this),
30382                    format,
30383                    nlsparam: None,
30384                    precision,
30385                    scale,
30386                    safe,
30387                    safe_name: None,
30388                }))))
30389            }
30390            (crate::function_registry::TypedParseKind::Variadic, "SUBSTRING") => {
30391                let this = self.parse_expression()?;
30392                // ClickHouse: implicit/explicit alias: substring('1234' lhs FROM 2) or substring('1234' AS lhs FROM 2)
30393                let this = self.try_clickhouse_func_arg_alias(this);
30394
30395                // Check for SQL standard FROM syntax: SUBSTRING(str FROM pos [FOR len])
30396                if self.match_token(TokenType::From) {
30397                    let start = self.parse_expression()?;
30398                    let start = self.try_clickhouse_func_arg_alias(start);
30399                    let length = if self.match_token(TokenType::For) {
30400                        let len = self.parse_expression()?;
30401                        Some(self.try_clickhouse_func_arg_alias(len))
30402                    } else {
30403                        None
30404                    };
30405                    self.expect(TokenType::RParen)?;
30406                    Ok(Some(Expression::Substring(Box::new(SubstringFunc {
30407                        this,
30408                        start,
30409                        length,
30410                        from_for_syntax: true,
30411                    }))))
30412                } else if self.match_token(TokenType::For) {
30413                    // PostgreSQL: SUBSTRING(str FOR len) or SUBSTRING(str FOR len FROM pos)
30414                    let length_expr = self.parse_expression()?;
30415                    let length_expr = self.try_clickhouse_func_arg_alias(length_expr);
30416                    let start = if self.match_token(TokenType::From) {
30417                        let s = self.parse_expression()?;
30418                        self.try_clickhouse_func_arg_alias(s)
30419                    } else {
30420                        // No FROM, use 1 as default start position
30421                        Expression::Literal(Literal::Number("1".to_string()))
30422                    };
30423                    self.expect(TokenType::RParen)?;
30424                    Ok(Some(Expression::Substring(Box::new(SubstringFunc {
30425                        this,
30426                        start,
30427                        length: Some(length_expr),
30428                        from_for_syntax: true,
30429                    }))))
30430                } else if self.match_token(TokenType::Comma) {
30431                    // Comma-separated syntax: SUBSTRING(str, pos) or SUBSTRING(str, pos, len)
30432                    let start = self.parse_expression()?;
30433                    let start = self.try_clickhouse_func_arg_alias(start);
30434                    let length = if self.match_token(TokenType::Comma) {
30435                        let len = self.parse_expression()?;
30436                        Some(self.try_clickhouse_func_arg_alias(len))
30437                    } else {
30438                        None
30439                    };
30440                    self.expect(TokenType::RParen)?;
30441                    Ok(Some(Expression::Substring(Box::new(SubstringFunc {
30442                        this,
30443                        start,
30444                        length,
30445                        from_for_syntax: false,
30446                    }))))
30447                } else {
30448                    // Just SUBSTRING(str) with no other args - unusual but handle it
30449                    self.expect(TokenType::RParen)?;
30450                    // Treat as function call
30451                    Ok(Some(Expression::Function(Box::new(Function {
30452                        name: name.to_string(),
30453                        args: vec![this],
30454                        distinct: false,
30455                        trailing_comments: Vec::new(),
30456                        use_bracket_syntax: false,
30457                        no_parens: false,
30458                        quoted: false,
30459                        span: None,
30460                        inferred_type: None,
30461                    }))))
30462                }
30463            }
30464            (crate::function_registry::TypedParseKind::Variadic, "DATE_PART") => {
30465                let part = self.parse_expression()?;
30466                // For TSQL/Fabric, normalize date part aliases (e.g., "dd" -> DAY)
30467                let mut part = if matches!(
30468                    self.config.dialect,
30469                    Some(crate::dialects::DialectType::TSQL)
30470                        | Some(crate::dialects::DialectType::Fabric)
30471                ) {
30472                    self.normalize_tsql_date_part(part)
30473                } else {
30474                    part
30475                };
30476                // Accept both FROM and comma as separator (Snowflake supports both syntaxes)
30477                if !self.match_token(TokenType::From) && !self.match_token(TokenType::Comma) {
30478                    return Err(self.parse_error("Expected FROM or comma in DATE_PART"));
30479                }
30480                let from_expr = self.parse_expression()?;
30481                self.expect(TokenType::RParen)?;
30482                if matches!(
30483                    self.config.dialect,
30484                    Some(crate::dialects::DialectType::Snowflake)
30485                ) {
30486                    if self
30487                        .try_parse_date_part_field_identifier_expr(&part)
30488                        .is_some()
30489                    {
30490                        part = self.convert_date_part_identifier_expr_to_var(part);
30491                    }
30492                }
30493                Ok(Some(Expression::Function(Box::new(Function {
30494                    name: "DATE_PART".to_string(),
30495                    args: vec![part, from_expr],
30496                    distinct: false,
30497                    trailing_comments: Vec::new(),
30498                    use_bracket_syntax: false,
30499                    no_parens: false,
30500                    quoted: false,
30501                    span: None,
30502                    inferred_type: None,
30503                }))))
30504            }
30505            (crate::function_registry::TypedParseKind::Variadic, "DATEADD") => {
30506                let mut first_arg = self.parse_expression()?;
30507                first_arg = self.try_clickhouse_func_arg_alias(first_arg);
30508                self.expect(TokenType::Comma)?;
30509                let second_arg = self.parse_expression()?;
30510                let second_arg = self.try_clickhouse_func_arg_alias(second_arg);
30511
30512                // Check if there's a third argument (traditional 3-arg syntax)
30513                if self.match_token(TokenType::Comma) {
30514                    let third_arg = self.parse_expression()?;
30515                    let third_arg = self.try_clickhouse_func_arg_alias(third_arg);
30516                    self.expect(TokenType::RParen)?;
30517                    if matches!(
30518                        self.config.dialect,
30519                        Some(crate::dialects::DialectType::Snowflake)
30520                    ) {
30521                        if self
30522                            .try_parse_date_part_unit_identifier_expr(&first_arg)
30523                            .is_some()
30524                        {
30525                            first_arg = self.convert_date_part_identifier_expr_to_var(first_arg);
30526                        }
30527                    }
30528                    Ok(Some(Expression::Function(Box::new(Function {
30529                        name: name.to_string(),
30530                        args: vec![first_arg, second_arg, third_arg],
30531                        distinct: false,
30532                        trailing_comments: Vec::new(),
30533                        use_bracket_syntax: false,
30534                        no_parens: false,
30535                        quoted: false,
30536                        span: None,
30537                        inferred_type: None,
30538                    }))))
30539                } else {
30540                    // BigQuery 2-arg syntax: DATE_ADD(date, interval)
30541                    self.expect(TokenType::RParen)?;
30542                    Ok(Some(Expression::Function(Box::new(Function {
30543                        name: name.to_string(),
30544                        args: vec![first_arg, second_arg],
30545                        distinct: false,
30546                        trailing_comments: Vec::new(),
30547                        use_bracket_syntax: false,
30548                        no_parens: false,
30549                        quoted: false,
30550                        span: None,
30551                        inferred_type: None,
30552                    }))))
30553                }
30554            }
30555            (crate::function_registry::TypedParseKind::Variadic, "DATEDIFF") => {
30556                // First argument (can be unit for DATEDIFF/TIMESTAMPDIFF or datetime for TIMEDIFF)
30557                let first_arg = self.parse_expression()?;
30558                let first_arg = self.try_clickhouse_func_arg_alias(first_arg);
30559                self.expect(TokenType::Comma)?;
30560                let second_arg = self.parse_expression()?;
30561                let second_arg = self.try_clickhouse_func_arg_alias(second_arg);
30562                // Third argument is optional (SQLite TIMEDIFF only takes 2 args)
30563                let mut args = if self.match_token(TokenType::Comma) {
30564                    let third_arg = self.parse_expression()?;
30565                    let third_arg = self.try_clickhouse_func_arg_alias(third_arg);
30566                    vec![first_arg, second_arg, third_arg]
30567                } else {
30568                    vec![first_arg, second_arg]
30569                };
30570                // ClickHouse: optional 4th timezone argument for dateDiff
30571                while self.match_token(TokenType::Comma) {
30572                    let arg = self.parse_expression()?;
30573                    args.push(self.try_clickhouse_func_arg_alias(arg));
30574                }
30575                self.expect(TokenType::RParen)?;
30576                if matches!(
30577                    self.config.dialect,
30578                    Some(crate::dialects::DialectType::Snowflake)
30579                ) && args.len() == 3
30580                {
30581                    if let Some(unit) = self.try_parse_date_part_unit_expr(&args[0]) {
30582                        return Ok(Some(Expression::DateDiff(Box::new(DateDiffFunc {
30583                            this: args[2].clone(),
30584                            expression: args[1].clone(),
30585                            unit: Some(unit),
30586                        }))));
30587                    }
30588                }
30589                Ok(Some(Expression::Function(Box::new(Function {
30590                    name: name.to_string(),
30591                    args,
30592                    distinct: false,
30593                    trailing_comments: Vec::new(),
30594                    use_bracket_syntax: false,
30595                    no_parens: false,
30596                    quoted: false,
30597                    span: None,
30598                    inferred_type: None,
30599                }))))
30600            }
30601            (crate::function_registry::TypedParseKind::Variadic, "RANDOM") => {
30602                // RANDOM() - no args, RANDOM(seed) - Snowflake, RANDOM(lower, upper) - Teradata
30603                if self.check(TokenType::RParen) {
30604                    self.expect(TokenType::RParen)?;
30605                    Ok(Some(Expression::Random(Random)))
30606                } else {
30607                    let first = self.parse_expression()?;
30608                    if self.match_token(TokenType::Comma) {
30609                        let second = self.parse_expression()?;
30610                        self.expect(TokenType::RParen)?;
30611                        Ok(Some(Expression::Rand(Box::new(Rand {
30612                            seed: None,
30613                            lower: Some(Box::new(first)),
30614                            upper: Some(Box::new(second)),
30615                        }))))
30616                    } else {
30617                        self.expect(TokenType::RParen)?;
30618                        Ok(Some(Expression::Rand(Box::new(Rand {
30619                            seed: Some(Box::new(first)),
30620                            lower: None,
30621                            upper: None,
30622                        }))))
30623                    }
30624                }
30625            }
30626            (crate::function_registry::TypedParseKind::Variadic, "RAND") => {
30627                let seed = if self.check(TokenType::RParen) {
30628                    None
30629                } else {
30630                    Some(Box::new(self.parse_expression()?))
30631                };
30632                self.expect(TokenType::RParen)?;
30633                Ok(Some(Expression::Rand(Box::new(Rand {
30634                    seed,
30635                    lower: None,
30636                    upper: None,
30637                }))))
30638            }
30639            (crate::function_registry::TypedParseKind::Variadic, "PI") => {
30640                self.expect(TokenType::RParen)?;
30641                Ok(Some(Expression::Pi(Pi)))
30642            }
30643            (crate::function_registry::TypedParseKind::Variadic, "LAST_DAY") => {
30644                let this = self.parse_expression()?;
30645                let unit = if self.match_token(TokenType::Comma) {
30646                    Some(self.parse_datetime_field()?)
30647                } else {
30648                    None
30649                };
30650                self.expect(TokenType::RParen)?;
30651                Ok(Some(Expression::LastDay(Box::new(LastDayFunc {
30652                    this,
30653                    unit,
30654                }))))
30655            }
30656            (crate::function_registry::TypedParseKind::Variadic, "POSITION") => {
30657                let expr = self
30658                    .parse_position()?
30659                    .ok_or_else(|| self.parse_error("Expected expression in POSITION"))?;
30660                self.expect(TokenType::RParen)?;
30661                Ok(Some(expr))
30662            }
30663            (crate::function_registry::TypedParseKind::Variadic, "STRPOS") => {
30664                let this = self.parse_expression()?;
30665                self.expect(TokenType::Comma)?;
30666                let substr = self.parse_expression()?;
30667                let occurrence = if self.match_token(TokenType::Comma) {
30668                    Some(Box::new(self.parse_expression()?))
30669                } else {
30670                    None
30671                };
30672                self.expect(TokenType::RParen)?;
30673                Ok(Some(Expression::StrPosition(Box::new(StrPosition {
30674                    this: Box::new(this),
30675                    substr: Some(Box::new(substr)),
30676                    position: None,
30677                    occurrence,
30678                }))))
30679            }
30680            (crate::function_registry::TypedParseKind::Variadic, "LOCATE") => {
30681                if self.check(TokenType::RParen) {
30682                    self.advance();
30683                    return Ok(Some(Expression::Function(Box::new(Function {
30684                        name: name.to_string(),
30685                        args: vec![],
30686                        distinct: false,
30687                        trailing_comments: Vec::new(),
30688                        use_bracket_syntax: false,
30689                        no_parens: false,
30690                        quoted: false,
30691                        span: None,
30692                        inferred_type: None,
30693                    }))));
30694                }
30695                let first = self.parse_expression()?;
30696                if !self.check(TokenType::Comma) && self.check(TokenType::RParen) {
30697                    self.advance();
30698                    return Ok(Some(Expression::Function(Box::new(Function {
30699                        name: name.to_string(),
30700                        args: vec![first],
30701                        distinct: false,
30702                        trailing_comments: Vec::new(),
30703                        use_bracket_syntax: false,
30704                        no_parens: false,
30705                        quoted: false,
30706                        span: None,
30707                        inferred_type: None,
30708                    }))));
30709                }
30710                self.expect(TokenType::Comma)?;
30711                let second = self.parse_expression()?;
30712                let position = if self.match_token(TokenType::Comma) {
30713                    Some(Box::new(self.parse_expression()?))
30714                } else {
30715                    None
30716                };
30717                self.expect(TokenType::RParen)?;
30718                Ok(Some(Expression::StrPosition(Box::new(StrPosition {
30719                    this: Box::new(second),
30720                    substr: Some(Box::new(first)),
30721                    position,
30722                    occurrence: None,
30723                }))))
30724            }
30725            (crate::function_registry::TypedParseKind::Variadic, "INSTR") => {
30726                let first = self.parse_expression()?;
30727                self.expect(TokenType::Comma)?;
30728                let second = self.parse_expression()?;
30729                let position = if self.match_token(TokenType::Comma) {
30730                    Some(Box::new(self.parse_expression()?))
30731                } else {
30732                    None
30733                };
30734                self.expect(TokenType::RParen)?;
30735                Ok(Some(Expression::StrPosition(Box::new(StrPosition {
30736                    this: Box::new(first),
30737                    substr: Some(Box::new(second)),
30738                    position,
30739                    occurrence: None,
30740                }))))
30741            }
30742            (crate::function_registry::TypedParseKind::Variadic, "NORMALIZE") => {
30743                let this = self.parse_expression()?;
30744                let form = if self.match_token(TokenType::Comma) {
30745                    Some(Box::new(self.parse_expression()?))
30746                } else {
30747                    None
30748                };
30749                self.expect(TokenType::RParen)?;
30750                Ok(Some(Expression::Normalize(Box::new(Normalize {
30751                    this: Box::new(this),
30752                    form,
30753                    is_casefold: None,
30754                }))))
30755            }
30756            (crate::function_registry::TypedParseKind::Variadic, "INITCAP") => {
30757                let this = self.parse_expression()?;
30758                let delimiter = if self.match_token(TokenType::Comma) {
30759                    Some(Box::new(self.parse_expression()?))
30760                } else {
30761                    None
30762                };
30763                self.expect(TokenType::RParen)?;
30764                if let Some(delim) = delimiter {
30765                    Ok(Some(Expression::Function(Box::new(Function::new(
30766                        "INITCAP".to_string(),
30767                        vec![this, *delim],
30768                    )))))
30769                } else {
30770                    Ok(Some(Expression::Initcap(Box::new(UnaryFunc::new(this)))))
30771                }
30772            }
30773            (crate::function_registry::TypedParseKind::Variadic, "FLOOR") => {
30774                let this = self.parse_expression()?;
30775                let to = if self.match_token(TokenType::To) {
30776                    self.parse_var()?
30777                } else {
30778                    None
30779                };
30780                let scale = if to.is_none() && self.match_token(TokenType::Comma) {
30781                    Some(self.parse_expression()?)
30782                } else {
30783                    None
30784                };
30785                if self.check(TokenType::Comma) {
30786                    let mut args = vec![this];
30787                    if let Some(s) = scale {
30788                        args.push(s);
30789                    }
30790                    while self.match_token(TokenType::Comma) {
30791                        args.push(self.parse_expression()?);
30792                    }
30793                    self.expect(TokenType::RParen)?;
30794                    return Ok(Some(Expression::Function(Box::new(Function {
30795                        name: name.to_string(),
30796                        args,
30797                        distinct: false,
30798                        trailing_comments: Vec::new(),
30799                        use_bracket_syntax: false,
30800                        no_parens: false,
30801                        quoted: false,
30802                        span: None,
30803                        inferred_type: None,
30804                    }))));
30805                }
30806                self.expect(TokenType::RParen)?;
30807                Ok(Some(Expression::Floor(Box::new(FloorFunc {
30808                    this,
30809                    scale,
30810                    to,
30811                }))))
30812            }
30813            (crate::function_registry::TypedParseKind::Variadic, "LOG") => {
30814                let first = self.parse_expression()?;
30815                if self.match_token(TokenType::Comma) {
30816                    let second = self.parse_expression()?;
30817                    self.expect(TokenType::RParen)?;
30818                    let (value, base) = if self.log_base_first() {
30819                        (second, first)
30820                    } else {
30821                        (first, second)
30822                    };
30823                    Ok(Some(Expression::Log(Box::new(LogFunc {
30824                        this: value,
30825                        base: Some(base),
30826                    }))))
30827                } else {
30828                    self.expect(TokenType::RParen)?;
30829                    if self.log_defaults_to_ln() {
30830                        Ok(Some(Expression::Ln(Box::new(UnaryFunc::new(first)))))
30831                    } else {
30832                        Ok(Some(Expression::Log(Box::new(LogFunc {
30833                            this: first,
30834                            base: None,
30835                        }))))
30836                    }
30837                }
30838            }
30839            (crate::function_registry::TypedParseKind::Variadic, "FLATTEN") => {
30840                let args = self.parse_function_arguments()?;
30841                self.expect(TokenType::RParen)?;
30842                Ok(Some(Expression::Function(Box::new(Function {
30843                    name: name.to_string(),
30844                    args,
30845                    distinct: false,
30846                    trailing_comments: Vec::new(),
30847                    use_bracket_syntax: false,
30848                    no_parens: false,
30849                    quoted: false,
30850                    span: None,
30851                    inferred_type: None,
30852                }))))
30853            }
30854            (crate::function_registry::TypedParseKind::Variadic, "ARRAY_INTERSECT") => {
30855                let mut expressions = vec![self.parse_expression()?];
30856                while self.match_token(TokenType::Comma) {
30857                    expressions.push(self.parse_expression()?);
30858                }
30859                self.expect(TokenType::RParen)?;
30860                Ok(Some(Expression::ArrayIntersect(Box::new(VarArgFunc {
30861                    expressions,
30862                    original_name: Some(name.to_string()),
30863                    inferred_type: None,
30864                }))))
30865            }
30866            (crate::function_registry::TypedParseKind::Variadic, "CURRENT_SCHEMAS") => {
30867                let args = if self.check(TokenType::RParen) {
30868                    Vec::new()
30869                } else {
30870                    vec![self.parse_expression()?]
30871                };
30872                self.expect(TokenType::RParen)?;
30873                Ok(Some(Expression::CurrentSchemas(Box::new(CurrentSchemas {
30874                    this: args.into_iter().next().map(Box::new),
30875                }))))
30876            }
30877            (crate::function_registry::TypedParseKind::Variadic, "COALESCE") => {
30878                let args = if self.check(TokenType::RParen) {
30879                    Vec::new()
30880                } else {
30881                    self.parse_expression_list()?
30882                };
30883                self.expect(TokenType::RParen)?;
30884                Ok(Some(Expression::Coalesce(Box::new(
30885                    crate::expressions::VarArgFunc {
30886                        original_name: None,
30887                        expressions: args,
30888                        inferred_type: None,
30889                    },
30890                ))))
30891            }
30892            (crate::function_registry::TypedParseKind::Variadic, "IFNULL") => {
30893                let args = self.parse_expression_list()?;
30894                self.expect(TokenType::RParen)?;
30895                if args.len() >= 2 {
30896                    Ok(Some(Expression::Coalesce(Box::new(
30897                        crate::expressions::VarArgFunc {
30898                            original_name: Some("IFNULL".to_string()),
30899                            expressions: args,
30900                            inferred_type: None,
30901                        },
30902                    ))))
30903                } else {
30904                    Ok(Some(Expression::Function(Box::new(Function {
30905                        name: name.to_string(),
30906                        args,
30907                        distinct: false,
30908                        trailing_comments: Vec::new(),
30909                        use_bracket_syntax: false,
30910                        no_parens: false,
30911                        quoted: false,
30912                        span: None,
30913                        inferred_type: None,
30914                    }))))
30915                }
30916            }
30917            (crate::function_registry::TypedParseKind::Variadic, "NVL") => {
30918                let args = self.parse_expression_list()?;
30919                self.expect(TokenType::RParen)?;
30920                if args.len() > 2 {
30921                    Ok(Some(Expression::Function(Box::new(Function {
30922                        name: "COALESCE".to_string(),
30923                        args,
30924                        distinct: false,
30925                        trailing_comments: Vec::new(),
30926                        use_bracket_syntax: false,
30927                        no_parens: false,
30928                        quoted: false,
30929                        span: None,
30930                        inferred_type: None,
30931                    }))))
30932                } else if args.len() == 2 {
30933                    Ok(Some(Expression::Nvl(Box::new(
30934                        crate::expressions::BinaryFunc {
30935                            original_name: Some("NVL".to_string()),
30936                            this: args[0].clone(),
30937                            expression: args[1].clone(),
30938                            inferred_type: None,
30939                        },
30940                    ))))
30941                } else {
30942                    Ok(Some(Expression::Function(Box::new(Function {
30943                        name: name.to_string(),
30944                        args,
30945                        distinct: false,
30946                        trailing_comments: Vec::new(),
30947                        use_bracket_syntax: false,
30948                        no_parens: false,
30949                        quoted: false,
30950                        span: None,
30951                        inferred_type: None,
30952                    }))))
30953                }
30954            }
30955            (crate::function_registry::TypedParseKind::Variadic, "NVL2") => {
30956                let args = self.parse_expression_list()?;
30957                self.expect(TokenType::RParen)?;
30958                if args.len() >= 3 {
30959                    Ok(Some(Expression::Nvl2(Box::new(
30960                        crate::expressions::Nvl2Func {
30961                            this: args[0].clone(),
30962                            true_value: args[1].clone(),
30963                            false_value: args[2].clone(),
30964                            inferred_type: None,
30965                        },
30966                    ))))
30967                } else {
30968                    Ok(Some(Expression::Function(Box::new(Function {
30969                        name: name.to_string(),
30970                        args,
30971                        distinct: false,
30972                        trailing_comments: Vec::new(),
30973                        use_bracket_syntax: false,
30974                        no_parens: false,
30975                        quoted: false,
30976                        span: None,
30977                        inferred_type: None,
30978                    }))))
30979                }
30980            }
30981            (crate::function_registry::TypedParseKind::Variadic, "EXTRACT") => {
30982                if matches!(
30983                    self.config.dialect,
30984                    Some(crate::dialects::DialectType::ClickHouse)
30985                ) && (self.check(TokenType::Identifier)
30986                    || self.check(TokenType::Var)
30987                    || self.peek().token_type.is_keyword()
30988                    || self.check(TokenType::String)
30989                    || self.check(TokenType::Number))
30990                    && (self.check_next(TokenType::Comma)
30991                        || self.check_next(TokenType::LParen)
30992                        || self.check_next(TokenType::Var)
30993                        || self.check_next(TokenType::Identifier))
30994                {
30995                    let args = self.parse_function_arguments()?;
30996                    self.expect(TokenType::RParen)?;
30997                    return Ok(Some(Expression::Function(Box::new(Function {
30998                        name: name.to_string(),
30999                        args,
31000                        distinct: false,
31001                        trailing_comments: Vec::new(),
31002                        use_bracket_syntax: false,
31003                        no_parens: false,
31004                        quoted: false,
31005                        span: None,
31006                        inferred_type: None,
31007                    }))));
31008                }
31009
31010                if self.check(TokenType::String) {
31011                    let args = self.parse_expression_list()?;
31012                    self.expect(TokenType::RParen)?;
31013                    return Ok(Some(Expression::Function(Box::new(Function {
31014                        name: name.to_string(),
31015                        args,
31016                        distinct: false,
31017                        trailing_comments: Vec::new(),
31018                        use_bracket_syntax: false,
31019                        no_parens: false,
31020                        quoted: false,
31021                        span: None,
31022                        inferred_type: None,
31023                    }))));
31024                }
31025
31026                let field = self.parse_datetime_field()?;
31027                if !self.match_token(TokenType::From) && !self.match_token(TokenType::Comma) {
31028                    return Err(self.parse_error("Expected FROM or comma after EXTRACT field"));
31029                }
31030                let this = self.parse_expression()?;
31031                let this = self.try_clickhouse_func_arg_alias(this);
31032                self.expect(TokenType::RParen)?;
31033                Ok(Some(Expression::Extract(Box::new(ExtractFunc {
31034                    this,
31035                    field,
31036                }))))
31037            }
31038            (crate::function_registry::TypedParseKind::Variadic, "STRUCT") => {
31039                let args = if self.check(TokenType::RParen) {
31040                    Vec::new()
31041                } else {
31042                    self.parse_struct_args()?
31043                };
31044                self.expect(TokenType::RParen)?;
31045                Ok(Some(Expression::Function(Box::new(Function {
31046                    name: name.to_string(),
31047                    args,
31048                    distinct: false,
31049                    trailing_comments: Vec::new(),
31050                    use_bracket_syntax: false,
31051                    no_parens: false,
31052                    quoted: false,
31053                    span: None,
31054                    inferred_type: None,
31055                }))))
31056            }
31057            (crate::function_registry::TypedParseKind::Variadic, "CHAR") => {
31058                let args = self.parse_expression_list()?;
31059                let charset = if self.match_token(TokenType::Using) {
31060                    if !self.is_at_end() {
31061                        let charset_token = self.advance();
31062                        Some(charset_token.text.clone())
31063                    } else {
31064                        None
31065                    }
31066                } else {
31067                    None
31068                };
31069                self.expect(TokenType::RParen)?;
31070                if charset.is_some() {
31071                    Ok(Some(Expression::CharFunc(Box::new(
31072                        crate::expressions::CharFunc {
31073                            args,
31074                            charset,
31075                            name: None,
31076                        },
31077                    ))))
31078                } else {
31079                    Ok(Some(Expression::Function(Box::new(Function {
31080                        name: name.to_string(),
31081                        args,
31082                        distinct: false,
31083                        trailing_comments: Vec::new(),
31084                        use_bracket_syntax: false,
31085                        no_parens: false,
31086                        quoted: false,
31087                        span: None,
31088                        inferred_type: None,
31089                    }))))
31090                }
31091            }
31092            (crate::function_registry::TypedParseKind::Variadic, "CHR") => {
31093                let args = self.parse_expression_list()?;
31094                let charset = if self.match_token(TokenType::Using) {
31095                    if !self.is_at_end() {
31096                        let charset_token = self.advance();
31097                        Some(charset_token.text.clone())
31098                    } else {
31099                        None
31100                    }
31101                } else {
31102                    None
31103                };
31104                self.expect(TokenType::RParen)?;
31105                if charset.is_some() {
31106                    Ok(Some(Expression::CharFunc(Box::new(
31107                        crate::expressions::CharFunc {
31108                            args,
31109                            charset,
31110                            name: Some("CHR".to_string()),
31111                        },
31112                    ))))
31113                } else {
31114                    Ok(Some(Expression::Function(Box::new(Function {
31115                        name: name.to_string(),
31116                        args,
31117                        distinct: false,
31118                        trailing_comments: Vec::new(),
31119                        use_bracket_syntax: false,
31120                        no_parens: false,
31121                        quoted: false,
31122                        span: None,
31123                        inferred_type: None,
31124                    }))))
31125                }
31126            }
31127            (crate::function_registry::TypedParseKind::Variadic, "RANGE_N") => {
31128                let this = self.parse_bitwise_or()?;
31129                self.expect(TokenType::Between)?;
31130                let mut expressions = Vec::new();
31131                while !self.check(TokenType::Each) && !self.check(TokenType::RParen) {
31132                    expressions.push(self.parse_expression()?);
31133                    if !self.match_token(TokenType::Comma) {
31134                        break;
31135                    }
31136                }
31137                let each = if self.match_token(TokenType::Each) {
31138                    Some(Box::new(self.parse_expression()?))
31139                } else {
31140                    None
31141                };
31142                self.expect(TokenType::RParen)?;
31143                Ok(Some(Expression::RangeN(Box::new(RangeN {
31144                    this: Box::new(this),
31145                    expressions,
31146                    each,
31147                }))))
31148            }
31149            (crate::function_registry::TypedParseKind::Variadic, "XMLTABLE") => {
31150                if let Some(xml_table) = self.parse_xml_table()? {
31151                    self.expect(TokenType::RParen)?;
31152                    Ok(Some(xml_table))
31153                } else {
31154                    Err(self.parse_error("Failed to parse XMLTABLE"))
31155                }
31156            }
31157            (crate::function_registry::TypedParseKind::Variadic, "XMLELEMENT") => {
31158                if let Some(elem) = self.parse_xml_element()? {
31159                    self.expect(TokenType::RParen)?;
31160                    Ok(Some(elem))
31161                } else {
31162                    self.expect(TokenType::RParen)?;
31163                    Ok(Some(Expression::Function(Box::new(Function {
31164                        name: name.to_string(),
31165                        args: Vec::new(),
31166                        distinct: false,
31167                        trailing_comments: Vec::new(),
31168                        use_bracket_syntax: false,
31169                        no_parens: false,
31170                        quoted: false,
31171                        span: None,
31172                        inferred_type: None,
31173                    }))))
31174                }
31175            }
31176            (crate::function_registry::TypedParseKind::Variadic, "XMLATTRIBUTES") => {
31177                let mut attrs = Vec::new();
31178                if !self.check(TokenType::RParen) {
31179                    loop {
31180                        let expr = self.parse_expression()?;
31181                        if self.match_token(TokenType::As) {
31182                            let alias_ident = self.expect_identifier_or_keyword_with_quoted()?;
31183                            attrs.push(Expression::Alias(Box::new(Alias {
31184                                this: expr,
31185                                alias: alias_ident,
31186                                column_aliases: Vec::new(),
31187                                pre_alias_comments: Vec::new(),
31188                                trailing_comments: Vec::new(),
31189                                inferred_type: None,
31190                            })));
31191                        } else {
31192                            attrs.push(expr);
31193                        }
31194                        if !self.match_token(TokenType::Comma) {
31195                            break;
31196                        }
31197                    }
31198                }
31199                self.expect(TokenType::RParen)?;
31200                Ok(Some(Expression::Function(Box::new(Function {
31201                    name: "XMLATTRIBUTES".to_string(),
31202                    args: attrs,
31203                    distinct: false,
31204                    trailing_comments: Vec::new(),
31205                    use_bracket_syntax: false,
31206                    no_parens: false,
31207                    quoted: false,
31208                    span: None,
31209                    inferred_type: None,
31210                }))))
31211            }
31212            (crate::function_registry::TypedParseKind::Variadic, "XMLCOMMENT") => {
31213                let args = if self.check(TokenType::RParen) {
31214                    Vec::new()
31215                } else {
31216                    self.parse_expression_list()?
31217                };
31218                self.expect(TokenType::RParen)?;
31219                Ok(Some(Expression::Function(Box::new(Function {
31220                    name: "XMLCOMMENT".to_string(),
31221                    args,
31222                    distinct: false,
31223                    trailing_comments: Vec::new(),
31224                    use_bracket_syntax: false,
31225                    no_parens: false,
31226                    quoted: false,
31227                    span: None,
31228                    inferred_type: None,
31229                }))))
31230            }
31231            (crate::function_registry::TypedParseKind::Variadic, "MATCH") => {
31232                let expressions = if self.check(TokenType::Table)
31233                    && !matches!(
31234                        self.config.dialect,
31235                        Some(crate::dialects::DialectType::ClickHouse)
31236                    ) {
31237                    self.advance();
31238                    let table_name = self.expect_identifier_or_keyword()?;
31239                    vec![Expression::Var(Box::new(Var {
31240                        this: format!("TABLE {}", table_name),
31241                    }))]
31242                } else {
31243                    self.parse_expression_list()?
31244                };
31245
31246                self.expect(TokenType::RParen)?;
31247
31248                if !self.check_keyword_text("AGAINST") {
31249                    return Ok(Some(Expression::Function(Box::new(Function {
31250                        name: "MATCH".to_string(),
31251                        args: expressions,
31252                        distinct: false,
31253                        trailing_comments: Vec::new(),
31254                        use_bracket_syntax: false,
31255                        no_parens: false,
31256                        quoted: false,
31257                        span: None,
31258                        inferred_type: None,
31259                    }))));
31260                }
31261
31262                self.advance();
31263                self.expect(TokenType::LParen)?;
31264                let search_expr = self.parse_primary()?;
31265
31266                let modifier = if self.match_text_seq(&["IN", "NATURAL", "LANGUAGE", "MODE"]) {
31267                    if self.match_text_seq(&["WITH", "QUERY", "EXPANSION"]) {
31268                        Some(Box::new(Expression::Var(Box::new(Var {
31269                            this: "IN NATURAL LANGUAGE MODE WITH QUERY EXPANSION".to_string(),
31270                        }))))
31271                    } else {
31272                        Some(Box::new(Expression::Var(Box::new(Var {
31273                            this: "IN NATURAL LANGUAGE MODE".to_string(),
31274                        }))))
31275                    }
31276                } else if self.match_text_seq(&["IN", "BOOLEAN", "MODE"]) {
31277                    Some(Box::new(Expression::Var(Box::new(Var {
31278                        this: "IN BOOLEAN MODE".to_string(),
31279                    }))))
31280                } else if self.match_text_seq(&["WITH", "QUERY", "EXPANSION"]) {
31281                    Some(Box::new(Expression::Var(Box::new(Var {
31282                        this: "WITH QUERY EXPANSION".to_string(),
31283                    }))))
31284                } else {
31285                    None
31286                };
31287
31288                self.expect(TokenType::RParen)?;
31289
31290                Ok(Some(Expression::MatchAgainst(Box::new(MatchAgainst {
31291                    this: Box::new(search_expr),
31292                    expressions,
31293                    modifier,
31294                }))))
31295            }
31296            (crate::function_registry::TypedParseKind::Variadic, "TRANSFORM") => {
31297                let expressions = if self.check(TokenType::RParen) {
31298                    Vec::new()
31299                } else {
31300                    self.parse_function_args_with_lambda()?
31301                };
31302                self.expect(TokenType::RParen)?;
31303
31304                let row_format_before = if self.match_token(TokenType::Row) {
31305                    self.parse_row()?
31306                } else {
31307                    None
31308                };
31309
31310                let record_writer = if self.match_text_seq(&["RECORDWRITER"]) {
31311                    Some(Box::new(self.parse_expression()?))
31312                } else {
31313                    None
31314                };
31315
31316                if self.match_token(TokenType::Using) {
31317                    let command_script = Some(Box::new(self.parse_expression()?));
31318                    let schema = if self.match_token(TokenType::As) {
31319                        self.parse_schema()?
31320                    } else {
31321                        None
31322                    };
31323
31324                    let row_format_after = if self.match_token(TokenType::Row) {
31325                        self.parse_row()?
31326                    } else {
31327                        None
31328                    };
31329
31330                    let record_reader = if self.match_text_seq(&["RECORDREADER"]) {
31331                        Some(Box::new(self.parse_expression()?))
31332                    } else {
31333                        None
31334                    };
31335
31336                    Ok(Some(Expression::QueryTransform(Box::new(QueryTransform {
31337                        expressions,
31338                        command_script,
31339                        schema: schema.map(Box::new),
31340                        row_format_before: row_format_before.map(Box::new),
31341                        record_writer,
31342                        row_format_after: row_format_after.map(Box::new),
31343                        record_reader,
31344                    }))))
31345                } else {
31346                    Ok(Some(Expression::Function(Box::new(Function {
31347                        name: name.to_string(),
31348                        args: expressions,
31349                        distinct: false,
31350                        trailing_comments: Vec::new(),
31351                        use_bracket_syntax: false,
31352                        no_parens: false,
31353                        quoted,
31354                        span: None,
31355                        inferred_type: None,
31356                    }))))
31357                }
31358            }
31359            (crate::function_registry::TypedParseKind::Variadic, "CONVERT") => {
31360                let is_try = upper_name == "TRY_CONVERT";
31361                let is_tsql = matches!(
31362                    self.config.dialect,
31363                    Some(crate::dialects::DialectType::TSQL)
31364                        | Some(crate::dialects::DialectType::Fabric)
31365                );
31366
31367                if is_tsql {
31368                    let saved = self.current;
31369                    let orig_type_text = if self.current < self.tokens.len() {
31370                        self.tokens[self.current].text.to_uppercase()
31371                    } else {
31372                        String::new()
31373                    };
31374                    let dt = self.parse_data_type();
31375                    if let Ok(mut dt) = dt {
31376                        if self.match_token(TokenType::Comma) {
31377                            if orig_type_text == "NVARCHAR" || orig_type_text == "NCHAR" {
31378                                dt = match dt {
31379                                    crate::expressions::DataType::VarChar { length, .. } => {
31380                                        if let Some(len) = length {
31381                                            crate::expressions::DataType::Custom {
31382                                                name: format!("{}({})", orig_type_text, len),
31383                                            }
31384                                        } else {
31385                                            crate::expressions::DataType::Custom {
31386                                                name: orig_type_text.clone(),
31387                                            }
31388                                        }
31389                                    }
31390                                    crate::expressions::DataType::Char { length } => {
31391                                        if let Some(len) = length {
31392                                            crate::expressions::DataType::Custom {
31393                                                name: format!("{}({})", orig_type_text, len),
31394                                            }
31395                                        } else {
31396                                            crate::expressions::DataType::Custom {
31397                                                name: orig_type_text.clone(),
31398                                            }
31399                                        }
31400                                    }
31401                                    other => other,
31402                                };
31403                            }
31404                            let value = self.parse_expression()?;
31405                            let style = if self.match_token(TokenType::Comma) {
31406                                Some(self.parse_expression()?)
31407                            } else {
31408                                None
31409                            };
31410                            self.expect(TokenType::RParen)?;
31411                            let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
31412                            let mut args = vec![Expression::DataType(dt), value];
31413                            if let Some(s) = style {
31414                                args.push(s);
31415                            }
31416                            return Ok(Some(Expression::Function(Box::new(Function {
31417                                name: func_name.to_string(),
31418                                args,
31419                                distinct: false,
31420                                trailing_comments: Vec::new(),
31421                                use_bracket_syntax: false,
31422                                no_parens: false,
31423                                quoted: false,
31424                                span: None,
31425                                inferred_type: None,
31426                            }))));
31427                        }
31428                        self.current = saved;
31429                    } else {
31430                        self.current = saved;
31431                    }
31432                }
31433
31434                let this = self.parse_expression()?;
31435                if self.match_token(TokenType::Using) {
31436                    let charset = self.expect_identifier()?;
31437                    self.expect(TokenType::RParen)?;
31438                    Ok(Some(Expression::Cast(Box::new(Cast {
31439                        this,
31440                        to: DataType::CharacterSet { name: charset },
31441                        trailing_comments: Vec::new(),
31442                        double_colon_syntax: false,
31443                        format: None,
31444                        default: None,
31445                        inferred_type: None,
31446                    }))))
31447                } else if self.match_token(TokenType::Comma) {
31448                    let mut args = vec![this];
31449                    args.push(self.parse_expression()?);
31450                    while self.match_token(TokenType::Comma) {
31451                        args.push(self.parse_expression()?);
31452                    }
31453                    self.expect(TokenType::RParen)?;
31454                    let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
31455                    Ok(Some(Expression::Function(Box::new(Function {
31456                        name: func_name.to_string(),
31457                        args,
31458                        distinct: false,
31459                        trailing_comments: Vec::new(),
31460                        use_bracket_syntax: false,
31461                        no_parens: false,
31462                        quoted: false,
31463                        span: None,
31464                        inferred_type: None,
31465                    }))))
31466                } else {
31467                    self.expect(TokenType::RParen)?;
31468                    let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
31469                    Ok(Some(Expression::Function(Box::new(Function {
31470                        name: func_name.to_string(),
31471                        args: vec![this],
31472                        distinct: false,
31473                        trailing_comments: Vec::new(),
31474                        use_bracket_syntax: false,
31475                        no_parens: false,
31476                        quoted: false,
31477                        span: None,
31478                        inferred_type: None,
31479                    }))))
31480                }
31481            }
31482            (crate::function_registry::TypedParseKind::Variadic, "TRIM") => {
31483                let (position, position_explicit) = if self.match_token(TokenType::Leading) {
31484                    (TrimPosition::Leading, true)
31485                } else if self.match_token(TokenType::Trailing) {
31486                    (TrimPosition::Trailing, true)
31487                } else if self.match_token(TokenType::Both) {
31488                    (TrimPosition::Both, true)
31489                } else {
31490                    (TrimPosition::Both, false)
31491                };
31492
31493                if position_explicit || self.check(TokenType::From) {
31494                    if self.match_token(TokenType::From) {
31495                        let this = self.parse_expression()?;
31496                        self.expect(TokenType::RParen)?;
31497                        Ok(Some(Expression::Trim(Box::new(TrimFunc {
31498                            this,
31499                            characters: None,
31500                            position,
31501                            sql_standard_syntax: true,
31502                            position_explicit,
31503                        }))))
31504                    } else {
31505                        let first_expr = self.parse_bitwise_or()?;
31506                        let first_expr = self.try_clickhouse_func_arg_alias(first_expr);
31507                        if self.match_token(TokenType::From) {
31508                            let this = self.parse_bitwise_or()?;
31509                            let this = self.try_clickhouse_func_arg_alias(this);
31510                            self.expect(TokenType::RParen)?;
31511                            Ok(Some(Expression::Trim(Box::new(TrimFunc {
31512                                this,
31513                                characters: Some(first_expr),
31514                                position,
31515                                sql_standard_syntax: true,
31516                                position_explicit,
31517                            }))))
31518                        } else {
31519                            self.expect(TokenType::RParen)?;
31520                            Ok(Some(Expression::Trim(Box::new(TrimFunc {
31521                                this: first_expr,
31522                                characters: None,
31523                                position,
31524                                sql_standard_syntax: true,
31525                                position_explicit,
31526                            }))))
31527                        }
31528                    }
31529                } else {
31530                    let first_expr = self.parse_expression()?;
31531                    let first_expr = self.try_clickhouse_func_arg_alias(first_expr);
31532                    if self.match_token(TokenType::From) {
31533                        let this = self.parse_expression()?;
31534                        let this = self.try_clickhouse_func_arg_alias(this);
31535                        self.expect(TokenType::RParen)?;
31536                        Ok(Some(Expression::Trim(Box::new(TrimFunc {
31537                            this,
31538                            characters: Some(first_expr),
31539                            position: TrimPosition::Both,
31540                            sql_standard_syntax: true,
31541                            position_explicit: false,
31542                        }))))
31543                    } else if self.match_token(TokenType::Comma) {
31544                        let second_expr = self.parse_expression()?;
31545                        self.expect(TokenType::RParen)?;
31546                        let trim_pattern_first = matches!(
31547                            self.config.dialect,
31548                            Some(crate::dialects::DialectType::Spark)
31549                        );
31550                        let (this, characters) = if trim_pattern_first {
31551                            (second_expr, first_expr)
31552                        } else {
31553                            (first_expr, second_expr)
31554                        };
31555                        Ok(Some(Expression::Trim(Box::new(TrimFunc {
31556                            this,
31557                            characters: Some(characters),
31558                            position: TrimPosition::Both,
31559                            sql_standard_syntax: false,
31560                            position_explicit: false,
31561                        }))))
31562                    } else {
31563                        self.expect(TokenType::RParen)?;
31564                        Ok(Some(Expression::Trim(Box::new(TrimFunc {
31565                            this: first_expr,
31566                            characters: None,
31567                            position: TrimPosition::Both,
31568                            sql_standard_syntax: false,
31569                            position_explicit: false,
31570                        }))))
31571                    }
31572                }
31573            }
31574            (crate::function_registry::TypedParseKind::Variadic, "OVERLAY") => {
31575                if matches!(
31576                    self.config.dialect,
31577                    Some(crate::dialects::DialectType::ClickHouse)
31578                ) {
31579                    let args = self.parse_function_arguments()?;
31580                    self.expect(TokenType::RParen)?;
31581                    return Ok(Some(Expression::Function(Box::new(Function {
31582                        name: name.to_string(),
31583                        args,
31584                        distinct: false,
31585                        trailing_comments: Vec::new(),
31586                        use_bracket_syntax: false,
31587                        no_parens: false,
31588                        quoted: false,
31589                        span: None,
31590                        inferred_type: None,
31591                    }))));
31592                }
31593
31594                let this = self.parse_expression()?;
31595                if self.match_token(TokenType::Placing) {
31596                    let replacement = self.parse_expression()?;
31597                    self.expect(TokenType::From)?;
31598                    let from = self.parse_expression()?;
31599                    let length = if self.match_token(TokenType::For) {
31600                        Some(self.parse_expression()?)
31601                    } else {
31602                        None
31603                    };
31604                    self.expect(TokenType::RParen)?;
31605                    Ok(Some(Expression::Overlay(Box::new(OverlayFunc {
31606                        this,
31607                        replacement,
31608                        from,
31609                        length,
31610                    }))))
31611                } else if self.match_token(TokenType::Comma) {
31612                    let replacement = self.parse_expression()?;
31613                    if self.match_token(TokenType::Comma) {
31614                        let from = self.parse_expression()?;
31615                        let length = if self.match_token(TokenType::Comma) {
31616                            Some(self.parse_expression()?)
31617                        } else {
31618                            None
31619                        };
31620                        self.expect(TokenType::RParen)?;
31621                        Ok(Some(Expression::Overlay(Box::new(OverlayFunc {
31622                            this,
31623                            replacement,
31624                            from,
31625                            length,
31626                        }))))
31627                    } else {
31628                        self.expect(TokenType::RParen)?;
31629                        Ok(Some(Expression::Function(Box::new(Function {
31630                            name: name.to_string(),
31631                            args: vec![this, replacement],
31632                            distinct: false,
31633                            trailing_comments: Vec::new(),
31634                            use_bracket_syntax: false,
31635                            no_parens: false,
31636                            quoted: false,
31637                            span: None,
31638                            inferred_type: None,
31639                        }))))
31640                    }
31641                } else {
31642                    self.expect(TokenType::RParen)?;
31643                    Ok(Some(Expression::Function(Box::new(Function {
31644                        name: name.to_string(),
31645                        args: vec![this],
31646                        distinct: false,
31647                        trailing_comments: Vec::new(),
31648                        use_bracket_syntax: false,
31649                        no_parens: false,
31650                        quoted: false,
31651                        span: None,
31652                        inferred_type: None,
31653                    }))))
31654                }
31655            }
31656            (crate::function_registry::TypedParseKind::Variadic, "CEIL") => {
31657                let this = self.parse_expression()?;
31658                // Check for TO unit syntax (Druid: CEIL(__time TO WEEK))
31659                let to = if self.match_token(TokenType::To) {
31660                    // Parse the time unit as a variable/identifier
31661                    self.parse_var()?
31662                } else {
31663                    None
31664                };
31665                let decimals = if to.is_none() && self.match_token(TokenType::Comma) {
31666                    Some(self.parse_expression()?)
31667                } else {
31668                    None
31669                };
31670                self.expect(TokenType::RParen)?;
31671                Ok(Some(Expression::Ceil(Box::new(CeilFunc {
31672                    this,
31673                    decimals,
31674                    to,
31675                }))))
31676            }
31677            (crate::function_registry::TypedParseKind::Variadic, "TIMESTAMP_FROM_PARTS")
31678            | (crate::function_registry::TypedParseKind::Variadic, "TIMESTAMP_NTZ_FROM_PARTS")
31679            | (crate::function_registry::TypedParseKind::Variadic, "TIMESTAMP_LTZ_FROM_PARTS")
31680            | (crate::function_registry::TypedParseKind::Variadic, "TIMESTAMP_TZ_FROM_PARTS")
31681            | (crate::function_registry::TypedParseKind::Variadic, "DATE_FROM_PARTS")
31682            | (crate::function_registry::TypedParseKind::Variadic, "TIME_FROM_PARTS") => {
31683                let args = self.parse_expression_list()?;
31684                self.expect(TokenType::RParen)?;
31685                Ok(Some(Expression::Function(Box::new(Function {
31686                    name: name.to_string(),
31687                    args,
31688                    distinct: false,
31689                    trailing_comments: Vec::new(),
31690                    use_bracket_syntax: false,
31691                    no_parens: false,
31692                    quoted: false,
31693                    span: None,
31694                    inferred_type: None,
31695                }))))
31696            }
31697            (crate::function_registry::TypedParseKind::CastLike, "TRY_CAST") => {
31698                let this = self.parse_expression()?;
31699                self.expect(TokenType::As)?;
31700                let to = self.parse_data_type()?;
31701                self.expect(TokenType::RParen)?;
31702                Ok(Some(Expression::TryCast(Box::new(Cast {
31703                    this,
31704                    to,
31705                    trailing_comments: Vec::new(),
31706                    double_colon_syntax: false,
31707                    format: None,
31708                    default: None,
31709                    inferred_type: None,
31710                }))))
31711            }
31712            (crate::function_registry::TypedParseKind::Conditional, "IF") => {
31713                // ClickHouse: if() with zero args is valid in test queries
31714                if self.check(TokenType::RParen) {
31715                    self.advance();
31716                    return Ok(Some(Expression::Function(Box::new(Function {
31717                        name: name.to_string(),
31718                        args: vec![],
31719                        distinct: false,
31720                        trailing_comments: Vec::new(),
31721                        use_bracket_syntax: false,
31722                        no_parens: false,
31723                        quoted: false,
31724                        span: None,
31725                        inferred_type: None,
31726                    }))));
31727                }
31728                let args = self.parse_expression_list()?;
31729                self.expect(TokenType::RParen)?;
31730                let expr = if args.len() == 3 {
31731                    Expression::IfFunc(Box::new(crate::expressions::IfFunc {
31732                        original_name: Some(upper_name.to_string()),
31733                        condition: args[0].clone(),
31734                        true_value: args[1].clone(),
31735                        false_value: Some(args[2].clone()),
31736                        inferred_type: None,
31737                    }))
31738                } else if args.len() == 2 {
31739                    // IF with 2 args: condition, true_value (no false_value)
31740                    Expression::IfFunc(Box::new(crate::expressions::IfFunc {
31741                        original_name: Some(upper_name.to_string()),
31742                        condition: args[0].clone(),
31743                        true_value: args[1].clone(),
31744                        false_value: None,
31745                        inferred_type: None,
31746                    }))
31747                } else {
31748                    return Err(self.parse_error("IF function requires 2 or 3 arguments"));
31749                };
31750                Ok(Some(expr))
31751            }
31752            _ => {
31753                self.try_parse_registry_grouped_typed_family(name, upper_name, canonical_upper_name)
31754            }
31755        }
31756    }
31757
31758    /// Route heavy typed-function families via registry metadata groups.
31759    fn try_parse_registry_grouped_typed_family(
31760        &mut self,
31761        name: &str,
31762        upper_name: &str,
31763        canonical_upper_name: &str,
31764    ) -> Result<Option<Expression>> {
31765        use crate::function_registry::TypedDispatchGroup;
31766
31767        match crate::function_registry::typed_dispatch_group_by_name_upper(canonical_upper_name) {
31768            Some(TypedDispatchGroup::AggregateFamily) => self
31769                .parse_typed_aggregate_family(name, upper_name, canonical_upper_name)
31770                .map(Some),
31771            Some(TypedDispatchGroup::WindowFamily) => self
31772                .parse_typed_window_family(name, upper_name, canonical_upper_name)
31773                .map(Some),
31774            Some(TypedDispatchGroup::JsonFamily) => self
31775                .parse_typed_json_family(name, upper_name, canonical_upper_name)
31776                .map(Some),
31777            Some(TypedDispatchGroup::TranslateTeradataFamily) => {
31778                if matches!(
31779                    self.config.dialect,
31780                    Some(crate::dialects::DialectType::Teradata)
31781                ) {
31782                    self.parse_typed_translate_teradata_family(
31783                        name,
31784                        upper_name,
31785                        canonical_upper_name,
31786                    )
31787                    .map(Some)
31788                } else {
31789                    Ok(None)
31790                }
31791            }
31792            None => Ok(None),
31793        }
31794    }
31795
31796    fn make_unquoted_function(name: &str, args: Vec<Expression>) -> Expression {
31797        Expression::Function(Box::new(Function {
31798            name: name.to_string(),
31799            args,
31800            distinct: false,
31801            trailing_comments: Vec::new(),
31802            use_bracket_syntax: false,
31803            no_parens: false,
31804            quoted: false,
31805            span: None,
31806            inferred_type: None,
31807        }))
31808    }
31809
31810    fn make_simple_aggregate(
31811        name: &str,
31812        args: Vec<Expression>,
31813        distinct: bool,
31814        filter: Option<Expression>,
31815    ) -> Expression {
31816        Expression::AggregateFunction(Box::new(AggregateFunction {
31817            name: name.to_string(),
31818            args,
31819            distinct,
31820            filter,
31821            order_by: Vec::new(),
31822            limit: None,
31823            ignore_nulls: None,
31824            inferred_type: None,
31825        }))
31826    }
31827
31828    /// Parse phase-3 typed-function slices that are straightforward pass-throughs.
31829    fn try_parse_phase3_typed_function(
31830        &mut self,
31831        name: &str,
31832        _upper_name: &str,
31833        canonical_upper_name: &str,
31834    ) -> Result<Option<Expression>> {
31835        let Some(behavior) =
31836            crate::function_registry::parser_dispatch_behavior_by_name_upper(canonical_upper_name)
31837        else {
31838            return Ok(None);
31839        };
31840
31841        match behavior {
31842            crate::function_registry::ParserDispatchBehavior::ExprListFunction => {
31843                let args = self.parse_expression_list()?;
31844                self.expect(TokenType::RParen)?;
31845                Ok(Some(Self::make_unquoted_function(name, args)))
31846            }
31847            crate::function_registry::ParserDispatchBehavior::OptionalExprListFunction => {
31848                let args = if self.check(TokenType::RParen) {
31849                    Vec::new()
31850                } else {
31851                    self.parse_expression_list()?
31852                };
31853                self.expect(TokenType::RParen)?;
31854                Ok(Some(Self::make_unquoted_function(name, args)))
31855            }
31856            crate::function_registry::ParserDispatchBehavior::FunctionArgumentsFunction => {
31857                let args = self.parse_function_arguments()?;
31858                self.expect(TokenType::RParen)?;
31859                Ok(Some(Self::make_unquoted_function(name, args)))
31860            }
31861            crate::function_registry::ParserDispatchBehavior::ZeroArgFunction => {
31862                self.expect(TokenType::RParen)?;
31863                Ok(Some(Self::make_unquoted_function(name, Vec::new())))
31864            }
31865            crate::function_registry::ParserDispatchBehavior::ExprListMaybeAggregateByFilter => {
31866                let args = if self.check(TokenType::RParen) {
31867                    Vec::new()
31868                } else {
31869                    self.parse_expression_list()?
31870                };
31871                self.expect(TokenType::RParen)?;
31872                let filter = self.parse_filter_clause()?;
31873                if filter.is_some() {
31874                    Ok(Some(Self::make_simple_aggregate(name, args, false, filter)))
31875                } else {
31876                    Ok(Some(Self::make_unquoted_function(name, args)))
31877                }
31878            }
31879            crate::function_registry::ParserDispatchBehavior::ExprListMaybeAggregateByAggSuffix => {
31880                let args = self.parse_expression_list()?;
31881                self.expect(TokenType::RParen)?;
31882                let filter = self.parse_filter_clause()?;
31883                if canonical_upper_name.ends_with("_AGG") || filter.is_some() {
31884                    Ok(Some(Self::make_simple_aggregate(name, args, false, filter)))
31885                } else {
31886                    Ok(Some(Self::make_unquoted_function(name, args)))
31887                }
31888            }
31889            crate::function_registry::ParserDispatchBehavior::HashLike => {
31890                let args = self.parse_expression_list()?;
31891                self.expect(TokenType::RParen)?;
31892                let filter = self.parse_filter_clause()?;
31893                if canonical_upper_name == "HASH_AGG" || filter.is_some() {
31894                    Ok(Some(Self::make_simple_aggregate(name, args, false, filter)))
31895                } else {
31896                    Ok(Some(Self::make_unquoted_function(name, args)))
31897                }
31898            }
31899            crate::function_registry::ParserDispatchBehavior::HllAggregate => {
31900                let distinct = self.match_token(TokenType::Distinct);
31901                let args = if self.match_token(TokenType::Star) {
31902                    vec![Expression::Star(Star {
31903                        table: None,
31904                        except: None,
31905                        replace: None,
31906                        rename: None,
31907                        trailing_comments: Vec::new(),
31908                        span: None,
31909                    })]
31910                } else if self.check(TokenType::RParen) {
31911                    Vec::new()
31912                } else {
31913                    self.parse_expression_list()?
31914                };
31915                self.expect(TokenType::RParen)?;
31916                let filter = self.parse_filter_clause()?;
31917                Ok(Some(Self::make_simple_aggregate(
31918                    name, args, distinct, filter,
31919                )))
31920            }
31921            crate::function_registry::ParserDispatchBehavior::PercentileAggregate => {
31922                let distinct = self.match_token(TokenType::Distinct);
31923                if !distinct {
31924                    self.match_token(TokenType::All);
31925                }
31926                let args = self.parse_expression_list()?;
31927                self.expect(TokenType::RParen)?;
31928                let filter = self.parse_filter_clause()?;
31929                Ok(Some(Self::make_simple_aggregate(
31930                    name, args, distinct, filter,
31931                )))
31932            }
31933            crate::function_registry::ParserDispatchBehavior::ExprListAggregate => {
31934                let args = self.parse_expression_list()?;
31935                self.expect(TokenType::RParen)?;
31936                let filter = self.parse_filter_clause()?;
31937                Ok(Some(Self::make_simple_aggregate(name, args, false, filter)))
31938            }
31939            crate::function_registry::ParserDispatchBehavior::UnaryAggregate => {
31940                let this = self.parse_expression()?;
31941                self.expect(TokenType::RParen)?;
31942                let filter = self.parse_filter_clause()?;
31943                Ok(Some(Self::make_simple_aggregate(
31944                    name,
31945                    vec![this],
31946                    false,
31947                    filter,
31948                )))
31949            }
31950            crate::function_registry::ParserDispatchBehavior::TranslateNonTeradata => {
31951                if matches!(
31952                    self.config.dialect,
31953                    Some(crate::dialects::DialectType::Teradata)
31954                ) {
31955                    return Ok(None);
31956                }
31957                let args = self.parse_expression_list()?;
31958                self.expect(TokenType::RParen)?;
31959                Ok(Some(Self::make_unquoted_function(name, args)))
31960            }
31961        }
31962    }
31963
31964    /// Parse a typed function call (after the opening paren)
31965    /// Following Python SQLGlot pattern: match all function aliases to typed expressions
31966    fn parse_typed_function(
31967        &mut self,
31968        name: &str,
31969        upper_name: &str,
31970        quoted: bool,
31971    ) -> Result<Expression> {
31972        let canonical_upper_name =
31973            crate::function_registry::canonical_typed_function_name_upper(upper_name);
31974
31975        // Handle internal function rewrites (sqlglot internal functions that map to CAST)
31976        if canonical_upper_name == "TIME_TO_TIME_STR" {
31977            let arg = self.parse_expression()?;
31978            self.expect(TokenType::RParen)?;
31979            return Ok(Expression::Cast(Box::new(Cast {
31980                this: arg,
31981                to: DataType::Text,
31982                trailing_comments: Vec::new(),
31983                double_colon_syntax: false,
31984                format: None,
31985                default: None,
31986                inferred_type: None,
31987            })));
31988        }
31989
31990        if let Some(expr) =
31991            self.try_parse_registry_typed_function(name, upper_name, canonical_upper_name, quoted)?
31992        {
31993            return Ok(expr);
31994        }
31995        if let Some(expr) =
31996            self.try_parse_phase3_typed_function(name, upper_name, canonical_upper_name)?
31997        {
31998            return Ok(expr);
31999        }
32000
32001        self.parse_generic_function(name, quoted)
32002    }
32003
32004    fn parse_typed_aggregate_family(
32005        &mut self,
32006        name: &str,
32007        upper_name: &str,
32008        canonical_upper_name: &str,
32009    ) -> Result<Expression> {
32010        match canonical_upper_name {
32011            // COUNT function
32012            "COUNT" => {
32013                let (this, star, distinct) = if self.check(TokenType::RParen) {
32014                    (None, false, false)
32015                } else if self.match_token(TokenType::Star) {
32016                    (None, true, false)
32017                } else if self.match_token(TokenType::All) {
32018                    // COUNT(ALL expr) - ALL is the default, just consume it
32019                    (Some(self.parse_expression()?), false, false)
32020                } else if self.match_token(TokenType::Distinct) {
32021                    let first_expr = self.parse_expression()?;
32022                    // Check for multiple columns: COUNT(DISTINCT a, b, c)
32023                    if self.match_token(TokenType::Comma) {
32024                        let mut args = vec![first_expr];
32025                        loop {
32026                            args.push(self.parse_expression()?);
32027                            if !self.match_token(TokenType::Comma) {
32028                                break;
32029                            }
32030                        }
32031                        // Return as a tuple expression for COUNT DISTINCT over multiple columns
32032                        (
32033                            Some(Expression::Tuple(Box::new(Tuple { expressions: args }))),
32034                            false,
32035                            true,
32036                        )
32037                    } else {
32038                        (Some(first_expr), false, true)
32039                    }
32040                } else {
32041                    let first_expr = self.parse_expression()?;
32042                    // ClickHouse: consume optional AS alias inside function args (e.g., count(NULL AS a))
32043                    let first_expr = if matches!(
32044                        self.config.dialect,
32045                        Some(crate::dialects::DialectType::ClickHouse)
32046                    ) && self.check(TokenType::As)
32047                    {
32048                        self.advance(); // consume AS
32049                        let alias = self.expect_identifier_or_keyword_with_quoted()?;
32050                        Expression::Alias(Box::new(Alias {
32051                            this: first_expr,
32052                            alias,
32053                            column_aliases: Vec::new(),
32054                            pre_alias_comments: Vec::new(),
32055                            trailing_comments: Vec::new(),
32056                            inferred_type: None,
32057                        }))
32058                    } else {
32059                        first_expr
32060                    };
32061                    // Check for multiple arguments (rare but possible)
32062                    if self.match_token(TokenType::Comma) {
32063                        let mut args = vec![first_expr];
32064                        loop {
32065                            args.push(self.parse_expression()?);
32066                            if !self.match_token(TokenType::Comma) {
32067                                break;
32068                            }
32069                        }
32070                        self.expect(TokenType::RParen)?;
32071                        // Multiple args without DISTINCT - treat as generic function
32072                        return Ok(Expression::Function(Box::new(Function {
32073                            name: name.to_string(),
32074                            args,
32075                            distinct: false,
32076                            trailing_comments: Vec::new(),
32077                            use_bracket_syntax: false,
32078                            no_parens: false,
32079                            quoted: false,
32080                            span: None,
32081                            inferred_type: None,
32082                        })));
32083                    }
32084                    (Some(first_expr), false, false)
32085                };
32086                // BigQuery: RESPECT NULLS / IGNORE NULLS inside COUNT
32087                let ignore_nulls = if self.match_token(TokenType::Ignore)
32088                    && self.match_token(TokenType::Nulls)
32089                {
32090                    Some(true)
32091                } else if self.match_token(TokenType::Respect) && self.match_token(TokenType::Nulls)
32092                {
32093                    Some(false)
32094                } else {
32095                    None
32096                };
32097                self.expect(TokenType::RParen)?;
32098                let filter = self.parse_filter_clause()?;
32099                // Also check for IGNORE NULLS / RESPECT NULLS after the closing paren
32100                let ignore_nulls = if ignore_nulls.is_some() {
32101                    ignore_nulls
32102                } else if self.match_keywords(&[TokenType::Ignore, TokenType::Nulls]) {
32103                    Some(true)
32104                } else if self.match_keywords(&[TokenType::Respect, TokenType::Nulls]) {
32105                    Some(false)
32106                } else {
32107                    None
32108                };
32109                Ok(Expression::Count(Box::new(CountFunc {
32110                    this,
32111                    star,
32112                    distinct,
32113                    filter,
32114                    ignore_nulls,
32115                    original_name: Some(name.to_string()),
32116                    inferred_type: None,
32117                })))
32118            }
32119
32120            // LIST function: LIST(SELECT ...) in Materialize - list constructor with subquery
32121            "LIST" => {
32122                let is_materialize = matches!(
32123                    self.config.dialect,
32124                    Some(crate::dialects::DialectType::Materialize)
32125                );
32126                if is_materialize && self.check(TokenType::Select) {
32127                    let query = self.parse_select()?;
32128                    self.expect(TokenType::RParen)?;
32129                    return Ok(Expression::List(Box::new(List {
32130                        expressions: vec![query],
32131                    })));
32132                }
32133                // For non-Materialize or non-subquery, parse as either generic function or aggregate.
32134                let distinct = self.match_token(TokenType::Distinct);
32135                let args = if self.check(TokenType::RParen) {
32136                    Vec::new()
32137                } else {
32138                    self.parse_function_arguments()?
32139                };
32140                let order_by = if self.match_token(TokenType::Order) {
32141                    self.expect(TokenType::By)?;
32142                    self.parse_order_by_list()?
32143                } else {
32144                    Vec::new()
32145                };
32146                let limit = if self.match_token(TokenType::Limit) {
32147                    Some(Box::new(self.parse_expression()?))
32148                } else {
32149                    None
32150                };
32151                self.expect(TokenType::RParen)?;
32152                let filter = self.parse_filter_clause()?;
32153
32154                if distinct || !order_by.is_empty() || limit.is_some() || filter.is_some() {
32155                    Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
32156                        name: name.to_string(),
32157                        args,
32158                        distinct,
32159                        filter,
32160                        order_by,
32161                        limit,
32162                        ignore_nulls: None,
32163                        inferred_type: None,
32164                    })))
32165                } else {
32166                    Ok(Expression::Function(Box::new(Function {
32167                        name: name.to_string(),
32168                        args,
32169                        distinct: false,
32170                        trailing_comments: Vec::new(),
32171                        use_bracket_syntax: false,
32172                        no_parens: false,
32173                        quoted: false,
32174                        span: None,
32175                        inferred_type: None,
32176                    })))
32177                }
32178            }
32179
32180            // MAP function: MAP(SELECT ...) in Materialize - map constructor with subquery
32181            "MAP" => {
32182                let is_materialize = matches!(
32183                    self.config.dialect,
32184                    Some(crate::dialects::DialectType::Materialize)
32185                );
32186                if is_materialize && self.check(TokenType::Select) {
32187                    let query = self.parse_select()?;
32188                    self.expect(TokenType::RParen)?;
32189                    return Ok(Expression::ToMap(Box::new(ToMap {
32190                        this: Box::new(query),
32191                    })));
32192                }
32193                // For non-Materialize or non-subquery, fall through to generic handling
32194                let args = if self.check(TokenType::RParen) {
32195                    Vec::new()
32196                } else {
32197                    self.parse_function_arguments()?
32198                };
32199                self.expect(TokenType::RParen)?;
32200                Ok(Expression::Function(Box::new(Function {
32201                    name: name.to_string(),
32202                    args,
32203                    distinct: false,
32204                    trailing_comments: Vec::new(),
32205                    use_bracket_syntax: false,
32206                    no_parens: false,
32207                    quoted: false,
32208                    span: None,
32209                    inferred_type: None,
32210                })))
32211            }
32212
32213            // ARRAY function: ARRAY(SELECT ...) or ARRAY((SELECT ...) LIMIT n) is an array constructor with subquery
32214            // Different from ARRAY<type> which is a data type
32215            "ARRAY" => {
32216                // Check if this is ARRAY(SELECT ...) - array subquery constructor
32217                if self.check(TokenType::Select) {
32218                    let query = self.parse_select()?;
32219                    self.expect(TokenType::RParen)?;
32220                    // Pass the query directly as an argument to ARRAY function
32221                    // The generator will handle it correctly
32222                    return Ok(Expression::Function(Box::new(Function {
32223                        name: name.to_string(),
32224                        args: vec![query],
32225                        distinct: false,
32226                        trailing_comments: Vec::new(),
32227                        use_bracket_syntax: false,
32228                        no_parens: false,
32229                        quoted: false,
32230                        span: None,
32231                        inferred_type: None,
32232                    })));
32233                }
32234                // Check if this is ARRAY((SELECT ...) LIMIT n) - BigQuery allows LIMIT outside the subquery parens
32235                // This is common for constructs like ARRAY((SELECT AS STRUCT ...) LIMIT 10)
32236                if self.check(TokenType::LParen) {
32237                    // This could be a parenthesized subquery with modifiers after it
32238                    // Save position in case we need to backtrack
32239                    let saved_pos = self.current;
32240                    self.advance(); // consume opening paren
32241
32242                    // Check if there's a SELECT or WITH inside
32243                    if self.check(TokenType::Select) || self.check(TokenType::With) {
32244                        let inner_query = self.parse_statement()?;
32245                        self.expect(TokenType::RParen)?; // close inner parens
32246
32247                        // Now check for LIMIT/OFFSET modifiers outside the inner parens
32248                        let limit = if self.match_token(TokenType::Limit) {
32249                            let expr = self.parse_expression()?;
32250                            Some(Limit {
32251                                this: expr,
32252                                percent: false,
32253                                comments: Vec::new(),
32254                            })
32255                        } else {
32256                            None
32257                        };
32258
32259                        let offset = if self.match_token(TokenType::Offset) {
32260                            let expr = self.parse_expression()?;
32261                            let rows = if self.match_token(TokenType::Row)
32262                                || self.match_token(TokenType::Rows)
32263                            {
32264                                Some(true)
32265                            } else {
32266                                None
32267                            };
32268                            Some(Offset { this: expr, rows })
32269                        } else {
32270                            None
32271                        };
32272
32273                        self.expect(TokenType::RParen)?; // close ARRAY parens
32274
32275                        // Wrap the inner query in a Subquery with the modifiers
32276                        let subquery = Expression::Subquery(Box::new(Subquery {
32277                            this: inner_query,
32278                            alias: None,
32279                            column_aliases: Vec::new(),
32280                            order_by: None,
32281                            limit,
32282                            offset,
32283                            lateral: false,
32284                            modifiers_inside: false,
32285                            trailing_comments: Vec::new(),
32286                            distribute_by: None,
32287                            sort_by: None,
32288                            cluster_by: None,
32289                            inferred_type: None,
32290                        }));
32291
32292                        return Ok(Expression::Function(Box::new(Function {
32293                            name: name.to_string(),
32294                            args: vec![subquery],
32295                            distinct: false,
32296                            trailing_comments: Vec::new(),
32297                            use_bracket_syntax: false,
32298                            no_parens: false,
32299                            quoted: false,
32300                            span: None,
32301                            inferred_type: None,
32302                        })));
32303                    } else {
32304                        // Not a subquery, backtrack and parse as regular arguments
32305                        self.current = saved_pos;
32306                    }
32307                }
32308                // Otherwise fall through to parse as generic function or error
32309                // This could be ARRAY(...values...) or invalid syntax
32310                let args = if self.check(TokenType::RParen) {
32311                    Vec::new()
32312                } else {
32313                    self.parse_function_arguments()?
32314                };
32315                self.expect(TokenType::RParen)?;
32316                Ok(Expression::Function(Box::new(Function {
32317                    name: name.to_string(),
32318                    args,
32319                    distinct: false,
32320                    trailing_comments: Vec::new(),
32321                    use_bracket_syntax: false,
32322                    no_parens: false,
32323                    quoted: false,
32324                    span: None,
32325                    inferred_type: None,
32326                })))
32327            }
32328
32329            // Simple aggregate functions (SUM, AVG, MIN, MAX, etc.)
32330            // These can have multiple arguments in some contexts (e.g., MAX(a, b) is a scalar function)
32331            "SUM"
32332            | "AVG"
32333            | "MIN"
32334            | "MAX"
32335            | "ARRAY_AGG"
32336            | "ARRAY_CONCAT_AGG"
32337            | "STDDEV"
32338            | "STDDEV_POP"
32339            | "STDDEV_SAMP"
32340            | "VARIANCE"
32341            | "VAR_POP"
32342            | "VAR_SAMP"
32343            | "MEDIAN"
32344            | "MODE"
32345            | "FIRST"
32346            | "LAST"
32347            | "ANY_VALUE"
32348            | "APPROX_DISTINCT"
32349            | "APPROX_COUNT_DISTINCT"
32350            | "BIT_AND"
32351            | "BIT_OR"
32352            | "BIT_XOR" => {
32353                let distinct = if self.match_token(TokenType::Distinct) {
32354                    true
32355                } else {
32356                    self.match_token(TokenType::All); // ALL is the default, just consume it
32357                    false
32358                };
32359
32360                // MODE() can have zero arguments when used with WITHIN GROUP
32361                // e.g., MODE() WITHIN GROUP (ORDER BY col)
32362                if self.check(TokenType::RParen) {
32363                    // Empty args - will likely be followed by WITHIN GROUP
32364                    self.expect(TokenType::RParen)?;
32365                    let filter = self.parse_filter_clause()?;
32366                    let agg = AggFunc {
32367                        ignore_nulls: None,
32368                        this: Expression::Null(Null {}), // Placeholder for 0-arg aggregate
32369                        distinct: false,
32370                        filter,
32371                        order_by: Vec::new(),
32372                        having_max: None,
32373                        name: Some(name.to_string()),
32374                        limit: None,
32375                        inferred_type: None,
32376                    };
32377                    return Ok(match upper_name {
32378                        "MODE" => Expression::Mode(Box::new(agg)),
32379                        _ => {
32380                            // ClickHouse: allow zero-arg aggregates (server will validate)
32381                            if matches!(
32382                                self.config.dialect,
32383                                Some(crate::dialects::DialectType::ClickHouse)
32384                            ) {
32385                                Expression::Function(Box::new(Function {
32386                                    name: name.to_string(),
32387                                    args: Vec::new(),
32388                                    distinct: false,
32389                                    trailing_comments: Vec::new(),
32390                                    use_bracket_syntax: false,
32391                                    no_parens: false,
32392                                    quoted: false,
32393                                    span: None,
32394                                    inferred_type: None,
32395                                }))
32396                            } else {
32397                                return Err(self.parse_error(format!(
32398                                    "{} cannot have zero arguments",
32399                                    upper_name
32400                                )));
32401                            }
32402                        }
32403                    });
32404                }
32405
32406                let first_arg = self.parse_expression_with_clickhouse_alias()?;
32407
32408                // Check if there are more arguments (multi-arg scalar function like MAX(a, b))
32409                if self.match_token(TokenType::Comma) {
32410                    // Special handling for FIRST, LAST, ANY_VALUE with boolean second arg
32411                    // In Spark/Hive: first(col, true) means FIRST(col) IGNORE NULLS
32412                    let is_ignore_nulls_func = matches!(upper_name, "FIRST" | "LAST" | "ANY_VALUE");
32413
32414                    let second_arg = self.parse_expression()?;
32415
32416                    // Check if this is the IGNORE NULLS pattern: func(col, true)
32417                    if is_ignore_nulls_func && self.check(TokenType::RParen) {
32418                        if let Expression::Boolean(BooleanLiteral { value: true }) = &second_arg {
32419                            // This is func(col, true) -> FUNC(col) IGNORE NULLS
32420                            self.expect(TokenType::RParen)?;
32421                            let filter = self.parse_filter_clause()?;
32422                            let agg = AggFunc {
32423                                ignore_nulls: Some(true),
32424                                this: first_arg,
32425                                distinct,
32426                                filter,
32427                                order_by: Vec::new(),
32428                                having_max: None,
32429                                name: Some(name.to_string()),
32430                                limit: None,
32431                                inferred_type: None,
32432                            };
32433                            return Ok(match upper_name {
32434                                "FIRST" => Expression::First(Box::new(agg)),
32435                                "LAST" => Expression::Last(Box::new(agg)),
32436                                "ANY_VALUE" => Expression::AnyValue(Box::new(agg)),
32437                                _ => unreachable!(
32438                                    "function name already matched by is_ignore_nulls_func guard"
32439                                ),
32440                            });
32441                        }
32442                    }
32443
32444                    // Multiple arguments - treat as generic function call
32445                    let mut args = vec![first_arg, second_arg];
32446                    while self.match_token(TokenType::Comma) {
32447                        args.push(self.parse_expression()?);
32448                    }
32449                    self.expect(TokenType::RParen)?;
32450                    Ok(Expression::Function(Box::new(Function {
32451                        name: name.to_string(),
32452                        args,
32453                        distinct: false,
32454                        trailing_comments: Vec::new(),
32455                        use_bracket_syntax: false,
32456                        no_parens: false,
32457                        quoted: false,
32458                        span: None,
32459                        inferred_type: None,
32460                    })))
32461                } else {
32462                    // Check for IGNORE NULLS / RESPECT NULLS (BigQuery style)
32463                    let ignore_nulls = if self.match_token(TokenType::Ignore)
32464                        && self.match_token(TokenType::Nulls)
32465                    {
32466                        Some(true)
32467                    } else if self.match_token(TokenType::Respect)
32468                        && self.match_token(TokenType::Nulls)
32469                    {
32470                        Some(false)
32471                    } else {
32472                        None
32473                    };
32474
32475                    // Check for HAVING MAX/MIN inside aggregate (BigQuery syntax)
32476                    // e.g., ANY_VALUE(fruit HAVING MAX sold)
32477                    let having_max = if self.match_token(TokenType::Having) {
32478                        let is_max = if self.check_keyword_text("MAX") {
32479                            self.advance();
32480                            true
32481                        } else if self.check_keyword_text("MIN") {
32482                            self.advance();
32483                            false
32484                        } else {
32485                            return Err(
32486                                self.parse_error("Expected MAX or MIN after HAVING in aggregate")
32487                            );
32488                        };
32489                        let expr = self.parse_expression()?;
32490                        Some((Box::new(expr), is_max))
32491                    } else {
32492                        None
32493                    };
32494
32495                    // Check for ORDER BY inside aggregate (e.g., ARRAY_AGG(x ORDER BY y))
32496                    let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
32497                        self.parse_order_by_list()?
32498                    } else {
32499                        Vec::new()
32500                    };
32501                    // Check for LIMIT inside aggregate (e.g., ARRAY_AGG(x ORDER BY y LIMIT 2))
32502                    // Also supports LIMIT offset, count (e.g., ARRAY_AGG(x ORDER BY y LIMIT 1, 10))
32503                    let limit = if self.match_token(TokenType::Limit) {
32504                        let first = self.parse_expression()?;
32505                        if self.match_token(TokenType::Comma) {
32506                            let second = self.parse_expression()?;
32507                            // Store as Tuple(offset, count)
32508                            Some(Box::new(Expression::Tuple(Box::new(Tuple {
32509                                expressions: vec![first, second],
32510                            }))))
32511                        } else {
32512                            Some(Box::new(first))
32513                        }
32514                    } else {
32515                        None
32516                    };
32517                    // Single argument - treat as aggregate function
32518                    self.expect(TokenType::RParen)?;
32519                    let filter = self.parse_filter_clause()?;
32520                    // Also check for IGNORE NULLS / RESPECT NULLS after the closing paren
32521                    // e.g., FIRST(col) IGNORE NULLS (Hive/Spark/generic SQL syntax)
32522                    let ignore_nulls = if ignore_nulls.is_some() {
32523                        ignore_nulls
32524                    } else if self.match_keywords(&[TokenType::Ignore, TokenType::Nulls]) {
32525                        Some(true)
32526                    } else if self.match_keywords(&[TokenType::Respect, TokenType::Nulls]) {
32527                        Some(false)
32528                    } else {
32529                        None
32530                    };
32531                    let agg = AggFunc {
32532                        ignore_nulls,
32533                        this: first_arg,
32534                        distinct,
32535                        filter,
32536                        order_by,
32537                        having_max,
32538                        name: Some(name.to_string()),
32539                        limit,
32540                        inferred_type: None,
32541                    };
32542                    Ok(match upper_name {
32543                        "SUM" => Expression::Sum(Box::new(agg)),
32544                        "AVG" => Expression::Avg(Box::new(agg)),
32545                        "MIN" => Expression::Min(Box::new(agg)),
32546                        "MAX" => Expression::Max(Box::new(agg)),
32547                        "ARRAY_AGG" => Expression::ArrayAgg(Box::new(agg)),
32548                        "ARRAY_CONCAT_AGG" => Expression::ArrayConcatAgg(Box::new(agg)),
32549                        "STDDEV" => Expression::Stddev(Box::new(agg)),
32550                        "STDDEV_POP" => Expression::StddevPop(Box::new(agg)),
32551                        "STDDEV_SAMP" => Expression::StddevSamp(Box::new(agg)),
32552                        "VARIANCE" => Expression::Variance(Box::new(agg)),
32553                        "VAR_POP" => Expression::VarPop(Box::new(agg)),
32554                        "VAR_SAMP" => Expression::VarSamp(Box::new(agg)),
32555                        "MEDIAN" => Expression::Median(Box::new(agg)),
32556                        "MODE" => Expression::Mode(Box::new(agg)),
32557                        "FIRST" => Expression::First(Box::new(agg)),
32558                        "LAST" => Expression::Last(Box::new(agg)),
32559                        "ANY_VALUE" => Expression::AnyValue(Box::new(agg)),
32560                        "APPROX_DISTINCT" => Expression::ApproxDistinct(Box::new(agg)),
32561                        "APPROX_COUNT_DISTINCT" => Expression::ApproxCountDistinct(Box::new(agg)),
32562                        "BIT_AND" => Expression::BitwiseAndAgg(Box::new(agg)),
32563                        "BIT_OR" => Expression::BitwiseOrAgg(Box::new(agg)),
32564                        "BIT_XOR" => Expression::BitwiseXorAgg(Box::new(agg)),
32565                        _ => unreachable!("aggregate function name already matched in caller"),
32566                    })
32567                }
32568            }
32569
32570            // STRING_AGG - STRING_AGG([DISTINCT] expr [, separator] [ORDER BY order_list])
32571            "STRING_AGG" => {
32572                let distinct = self.match_token(TokenType::Distinct);
32573                let this = self.parse_expression()?;
32574                // Separator is optional
32575                let separator = if self.match_token(TokenType::Comma) {
32576                    Some(self.parse_expression()?)
32577                } else {
32578                    None
32579                };
32580                let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
32581                    Some(self.parse_order_by_list()?)
32582                } else {
32583                    None
32584                };
32585                // BigQuery: LIMIT inside STRING_AGG
32586                let limit = if self.match_token(TokenType::Limit) {
32587                    Some(Box::new(self.parse_expression()?))
32588                } else {
32589                    None
32590                };
32591                self.expect(TokenType::RParen)?;
32592                let filter = self.parse_filter_clause()?;
32593                Ok(Expression::StringAgg(Box::new(StringAggFunc {
32594                    this,
32595                    separator,
32596                    order_by,
32597                    distinct,
32598                    filter,
32599                    limit,
32600                    inferred_type: None,
32601                })))
32602            }
32603
32604            // GROUP_CONCAT - GROUP_CONCAT([DISTINCT] expr [, expr...] [ORDER BY order_list] [SEPARATOR 'sep'])
32605            // MySQL allows multiple args which get wrapped in CONCAT:
32606            // GROUP_CONCAT(a, b, c SEPARATOR ',') -> GroupConcat(CONCAT(a, b, c), SEPARATOR=',')
32607            "GROUP_CONCAT" => {
32608                let distinct = self.match_token(TokenType::Distinct);
32609                let first = self.parse_expression()?;
32610                // Check for additional comma-separated expressions (before ORDER BY or SEPARATOR)
32611                let mut exprs = vec![first];
32612                while self.match_token(TokenType::Comma) {
32613                    // Check if the next tokens are ORDER BY or SEPARATOR
32614                    // If so, the comma was part of the separator syntax (not more args)
32615                    if self.check(TokenType::Order) || self.check(TokenType::Separator) {
32616                        // This shouldn't happen normally in valid SQL, backtrack
32617                        break;
32618                    }
32619                    exprs.push(self.parse_expression()?);
32620                }
32621                // If multiple expressions, wrap in CONCAT (matches Python sqlglot behavior)
32622                let this = if exprs.len() == 1 {
32623                    exprs.pop().unwrap()
32624                } else {
32625                    Expression::Function(Box::new(Function::new("CONCAT".to_string(), exprs)))
32626                };
32627                // Parse optional ORDER BY
32628                let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
32629                    Some(self.parse_order_by_list()?)
32630                } else {
32631                    None
32632                };
32633                // Parse optional SEPARATOR - can be a string literal or expression (e.g., variable)
32634                let separator = if self.match_token(TokenType::Separator) {
32635                    Some(self.parse_expression()?)
32636                } else {
32637                    None
32638                };
32639                self.expect(TokenType::RParen)?;
32640                let filter = self.parse_filter_clause()?;
32641                Ok(Expression::GroupConcat(Box::new(GroupConcatFunc {
32642                    this,
32643                    separator,
32644                    order_by,
32645                    distinct,
32646                    filter,
32647                    inferred_type: None,
32648                })))
32649            }
32650
32651            // LISTAGG - LISTAGG([DISTINCT] expr [, separator [ON OVERFLOW ...]]) WITHIN GROUP (ORDER BY ...)
32652            "LISTAGG" => {
32653                // Check for optional DISTINCT
32654                let distinct = self.match_token(TokenType::Distinct);
32655                let this = self.parse_expression()?;
32656                let separator = if self.match_token(TokenType::Comma) {
32657                    Some(self.parse_expression()?)
32658                } else {
32659                    None
32660                };
32661                // Parse optional ON OVERFLOW clause
32662                let on_overflow = if self.match_token(TokenType::On) {
32663                    if self.match_identifier("OVERFLOW") {
32664                        if self.match_identifier("ERROR") {
32665                            Some(ListAggOverflow::Error)
32666                        } else if self.match_token(TokenType::Truncate) {
32667                            // Optional filler string
32668                            let filler = if self.check(TokenType::String) {
32669                                Some(self.parse_expression()?)
32670                            } else {
32671                                None
32672                            };
32673                            // WITH COUNT or WITHOUT COUNT
32674                            let with_count = if self.match_token(TokenType::With) {
32675                                self.match_identifier("COUNT");
32676                                true
32677                            } else if self.match_identifier("WITHOUT") {
32678                                self.match_identifier("COUNT");
32679                                false
32680                            } else {
32681                                true // default is WITH COUNT
32682                            };
32683                            Some(ListAggOverflow::Truncate { filler, with_count })
32684                        } else {
32685                            None
32686                        }
32687                    } else {
32688                        None
32689                    }
32690                } else {
32691                    None
32692                };
32693                self.expect(TokenType::RParen)?;
32694                // WITHIN GROUP (ORDER BY ...) is handled by maybe_parse_over
32695                Ok(Expression::ListAgg(Box::new(ListAggFunc {
32696                    this,
32697                    separator,
32698                    on_overflow,
32699                    order_by: None,
32700                    distinct,
32701                    filter: None,
32702                    inferred_type: None,
32703                })))
32704            }
32705            _ => unreachable!(
32706                "phase-6 aggregate parser called with non-aggregate family name '{}'",
32707                canonical_upper_name
32708            ),
32709        }
32710    }
32711
32712    fn parse_typed_window_family(
32713        &mut self,
32714        name: &str,
32715        upper_name: &str,
32716        canonical_upper_name: &str,
32717    ) -> Result<Expression> {
32718        match canonical_upper_name {
32719            // Window functions with no arguments (ClickHouse allows args in row_number)
32720            "ROW_NUMBER" => {
32721                if self.check(TokenType::RParen) {
32722                    self.advance();
32723                    Ok(Expression::RowNumber(RowNumber))
32724                } else {
32725                    // ClickHouse: row_number(column1) — parse as regular function
32726                    let args = self.parse_function_args_list()?;
32727                    self.expect(TokenType::RParen)?;
32728                    let trailing_comments = self.previous_trailing_comments();
32729                    Ok(Expression::Function(Box::new(Function {
32730                        name: name.to_string(),
32731                        args,
32732                        distinct: false,
32733                        trailing_comments,
32734                        use_bracket_syntax: false,
32735                        no_parens: false,
32736                        quoted: false,
32737                        span: None,
32738                        inferred_type: None,
32739                    })))
32740                }
32741            }
32742            "RANK" => {
32743                // DuckDB allows: RANK(ORDER BY col) OVER (...)
32744                // Oracle allows: RANK(val1, val2, ...) WITHIN GROUP (ORDER BY ...)
32745                let (order_by, args) = if self.check(TokenType::RParen) {
32746                    // RANK() - no arguments
32747                    (None, Vec::new())
32748                } else if self.match_token(TokenType::Order) {
32749                    // DuckDB: RANK(ORDER BY col)
32750                    self.expect(TokenType::By)?;
32751                    (Some(self.parse_order_by()?.expressions), Vec::new())
32752                } else {
32753                    // Oracle hypothetical: RANK(val1, val2, ...)
32754                    let mut args = vec![self.parse_expression()?];
32755                    while self.match_token(TokenType::Comma) {
32756                        args.push(self.parse_expression()?);
32757                    }
32758                    (None, args)
32759                };
32760                self.expect(TokenType::RParen)?;
32761                Ok(Expression::Rank(Rank { order_by, args }))
32762            }
32763            "DENSE_RANK" => {
32764                // Oracle allows: DENSE_RANK(val1, val2, ...) WITHIN GROUP (ORDER BY ...)
32765                let args = if self.check(TokenType::RParen) {
32766                    Vec::new()
32767                } else {
32768                    let mut args = vec![self.parse_expression()?];
32769                    while self.match_token(TokenType::Comma) {
32770                        args.push(self.parse_expression()?);
32771                    }
32772                    args
32773                };
32774                self.expect(TokenType::RParen)?;
32775                Ok(Expression::DenseRank(DenseRank { args }))
32776            }
32777            "PERCENT_RANK" => {
32778                // DuckDB allows: PERCENT_RANK(ORDER BY col) OVER (...)
32779                // Oracle allows: PERCENT_RANK(val1, val2, ...) WITHIN GROUP (ORDER BY ...)
32780                let (order_by, args) = if self.check(TokenType::RParen) {
32781                    // PERCENT_RANK() - no arguments
32782                    (None, Vec::new())
32783                } else if self.match_token(TokenType::Order) {
32784                    // DuckDB: PERCENT_RANK(ORDER BY col)
32785                    self.expect(TokenType::By)?;
32786                    (Some(self.parse_order_by()?.expressions), Vec::new())
32787                } else {
32788                    // Oracle hypothetical: PERCENT_RANK(val1, val2, ...)
32789                    let mut args = vec![self.parse_expression()?];
32790                    while self.match_token(TokenType::Comma) {
32791                        args.push(self.parse_expression()?);
32792                    }
32793                    (None, args)
32794                };
32795                self.expect(TokenType::RParen)?;
32796                Ok(Expression::PercentRank(PercentRank { order_by, args }))
32797            }
32798            "CUME_DIST" => {
32799                // DuckDB allows: CUME_DIST(ORDER BY col) OVER (...)
32800                // Oracle allows: CUME_DIST(val1, val2, ...) WITHIN GROUP (ORDER BY ...)
32801                let (order_by, args) = if self.check(TokenType::RParen) {
32802                    // CUME_DIST() - no arguments
32803                    (None, Vec::new())
32804                } else if self.match_token(TokenType::Order) {
32805                    // DuckDB: CUME_DIST(ORDER BY col)
32806                    self.expect(TokenType::By)?;
32807                    (Some(self.parse_order_by()?.expressions), Vec::new())
32808                } else {
32809                    // Oracle hypothetical: CUME_DIST(val1, val2, ...)
32810                    let mut args = vec![self.parse_expression()?];
32811                    while self.match_token(TokenType::Comma) {
32812                        args.push(self.parse_expression()?);
32813                    }
32814                    (None, args)
32815                };
32816                self.expect(TokenType::RParen)?;
32817                Ok(Expression::CumeDist(CumeDist { order_by, args }))
32818            }
32819
32820            // NTILE
32821            "NTILE" => {
32822                // num_buckets is optional (Databricks allows NTILE() with no args)
32823                let num_buckets = if self.check(TokenType::RParen) {
32824                    None
32825                } else {
32826                    Some(self.parse_expression()?)
32827                };
32828
32829                // ClickHouse: NTILE can have extra args (e.g., ntile(3, 2)) — skip them
32830                while matches!(
32831                    self.config.dialect,
32832                    Some(crate::dialects::DialectType::ClickHouse)
32833                ) && self.match_token(TokenType::Comma)
32834                {
32835                    let _ = self.parse_expression()?;
32836                }
32837
32838                // DuckDB allows: NTILE(n ORDER BY col) OVER (...)
32839                let order_by = if self.match_token(TokenType::Order) {
32840                    self.expect(TokenType::By)?;
32841                    Some(self.parse_order_by()?.expressions)
32842                } else {
32843                    None
32844                };
32845                self.expect(TokenType::RParen)?;
32846                Ok(Expression::NTile(Box::new(NTileFunc {
32847                    num_buckets,
32848                    order_by,
32849                })))
32850            }
32851
32852            // LEAD / LAG
32853            "LEAD" | "LAG" => {
32854                let this = self.parse_expression()?;
32855                let (offset, default) = if self.match_token(TokenType::Comma) {
32856                    let off = self.parse_expression()?;
32857                    let def = if self.match_token(TokenType::Comma) {
32858                        Some(self.parse_expression()?)
32859                    } else {
32860                        None
32861                    };
32862                    (Some(off), def)
32863                } else {
32864                    (None, None)
32865                };
32866                self.expect(TokenType::RParen)?;
32867                // Check for IGNORE NULLS / RESPECT NULLS
32868                let ignore_nulls = if self.match_keywords(&[TokenType::Ignore, TokenType::Nulls]) {
32869                    Some(true)
32870                } else if self.match_keywords(&[TokenType::Respect, TokenType::Nulls]) {
32871                    Some(false)
32872                } else {
32873                    None
32874                };
32875                let func = LeadLagFunc {
32876                    this,
32877                    offset,
32878                    default,
32879                    ignore_nulls,
32880                };
32881                Ok(if upper_name == "LEAD" {
32882                    Expression::Lead(Box::new(func))
32883                } else {
32884                    Expression::Lag(Box::new(func))
32885                })
32886            }
32887
32888            // FIRST_VALUE / LAST_VALUE
32889            "FIRST_VALUE" | "LAST_VALUE" => {
32890                let this = self.parse_expression()?;
32891                // Check for IGNORE NULLS / RESPECT NULLS inside the parens
32892                let mut ignore_nulls_inside = if self.match_token(TokenType::Ignore)
32893                    && self.match_token(TokenType::Nulls)
32894                {
32895                    Some(true)
32896                } else if self.match_token(TokenType::Respect) && self.match_token(TokenType::Nulls)
32897                {
32898                    Some(false) // RESPECT NULLS explicitly sets to false
32899                } else {
32900                    None
32901                };
32902                // Spark/Hive: first_value(col, true) means FIRST_VALUE(col) IGNORE NULLS
32903                if ignore_nulls_inside.is_none() && self.match_token(TokenType::Comma) {
32904                    let second_arg = self.parse_expression()?;
32905                    if let Expression::Boolean(BooleanLiteral { value: true }) = &second_arg {
32906                        ignore_nulls_inside = Some(true);
32907                    }
32908                    // If second arg is not true, just ignore it (not standard)
32909                }
32910                self.expect(TokenType::RParen)?;
32911                // Also check for IGNORE NULLS / RESPECT NULLS after the parens (some dialects use this syntax)
32912                let ignore_nulls: Option<bool> = if ignore_nulls_inside.is_some() {
32913                    ignore_nulls_inside
32914                } else if self.match_keywords(&[TokenType::Ignore, TokenType::Nulls]) {
32915                    Some(true)
32916                } else if self.match_keywords(&[TokenType::Respect, TokenType::Nulls]) {
32917                    Some(false)
32918                } else {
32919                    None
32920                };
32921                let func = ValueFunc { this, ignore_nulls };
32922                Ok(if upper_name == "FIRST_VALUE" {
32923                    Expression::FirstValue(Box::new(func))
32924                } else {
32925                    Expression::LastValue(Box::new(func))
32926                })
32927            }
32928
32929            // NTH_VALUE
32930            "NTH_VALUE" => {
32931                let this = self.parse_expression()?;
32932                self.expect(TokenType::Comma)?;
32933                let offset = self.parse_expression()?;
32934                // Check for IGNORE NULLS / RESPECT NULLS inside the parens
32935                let ignore_nulls_inside = if self.match_token(TokenType::Ignore)
32936                    && self.match_token(TokenType::Nulls)
32937                {
32938                    Some(true)
32939                } else if self.match_token(TokenType::Respect) && self.match_token(TokenType::Nulls)
32940                {
32941                    Some(false)
32942                } else {
32943                    None
32944                };
32945                self.expect(TokenType::RParen)?;
32946                // Check for Snowflake FROM FIRST / FROM LAST after the parens
32947                let from_first = if self.match_keywords(&[TokenType::From, TokenType::First]) {
32948                    Some(true)
32949                } else if self.match_keywords(&[TokenType::From, TokenType::Last]) {
32950                    Some(false)
32951                } else {
32952                    None
32953                };
32954                // Also check for IGNORE NULLS / RESPECT NULLS after the parens (and after FROM FIRST/LAST)
32955                let ignore_nulls: Option<bool> = if ignore_nulls_inside.is_some() {
32956                    ignore_nulls_inside
32957                } else if self.match_keywords(&[TokenType::Ignore, TokenType::Nulls]) {
32958                    Some(true)
32959                } else if self.match_keywords(&[TokenType::Respect, TokenType::Nulls]) {
32960                    Some(false)
32961                } else {
32962                    None
32963                };
32964                Ok(Expression::NthValue(Box::new(NthValueFunc {
32965                    this,
32966                    offset,
32967                    ignore_nulls,
32968                    from_first,
32969                })))
32970            }
32971            _ => unreachable!(
32972                "phase-6 window parser called with non-window family name '{}'",
32973                canonical_upper_name
32974            ),
32975        }
32976    }
32977
32978    fn parse_typed_json_family(
32979        &mut self,
32980        name: &str,
32981        upper_name: &str,
32982        canonical_upper_name: &str,
32983    ) -> Result<Expression> {
32984        match canonical_upper_name {
32985            // JSON functions
32986            "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" | "JSON_QUERY" | "JSON_VALUE" => {
32987                let this = self.parse_expression()?;
32988                // Path is optional for some dialects (e.g., TSQL JSON_QUERY with 1 arg defaults to '$')
32989                let path = if self.match_token(TokenType::Comma) {
32990                    self.parse_expression()?
32991                } else {
32992                    // Default path is '$' when not provided
32993                    Expression::Literal(Literal::String("$".to_string()))
32994                };
32995
32996                // SQLite JSON_EXTRACT supports multiple paths - check for additional paths
32997                // If multiple paths, use generic Function instead of typed expression
32998                if self.check(TokenType::Comma)
32999                    && !self.check_identifier("WITH")
33000                    && !self.check_identifier("WITHOUT")
33001                    && !self.check_identifier("KEEP")
33002                    && !self.check_identifier("OMIT")
33003                    && !self.check_identifier("NULL")
33004                    && !self.check_identifier("ERROR")
33005                    && !self.check_identifier("EMPTY")
33006                    && !self.check(TokenType::Returning)
33007                {
33008                    let mut args = vec![this, path];
33009                    while self.match_token(TokenType::Comma) {
33010                        args.push(self.parse_expression()?);
33011                    }
33012                    self.expect(TokenType::RParen)?;
33013                    return Ok(Expression::Function(Box::new(Function {
33014                        name: name.to_string(),
33015                        args,
33016                        distinct: false,
33017                        trailing_comments: Vec::new(),
33018                        use_bracket_syntax: false,
33019                        no_parens: false,
33020                        quoted: false,
33021                        span: None,
33022                        inferred_type: None,
33023                    })));
33024                }
33025
33026                // Parse JSON_QUERY/JSON_VALUE options (Trino/Presto style)
33027                // Options: WITH/WITHOUT [CONDITIONAL|UNCONDITIONAL] [ARRAY] WRAPPER
33028                //          KEEP QUOTES / OMIT QUOTES [ON SCALAR STRING]
33029                //          NULL ON ERROR / ERROR ON ERROR / EMPTY ON ERROR
33030                //          RETURNING type
33031                let mut wrapper_option: Option<String> = None;
33032                let mut quotes_option: Option<String> = None;
33033                let mut on_scalar_string = false;
33034                let mut on_error: Option<String> = None;
33035                let mut returning: Option<DataType> = None;
33036
33037                // Keep parsing options until we see RParen
33038                while !self.check(TokenType::RParen) {
33039                    // WITH [CONDITIONAL|UNCONDITIONAL] [ARRAY] WRAPPER - match in order of specificity
33040                    if self.match_text_seq(&["WITH", "UNCONDITIONAL", "ARRAY", "WRAPPER"]) {
33041                        wrapper_option = Some("WITH UNCONDITIONAL ARRAY WRAPPER".to_string());
33042                    } else if self.match_text_seq(&["WITH", "CONDITIONAL", "ARRAY", "WRAPPER"]) {
33043                        wrapper_option = Some("WITH CONDITIONAL ARRAY WRAPPER".to_string());
33044                    } else if self.match_text_seq(&["WITH", "UNCONDITIONAL", "WRAPPER"]) {
33045                        wrapper_option = Some("WITH UNCONDITIONAL WRAPPER".to_string());
33046                    } else if self.match_text_seq(&["WITH", "CONDITIONAL", "WRAPPER"]) {
33047                        wrapper_option = Some("WITH CONDITIONAL WRAPPER".to_string());
33048                    } else if self.match_text_seq(&["WITH", "ARRAY", "WRAPPER"]) {
33049                        wrapper_option = Some("WITH ARRAY WRAPPER".to_string());
33050                    } else if self.match_text_seq(&["WITH", "WRAPPER"]) {
33051                        wrapper_option = Some("WITH WRAPPER".to_string());
33052                    // WITHOUT [CONDITIONAL] [ARRAY] WRAPPER
33053                    } else if self.match_text_seq(&["WITHOUT", "CONDITIONAL", "ARRAY", "WRAPPER"]) {
33054                        wrapper_option = Some("WITHOUT CONDITIONAL ARRAY WRAPPER".to_string());
33055                    } else if self.match_text_seq(&["WITHOUT", "CONDITIONAL", "WRAPPER"]) {
33056                        wrapper_option = Some("WITHOUT CONDITIONAL WRAPPER".to_string());
33057                    } else if self.match_text_seq(&["WITHOUT", "ARRAY", "WRAPPER"]) {
33058                        wrapper_option = Some("WITHOUT ARRAY WRAPPER".to_string());
33059                    } else if self.match_text_seq(&["WITHOUT", "WRAPPER"]) {
33060                        wrapper_option = Some("WITHOUT WRAPPER".to_string());
33061                    } else if self.match_text_seq(&["KEEP", "QUOTES"]) {
33062                        // KEEP QUOTES
33063                        quotes_option = Some("KEEP QUOTES".to_string());
33064                    } else if self.match_text_seq(&["OMIT", "QUOTES", "ON", "SCALAR", "STRING"]) {
33065                        // OMIT QUOTES ON SCALAR STRING
33066                        quotes_option = Some("OMIT QUOTES".to_string());
33067                        on_scalar_string = true;
33068                    } else if self.match_text_seq(&["OMIT", "QUOTES"]) {
33069                        // OMIT QUOTES
33070                        quotes_option = Some("OMIT QUOTES".to_string());
33071                    } else if self.match_text_seq(&["NULL", "ON", "ERROR"]) {
33072                        on_error = Some("NULL ON ERROR".to_string());
33073                    } else if self.match_text_seq(&["ERROR", "ON", "ERROR"]) {
33074                        on_error = Some("ERROR ON ERROR".to_string());
33075                    } else if self.match_text_seq(&["EMPTY", "ON", "ERROR"]) {
33076                        on_error = Some("EMPTY ON ERROR".to_string());
33077                    } else if self.match_token(TokenType::Returning) {
33078                        // RETURNING type
33079                        returning = Some(self.parse_data_type()?);
33080                    } else {
33081                        // No more options recognized, break
33082                        break;
33083                    }
33084                }
33085
33086                self.expect(TokenType::RParen)?;
33087                let func = JsonExtractFunc {
33088                    this,
33089                    path,
33090                    returning,
33091                    arrow_syntax: false,
33092                    hash_arrow_syntax: false,
33093                    wrapper_option,
33094                    quotes_option,
33095                    on_scalar_string,
33096                    on_error,
33097                };
33098                Ok(match upper_name {
33099                    "JSON_EXTRACT" => Expression::JsonExtract(Box::new(func)),
33100                    "JSON_EXTRACT_SCALAR" => Expression::JsonExtractScalar(Box::new(func)),
33101                    "JSON_QUERY" => Expression::JsonQuery(Box::new(func)),
33102                    "JSON_VALUE" => Expression::JsonValue(Box::new(func)),
33103                    _ => unreachable!("JSON function name already matched in caller"),
33104                })
33105            }
33106            // JSON_KEYS, TO_JSON, PARSE_JSON etc. support additional args including named args (BigQuery)
33107            // e.g., JSON_KEYS(expr, depth, mode => 'lax'), TO_JSON(expr, stringify_wide_numbers => FALSE)
33108            // e.g., PARSE_JSON('{}', wide_number_mode => 'exact')
33109            "JSON_ARRAY_LENGTH" | "JSON_KEYS" | "JSON_TYPE" | "TO_JSON" | "PARSE_JSON" => {
33110                let this = self.parse_expression()?;
33111                // ClickHouse: expr AS alias inside function args
33112                let this = self.maybe_clickhouse_alias(this);
33113
33114                // Check for additional arguments (comma-separated, possibly named)
33115                if self.match_token(TokenType::Comma) {
33116                    // Has additional arguments - parse as generic Function to preserve all args
33117                    let mut all_args = vec![this];
33118                    let remaining = self.parse_function_arguments()?;
33119                    all_args.extend(remaining);
33120                    self.expect(TokenType::RParen)?;
33121                    Ok(Expression::Function(Box::new(Function {
33122                        name: name.to_string(),
33123                        args: all_args,
33124                        distinct: false,
33125                        trailing_comments: Vec::new(),
33126                        use_bracket_syntax: false,
33127                        no_parens: false,
33128                        quoted: false,
33129                        span: None,
33130                        inferred_type: None,
33131                    })))
33132                } else {
33133                    // Single argument - use typed expression
33134                    self.expect(TokenType::RParen)?;
33135                    let func = UnaryFunc::new(this);
33136                    Ok(match canonical_upper_name {
33137                        "JSON_ARRAY_LENGTH" => Expression::JsonArrayLength(Box::new(func)),
33138                        "JSON_KEYS" => Expression::JsonKeys(Box::new(func)),
33139                        "JSON_TYPE" => Expression::JsonType(Box::new(func)),
33140                        "TO_JSON" => Expression::ToJson(Box::new(func)),
33141                        "PARSE_JSON" => Expression::ParseJson(Box::new(func)),
33142                        _ => unreachable!("JSON function name already matched in caller"),
33143                    })
33144                }
33145            }
33146
33147            // JSON_OBJECT with SQL standard syntax: JSON_OBJECT('key': value, ...) or JSON_OBJECT(*)
33148            "JSON_OBJECT" => {
33149                let mut pairs = Vec::new();
33150                let mut star = false;
33151                if !self.check(TokenType::RParen) {
33152                    // Check for JSON_OBJECT(*) syntax
33153                    if self.check(TokenType::Star) && self.check_next(TokenType::RParen) {
33154                        self.advance(); // consume *
33155                        star = true;
33156                    } else {
33157                        loop {
33158                            // Check for KEY keyword for KEY 'key' IS value syntax (KEY is a keyword token)
33159                            let has_key_keyword = self.match_token(TokenType::Key);
33160                            // Parse key: try string first (for 'key' syntax), then column
33161                            let key = if let Some(s) = self.parse_string()? {
33162                                s
33163                            } else {
33164                                // Use parse_column for key to avoid interpreting colon as JSON path
33165                                self.parse_column()?.ok_or_else(|| {
33166                                    self.parse_error("Expected key expression in JSON_OBJECT")
33167                                })?
33168                            };
33169
33170                            // Support colon, VALUE keyword (identifier), and IS keyword (for KEY...IS syntax)
33171                            let has_separator = self.match_token(TokenType::Colon)
33172                                || self.match_identifier("VALUE")
33173                                || (has_key_keyword && self.match_token(TokenType::Is));
33174
33175                            if has_separator {
33176                                let value = self.parse_bitwise()?.ok_or_else(|| {
33177                                    self.parse_error("Expected value expression in JSON_OBJECT")
33178                                })?;
33179                                // Check for FORMAT JSON after value
33180                                let value_with_format = if self.match_text_seq(&["FORMAT", "JSON"])
33181                                {
33182                                    Expression::JSONFormat(Box::new(JSONFormat {
33183                                        this: Some(Box::new(value)),
33184                                        options: Vec::new(),
33185                                        is_json: None,
33186                                        to_json: None,
33187                                    }))
33188                                } else {
33189                                    value
33190                                };
33191                                pairs.push((key, value_with_format));
33192                            } else {
33193                                // Just key/value pairs without separator
33194                                if self.match_token(TokenType::Comma) {
33195                                    let value = self.parse_bitwise()?.ok_or_else(|| {
33196                                        self.parse_error("Expected value expression in JSON_OBJECT")
33197                                    })?;
33198                                    pairs.push((key, value));
33199                                } else {
33200                                    return Err(self
33201                                        .parse_error("Expected value expression in JSON_OBJECT"));
33202                                }
33203                            }
33204                            if !self.match_token(TokenType::Comma) {
33205                                break;
33206                            }
33207                        }
33208                    }
33209                }
33210                // Parse optional modifiers: NULL ON NULL, ABSENT ON NULL, WITH UNIQUE KEYS
33211                let null_handling = if self.match_token(TokenType::Null) {
33212                    self.match_token(TokenType::On);
33213                    self.match_token(TokenType::Null);
33214                    Some(JsonNullHandling::NullOnNull)
33215                } else if self.match_identifier("ABSENT") {
33216                    self.match_token(TokenType::On);
33217                    self.match_token(TokenType::Null);
33218                    Some(JsonNullHandling::AbsentOnNull)
33219                } else {
33220                    None
33221                };
33222                let with_unique_keys = if self.match_token(TokenType::With) {
33223                    self.match_token(TokenType::Unique);
33224                    self.match_identifier("KEYS");
33225                    true
33226                } else {
33227                    false
33228                };
33229                // Parse optional RETURNING clause: RETURNING type [FORMAT JSON] [ENCODING encoding]
33230                let (returning_type, format_json, encoding) = if self
33231                    .match_token(TokenType::Returning)
33232                {
33233                    let return_type = self.parse_data_type()?;
33234                    // Optional FORMAT JSON
33235                    let has_format_json = if self.match_token(TokenType::Format) {
33236                        // JSON might be a keyword or identifier
33237                        let _ = self.match_token(TokenType::Json) || self.match_identifier("JSON");
33238                        true
33239                    } else {
33240                        false
33241                    };
33242                    // Optional ENCODING encoding
33243                    let enc = if self.match_identifier("ENCODING") {
33244                        Some(self.expect_identifier_or_keyword()?)
33245                    } else {
33246                        None
33247                    };
33248                    (Some(return_type), has_format_json, enc)
33249                } else {
33250                    (None, false, None)
33251                };
33252                self.expect(TokenType::RParen)?;
33253                Ok(Expression::JsonObject(Box::new(JsonObjectFunc {
33254                    pairs,
33255                    null_handling,
33256                    with_unique_keys,
33257                    returning_type,
33258                    format_json,
33259                    encoding,
33260                    star,
33261                })))
33262            }
33263
33264            // JSON_ARRAY function with Oracle-specific options
33265            // JSON_ARRAY(expr [FORMAT JSON], ... [NULL ON NULL | ABSENT ON NULL] [RETURNING type] [STRICT])
33266            "JSON_ARRAY" => {
33267                let mut expressions = Vec::new();
33268                if !self.check(TokenType::RParen) {
33269                    loop {
33270                        let expr = self.parse_bitwise()?.unwrap_or(Expression::Null(Null));
33271                        // Check for FORMAT JSON after each expression
33272                        let expr_with_format = if self.match_text_seq(&["FORMAT", "JSON"]) {
33273                            Expression::JSONFormat(Box::new(JSONFormat {
33274                                this: Some(Box::new(expr)),
33275                                options: Vec::new(),
33276                                is_json: None,
33277                                to_json: None,
33278                            }))
33279                        } else {
33280                            expr
33281                        };
33282                        expressions.push(expr_with_format);
33283                        if !self.match_token(TokenType::Comma) {
33284                            break;
33285                        }
33286                    }
33287                }
33288                // Parse NULL ON NULL or ABSENT ON NULL
33289                let null_handling = if self.match_text_seq(&["NULL", "ON", "NULL"]) {
33290                    Some(Box::new(Expression::Var(Box::new(Var {
33291                        this: "NULL ON NULL".to_string(),
33292                    }))))
33293                } else if self.match_text_seq(&["ABSENT", "ON", "NULL"]) {
33294                    Some(Box::new(Expression::Var(Box::new(Var {
33295                        this: "ABSENT ON NULL".to_string(),
33296                    }))))
33297                } else {
33298                    None
33299                };
33300                // Parse RETURNING type
33301                let return_type = if self.match_token(TokenType::Returning) {
33302                    let dt = self.parse_data_type()?;
33303                    Some(Box::new(Expression::DataType(dt)))
33304                } else {
33305                    None
33306                };
33307                // Parse STRICT
33308                let strict = if self.match_identifier("STRICT") {
33309                    Some(Box::new(Expression::Boolean(BooleanLiteral {
33310                        value: true,
33311                    })))
33312                } else {
33313                    None
33314                };
33315                self.expect(TokenType::RParen)?;
33316                Ok(Expression::JSONArray(Box::new(JSONArray {
33317                    expressions,
33318                    null_handling,
33319                    return_type,
33320                    strict,
33321                })))
33322            }
33323
33324            // JSON_ARRAYAGG function with Oracle-specific options
33325            // JSON_ARRAYAGG(expr [FORMAT JSON] [ORDER BY ...] [NULL ON NULL | ABSENT ON NULL] [RETURNING type] [STRICT])
33326            "JSON_ARRAYAGG" => {
33327                let this = self.parse_bitwise()?.unwrap_or(Expression::Null(Null));
33328                // Check for FORMAT JSON after the expression
33329                let this_with_format = if self.match_text_seq(&["FORMAT", "JSON"]) {
33330                    Expression::JSONFormat(Box::new(JSONFormat {
33331                        this: Some(Box::new(this)),
33332                        options: Vec::new(),
33333                        is_json: None,
33334                        to_json: None,
33335                    }))
33336                } else {
33337                    this
33338                };
33339                // Parse ORDER BY clause
33340                let order = if self.match_token(TokenType::Order) {
33341                    self.match_token(TokenType::By);
33342                    // Parse comma-separated ordered expressions
33343                    let mut order_exprs = Vec::new();
33344                    loop {
33345                        if let Some(ordered) = self.parse_ordered_item()? {
33346                            order_exprs.push(ordered);
33347                        } else {
33348                            break;
33349                        }
33350                        if !self.match_token(TokenType::Comma) {
33351                            break;
33352                        }
33353                    }
33354                    if !order_exprs.is_empty() {
33355                        Some(Box::new(Expression::OrderBy(Box::new(OrderBy {
33356                            expressions: order_exprs,
33357                            siblings: false,
33358                            comments: Vec::new(),
33359                        }))))
33360                    } else {
33361                        None
33362                    }
33363                } else {
33364                    None
33365                };
33366                // Parse NULL ON NULL or ABSENT ON NULL
33367                let null_handling = if self.match_text_seq(&["NULL", "ON", "NULL"]) {
33368                    Some(Box::new(Expression::Var(Box::new(Var {
33369                        this: "NULL ON NULL".to_string(),
33370                    }))))
33371                } else if self.match_text_seq(&["ABSENT", "ON", "NULL"]) {
33372                    Some(Box::new(Expression::Var(Box::new(Var {
33373                        this: "ABSENT ON NULL".to_string(),
33374                    }))))
33375                } else {
33376                    None
33377                };
33378                // Parse RETURNING type
33379                let return_type = if self.match_token(TokenType::Returning) {
33380                    let dt = self.parse_data_type()?;
33381                    Some(Box::new(Expression::DataType(dt)))
33382                } else {
33383                    None
33384                };
33385                // Parse STRICT
33386                let strict = if self.match_identifier("STRICT") {
33387                    Some(Box::new(Expression::Boolean(BooleanLiteral {
33388                        value: true,
33389                    })))
33390                } else {
33391                    None
33392                };
33393                self.expect(TokenType::RParen)?;
33394                Ok(Expression::JSONArrayAgg(Box::new(JSONArrayAgg {
33395                    this: Box::new(this_with_format),
33396                    order,
33397                    null_handling,
33398                    return_type,
33399                    strict,
33400                })))
33401            }
33402
33403            // JSON_OBJECTAGG with KEY...VALUE syntax
33404            // JSON_OBJECTAGG(KEY key VALUE value) or JSON_OBJECTAGG(key: value)
33405            "JSON_OBJECTAGG" => {
33406                // Check for KEY keyword (KEY is a keyword token, not an identifier)
33407                let _has_key_keyword = self.match_token(TokenType::Key);
33408                // Parse key: use column parsing to avoid colon being interpreted as JSON path
33409                let key = self.parse_column()?.unwrap_or(Expression::Null(Null));
33410
33411                // Support colon or VALUE keyword (VALUE is an identifier, not a keyword)
33412                let _ = self.match_token(TokenType::Colon) || self.match_identifier("VALUE");
33413
33414                let value = self.parse_bitwise()?.unwrap_or(Expression::Null(Null));
33415                // Check for FORMAT JSON after value
33416                let value_with_format = if self.match_text_seq(&["FORMAT", "JSON"]) {
33417                    Expression::JSONFormat(Box::new(JSONFormat {
33418                        this: Some(Box::new(value)),
33419                        options: Vec::new(),
33420                        is_json: None,
33421                        to_json: None,
33422                    }))
33423                } else {
33424                    value
33425                };
33426                // Parse NULL ON NULL or ABSENT ON NULL
33427                let null_handling = if self.match_text_seq(&["NULL", "ON", "NULL"]) {
33428                    Some(Box::new(Expression::Var(Box::new(Var {
33429                        this: "NULL ON NULL".to_string(),
33430                    }))))
33431                } else if self.match_text_seq(&["ABSENT", "ON", "NULL"]) {
33432                    Some(Box::new(Expression::Var(Box::new(Var {
33433                        this: "ABSENT ON NULL".to_string(),
33434                    }))))
33435                } else {
33436                    None
33437                };
33438                // Parse WITH/WITHOUT UNIQUE KEYS
33439                let unique_keys = if self.match_text_seq(&["WITH", "UNIQUE"]) {
33440                    self.match_identifier("KEYS");
33441                    Some(Box::new(Expression::Boolean(BooleanLiteral {
33442                        value: true,
33443                    })))
33444                } else if self.match_text_seq(&["WITHOUT", "UNIQUE"]) {
33445                    self.match_identifier("KEYS");
33446                    Some(Box::new(Expression::Boolean(BooleanLiteral {
33447                        value: false,
33448                    })))
33449                } else {
33450                    None
33451                };
33452                // Parse RETURNING type
33453                let return_type = if self.match_token(TokenType::Returning) {
33454                    let dt = self.parse_data_type()?;
33455                    Some(Box::new(Expression::DataType(dt)))
33456                } else {
33457                    None
33458                };
33459                self.expect(TokenType::RParen)?;
33460                Ok(Expression::JSONObjectAgg(Box::new(JSONObjectAgg {
33461                    expressions: vec![Expression::JSONKeyValue(Box::new(JSONKeyValue {
33462                        this: Box::new(key),
33463                        expression: Box::new(value_with_format),
33464                    }))],
33465                    null_handling,
33466                    unique_keys,
33467                    return_type,
33468                    encoding: None,
33469                })))
33470            }
33471
33472            // JSON_TABLE function - MySQL/Oracle table function for JSON data
33473            // JSON_TABLE(json_doc [FORMAT JSON], path COLUMNS (column_list)) [AS alias]
33474            "JSON_TABLE" => {
33475                // Parse the JSON expression
33476                let this = self.parse_bitwise()?.unwrap_or(Expression::Null(Null));
33477                // Check for FORMAT JSON after the expression
33478                let this_with_format = if self.match_text_seq(&["FORMAT", "JSON"]) {
33479                    Expression::JSONFormat(Box::new(JSONFormat {
33480                        this: Some(Box::new(this)),
33481                        options: Vec::new(),
33482                        is_json: None,
33483                        to_json: None,
33484                    }))
33485                } else {
33486                    this
33487                };
33488
33489                // Parse path (after comma)
33490                let path = if self.match_token(TokenType::Comma) {
33491                    if let Some(s) = self.parse_string()? {
33492                        Some(Box::new(s))
33493                    } else {
33494                        None
33495                    }
33496                } else {
33497                    None
33498                };
33499
33500                // Oracle uses "ERROR ON ERROR" (value then behavior) instead of "ON ERROR ERROR"
33501                // Parse error handling: ERROR ON ERROR or NULL ON ERROR
33502                let error_handling =
33503                    if self.match_identifier("ERROR") && self.match_text_seq(&["ON", "ERROR"]) {
33504                        Some(Box::new(Expression::Var(Box::new(Var {
33505                            this: "ERROR ON ERROR".to_string(),
33506                        }))))
33507                    } else if self.match_text_seq(&["NULL", "ON", "ERROR"]) {
33508                        Some(Box::new(Expression::Var(Box::new(Var {
33509                            this: "NULL ON ERROR".to_string(),
33510                        }))))
33511                    } else {
33512                        None
33513                    };
33514
33515                // Parse empty handling: ERROR ON EMPTY or NULL ON EMPTY
33516                let empty_handling =
33517                    if self.match_identifier("ERROR") && self.match_text_seq(&["ON", "EMPTY"]) {
33518                        Some(Box::new(Expression::Var(Box::new(Var {
33519                            this: "ERROR ON EMPTY".to_string(),
33520                        }))))
33521                    } else if self.match_text_seq(&["NULL", "ON", "EMPTY"]) {
33522                        Some(Box::new(Expression::Var(Box::new(Var {
33523                            this: "NULL ON EMPTY".to_string(),
33524                        }))))
33525                    } else {
33526                        None
33527                    };
33528
33529                // Parse COLUMNS clause
33530                let schema = self.parse_json_table_columns()?;
33531
33532                self.expect(TokenType::RParen)?;
33533
33534                Ok(Expression::JSONTable(Box::new(JSONTable {
33535                    this: Box::new(this_with_format),
33536                    schema: schema.map(Box::new),
33537                    path,
33538                    error_handling,
33539                    empty_handling,
33540                })))
33541            }
33542            _ => unreachable!(
33543                "phase-6 json parser called with non-json family name '{}'",
33544                canonical_upper_name
33545            ),
33546        }
33547    }
33548
33549    fn parse_typed_translate_teradata_family(
33550        &mut self,
33551        name: &str,
33552        _upper_name: &str,
33553        canonical_upper_name: &str,
33554    ) -> Result<Expression> {
33555        match canonical_upper_name {
33556            // Teradata: TRANSLATE(x USING charset [WITH ERROR])
33557            "TRANSLATE"
33558                if matches!(
33559                    self.config.dialect,
33560                    Some(crate::dialects::DialectType::Teradata)
33561                ) =>
33562            {
33563                let this = self.parse_expression()?;
33564                if self.match_token(TokenType::Using) {
33565                    let expression = self.parse_expression()?;
33566                    let with_error = if self.match_text_seq(&["WITH", "ERROR"]) {
33567                        Some(Box::new(Expression::Boolean(BooleanLiteral {
33568                            value: true,
33569                        })))
33570                    } else {
33571                        None
33572                    };
33573                    self.expect(TokenType::RParen)?;
33574                    Ok(Expression::TranslateCharacters(Box::new(
33575                        TranslateCharacters {
33576                            this: Box::new(this),
33577                            expression: Box::new(expression),
33578                            with_error,
33579                        },
33580                    )))
33581                } else {
33582                    let mut args = vec![this];
33583                    if self.match_token(TokenType::Comma) {
33584                        let mut rest = self.parse_expression_list()?;
33585                        args.append(&mut rest);
33586                    }
33587                    self.expect(TokenType::RParen)?;
33588                    Ok(Expression::Function(Box::new(Function {
33589                        name: name.to_string(),
33590                        args,
33591                        distinct: false,
33592                        trailing_comments: Vec::new(),
33593                        use_bracket_syntax: false,
33594                        no_parens: false,
33595                        quoted: false,
33596                        span: None,
33597                        inferred_type: None,
33598                    })))
33599                }
33600            }
33601
33602            _ => unreachable!(
33603                "phase-6 translate parser called with non-translate family name '{}'",
33604                canonical_upper_name
33605            ),
33606        }
33607    }
33608
33609    /// Parse a generic function call (fallback for unrecognized functions)
33610    fn parse_generic_function(&mut self, name: &str, quoted: bool) -> Result<Expression> {
33611        let is_known_agg = Self::is_aggregate_function(name);
33612
33613        let (args, distinct) = if self.check(TokenType::RParen) {
33614            (Vec::new(), false)
33615        } else if self.check(TokenType::Star) {
33616            // Check for DuckDB *COLUMNS(...) syntax first
33617            if self.check_next_identifier("COLUMNS")
33618                && self
33619                    .tokens
33620                    .get(self.current + 2)
33621                    .map(|t| t.token_type == TokenType::LParen)
33622                    .unwrap_or(false)
33623            {
33624                // Parse *COLUMNS(...) as a function argument
33625                (self.parse_function_arguments()?, false)
33626            } else {
33627                // Regular star: parse star modifiers like EXCLUDE/EXCEPT/REPLACE/RENAME
33628                // e.g., COLUMNS(* EXCLUDE (empid, dept))
33629                self.advance(); // consume *
33630                let star = self.parse_star_modifiers(None)?;
33631                let mut args = vec![Expression::Star(star)];
33632                // ClickHouse: func(*, col1, col2) — star followed by more args
33633                if self.match_token(TokenType::Comma) {
33634                    let rest = self.parse_function_arguments()?;
33635                    args.extend(rest);
33636                }
33637                (args, false)
33638            }
33639        } else if self.check(TokenType::Distinct)
33640            && !self.check_next(TokenType::Comma)
33641            && !self.check_next(TokenType::RParen)
33642        {
33643            // DISTINCT as aggregate modifier: func(DISTINCT expr)
33644            // Not when followed by comma or rparen — then DISTINCT is used as an identifier value
33645            self.advance(); // consume DISTINCT
33646            (self.parse_function_arguments()?, true)
33647        } else if is_known_agg && self.match_token(TokenType::All) {
33648            // ALL is the default quantifier, just consume it
33649            (self.parse_function_arguments()?, false)
33650        } else {
33651            (self.parse_function_arguments()?, false)
33652        };
33653
33654        // For known aggregate functions, check for IGNORE NULLS, ORDER BY, LIMIT inside parens
33655        let (ignore_nulls, order_by, agg_limit) = if is_known_agg {
33656            let ignore_nulls = if self.match_token(TokenType::Ignore)
33657                && self.match_token(TokenType::Nulls)
33658            {
33659                Some(true)
33660            } else if self.match_token(TokenType::Respect) && self.match_token(TokenType::Nulls) {
33661                Some(false)
33662            } else {
33663                None
33664            };
33665
33666            let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
33667                self.parse_order_by_list()?
33668            } else {
33669                Vec::new()
33670            };
33671            let limit = if self.match_token(TokenType::Limit) {
33672                Some(Box::new(self.parse_expression()?))
33673            } else {
33674                None
33675            };
33676            (ignore_nulls, order_by, limit)
33677        } else {
33678            (None, Vec::new(), None)
33679        };
33680
33681        // ClickHouse: SETTINGS key=value, ... before closing paren in function calls
33682        if matches!(
33683            self.config.dialect,
33684            Some(crate::dialects::DialectType::ClickHouse)
33685        ) && self.check(TokenType::Settings)
33686            && self.current + 2 < self.tokens.len()
33687            && (self.tokens[self.current + 1].token_type == TokenType::Var
33688                || self.tokens[self.current + 1].token_type == TokenType::Identifier)
33689            && self.tokens[self.current + 2].token_type == TokenType::Eq
33690        {
33691            self.advance(); // consume SETTINGS
33692            loop {
33693                let _key = if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
33694                    self.advance().text
33695                } else {
33696                    break;
33697                };
33698                if self.match_token(TokenType::Eq) {
33699                    let _value = self.parse_primary()?;
33700                }
33701                if !self.match_token(TokenType::Comma) {
33702                    break;
33703                }
33704            }
33705        }
33706
33707        self.expect(TokenType::RParen)?;
33708        let trailing_comments = self.previous_trailing_comments();
33709
33710        // Check for WITHIN GROUP (ORDER BY ...)
33711        if self.match_identifier("WITHIN") {
33712            if self.match_identifier("GROUP") {
33713                self.expect(TokenType::LParen)?;
33714                self.expect(TokenType::Order)?;
33715                self.expect(TokenType::By)?;
33716                let within_order = self.parse_order_by_list()?;
33717                self.expect(TokenType::RParen)?;
33718
33719                let func_expr = Expression::AggregateFunction(Box::new(AggregateFunction {
33720                    name: name.to_string(),
33721                    args,
33722                    distinct,
33723                    filter: None,
33724                    order_by: Vec::new(),
33725                    limit: None,
33726                    ignore_nulls: None,
33727                    inferred_type: None,
33728                }));
33729
33730                let within = Expression::WithinGroup(Box::new(WithinGroup {
33731                    this: func_expr,
33732                    order_by: within_order,
33733                }));
33734
33735                // Check for FILTER after WITHIN GROUP
33736                let filter = self.parse_filter_clause()?;
33737                if let Some(filter_expr) = filter {
33738                    return Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
33739                        name: format!("__WITHIN_GROUP_{}", name),
33740                        args: vec![within, filter_expr],
33741                        distinct: false,
33742                        filter: None,
33743                        order_by: Vec::new(),
33744                        limit: None,
33745                        ignore_nulls: None,
33746                        inferred_type: None,
33747                    })));
33748                }
33749
33750                return Ok(within);
33751            }
33752        }
33753
33754        let filter = self.parse_filter_clause()?;
33755
33756        // Check for postfix IGNORE NULLS / RESPECT NULLS after RParen
33757        let ignore_nulls = if ignore_nulls.is_some() {
33758            ignore_nulls
33759        } else if self.match_keywords(&[TokenType::Ignore, TokenType::Nulls]) {
33760            Some(true)
33761        } else if self.match_keywords(&[TokenType::Respect, TokenType::Nulls]) {
33762            Some(false)
33763        } else {
33764            None
33765        };
33766
33767        if filter.is_some() || is_known_agg || ignore_nulls.is_some() {
33768            Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
33769                name: name.to_string(),
33770                args,
33771                distinct,
33772                filter,
33773                order_by,
33774                limit: agg_limit,
33775                ignore_nulls,
33776                inferred_type: None,
33777            })))
33778        } else {
33779            let mut func = Function::new(name.to_string(), args);
33780            func.distinct = distinct;
33781            func.trailing_comments = trailing_comments;
33782            func.quoted = quoted;
33783            Ok(Expression::Function(Box::new(func)))
33784        }
33785    }
33786
33787    /// Check for an AS alias after an expression in ClickHouse function arg context.
33788    fn maybe_clickhouse_alias(&mut self, expr: Expression) -> Expression {
33789        if matches!(
33790            self.config.dialect,
33791            Some(crate::dialects::DialectType::ClickHouse)
33792        ) && self.check(TokenType::As)
33793            && !self.check_next(TokenType::RParen)
33794            && !self.check_next(TokenType::Comma)
33795        {
33796            let next_idx = self.current + 1;
33797            let is_alias = next_idx < self.tokens.len()
33798                && matches!(
33799                    self.tokens[next_idx].token_type,
33800                    TokenType::Identifier | TokenType::Var | TokenType::QuotedIdentifier
33801                );
33802            if is_alias {
33803                self.advance(); // consume AS
33804                let alias_token = self.advance();
33805                let alias_name = Identifier {
33806                    name: alias_token.text.clone(),
33807                    quoted: alias_token.token_type == TokenType::QuotedIdentifier,
33808                    trailing_comments: Vec::new(),
33809                    span: None,
33810                };
33811                return Expression::Alias(Box::new(crate::expressions::Alias {
33812                    this: expr,
33813                    alias: alias_name,
33814                    column_aliases: Vec::new(),
33815                    pre_alias_comments: Vec::new(),
33816                    trailing_comments: Vec::new(),
33817                    inferred_type: None,
33818                }));
33819            }
33820        }
33821        expr
33822    }
33823
33824    /// Parse an expression, then check for AS alias in ClickHouse function arg context.
33825    /// ClickHouse allows: func(expr AS alias, ...) where AS creates a named alias inside function args.
33826    fn parse_expression_with_clickhouse_alias(&mut self) -> Result<Expression> {
33827        let expr = self.parse_expression()?;
33828        Ok(self.maybe_clickhouse_alias(expr))
33829    }
33830
33831    /// Parse function arguments, handling named arguments (name => value, name := value)
33832    /// and TABLE/MODEL prefixed arguments (BigQuery)
33833    fn parse_function_arguments(&mut self) -> Result<Vec<Expression>> {
33834        let mut args = Vec::new();
33835
33836        loop {
33837            // ClickHouse: SETTINGS key=value, ... terminates function args
33838            // Only break if SETTINGS is followed by identifier = value pattern
33839            if matches!(
33840                self.config.dialect,
33841                Some(crate::dialects::DialectType::ClickHouse)
33842            ) && self.check(TokenType::Settings)
33843                && self.current + 2 < self.tokens.len()
33844                && (self.tokens[self.current + 1].token_type == TokenType::Var
33845                    || self.tokens[self.current + 1].token_type == TokenType::Identifier)
33846                && self.tokens[self.current + 2].token_type == TokenType::Eq
33847            {
33848                break; // will be consumed by SETTINGS handler after loop
33849            }
33850
33851            // ClickHouse: bare SELECT/WITH as function argument (e.g., view(SELECT 1), remote(..., view(SELECT ...)))
33852            if matches!(
33853                self.config.dialect,
33854                Some(crate::dialects::DialectType::ClickHouse)
33855            ) && (self.check(TokenType::Select) || self.check(TokenType::With))
33856            {
33857                let query = self.parse_statement()?;
33858                args.push(query);
33859                if !self.match_token(TokenType::Comma) {
33860                    break;
33861                }
33862                continue;
33863            }
33864
33865            // Check for TABLE ref or MODEL ref as function argument (BigQuery)
33866            // e.g., GAP_FILL(TABLE device_data, ...) or ML.PREDICT(MODEL mydataset.mymodel, ...)
33867            let is_table_or_model_arg = if !self.is_at_end() {
33868                self.check(TokenType::Table) || self.peek().text.eq_ignore_ascii_case("MODEL")
33869            } else {
33870                false
33871            };
33872            let arg = if is_table_or_model_arg {
33873                let prefix = self.peek().text.to_uppercase();
33874                let saved_pos = self.current;
33875                self.advance(); // consume TABLE or MODEL
33876
33877                // Only treat as TABLE/MODEL argument if followed by an identifier (table name),
33878                // not by => (which would be a named arg like "table => value")
33879                if !self.is_at_end()
33880                    && !self.check(TokenType::FArrow)
33881                    && !self.check(TokenType::ColonEq)
33882                {
33883                    // Parse the table/model reference (supports dotted names like dataset.table)
33884                    if let Some(table_expr) = self.parse_table_parts()? {
33885                        Expression::TableArgument(Box::new(TableArgument {
33886                            prefix,
33887                            this: table_expr,
33888                        }))
33889                    } else {
33890                        // Failed to parse table parts, backtrack and treat as regular expression
33891                        self.current = saved_pos;
33892                        self.parse_expression()?
33893                    }
33894                } else {
33895                    // TABLE/MODEL followed by => or :=, backtrack and handle as named arg
33896                    self.current = saved_pos;
33897                    if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
33898                        let ident_token = self.advance();
33899                        let ident_name = ident_token.text.clone();
33900                        if self.match_token(TokenType::FArrow) {
33901                            let value = self.parse_expression()?;
33902                            Expression::NamedArgument(Box::new(NamedArgument {
33903                                name: Identifier::new(ident_name),
33904                                value,
33905                                separator: NamedArgSeparator::DArrow,
33906                            }))
33907                        } else if self.match_token(TokenType::ColonEq) {
33908                            let value = self.parse_expression()?;
33909                            Expression::NamedArgument(Box::new(NamedArgument {
33910                                name: Identifier::new(ident_name),
33911                                value,
33912                                separator: NamedArgSeparator::ColonEq,
33913                            }))
33914                        } else {
33915                            self.current = saved_pos;
33916                            self.parse_expression()?
33917                        }
33918                    } else {
33919                        self.parse_expression()?
33920                    }
33921                }
33922            } else if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
33923                // Try to parse:
33924                // 1. Named argument: identifier => value or identifier := value
33925                // 2. Snowflake lambda with type: identifier type -> body (e.g., a int -> a + 1)
33926                // Save position to backtrack if not a named argument
33927                let saved_pos = self.current;
33928
33929                // Try to get identifier
33930                let ident_token = self.advance();
33931                let ident_name = ident_token.text.clone();
33932
33933                // PostgreSQL/Redshift VARIADIC keyword: backtrack and let parse_expression handle it
33934                // VARIADIC ARRAY[...] must not be misinterpreted as a lambda with type annotation
33935                if ident_name.eq_ignore_ascii_case("VARIADIC")
33936                    && matches!(
33937                        self.config.dialect,
33938                        Some(crate::dialects::DialectType::PostgreSQL)
33939                            | Some(crate::dialects::DialectType::Redshift)
33940                    )
33941                {
33942                    self.current = saved_pos;
33943                    self.parse_expression()?
33944                }
33945                // Check for Snowflake lambda with type annotation: a int -> body
33946                // Look ahead to see if we have a type token followed by ->
33947                else if !self.is_at_end()
33948                    && self.is_type_keyword()
33949                    && !self.check(TokenType::FArrow)
33950                    && !self.check(TokenType::ColonEq)
33951                {
33952                    // Parse type annotation
33953                    let type_annotation = self.parse_data_type()?;
33954
33955                    // Check for arrow
33956                    if self.match_token(TokenType::Arrow) {
33957                        // This is a Snowflake lambda: param type -> body
33958                        let body = self.parse_expression()?;
33959                        Expression::Lambda(Box::new(LambdaExpr {
33960                            parameters: vec![Identifier::new(ident_name)],
33961                            body,
33962                            colon: false,
33963                            parameter_types: vec![Some(type_annotation)],
33964                        }))
33965                    } else {
33966                        // Not a lambda, backtrack and parse as regular expression
33967                        self.current = saved_pos;
33968                        self.parse_expression()?
33969                    }
33970                }
33971                // ClickHouse: simple lambda without type annotation: ident -> body
33972                else if self.match_token(TokenType::Arrow) {
33973                    let body = self.parse_expression()?;
33974                    Expression::Lambda(Box::new(LambdaExpr {
33975                        parameters: vec![Identifier::new(ident_name)],
33976                        body,
33977                        colon: false,
33978                        parameter_types: Vec::new(),
33979                    }))
33980                }
33981                // Check for named argument separator (=> is FArrow)
33982                else if self.match_token(TokenType::FArrow) {
33983                    // name => value
33984                    let value = self.parse_expression()?;
33985                    Expression::NamedArgument(Box::new(NamedArgument {
33986                        name: Identifier::new(ident_name),
33987                        value,
33988                        separator: NamedArgSeparator::DArrow,
33989                    }))
33990                } else if self.match_token(TokenType::ColonEq) {
33991                    // name := value
33992                    let value = self.parse_expression()?;
33993                    Expression::NamedArgument(Box::new(NamedArgument {
33994                        name: Identifier::new(ident_name),
33995                        value,
33996                        separator: NamedArgSeparator::ColonEq,
33997                    }))
33998                } else {
33999                    // Not a named argument, backtrack and parse as regular expression
34000                    self.current = saved_pos;
34001                    self.parse_expression()?
34002                }
34003            } else {
34004                // Regular expression
34005                self.parse_expression()?
34006            };
34007
34008            // Handle AS alias inside function arguments (e.g. ClickHouse: arrayJoin([1,2,3] AS src))
34009            let arg = if matches!(
34010                self.config.dialect,
34011                Some(crate::dialects::DialectType::ClickHouse)
34012            ) && self.check(TokenType::As)
34013                && !self.check_next(TokenType::RParen)
34014                && !self.check_next(TokenType::Comma)
34015            {
34016                // Look ahead: AS followed by identifier/keyword, then ) or , means it's an alias
34017                let next_idx = self.current + 1;
34018                let after_alias_idx = self.current + 2;
34019                let is_alias_token = next_idx < self.tokens.len()
34020                    && (matches!(
34021                        self.tokens[next_idx].token_type,
34022                        TokenType::Identifier | TokenType::Var | TokenType::QuotedIdentifier
34023                    ) || self.tokens[next_idx].token_type.is_keyword());
34024                // Ensure the token AFTER the alias is ) or , (function arg boundary)
34025                let is_alias = is_alias_token
34026                    && after_alias_idx < self.tokens.len()
34027                    && matches!(
34028                        self.tokens[after_alias_idx].token_type,
34029                        TokenType::RParen | TokenType::Comma
34030                    );
34031                if is_alias {
34032                    self.advance(); // consume AS
34033                    let alias_token = self.advance();
34034                    let alias_name = if alias_token.token_type == TokenType::QuotedIdentifier {
34035                        let mut ident = Identifier::new(alias_token.text.clone());
34036                        ident.quoted = true;
34037                        ident
34038                    } else {
34039                        Identifier::new(alias_token.text.clone())
34040                    };
34041                    Expression::Alias(Box::new(crate::expressions::Alias {
34042                        this: arg,
34043                        alias: alias_name,
34044                        column_aliases: Vec::new(),
34045                        pre_alias_comments: Vec::new(),
34046                        trailing_comments: Vec::new(),
34047                        inferred_type: None,
34048                    }))
34049                } else {
34050                    arg
34051                }
34052            } else {
34053                arg
34054            };
34055
34056            // ClickHouse: implicit alias without AS keyword: func(expr identifier, ...)
34057            let arg = self.try_clickhouse_implicit_alias(arg);
34058
34059            // Handle trailing comments
34060            let trailing_comments = self.previous_trailing_comments();
34061            let arg = if trailing_comments.is_empty() {
34062                arg
34063            } else {
34064                match &arg {
34065                    Expression::Literal(_) | Expression::Boolean(_) | Expression::Null(_) => {
34066                        Expression::Annotated(Box::new(Annotated {
34067                            this: arg,
34068                            trailing_comments,
34069                        }))
34070                    }
34071                    _ => arg,
34072                }
34073            };
34074
34075            args.push(arg);
34076
34077            if !self.match_token(TokenType::Comma) {
34078                break;
34079            }
34080            // Skip consecutive commas (Snowflake allows skipping optional named args)
34081            // e.g., ROUND(SCALE => 1, EXPR => 2.25, , ROUNDING_MODE => 'HALF_TO_EVEN')
34082            while self.check(TokenType::Comma) {
34083                self.advance();
34084            }
34085        }
34086
34087        // ClickHouse: SETTINGS key=value, ... at end of function args before RParen
34088        if matches!(
34089            self.config.dialect,
34090            Some(crate::dialects::DialectType::ClickHouse)
34091        ) && self.check(TokenType::Settings)
34092            && self.current + 2 < self.tokens.len()
34093            && (self.tokens[self.current + 1].token_type == TokenType::Var
34094                || self.tokens[self.current + 1].token_type == TokenType::Identifier)
34095            && self.tokens[self.current + 2].token_type == TokenType::Eq
34096        {
34097            self.advance(); // consume SETTINGS
34098            loop {
34099                let _key = if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
34100                    self.advance().text
34101                } else {
34102                    break;
34103                };
34104                if self.match_token(TokenType::Eq) {
34105                    let _value = self.parse_primary()?;
34106                }
34107                if !self.match_token(TokenType::Comma) {
34108                    break;
34109                }
34110            }
34111        }
34112
34113        Ok(args)
34114    }
34115
34116    /// Parse optional FILTER clause
34117    fn parse_filter_clause(&mut self) -> Result<Option<Expression>> {
34118        if self.match_token(TokenType::Filter) {
34119            self.expect(TokenType::LParen)?;
34120            // WHERE is optional (DuckDB allows FILTER(condition) without WHERE)
34121            self.match_token(TokenType::Where);
34122            let filter_expr = self.parse_expression()?;
34123            self.expect(TokenType::RParen)?;
34124            Ok(Some(filter_expr))
34125        } else {
34126            Ok(None)
34127        }
34128    }
34129
34130    /// Parse STRUCT arguments with optional AS aliases: STRUCT(x, y AS name, ...)
34131    fn parse_struct_args(&mut self) -> Result<Vec<Expression>> {
34132        let mut args = Vec::new();
34133
34134        loop {
34135            let expr = self.parse_expression()?;
34136
34137            // Check for AS alias
34138            if self.match_token(TokenType::As) {
34139                let alias = self.expect_identifier_or_keyword()?;
34140                args.push(Expression::Alias(Box::new(Alias {
34141                    this: expr,
34142                    alias: Identifier::new(alias),
34143                    column_aliases: Vec::new(),
34144                    pre_alias_comments: Vec::new(),
34145                    trailing_comments: Vec::new(),
34146                    inferred_type: None,
34147                })));
34148            } else {
34149                args.push(expr);
34150            }
34151
34152            if !self.match_token(TokenType::Comma) {
34153                break;
34154            }
34155        }
34156
34157        Ok(args)
34158    }
34159
34160    /// Maybe parse OVER clause for window functions or WITHIN GROUP for ordered-set aggregates
34161    fn maybe_parse_over(&mut self, expr: Expression) -> Result<Expression> {
34162        let expr = self.maybe_parse_subscript(expr)?;
34163
34164        // For Oracle: Check for interval span after expression (e.g., (expr) DAY(9) TO SECOND(3))
34165        // https://docs.oracle.com/en/database/oracle/oracle-database/26/sqlrf/Interval-Expressions.html
34166        let expr = if matches!(
34167            self.config.dialect,
34168            Some(crate::dialects::DialectType::Oracle)
34169        ) {
34170            self.try_parse_oracle_interval_span(expr)?
34171        } else {
34172            expr
34173        };
34174
34175        // Check for WITHIN GROUP (for ordered-set aggregate functions like LISTAGG, PERCENTILE_CONT)
34176        let expr = if self.check(TokenType::Within) && self.check_next(TokenType::Group) {
34177            self.advance(); // consume WITHIN
34178            self.advance(); // consume GROUP
34179            self.expect(TokenType::LParen)?;
34180            self.expect(TokenType::Order)?;
34181            self.expect(TokenType::By)?;
34182            let order_by = self.parse_order_by_list()?;
34183            self.expect(TokenType::RParen)?;
34184            Expression::WithinGroup(Box::new(WithinGroup {
34185                this: expr,
34186                order_by,
34187            }))
34188        } else {
34189            expr
34190        };
34191
34192        // Check for FILTER clause (can follow WITHIN GROUP or standalone aggregate)
34193        // SQL:2003 syntax: aggregate_function(...) FILTER (WHERE condition)
34194        let expr = if self.match_token(TokenType::Filter) {
34195            self.expect(TokenType::LParen)?;
34196            // WHERE is required in standard SQL FILTER clause
34197            self.expect(TokenType::Where)?;
34198            let filter_expr = self.parse_expression()?;
34199            self.expect(TokenType::RParen)?;
34200            Expression::Filter(Box::new(Filter {
34201                this: Box::new(expr),
34202                expression: Box::new(filter_expr),
34203            }))
34204        } else {
34205            expr
34206        };
34207
34208        // ClickHouse: IGNORE NULLS / RESPECT NULLS modifier after function call (before OVER)
34209        // This handles cases like: func(args) IGNORE NULLS OVER w
34210        // and parametric aggregates: func(params)(args) IGNORE NULLS
34211        let expr = if matches!(
34212            self.config.dialect,
34213            Some(crate::dialects::DialectType::ClickHouse)
34214        ) && (self.match_keywords(&[TokenType::Ignore, TokenType::Nulls])
34215            || self.match_keywords(&[TokenType::Respect, TokenType::Nulls]))
34216        {
34217            // Consume the modifier — we don't need to store it for transpilation
34218            expr
34219        } else {
34220            expr
34221        };
34222
34223        // Check for KEEP clause (Oracle: aggregate KEEP (DENSE_RANK FIRST|LAST ORDER BY ...))
34224        // Only if KEEP is followed by LPAREN - otherwise KEEP is used as an alias
34225        let keep = if self.check(TokenType::Keep) && self.check_next(TokenType::LParen) {
34226            self.advance(); // consume KEEP
34227            Some(self.parse_keep_clause()?)
34228        } else {
34229            None
34230        };
34231
34232        // Check for OVER clause (can follow KEEP, FILTER, WITHIN GROUP, or standalone aggregate)
34233        if self.match_token(TokenType::Over) {
34234            let over = self.parse_over_clause()?;
34235            Ok(Expression::WindowFunction(Box::new(WindowFunction {
34236                this: expr,
34237                over,
34238                keep,
34239                inferred_type: None,
34240            })))
34241        } else if keep.is_some() {
34242            // KEEP without OVER - still a window-like construct
34243            // Create a WindowFunction with empty Over
34244            Ok(Expression::WindowFunction(Box::new(WindowFunction {
34245                this: expr,
34246                over: Over {
34247                    window_name: None,
34248                    partition_by: Vec::new(),
34249                    order_by: Vec::new(),
34250                    frame: None,
34251                    alias: None,
34252                },
34253                keep,
34254                inferred_type: None,
34255            })))
34256        } else {
34257            Ok(expr)
34258        }
34259    }
34260
34261    /// ClickHouse: parse parameterized aggregate functions like func(params)(args)
34262    fn maybe_parse_clickhouse_parameterized_agg(&mut self, expr: Expression) -> Result<Expression> {
34263        if !matches!(
34264            self.config.dialect,
34265            Some(crate::dialects::DialectType::ClickHouse)
34266        ) {
34267            return Ok(expr);
34268        }
34269        if !self.check(TokenType::LParen) {
34270            return Ok(expr);
34271        }
34272
34273        let (name, quoted, params) = match expr {
34274            Expression::Function(func) => (func.name, func.quoted, func.args),
34275            Expression::AggregateFunction(agg) => {
34276                if agg.distinct
34277                    || agg.filter.is_some()
34278                    || !agg.order_by.is_empty()
34279                    || agg.limit.is_some()
34280                    || agg.ignore_nulls.is_some()
34281                {
34282                    return Ok(Expression::AggregateFunction(agg));
34283                }
34284                (agg.name, false, agg.args)
34285            }
34286            _ => return Ok(expr),
34287        };
34288
34289        self.advance(); // consume (
34290                        // Handle DISTINCT in second arg list: func(params)(DISTINCT args)
34291        let distinct = self.match_token(TokenType::Distinct);
34292        let expressions = if self.check(TokenType::RParen) {
34293            Vec::new()
34294        } else {
34295            self.parse_function_arguments()?
34296        };
34297        self.expect(TokenType::RParen)?;
34298
34299        let ident = Identifier {
34300            name,
34301            quoted,
34302            trailing_comments: Vec::new(),
34303            span: None,
34304        };
34305
34306        // If DISTINCT was used, wrap the result to indicate it
34307        // For now, we just include it in the CombinedParameterizedAgg
34308        let _ = distinct; // DISTINCT is consumed but not separately tracked in this AST node
34309        Ok(Expression::CombinedParameterizedAgg(Box::new(
34310            CombinedParameterizedAgg {
34311                this: Box::new(Expression::Identifier(ident)),
34312                params,
34313                expressions,
34314            },
34315        )))
34316    }
34317
34318    /// Parse Oracle KEEP clause: KEEP (DENSE_RANK FIRST|LAST ORDER BY ...)
34319    fn parse_keep_clause(&mut self) -> Result<Keep> {
34320        self.expect(TokenType::LParen)?;
34321
34322        // Expect DENSE_RANK
34323        if !self.match_identifier("DENSE_RANK") {
34324            return Err(self.parse_error("Expected DENSE_RANK in KEEP clause"));
34325        }
34326
34327        // Expect FIRST or LAST
34328        let first = if self.match_token(TokenType::First) {
34329            true
34330        } else if self.match_token(TokenType::Last) {
34331            false
34332        } else {
34333            return Err(self.parse_error("Expected FIRST or LAST in KEEP clause"));
34334        };
34335
34336        // Expect ORDER BY
34337        self.expect(TokenType::Order)?;
34338        self.expect(TokenType::By)?;
34339
34340        let order_by = self.parse_order_by_list()?;
34341
34342        self.expect(TokenType::RParen)?;
34343
34344        Ok(Keep { first, order_by })
34345    }
34346
34347    /// Parse a JSON path operand - just the immediate literal/identifier without any subscript processing
34348    /// This is used for JSON arrow operators (->, ->>) to get proper left-to-right associativity
34349    fn parse_json_path_operand(&mut self) -> Result<Expression> {
34350        // Negative number literal (e.g., -1)
34351        if self.check(TokenType::Dash) {
34352            let dash_pos = self.current;
34353            self.advance(); // consume the dash
34354            if self.check(TokenType::Number) {
34355                let token = self.advance();
34356                return Ok(Expression::Neg(Box::new(UnaryOp {
34357                    this: Expression::Literal(Literal::Number(token.text)),
34358                    inferred_type: None,
34359                })));
34360            }
34361            // Not a negative number, backtrack
34362            self.current = dash_pos;
34363        }
34364
34365        // Number literal
34366        if self.check(TokenType::Number) {
34367            let token = self.advance();
34368            // Check for numeric literal suffix encoded as "number::TYPE" by tokenizer
34369            if let Some(sep_pos) = token.text.find("::") {
34370                let num_part = &token.text[..sep_pos];
34371                let type_name = &token.text[sep_pos + 2..];
34372                let num_expr = Expression::Literal(Literal::Number(num_part.to_string()));
34373                let data_type = match type_name {
34374                    "BIGINT" => crate::expressions::DataType::BigInt { length: None },
34375                    "SMALLINT" => crate::expressions::DataType::SmallInt { length: None },
34376                    "TINYINT" => crate::expressions::DataType::TinyInt { length: None },
34377                    "DOUBLE" => crate::expressions::DataType::Double {
34378                        precision: None,
34379                        scale: None,
34380                    },
34381                    "FLOAT" => crate::expressions::DataType::Float {
34382                        precision: None,
34383                        scale: None,
34384                        real_spelling: false,
34385                    },
34386                    "DECIMAL" => crate::expressions::DataType::Decimal {
34387                        precision: None,
34388                        scale: None,
34389                    },
34390                    _ => crate::expressions::DataType::Custom {
34391                        name: type_name.to_string(),
34392                    },
34393                };
34394                return Ok(Expression::TryCast(Box::new(crate::expressions::Cast {
34395                    this: num_expr,
34396                    to: data_type,
34397                    trailing_comments: Vec::new(),
34398                    double_colon_syntax: false,
34399                    format: None,
34400                    default: None,
34401                    inferred_type: None,
34402                })));
34403            }
34404            return Ok(Expression::Literal(Literal::Number(token.text)));
34405        }
34406
34407        // String literal
34408        if self.check(TokenType::String) {
34409            let token = self.advance();
34410            return Ok(Expression::Literal(Literal::String(token.text)));
34411        }
34412
34413        // Parenthesized expression (for complex paths)
34414        if self.match_token(TokenType::LParen) {
34415            let expr = self.parse_expression()?;
34416            self.expect(TokenType::RParen)?;
34417            return Ok(Expression::Paren(Box::new(Paren {
34418                this: expr,
34419                trailing_comments: Vec::new(),
34420            })));
34421        }
34422
34423        // Array literal: ['$.family', '$.species']
34424        // Used in DuckDB for multi-path JSON extraction
34425        if self.match_token(TokenType::LBracket) {
34426            // Empty array: []
34427            if self.match_token(TokenType::RBracket) {
34428                return Ok(Expression::ArrayFunc(Box::new(ArrayConstructor {
34429                    expressions: Vec::new(),
34430                    bracket_notation: true,
34431                    use_list_keyword: false,
34432                })));
34433            }
34434
34435            // Parse array elements
34436            let mut expressions = vec![self.parse_expression()?];
34437            while self.match_token(TokenType::Comma) {
34438                if self.check(TokenType::RBracket) {
34439                    break;
34440                }
34441                expressions.push(self.parse_expression()?);
34442            }
34443            self.expect(TokenType::RBracket)?;
34444
34445            return Ok(Expression::ArrayFunc(Box::new(ArrayConstructor {
34446                expressions,
34447                bracket_notation: true,
34448                use_list_keyword: false,
34449            })));
34450        }
34451
34452        // Identifier (possibly qualified like table.column)
34453        if self.is_identifier_token() {
34454            let first_ident = self.expect_identifier_with_quoted()?;
34455
34456            // Check for qualified name: identifier.identifier
34457            if self.match_token(TokenType::Dot) {
34458                if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
34459                    let second_ident = if self.is_identifier_token() {
34460                        self.expect_identifier_with_quoted()?
34461                    } else {
34462                        let token = self.advance();
34463                        Identifier::new(token.text)
34464                    };
34465                    return Ok(Expression::Column(Column {
34466                        name: second_ident,
34467                        table: Some(first_ident),
34468                        join_mark: false,
34469                        trailing_comments: Vec::new(),
34470                        span: None,
34471                        inferred_type: None,
34472                    }));
34473                }
34474            }
34475
34476            return Ok(Expression::Column(Column {
34477                name: first_ident,
34478                table: None,
34479                join_mark: false,
34480                trailing_comments: Vec::new(),
34481                span: None,
34482                inferred_type: None,
34483            }));
34484        }
34485
34486        // Keywords as identifiers (possibly qualified)
34487        if self.is_safe_keyword_as_identifier() {
34488            let token = self.advance();
34489            let first_ident = Identifier::new(token.text);
34490
34491            // Check for qualified name: identifier.identifier
34492            if self.match_token(TokenType::Dot) {
34493                if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
34494                    let second_ident = if self.is_identifier_token() {
34495                        self.expect_identifier_with_quoted()?
34496                    } else {
34497                        let token = self.advance();
34498                        Identifier::new(token.text)
34499                    };
34500                    return Ok(Expression::Column(Column {
34501                        name: second_ident,
34502                        table: Some(first_ident),
34503                        join_mark: false,
34504                        trailing_comments: Vec::new(),
34505                        span: None,
34506                        inferred_type: None,
34507                    }));
34508                }
34509            }
34510
34511            return Ok(Expression::Column(Column {
34512                name: first_ident,
34513                table: None,
34514                join_mark: false,
34515                trailing_comments: Vec::new(),
34516                span: None,
34517                inferred_type: None,
34518            }));
34519        }
34520
34521        Err(self.parse_error(format!(
34522            "Unexpected token in JSON path: {:?}",
34523            self.peek().token_type
34524        )))
34525    }
34526
34527    /// Maybe parse subscript access (array[index], struct.field)
34528    fn maybe_parse_subscript(&mut self, mut expr: Expression) -> Result<Expression> {
34529        loop {
34530            if self.match_token(TokenType::LBracket) {
34531                // Check if expr is an array/list constructor keyword (ARRAY[...] or LIST[...])
34532                let array_constructor_type = match &expr {
34533                    Expression::Column(col) if col.table.is_none() => {
34534                        let upper = col.name.name.to_uppercase();
34535                        if upper == "ARRAY" || upper == "LIST" {
34536                            Some(upper)
34537                        } else {
34538                            None
34539                        }
34540                    }
34541                    Expression::Identifier(id) => {
34542                        let upper = id.name.to_uppercase();
34543                        if upper == "ARRAY" || upper == "LIST" {
34544                            Some(upper)
34545                        } else {
34546                            None
34547                        }
34548                    }
34549                    _ => None,
34550                };
34551
34552                if let Some(constructor_type) = array_constructor_type {
34553                    // Parse ARRAY[expr, expr, ...] or LIST[expr, expr, ...]
34554                    // bracket_notation=false means we have the ARRAY/LIST keyword prefix
34555                    let use_list_keyword = constructor_type == "LIST";
34556                    if self.check(TokenType::RBracket) {
34557                        // Empty array: ARRAY[]
34558                        self.advance();
34559                        expr = Expression::ArrayFunc(Box::new(ArrayConstructor {
34560                            expressions: Vec::new(),
34561                            bracket_notation: false, // Has ARRAY/LIST keyword
34562                            use_list_keyword,
34563                        }));
34564                    } else {
34565                        let expressions = self.parse_expression_list()?;
34566                        self.expect(TokenType::RBracket)?;
34567                        expr = Expression::ArrayFunc(Box::new(ArrayConstructor {
34568                            expressions,
34569                            bracket_notation: false, // Has ARRAY/LIST keyword
34570                            use_list_keyword,
34571                        }));
34572                    }
34573                    continue;
34574                }
34575
34576                // Special case: MAP[...] constructor syntax
34577                // Check if expr is a MAP identifier
34578                // ClickHouse: map[key] is always subscript access, not a MAP constructor
34579                let is_map_constructor = !matches!(
34580                    self.config.dialect,
34581                    Some(crate::dialects::DialectType::ClickHouse)
34582                ) && match &expr {
34583                    Expression::Column(col) => {
34584                        col.name.name.to_uppercase() == "MAP" && col.table.is_none()
34585                    }
34586                    Expression::Identifier(id) => id.name.to_uppercase() == "MAP",
34587                    _ => false,
34588                };
34589
34590                if is_map_constructor {
34591                    let is_materialize = matches!(
34592                        self.config.dialect,
34593                        Some(crate::dialects::DialectType::Materialize)
34594                    );
34595
34596                    // Materialize: MAP[] empty map or MAP['a' => 1, ...] with fat arrow
34597                    if is_materialize {
34598                        if self.check(TokenType::RBracket) {
34599                            // Empty map: MAP[]
34600                            self.advance();
34601                            expr = Expression::ToMap(Box::new(ToMap {
34602                                this: Box::new(Expression::Struct(Box::new(Struct {
34603                                    fields: Vec::new(),
34604                                }))),
34605                            }));
34606                            continue;
34607                        }
34608
34609                        // Parse MAP['a' => 1, 'b' => 2, ...] with fat arrow entries
34610                        // Store entries as PropertyEQ expressions (key => value)
34611                        let mut entries = Vec::new();
34612                        loop {
34613                            let key = self.parse_expression()?;
34614                            self.expect(TokenType::FArrow)?;
34615                            let value = self.parse_expression()?;
34616                            // Store as PropertyEQ which will be output as key => value
34617                            entries.push((
34618                                None,
34619                                Expression::PropertyEQ(Box::new(BinaryOp::new(key, value))),
34620                            ));
34621
34622                            if !self.match_token(TokenType::Comma) {
34623                                break;
34624                            }
34625                        }
34626                        self.expect(TokenType::RBracket)?;
34627
34628                        expr = Expression::ToMap(Box::new(ToMap {
34629                            this: Box::new(Expression::Struct(Box::new(Struct {
34630                                fields: entries,
34631                            }))),
34632                        }));
34633                        continue;
34634                    }
34635
34636                    // DuckDB/BigQuery: MAP[keys, values] syntax
34637                    let keys = self.parse_expression()?;
34638                    self.expect(TokenType::Comma)?;
34639                    let values = self.parse_expression()?;
34640                    self.expect(TokenType::RBracket)?;
34641                    expr = Expression::Function(Box::new(Function {
34642                        name: "MAP".to_string(),
34643                        args: vec![keys, values],
34644                        distinct: false,
34645                        trailing_comments: Vec::new(),
34646                        use_bracket_syntax: true,
34647                        no_parens: false,
34648                        quoted: false,
34649                        span: None,
34650                        inferred_type: None,
34651                    }));
34652                    continue;
34653                }
34654
34655                // Check for slice syntax: [start:end:step]
34656                // Handle [:...] case where start is omitted
34657                if self.check(TokenType::Colon) {
34658                    self.advance(); // consume first :
34659                                    // Parse end - use parse_slice_element to avoid : being interpreted as parameter
34660                    let end = self.parse_slice_element()?;
34661                    // Check for step (second colon)
34662                    let step = if self.match_token(TokenType::Colon) {
34663                        self.parse_slice_element()?
34664                    } else {
34665                        None
34666                    };
34667                    self.expect(TokenType::RBracket)?;
34668                    if step.is_some() {
34669                        // Three-part slice with step: Subscript with Slice index
34670                        let slice = Expression::Slice(Box::new(Slice {
34671                            this: None, // start is omitted
34672                            expression: end.map(Box::new),
34673                            step: step.map(Box::new),
34674                        }));
34675                        expr = Expression::Subscript(Box::new(Subscript {
34676                            this: expr,
34677                            index: slice,
34678                        }));
34679                    } else {
34680                        expr = Expression::ArraySlice(Box::new(ArraySlice {
34681                            this: expr,
34682                            start: None,
34683                            end,
34684                        }));
34685                    }
34686                } else {
34687                    let start = self.parse_slice_element()?;
34688                    // Check if this is a slice
34689                    if self.match_token(TokenType::Colon) {
34690                        let end = self.parse_slice_element()?;
34691                        // Check for step (second colon)
34692                        let step = if self.match_token(TokenType::Colon) {
34693                            self.parse_slice_element()?
34694                        } else {
34695                            None
34696                        };
34697                        self.expect(TokenType::RBracket)?;
34698                        if step.is_some() {
34699                            // Three-part slice with step: Subscript with Slice index
34700                            let slice = Expression::Slice(Box::new(Slice {
34701                                this: start.map(Box::new),
34702                                expression: end.map(Box::new),
34703                                step: step.map(Box::new),
34704                            }));
34705                            expr = Expression::Subscript(Box::new(Subscript {
34706                                this: expr,
34707                                index: slice,
34708                            }));
34709                        } else {
34710                            expr = Expression::ArraySlice(Box::new(ArraySlice {
34711                                this: expr,
34712                                start,
34713                                end,
34714                            }));
34715                        }
34716                    } else {
34717                        self.expect(TokenType::RBracket)?;
34718                        // Simple subscript access - start must be Some
34719                        let index =
34720                            start.unwrap_or_else(|| Expression::Null(crate::expressions::Null));
34721                        expr = Expression::Subscript(Box::new(Subscript { this: expr, index }));
34722                    }
34723                }
34724            } else if self.match_token(TokenType::DotColon) {
34725                let data_type = self.parse_data_type()?;
34726                expr = Expression::JSONCast(Box::new(JSONCast {
34727                    this: Box::new(expr),
34728                    to: data_type,
34729                }));
34730            } else if self.match_token(TokenType::Dot) {
34731                // Handle chained dot access (a.b.c.d)
34732                if self.match_token(TokenType::Star) {
34733                    // expr.* - struct field expansion with potential modifiers (EXCEPT, REPLACE, etc.)
34734                    let table_name = match &expr {
34735                        Expression::Column(col) => {
34736                            if let Some(ref table) = col.table {
34737                                Some(Identifier::new(format!("{}.{}", table.name, col.name.name)))
34738                            } else {
34739                                Some(col.name.clone())
34740                            }
34741                        }
34742                        Expression::Dot(d) => {
34743                            fn dot_to_name_inner(expr: &Expression) -> String {
34744                                match expr {
34745                                    Expression::Column(col) => {
34746                                        if let Some(ref table) = col.table {
34747                                            format!("{}.{}", table.name, col.name.name)
34748                                        } else {
34749                                            col.name.name.clone()
34750                                        }
34751                                    }
34752                                    Expression::Dot(d) => {
34753                                        format!("{}.{}", dot_to_name_inner(&d.this), d.field.name)
34754                                    }
34755                                    _ => String::new(),
34756                                }
34757                            }
34758                            Some(Identifier::new(dot_to_name_inner(&Expression::Dot(
34759                                d.clone(),
34760                            ))))
34761                        }
34762                        _ => None,
34763                    };
34764                    if table_name.is_some() {
34765                        let star = self.parse_star_modifiers(table_name)?;
34766                        expr = Expression::Star(star);
34767                        // ClickHouse: a.* APPLY(func) EXCEPT(col) REPLACE(expr AS col) in any order
34768                        if matches!(
34769                            self.config.dialect,
34770                            Some(crate::dialects::DialectType::ClickHouse)
34771                        ) {
34772                            loop {
34773                                if self.check(TokenType::Apply) {
34774                                    self.advance();
34775                                    let apply_expr = if self.match_token(TokenType::LParen) {
34776                                        let e = self.parse_expression()?;
34777                                        self.expect(TokenType::RParen)?;
34778                                        e
34779                                    } else {
34780                                        self.parse_expression()?
34781                                    };
34782                                    expr = Expression::Apply(Box::new(crate::expressions::Apply {
34783                                        this: Box::new(expr),
34784                                        expression: Box::new(apply_expr),
34785                                    }));
34786                                } else if self.check(TokenType::Except)
34787                                    || self.check(TokenType::Exclude)
34788                                {
34789                                    self.advance();
34790                                    self.match_identifier("STRICT");
34791                                    if self.match_token(TokenType::LParen) {
34792                                        loop {
34793                                            if self.check(TokenType::RParen) {
34794                                                break;
34795                                            }
34796                                            let _ = self.parse_expression()?;
34797                                            if !self.match_token(TokenType::Comma) {
34798                                                break;
34799                                            }
34800                                        }
34801                                        self.expect(TokenType::RParen)?;
34802                                    } else if self.is_identifier_token()
34803                                        || self.is_safe_keyword_as_identifier()
34804                                    {
34805                                        let _ = self.parse_expression()?;
34806                                    }
34807                                } else if self.check(TokenType::Replace) {
34808                                    self.advance();
34809                                    self.match_identifier("STRICT");
34810                                    if self.match_token(TokenType::LParen) {
34811                                        loop {
34812                                            if self.check(TokenType::RParen) {
34813                                                break;
34814                                            }
34815                                            let _ = self.parse_expression()?;
34816                                            if self.match_token(TokenType::As) {
34817                                                if self.is_identifier_token()
34818                                                    || self.is_safe_keyword_as_identifier()
34819                                                {
34820                                                    self.advance();
34821                                                }
34822                                            }
34823                                            if !self.match_token(TokenType::Comma) {
34824                                                break;
34825                                            }
34826                                        }
34827                                        self.expect(TokenType::RParen)?;
34828                                    } else {
34829                                        let _ = self.parse_expression()?;
34830                                        if self.match_token(TokenType::As) {
34831                                            if self.is_identifier_token()
34832                                                || self.is_safe_keyword_as_identifier()
34833                                            {
34834                                                self.advance();
34835                                            }
34836                                        }
34837                                    }
34838                                } else {
34839                                    break;
34840                                }
34841                            }
34842                        }
34843                    } else {
34844                        // For complex expressions (like CAST, function calls), use Dot with * as field
34845                        expr = Expression::Dot(Box::new(DotAccess {
34846                            this: expr,
34847                            field: Identifier::new("*"),
34848                        }));
34849                    }
34850                } else if self.check(TokenType::Identifier)
34851                    || self.check(TokenType::Var)
34852                    || self.check(TokenType::QuotedIdentifier)
34853                    || self.check_keyword()
34854                {
34855                    let is_quoted = self.check(TokenType::QuotedIdentifier);
34856                    let field_name = self.advance().text;
34857                    // Check if this is a method call (field followed by parentheses)
34858                    if self.check(TokenType::LParen) && !is_quoted {
34859                        // This is a method call like a.b.C() or x.EXTRACT()
34860                        self.advance(); // consume (
34861                        let args = if self.check(TokenType::RParen) {
34862                            Vec::new()
34863                        } else {
34864                            self.parse_expression_list()?
34865                        };
34866                        self.expect(TokenType::RParen)?;
34867                        // Create a method call expression (DotAccess with function call)
34868                        expr = Expression::MethodCall(Box::new(MethodCall {
34869                            this: expr,
34870                            method: Identifier::new(field_name),
34871                            args,
34872                        }));
34873                    } else {
34874                        let mut ident = Identifier::new(field_name);
34875                        if is_quoted {
34876                            ident.quoted = true;
34877                        }
34878                        expr = Expression::Dot(Box::new(DotAccess {
34879                            this: expr,
34880                            field: ident,
34881                        }));
34882                    }
34883                } else if self.check(TokenType::Number) {
34884                    // Handle numeric field access like a.0 or x.1
34885                    let field_name = self.advance().text;
34886                    expr = Expression::Dot(Box::new(DotAccess {
34887                        this: expr,
34888                        field: Identifier::new(field_name),
34889                    }));
34890                } else if matches!(
34891                    self.config.dialect,
34892                    Some(crate::dialects::DialectType::ClickHouse)
34893                ) && self.check(TokenType::Caret)
34894                {
34895                    // ClickHouse: json.^path — the ^ prefix means "get all nested subcolumns"
34896                    self.advance(); // consume ^
34897                                    // What follows should be an identifier path
34898                    let mut field_name = "^".to_string();
34899                    if self.check(TokenType::Identifier)
34900                        || self.check(TokenType::Var)
34901                        || self.check_keyword()
34902                    {
34903                        field_name.push_str(&self.advance().text);
34904                    }
34905                    expr = Expression::Dot(Box::new(DotAccess {
34906                        this: expr,
34907                        field: Identifier::new(field_name),
34908                    }));
34909                } else if matches!(
34910                    self.config.dialect,
34911                    Some(crate::dialects::DialectType::ClickHouse)
34912                ) && self.check(TokenType::Colon)
34913                {
34914                    // ClickHouse: json.path.:Type — the : prefix means type cast on JSON path
34915                    self.advance(); // consume :
34916                                    // Consume the type name
34917                    let mut type_name = ":".to_string();
34918                    if self.check(TokenType::Identifier)
34919                        || self.check(TokenType::Var)
34920                        || self.check_keyword()
34921                    {
34922                        type_name.push_str(&self.advance().text);
34923                    }
34924                    expr = Expression::Dot(Box::new(DotAccess {
34925                        this: expr,
34926                        field: Identifier::new(type_name),
34927                    }));
34928                } else if matches!(
34929                    self.config.dialect,
34930                    Some(crate::dialects::DialectType::ClickHouse)
34931                ) && self.check(TokenType::Dash)
34932                    && self
34933                        .peek_nth(1)
34934                        .is_some_and(|t| t.token_type == TokenType::Number)
34935                {
34936                    // ClickHouse: tuple.-1 — negative tuple index
34937                    self.advance(); // consume -
34938                    let num = self.advance().text;
34939                    expr = Expression::Dot(Box::new(DotAccess {
34940                        this: expr,
34941                        field: Identifier::new(format!("-{}", num)),
34942                    }));
34943                } else {
34944                    return Err(self.parse_error("Expected field name after dot"));
34945                }
34946            } else if self.match_token(TokenType::Collate) {
34947                // Parse COLLATE 'collation_name' or COLLATE "collation_name" or COLLATE collation_name
34948                let (collation, quoted, double_quoted) = if self.check(TokenType::String) {
34949                    // Single-quoted string: COLLATE 'de_DE'
34950                    (self.advance().text, true, false)
34951                } else if self.check(TokenType::QuotedIdentifier) {
34952                    // Double-quoted identifier: COLLATE "de_DE"
34953                    (self.advance().text, false, true)
34954                } else {
34955                    // Unquoted identifier: COLLATE de_DE
34956                    (self.expect_identifier_or_keyword()?, false, false)
34957                };
34958                expr = Expression::Collation(Box::new(CollationExpr {
34959                    this: expr,
34960                    collation,
34961                    quoted,
34962                    double_quoted,
34963                }));
34964            } else if self.check(TokenType::DColon)
34965                || self.check(TokenType::DColonDollar)
34966                || self.check(TokenType::DColonPercent)
34967                || self.check(TokenType::DColonQMark)
34968            {
34969                // For SingleStore, :: variants are JSON path extraction
34970                // For other dialects, :: is cast syntax (PostgreSQL-style)
34971                if matches!(
34972                    self.config.dialect,
34973                    Some(crate::dialects::DialectType::SingleStore)
34974                ) {
34975                    // SingleStore JSON path extraction: expr::key, expr::$key, expr::%key, expr::?key
34976                    if self.match_token(TokenType::DColon) {
34977                        // ::key -> JSON_EXTRACT_JSON(expr, 'key')
34978                        let path_key =
34979                            if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
34980                                self.advance().text
34981                            } else if self.check(TokenType::Number) {
34982                                self.advance().text
34983                            } else if self.check(TokenType::QuotedIdentifier) {
34984                                self.advance().text
34985                            } else {
34986                                return Err(self.parse_error(
34987                                    "Expected identifier or number after :: in JSON path",
34988                                ));
34989                            };
34990                        expr = Expression::Function(Box::new(Function::new(
34991                            "JSON_EXTRACT_JSON".to_string(),
34992                            vec![expr, Expression::string(&path_key)],
34993                        )));
34994                    } else if self.match_token(TokenType::DColonDollar) {
34995                        // ::$key -> JSON_EXTRACT_STRING(expr, 'key')
34996                        let path_key =
34997                            if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
34998                                self.advance().text
34999                            } else if self.check(TokenType::Number) {
35000                                self.advance().text
35001                            } else {
35002                                return Err(self.parse_error(
35003                                    "Expected identifier or number after ::$ in JSON path",
35004                                ));
35005                            };
35006                        expr = Expression::Function(Box::new(Function::new(
35007                            "JSON_EXTRACT_STRING".to_string(),
35008                            vec![expr, Expression::string(&path_key)],
35009                        )));
35010                    } else if self.match_token(TokenType::DColonPercent) {
35011                        // ::%key -> JSON_EXTRACT_DOUBLE(expr, 'key')
35012                        let path_key =
35013                            if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
35014                                self.advance().text
35015                            } else if self.check(TokenType::Number) {
35016                                self.advance().text
35017                            } else {
35018                                return Err(self.parse_error(
35019                                    "Expected identifier or number after ::% in JSON path",
35020                                ));
35021                            };
35022                        expr = Expression::Function(Box::new(Function::new(
35023                            "JSON_EXTRACT_DOUBLE".to_string(),
35024                            vec![expr, Expression::string(&path_key)],
35025                        )));
35026                    } else if self.match_token(TokenType::DColonQMark) {
35027                        // ::?key -> SingleStoreJsonPathQMark function (for JSON_MATCH_ANY patterns)
35028                        let path_key =
35029                            if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
35030                                self.advance().text
35031                            } else if self.check(TokenType::Number) {
35032                                self.advance().text
35033                            } else {
35034                                return Err(self.parse_error(
35035                                    "Expected identifier or number after ::? in JSON path",
35036                                ));
35037                            };
35038                        // Use a special function name that SingleStore generator will recognize
35039                        expr = Expression::Function(Box::new(Function::new(
35040                            "__SS_JSON_PATH_QMARK__".to_string(),
35041                            vec![expr, Expression::string(&path_key)],
35042                        )));
35043                    }
35044                } else {
35045                    // PostgreSQL :: cast operator: expr::type
35046                    self.advance(); // consume DColon
35047                                    // Use parse_data_type_for_cast to avoid consuming subscripts as array dimensions
35048                    let data_type = self.parse_data_type_for_cast()?;
35049                    expr = Expression::Cast(Box::new(Cast {
35050                        this: expr,
35051                        to: data_type,
35052                        trailing_comments: Vec::new(),
35053                        double_colon_syntax: true,
35054                        format: None,
35055                        default: None,
35056                        inferred_type: None,
35057                    }));
35058                }
35059            } else if self.match_token(TokenType::ColonGt) {
35060                // SingleStore :> cast operator: expr :> type
35061                let data_type = self.parse_data_type_for_cast()?;
35062                expr = Expression::Cast(Box::new(Cast {
35063                    this: expr,
35064                    to: data_type,
35065                    trailing_comments: Vec::new(),
35066                    double_colon_syntax: false, // Use :> syntax in generator
35067                    format: None,
35068                    default: None,
35069                    inferred_type: None,
35070                }));
35071            } else if self.match_token(TokenType::NColonGt) {
35072                // SingleStore !:> try cast operator: expr !:> type
35073                let data_type = self.parse_data_type_for_cast()?;
35074                expr = Expression::TryCast(Box::new(Cast {
35075                    this: expr,
35076                    to: data_type,
35077                    trailing_comments: Vec::new(),
35078                    double_colon_syntax: false,
35079                    format: None,
35080                    default: None,
35081                    inferred_type: None,
35082                }));
35083            } else if self.match_token(TokenType::QDColon) {
35084                // Databricks ?:: try cast operator: expr?::type
35085                let data_type = self.parse_data_type_for_cast()?;
35086                expr = Expression::TryCast(Box::new(Cast {
35087                    this: expr,
35088                    to: data_type,
35089                    trailing_comments: Vec::new(),
35090                    double_colon_syntax: true, // Uses :: style syntax
35091                    format: None,
35092                    default: None,
35093                    inferred_type: None,
35094                }));
35095            } else if self.check(TokenType::Arrow)
35096                && !matches!(
35097                    self.config.dialect,
35098                    Some(crate::dialects::DialectType::ClickHouse)
35099                )
35100            {
35101                self.advance(); // consume ->
35102                                // JSON extract operator: expr -> path (PostgreSQL, MySQL, DuckDB)
35103                                // Use parse_json_path_operand to get only the immediate operand for proper left-to-right associativity
35104                let path = self.parse_json_path_operand()?;
35105                expr = Expression::JsonExtract(Box::new(JsonExtractFunc {
35106                    this: expr,
35107                    path,
35108                    returning: None,
35109                    arrow_syntax: true,
35110                    hash_arrow_syntax: false,
35111                    wrapper_option: None,
35112                    quotes_option: None,
35113                    on_scalar_string: false,
35114                    on_error: None,
35115                }));
35116            } else if self.match_token(TokenType::DArrow) {
35117                // JSON extract text operator: expr ->> path (PostgreSQL, MySQL, DuckDB)
35118                // Use parse_json_path_operand to get only the immediate operand for proper left-to-right associativity
35119                let path = self.parse_json_path_operand()?;
35120                expr = Expression::JsonExtractScalar(Box::new(JsonExtractFunc {
35121                    this: expr,
35122                    path,
35123                    returning: None,
35124                    arrow_syntax: true,
35125                    hash_arrow_syntax: false,
35126                    wrapper_option: None,
35127                    quotes_option: None,
35128                    on_scalar_string: false,
35129                    on_error: None,
35130                }));
35131            } else if self.match_token(TokenType::HashArrow) {
35132                // JSONB path extract: expr #> path (PostgreSQL)
35133                // Use parse_json_path_operand to get only the immediate operand for proper left-to-right associativity
35134                let path = self.parse_json_path_operand()?;
35135                expr = Expression::JsonExtractPath(Box::new(JsonPathFunc {
35136                    this: expr,
35137                    paths: vec![path],
35138                }));
35139            } else if self.match_token(TokenType::DHashArrow) {
35140                // JSONB path extract text: expr #>> path (PostgreSQL)
35141                // For now, use JsonExtractScalar since the result is text
35142                // Use parse_json_path_operand to get only the immediate operand for proper left-to-right associativity
35143                let path = self.parse_json_path_operand()?;
35144                expr = Expression::JsonExtractScalar(Box::new(JsonExtractFunc {
35145                    this: expr,
35146                    path,
35147                    returning: None,
35148                    arrow_syntax: false,     // This is #>> not ->>
35149                    hash_arrow_syntax: true, // Mark as #>> operator
35150                    wrapper_option: None,
35151                    quotes_option: None,
35152                    on_scalar_string: false,
35153                    on_error: None,
35154                }));
35155            } else if self.check_join_marker() {
35156                // Oracle/Redshift-style outer join marker: column (+)
35157                // Only applies to Column expressions
35158                if let Expression::Column(col) = &mut expr {
35159                    self.advance(); // consume (
35160                    self.advance(); // consume +
35161                    self.advance(); // consume )
35162                    col.join_mark = true;
35163                    // Don't continue - join marker is terminal (no more postfix ops after it)
35164                    break;
35165                }
35166                // If not a Column, just break - the marker is invalid in this context
35167                else {
35168                    break;
35169                }
35170            } else {
35171                break;
35172            }
35173        }
35174        Ok(expr)
35175    }
35176
35177    /// Check if the next tokens are the Oracle-style join marker (+)
35178    fn check_join_marker(&self) -> bool {
35179        self.check(TokenType::LParen)
35180            && self
35181                .peek_nth(1)
35182                .map_or(false, |t| t.token_type == TokenType::Plus)
35183            && self
35184                .peek_nth(2)
35185                .map_or(false, |t| t.token_type == TokenType::RParen)
35186    }
35187
35188    /// Parse OVER clause
35189    fn parse_over_clause(&mut self) -> Result<Over> {
35190        // Handle OVER window_name (without parentheses)
35191        if !self.check(TokenType::LParen) {
35192            // OVER window_name - just a named window reference
35193            let window_name = self.expect_identifier_or_keyword()?;
35194            return Ok(Over {
35195                window_name: Some(Identifier::new(window_name)),
35196                partition_by: Vec::new(),
35197                order_by: Vec::new(),
35198                frame: None,
35199                alias: None,
35200            });
35201        }
35202
35203        self.expect(TokenType::LParen)?;
35204
35205        // Check for named window reference at start of OVER clause
35206        // e.g., OVER (w ORDER BY y) - w is a window name that can be extended
35207        let window_name = if (self.check(TokenType::Identifier)
35208            || self.check(TokenType::Var)
35209            || self.check_keyword())
35210            && !self.check(TokenType::Partition)
35211            && !self.check(TokenType::Order)
35212            && !self.check(TokenType::Rows)
35213            && !self.check(TokenType::Range)
35214            && !self.check(TokenType::Groups)
35215            && !self.check(TokenType::Distribute)
35216            && !self.check(TokenType::Sort)
35217        {
35218            // Look ahead to see if next token indicates this is a window name
35219            let pos = self.current;
35220            let name = self.advance().text;
35221            // If next token is a keyword that can follow a window name, this is a named reference
35222            if self.check(TokenType::Order)
35223                || self.check(TokenType::Partition)
35224                || self.check(TokenType::Rows)
35225                || self.check(TokenType::Range)
35226                || self.check(TokenType::Groups)
35227                || self.check(TokenType::RParen)
35228                || self.check(TokenType::Distribute)
35229                || self.check(TokenType::Sort)
35230            {
35231                Some(Identifier::new(name))
35232            } else {
35233                // Not a named window, restore position
35234                self.current = pos;
35235                None
35236            }
35237        } else {
35238            None
35239        };
35240
35241        // Parse PARTITION BY or DISTRIBUTE BY (Hive uses DISTRIBUTE BY in window specs)
35242        let partition_by = if self.match_keywords(&[TokenType::Partition, TokenType::By]) {
35243            self.parse_expression_list()?
35244        } else if self.match_keywords(&[TokenType::Distribute, TokenType::By]) {
35245            // Hive: DISTRIBUTE BY is equivalent to PARTITION BY in window specs
35246            self.parse_expression_list()?
35247        } else {
35248            Vec::new()
35249        };
35250
35251        // Parse ORDER BY or SORT BY (Hive uses SORT BY in window specs)
35252        let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By])
35253            || self.match_keywords(&[TokenType::Sort, TokenType::By])
35254        {
35255            let mut exprs = Vec::new();
35256            loop {
35257                let expr = self.parse_expression()?;
35258                let (desc, explicit_asc) = if self.match_token(TokenType::Desc) {
35259                    (true, false)
35260                } else if self.match_token(TokenType::Asc) {
35261                    (false, true)
35262                } else {
35263                    (false, false)
35264                };
35265                // ClickHouse/SQL: COLLATE 'collation' in window ORDER BY
35266                if self.match_token(TokenType::Collate) {
35267                    // Consume collation name (string or identifier)
35268                    if self.check(TokenType::String) {
35269                        self.advance();
35270                    } else if self.check(TokenType::QuotedIdentifier) {
35271                        self.advance();
35272                    } else {
35273                        let _ = self.expect_identifier_or_keyword();
35274                    }
35275                }
35276                let nulls_first = if self.match_token(TokenType::Nulls) {
35277                    if self.match_token(TokenType::First) {
35278                        Some(true)
35279                    } else if self.match_token(TokenType::Last) {
35280                        Some(false)
35281                    } else {
35282                        return Err(self.parse_error("Expected FIRST or LAST after NULLS"));
35283                    }
35284                } else {
35285                    None
35286                };
35287                // ClickHouse: WITH FILL in window ORDER BY
35288                let with_fill = if matches!(
35289                    self.config.dialect,
35290                    Some(crate::dialects::DialectType::ClickHouse)
35291                ) && self.check(TokenType::With)
35292                    && self.current + 1 < self.tokens.len()
35293                    && self.tokens[self.current + 1]
35294                        .text
35295                        .eq_ignore_ascii_case("FILL")
35296                {
35297                    self.advance(); // consume WITH
35298                    self.advance(); // consume FILL
35299                    let from_ = if self.match_token(TokenType::From) {
35300                        Some(Box::new(self.parse_or()?))
35301                    } else {
35302                        None
35303                    };
35304                    let to = if self.match_text_seq(&["TO"]) {
35305                        Some(Box::new(self.parse_or()?))
35306                    } else {
35307                        None
35308                    };
35309                    let step = if self.match_text_seq(&["STEP"]) {
35310                        Some(Box::new(self.parse_or()?))
35311                    } else {
35312                        None
35313                    };
35314                    let staleness = if self.match_text_seq(&["STALENESS"]) {
35315                        Some(Box::new(self.parse_or()?))
35316                    } else {
35317                        None
35318                    };
35319                    let interpolate = if self.match_text_seq(&["INTERPOLATE"]) {
35320                        if self.match_token(TokenType::LParen) {
35321                            let items = self.parse_expression_list()?;
35322                            self.expect(TokenType::RParen)?;
35323                            if items.len() == 1 {
35324                                Some(Box::new(items.into_iter().next().unwrap()))
35325                            } else {
35326                                Some(Box::new(Expression::Tuple(Box::new(
35327                                    crate::expressions::Tuple { expressions: items },
35328                                ))))
35329                            }
35330                        } else {
35331                            None
35332                        }
35333                    } else {
35334                        None
35335                    };
35336                    Some(Box::new(WithFill {
35337                        from_,
35338                        to,
35339                        step,
35340                        staleness,
35341                        interpolate,
35342                    }))
35343                } else {
35344                    None
35345                };
35346                exprs.push(Ordered {
35347                    this: expr,
35348                    desc,
35349                    nulls_first,
35350                    explicit_asc,
35351                    with_fill,
35352                });
35353                if !self.match_token(TokenType::Comma) {
35354                    break;
35355                }
35356            }
35357            exprs
35358        } else {
35359            Vec::new()
35360        };
35361
35362        // Parse window frame
35363        let frame = self.parse_window_frame()?;
35364
35365        self.expect(TokenType::RParen)?;
35366
35367        Ok(Over {
35368            window_name,
35369            partition_by,
35370            order_by,
35371            frame,
35372            alias: None,
35373        })
35374    }
35375
35376    /// Parse window frame specification (ROWS/RANGE/GROUPS BETWEEN ...)
35377    fn parse_window_frame(&mut self) -> Result<Option<WindowFrame>> {
35378        let (kind, kind_text) = if self.match_token(TokenType::Rows) {
35379            (
35380                WindowFrameKind::Rows,
35381                self.tokens[self.current - 1].text.clone(),
35382            )
35383        } else if self.match_token(TokenType::Range) {
35384            (
35385                WindowFrameKind::Range,
35386                self.tokens[self.current - 1].text.clone(),
35387            )
35388        } else if self.match_token(TokenType::Groups) {
35389            (
35390                WindowFrameKind::Groups,
35391                self.tokens[self.current - 1].text.clone(),
35392            )
35393        } else {
35394            return Ok(None);
35395        };
35396
35397        // Parse BETWEEN or single bound
35398        let (start, start_side_text, end, end_side_text) = if self.match_token(TokenType::Between) {
35399            let (start, st) = self.parse_window_frame_bound()?;
35400            self.expect(TokenType::And)?;
35401            let (end, et) = self.parse_window_frame_bound()?;
35402            (start, st, Some(end), et)
35403        } else {
35404            let (start, st) = self.parse_window_frame_bound()?;
35405            (start, st, None, None)
35406        };
35407
35408        // Parse optional EXCLUDE clause
35409        let exclude = if self.match_token(TokenType::Exclude) {
35410            if self.match_token(TokenType::Current) {
35411                self.expect(TokenType::Row)?;
35412                Some(WindowFrameExclude::CurrentRow)
35413            } else if self.match_token(TokenType::Group) {
35414                Some(WindowFrameExclude::Group)
35415            } else if self.match_token(TokenType::Ties) {
35416                Some(WindowFrameExclude::Ties)
35417            } else if self.match_token(TokenType::No) {
35418                self.expect(TokenType::Others)?;
35419                Some(WindowFrameExclude::NoOthers)
35420            } else {
35421                return Err(self
35422                    .parse_error("Expected CURRENT ROW, GROUP, TIES, or NO OTHERS after EXCLUDE"));
35423            }
35424        } else {
35425            None
35426        };
35427
35428        Ok(Some(WindowFrame {
35429            kind,
35430            start,
35431            end,
35432            exclude,
35433            kind_text: Some(kind_text),
35434            start_side_text,
35435            end_side_text,
35436        }))
35437    }
35438
35439    /// Parse a window frame bound, returning the bound and the original text of the side keyword
35440    fn parse_window_frame_bound(&mut self) -> Result<(WindowFrameBound, Option<String>)> {
35441        if self.match_token(TokenType::Current) {
35442            self.expect(TokenType::Row)?;
35443            Ok((WindowFrameBound::CurrentRow, None))
35444        } else if self.match_token(TokenType::Unbounded) {
35445            if self.match_token(TokenType::Preceding) {
35446                let text = self.tokens[self.current - 1].text.clone();
35447                Ok((WindowFrameBound::UnboundedPreceding, Some(text)))
35448            } else if self.match_token(TokenType::Following) {
35449                let text = self.tokens[self.current - 1].text.clone();
35450                Ok((WindowFrameBound::UnboundedFollowing, Some(text)))
35451            } else {
35452                Err(self.parse_error("Expected PRECEDING or FOLLOWING after UNBOUNDED"))
35453            }
35454        } else if self.match_token(TokenType::Preceding) {
35455            let text = self.tokens[self.current - 1].text.clone();
35456            // PRECEDING [value] (inverted syntax for some dialects)
35457            // If no value follows (e.g., just "PRECEDING" or "PRECEDING)"), use BarePreceding
35458            if self.check(TokenType::RParen) || self.check(TokenType::Comma) {
35459                Ok((WindowFrameBound::BarePreceding, Some(text)))
35460            } else {
35461                let expr = self.parse_primary()?;
35462                Ok((WindowFrameBound::Preceding(Box::new(expr)), Some(text)))
35463            }
35464        } else if self.match_token(TokenType::Following) {
35465            let text = self.tokens[self.current - 1].text.clone();
35466            // FOLLOWING [value] (inverted syntax for some dialects)
35467            // If no value follows (e.g., just "FOLLOWING" or "FOLLOWING)"), use BareFollowing
35468            if self.check(TokenType::RParen) || self.check(TokenType::Comma) {
35469                Ok((WindowFrameBound::BareFollowing, Some(text)))
35470            } else {
35471                let expr = self.parse_primary()?;
35472                Ok((WindowFrameBound::Following(Box::new(expr)), Some(text)))
35473            }
35474        } else {
35475            // <expr> PRECEDING | FOLLOWING (standard syntax)
35476            // Use parse_addition to handle expressions like 1 + 1 PRECEDING
35477            let expr = self.parse_addition()?;
35478            if self.match_token(TokenType::Preceding) {
35479                let text = self.tokens[self.current - 1].text.clone();
35480                Ok((WindowFrameBound::Preceding(Box::new(expr)), Some(text)))
35481            } else if self.match_token(TokenType::Following) {
35482                let text = self.tokens[self.current - 1].text.clone();
35483                Ok((WindowFrameBound::Following(Box::new(expr)), Some(text)))
35484            } else {
35485                // Bare numeric bounds without PRECEDING/FOLLOWING
35486                // (e.g., RANGE BETWEEN 1 AND 3)
35487                Ok((WindowFrameBound::Value(Box::new(expr)), None))
35488            }
35489        }
35490    }
35491
35492    /// Try to parse INTERVAL expression. Returns None if INTERVAL should be treated as identifier.
35493    fn try_parse_interval(&mut self) -> Result<Option<Expression>> {
35494        self.try_parse_interval_internal(true)
35495    }
35496
35497    /// Internal interval parsing that optionally matches the INTERVAL keyword.
35498    /// When match_interval is false, it parses a chained interval value-unit pair
35499    /// without requiring the INTERVAL keyword.
35500    fn try_parse_interval_internal(&mut self, match_interval: bool) -> Result<Option<Expression>> {
35501        let start_pos = self.current;
35502
35503        // Consume the INTERVAL keyword if required
35504        if match_interval {
35505            if !self.check(TokenType::Interval) {
35506                return Ok(None);
35507            }
35508            self.expect(TokenType::Interval)?;
35509
35510            // Check if next token is an operator - if so, INTERVAL is used as identifier
35511            if self.check(TokenType::Eq)
35512                || self.check(TokenType::Neq)
35513                || self.check(TokenType::Lt)
35514                || self.check(TokenType::Gt)
35515                || self.check(TokenType::Lte)
35516                || self.check(TokenType::Gte)
35517                || self.check(TokenType::And)
35518                || self.check(TokenType::Or)
35519                || self.check(TokenType::Is)
35520                || self.check(TokenType::In)
35521                || self.check(TokenType::Like)
35522                || self.check(TokenType::ILike)
35523                || self.check(TokenType::Between)
35524                || self.check(TokenType::Then)
35525                || self.check(TokenType::Else)
35526                || self.check(TokenType::When)
35527                || self.check(TokenType::End)
35528                || self.check(TokenType::Comma)
35529                || self.check(TokenType::RParen)
35530                || self.check(TokenType::DColon)
35531            {
35532                // INTERVAL is used as identifier
35533                self.current = start_pos;
35534                return Ok(None);
35535            }
35536        }
35537
35538        // Parse the value after INTERVAL
35539        // IMPORTANT: For string literals, don't use parse_primary() because it calls
35540        // maybe_parse_subscript() which would consume postfix operators like ::TYPE.
35541        // Those should be applied to the full INTERVAL expression, not just the value inside.
35542        // e.g., INTERVAL '1 hour'::VARCHAR should be CAST(INTERVAL '1 hour' AS VARCHAR)
35543        //       not INTERVAL CAST('1 hour' AS VARCHAR)
35544        // For non-string values, use parse_addition() to handle expressions like
35545        // INTERVAL 2 * 2 MONTH or INTERVAL DAYOFMONTH(dt) - 1 DAY (MySQL syntax)
35546        // This matches Python sqlglot's _parse_term() behavior which handles +, -, *, /, %
35547        let value = if self.check(TokenType::String) {
35548            let token = self.advance();
35549            Some(Expression::Literal(Literal::String(token.text)))
35550        } else if !self.is_at_end() && !self.is_statement_terminator() {
35551            Some(self.parse_addition()?)
35552        } else {
35553            None
35554        };
35555
35556        // Check if we should treat INTERVAL as an identifier instead
35557        // This happens when:
35558        // - No value was parsed, OR
35559        // - Value is an unqualified, unquoted column reference AND
35560        //   what follows is NOT a valid interval unit
35561        if let Some(ref val) = value {
35562            if let Expression::Column(col) = val {
35563                // Column without table qualifier
35564                if col.table.is_none() {
35565                    // Check if identifier is quoted
35566                    let is_quoted = col.name.quoted;
35567                    if !is_quoted {
35568                        // Check if next token is a valid interval unit
35569                        if !self.is_valid_interval_unit() && !self.check(TokenType::As) {
35570                            // Backtrack - INTERVAL is used as identifier
35571                            self.current = start_pos;
35572                            return Ok(None);
35573                        }
35574                    }
35575                }
35576            } else if let Expression::Identifier(id) = val {
35577                // Bare identifier without table qualifier
35578                let is_quoted = id.quoted;
35579                if !is_quoted {
35580                    // Check if next token is a valid interval unit
35581                    if !self.is_valid_interval_unit() && !self.check(TokenType::As) {
35582                        // Backtrack - INTERVAL is used as identifier
35583                        self.current = start_pos;
35584                        return Ok(None);
35585                    }
35586                }
35587            }
35588        } else if self.is_at_end() || self.is_statement_terminator() {
35589            // No value, and at end/terminator - INTERVAL is an identifier
35590            self.current = start_pos;
35591            return Ok(None);
35592        }
35593
35594        // Now parse the optional unit
35595        let mut unit = self.try_parse_interval_unit()?;
35596
35597        // Split compound interval strings like '1 day' into value '1' and unit DAY
35598        // This matches Python sqlglot's INTERVAL_STRING_RE behavior
35599        // Only apply in generic mode -- dialects like PostgreSQL preserve compound strings
35600        let is_generic = self.config.dialect.is_none()
35601            || matches!(
35602                self.config.dialect,
35603                Some(crate::dialects::DialectType::Generic)
35604            );
35605        let value = if unit.is_none() && is_generic {
35606            if let Some(Expression::Literal(Literal::String(ref s))) = value {
35607                let trimmed = s.trim();
35608                // Match pattern: optional negative sign, digits (optional decimal), space(s), alpha unit
35609                let mut split_pos = None;
35610                let mut found_space = false;
35611                let bytes = trimmed.as_bytes();
35612                let mut i = 0;
35613                // Skip optional negative sign
35614                if i < bytes.len() && bytes[i] == b'-' {
35615                    i += 1;
35616                }
35617                // Expect digits
35618                let digit_start = i;
35619                while i < bytes.len() && bytes[i].is_ascii_digit() {
35620                    i += 1;
35621                }
35622                if i > digit_start {
35623                    // Optional decimal part
35624                    if i < bytes.len() && bytes[i] == b'.' {
35625                        i += 1;
35626                        while i < bytes.len() && bytes[i].is_ascii_digit() {
35627                            i += 1;
35628                        }
35629                    }
35630                    // Expect whitespace
35631                    let space_start = i;
35632                    while i < bytes.len() && bytes[i].is_ascii_whitespace() {
35633                        i += 1;
35634                    }
35635                    if i > space_start {
35636                        found_space = true;
35637                        split_pos = Some(i);
35638                    }
35639                }
35640                if found_space {
35641                    if let Some(pos) = split_pos {
35642                        let unit_text = &trimmed[pos..];
35643                        // Verify it's all alpha
35644                        if !unit_text.is_empty()
35645                            && unit_text.chars().all(|c| c.is_ascii_alphabetic())
35646                        {
35647                            let num_part = trimmed[..pos].trim_end().to_string();
35648                            let unit_upper = unit_text.to_uppercase();
35649                            // Try to parse as interval unit
35650                            if let Some(parsed_unit) =
35651                                Self::parse_interval_unit_from_string(&unit_upper)
35652                            {
35653                                // Check if the original text had an 'S' suffix (plural)
35654                                let is_plural = unit_text.to_uppercase().ends_with('S');
35655                                unit = Some(IntervalUnitSpec::Simple {
35656                                    unit: parsed_unit,
35657                                    use_plural: is_plural,
35658                                });
35659                                Some(Expression::Literal(Literal::String(num_part)))
35660                            } else {
35661                                value
35662                            }
35663                        } else {
35664                            value
35665                        }
35666                    } else {
35667                        value
35668                    }
35669                } else {
35670                    value
35671                }
35672            } else {
35673                value
35674            }
35675        } else {
35676            value
35677        };
35678
35679        // Convert number literals to string literals in intervals (canonical form).
35680        // Most dialects support INTERVAL '5' DAY, so we normalize to this form
35681        // for easier transpilation. This matches Python sqlglot's behavior in
35682        // _parse_interval_span: "if this and this.is_number: this = exp.Literal.string(this.to_py())"
35683        let value = match value {
35684            Some(Expression::Literal(Literal::Number(n))) if unit.is_some() => {
35685                Some(Expression::Literal(Literal::String(n)))
35686            }
35687            other => other,
35688        };
35689
35690        let interval = Expression::Interval(Box::new(Interval { this: value, unit }));
35691
35692        // Support for chained multi-unit interval syntax (Spark/Hive):
35693        // INTERVAL '5' HOURS '30' MINUTES -> INTERVAL '5' HOURS + INTERVAL '30' MINUTES
35694        // This is done by optionally matching a PLUS sign, and if followed by
35695        // another string or number (without INTERVAL keyword), recursively parsing
35696        // and creating an Add expression.
35697        let before_plus = self.current;
35698        let has_plus = self.match_token(TokenType::Plus);
35699
35700        // Check if followed by a STRING or NUMBER (potential chained interval)
35701        if self.check(TokenType::String) || self.check(TokenType::Number) {
35702            // Recursively parse the chained interval without the INTERVAL keyword
35703            if let Some(next_interval) = self.try_parse_interval_internal(false)? {
35704                return Ok(Some(Expression::Add(Box::new(BinaryOp::new(
35705                    interval,
35706                    next_interval,
35707                )))));
35708            }
35709        }
35710
35711        // If we consumed a PLUS but didn't find a chained interval, backtrack
35712        if has_plus {
35713            self.current = before_plus;
35714        }
35715
35716        Ok(Some(interval))
35717    }
35718
35719    /// Check if current token is a valid interval unit
35720    fn is_valid_interval_unit(&self) -> bool {
35721        if self.is_at_end() {
35722            return false;
35723        }
35724        let text = self.peek().text.to_uppercase();
35725        matches!(
35726            text.as_str(),
35727            "YEAR"
35728                | "YEARS"
35729                | "MONTH"
35730                | "MONTHS"
35731                | "DAY"
35732                | "DAYS"
35733                | "HOUR"
35734                | "HOURS"
35735                | "MINUTE"
35736                | "MINUTES"
35737                | "SECOND"
35738                | "SECONDS"
35739                | "MILLISECOND"
35740                | "MILLISECONDS"
35741                | "MICROSECOND"
35742                | "MICROSECONDS"
35743                | "NANOSECOND"
35744                | "NANOSECONDS"
35745                | "WEEK"
35746                | "WEEKS"
35747                | "QUARTER"
35748                | "QUARTERS"
35749        )
35750    }
35751
35752    /// Check if current token terminates a statement/expression context
35753    fn is_statement_terminator(&self) -> bool {
35754        if self.is_at_end() {
35755            return true;
35756        }
35757        matches!(
35758            self.peek().token_type,
35759            TokenType::Semicolon
35760                | TokenType::RParen
35761                | TokenType::RBracket
35762                | TokenType::Comma
35763                | TokenType::From
35764                | TokenType::Where
35765                | TokenType::GroupBy
35766                | TokenType::Having
35767                | TokenType::OrderBy
35768                | TokenType::Limit
35769                | TokenType::Union
35770                | TokenType::Intersect
35771                | TokenType::Except
35772                | TokenType::End
35773                | TokenType::Then
35774                | TokenType::Else
35775                | TokenType::When
35776        )
35777    }
35778
35779    /// Try to parse interval unit - returns None if no unit present
35780    fn try_parse_interval_unit(&mut self) -> Result<Option<IntervalUnitSpec>> {
35781        // First, check if there's a function (like CURRENT_DATE, CAST(...))
35782        if self.is_function_start() {
35783            let func = self.parse_primary()?;
35784            return Ok(Some(IntervalUnitSpec::Expr(Box::new(func))));
35785        }
35786
35787        // Try to parse a simple unit or span
35788        if let Some((unit, use_plural)) = self.try_parse_simple_interval_unit()? {
35789            // Check for "TO" to make it a span (e.g., YEAR TO MONTH)
35790            // Use lookahead to avoid consuming TO when it's part of WITH FILL
35791            if self.check_keyword_text("TO") {
35792                let saved = self.current;
35793                self.advance(); // consume TO
35794                if let Some((end_unit, _)) = self.try_parse_simple_interval_unit()? {
35795                    return Ok(Some(IntervalUnitSpec::Span(IntervalSpan {
35796                        this: unit,
35797                        expression: end_unit,
35798                    })));
35799                } else {
35800                    // Not followed by a valid interval unit — backtrack
35801                    self.current = saved;
35802                }
35803            }
35804            return Ok(Some(IntervalUnitSpec::Simple { unit, use_plural }));
35805        }
35806
35807        // No unit found
35808        Ok(None)
35809    }
35810
35811    /// Parse an interval unit from a string (used for splitting compound interval strings)
35812    fn parse_interval_unit_from_string(s: &str) -> Option<IntervalUnit> {
35813        // Strip trailing 'S' for plural forms
35814        let base = if s.ends_with('S') && s.len() > 1 {
35815            &s[..s.len() - 1]
35816        } else {
35817            s
35818        };
35819        match base {
35820            "YEAR" => Some(IntervalUnit::Year),
35821            "MONTH" => Some(IntervalUnit::Month),
35822            "DAY" => Some(IntervalUnit::Day),
35823            "HOUR" => Some(IntervalUnit::Hour),
35824            "MINUTE" => Some(IntervalUnit::Minute),
35825            "SECOND" => Some(IntervalUnit::Second),
35826            "MILLISECOND" => Some(IntervalUnit::Millisecond),
35827            "MICROSECOND" => Some(IntervalUnit::Microsecond),
35828            "QUARTER" => Some(IntervalUnit::Quarter),
35829            "WEEK" => Some(IntervalUnit::Week),
35830            _ => None,
35831        }
35832    }
35833
35834    /// Try to parse a simple interval unit (YEAR, MONTH, etc.) - returns (unit, is_plural)
35835    fn try_parse_simple_interval_unit(&mut self) -> Result<Option<(IntervalUnit, bool)>> {
35836        if self.is_at_end() {
35837            return Ok(None);
35838        }
35839
35840        let text_upper = self.peek().text.to_uppercase();
35841        let result = match text_upper.as_str() {
35842            "YEAR" => Some((IntervalUnit::Year, false)),
35843            "YEARS" => Some((IntervalUnit::Year, true)),
35844            "MONTH" => Some((IntervalUnit::Month, false)),
35845            "MONTHS" => Some((IntervalUnit::Month, true)),
35846            "DAY" => Some((IntervalUnit::Day, false)),
35847            "DAYS" => Some((IntervalUnit::Day, true)),
35848            "HOUR" => Some((IntervalUnit::Hour, false)),
35849            "HOURS" => Some((IntervalUnit::Hour, true)),
35850            "MINUTE" => Some((IntervalUnit::Minute, false)),
35851            "MINUTES" => Some((IntervalUnit::Minute, true)),
35852            "SECOND" => Some((IntervalUnit::Second, false)),
35853            "SECONDS" => Some((IntervalUnit::Second, true)),
35854            "MILLISECOND" => Some((IntervalUnit::Millisecond, false)),
35855            "MILLISECONDS" => Some((IntervalUnit::Millisecond, true)),
35856            "MICROSECOND" => Some((IntervalUnit::Microsecond, false)),
35857            "MICROSECONDS" => Some((IntervalUnit::Microsecond, true)),
35858            "NANOSECOND" => Some((IntervalUnit::Nanosecond, false)),
35859            "NANOSECONDS" => Some((IntervalUnit::Nanosecond, true)),
35860            "QUARTER" => Some((IntervalUnit::Quarter, false)),
35861            "QUARTERS" => Some((IntervalUnit::Quarter, true)),
35862            "WEEK" => Some((IntervalUnit::Week, false)),
35863            "WEEKS" => Some((IntervalUnit::Week, true)),
35864            _ => None,
35865        };
35866
35867        if result.is_some() {
35868            self.advance(); // consume the unit token
35869        }
35870
35871        Ok(result)
35872    }
35873
35874    /// Check if current position starts a function call or no-paren function
35875    fn is_function_start(&self) -> bool {
35876        if self.is_at_end() {
35877            return false;
35878        }
35879        let token_type = self.peek().token_type;
35880
35881        // Check NO_PAREN_FUNCTIONS configuration map
35882        if NO_PAREN_FUNCTIONS.contains(&token_type) {
35883            if !matches!(
35884                self.config.dialect,
35885                Some(crate::dialects::DialectType::ClickHouse)
35886            ) || token_type != TokenType::CurrentTimestamp
35887            {
35888                return true;
35889            }
35890        }
35891
35892        // Cast functions are always functions
35893        if matches!(
35894            token_type,
35895            TokenType::Cast | TokenType::TryCast | TokenType::SafeCast
35896        ) {
35897            return true;
35898        }
35899
35900        // Check NO_PAREN_FUNCTION_NAMES for string-based lookup
35901        // (handles cases where functions are tokenized as Var/Identifier)
35902        let text_upper = self.peek().text.to_uppercase();
35903        if crate::function_registry::is_no_paren_function_name_upper(text_upper.as_str()) {
35904            if !matches!(
35905                self.config.dialect,
35906                Some(crate::dialects::DialectType::ClickHouse)
35907            ) || text_upper.as_str() != "CURRENT_TIMESTAMP"
35908            {
35909                return true;
35910            }
35911        }
35912
35913        // Identifier followed by left paren (function call)
35914        if self.is_identifier_token() && self.check_next(TokenType::LParen) {
35915            return true;
35916        }
35917
35918        false
35919    }
35920
35921    /// Try to parse Oracle interval span after an expression.
35922    /// Syntax: (expr) DAY[(precision)] TO SECOND[(fractional_precision)]
35923    /// This is used in Oracle for interval expressions like:
35924    /// (SYSTIMESTAMP - order_date) DAY(9) TO SECOND(3)
35925    fn try_parse_oracle_interval_span(&mut self, expr: Expression) -> Result<Expression> {
35926        let start_pos = self.current;
35927
35928        // Check if current token is an interval unit keyword (DAY, HOUR, MINUTE, SECOND, YEAR, MONTH)
35929        let start_unit_name = if !self.is_at_end() {
35930            let text = self.peek().text.to_uppercase();
35931            if matches!(
35932                text.as_str(),
35933                "DAY" | "HOUR" | "MINUTE" | "SECOND" | "YEAR" | "MONTH"
35934            ) {
35935                Some(text)
35936            } else {
35937                None
35938            }
35939        } else {
35940            None
35941        };
35942
35943        if start_unit_name.is_none() {
35944            return Ok(expr);
35945        }
35946
35947        let start_unit_name = start_unit_name.unwrap();
35948        self.advance(); // consume the unit keyword
35949
35950        // Parse optional precision: DAY(9) or just DAY
35951        let start_unit = if self.match_token(TokenType::LParen) {
35952            // Parse precision
35953            let precision = self.parse_expression()?;
35954            self.expect(TokenType::RParen)?;
35955            // Create a function-like expression for the unit with precision
35956            Expression::Anonymous(Box::new(Anonymous {
35957                this: Box::new(Expression::Identifier(Identifier {
35958                    name: start_unit_name.clone(),
35959                    quoted: false,
35960                    trailing_comments: Vec::new(),
35961                    span: None,
35962                })),
35963                expressions: vec![precision],
35964            }))
35965        } else {
35966            // Simple unit without precision
35967            Expression::Var(Box::new(Var {
35968                this: start_unit_name,
35969            }))
35970        };
35971
35972        // Check for TO keyword
35973        if !self.match_keyword("TO") {
35974            // Not an interval span, backtrack
35975            self.current = start_pos;
35976            return Ok(expr);
35977        }
35978
35979        // Parse end unit
35980        let end_unit_name = if !self.is_at_end() {
35981            let text = self.peek().text.to_uppercase();
35982            if matches!(
35983                text.as_str(),
35984                "DAY" | "HOUR" | "MINUTE" | "SECOND" | "YEAR" | "MONTH"
35985            ) {
35986                Some(text)
35987            } else {
35988                None
35989            }
35990        } else {
35991            None
35992        };
35993
35994        let end_unit_name = match end_unit_name {
35995            Some(name) => name,
35996            None => {
35997                // No valid end unit, backtrack
35998                self.current = start_pos;
35999                return Ok(expr);
36000            }
36001        };
36002
36003        self.advance(); // consume the end unit keyword
36004
36005        // Parse optional precision for end unit: SECOND(3) or just SECOND
36006        let end_unit = if self.match_token(TokenType::LParen) {
36007            // Parse fractional precision
36008            let precision = self.parse_expression()?;
36009            self.expect(TokenType::RParen)?;
36010            // Create a function-like expression for the unit with precision
36011            Expression::Anonymous(Box::new(Anonymous {
36012                this: Box::new(Expression::Identifier(Identifier {
36013                    name: end_unit_name.clone(),
36014                    quoted: false,
36015                    trailing_comments: Vec::new(),
36016                    span: None,
36017                })),
36018                expressions: vec![precision],
36019            }))
36020        } else {
36021            // Simple unit without precision
36022            Expression::Var(Box::new(Var {
36023                this: end_unit_name,
36024            }))
36025        };
36026
36027        // Create an Interval expression with ExprSpan unit
36028        Ok(Expression::Interval(Box::new(Interval {
36029            this: Some(expr),
36030            unit: Some(IntervalUnitSpec::ExprSpan(IntervalSpanExpr {
36031                this: Box::new(start_unit),
36032                expression: Box::new(end_unit),
36033            })),
36034        })))
36035    }
36036
36037    /// Check if the current position starts a typed column list (for table function aliases)
36038    /// like: (col1 type1, col2 type2)
36039    /// This peeks ahead to see if the first column name is followed by a type token,
36040    /// rather than a comma or closing paren (which would indicate simple column aliases).
36041    /// Used for PostgreSQL functions like JSON_TO_RECORDSET that have typed column definitions.
36042    fn check_typed_column_list(&self) -> bool {
36043        // We're positioned after '(' - check pattern: identifier type
36044        // If we see identifier followed by something that's not ',' or ')', it's typed
36045        if self.is_at_end() {
36046            return false;
36047        }
36048
36049        // Check if current is an identifier (column name)
36050        let has_identifier = self.check(TokenType::Identifier)
36051            || self.check(TokenType::QuotedIdentifier)
36052            || self.check(TokenType::Var);
36053
36054        if !has_identifier {
36055            return false;
36056        }
36057
36058        // Look at next token (after the identifier)
36059        let next_pos = self.current + 1;
36060        if next_pos >= self.tokens.len() {
36061            return false;
36062        }
36063
36064        let next_token = &self.tokens[next_pos];
36065
36066        // If next token is comma or rparen, it's simple column aliases
36067        if next_token.token_type == TokenType::Comma || next_token.token_type == TokenType::RParen {
36068            return false;
36069        }
36070
36071        // If next token could be a type name (identifier, var, or type keyword), it's typed columns
36072        // Check for type tokens or identifiers that could be type names
36073        TYPE_TOKENS.contains(&next_token.token_type)
36074            || next_token.token_type == TokenType::Identifier
36075            || next_token.token_type == TokenType::Var
36076    }
36077
36078    /// Check if current token is a no-paren function
36079    fn is_no_paren_function(&self) -> bool {
36080        if self.is_at_end() {
36081            return false;
36082        }
36083        let token_type = self.peek().token_type;
36084        if NO_PAREN_FUNCTIONS.contains(&token_type) {
36085            if !matches!(
36086                self.config.dialect,
36087                Some(crate::dialects::DialectType::ClickHouse)
36088            ) || token_type != TokenType::CurrentTimestamp
36089            {
36090                return true;
36091            }
36092        }
36093        let text_upper = self.peek().text.to_uppercase();
36094        if crate::function_registry::is_no_paren_function_name_upper(text_upper.as_str()) {
36095            if !matches!(
36096                self.config.dialect,
36097                Some(crate::dialects::DialectType::ClickHouse)
36098            ) || text_upper.as_str() != "CURRENT_TIMESTAMP"
36099            {
36100                return true;
36101            }
36102        }
36103        false
36104    }
36105
36106    /// Match a keyword by text (case-insensitive)
36107    fn match_keyword(&mut self, keyword: &str) -> bool {
36108        if self.is_at_end() {
36109            return false;
36110        }
36111        if self.peek().text.to_uppercase() == keyword {
36112            self.advance();
36113            true
36114        } else {
36115            false
36116        }
36117    }
36118
36119    /// Match a sequence of keywords by text (case-insensitive)
36120    fn match_text_seq(&mut self, keywords: &[&str]) -> bool {
36121        for (i, &kw) in keywords.iter().enumerate() {
36122            if self.current + i >= self.tokens.len() {
36123                return false;
36124            }
36125            if self.tokens[self.current + i].text.to_uppercase() != kw {
36126                return false;
36127            }
36128        }
36129        self.current += keywords.len();
36130        true
36131    }
36132
36133    /// Check (without consuming) if the next tokens match a sequence of keywords by text (case-insensitive)
36134    fn check_text_seq(&self, keywords: &[&str]) -> bool {
36135        for (i, &kw) in keywords.iter().enumerate() {
36136            if self.current + i >= self.tokens.len() {
36137                return false;
36138            }
36139            if self.tokens[self.current + i].text.to_uppercase() != kw {
36140                return false;
36141            }
36142        }
36143        true
36144    }
36145
36146    /// Match any of the given texts (case-insensitive)
36147    fn match_texts(&mut self, texts: &[&str]) -> bool {
36148        if self.is_at_end() {
36149            return false;
36150        }
36151        let current_text = self.peek().text.to_uppercase();
36152        for text in texts {
36153            if current_text == text.to_uppercase() {
36154                self.advance();
36155                return true;
36156            }
36157        }
36158        false
36159    }
36160
36161    /// Parse CASE expression
36162    fn parse_case(&mut self) -> Result<Expression> {
36163        self.expect(TokenType::Case)?;
36164        // Capture trailing comments from the CASE keyword (e.g., CASE /* test */ WHEN ...)
36165        let case_comments = self.previous_trailing_comments();
36166
36167        // Check for simple CASE (CASE expr WHEN ...)
36168        let operand = if !self.check(TokenType::When) {
36169            Some(self.parse_expression()?)
36170        } else {
36171            None
36172        };
36173
36174        let mut whens = Vec::new();
36175        while self.match_token(TokenType::When) {
36176            let condition = self.parse_expression()?;
36177            self.expect(TokenType::Then)?;
36178            let mut result = self.parse_expression()?;
36179            // ClickHouse: CASE WHEN x THEN 1 as alias WHEN y THEN alias / 2 END
36180            // Aliases can appear in CASE THEN expressions
36181            if matches!(
36182                self.config.dialect,
36183                Some(crate::dialects::DialectType::ClickHouse)
36184            ) && self.match_token(TokenType::As)
36185            {
36186                let alias = self.expect_identifier_or_keyword()?;
36187                result = Expression::Alias(Box::new(Alias {
36188                    this: result,
36189                    alias: Identifier::new(alias),
36190                    column_aliases: Vec::new(),
36191                    pre_alias_comments: Vec::new(),
36192                    trailing_comments: Vec::new(),
36193                    inferred_type: None,
36194                }));
36195            }
36196            whens.push((condition, result));
36197        }
36198
36199        let else_ = if self.match_token(TokenType::Else) {
36200            Some(self.parse_expression()?)
36201        } else {
36202            None
36203        };
36204
36205        self.expect(TokenType::End)?;
36206
36207        Ok(Expression::Case(Box::new(Case {
36208            operand,
36209            whens,
36210            else_,
36211            comments: case_comments,
36212            inferred_type: None,
36213        })))
36214    }
36215
36216    /// Parse CAST expression
36217    fn parse_cast(&mut self) -> Result<Expression> {
36218        self.expect(TokenType::Cast)?;
36219        self.expect(TokenType::LParen)?;
36220        // Use parse_or() instead of parse_expression() to avoid consuming AS
36221        // as an alias (e.g. CAST((1, 2) AS Tuple(a Int8, b Int16)))
36222        // Python sqlglot uses _parse_disjunction() here, which is equivalent.
36223        let expr = self.parse_or()?;
36224
36225        // ClickHouse: ternary operator inside CAST: CAST(cond ? true_val : false_val AS Type)
36226        let expr = if matches!(
36227            self.config.dialect,
36228            Some(crate::dialects::DialectType::ClickHouse)
36229        ) && self.match_token(TokenType::Parameter)
36230        {
36231            if self.check(TokenType::Colon) {
36232                return Err(
36233                    self.parse_error("Expected true expression after ? in ClickHouse ternary")
36234                );
36235            }
36236            let true_value = self.parse_or()?;
36237            let false_value = if self.match_token(TokenType::Colon) {
36238                self.parse_or()?
36239            } else {
36240                Expression::Null(Null)
36241            };
36242            Expression::IfFunc(Box::new(IfFunc {
36243                original_name: None,
36244                condition: expr,
36245                true_value,
36246                false_value: Some(false_value),
36247                inferred_type: None,
36248            }))
36249        } else {
36250            expr
36251        };
36252
36253        // ClickHouse: implicit alias in CAST: cast('1234' lhs AS UInt32) or cast('1234' lhs, 'UInt32')
36254        let expr = self.try_clickhouse_implicit_alias(expr);
36255
36256        // ClickHouse: CAST(expr, 'type_string') or CAST(expr, expression) syntax with comma instead of AS
36257        if matches!(
36258            self.config.dialect,
36259            Some(crate::dialects::DialectType::ClickHouse)
36260        ) && self.match_token(TokenType::Comma)
36261        {
36262            // Parse as expression to handle concat and other operations: CAST(x, 'Str' || 'ing')
36263            let type_expr = self.parse_expression()?;
36264            // ClickHouse: alias on type expr: cast('1234' lhs, 'UInt32' rhs) or cast('1234', 'UInt32' AS rhs)
36265            let type_expr = self.try_clickhouse_func_arg_alias(type_expr);
36266            self.expect(TokenType::RParen)?;
36267            let _trailing_comments = self.previous_trailing_comments();
36268            return Ok(Expression::CastToStrType(Box::new(CastToStrType {
36269                this: Box::new(expr),
36270                to: Some(Box::new(type_expr)),
36271            })));
36272        }
36273
36274        self.expect(TokenType::As)?;
36275
36276        // ClickHouse: CAST(expr AS alias AS Type) — inner alias before type
36277        // If the next token is an identifier followed by AS, treat it as an alias
36278        let expr = if matches!(
36279            self.config.dialect,
36280            Some(crate::dialects::DialectType::ClickHouse)
36281        ) && (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
36282            && self
36283                .peek_nth(1)
36284                .map_or(false, |t| t.token_type == TokenType::As)
36285        {
36286            let alias = self.expect_identifier_or_keyword_with_quoted()?;
36287            self.expect(TokenType::As)?;
36288            Expression::Alias(Box::new(Alias::new(expr, alias)))
36289        } else if matches!(
36290            self.config.dialect,
36291            Some(crate::dialects::DialectType::ClickHouse)
36292        ) && (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
36293            && self
36294                .peek_nth(1)
36295                .map_or(false, |t| t.token_type == TokenType::Comma)
36296        {
36297            // ClickHouse: CAST(expr AS alias, type_string) — alias before comma syntax
36298            let alias = self.expect_identifier_or_keyword_with_quoted()?;
36299            let expr = Expression::Alias(Box::new(Alias::new(expr, alias)));
36300            self.expect(TokenType::Comma)?;
36301            let type_expr = self.parse_expression()?;
36302            let type_expr = self.try_clickhouse_func_arg_alias(type_expr);
36303            self.expect(TokenType::RParen)?;
36304            let _trailing_comments = self.previous_trailing_comments();
36305            return Ok(Expression::CastToStrType(Box::new(CastToStrType {
36306                this: Box::new(expr),
36307                to: Some(Box::new(type_expr)),
36308            })));
36309        } else {
36310            expr
36311        };
36312
36313        // Teradata: CAST(x AS FORMAT 'fmt') (no explicit type)
36314        if matches!(
36315            self.config.dialect,
36316            Some(crate::dialects::DialectType::Teradata)
36317        ) && self.match_token(TokenType::Format)
36318        {
36319            let format = Some(Box::new(self.parse_expression()?));
36320            self.expect(TokenType::RParen)?;
36321            let trailing_comments = self.previous_trailing_comments();
36322            return Ok(Expression::Cast(Box::new(Cast {
36323                this: expr,
36324                to: DataType::Unknown,
36325                trailing_comments,
36326                double_colon_syntax: false,
36327                format,
36328                default: None,
36329                inferred_type: None,
36330            })));
36331        }
36332
36333        let data_type = self.parse_data_type()?;
36334
36335        // Parse optional DEFAULT ... ON CONVERSION ERROR (Oracle)
36336        // CAST(x AS type DEFAULT val ON CONVERSION ERROR)
36337        let default = if self.match_token(TokenType::Default) {
36338            let default_val = self.parse_primary()?;
36339            // Expect "ON CONVERSION ERROR"
36340            if !self.match_text_seq(&["ON", "CONVERSION", "ERROR"]) {
36341                return Err(self.parse_error("Expected ON CONVERSION ERROR"));
36342            }
36343            Some(Box::new(default_val))
36344        } else {
36345            None
36346        };
36347
36348        // Parse optional FORMAT clause for BigQuery: CAST(x AS STRING FORMAT 'format_string')
36349        // Or for Oracle with comma: CAST(x AS DATE DEFAULT NULL ON CONVERSION ERROR, 'format')
36350        let format = if self.match_token(TokenType::Format) {
36351            Some(Box::new(self.parse_expression()?))
36352        } else if self.match_token(TokenType::Comma) {
36353            // Oracle date format: CAST(x AS DATE, 'format')
36354            Some(Box::new(self.parse_expression()?))
36355        } else {
36356            None
36357        };
36358
36359        self.expect(TokenType::RParen)?;
36360        let trailing_comments = self.previous_trailing_comments();
36361
36362        Ok(Expression::Cast(Box::new(Cast {
36363            this: expr,
36364            to: data_type,
36365            trailing_comments,
36366            double_colon_syntax: false,
36367            format,
36368            default,
36369            inferred_type: None,
36370        })))
36371    }
36372
36373    /// Parse TRY_CAST expression
36374    fn parse_try_cast(&mut self) -> Result<Expression> {
36375        self.expect(TokenType::TryCast)?;
36376        self.expect(TokenType::LParen)?;
36377        let expr = self.parse_or()?;
36378        self.expect(TokenType::As)?;
36379        let data_type = self.parse_data_type()?;
36380
36381        // Parse optional FORMAT clause
36382        let format = if self.match_token(TokenType::Format) {
36383            Some(Box::new(self.parse_expression()?))
36384        } else {
36385            None
36386        };
36387
36388        self.expect(TokenType::RParen)?;
36389        let trailing_comments = self.previous_trailing_comments();
36390
36391        Ok(Expression::TryCast(Box::new(Cast {
36392            this: expr,
36393            to: data_type,
36394            trailing_comments,
36395            double_colon_syntax: false,
36396            format,
36397            default: None,
36398            inferred_type: None,
36399        })))
36400    }
36401
36402    /// Parse SAFE_CAST expression (BigQuery)
36403    fn parse_safe_cast(&mut self) -> Result<Expression> {
36404        self.expect(TokenType::SafeCast)?;
36405        self.expect(TokenType::LParen)?;
36406        let expr = self.parse_or()?;
36407        self.expect(TokenType::As)?;
36408        let data_type = self.parse_data_type()?;
36409
36410        // Parse optional FORMAT clause
36411        let format = if self.match_token(TokenType::Format) {
36412            Some(Box::new(self.parse_expression()?))
36413        } else {
36414            None
36415        };
36416
36417        self.expect(TokenType::RParen)?;
36418        let trailing_comments = self.previous_trailing_comments();
36419
36420        Ok(Expression::SafeCast(Box::new(Cast {
36421            this: expr,
36422            to: data_type,
36423            trailing_comments,
36424            double_colon_syntax: false,
36425            format,
36426            default: None,
36427            inferred_type: None,
36428        })))
36429    }
36430
36431    /// Parse a data type
36432    fn parse_data_type(&mut self) -> Result<DataType> {
36433        // Handle special token types that represent data type keywords
36434        // Teradata tokenizes ST_GEOMETRY as TokenType::Geometry
36435        if self.check(TokenType::Geometry) {
36436            let _token = self.advance();
36437            let (subtype, srid) = self.parse_spatial_type_args()?;
36438            return Ok(DataType::Geometry { subtype, srid });
36439        }
36440        // Data types can be keywords (DATE, TIMESTAMP, etc.) or identifiers
36441        let mut raw_name = self.expect_identifier_or_keyword()?;
36442        // Allow dotted custom types like SYSUDTLIB.INT
36443        while self.match_token(TokenType::Dot) {
36444            let part = self.expect_identifier_or_keyword()?;
36445            raw_name.push('.');
36446            raw_name.push_str(&part);
36447        }
36448        let mut name = raw_name.to_uppercase();
36449
36450        // SQL standard: NATIONAL CHAR/CHARACTER → NCHAR
36451        if name == "NATIONAL" {
36452            let next_upper = if !self.is_at_end() {
36453                self.peek().text.to_uppercase()
36454            } else {
36455                String::new()
36456            };
36457            if next_upper == "CHAR" || next_upper == "CHARACTER" {
36458                self.advance(); // consume CHAR/CHARACTER
36459                name = "NCHAR".to_string();
36460                // NATIONAL CHARACTER VARYING → NVARCHAR equivalent
36461                if next_upper == "CHARACTER" && self.check_identifier("VARYING") {
36462                    self.advance(); // consume VARYING
36463                    let length = if self.match_token(TokenType::LParen) {
36464                        if self.check(TokenType::RParen) {
36465                            self.advance();
36466                            None
36467                        } else {
36468                            let n = self.expect_number()? as u32;
36469                            self.expect(TokenType::RParen)?;
36470                            Some(n)
36471                        }
36472                    } else {
36473                        None
36474                    };
36475                    return Ok(DataType::VarChar {
36476                        length,
36477                        parenthesized_length: false,
36478                    });
36479                }
36480            }
36481        }
36482
36483        let base_type = match name.as_str() {
36484            "INT" | "INTEGER" => {
36485                // MySQL allows INT(N) for display width; ClickHouse allows INT()
36486                let length = if self.match_token(TokenType::LParen) {
36487                    if self.check(TokenType::RParen) {
36488                        self.advance();
36489                        None
36490                    } else {
36491                        let n = self.expect_number()? as u32;
36492                        self.expect(TokenType::RParen)?;
36493                        Some(n)
36494                    }
36495                } else {
36496                    None
36497                };
36498                let integer_spelling = name == "INTEGER";
36499                Ok(DataType::Int {
36500                    length,
36501                    integer_spelling,
36502                })
36503            }
36504            "BIGINT" => {
36505                // MySQL allows BIGINT(N) for display width; ClickHouse allows BIGINT()
36506                let length = if self.match_token(TokenType::LParen) {
36507                    if self.check(TokenType::RParen) {
36508                        self.advance();
36509                        None
36510                    } else {
36511                        let n = self.expect_number()? as u32;
36512                        self.expect(TokenType::RParen)?;
36513                        Some(n)
36514                    }
36515                } else {
36516                    None
36517                };
36518                Ok(DataType::BigInt { length })
36519            }
36520            "SMALLINT" => {
36521                let length = if self.match_token(TokenType::LParen) {
36522                    if self.check(TokenType::RParen) {
36523                        self.advance();
36524                        None
36525                    } else {
36526                        let n = self.expect_number()? as u32;
36527                        self.expect(TokenType::RParen)?;
36528                        Some(n)
36529                    }
36530                } else {
36531                    None
36532                };
36533                Ok(DataType::SmallInt { length })
36534            }
36535            "TINYINT" => {
36536                let length = if self.match_token(TokenType::LParen) {
36537                    if self.check(TokenType::RParen) {
36538                        self.advance();
36539                        None
36540                    } else {
36541                        let n = self.expect_number()? as u32;
36542                        self.expect(TokenType::RParen)?;
36543                        Some(n)
36544                    }
36545                } else {
36546                    None
36547                };
36548                Ok(DataType::TinyInt { length })
36549            }
36550            "FLOAT" | "REAL" => {
36551                let real_spelling = name == "REAL";
36552                // MySQL allows FLOAT(precision) or FLOAT(precision, scale)
36553                let (precision, scale) = if self.match_token(TokenType::LParen) {
36554                    let p = self.expect_number()? as u32;
36555                    let s = if self.match_token(TokenType::Comma) {
36556                        Some(self.expect_number()? as u32)
36557                    } else {
36558                        None
36559                    };
36560                    self.expect(TokenType::RParen)?;
36561                    (Some(p), s)
36562                } else {
36563                    (None, None)
36564                };
36565                Ok(DataType::Float {
36566                    precision,
36567                    scale,
36568                    real_spelling,
36569                })
36570            }
36571            "BINARY_FLOAT" => {
36572                // Oracle's BINARY_FLOAT -> DataType::Float
36573                Ok(DataType::Float {
36574                    precision: None,
36575                    scale: None,
36576                    real_spelling: false,
36577                })
36578            }
36579            "BINARY_DOUBLE" => {
36580                // Oracle's BINARY_DOUBLE -> DataType::Double
36581                Ok(DataType::Double {
36582                    precision: None,
36583                    scale: None,
36584                })
36585            }
36586            "DOUBLE" => {
36587                // Handle DOUBLE PRECISION (PostgreSQL standard SQL)
36588                let _ = self.match_identifier("PRECISION");
36589                // MySQL allows DOUBLE(precision, scale)
36590                let (precision, scale) = if self.match_token(TokenType::LParen) {
36591                    let p = self.expect_number()? as u32;
36592                    let s = if self.match_token(TokenType::Comma) {
36593                        Some(self.expect_number()? as u32)
36594                    } else {
36595                        None
36596                    };
36597                    self.expect(TokenType::RParen)?;
36598                    (Some(p), s)
36599                } else {
36600                    (None, None)
36601                };
36602                Ok(DataType::Double { precision, scale })
36603            }
36604            "DECIMAL" | "NUMERIC" => {
36605                let (precision, scale) = if self.match_token(TokenType::LParen) {
36606                    let p = self.expect_number()? as u32;
36607                    let s = if self.match_token(TokenType::Comma) {
36608                        Some(self.expect_number()? as u32)
36609                    } else {
36610                        None
36611                    };
36612                    self.expect(TokenType::RParen)?;
36613                    (Some(p), s)
36614                } else {
36615                    (None, None)
36616                };
36617                Ok(DataType::Decimal { precision, scale })
36618            }
36619            "BOOLEAN" | "BOOL" => Ok(DataType::Boolean),
36620            "CHAR" | "CHARACTER" | "NCHAR" => {
36621                let is_nchar = name == "NCHAR";
36622                // SQL standard: CHARACTER LARGE OBJECT → CLOB/TEXT
36623                if self.match_identifier("LARGE") && self.match_identifier("OBJECT") {
36624                    return Ok(DataType::Text);
36625                }
36626                // Check for VARYING to convert to VARCHAR (SQL standard: CHAR VARYING, CHARACTER VARYING)
36627                if self.match_identifier("VARYING") {
36628                    let length = if self.match_token(TokenType::LParen) {
36629                        if self.check(TokenType::RParen) {
36630                            self.advance();
36631                            None
36632                        } else {
36633                            let n = self.expect_number()? as u32;
36634                            self.expect(TokenType::RParen)?;
36635                            Some(n)
36636                        }
36637                    } else {
36638                        None
36639                    };
36640                    Ok(DataType::VarChar {
36641                        length,
36642                        parenthesized_length: false,
36643                    })
36644                } else {
36645                    let length = if self.match_token(TokenType::LParen) {
36646                        // Allow empty parens like NCHAR() - treat as no length specified
36647                        if self.check(TokenType::RParen) {
36648                            self.advance(); // consume RParen
36649                            None
36650                        } else {
36651                            let n = self.expect_number()? as u32;
36652                            self.expect(TokenType::RParen)?;
36653                            Some(n)
36654                        }
36655                    } else {
36656                        None
36657                    };
36658                    // CHAR CHARACTER SET charset (MySQL CAST context, no length)
36659                    // When length is specified (e.g., CHAR(4) CHARACTER SET LATIN),
36660                    // CHARACTER SET is a column attribute handled at the column def level
36661                    if length.is_none()
36662                        && self.match_identifier("CHARACTER")
36663                        && self.match_token(TokenType::Set)
36664                    {
36665                        let charset = self.expect_identifier_or_keyword()?;
36666                        return Ok(DataType::CharacterSet { name: charset });
36667                    }
36668                    // Preserve NCHAR as Custom DataType so target dialects can map it properly
36669                    // (Oracle keeps NCHAR, TSQL keeps NCHAR, others map to CHAR)
36670                    if is_nchar {
36671                        let name = if let Some(len) = length {
36672                            format!("NCHAR({})", len)
36673                        } else {
36674                            "NCHAR".to_string()
36675                        };
36676                        return Ok(DataType::Custom { name });
36677                    }
36678                    Ok(DataType::Char { length })
36679                }
36680            }
36681            "VARCHAR" | "NVARCHAR" => {
36682                let is_nvarchar = name == "NVARCHAR";
36683                if self.match_token(TokenType::LParen) {
36684                    // Allow empty parens like NVARCHAR() - treat as no length specified
36685                    if self.check(TokenType::RParen) {
36686                        self.advance(); // consume RParen
36687                        if is_nvarchar {
36688                            return Ok(DataType::Custom {
36689                                name: "NVARCHAR".to_string(),
36690                            });
36691                        }
36692                        Ok(DataType::VarChar {
36693                            length: None,
36694                            parenthesized_length: false,
36695                        })
36696                    } else if self.check_identifier("MAX") {
36697                        // TSQL: VARCHAR(MAX) / NVARCHAR(MAX)
36698                        self.advance(); // consume MAX
36699                        self.expect(TokenType::RParen)?;
36700                        let type_name = if is_nvarchar {
36701                            "NVARCHAR(MAX)"
36702                        } else {
36703                            "VARCHAR(MAX)"
36704                        };
36705                        Ok(DataType::Custom {
36706                            name: type_name.to_string(),
36707                        })
36708                    } else {
36709                        // Hive allows VARCHAR((50)) - extra parentheses around the length
36710                        let parenthesized_length = self.match_token(TokenType::LParen);
36711                        let n = self.expect_number()? as u32;
36712                        if parenthesized_length {
36713                            self.expect(TokenType::RParen)?;
36714                        }
36715                        self.expect(TokenType::RParen)?;
36716                        // Preserve NVARCHAR as Custom DataType so target dialects can map properly
36717                        if is_nvarchar {
36718                            return Ok(DataType::Custom {
36719                                name: format!("NVARCHAR({})", n),
36720                            });
36721                        }
36722                        Ok(DataType::VarChar {
36723                            length: Some(n),
36724                            parenthesized_length,
36725                        })
36726                    }
36727                } else {
36728                    if is_nvarchar {
36729                        return Ok(DataType::Custom {
36730                            name: "NVARCHAR".to_string(),
36731                        });
36732                    }
36733                    Ok(DataType::VarChar {
36734                        length: None,
36735                        parenthesized_length: false,
36736                    })
36737                }
36738            }
36739            "TEXT" | "NTEXT" => {
36740                // TEXT(n) - optional length parameter
36741                if self.match_token(TokenType::LParen) {
36742                    let n = self.expect_number()? as u32;
36743                    self.expect(TokenType::RParen)?;
36744                    Ok(DataType::TextWithLength { length: n })
36745                } else {
36746                    Ok(DataType::Text)
36747                }
36748            }
36749            "STRING" => {
36750                // BigQuery STRING(n) - parameterized string with max length
36751                let length = if self.match_token(TokenType::LParen) {
36752                    let n = self.expect_number()? as u32;
36753                    self.expect(TokenType::RParen)?;
36754                    Some(n)
36755                } else {
36756                    None
36757                };
36758                Ok(DataType::String { length })
36759            }
36760            "DATE" => Ok(DataType::Date),
36761            "TIME" => {
36762                // ClickHouse: Time('timezone') is a custom type with string arg
36763                if matches!(
36764                    self.config.dialect,
36765                    Some(crate::dialects::DialectType::ClickHouse)
36766                ) && self.check(TokenType::LParen)
36767                    && self.current + 1 < self.tokens.len()
36768                    && self.tokens[self.current + 1].token_type == TokenType::String
36769                {
36770                    self.advance(); // consume LParen
36771                    let args = self.parse_custom_type_args_balanced()?;
36772                    self.expect(TokenType::RParen)?;
36773                    return Ok(DataType::Custom {
36774                        name: format!("Time({})", args),
36775                    });
36776                }
36777                let precision = if self.match_token(TokenType::LParen) {
36778                    if self.check(TokenType::RParen) {
36779                        self.advance();
36780                        None
36781                    } else {
36782                        let p = self.expect_number()? as u32;
36783                        self.expect(TokenType::RParen)?;
36784                        Some(p)
36785                    }
36786                } else {
36787                    None
36788                };
36789                // Handle TIME WITH/WITHOUT TIME ZONE
36790                let timezone = if self.match_token(TokenType::With) {
36791                    self.match_keyword("TIME");
36792                    self.match_keyword("ZONE");
36793                    true
36794                } else if self.match_keyword("WITHOUT") {
36795                    self.match_keyword("TIME");
36796                    self.match_keyword("ZONE");
36797                    false
36798                } else {
36799                    false
36800                };
36801                Ok(DataType::Time {
36802                    precision,
36803                    timezone,
36804                })
36805            }
36806            "TIMETZ" => {
36807                let precision = if self.match_token(TokenType::LParen) {
36808                    let p = self.expect_number()? as u32;
36809                    self.expect(TokenType::RParen)?;
36810                    Some(p)
36811                } else {
36812                    None
36813                };
36814                Ok(DataType::Time {
36815                    precision,
36816                    timezone: true,
36817                })
36818            }
36819            "TIMESTAMP" => {
36820                // Parse optional precision: TIMESTAMP(p)
36821                let precision = if self.match_token(TokenType::LParen) {
36822                    let p = self.expect_number()? as u32;
36823                    self.expect(TokenType::RParen)?;
36824                    Some(p)
36825                } else {
36826                    None
36827                };
36828                // Parse optional WITH/WITHOUT TIME ZONE or WITH LOCAL TIME ZONE
36829                // Note: TIME is a keyword (TokenType::Time) and LOCAL is a keyword (TokenType::Local)
36830                if self.match_token(TokenType::With) {
36831                    // Check for LOCAL TIME ZONE (Exasol) vs TIME ZONE
36832                    // LOCAL is tokenized as TokenType::Local, not as Identifier
36833                    if self.match_token(TokenType::Local) {
36834                        self.match_keyword("TIME");
36835                        self.match_keyword("ZONE");
36836                        // TIMESTAMP WITH LOCAL TIME ZONE - return as custom type for Exasol handling
36837                        Ok(DataType::Custom {
36838                            name: "TIMESTAMPLTZ".to_string(),
36839                        })
36840                    } else {
36841                        self.match_keyword("TIME");
36842                        self.match_keyword("ZONE");
36843                        Ok(DataType::Timestamp {
36844                            precision,
36845                            timezone: true,
36846                        })
36847                    }
36848                } else if self.match_keyword("WITHOUT") {
36849                    self.match_keyword("TIME");
36850                    self.match_keyword("ZONE");
36851                    Ok(DataType::Timestamp {
36852                        precision,
36853                        timezone: false,
36854                    })
36855                } else {
36856                    Ok(DataType::Timestamp {
36857                        precision,
36858                        timezone: false,
36859                    })
36860                }
36861            }
36862            "TIMESTAMPTZ" => {
36863                let precision = if self.match_token(TokenType::LParen) {
36864                    let p = self.expect_number()? as u32;
36865                    self.expect(TokenType::RParen)?;
36866                    Some(p)
36867                } else {
36868                    None
36869                };
36870                Ok(DataType::Timestamp {
36871                    precision,
36872                    timezone: true,
36873                })
36874            }
36875            "TIMESTAMPLTZ" | "TIMESTAMP_LTZ" => {
36876                let precision = if self.match_token(TokenType::LParen) {
36877                    let p = self.expect_number()? as u32;
36878                    self.expect(TokenType::RParen)?;
36879                    Some(p)
36880                } else {
36881                    None
36882                };
36883                let name = if let Some(p) = precision {
36884                    format!("TIMESTAMPLTZ({})", p)
36885                } else {
36886                    "TIMESTAMPLTZ".to_string()
36887                };
36888                Ok(DataType::Custom { name })
36889            }
36890            "INTERVAL" => {
36891                // Parse optional unit (DAYS, DAY, HOUR, etc.)
36892                // Don't consume GENERATED, AS, NOT, NULL, etc. which are column constraints
36893                let unit = if (self.check(TokenType::Identifier)
36894                    || self.check(TokenType::Var)
36895                    || self.check_keyword())
36896                    && !self.check(TokenType::Generated)
36897                    && !self.check(TokenType::As)
36898                    && !self.check(TokenType::Not)
36899                    && !self.check(TokenType::Null)
36900                    && !self.check(TokenType::Default)
36901                    && !self.check(TokenType::PrimaryKey)
36902                    && !self.check(TokenType::Unique)
36903                    && !self.check(TokenType::Check)
36904                    && !self.check(TokenType::Constraint)
36905                    && !self.check(TokenType::References)
36906                    && !self.check(TokenType::Collate)
36907                    && !self.check(TokenType::Comment)
36908                    && !self.check(TokenType::RParen)
36909                    && !self.check(TokenType::Comma)
36910                {
36911                    Some(self.advance().text.to_uppercase())
36912                } else {
36913                    None
36914                };
36915                // Parse optional TO unit for range intervals like DAY TO HOUR
36916                let to = if self.match_token(TokenType::To) {
36917                    if self.check(TokenType::Identifier)
36918                        || self.check(TokenType::Var)
36919                        || self.check_keyword()
36920                    {
36921                        Some(self.advance().text.to_uppercase())
36922                    } else {
36923                        None
36924                    }
36925                } else {
36926                    None
36927                };
36928                Ok(DataType::Interval { unit, to })
36929            }
36930            "JSON" => {
36931                if matches!(
36932                    self.config.dialect,
36933                    Some(crate::dialects::DialectType::ClickHouse)
36934                ) && self.match_token(TokenType::LParen)
36935                {
36936                    // ClickHouse: JSON(subcolumn_specs) e.g. JSON(a String, b UInt32) or JSON(max_dynamic_paths=8)
36937                    let args = self.parse_custom_type_args_balanced()?;
36938                    self.expect(TokenType::RParen)?;
36939                    Ok(DataType::Custom {
36940                        name: format!("JSON({})", args),
36941                    })
36942                } else {
36943                    Ok(DataType::Json)
36944                }
36945            }
36946            "JSONB" => Ok(DataType::JsonB),
36947            "UUID" => Ok(DataType::Uuid),
36948            "BLOB" => Ok(DataType::Blob),
36949            "BYTEA" => Ok(DataType::VarBinary { length: None }),
36950            "BIT" => {
36951                let length = if self.match_token(TokenType::LParen) {
36952                    let n = self.expect_number()? as u32;
36953                    self.expect(TokenType::RParen)?;
36954                    Some(n)
36955                } else {
36956                    None
36957                };
36958                Ok(DataType::Bit { length })
36959            }
36960            "VARBIT" | "BIT VARYING" => {
36961                let length = if self.match_token(TokenType::LParen) {
36962                    let n = self.expect_number()? as u32;
36963                    self.expect(TokenType::RParen)?;
36964                    Some(n)
36965                } else {
36966                    None
36967                };
36968                Ok(DataType::VarBit { length })
36969            }
36970            "BINARY" => {
36971                // SQL standard: BINARY LARGE OBJECT → BLOB
36972                if self.match_identifier("LARGE") && self.match_identifier("OBJECT") {
36973                    return Ok(DataType::Blob);
36974                }
36975                // Handle BINARY VARYING (SQL standard for VARBINARY)
36976                if self.match_identifier("VARYING") {
36977                    let length = if self.match_token(TokenType::LParen) {
36978                        let len = self.expect_number()? as u32;
36979                        self.expect(TokenType::RParen)?;
36980                        Some(len)
36981                    } else {
36982                        None
36983                    };
36984                    Ok(DataType::VarBinary { length })
36985                } else {
36986                    let length = if self.match_token(TokenType::LParen) {
36987                        let len = self.expect_number()? as u32;
36988                        self.expect(TokenType::RParen)?;
36989                        Some(len)
36990                    } else {
36991                        None
36992                    };
36993                    Ok(DataType::Binary { length })
36994                }
36995            }
36996            "VARBINARY" => {
36997                let length = if self.match_token(TokenType::LParen) {
36998                    let len = self.expect_number()? as u32;
36999                    self.expect(TokenType::RParen)?;
37000                    Some(len)
37001                } else {
37002                    None
37003                };
37004                Ok(DataType::VarBinary { length })
37005            }
37006            // Generic types with angle bracket or parentheses syntax: ARRAY<T>, ARRAY(T), MAP<K,V>, MAP(K,V)
37007            "ARRAY" => {
37008                if self.match_token(TokenType::Lt) {
37009                    // ARRAY<element_type> - angle bracket style
37010                    let element_type = self.parse_data_type()?;
37011                    self.expect_gt()?;
37012                    Ok(DataType::Array {
37013                        element_type: Box::new(element_type),
37014                        dimension: None,
37015                    })
37016                } else if self.match_token(TokenType::LParen) {
37017                    // ARRAY(element_type) - Snowflake parentheses style
37018                    let element_type = self.parse_data_type()?;
37019                    self.expect(TokenType::RParen)?;
37020                    Ok(DataType::Array {
37021                        element_type: Box::new(element_type),
37022                        dimension: None,
37023                    })
37024                } else {
37025                    // Just ARRAY without type parameter
37026                    Ok(DataType::Custom {
37027                        name: "ARRAY".to_string(),
37028                    })
37029                }
37030            }
37031            "MAP" => {
37032                if self.match_token(TokenType::Lt) {
37033                    // MAP<key_type, value_type> - angle bracket style
37034                    let key_type = self.parse_data_type()?;
37035                    self.expect(TokenType::Comma)?;
37036                    let value_type = self.parse_data_type()?;
37037                    self.expect_gt()?;
37038                    Ok(DataType::Map {
37039                        key_type: Box::new(key_type),
37040                        value_type: Box::new(value_type),
37041                    })
37042                } else if self.match_token(TokenType::LBracket) {
37043                    // Materialize: MAP[TEXT => INT] type syntax
37044                    let key_type = self.parse_data_type()?;
37045                    self.expect(TokenType::FArrow)?;
37046                    let value_type = self.parse_data_type()?;
37047                    self.expect(TokenType::RBracket)?;
37048                    Ok(DataType::Map {
37049                        key_type: Box::new(key_type),
37050                        value_type: Box::new(value_type),
37051                    })
37052                } else if self.match_token(TokenType::LParen) {
37053                    // MAP(key_type, value_type) - Snowflake parentheses style
37054                    let key_type = self.parse_data_type()?;
37055                    self.expect(TokenType::Comma)?;
37056                    let value_type = self.parse_data_type()?;
37057                    self.expect(TokenType::RParen)?;
37058                    Ok(DataType::Map {
37059                        key_type: Box::new(key_type),
37060                        value_type: Box::new(value_type),
37061                    })
37062                } else {
37063                    // Just MAP without type parameters
37064                    Ok(DataType::Custom {
37065                        name: "MAP".to_string(),
37066                    })
37067                }
37068            }
37069            // VECTOR(type, dimension) - Snowflake vector type
37070            // VECTOR(dimension, element_type_alias) or VECTOR(dimension) - SingleStore vector type
37071            "VECTOR" => {
37072                if self.match_token(TokenType::LParen) {
37073                    if self.check(TokenType::Number) {
37074                        // SingleStore format: VECTOR(dimension) or VECTOR(dimension, type_alias)
37075                        let dimension = self.expect_number()? as u32;
37076                        let element_type = if self.match_token(TokenType::Comma) {
37077                            // Parse the type alias (I8, I16, I32, I64, F32, F64)
37078                            let type_alias = self.expect_identifier_or_keyword()?;
37079                            let mapped_type = match type_alias.to_uppercase().as_str() {
37080                                "I8" => DataType::TinyInt { length: None },
37081                                "I16" => DataType::SmallInt { length: None },
37082                                "I32" => DataType::Int {
37083                                    length: None,
37084                                    integer_spelling: false,
37085                                },
37086                                "I64" => DataType::BigInt { length: None },
37087                                "F32" => DataType::Float {
37088                                    precision: None,
37089                                    scale: None,
37090                                    real_spelling: false,
37091                                },
37092                                "F64" => DataType::Double {
37093                                    precision: None,
37094                                    scale: None,
37095                                },
37096                                _ => DataType::Custom {
37097                                    name: type_alias.to_string(),
37098                                },
37099                            };
37100                            Some(Box::new(mapped_type))
37101                        } else {
37102                            // Just dimension, no type
37103                            None
37104                        };
37105                        self.expect(TokenType::RParen)?;
37106                        Ok(DataType::Vector {
37107                            element_type,
37108                            dimension: Some(dimension),
37109                        })
37110                    } else {
37111                        // Snowflake format: VECTOR(type, dimension)
37112                        let element_type = self.parse_data_type()?;
37113                        self.expect(TokenType::Comma)?;
37114                        let dimension = self.expect_number()? as u32;
37115                        self.expect(TokenType::RParen)?;
37116                        Ok(DataType::Vector {
37117                            element_type: Some(Box::new(element_type)),
37118                            dimension: Some(dimension),
37119                        })
37120                    }
37121                } else {
37122                    Ok(DataType::Custom {
37123                        name: "VECTOR".to_string(),
37124                    })
37125                }
37126            }
37127            // OBJECT(field1 type1, field2 type2, ...) - Snowflake structured object type
37128            "OBJECT" => {
37129                if self.match_token(TokenType::LParen) {
37130                    // ClickHouse: Object('json') — string literal argument
37131                    if matches!(
37132                        self.config.dialect,
37133                        Some(crate::dialects::DialectType::ClickHouse)
37134                    ) && self.check(TokenType::String)
37135                    {
37136                        let arg = self.advance().text;
37137                        self.expect(TokenType::RParen)?;
37138                        return Ok(DataType::Custom {
37139                            name: format!("Object('{}')", arg),
37140                        });
37141                    }
37142                    let mut fields = Vec::new();
37143                    if !self.check(TokenType::RParen) {
37144                        loop {
37145                            let field_name = self.expect_identifier_or_keyword()?;
37146                            let field_type = self.parse_data_type()?;
37147                            // Optional NOT NULL constraint
37148                            let not_null = if self.match_keyword("NOT") {
37149                                // Consume NULL if present
37150                                self.match_keyword("NULL");
37151                                true
37152                            } else {
37153                                false
37154                            };
37155                            fields.push((field_name, field_type, not_null));
37156                            if !self.match_token(TokenType::Comma) {
37157                                break;
37158                            }
37159                        }
37160                    }
37161                    self.expect(TokenType::RParen)?;
37162                    // Check for RENAME FIELDS or ADD FIELDS modifier
37163                    let modifier = if self.match_keyword("RENAME") {
37164                        if self.match_keyword("FIELDS") {
37165                            Some("RENAME FIELDS".to_string())
37166                        } else {
37167                            Some("RENAME".to_string())
37168                        }
37169                    } else if self.match_keyword("ADD") {
37170                        if self.match_keyword("FIELDS") {
37171                            Some("ADD FIELDS".to_string())
37172                        } else {
37173                            Some("ADD".to_string())
37174                        }
37175                    } else {
37176                        None
37177                    };
37178                    Ok(DataType::Object { fields, modifier })
37179                } else {
37180                    Ok(DataType::Custom {
37181                        name: "OBJECT".to_string(),
37182                    })
37183                }
37184            }
37185            "STRUCT" => {
37186                if self.match_token(TokenType::Lt) {
37187                    // STRUCT<field1 type1, field2 type2, ...> - BigQuery angle-bracket syntax
37188                    let fields = self.parse_struct_type_fields(false)?;
37189                    self.expect_gt()?;
37190                    Ok(DataType::Struct {
37191                        fields,
37192                        nested: false,
37193                    })
37194                } else if self.match_token(TokenType::LParen) {
37195                    // STRUCT(field1 type1, field2 type2, ...) - DuckDB parenthesized syntax
37196                    let fields = self.parse_struct_type_fields(true)?;
37197                    self.expect(TokenType::RParen)?;
37198                    Ok(DataType::Struct {
37199                        fields,
37200                        nested: true,
37201                    })
37202                } else {
37203                    // Just STRUCT without type parameters
37204                    Ok(DataType::Custom {
37205                        name: "STRUCT".to_string(),
37206                    })
37207                }
37208            }
37209            "ROW" => {
37210                // ROW(field1 type1, field2 type2, ...) - same as STRUCT with parens
37211                if self.match_token(TokenType::LParen) {
37212                    let fields = self.parse_struct_type_fields(true)?;
37213                    self.expect(TokenType::RParen)?;
37214                    Ok(DataType::Struct {
37215                        fields,
37216                        nested: true,
37217                    })
37218                } else {
37219                    Ok(DataType::Custom {
37220                        name: "ROW".to_string(),
37221                    })
37222                }
37223            }
37224            "RECORD" => {
37225                // RECORD(field1 type1, field2 type2, ...) - SingleStore record type (like ROW/STRUCT)
37226                if self.match_token(TokenType::LParen) {
37227                    let fields = self.parse_struct_type_fields(true)?;
37228                    self.expect(TokenType::RParen)?;
37229                    // Use Struct with nested=true, generator will output RECORD for SingleStore
37230                    Ok(DataType::Struct {
37231                        fields,
37232                        nested: true,
37233                    })
37234                } else {
37235                    Ok(DataType::Custom {
37236                        name: "RECORD".to_string(),
37237                    })
37238                }
37239            }
37240            "ENUM" => {
37241                // ENUM('RED', 'GREEN', 'BLUE') - DuckDB enum type
37242                // ClickHouse: Enum('hello' = 1, 'world' = 2)
37243                // ClickHouse also allows NULL in enum: Enum('a', 'b', NULL)
37244                if self.match_token(TokenType::LParen) {
37245                    let mut values = Vec::new();
37246                    let mut assignments = Vec::new();
37247                    if !self.check(TokenType::RParen) {
37248                        loop {
37249                            let val = if matches!(
37250                                self.config.dialect,
37251                                Some(crate::dialects::DialectType::ClickHouse)
37252                            ) && self.check(TokenType::Null)
37253                            {
37254                                self.advance();
37255                                "NULL".to_string()
37256                            } else {
37257                                self.expect_string()?
37258                            };
37259                            values.push(val);
37260                            // ClickHouse: optional = value assignment (including negative numbers)
37261                            if self.match_token(TokenType::Eq) {
37262                                let negative = self.match_token(TokenType::Dash);
37263                                let num_token = self.advance();
37264                                let val = if negative {
37265                                    format!("-{}", num_token.text)
37266                                } else {
37267                                    num_token.text.clone()
37268                                };
37269                                assignments.push(Some(val));
37270                            } else {
37271                                assignments.push(None);
37272                            }
37273                            if !self.match_token(TokenType::Comma) {
37274                                break;
37275                            }
37276                        }
37277                    }
37278                    self.expect(TokenType::RParen)?;
37279                    Ok(DataType::Enum {
37280                        values,
37281                        assignments,
37282                    })
37283                } else {
37284                    Ok(DataType::Custom {
37285                        name: "ENUM".to_string(),
37286                    })
37287                }
37288            }
37289            "SET" => {
37290                // MySQL SET('a', 'b', 'c') type
37291                if self.match_token(TokenType::LParen) {
37292                    let mut values = Vec::new();
37293                    if !self.check(TokenType::RParen) {
37294                        loop {
37295                            let val = self.expect_string()?;
37296                            values.push(val);
37297                            if !self.match_token(TokenType::Comma) {
37298                                break;
37299                            }
37300                        }
37301                    }
37302                    self.expect(TokenType::RParen)?;
37303                    Ok(DataType::Set { values })
37304                } else {
37305                    Ok(DataType::Custom {
37306                        name: "SET".to_string(),
37307                    })
37308                }
37309            }
37310            "UNION" if self.check(TokenType::LParen) => {
37311                // UNION(num INT, str TEXT) - DuckDB union type (only when followed by paren)
37312                self.advance(); // consume LParen
37313                let struct_fields = self.parse_struct_type_fields(true)?;
37314                self.expect(TokenType::RParen)?;
37315                // Convert StructField to (String, DataType) for Union
37316                let fields: Vec<(String, DataType)> = struct_fields
37317                    .into_iter()
37318                    .map(|f| (f.name, f.data_type))
37319                    .collect();
37320                Ok(DataType::Union { fields })
37321            }
37322            // Spatial types
37323            "GEOMETRY" => {
37324                let (subtype, srid) = self.parse_spatial_type_args()?;
37325                Ok(DataType::Geometry { subtype, srid })
37326            }
37327            "GEOGRAPHY" => {
37328                let (subtype, srid) = self.parse_spatial_type_args()?;
37329                Ok(DataType::Geography { subtype, srid })
37330            }
37331            // MySQL spatial subtypes without wrapper
37332            "POINT" | "LINESTRING" | "POLYGON" | "MULTIPOINT" | "MULTILINESTRING"
37333            | "MULTIPOLYGON" | "GEOMETRYCOLLECTION" => {
37334                // Check for optional SRID clause (MySQL syntax)
37335                let srid = if self.match_identifier("SRID") {
37336                    Some(self.expect_number()? as u32)
37337                } else {
37338                    None
37339                };
37340                Ok(DataType::Geometry {
37341                    subtype: Some(name),
37342                    srid,
37343                })
37344            }
37345            // BigQuery ANY TYPE - templated parameter type for UDFs
37346            "ANY" => {
37347                if self.match_token(TokenType::Type) {
37348                    Ok(DataType::Custom {
37349                        name: "ANY TYPE".to_string(),
37350                    })
37351                } else {
37352                    Ok(DataType::Custom {
37353                        name: "ANY".to_string(),
37354                    })
37355                }
37356            }
37357            // LONG VARCHAR (Exasol) - same as TEXT
37358            "LONG" => {
37359                if self.match_identifier("VARCHAR") {
37360                    Ok(DataType::Text)
37361                } else {
37362                    Ok(DataType::Custom {
37363                        name: "LONG".to_string(),
37364                    })
37365                }
37366            }
37367            // MySQL SIGNED [INTEGER] / UNSIGNED [INTEGER] in CAST context
37368            // CAST(x AS SIGNED INTEGER) -> CAST(x AS SIGNED)
37369            "SIGNED" | "UNSIGNED" => {
37370                // Consume optional INTEGER keyword after SIGNED/UNSIGNED
37371                if self.check_identifier("INTEGER")
37372                    || self.check_keyword_text("INTEGER")
37373                    || self.check_keyword_text("INT")
37374                {
37375                    self.advance();
37376                }
37377                Ok(DataType::Custom { name })
37378            }
37379            // ClickHouse Nullable(T) wrapper type
37380            "NULLABLE" => {
37381                self.expect(TokenType::LParen)?;
37382                let inner = self.parse_data_type()?;
37383                self.expect(TokenType::RParen)?;
37384                Ok(DataType::Nullable {
37385                    inner: Box::new(inner),
37386                })
37387            }
37388            _ => {
37389                // Handle custom types with optional parenthesized precision/args
37390                // e.g., DATETIME2(2), DATETIMEOFFSET(7), NVARCHAR2(100)
37391                // Use uppercase name for known SQL custom types, but preserve original case
37392                // for user-defined type names (e.g., UserDefinedTableType)
37393                let is_known = convert_name_is_known_custom(&name);
37394                let custom_name = if is_known {
37395                    name.clone()
37396                } else {
37397                    raw_name.clone()
37398                };
37399                if self.match_token(TokenType::LParen) {
37400                    if matches!(
37401                        self.config.dialect,
37402                        Some(crate::dialects::DialectType::ClickHouse)
37403                    ) {
37404                        let args = self.parse_custom_type_args_balanced()?;
37405                        self.expect(TokenType::RParen)?;
37406                        Ok(DataType::Custom {
37407                            name: format!("{}({})", custom_name, args),
37408                        })
37409                    } else {
37410                        let mut args = Vec::new();
37411                        let mut after_comma = true; // treat first token as start of new arg
37412                        loop {
37413                            if self.check(TokenType::RParen) {
37414                                break;
37415                            }
37416                            let token = self.advance();
37417                            // If the previous token was space-separated (not comma-separated),
37418                            // append to the last arg. E.g., VARCHAR2(2328 CHAR) -> "2328 CHAR"
37419                            if !after_comma && !args.is_empty() {
37420                                if let Some(last) = args.last_mut() {
37421                                    *last = format!("{} {}", last, token.text);
37422                                }
37423                            } else {
37424                                args.push(token.text.clone());
37425                            }
37426                            after_comma = self.match_token(TokenType::Comma);
37427                        }
37428                        self.expect(TokenType::RParen)?;
37429                        // Include args in the name: DATETIME2(2), VARCHAR2(2328 CHAR)
37430                        Ok(DataType::Custom {
37431                            name: format!("{}({})", custom_name, args.join(", ")),
37432                        })
37433                    }
37434                } else {
37435                    Ok(DataType::Custom { name: custom_name })
37436                }
37437            }
37438        }?;
37439
37440        // UNSIGNED/SIGNED modifiers for integer types (MySQL) are handled
37441        // by the column definition parser which sets col.unsigned = true.
37442        // Do NOT consume them here; the column parser needs to see them.
37443        let mut result_type = base_type;
37444
37445        // Materialize: handle postfix LIST syntax (INT LIST, INT LIST LIST LIST)
37446        let is_materialize = matches!(
37447            self.config.dialect,
37448            Some(crate::dialects::DialectType::Materialize)
37449        );
37450        if is_materialize {
37451            while self.check_identifier("LIST") || self.check(TokenType::List) {
37452                self.advance(); // consume LIST
37453                result_type = DataType::List {
37454                    element_type: Box::new(result_type),
37455                };
37456            }
37457        }
37458
37459        // PostgreSQL array syntax: TYPE[], TYPE[N], TYPE[N][M], etc.
37460        let result_type = self.maybe_parse_array_dimensions(result_type)?;
37461
37462        // ClickHouse: mark string-like standard types as non-nullable by converting to Custom
37463        // This prevents the generator from wrapping them in Nullable() during identity transforms.
37464        // Types parsed from other dialects remain standard and will get Nullable wrapping when
37465        // transpiling to ClickHouse.
37466        if matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse)) {
37467            return Ok(Self::clickhouse_mark_non_nullable(result_type));
37468        }
37469
37470        Ok(result_type)
37471    }
37472
37473    /// Convert standard types to Custom equivalents for ClickHouse to prevent Nullable wrapping.
37474    /// This mirrors Python sqlglot's behavior of marking ClickHouse-parsed types as non-nullable.
37475    fn clickhouse_mark_non_nullable(dt: DataType) -> DataType {
37476        match dt {
37477            DataType::Text => DataType::Custom { name: "String".to_string() },
37478            DataType::VarChar { .. } => DataType::Custom { name: "String".to_string() },
37479            DataType::Char { .. } => DataType::Custom { name: "String".to_string() },
37480            DataType::String { .. } => DataType::Custom { name: "String".to_string() },
37481            _ => dt,
37482        }
37483    }
37484
37485    /// Parse a data type for cast syntax (::TYPE)
37486    /// For dialects that support fixed-size arrays (like DuckDB), brackets like [3] are
37487    /// parsed as array dimensions (e.g., x::INT[3] means cast to INT[3] array type).
37488    /// For other dialects (like Snowflake), brackets are subscript operations
37489    /// (e.g., x::VARIANT[0] means cast to VARIANT, then subscript with [0]).
37490    fn parse_data_type_for_cast(&mut self) -> Result<DataType> {
37491        // Check if dialect supports array type suffixes (e.g., INT[], VARCHAR[3])
37492        // PostgreSQL: INT[], TEXT[] (no fixed size)
37493        // DuckDB: INT[3] (fixed size arrays)
37494        let supports_array_type_suffix = matches!(
37495            self.config.dialect,
37496            Some(crate::dialects::DialectType::DuckDB)
37497                | Some(crate::dialects::DialectType::PostgreSQL)
37498                | Some(crate::dialects::DialectType::Redshift)
37499        );
37500
37501        // Check if it's a quoted identifier (e.g., "udt") — preserve case and quoting
37502        let is_quoted = self.check(TokenType::QuotedIdentifier);
37503        let raw_name = self.expect_identifier_or_keyword()?;
37504        if is_quoted {
37505            // Check if the quoted name matches a known type — if so, normalize it
37506            let known_type = self.convert_name_to_type(&raw_name);
37507            if let Ok(ref dt) = known_type {
37508                if !matches!(dt, DataType::Custom { .. }) {
37509                    return known_type;
37510                }
37511            }
37512            // Truly custom type — preserve original case with quotes
37513            return Ok(DataType::Custom {
37514                name: format!("\"{}\"", raw_name),
37515            });
37516        }
37517        let name = raw_name.to_uppercase();
37518
37519        // Handle parametric types like ARRAY<T>, MAP<K,V>
37520        let base_type = match name.as_str() {
37521            "ARRAY" => {
37522                if self.match_token(TokenType::Lt) {
37523                    let element_type = self.parse_data_type()?;
37524                    self.expect_gt()?;
37525                    DataType::Array {
37526                        element_type: Box::new(element_type),
37527                        dimension: None,
37528                    }
37529                } else if self.match_token(TokenType::LParen) {
37530                    // ClickHouse: Array(Type) syntax with parentheses
37531                    let element_type = self.parse_data_type_for_cast()?;
37532                    self.expect(TokenType::RParen)?;
37533                    DataType::Array {
37534                        element_type: Box::new(element_type),
37535                        dimension: None,
37536                    }
37537                } else {
37538                    DataType::Custom { name }
37539                }
37540            }
37541            "MAP" => {
37542                if self.match_token(TokenType::Lt) {
37543                    let key_type = self.parse_data_type()?;
37544                    self.expect(TokenType::Comma)?;
37545                    let value_type = self.parse_data_type()?;
37546                    self.expect_gt()?;
37547                    DataType::Map {
37548                        key_type: Box::new(key_type),
37549                        value_type: Box::new(value_type),
37550                    }
37551                } else if self.match_token(TokenType::LParen) {
37552                    // Snowflake: MAP(key_type, value_type) syntax
37553                    let key_type = self.parse_data_type_for_cast()?;
37554                    self.expect(TokenType::Comma)?;
37555                    let value_type = self.parse_data_type_for_cast()?;
37556                    self.expect(TokenType::RParen)?;
37557                    DataType::Map {
37558                        key_type: Box::new(key_type),
37559                        value_type: Box::new(value_type),
37560                    }
37561                } else if self.match_token(TokenType::LBracket) {
37562                    // Materialize: MAP[TEXT => INT] type syntax
37563                    let key_type = self.parse_data_type_for_cast()?;
37564                    self.expect(TokenType::FArrow)?;
37565                    let value_type = self.parse_data_type_for_cast()?;
37566                    self.expect(TokenType::RBracket)?;
37567                    DataType::Map {
37568                        key_type: Box::new(key_type),
37569                        value_type: Box::new(value_type),
37570                    }
37571                } else {
37572                    DataType::Custom { name }
37573                }
37574            }
37575            "STRUCT" => {
37576                if self.match_token(TokenType::Lt) {
37577                    let fields = self.parse_struct_type_fields(false)?;
37578                    self.expect_gt()?;
37579                    DataType::Struct {
37580                        fields,
37581                        nested: false,
37582                    }
37583                } else if self.match_token(TokenType::LParen) {
37584                    let fields = self.parse_struct_type_fields(true)?;
37585                    self.expect(TokenType::RParen)?;
37586                    DataType::Struct {
37587                        fields,
37588                        nested: true,
37589                    }
37590                } else {
37591                    DataType::Custom { name }
37592                }
37593            }
37594            "ROW" => {
37595                if self.match_token(TokenType::LParen) {
37596                    let fields = self.parse_struct_type_fields(true)?;
37597                    self.expect(TokenType::RParen)?;
37598                    DataType::Struct {
37599                        fields,
37600                        nested: true,
37601                    }
37602                } else {
37603                    DataType::Custom { name }
37604                }
37605            }
37606            "RECORD" => {
37607                // SingleStore RECORD type (like ROW/STRUCT)
37608                if self.match_token(TokenType::LParen) {
37609                    let fields = self.parse_struct_type_fields(true)?;
37610                    self.expect(TokenType::RParen)?;
37611                    DataType::Struct {
37612                        fields,
37613                        nested: true,
37614                    }
37615                } else {
37616                    DataType::Custom { name }
37617                }
37618            }
37619            // Multi-word types that need special handling in cast context
37620            "DOUBLE" => {
37621                // Handle DOUBLE PRECISION
37622                let _ = self.match_identifier("PRECISION");
37623                // ClickHouse/SQL: DOUBLE(precision) or DOUBLE(precision, scale)
37624                let (precision, scale) = if self.match_token(TokenType::LParen) {
37625                    let p = Some(self.expect_number()? as u32);
37626                    let s = if self.match_token(TokenType::Comma) {
37627                        Some(self.expect_number()? as u32)
37628                    } else {
37629                        None
37630                    };
37631                    self.expect(TokenType::RParen)?;
37632                    (p, s)
37633                } else {
37634                    (None, None)
37635                };
37636                DataType::Double { precision, scale }
37637            }
37638            "CHARACTER" | "CHAR" | "NCHAR" => {
37639                // Handle CHARACTER VARYING / CHAR VARYING
37640                if self.match_identifier("VARYING") {
37641                    let length = if self.match_token(TokenType::LParen) {
37642                        let len = Some(self.expect_number()? as u32);
37643                        self.expect(TokenType::RParen)?;
37644                        len
37645                    } else {
37646                        None
37647                    };
37648                    DataType::VarChar {
37649                        length,
37650                        parenthesized_length: false,
37651                    }
37652                } else {
37653                    let length = if self.match_token(TokenType::LParen) {
37654                        let len = Some(self.expect_number()? as u32);
37655                        self.expect(TokenType::RParen)?;
37656                        len
37657                    } else {
37658                        None
37659                    };
37660                    // CHAR CHARACTER SET charset (MySQL CAST context, no length)
37661                    if length.is_none()
37662                        && self.match_identifier("CHARACTER")
37663                        && self.match_token(TokenType::Set)
37664                    {
37665                        let charset = self.expect_identifier_or_keyword()?;
37666                        return Ok(DataType::CharacterSet { name: charset });
37667                    }
37668                    DataType::Char { length }
37669                }
37670            }
37671            "TIME" => {
37672                // Handle TIME(precision) WITH/WITHOUT TIME ZONE
37673                let precision = if self.match_token(TokenType::LParen) {
37674                    let p = Some(self.expect_number()? as u32);
37675                    self.expect(TokenType::RParen)?;
37676                    p
37677                } else {
37678                    None
37679                };
37680                let timezone = if self.match_token(TokenType::With) {
37681                    self.match_keyword("TIME");
37682                    self.match_keyword("ZONE");
37683                    true
37684                } else if self.match_keyword("WITHOUT") {
37685                    self.match_keyword("TIME");
37686                    self.match_keyword("ZONE");
37687                    false
37688                } else {
37689                    false
37690                };
37691                DataType::Time {
37692                    precision,
37693                    timezone,
37694                }
37695            }
37696            "TIMETZ" => {
37697                let precision = if self.match_token(TokenType::LParen) {
37698                    let p = Some(self.expect_number()? as u32);
37699                    self.expect(TokenType::RParen)?;
37700                    p
37701                } else {
37702                    None
37703                };
37704                DataType::Time {
37705                    precision,
37706                    timezone: true,
37707                }
37708            }
37709            "TIMESTAMP" => {
37710                // Handle TIMESTAMP(precision) WITH/WITHOUT TIME ZONE or WITH LOCAL TIME ZONE
37711                let precision = if self.match_token(TokenType::LParen) {
37712                    let p = Some(self.expect_number()? as u32);
37713                    self.expect(TokenType::RParen)?;
37714                    p
37715                } else {
37716                    None
37717                };
37718                // Note: TIME is a keyword (TokenType::Time), so use match_keyword instead of match_identifier
37719                if self.match_token(TokenType::With) {
37720                    // Check for LOCAL TIME ZONE vs TIME ZONE
37721                    if self.match_token(TokenType::Local) {
37722                        self.match_keyword("TIME");
37723                        self.match_keyword("ZONE");
37724                        // TIMESTAMP WITH LOCAL TIME ZONE -> TIMESTAMPLTZ
37725                        DataType::Custom {
37726                            name: "TIMESTAMPLTZ".to_string(),
37727                        }
37728                    } else {
37729                        self.match_keyword("TIME");
37730                        self.match_keyword("ZONE");
37731                        DataType::Timestamp {
37732                            precision,
37733                            timezone: true,
37734                        }
37735                    }
37736                } else if self.match_keyword("WITHOUT") {
37737                    self.match_keyword("TIME");
37738                    self.match_keyword("ZONE");
37739                    DataType::Timestamp {
37740                        precision,
37741                        timezone: false,
37742                    }
37743                } else {
37744                    DataType::Timestamp {
37745                        precision,
37746                        timezone: false,
37747                    }
37748                }
37749            }
37750            "TIMESTAMPTZ" => {
37751                let precision = if self.match_token(TokenType::LParen) {
37752                    let p = self.expect_number()? as u32;
37753                    self.expect(TokenType::RParen)?;
37754                    Some(p)
37755                } else {
37756                    None
37757                };
37758                DataType::Timestamp {
37759                    precision,
37760                    timezone: true,
37761                }
37762            }
37763            "TIMESTAMPLTZ" | "TIMESTAMP_LTZ" => {
37764                let precision = if self.match_token(TokenType::LParen) {
37765                    let p = self.expect_number()? as u32;
37766                    self.expect(TokenType::RParen)?;
37767                    Some(p)
37768                } else {
37769                    None
37770                };
37771                let dt_name = if let Some(p) = precision {
37772                    format!("TIMESTAMPLTZ({})", p)
37773                } else {
37774                    "TIMESTAMPLTZ".to_string()
37775                };
37776                DataType::Custom { name: dt_name }
37777            }
37778            "INTERVAL" => {
37779                // Parse optional unit (DAY, HOUR, etc.) after INTERVAL in cast context
37780                let unit = if (self.check(TokenType::Identifier)
37781                    || self.check(TokenType::Var)
37782                    || self.check_keyword())
37783                    && !self.check(TokenType::RParen)
37784                    && !self.check(TokenType::Comma)
37785                    && !self.check(TokenType::As)
37786                    && !self.check(TokenType::Not)
37787                    && !self.check(TokenType::Null)
37788                {
37789                    Some(self.advance().text.to_uppercase())
37790                } else {
37791                    None
37792                };
37793                // Parse optional TO unit for range intervals like DAY TO HOUR
37794                let to = if self.match_token(TokenType::To) {
37795                    if self.check(TokenType::Identifier)
37796                        || self.check(TokenType::Var)
37797                        || self.check_keyword()
37798                    {
37799                        Some(self.advance().text.to_uppercase())
37800                    } else {
37801                        None
37802                    }
37803                } else {
37804                    None
37805                };
37806                DataType::Interval { unit, to }
37807            }
37808            // VARCHAR/NVARCHAR with optional (N) or (MAX) parameter
37809            "VARCHAR" | "NVARCHAR" => {
37810                let is_nvarchar = name == "NVARCHAR";
37811                if self.match_token(TokenType::LParen) {
37812                    if self.check(TokenType::RParen) {
37813                        self.advance();
37814                        DataType::VarChar {
37815                            length: None,
37816                            parenthesized_length: false,
37817                        }
37818                    } else if self.check_identifier("MAX") {
37819                        self.advance();
37820                        self.expect(TokenType::RParen)?;
37821                        let type_name = if is_nvarchar {
37822                            "NVARCHAR(MAX)"
37823                        } else {
37824                            "VARCHAR(MAX)"
37825                        };
37826                        DataType::Custom {
37827                            name: type_name.to_string(),
37828                        }
37829                    } else {
37830                        let n = self.expect_number()? as u32;
37831                        self.expect(TokenType::RParen)?;
37832                        DataType::VarChar {
37833                            length: Some(n),
37834                            parenthesized_length: false,
37835                        }
37836                    }
37837                } else {
37838                    DataType::VarChar {
37839                        length: None,
37840                        parenthesized_length: false,
37841                    }
37842                }
37843            }
37844            // VARBINARY with optional (N) or (MAX) parameter
37845            "VARBINARY" => {
37846                if self.match_token(TokenType::LParen) {
37847                    if self.check(TokenType::RParen) {
37848                        self.advance();
37849                        DataType::VarBinary { length: None }
37850                    } else if self.check_identifier("MAX") {
37851                        self.advance();
37852                        self.expect(TokenType::RParen)?;
37853                        DataType::Custom {
37854                            name: "VARBINARY(MAX)".to_string(),
37855                        }
37856                    } else {
37857                        let n = self.expect_number()? as u32;
37858                        self.expect(TokenType::RParen)?;
37859                        DataType::VarBinary { length: Some(n) }
37860                    }
37861                } else {
37862                    DataType::VarBinary { length: None }
37863                }
37864            }
37865            // DECIMAL/NUMERIC with optional (precision, scale)
37866            "DECIMAL" | "NUMERIC" | "NUMBER" => {
37867                if self.match_token(TokenType::LParen) {
37868                    let precision = Some(self.expect_number()? as u32);
37869                    let scale = if self.match_token(TokenType::Comma) {
37870                        Some(self.expect_number()? as u32)
37871                    } else {
37872                        None
37873                    };
37874                    self.expect(TokenType::RParen)?;
37875                    DataType::Decimal { precision, scale }
37876                } else {
37877                    DataType::Decimal {
37878                        precision: None,
37879                        scale: None,
37880                    }
37881                }
37882            }
37883            // INT/INTEGER/BIGINT/SMALLINT/TINYINT with optional (N) display width
37884            "INT" | "INTEGER" => {
37885                let length = if self.match_token(TokenType::LParen) {
37886                    let n = Some(self.expect_number()? as u32);
37887                    self.expect(TokenType::RParen)?;
37888                    n
37889                } else {
37890                    None
37891                };
37892                DataType::Int {
37893                    length,
37894                    integer_spelling: name == "INTEGER",
37895                }
37896            }
37897            "BIGINT" => {
37898                let length = if self.match_token(TokenType::LParen) {
37899                    let n = Some(self.expect_number()? as u32);
37900                    self.expect(TokenType::RParen)?;
37901                    n
37902                } else {
37903                    None
37904                };
37905                DataType::BigInt { length }
37906            }
37907            "SMALLINT" => {
37908                let length = if self.match_token(TokenType::LParen) {
37909                    let n = Some(self.expect_number()? as u32);
37910                    self.expect(TokenType::RParen)?;
37911                    n
37912                } else {
37913                    None
37914                };
37915                DataType::SmallInt { length }
37916            }
37917            "TINYINT" => {
37918                let length = if self.match_token(TokenType::LParen) {
37919                    let n = Some(self.expect_number()? as u32);
37920                    self.expect(TokenType::RParen)?;
37921                    n
37922                } else {
37923                    None
37924                };
37925                DataType::TinyInt { length }
37926            }
37927            // FLOAT with optional (precision)
37928            "FLOAT" | "REAL" | "BINARY_FLOAT" => {
37929                let (precision, scale) = if self.match_token(TokenType::LParen) {
37930                    let n = Some(self.expect_number()? as u32);
37931                    let s = if self.match_token(TokenType::Comma) {
37932                        Some(self.expect_number()? as u32)
37933                    } else {
37934                        None
37935                    };
37936                    self.expect(TokenType::RParen)?;
37937                    (n, s)
37938                } else {
37939                    (None, None)
37940                };
37941                DataType::Float {
37942                    precision,
37943                    scale,
37944                    real_spelling: name == "REAL",
37945                }
37946            }
37947            "BINARY_DOUBLE" => DataType::Double {
37948                precision: None,
37949                scale: None,
37950            },
37951            // BINARY with optional (length)
37952            "BINARY" => {
37953                let length = if self.match_token(TokenType::LParen) {
37954                    let n = Some(self.expect_number()? as u32);
37955                    self.expect(TokenType::RParen)?;
37956                    n
37957                } else {
37958                    None
37959                };
37960                DataType::Binary { length }
37961            }
37962            // MySQL SIGNED [INTEGER] / UNSIGNED [INTEGER] in CAST context
37963            // CAST(x AS SIGNED INTEGER) -> CAST(x AS SIGNED)
37964            // CAST(x AS UNSIGNED INTEGER) -> CAST(x AS UNSIGNED)
37965            "SIGNED" | "UNSIGNED" => {
37966                // Consume optional INTEGER keyword after SIGNED/UNSIGNED
37967                if self.check_identifier("INTEGER")
37968                    || self.check_keyword_text("INTEGER")
37969                    || self.check_keyword_text("INT")
37970                {
37971                    self.advance();
37972                }
37973                DataType::Custom { name }
37974            }
37975            // ClickHouse Nullable(T) wrapper type
37976            "NULLABLE" => {
37977                self.expect(TokenType::LParen)?;
37978                let inner = self.parse_data_type_for_cast()?;
37979                self.expect(TokenType::RParen)?;
37980                DataType::Nullable {
37981                    inner: Box::new(inner),
37982                }
37983            }
37984            // For simple types, use convert_name_to_type to get proper DataType variants
37985            // This ensures VARCHAR becomes DataType::VarChar, not DataType::Custom
37986            // For user-defined types in generic mode, preserve original case from raw_name
37987            _ => {
37988                let base = self.convert_name_to_type(&name)?;
37989                // ClickHouse: consume parenthesized args for custom types like DateTime('UTC'),
37990                // LowCardinality(String), Variant(String, UInt64), JSON(max_dynamic_paths=8)
37991                if matches!(
37992                    self.config.dialect,
37993                    Some(crate::dialects::DialectType::ClickHouse)
37994                ) && self.check(TokenType::LParen)
37995                    && (matches!(
37996                        base,
37997                        DataType::Custom { .. } | DataType::Json | DataType::JsonB
37998                    ))
37999                {
38000                    self.advance(); // consume (
38001                    let args = self.parse_custom_type_args_balanced()?;
38002                    self.expect(TokenType::RParen)?;
38003                    let base_name = match &base {
38004                        DataType::Json => "JSON".to_string(),
38005                        DataType::JsonB => "JSONB".to_string(),
38006                        DataType::Custom { name } => name.clone(),
38007                        _ => unreachable!(),
38008                    };
38009                    DataType::Custom {
38010                        name: format!("{}({})", base_name, args),
38011                    }
38012                } else if matches!(base, DataType::Custom { .. }) && self.check(TokenType::Dot) {
38013                    // Handle schema-qualified user-defined types (e.g., app.status_enum)
38014                    // by consuming dot-separated identifiers like Python sqlglot's
38015                    // _parse_user_defined_type()
38016                    // Use raw_name to preserve original case for schema-qualified types
38017                    let mut type_name = raw_name.to_string();
38018                    while self.match_token(TokenType::Dot) {
38019                        let tok = self.advance();
38020                        type_name = format!("{}.{}", type_name, tok.text);
38021                    }
38022                    DataType::Custom { name: type_name }
38023                } else if matches!(base, DataType::Custom { .. }) && self.config.dialect.is_none() {
38024                    // Preserve original case for user-defined types in generic mode
38025                    DataType::Custom {
38026                        name: raw_name.to_string(),
38027                    }
38028                } else {
38029                    base
38030                }
38031            }
38032        };
38033
38034        // Materialize: handle postfix LIST syntax (INT LIST, INT LIST LIST LIST)
38035        let is_materialize = matches!(
38036            self.config.dialect,
38037            Some(crate::dialects::DialectType::Materialize)
38038        );
38039        let mut result_type = base_type;
38040        if is_materialize {
38041            while self.check_identifier("LIST") || self.check(TokenType::List) {
38042                self.advance(); // consume LIST
38043                result_type = DataType::List {
38044                    element_type: Box::new(result_type),
38045                };
38046            }
38047        }
38048
38049        // For dialects that support array type suffixes (DuckDB, PostgreSQL, Redshift),
38050        // parse array dimensions. For other dialects, brackets after a cast are subscript operations.
38051        if supports_array_type_suffix {
38052            self.maybe_parse_array_dimensions(result_type)
38053        } else {
38054            Ok(result_type)
38055        }
38056    }
38057
38058    /// Parse custom type arguments with balanced parentheses, preserving nested types
38059    fn parse_custom_type_args_balanced(&mut self) -> Result<String> {
38060        let mut depth = 0usize;
38061        let mut out = String::new();
38062        let mut prev_wordish = false;
38063
38064        while !self.is_at_end() {
38065            if self.check(TokenType::RParen) && depth == 0 {
38066                break;
38067            }
38068
38069            let token = self.advance();
38070            match token.token_type {
38071                TokenType::LParen => {
38072                    out.push('(');
38073                    depth += 1;
38074                    prev_wordish = false;
38075                }
38076                TokenType::RParen => {
38077                    if depth == 0 {
38078                        break;
38079                    }
38080                    depth -= 1;
38081                    out.push(')');
38082                    prev_wordish = true;
38083                }
38084                TokenType::Comma => {
38085                    out.push_str(", ");
38086                    prev_wordish = false;
38087                }
38088                TokenType::Eq => {
38089                    out.push_str(" = ");
38090                    prev_wordish = false;
38091                }
38092                TokenType::Plus => {
38093                    out.push_str(" + ");
38094                    prev_wordish = false;
38095                }
38096                TokenType::Dash => {
38097                    out.push('-');
38098                    prev_wordish = false;
38099                }
38100                TokenType::Dot => {
38101                    out.push('.');
38102                    prev_wordish = false;
38103                }
38104                TokenType::String | TokenType::DollarString => {
38105                    if prev_wordish {
38106                        out.push(' ');
38107                    }
38108                    let escaped = token.text.replace('\'', "''");
38109                    out.push('\'');
38110                    out.push_str(&escaped);
38111                    out.push('\'');
38112                    prev_wordish = true;
38113                }
38114                TokenType::Number | TokenType::Parameter => {
38115                    if prev_wordish {
38116                        out.push(' ');
38117                    }
38118                    out.push_str(&token.text);
38119                    prev_wordish = true;
38120                }
38121                TokenType::QuotedIdentifier => {
38122                    if prev_wordish {
38123                        out.push(' ');
38124                    }
38125                    out.push('"');
38126                    out.push_str(&token.text);
38127                    out.push('"');
38128                    prev_wordish = true;
38129                }
38130                _ => {
38131                    if prev_wordish {
38132                        out.push(' ');
38133                    }
38134                    out.push_str(&token.text);
38135                    prev_wordish = true;
38136                }
38137            }
38138        }
38139
38140        Ok(out)
38141    }
38142
38143    /// Try to parse a data type optionally - returns None if no valid type found
38144    /// Used for JSON_TABLE column definitions where type may or may not be present
38145    fn parse_data_type_optional(&mut self) -> Result<Option<DataType>> {
38146        // Check if current token looks like a type name
38147        if !self.check(TokenType::Identifier)
38148            && !self.check(TokenType::Var)
38149            && !self.check_keyword()
38150        {
38151            return Ok(None);
38152        }
38153
38154        // Don't try to parse PATH as a type
38155        if self.check_identifier("PATH") {
38156            return Ok(None);
38157        }
38158
38159        // ClickHouse: ALIAS, EPHEMERAL, MATERIALIZED are column modifiers, not types
38160        if matches!(
38161            self.config.dialect,
38162            Some(crate::dialects::DialectType::ClickHouse)
38163        ) && (self.check_identifier("ALIAS")
38164            || self.check_identifier("EPHEMERAL")
38165            || self.check(TokenType::Materialized))
38166        {
38167            return Ok(None);
38168        }
38169
38170        let saved_pos = self.current;
38171        match self.parse_data_type() {
38172            Ok(dt) => Ok(Some(dt)),
38173            Err(_) => {
38174                self.current = saved_pos;
38175                Ok(None)
38176            }
38177        }
38178    }
38179
38180    /// Convert a DataType to a string representation for JSONColumnDef.kind
38181    fn data_type_to_string(&self, dt: &DataType) -> String {
38182        match dt {
38183            DataType::Int {
38184                length: Some(n),
38185                integer_spelling: true,
38186            } => format!("INTEGER({})", n),
38187            DataType::Int {
38188                length: Some(n), ..
38189            } => format!("INT({})", n),
38190            DataType::Int {
38191                length: None,
38192                integer_spelling: true,
38193            } => "INTEGER".to_string(),
38194            DataType::Int { length: None, .. } => "INT".to_string(),
38195            DataType::BigInt { length: Some(n) } => format!("BIGINT({})", n),
38196            DataType::BigInt { length: None } => "BIGINT".to_string(),
38197            DataType::SmallInt { length: Some(n) } => format!("SMALLINT({})", n),
38198            DataType::SmallInt { length: None } => "SMALLINT".to_string(),
38199            DataType::TinyInt { length: Some(n) } => format!("TINYINT({})", n),
38200            DataType::TinyInt { length: None } => "TINYINT".to_string(),
38201            DataType::Float {
38202                precision: Some(p),
38203                scale: Some(s),
38204                ..
38205            } => format!("FLOAT({}, {})", p, s),
38206            DataType::Float {
38207                precision: Some(p),
38208                scale: None,
38209                ..
38210            } => format!("FLOAT({})", p),
38211            DataType::Float {
38212                precision: None, ..
38213            } => "FLOAT".to_string(),
38214            DataType::Double {
38215                precision: Some(p),
38216                scale: Some(s),
38217            } => format!("DOUBLE({}, {})", p, s),
38218            DataType::Double {
38219                precision: Some(p),
38220                scale: None,
38221            } => format!("DOUBLE({})", p),
38222            DataType::Double {
38223                precision: None, ..
38224            } => "DOUBLE".to_string(),
38225            DataType::Decimal {
38226                precision: Some(p),
38227                scale: Some(s),
38228            } => format!("DECIMAL({}, {})", p, s),
38229            DataType::Decimal {
38230                precision: Some(p),
38231                scale: None,
38232            } => format!("DECIMAL({})", p),
38233            DataType::Decimal {
38234                precision: None, ..
38235            } => "DECIMAL".to_string(),
38236            DataType::VarChar {
38237                length: Some(n), ..
38238            } => format!("VARCHAR({})", n),
38239            DataType::VarChar { length: None, .. } => "VARCHAR".to_string(),
38240            DataType::Char { length: Some(n) } => format!("CHAR({})", n),
38241            DataType::Char { length: None } => "CHAR".to_string(),
38242            DataType::Text => "TEXT".to_string(),
38243            DataType::Boolean => "BOOLEAN".to_string(),
38244            DataType::Date => "DATE".to_string(),
38245            DataType::Time {
38246                precision: Some(p), ..
38247            } => format!("TIME({})", p),
38248            DataType::Time {
38249                precision: None, ..
38250            } => "TIME".to_string(),
38251            DataType::Timestamp {
38252                precision: Some(p),
38253                timezone: true,
38254            } => format!("TIMESTAMPTZ({})", p),
38255            DataType::Timestamp {
38256                precision: Some(p),
38257                timezone: false,
38258            } => format!("TIMESTAMP({})", p),
38259            DataType::Timestamp {
38260                precision: None,
38261                timezone: true,
38262            } => "TIMESTAMPTZ".to_string(),
38263            DataType::Timestamp {
38264                precision: None,
38265                timezone: false,
38266            } => "TIMESTAMP".to_string(),
38267            DataType::Json => "JSON".to_string(),
38268            DataType::JsonB => "JSONB".to_string(),
38269            DataType::Binary { length: Some(n) } => format!("BINARY({})", n),
38270            DataType::Binary { length: None } => "BINARY".to_string(),
38271            DataType::VarBinary { length: Some(n) } => format!("VARBINARY({})", n),
38272            DataType::VarBinary { length: None } => "VARBINARY".to_string(),
38273            DataType::String { length: Some(n) } => format!("STRING({})", n),
38274            DataType::String { length: None } => "STRING".to_string(),
38275            DataType::Array { element_type, .. } => {
38276                format!("ARRAY({})", self.data_type_to_string(element_type))
38277            }
38278            DataType::Nullable { inner } => {
38279                format!("Nullable({})", self.data_type_to_string(inner))
38280            }
38281            DataType::Custom { name } => name.clone(),
38282            _ => format!("{:?}", dt),
38283        }
38284    }
38285
38286    /// Parse optional array dimensions after a type: [], [N], [N][M], ARRAY, ARRAY[N], etc.
38287    fn maybe_parse_array_dimensions(&mut self, base_type: DataType) -> Result<DataType> {
38288        let mut current_type = base_type;
38289
38290        // Handle PostgreSQL ARRAY keyword suffix: type ARRAY or type ARRAY[3]
38291        if self.check_identifier("ARRAY") {
38292            self.advance(); // consume ARRAY
38293                            // Check for optional dimension: ARRAY[N]
38294            let dimension = if self.match_token(TokenType::LBracket) {
38295                let dim = if self.check(TokenType::Number) {
38296                    let n = self.expect_number()? as u32;
38297                    Some(n)
38298                } else {
38299                    None
38300                };
38301                self.expect(TokenType::RBracket)?;
38302                dim
38303            } else {
38304                None
38305            };
38306            current_type = DataType::Array {
38307                element_type: Box::new(current_type),
38308                dimension,
38309            };
38310        }
38311
38312        // Handle bracket-based array dimensions: TYPE[], TYPE[N], TYPE[][N], etc.
38313        while self.match_token(TokenType::LBracket) {
38314            // Check for optional dimension: [N] or just []
38315            let dimension = if self.check(TokenType::Number) {
38316                let n = self.expect_number()? as u32;
38317                Some(n)
38318            } else {
38319                None
38320            };
38321            self.expect(TokenType::RBracket)?;
38322
38323            current_type = DataType::Array {
38324                element_type: Box::new(current_type),
38325                dimension,
38326            };
38327        }
38328
38329        Ok(current_type)
38330    }
38331
38332    /// Parse spatial type arguments like GEOMETRY(Point, 4326) or GEOGRAPHY
38333    fn parse_spatial_type_args(&mut self) -> Result<(Option<String>, Option<u32>)> {
38334        if self.match_token(TokenType::LParen) {
38335            // First arg can be a subtype name (POINT, LINESTRING, etc.) or a numeric dimension
38336            if self.check(TokenType::Number) {
38337                // Numeric argument (e.g., ST_GEOMETRY(1) in Teradata)
38338                let n = self.expect_number()? as u32;
38339                self.expect(TokenType::RParen)?;
38340                return Ok((None, Some(n)));
38341            }
38342            // Parse subtype
38343            let subtype = Some(self.expect_identifier()?.to_uppercase());
38344
38345            // Parse optional SRID
38346            let srid = if self.match_token(TokenType::Comma) {
38347                Some(self.expect_number()? as u32)
38348            } else {
38349                None
38350            };
38351
38352            self.expect(TokenType::RParen)?;
38353            Ok((subtype, srid))
38354        } else {
38355            Ok((None, None))
38356        }
38357    }
38358
38359    /// Parse struct/row/union type fields: name TYPE, name TYPE, ...
38360    /// `paren_style` indicates whether we're parsing parenthesized syntax (terminates at RParen)
38361    /// or angle-bracket syntax (terminates at Gt/GtGt).
38362    fn parse_struct_type_fields(&mut self, paren_style: bool) -> Result<Vec<StructField>> {
38363        let mut fields = Vec::new();
38364        // Check for empty field list
38365        if (paren_style && self.check(TokenType::RParen))
38366            || (!paren_style && (self.check(TokenType::Gt) || self.check(TokenType::GtGt)))
38367        {
38368            return Ok(fields);
38369        }
38370        loop {
38371            // Parse field name or just type (for anonymous struct fields)
38372            // Track whether it was a quoted identifier to preserve quoting
38373            let is_quoted = self.check(TokenType::QuotedIdentifier);
38374            let first = self.expect_identifier_or_keyword()?;
38375            let first_upper = first.to_uppercase();
38376
38377            // Check if this is a parametric type (ARRAY<T>, MAP<K,V>, STRUCT<...>, STRUCT(...))
38378            let is_parametric_type = (first_upper == "ARRAY"
38379                || first_upper == "MAP"
38380                || first_upper == "STRUCT"
38381                || first_upper == "ROW")
38382                && (self.check(TokenType::Lt) || self.check(TokenType::LParen));
38383
38384            let (field_name, field_type) = if is_parametric_type {
38385                // This is a parametric type as an anonymous field
38386                let field_type = self.parse_data_type_from_name(&first_upper)?;
38387                (String::new(), field_type)
38388            } else if self.check(TokenType::Comma)
38389                || self.match_identifier("OPTIONS")  // Check for OPTIONS (but don't consume yet)
38390                || (paren_style && self.check(TokenType::RParen))
38391                || (!paren_style && (self.check(TokenType::Gt) || self.check(TokenType::GtGt)))
38392            {
38393                // Check if we just matched OPTIONS - if so, retreat
38394                if self.previous().text.to_uppercase() == "OPTIONS" {
38395                    self.current -= 1;
38396                }
38397                // Anonymous field: just a type name
38398                let field_type = self.convert_name_to_type(&first)?;
38399                (String::new(), field_type)
38400            } else if self.is_identifier_token()
38401                || self.is_safe_keyword_as_identifier()
38402                || self.check(TokenType::Lt)
38403                || self.check(TokenType::LParen)
38404                || self.check(TokenType::Colon)
38405            {
38406                // Named field: fieldname TYPE (or fieldname: TYPE for Hive)
38407                // Consume optional colon separator (Hive-style: `STRUCT<field_name: TYPE>`)
38408                self.match_token(TokenType::Colon);
38409                let field_type = self.parse_data_type()?;
38410                // Preserve quoting for field names
38411                let field_name = if is_quoted {
38412                    format!("\"{}\"", first)
38413                } else {
38414                    first
38415                };
38416                (field_name, field_type)
38417            } else {
38418                // Just a type name
38419                let field_type = self.convert_name_to_type(&first)?;
38420                (String::new(), field_type)
38421            };
38422
38423            // Spark/Databricks: Check for COMMENT clause on struct field
38424            let comment = if self.match_token(TokenType::Comment) {
38425                Some(self.expect_string()?)
38426            } else {
38427                None
38428            };
38429
38430            // BigQuery: Check for OPTIONS clause on struct field
38431            let options = if self.match_identifier("OPTIONS") {
38432                self.parse_options_list()?
38433            } else {
38434                Vec::new()
38435            };
38436
38437            fields.push(StructField::with_options_and_comment(
38438                field_name, field_type, options, comment,
38439            ));
38440
38441            if !self.match_token(TokenType::Comma) {
38442                break;
38443            }
38444        }
38445        Ok(fields)
38446    }
38447
38448    /// Parse a data type given a name that was already consumed
38449    /// This is used for standalone type expressions like ARRAY<T>
38450    fn parse_data_type_from_name(&mut self, name: &str) -> Result<DataType> {
38451        match name {
38452            "ARRAY" => {
38453                if self.match_token(TokenType::Lt) {
38454                    let element_type = self.parse_data_type()?;
38455                    self.expect_gt()?;
38456                    Ok(DataType::Array {
38457                        element_type: Box::new(element_type),
38458                        dimension: None,
38459                    })
38460                } else {
38461                    Ok(DataType::Custom {
38462                        name: "ARRAY".to_string(),
38463                    })
38464                }
38465            }
38466            "MAP" => {
38467                if self.match_token(TokenType::Lt) {
38468                    let key_type = self.parse_data_type()?;
38469                    self.expect(TokenType::Comma)?;
38470                    let value_type = self.parse_data_type()?;
38471                    self.expect_gt()?;
38472                    Ok(DataType::Map {
38473                        key_type: Box::new(key_type),
38474                        value_type: Box::new(value_type),
38475                    })
38476                } else {
38477                    Ok(DataType::Custom {
38478                        name: "MAP".to_string(),
38479                    })
38480                }
38481            }
38482            "STRUCT" => {
38483                if self.match_token(TokenType::Lt) {
38484                    let fields = self.parse_struct_type_fields(false)?;
38485                    self.expect_gt()?;
38486                    Ok(DataType::Struct {
38487                        fields,
38488                        nested: false,
38489                    })
38490                } else if self.match_token(TokenType::LParen) {
38491                    let fields = self.parse_struct_type_fields(true)?;
38492                    self.expect(TokenType::RParen)?;
38493                    Ok(DataType::Struct {
38494                        fields,
38495                        nested: true,
38496                    })
38497                } else {
38498                    Ok(DataType::Custom {
38499                        name: "STRUCT".to_string(),
38500                    })
38501                }
38502            }
38503            "ROW" => {
38504                if self.match_token(TokenType::LParen) {
38505                    let fields = self.parse_struct_type_fields(true)?;
38506                    self.expect(TokenType::RParen)?;
38507                    Ok(DataType::Struct {
38508                        fields,
38509                        nested: true,
38510                    })
38511                } else {
38512                    Ok(DataType::Custom {
38513                        name: "ROW".to_string(),
38514                    })
38515                }
38516            }
38517            _ => Ok(DataType::Custom {
38518                name: name.to_string(),
38519            }),
38520        }
38521    }
38522
38523    /// Convert a type name string to a DataType
38524    /// Used for anonymous struct fields where we have just a type name
38525    fn convert_name_to_type(&self, name: &str) -> Result<DataType> {
38526        let upper = name.to_uppercase();
38527        Ok(match upper.as_str() {
38528            "INT" => DataType::Int {
38529                length: None,
38530                integer_spelling: false,
38531            },
38532            "INTEGER" => DataType::Int {
38533                length: None,
38534                integer_spelling: true,
38535            },
38536            "BIGINT" => DataType::BigInt { length: None },
38537            "SMALLINT" => DataType::SmallInt { length: None },
38538            "TINYINT" => DataType::TinyInt { length: None },
38539            "FLOAT" | "BINARY_FLOAT" => DataType::Float {
38540                precision: None,
38541                scale: None,
38542                real_spelling: false,
38543            },
38544            "REAL" => DataType::Float {
38545                precision: None,
38546                scale: None,
38547                real_spelling: true,
38548            },
38549            "DOUBLE" | "BINARY_DOUBLE" => DataType::Double {
38550                precision: None,
38551                scale: None,
38552            },
38553            "DECIMAL" | "NUMERIC" => DataType::Decimal {
38554                precision: None,
38555                scale: None,
38556            },
38557            "BOOLEAN" | "BOOL" => DataType::Boolean,
38558            "CHAR" | "CHARACTER" | "NCHAR" => DataType::Char { length: None },
38559            "VARCHAR" | "NVARCHAR" => DataType::VarChar {
38560                length: None,
38561                parenthesized_length: false,
38562            },
38563            "TEXT" | "STRING" | "NTEXT" => DataType::Text,
38564            "DATE" => DataType::Date,
38565            "TIME" => DataType::Time {
38566                precision: None,
38567                timezone: false,
38568            },
38569            "TIMETZ" => DataType::Time {
38570                precision: None,
38571                timezone: true,
38572            },
38573            "TIMESTAMP" => DataType::Timestamp {
38574                precision: None,
38575                timezone: false,
38576            },
38577            "INTERVAL" => DataType::Interval {
38578                unit: None,
38579                to: None,
38580            },
38581            "JSON" => DataType::Json,
38582            "JSONB" => DataType::JsonB,
38583            "UUID" => DataType::Uuid,
38584            "BLOB" => DataType::Blob,
38585            "BYTEA" => DataType::VarBinary { length: None },
38586            "BINARY" => DataType::Binary { length: None },
38587            "VARBINARY" => DataType::VarBinary { length: None },
38588            "BIT" => DataType::Bit { length: None },
38589            "VARBIT" => DataType::VarBit { length: None },
38590            _ => DataType::Custom {
38591                name: name.to_string(),
38592            },
38593        })
38594    }
38595
38596    /// Parse star modifiers: EXCLUDE/EXCEPT, REPLACE, RENAME
38597    /// Syntax varies by dialect:
38598    /// - DuckDB: * EXCLUDE (col1, col2)
38599    /// - BigQuery: * EXCEPT (col1, col2), * REPLACE (expr AS col)
38600    /// - Snowflake: * EXCLUDE col, * RENAME (old AS new)
38601    fn parse_star_modifiers(&mut self, table: Option<Identifier>) -> Result<Star> {
38602        self.parse_star_modifiers_with_comments(table, Vec::new())
38603    }
38604
38605    /// Parse star modifiers with explicit trailing comments from the star token
38606    fn parse_star_modifiers_with_comments(
38607        &mut self,
38608        table: Option<Identifier>,
38609        star_trailing_comments: Vec<String>,
38610    ) -> Result<Star> {
38611        let mut except = None;
38612        let mut replace = None;
38613        let mut rename = None;
38614
38615        // Parse EXCLUDE / EXCEPT clause
38616        if self.match_token(TokenType::Exclude) || self.match_token(TokenType::Except) {
38617            // ClickHouse: EXCEPT STRICT col1, col2 (STRICT is optional modifier)
38618            let _ = self.match_text_seq(&["STRICT"]);
38619            let mut columns = Vec::new();
38620            if self.match_token(TokenType::LParen) {
38621                // EXCLUDE (col1, col2) or EXCEPT (A.COL_1, B.COL_2)
38622                loop {
38623                    // ClickHouse: allow string literals in EXCEPT ('col_regex')
38624                    // and keywords like 'key', 'index' as column names
38625                    let col = if self.check(TokenType::String) {
38626                        self.advance().text
38627                    } else if self.is_safe_keyword_as_identifier() {
38628                        self.advance().text
38629                    } else {
38630                        self.expect_identifier()?
38631                    };
38632                    // Handle qualified column names like A.COL_1
38633                    if self.match_token(TokenType::Dot) {
38634                        let subcol = if self.is_safe_keyword_as_identifier() {
38635                            self.advance().text
38636                        } else {
38637                            self.expect_identifier()?
38638                        };
38639                        columns.push(Identifier::new(format!("{}.{}", col, subcol)));
38640                    } else {
38641                        columns.push(Identifier::new(col));
38642                    }
38643                    if !self.match_token(TokenType::Comma) {
38644                        break;
38645                    }
38646                }
38647                self.expect(TokenType::RParen)?;
38648            } else {
38649                // EXCLUDE col (single column, Snowflake) or EXCEPT col1, col2 (ClickHouse)
38650                // or EXCEPT 'regex' (ClickHouse)
38651                loop {
38652                    let col = if self.check(TokenType::String) {
38653                        self.advance().text
38654                    } else if self.is_safe_keyword_as_identifier() {
38655                        self.advance().text
38656                    } else {
38657                        self.expect_identifier()?
38658                    };
38659                    columns.push(Identifier::new(col));
38660                    // ClickHouse allows comma-separated columns without parens: EXCEPT col1, col2
38661                    // But only if the next token after comma looks like a column name
38662                    if !matches!(
38663                        self.config.dialect,
38664                        Some(crate::dialects::DialectType::ClickHouse)
38665                    ) || !self.check(TokenType::Comma)
38666                        || !matches!(
38667                            self.peek_nth(1).map(|t| t.token_type),
38668                            Some(TokenType::Identifier)
38669                                | Some(TokenType::QuotedIdentifier)
38670                                | Some(TokenType::Var)
38671                                | Some(TokenType::String)
38672                        )
38673                    {
38674                        break;
38675                    }
38676                    self.advance(); // consume comma
38677                }
38678            }
38679            except = Some(columns);
38680        }
38681
38682        // Parse REPLACE clause
38683        if self.match_token(TokenType::Replace) {
38684            // ClickHouse: REPLACE STRICT is optional modifier
38685            let _ = self.match_text_seq(&["STRICT"]);
38686            let mut replacements = Vec::new();
38687            if self.match_token(TokenType::LParen) {
38688                loop {
38689                    let expr = self.parse_expression()?;
38690                    self.expect(TokenType::As)?;
38691                    let alias = self.expect_identifier_or_keyword()?;
38692                    replacements.push(Alias::new(expr, Identifier::new(alias)));
38693                    if !self.match_token(TokenType::Comma) {
38694                        break;
38695                    }
38696                }
38697                self.expect(TokenType::RParen)?;
38698            } else if matches!(
38699                self.config.dialect,
38700                Some(crate::dialects::DialectType::ClickHouse)
38701            ) {
38702                // ClickHouse: REPLACE [STRICT] expr AS name (single entry without parens)
38703                // Multiple entries require parens: REPLACE(expr1 AS name1, expr2 AS name2)
38704                let expr = self.parse_expression()?;
38705                self.expect(TokenType::As)?;
38706                let alias = self.expect_identifier_or_keyword()?;
38707                replacements.push(Alias::new(expr, Identifier::new(alias)));
38708            } else {
38709                return Err(self.parse_error("Expected LParen after REPLACE"));
38710            }
38711            replace = Some(replacements);
38712        }
38713
38714        // Parse RENAME clause (Snowflake)
38715        if self.match_token(TokenType::Rename) {
38716            let mut renames = Vec::new();
38717            if self.match_token(TokenType::LParen) {
38718                loop {
38719                    let old_name = self.expect_identifier()?;
38720                    self.expect(TokenType::As)?;
38721                    let new_name = self.expect_identifier()?;
38722                    renames.push((Identifier::new(old_name), Identifier::new(new_name)));
38723                    if !self.match_token(TokenType::Comma) {
38724                        break;
38725                    }
38726                }
38727                self.expect(TokenType::RParen)?;
38728            } else {
38729                // Single rename without parens
38730                let old_name = self.expect_identifier()?;
38731                self.expect(TokenType::As)?;
38732                let new_name = self.expect_identifier()?;
38733                renames.push((Identifier::new(old_name), Identifier::new(new_name)));
38734            }
38735            rename = Some(renames);
38736        }
38737
38738        Ok(Star {
38739            table,
38740            except,
38741            replace,
38742            rename,
38743            trailing_comments: star_trailing_comments,
38744            span: None,
38745        })
38746    }
38747
38748    // === Helper methods ===
38749
38750    /// Check if at end of tokens
38751    fn is_at_end(&self) -> bool {
38752        self.current >= self.tokens.len()
38753    }
38754
38755    /// Check if current token is a query modifier keyword or end of input.
38756    /// Used after GROUP BY ALL/DISTINCT to decide whether to parse expression lists.
38757    fn is_at_query_modifier_or_end(&self) -> bool {
38758        if self.is_at_end() {
38759            return true;
38760        }
38761        matches!(
38762            self.peek().token_type,
38763            TokenType::Having
38764                | TokenType::Qualify
38765                | TokenType::Window
38766                | TokenType::Order
38767                | TokenType::Limit
38768                | TokenType::Fetch
38769                | TokenType::Offset
38770                | TokenType::For
38771                | TokenType::Lock
38772                | TokenType::Union
38773                | TokenType::Except
38774                | TokenType::Intersect
38775                | TokenType::RParen
38776                | TokenType::Semicolon
38777                | TokenType::Where
38778        )
38779    }
38780
38781    /// Create a parse error with position from the current token
38782    fn parse_error(&self, message: impl Into<String>) -> Error {
38783        let span = self.peek().span;
38784        Error::parse(message, span.line, span.column, span.start, span.end)
38785    }
38786
38787    /// Peek at current token
38788    /// Returns reference to current token, or last token if at end
38789    fn peek(&self) -> &Token {
38790        if self.current >= self.tokens.len() {
38791            // Return last token as fallback when at end
38792            // In practice, callers should check is_at_end() before calling peek()
38793            // but this prevents panic
38794            self.tokens.last().expect("Token list should not be empty")
38795        } else {
38796            &self.tokens[self.current]
38797        }
38798    }
38799
38800    /// Look ahead by n positions (0 = current token)
38801    fn peek_nth(&self, n: usize) -> Option<&Token> {
38802        let idx = self.current + n;
38803        if idx < self.tokens.len() {
38804            Some(&self.tokens[idx])
38805        } else {
38806            None
38807        }
38808    }
38809
38810    /// Advance to next token
38811    fn advance(&mut self) -> Token {
38812        if self.current >= self.tokens.len() {
38813            // Return last token as fallback if we're past the end
38814            // In practice, callers should check is_at_end() before calling advance()
38815            return self
38816                .tokens
38817                .last()
38818                .cloned()
38819                .expect("Token list should not be empty");
38820        }
38821        let token = self.tokens[self.current].clone();
38822        self.current += 1;
38823        token
38824    }
38825
38826    /// Get the previous token (last consumed)
38827    fn previous(&self) -> &Token {
38828        &self.tokens[self.current - 1]
38829    }
38830
38831    /// Get trailing comments from the previous token
38832    fn previous_trailing_comments(&self) -> Vec<String> {
38833        if self.current > 0 {
38834            self.tokens[self.current - 1].trailing_comments.clone()
38835        } else {
38836            Vec::new()
38837        }
38838    }
38839
38840    /// Get the token type of the previous token (the one before current).
38841    fn previous_token_type(&self) -> Option<TokenType> {
38842        if self.current > 0 {
38843            Some(self.tokens[self.current - 1].token_type.clone())
38844        } else {
38845            None
38846        }
38847    }
38848
38849    /// Wrap a query expression in a Subquery node.
38850    /// Only wraps if the expression is a query statement (Select, Union, etc.),
38851    /// not for simple expressions like column references.
38852    fn maybe_wrap_in_subquery(&self, inner: Expression) -> Expression {
38853        if matches!(
38854            &inner,
38855            Expression::Select(_)
38856                | Expression::Union(_)
38857                | Expression::Intersect(_)
38858                | Expression::Except(_)
38859        ) {
38860            Expression::Subquery(Box::new(Subquery {
38861                this: inner,
38862                alias: None,
38863                column_aliases: Vec::new(),
38864                order_by: None,
38865                limit: None,
38866                offset: None,
38867                distribute_by: None,
38868                sort_by: None,
38869                cluster_by: None,
38870                lateral: false,
38871                modifiers_inside: false,
38872                trailing_comments: Vec::new(),
38873                inferred_type: None,
38874            }))
38875        } else {
38876            inner
38877        }
38878    }
38879
38880    /// Clear trailing_comments from the rightmost leaf of an expression tree.
38881    /// Used by parse_and/parse_or to avoid comment duplication: when the same comment
38882    /// is captured both in an expression's trailing_comments (during parse_primary) and
38883    /// in a BinaryOp's operator_comments (during parse_and/parse_or), we clear the
38884    /// expression's copy since the operator_comments position (after AND/OR) is correct.
38885    fn clear_rightmost_trailing_comments(expr: &mut Expression) {
38886        match expr {
38887            Expression::Column(col) => col.trailing_comments.clear(),
38888            Expression::And(op) | Expression::Or(op) => {
38889                Self::clear_rightmost_trailing_comments(&mut op.right);
38890            }
38891            Expression::Not(op) => {
38892                Self::clear_rightmost_trailing_comments(&mut op.this);
38893            }
38894            // For comparison ops, the rightmost is the right operand
38895            Expression::Eq(op)
38896            | Expression::Neq(op)
38897            | Expression::Lt(op)
38898            | Expression::Lte(op)
38899            | Expression::Gt(op)
38900            | Expression::Gte(op)
38901            | Expression::Add(op)
38902            | Expression::Sub(op)
38903            | Expression::Mul(op)
38904            | Expression::Div(op) => {
38905                Self::clear_rightmost_trailing_comments(&mut op.right);
38906            }
38907            // For other expressions, trailing_comments might be stored differently
38908            // We don't need to handle all variants, just the common ones that appear
38909            // as operands in AND/OR expressions
38910            _ => {}
38911        }
38912    }
38913
38914    /// Get leading comments from the current token (comments that appeared before it)
38915    fn current_leading_comments(&self) -> Vec<String> {
38916        if !self.is_at_end() {
38917            self.tokens[self.current].comments.clone()
38918        } else {
38919            Vec::new()
38920        }
38921    }
38922
38923    /// Convert a slice of tokens to SQL string with proper quoting for strings
38924    fn tokens_to_sql(&self, start: usize, end: usize) -> String {
38925        let mut result = String::new();
38926        let mut prev_line: Option<usize> = None;
38927        let mut prev_end_offset: Option<usize> = None;
38928
38929        for t in &self.tokens[start..end] {
38930            // Check if we moved to a new line (preserve original line structure)
38931            let is_new_line = prev_line.is_some() && t.span.line > prev_line.unwrap();
38932
38933            // Use byte offsets to determine original spacing between tokens.
38934            // This preserves the exact spacing from the source (e.g., TRANSFORM( vs OPTIONS ())
38935            if is_new_line {
38936                result.push('\n');
38937                // Preserve original indentation
38938                // span.column is the column AFTER the last character (1-based),
38939                // so start column = span.column - text.chars().count()
38940                let text_len = t.text.chars().count();
38941                let start_col = t.span.column.saturating_sub(text_len);
38942                // For string tokens, add 2 for the quotes that were stripped
38943                let start_col = if t.token_type == TokenType::String {
38944                    start_col.saturating_sub(2)
38945                } else {
38946                    start_col
38947                };
38948                let indent = if start_col > 1 { start_col - 1 } else { 0 };
38949                for _ in 0..indent {
38950                    result.push(' ');
38951                }
38952            } else if !result.is_empty() {
38953                // Same line: use byte offsets to detect if there was whitespace
38954                let had_space = prev_end_offset.map_or(false, |prev_end| t.span.start > prev_end);
38955                if had_space {
38956                    result.push(' ');
38957                }
38958            }
38959
38960            if t.token_type == TokenType::String {
38961                // Re-add quotes around string literals
38962                result.push('\'');
38963                result.push_str(&t.text.replace('\'', "''"));
38964                result.push('\'');
38965            } else {
38966                result.push_str(&t.text);
38967            }
38968
38969            prev_line = Some(t.span.line);
38970            prev_end_offset = Some(t.span.end);
38971        }
38972        result
38973    }
38974
38975    /// Convert tokens to SQL for CREATE STAGE, normalizing FILE_FORMAT clause
38976    /// Transforms FILE_FORMAT='value' to FILE_FORMAT=(FORMAT_NAME='value')
38977    /// and FILE_FORMAT=schema.format to FILE_FORMAT=(FORMAT_NAME=schema.format)
38978    fn tokens_to_sql_stage_format(&self, start: usize, end: usize) -> String {
38979        let mut result = String::new();
38980        let mut prev_token_type: Option<TokenType> = None;
38981        let mut i = start;
38982
38983        while i < end {
38984            let t = &self.tokens[i];
38985
38986            // Check for FILE_FORMAT= pattern that needs normalization
38987            // FILE_FORMAT must be followed by = and then NOT by (
38988            if (t.token_type == TokenType::Var || t.token_type == TokenType::Identifier)
38989                && t.text.to_uppercase() == "FILE_FORMAT"
38990                && i + 1 < end
38991                && self.tokens[i + 1].token_type == TokenType::Eq
38992                && (i + 2 >= end || self.tokens[i + 2].token_type != TokenType::LParen)
38993            {
38994                // Need to normalize: FILE_FORMAT=value -> FILE_FORMAT=(FORMAT_NAME=value)
38995                if !result.is_empty() && prev_token_type != Some(TokenType::LParen) {
38996                    result.push(' ');
38997                }
38998                result.push_str("FILE_FORMAT=(FORMAT_NAME=");
38999
39000                // Skip FILE_FORMAT and =
39001                i += 2;
39002
39003                // Collect the value (string literal or qualified identifier like schema.format)
39004                while i < end {
39005                    let val = &self.tokens[i];
39006                    if val.token_type == TokenType::String {
39007                        // String literal: 'format1'
39008                        result.push('\'');
39009                        result.push_str(&val.text.replace('\'', "''"));
39010                        result.push('\'');
39011                        i += 1;
39012                        break;
39013                    } else if val.token_type == TokenType::Var
39014                        || val.token_type == TokenType::Identifier
39015                    {
39016                        // Identifier: schema1 or format1
39017                        result.push_str(&val.text);
39018                        i += 1;
39019                        // Check for dot (qualified name)
39020                        if i < end && self.tokens[i].token_type == TokenType::Dot {
39021                            result.push('.');
39022                            i += 1;
39023                            // Expect identifier after dot
39024                            if i < end {
39025                                result.push_str(&self.tokens[i].text);
39026                                i += 1;
39027                            }
39028                        }
39029                        break;
39030                    } else {
39031                        break;
39032                    }
39033                }
39034                result.push(')');
39035                prev_token_type = Some(TokenType::RParen);
39036                continue;
39037            }
39038
39039            // Normal token handling (same as tokens_to_sql)
39040            let needs_space = !result.is_empty()
39041                && prev_token_type != Some(TokenType::LParen)
39042                && prev_token_type != Some(TokenType::Eq)
39043                && prev_token_type != Some(TokenType::Dot)
39044                && t.token_type != TokenType::Comma
39045                && t.token_type != TokenType::RParen
39046                && t.token_type != TokenType::LParen
39047                && t.token_type != TokenType::Eq
39048                && t.token_type != TokenType::Dot;
39049
39050            if needs_space {
39051                result.push(' ');
39052            }
39053
39054            if t.token_type == TokenType::String {
39055                result.push('\'');
39056                result.push_str(&t.text.replace('\'', "''"));
39057                result.push('\'');
39058            } else {
39059                result.push_str(&t.text);
39060            }
39061
39062            prev_token_type = Some(t.token_type);
39063            i += 1;
39064        }
39065        result
39066    }
39067
39068    /// Like tokens_to_sql but also uppercases keyword tokens and adds space after commas
39069    fn tokens_to_sql_uppercased(&self, start: usize, end: usize) -> String {
39070        let mut result = String::new();
39071        let mut prev_token_type: Option<TokenType> = None;
39072        let mut prev_token_text: Option<String> = None;
39073
39074        for t in &self.tokens[start..end] {
39075            // Smart spacing: no space before comma, ), . or after (, .
39076            // Add space before ( only when preceded by a structural keyword or identifier
39077            // (e.g., "PRIMARY KEY (Id)", "CLUSTERED (EmpID)")
39078            // but NOT after data type keywords (e.g., "VARCHAR(100)", "INT(11)")
39079            let is_lparen_after_keyword = t.token_type == TokenType::LParen
39080                && prev_token_type.map_or(false, |p: TokenType| {
39081                    // Only add space for structural SQL keywords, not data type keywords
39082                    match p {
39083                        TokenType::PrimaryKey | TokenType::ForeignKey | TokenType::Unique
39084                        | TokenType::Check | TokenType::Index | TokenType::Key
39085                        | TokenType::Constraint | TokenType::References
39086                        | TokenType::Not | TokenType::Null
39087                        | TokenType::Default | TokenType::Values | TokenType::In
39088                        | TokenType::Exists | TokenType::Select | TokenType::From
39089                        | TokenType::Where | TokenType::Having | TokenType::Using
39090                        | TokenType::On | TokenType::Set | TokenType::Into
39091                        | TokenType::Table | TokenType::View | TokenType::Create
39092                        | TokenType::Insert | TokenType::Update | TokenType::Delete
39093                        | TokenType::Join | TokenType::Left | TokenType::Right
39094                        | TokenType::Inner | TokenType::Outer | TokenType::Full
39095                        | TokenType::Cross | TokenType::Case | TokenType::When
39096                        | TokenType::Then | TokenType::Else | TokenType::End
39097                        | TokenType::If | TokenType::Partition | TokenType::Over
39098                        | TokenType::Between | TokenType::Like | TokenType::Replace
39099                        | TokenType::Grant | TokenType::Revoke
39100                        => true,
39101                        _ => false,
39102                    }
39103                })
39104                // For Var/Identifier tokens, add space before ( only for structural tokens
39105                // (CLUSTERED, NONCLUSTERED, INDEX) but not data types (VARCHAR, INT, etc.)
39106                || (t.token_type == TokenType::LParen
39107                    && prev_token_text.as_ref().map_or(false, |text| {
39108                        let upper = text.to_uppercase();
39109                        matches!(upper.as_str(),
39110                            "CLUSTERED" | "NONCLUSTERED" | "HASH" | "RANGE"
39111                            | "INCLUDE" | "FILLFACTOR" | "PAD_INDEX"
39112                        )
39113                    }));
39114            let needs_space = !result.is_empty()
39115                && prev_token_type != Some(TokenType::LParen)
39116                && prev_token_type != Some(TokenType::Dot)
39117                && t.token_type != TokenType::Comma
39118                && t.token_type != TokenType::RParen
39119                && t.token_type != TokenType::Dot
39120                && (t.token_type != TokenType::LParen || is_lparen_after_keyword);
39121
39122            // Add space after comma
39123            if prev_token_type == Some(TokenType::Comma) {
39124                result.push(' ');
39125            } else if needs_space {
39126                result.push(' ');
39127            }
39128
39129            if t.token_type == TokenType::String {
39130                // Re-add quotes around string literals
39131                result.push('\'');
39132                result.push_str(&t.text.replace('\'', "''"));
39133                result.push('\'');
39134            } else if t.token_type.is_keyword() {
39135                // Uppercase keyword tokens
39136                result.push_str(&t.text.to_uppercase());
39137            } else {
39138                // For non-keyword tokens, preserve original text
39139                result.push_str(&t.text);
39140            }
39141
39142            prev_token_type = Some(t.token_type);
39143            prev_token_text = Some(t.text.clone());
39144        }
39145        result
39146    }
39147
39148    /// Check if current token matches type
39149    fn check(&self, token_type: TokenType) -> bool {
39150        if self.is_at_end() {
39151            false
39152        } else {
39153            self.peek().token_type == token_type
39154        }
39155    }
39156
39157    /// Check if current token is a keyword
39158    fn check_keyword(&self) -> bool {
39159        if self.is_at_end() {
39160            false
39161        } else {
39162            self.peek().token_type.is_keyword()
39163        }
39164    }
39165
39166    /// Check if current UNPIVOT token starts an UNPIVOT clause (vs being an alias).
39167    /// UNPIVOT clause starts with: UNPIVOT(, UNPIVOT INCLUDE, or UNPIVOT EXCLUDE
39168    fn is_unpivot_clause_start(&self) -> bool {
39169        if !self.check(TokenType::Unpivot) {
39170            return false;
39171        }
39172        let next_idx = self.current + 1;
39173        if next_idx >= self.tokens.len() {
39174            return false;
39175        }
39176        let next = &self.tokens[next_idx];
39177        if next.token_type == TokenType::LParen {
39178            return true;
39179        }
39180        // UNPIVOT INCLUDE NULLS (...) or UNPIVOT EXCLUDE NULLS (...)
39181        let next_text = next.text.to_uppercase();
39182        next_text == "INCLUDE" || next_text == "EXCLUDE"
39183    }
39184
39185    /// Check if current token text matches (case-insensitive), does not advance
39186    fn check_keyword_text(&self, keyword: &str) -> bool {
39187        if self.is_at_end() {
39188            false
39189        } else {
39190            self.peek().text.to_uppercase() == keyword.to_uppercase()
39191        }
39192    }
39193
39194    /// Check if current token is FROM keyword
39195    fn check_from_keyword(&self) -> bool {
39196        self.check(TokenType::From)
39197    }
39198
39199    /// Check if next token matches type
39200    fn check_next(&self, token_type: TokenType) -> bool {
39201        if self.current + 1 >= self.tokens.len() {
39202            false
39203        } else {
39204            self.tokens[self.current + 1].token_type == token_type
39205        }
39206    }
39207
39208    /// Check if next token is an identifier with specific name (case-insensitive)
39209    fn check_next_identifier(&self, name: &str) -> bool {
39210        if self.current + 1 >= self.tokens.len() {
39211            false
39212        } else {
39213            let token = &self.tokens[self.current + 1];
39214            (token.token_type == TokenType::Var || token.token_type == TokenType::Identifier)
39215                && token.text.to_uppercase() == name.to_uppercase()
39216        }
39217    }
39218
39219    /// Match an identifier with specific text (case insensitive)
39220    /// Checks for Identifier, Var, and QuotedIdentifier tokens
39221    fn match_identifier(&mut self, text: &str) -> bool {
39222        if (self.check(TokenType::Identifier)
39223            || self.check(TokenType::Var)
39224            || self.check(TokenType::QuotedIdentifier))
39225            && self.peek().text.to_uppercase() == text.to_uppercase()
39226        {
39227            self.advance();
39228            true
39229        } else {
39230            false
39231        }
39232    }
39233
39234    /// Check if current token is an identifier with specific text (case insensitive)
39235    /// Does NOT advance the parser
39236    fn check_identifier(&self, text: &str) -> bool {
39237        if self.is_at_end() {
39238            return false;
39239        }
39240        (self.check(TokenType::Identifier)
39241            || self.check(TokenType::Var)
39242            || self.check(TokenType::QuotedIdentifier))
39243            && self.peek().text.to_uppercase() == text.to_uppercase()
39244    }
39245
39246    /// Check if current token is a "safe" keyword that can be used as an identifier.
39247    /// Check if the current Percent token is a PERCENT modifier (not a modulo operator).
39248    /// "PERCENT" spelled out is always a modifier. "%" is a modifier when followed by
39249    /// a clause boundary (OFFSET, end of input, semicolon, RParen, comma, etc.)
39250    fn is_percent_modifier(&self) -> bool {
39251        if self.is_at_end() {
39252            return false;
39253        }
39254        let text = self.peek().text.to_uppercase();
39255        if text == "PERCENT" {
39256            return true;
39257        }
39258        // "%" symbol — only treat as PERCENT modifier if followed by a boundary
39259        if text == "%" {
39260            let next_idx = self.current + 1;
39261            if next_idx >= self.tokens.len() {
39262                return true; // at end — it's PERCENT
39263            }
39264            let next_type = self.tokens[next_idx].token_type;
39265            return matches!(
39266                next_type,
39267                TokenType::Offset
39268                    | TokenType::Semicolon
39269                    | TokenType::RParen
39270                    | TokenType::From
39271                    | TokenType::Where
39272                    | TokenType::GroupBy
39273                    | TokenType::OrderBy
39274                    | TokenType::Having
39275                    | TokenType::Union
39276                    | TokenType::Intersect
39277                    | TokenType::Except
39278                    | TokenType::Comma
39279                    | TokenType::With // WITH TIES
39280            ) || next_idx >= self.tokens.len();
39281        }
39282        false
39283    }
39284
39285    /// Structural keywords like FROM, WHERE, JOIN, SELECT are NOT safe.
39286    /// Non-structural keywords like FILTER, UPDATE, END, VALUES can be used as identifiers.
39287    fn is_safe_keyword_as_identifier(&self) -> bool {
39288        if self.is_at_end() {
39289            return false;
39290        }
39291        let token_type = self.peek().token_type;
39292        // Structural keywords that should NOT be used as identifiers
39293        let is_structural = matches!(
39294            token_type,
39295            TokenType::From
39296                | TokenType::Where
39297                | TokenType::Select
39298                | TokenType::Insert
39299                | TokenType::Delete
39300                | TokenType::Create
39301                | TokenType::Drop
39302                | TokenType::Alter
39303                | TokenType::Join
39304                | TokenType::Inner
39305                | TokenType::Cross
39306                | TokenType::On
39307                | TokenType::GroupBy
39308                | TokenType::OrderBy
39309                | TokenType::Having
39310                | TokenType::With
39311                | TokenType::Union
39312                | TokenType::Intersect
39313                | TokenType::Except
39314                | TokenType::Qualify
39315                | TokenType::Into
39316                | TokenType::Set
39317                | TokenType::Using
39318                | TokenType::Lateral
39319                | TokenType::Natural
39320        );
39321        // ClickHouse allows many SQL keywords as identifiers (table names, column aliases, etc.)
39322        if matches!(
39323            self.config.dialect,
39324            Some(crate::dialects::DialectType::ClickHouse)
39325        ) {
39326            let is_ch_structural = matches!(
39327                token_type,
39328                TokenType::From
39329                    | TokenType::Where
39330                    | TokenType::Select
39331                    | TokenType::Create
39332                    | TokenType::Drop
39333                    | TokenType::Alter
39334                    | TokenType::On
39335                    | TokenType::GroupBy
39336                    | TokenType::OrderBy
39337                    | TokenType::Having
39338                    | TokenType::With
39339                    | TokenType::Union
39340                    | TokenType::Intersect
39341                    | TokenType::Except
39342                    | TokenType::Into
39343                    | TokenType::Using
39344                    | TokenType::Lateral
39345                    | TokenType::Natural
39346            );
39347            // Also allow certain operator tokens and non-keyword tokens as identifiers
39348            if matches!(token_type, TokenType::RLike | TokenType::Values) {
39349                return true;
39350            }
39351            return self.peek().token_type.is_keyword() && !is_ch_structural;
39352        }
39353        // If it's a keyword but NOT structural, it's safe to use as identifier
39354        self.peek().token_type.is_keyword() && !is_structural
39355    }
39356
39357    /// Check if a token at current position is the last meaningful token in an expression context.
39358    /// This is used to detect when a keyword like IS or KEEP should be treated as an alias
39359    /// instead of an operator keyword.
39360    fn is_last_expression_token(&self, _token_type: TokenType) -> bool {
39361        // Check if the token after the current one is end-of-input or a clause boundary
39362        let next_idx = self.current + 1;
39363        if next_idx >= self.tokens.len() {
39364            return true; // at end of input
39365        }
39366        let next_type = self.tokens[next_idx].token_type;
39367        // Clause boundaries that indicate the current token is the last in the expression
39368        matches!(
39369            next_type,
39370            TokenType::From
39371                | TokenType::Where
39372                | TokenType::GroupBy
39373                | TokenType::OrderBy
39374                | TokenType::Having
39375                | TokenType::Limit
39376                | TokenType::Union
39377                | TokenType::Intersect
39378                | TokenType::Except
39379                | TokenType::Semicolon
39380                | TokenType::RParen
39381                | TokenType::Comma
39382        )
39383    }
39384
39385    /// Check if current token is a type keyword (for lambda type annotations)
39386    fn is_type_keyword(&self) -> bool {
39387        if self.is_at_end() {
39388            return false;
39389        }
39390        let token = self.peek();
39391        // Check for common type keywords that might appear in lambda annotations
39392        // Use text comparison to avoid depending on specific TokenType variants
39393        let text_upper = token.text.to_uppercase();
39394        matches!(
39395            text_upper.as_str(),
39396            "INT"
39397                | "INTEGER"
39398                | "BIGINT"
39399                | "SMALLINT"
39400                | "TINYINT"
39401                | "DOUBLE"
39402                | "FLOAT"
39403                | "DECIMAL"
39404                | "NUMERIC"
39405                | "REAL"
39406                | "VARCHAR"
39407                | "CHAR"
39408                | "TEXT"
39409                | "STRING"
39410                | "NVARCHAR"
39411                | "NCHAR"
39412                | "BOOLEAN"
39413                | "BOOL"
39414                | "DATE"
39415                | "TIME"
39416                | "TIMESTAMP"
39417                | "DATETIME"
39418                | "INTERVAL"
39419                | "BINARY"
39420                | "VARBINARY"
39421                | "BLOB"
39422                | "ARRAY"
39423                | "MAP"
39424                | "STRUCT"
39425                | "OBJECT"
39426                | "VARIANT"
39427                | "JSON"
39428                | "NUMBER"
39429                | "VARCHAR2"
39430        )
39431    }
39432
39433    /// Check if current token is a command keyword that can safely be used as an implicit alias.
39434    /// This is a narrow set of command-like keywords (GET, PUT, COPY, SHOW, etc.) that are
39435    /// unlikely to conflict with SQL clause keywords when used as implicit aliases.
39436    fn is_command_keyword_as_alias(&self) -> bool {
39437        if self.is_at_end() {
39438            return false;
39439        }
39440        let token_type = self.peek().token_type;
39441        // FORMAT is a query modifier in ClickHouse, so don't treat it as an alias there
39442        if matches!(token_type, TokenType::Format) {
39443            return !matches!(
39444                self.config.dialect,
39445                Some(crate::dialects::DialectType::ClickHouse)
39446            );
39447        }
39448        // Base keywords that can be aliases in all dialects
39449        if matches!(
39450            token_type,
39451            TokenType::Get
39452                | TokenType::Put
39453                | TokenType::Copy
39454                | TokenType::Show
39455                | TokenType::Rename
39456                | TokenType::Enum
39457                | TokenType::Sample
39458                | TokenType::Collate
39459                | TokenType::Add
39460        ) {
39461            return true;
39462        }
39463        // Spark/Hive allow LIMIT and OFFSET as aliases (without quoting)
39464        if matches!(
39465            self.config.dialect,
39466            Some(crate::dialects::DialectType::Spark) | Some(crate::dialects::DialectType::Hive)
39467        ) && matches!(token_type, TokenType::Limit | TokenType::Offset)
39468        {
39469            return true;
39470        }
39471        false
39472    }
39473
39474    /// Check if current token is a keyword that can be used as a table alias.
39475    /// This is more permissive than is_safe_keyword_as_identifier - it allows
39476    /// LEFT, RIGHT, OUTER, FULL which are JOIN keywords but can also be aliases.
39477    fn can_be_alias_keyword(&self) -> bool {
39478        if self.is_at_end() {
39479            return false;
39480        }
39481        let token_type = self.peek().token_type;
39482        // Keywords that can be used as aliases (similar to is_safe_keyword but more permissive)
39483        matches!(
39484            token_type,
39485            TokenType::Left
39486                | TokenType::Right
39487                | TokenType::Outer
39488                | TokenType::Full
39489                | TokenType::Only
39490                | TokenType::Next
39491                | TokenType::All
39492                | TokenType::If
39493        ) || self.is_safe_keyword_as_identifier()
39494    }
39495
39496    /// Match and consume a token type
39497    fn match_token(&mut self, token_type: TokenType) -> bool {
39498        if self.check(token_type) {
39499            self.advance();
39500            true
39501        } else {
39502            false
39503        }
39504    }
39505
39506    /// Match a sequence of keywords
39507    fn match_keywords(&mut self, keywords: &[TokenType]) -> bool {
39508        // Check if all keywords match
39509        for (i, &kw) in keywords.iter().enumerate() {
39510            if self.current + i >= self.tokens.len() {
39511                return false;
39512            }
39513            if self.tokens[self.current + i].token_type != kw {
39514                return false;
39515            }
39516        }
39517
39518        // Consume all matched keywords
39519        self.current += keywords.len();
39520        true
39521    }
39522
39523    /// Expect a specific token type
39524    fn expect(&mut self, token_type: TokenType) -> Result<Token> {
39525        if self.check(token_type) {
39526            Ok(self.advance())
39527        } else {
39528            let got = if self.is_at_end() {
39529                "end of input".to_string()
39530            } else {
39531                format!("{:?}", self.peek().token_type)
39532            };
39533            let got_text = if self.is_at_end() {
39534                "".to_string()
39535            } else {
39536                self.peek().text.clone()
39537            };
39538            let start = self.current.saturating_sub(3);
39539            let end = (self.current + 4).min(self.tokens.len());
39540            let context = self.tokens_to_sql(start, end).replace('\n', " ");
39541            Err(self.parse_error(format!(
39542                "Expected {:?}, got {} ('{}') near [{}]",
39543                token_type, got, got_text, context
39544            )))
39545        }
39546    }
39547
39548    /// Expect a `>` token, handling the case where `>>` was tokenized as GtGt
39549    /// This is needed for parsing nested generic types like `ARRAY<ARRAY<INT>>`
39550    fn expect_gt(&mut self) -> Result<Token> {
39551        if self.check(TokenType::Gt) {
39552            Ok(self.advance())
39553        } else if self.check(TokenType::GtGt) {
39554            // Split >> into two > tokens
39555            // Replace the GtGt with Gt and return a synthetic Gt token
39556            let token = self.peek().clone();
39557            self.tokens[self.current] = Token {
39558                token_type: TokenType::Gt,
39559                text: ">".to_string(),
39560                span: Span {
39561                    start: token.span.start + 1,
39562                    end: token.span.end,
39563                    line: token.span.line,
39564                    column: token.span.column + 1,
39565                },
39566                comments: Vec::new(),
39567                trailing_comments: Vec::new(),
39568            };
39569            Ok(Token {
39570                token_type: TokenType::Gt,
39571                text: ">".to_string(),
39572                span: Span {
39573                    start: token.span.start,
39574                    end: token.span.start + 1,
39575                    line: token.span.line,
39576                    column: token.span.column,
39577                },
39578                comments: token.comments,
39579                trailing_comments: Vec::new(),
39580            })
39581        } else {
39582            Err(self.parse_error(format!(
39583                "Expected Gt, got {:?}",
39584                if self.is_at_end() {
39585                    "end of input".to_string()
39586                } else {
39587                    format!("{:?}", self.peek().token_type)
39588                }
39589            )))
39590        }
39591    }
39592
39593    /// Expect a string literal and return its value
39594    fn expect_string(&mut self) -> Result<String> {
39595        if self.check(TokenType::String) || self.check(TokenType::DollarString) {
39596            Ok(self.advance().text)
39597        } else {
39598            Err(self.parse_error(format!(
39599                "Expected string, got {:?}",
39600                if self.is_at_end() {
39601                    "end of input".to_string()
39602                } else {
39603                    format!("{:?}", self.peek().token_type)
39604                }
39605            )))
39606        }
39607    }
39608
39609    /// Check if the current token is any kind of identifier (regular, quoted, or var)
39610    fn is_identifier_token(&self) -> bool {
39611        self.check(TokenType::Var)
39612            || self.check(TokenType::Identifier)
39613            || self.check(TokenType::QuotedIdentifier)
39614    }
39615
39616    /// Check if current token is a stage reference (starts with @)
39617    /// This handles both DAt token and Var tokens that start with @
39618    fn is_stage_reference(&self) -> bool {
39619        self.check(TokenType::DAt)
39620            || (self.check(TokenType::Var) && self.peek().text.starts_with('@'))
39621    }
39622
39623    /// Check if the current token could be a MySQL numeric-starting identifier (e.g., 00f, 1d)
39624    /// This checks that the Number token is followed by a connected Var/Identifier token
39625    fn is_mysql_numeric_identifier(&self) -> bool {
39626        if !self.check(TokenType::Number)
39627            || !matches!(
39628                self.config.dialect,
39629                Some(crate::dialects::DialectType::MySQL)
39630            )
39631        {
39632            return false;
39633        }
39634        // Check if the next token is connected (no space) and is a var/identifier
39635        if self.current + 1 < self.tokens.len() {
39636            let curr = &self.tokens[self.current];
39637            let next = &self.tokens[self.current + 1];
39638            // Tokens are connected if they are immediately adjacent (no whitespace between)
39639            // span.end is exclusive, so if curr.end == next.start, they are adjacent
39640            let connected = curr.span.end == next.span.start;
39641            connected
39642                && (next.token_type == TokenType::Var || next.token_type == TokenType::Identifier)
39643        } else {
39644            false
39645        }
39646    }
39647
39648    /// Parse a MySQL numeric-starting identifier (e.g., 00f, 1d)
39649    /// Merges the number token with connected identifier tokens
39650    fn parse_mysql_numeric_identifier(&mut self) -> Identifier {
39651        let num_token = self.advance();
39652        let mut name = num_token.text.clone();
39653        // Merge with connected identifier/var tokens
39654        while !self.is_at_end()
39655            && self.is_connected()
39656            && (self.check(TokenType::Var) || self.check(TokenType::Identifier))
39657        {
39658            let tok = self.advance();
39659            name.push_str(&tok.text);
39660        }
39661        Identifier {
39662            name,
39663            // sqlglot treats this as an identifier token and re-emits it quoted.
39664            quoted: true,
39665            trailing_comments: Vec::new(),
39666            span: None,
39667        }
39668    }
39669
39670    /// Check if an uppercase string starting with '_' is a MySQL charset introducer
39671    fn is_mysql_charset_introducer(text: &str) -> bool {
39672        matches!(
39673            text,
39674            "_ARMSCII8"
39675                | "_ASCII"
39676                | "_BIG5"
39677                | "_BINARY"
39678                | "_CP1250"
39679                | "_CP1251"
39680                | "_CP1256"
39681                | "_CP1257"
39682                | "_CP850"
39683                | "_CP852"
39684                | "_CP866"
39685                | "_CP932"
39686                | "_DEC8"
39687                | "_EUCJPMS"
39688                | "_EUCKR"
39689                | "_GB18030"
39690                | "_GB2312"
39691                | "_GBK"
39692                | "_GEOSTD8"
39693                | "_GREEK"
39694                | "_HEBREW"
39695                | "_HP8"
39696                | "_KEYBCS2"
39697                | "_KOI8R"
39698                | "_KOI8U"
39699                | "_LATIN1"
39700                | "_LATIN2"
39701                | "_LATIN5"
39702                | "_LATIN7"
39703                | "_MACCE"
39704                | "_MACROMAN"
39705                | "_SJIS"
39706                | "_SWE7"
39707                | "_TIS620"
39708                | "_UCS2"
39709                | "_UJIS"
39710                | "_UTF8"
39711                | "_UTF16"
39712                | "_UTF16LE"
39713                | "_UTF32"
39714                | "_UTF8MB3"
39715                | "_UTF8MB4"
39716        )
39717    }
39718
39719    /// Check if the current token can be used as an identifier (includes keywords)
39720    fn is_identifier_or_keyword_token(&self) -> bool {
39721        self.is_identifier_token() || self.check_keyword()
39722    }
39723
39724    /// Expect an identifier and return an Identifier struct with quoted flag
39725    fn expect_identifier_with_quoted(&mut self) -> Result<Identifier> {
39726        if self.is_mysql_numeric_identifier() {
39727            return Ok(self.parse_mysql_numeric_identifier());
39728        }
39729        if self.is_identifier_token() {
39730            let token = self.advance();
39731            let quoted = token.token_type == TokenType::QuotedIdentifier;
39732            Ok(Identifier {
39733                name: token.text,
39734                quoted,
39735                trailing_comments: Vec::new(),
39736                span: None,
39737            })
39738        } else if self.check(TokenType::LBrace)
39739            && matches!(
39740                self.config.dialect,
39741                Some(crate::dialects::DialectType::ClickHouse)
39742            )
39743        {
39744            if let Some(param_expr) = self.parse_clickhouse_braced_parameter()? {
39745                if let Expression::Parameter(param) = &param_expr {
39746                    let name = format!(
39747                        "{{{}: {}}}",
39748                        param.name.as_deref().unwrap_or(""),
39749                        param.expression.as_deref().unwrap_or("")
39750                    );
39751                    return Ok(Identifier {
39752                        name,
39753                        quoted: false,
39754                        trailing_comments: Vec::new(),
39755                        span: None,
39756                    });
39757                }
39758            }
39759            Err(self.parse_error("Expected identifier, got LBrace"))
39760        } else {
39761            Err(self.parse_error(format!(
39762                "Expected identifier, got {:?}",
39763                if self.is_at_end() {
39764                    "end of input".to_string()
39765                } else {
39766                    format!("{:?}", self.peek().token_type)
39767                }
39768            )))
39769        }
39770    }
39771
39772    /// Expect an identifier or keyword (for column names, field names, etc.)
39773    fn expect_identifier_or_keyword_with_quoted(&mut self) -> Result<Identifier> {
39774        // MySQL numeric-starting identifiers (e.g., 00f, 1d)
39775        if self.is_mysql_numeric_identifier() {
39776            return Ok(self.parse_mysql_numeric_identifier());
39777        }
39778        // Also accept ? (Parameter) as an identifier placeholder
39779        // For positional parameters like $23, the token text is "23" (without $)
39780        if self.check(TokenType::Parameter) {
39781            let token = self.advance();
39782            // If the text is a number, it's a positional parameter like $1, $2, $23
39783            // Construct $N as the identifier name
39784            let name = if token.text.chars().all(|c| c.is_ascii_digit()) && !token.text.is_empty() {
39785                format!("${}", token.text)
39786            } else {
39787                // Plain ? placeholder or other parameter
39788                "?".to_string()
39789            };
39790            return Ok(Identifier {
39791                name,
39792                quoted: false,
39793                trailing_comments: Vec::new(),
39794                span: None,
39795            });
39796        }
39797        if self.is_identifier_or_keyword_token() {
39798            let token = self.advance();
39799            let quoted = token.token_type == TokenType::QuotedIdentifier;
39800            Ok(Identifier {
39801                name: token.text,
39802                quoted,
39803                trailing_comments: Vec::new(),
39804                span: None,
39805            })
39806        } else if self.check(TokenType::LBrace)
39807            && matches!(
39808                self.config.dialect,
39809                Some(crate::dialects::DialectType::ClickHouse)
39810            )
39811        {
39812            // ClickHouse query parameter: {name:Type}
39813            if let Some(param_expr) = self.parse_clickhouse_braced_parameter()? {
39814                // Extract the parameter name to use as the identifier
39815                if let Expression::Parameter(param) = &param_expr {
39816                    let name = format!(
39817                        "{{{}: {}}}",
39818                        param.name.as_deref().unwrap_or(""),
39819                        param.expression.as_deref().unwrap_or("")
39820                    );
39821                    return Ok(Identifier {
39822                        name,
39823                        quoted: false,
39824                        trailing_comments: Vec::new(),
39825                        span: None,
39826                    });
39827                }
39828            }
39829            Err(self.parse_error("Expected identifier, got LBrace"))
39830        } else {
39831            Err(self.parse_error(format!(
39832                "Expected identifier, got {:?}",
39833                if self.is_at_end() {
39834                    "end of input".to_string()
39835                } else {
39836                    format!("{:?}", self.peek().token_type)
39837                }
39838            )))
39839        }
39840    }
39841
39842    /// Expect an identifier
39843    fn expect_identifier(&mut self) -> Result<String> {
39844        if self.is_identifier_token() {
39845            Ok(self.advance().text)
39846        } else if self.check(TokenType::LBrace)
39847            && matches!(
39848                self.config.dialect,
39849                Some(crate::dialects::DialectType::ClickHouse)
39850            )
39851        {
39852            if let Some(param_expr) = self.parse_clickhouse_braced_parameter()? {
39853                if let Expression::Parameter(param) = &param_expr {
39854                    return Ok(format!(
39855                        "{{{}: {}}}",
39856                        param.name.as_deref().unwrap_or(""),
39857                        param.expression.as_deref().unwrap_or("")
39858                    ));
39859                }
39860            }
39861            Err(self.parse_error("Expected identifier, got LBrace"))
39862        } else {
39863            Err(self.parse_error(format!(
39864                "Expected identifier, got {:?}",
39865                if self.is_at_end() {
39866                    "end of input".to_string()
39867                } else {
39868                    format!("{:?}", self.peek().token_type)
39869                }
39870            )))
39871        }
39872    }
39873
39874    /// Expect an identifier or keyword (for aliases, column names, etc.)
39875    fn expect_identifier_or_keyword(&mut self) -> Result<String> {
39876        if self.is_identifier_or_keyword_token() {
39877            Ok(self.advance().text)
39878        } else if self.check(TokenType::LBrace)
39879            && matches!(
39880                self.config.dialect,
39881                Some(crate::dialects::DialectType::ClickHouse)
39882            )
39883        {
39884            if let Some(param_expr) = self.parse_clickhouse_braced_parameter()? {
39885                if let Expression::Parameter(param) = &param_expr {
39886                    return Ok(format!(
39887                        "{{{}: {}}}",
39888                        param.name.as_deref().unwrap_or(""),
39889                        param.expression.as_deref().unwrap_or("")
39890                    ));
39891                }
39892            }
39893            Err(self.parse_error("Expected identifier, got LBrace"))
39894        } else {
39895            Err(self.parse_error(format!(
39896                "Expected identifier, got {:?}",
39897                if self.is_at_end() {
39898                    "end of input".to_string()
39899                } else {
39900                    format!("{:?}", self.peek().token_type)
39901                }
39902            )))
39903        }
39904    }
39905
39906    /// Expect an identifier or safe keyword (for CTE names, column names in CREATE TABLE, etc.)
39907    /// This is more permissive than expect_identifier but excludes structural keywords
39908    fn expect_identifier_or_safe_keyword(&mut self) -> Result<String> {
39909        if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
39910            Ok(self.advance().text)
39911        } else if self.check(TokenType::LBrace)
39912            && matches!(
39913                self.config.dialect,
39914                Some(crate::dialects::DialectType::ClickHouse)
39915            )
39916        {
39917            if let Some(param_expr) = self.parse_clickhouse_braced_parameter()? {
39918                if let Expression::Parameter(param) = &param_expr {
39919                    return Ok(format!(
39920                        "{{{}: {}}}",
39921                        param.name.as_deref().unwrap_or(""),
39922                        param.expression.as_deref().unwrap_or("")
39923                    ));
39924                }
39925            }
39926            Err(self.parse_error("Expected identifier, got LBrace"))
39927        } else {
39928            Err(self.parse_error(format!(
39929                "Expected identifier, got {:?}",
39930                if self.is_at_end() {
39931                    "end of input".to_string()
39932                } else {
39933                    format!("{:?}", self.peek().token_type)
39934                }
39935            )))
39936        }
39937    }
39938
39939    /// Expect an identifier or safe keyword, preserving quoted flag
39940    fn expect_identifier_or_safe_keyword_with_quoted(&mut self) -> Result<Identifier> {
39941        if self.is_mysql_numeric_identifier() {
39942            return Ok(self.parse_mysql_numeric_identifier());
39943        }
39944        if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
39945            let token = self.advance();
39946            let quoted = token.token_type == TokenType::QuotedIdentifier;
39947            Ok(Identifier {
39948                name: token.text,
39949                quoted,
39950                trailing_comments: Vec::new(),
39951                span: None,
39952            })
39953        } else {
39954            Err(self.parse_error(format!(
39955                "Expected identifier, got {:?}",
39956                if self.is_at_end() {
39957                    "end of input".to_string()
39958                } else {
39959                    format!("{:?}", self.peek().token_type)
39960                }
39961            )))
39962        }
39963    }
39964
39965    fn expect_identifier_or_alias_keyword_with_quoted(&mut self) -> Result<Identifier> {
39966        // ClickHouse: any keyword can be used as a table alias after explicit AS
39967        let ch_keyword = matches!(
39968            self.config.dialect,
39969            Some(crate::dialects::DialectType::ClickHouse)
39970        ) && self.peek().token_type.is_keyword();
39971        if self.is_identifier_token()
39972            || self.can_be_alias_keyword()
39973            || self.is_safe_keyword_as_identifier()
39974            || ch_keyword
39975        {
39976            let token = self.advance();
39977            let quoted = token.token_type == TokenType::QuotedIdentifier;
39978            Ok(Identifier {
39979                name: token.text,
39980                quoted,
39981                trailing_comments: Vec::new(),
39982                span: None,
39983            })
39984        } else if self.check(TokenType::String)
39985            && matches!(
39986                self.config.dialect,
39987                Some(crate::dialects::DialectType::DuckDB)
39988            )
39989        {
39990            // DuckDB allows string literals as identifiers (e.g., WITH 'x' AS (...))
39991            let token = self.advance();
39992            Ok(Identifier {
39993                name: token.text,
39994                quoted: true,
39995                trailing_comments: Vec::new(),
39996                span: None,
39997            })
39998        } else {
39999            Err(self.parse_error(format!(
40000                "Expected identifier, got {:?}",
40001                if self.is_at_end() {
40002                    "end of input".to_string()
40003                } else {
40004                    format!("{:?}", self.peek().token_type)
40005                }
40006            )))
40007        }
40008    }
40009
40010    /// Expect a number
40011    fn expect_number(&mut self) -> Result<i64> {
40012        let negative = self.match_token(TokenType::Dash);
40013        if self.check(TokenType::Number) {
40014            let text = self.advance().text;
40015            let val = text
40016                .parse::<i64>()
40017                .map_err(|_| self.parse_error(format!("Invalid number: {}", text)))?;
40018            Ok(if negative { -val } else { val })
40019        } else {
40020            Err(self.parse_error("Expected number"))
40021        }
40022    }
40023
    /// Parse a comma-separated list of expressions into a `Vec`.
    ///
    /// Each element may be:
    /// - a named argument written as `name => value` or `name := value`;
    /// - a regular expression, optionally followed by `AS alias`
    ///   (Spark/Hive: `IF(cond, val AS name, ...)`).
    ///
    /// Under the ClickHouse dialect an aliased element may be followed by a
    /// binary operator (`blockSize() AS bs < 1000`), and a trailing comma before
    /// `)` is tolerated.
    ///
    /// `capacity_hint` is only used to pre-size the result vector.
    ///
    /// # Errors
    ///
    /// Propagates any error from parsing an element or an alias name.
    fn parse_expression_list_with_capacity(
        &mut self,
        capacity_hint: usize,
    ) -> Result<Vec<Expression>> {
        let mut expressions = Vec::with_capacity(capacity_hint);

        loop {
            // Check if this is a named argument: identifier => value or identifier := value
            // Also check for safe keywords (like TYPE, FORMAT, etc.) that can be used as named arg names
            let expr = if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
                // Remember where the element starts so the speculative name read can be undone.
                let start_pos = self.current;
                let name = self.expect_identifier_or_keyword_with_quoted()?;

                if self.match_token(TokenType::FArrow) {
                    // name => value
                    let value = self.parse_expression()?;
                    Expression::NamedArgument(Box::new(NamedArgument {
                        name,
                        value,
                        separator: NamedArgSeparator::DArrow,
                    }))
                } else if self.match_token(TokenType::ColonEq) {
                    // name := value
                    let value = self.parse_expression()?;
                    Expression::NamedArgument(Box::new(NamedArgument {
                        name,
                        value,
                        separator: NamedArgSeparator::ColonEq,
                    }))
                } else {
                    // Not a named argument, backtrack and parse as regular expression
                    self.current = start_pos;
                    self.parse_expression()?
                }
            } else {
                self.parse_expression()?
            };

            // Check for AS alias on this expression (Spark/Hive: IF(cond, val AS name, ...))
            let expr = if self.check(TokenType::As) {
                // Position of the AS token, restored below if no alias name follows.
                let as_pos = self.current;
                self.advance(); // consume AS

                // Check if what follows looks like an alias name
                if self.is_identifier_token()
                    || self.is_safe_keyword_as_identifier()
                    || (matches!(
                        self.config.dialect,
                        Some(crate::dialects::DialectType::ClickHouse)
                    ) && self.peek().token_type.is_keyword())
                {
                    let alias = self.expect_identifier_or_keyword_with_quoted()?;
                    let alias_expr = Expression::Alias(Box::new(Alias {
                        this: expr,
                        alias,
                        column_aliases: Vec::new(),
                        pre_alias_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                        inferred_type: None,
                    }));
                    // ClickHouse: if followed by an operator, the alias is part of a bigger expression
                    // e.g., blockSize() AS bs < 1000 means (blockSize() AS bs) < 1000
                    if matches!(
                        self.config.dialect,
                        Some(crate::dialects::DialectType::ClickHouse)
                    ) && matches!(
                        self.peek().token_type,
                        TokenType::Lt
                            | TokenType::Gt
                            | TokenType::Lte
                            | TokenType::Gte
                            | TokenType::Eq
                            | TokenType::Neq
                            | TokenType::Plus
                            | TokenType::Dash
                            | TokenType::Star
                            | TokenType::Slash
                            | TokenType::Percent
                            | TokenType::And
                            | TokenType::Or
                            | TokenType::Like
                            | TokenType::Not
                            | TokenType::In
                            | TokenType::Is
                            | TokenType::Between
                    ) {
                        // Parse the operator and right-hand side.
                        // The whole remaining expression becomes the right operand, so the
                        // aliased expression is always the left child of the resulting node.
                        let op_token = self.advance();
                        let right = self.parse_expression()?;
                        match op_token.token_type {
                            TokenType::Lt => {
                                Expression::Lt(Box::new(BinaryOp::new(alias_expr, right)))
                            }
                            TokenType::Gt => {
                                Expression::Gt(Box::new(BinaryOp::new(alias_expr, right)))
                            }
                            TokenType::Lte => {
                                Expression::Lte(Box::new(BinaryOp::new(alias_expr, right)))
                            }
                            TokenType::Gte => {
                                Expression::Gte(Box::new(BinaryOp::new(alias_expr, right)))
                            }
                            TokenType::Eq => {
                                Expression::Eq(Box::new(BinaryOp::new(alias_expr, right)))
                            }
                            TokenType::Neq => {
                                Expression::Neq(Box::new(BinaryOp::new(alias_expr, right)))
                            }
                            TokenType::Plus => {
                                Expression::Add(Box::new(BinaryOp::new(alias_expr, right)))
                            }
                            TokenType::Dash => {
                                Expression::Sub(Box::new(BinaryOp::new(alias_expr, right)))
                            }
                            TokenType::Star => {
                                Expression::Mul(Box::new(BinaryOp::new(alias_expr, right)))
                            }
                            TokenType::Slash => {
                                Expression::Div(Box::new(BinaryOp::new(alias_expr, right)))
                            }
                            TokenType::Percent => {
                                Expression::Mod(Box::new(BinaryOp::new(alias_expr, right)))
                            }
                            TokenType::And => {
                                Expression::And(Box::new(BinaryOp::new(alias_expr, right)))
                            }
                            TokenType::Or => {
                                Expression::Or(Box::new(BinaryOp::new(alias_expr, right)))
                            }
                            // NOTE(review): this arm IS reachable. Like, Not, In, Is and
                            // Between pass the operator check above but have no arm here,
                            // so for them the operator token and the parsed right-hand
                            // side are consumed and discarded, leaving only the alias.
                            _ => alias_expr,
                        }
                    } else {
                        alias_expr
                    }
                } else {
                    // Not an alias name, backtrack
                    self.current = as_pos;
                    expr
                }
            } else {
                expr
            };

            // Check for trailing comments on this expression
            // Only wrap in Annotated for expression types that don't have their own trailing_comments field
            let trailing_comments = self.previous_trailing_comments();
            let expr = if trailing_comments.is_empty() {
                expr
            } else {
                // Only annotate Literals and other types that don't capture trailing comments
                match &expr {
                    Expression::Literal(_) | Expression::Boolean(_) | Expression::Null(_) => {
                        Expression::Annotated(Box::new(Annotated {
                            this: expr,
                            trailing_comments,
                        }))
                    }
                    // For expressions that already capture trailing_comments, don't double-wrap
                    _ => expr,
                }
            };
            expressions.push(expr);

            // No comma means the list is finished.
            if !self.match_token(TokenType::Comma) {
                break;
            }
            // ClickHouse: allow trailing comma before RParen in expression lists
            if matches!(
                self.config.dialect,
                Some(crate::dialects::DialectType::ClickHouse)
            ) && self.check(TokenType::RParen)
            {
                break;
            }
        }

        Ok(expressions)
    }
40203
40204    /// Parse a comma-separated list of expressions.
40205    /// Supports named arguments with => or := syntax.
40206    fn parse_expression_list(&mut self) -> Result<Vec<Expression>> {
40207        self.parse_expression_list_with_capacity(0)
40208    }
40209
40210    /// Estimate top-level expression count until the next unmatched `)`.
40211    ///
40212    /// This is used for pre-allocating comma-separated lists like `IN (...)`
40213    /// to reduce `Vec` growth churn on very large lists.
40214    fn estimate_expression_list_capacity_until_rparen(&self) -> usize {
40215        if self.current >= self.tokens.len() || self.check(TokenType::RParen) {
40216            return 0;
40217        }
40218
40219        let mut idx = self.current;
40220        let mut paren_depth = 0usize;
40221        let mut bracket_depth = 0usize;
40222        let mut brace_depth = 0usize;
40223        let mut commas = 0usize;
40224        let mut has_any_token = false;
40225
40226        while idx < self.tokens.len() {
40227            let token_type = self.tokens[idx].token_type;
40228            match token_type {
40229                TokenType::LParen => paren_depth += 1,
40230                TokenType::RParen => {
40231                    if paren_depth == 0 && bracket_depth == 0 && brace_depth == 0 {
40232                        break;
40233                    }
40234                    paren_depth = paren_depth.saturating_sub(1);
40235                }
40236                TokenType::LBracket => bracket_depth += 1,
40237                TokenType::RBracket => bracket_depth = bracket_depth.saturating_sub(1),
40238                TokenType::LBrace => brace_depth += 1,
40239                TokenType::RBrace => brace_depth = brace_depth.saturating_sub(1),
40240                TokenType::Comma if paren_depth == 0 && bracket_depth == 0 && brace_depth == 0 => {
40241                    commas += 1;
40242                }
40243                _ => {}
40244            }
40245            has_any_token = true;
40246            idx += 1;
40247        }
40248
40249        if has_any_token {
40250            commas + 1
40251        } else {
40252            0
40253        }
40254    }
40255
40256    /// Parse function arguments with lambda support (for TRANSFORM and similar functions).
40257    /// Handles Snowflake typed lambda syntax: `a int -> a + 1`
40258    fn parse_function_args_with_lambda(&mut self) -> Result<Vec<Expression>> {
40259        let mut expressions = Vec::new();
40260
40261        loop {
40262            // Try to detect typed lambda: identifier type -> body
40263            let expr = if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
40264                let saved_pos = self.current;
40265                let ident_token = self.advance();
40266                let ident_name = ident_token.text.clone();
40267
40268                // Check for arrow (simple lambda: a -> body)
40269                if self.match_token(TokenType::Arrow) {
40270                    let body = self.parse_expression()?;
40271                    Expression::Lambda(Box::new(LambdaExpr {
40272                        parameters: vec![Identifier::new(ident_name)],
40273                        body,
40274                        colon: false,
40275                        parameter_types: Vec::new(),
40276                    }))
40277                }
40278                // Check for type annotation followed by arrow: a int -> body
40279                else if !self.is_at_end()
40280                    && self.is_type_keyword()
40281                    && !self.check(TokenType::FArrow)
40282                    && !self.check(TokenType::ColonEq)
40283                {
40284                    let type_annotation = self.parse_data_type()?;
40285                    if self.match_token(TokenType::Arrow) {
40286                        let body = self.parse_expression()?;
40287                        Expression::Lambda(Box::new(LambdaExpr {
40288                            parameters: vec![Identifier::new(ident_name)],
40289                            body,
40290                            colon: false,
40291                            parameter_types: vec![Some(type_annotation)],
40292                        }))
40293                    } else {
40294                        self.current = saved_pos;
40295                        self.parse_expression()?
40296                    }
40297                } else {
40298                    // Not a lambda, backtrack and parse as regular expression
40299                    self.current = saved_pos;
40300                    self.parse_expression()?
40301                }
40302            } else {
40303                self.parse_expression()?
40304            };
40305
40306            expressions.push(expr);
40307            if !self.match_token(TokenType::Comma) {
40308                break;
40309            }
40310        }
40311
40312        Ok(expressions)
40313    }
40314
40315    /// Parse a comma-separated list of expressions for VALUES tuples
40316    /// This variant supports AS aliases on each element (Hive syntax): VALUES (1 AS a, 2 AS b, 3)
40317    fn parse_values_expression_list(&mut self) -> Result<Vec<Expression>> {
40318        let mut expressions = Vec::new();
40319
40320        loop {
40321            // Handle DEFAULT keyword in VALUES - output as unquoted Var (like Python sqlglot's exp.var("DEFAULT"))
40322            let expr = if self.match_token(TokenType::Default) {
40323                Expression::Var(Box::new(crate::expressions::Var {
40324                    this: "DEFAULT".to_string(),
40325                }))
40326            } else {
40327                self.parse_expression()?
40328            };
40329
40330            // Capture trailing comments on the expression (e.g., `1 /* c4 */`)
40331            let trailing_comments = self.previous_trailing_comments();
40332            let expr = if !trailing_comments.is_empty() {
40333                match &expr {
40334                    Expression::Literal(_) | Expression::Boolean(_) | Expression::Null(_) => {
40335                        Expression::Annotated(Box::new(crate::expressions::Annotated {
40336                            this: expr,
40337                            trailing_comments,
40338                        }))
40339                    }
40340                    _ => expr,
40341                }
40342            } else {
40343                expr
40344            };
40345
40346            // Check for AS alias on this value element (Hive syntax)
40347            let expr_with_alias = if self.match_token(TokenType::As) {
40348                let alias = self.expect_identifier_or_keyword_with_quoted()?;
40349                Expression::Alias(Box::new(Alias::new(expr, alias)))
40350            } else {
40351                expr
40352            };
40353
40354            expressions.push(expr_with_alias);
40355
40356            if !self.match_token(TokenType::Comma) {
40357                break;
40358            }
40359            // ClickHouse: trailing comma in VALUES, e.g., (1, 2, 3,)
40360            if self.check(TokenType::RParen) {
40361                break;
40362            }
40363        }
40364
40365        Ok(expressions)
40366    }
40367
40368    /// Parse a comma-separated list of identifiers
40369    fn parse_identifier_list(&mut self) -> Result<Vec<Identifier>> {
40370        let mut identifiers = Vec::new();
40371
40372        loop {
40373            // Allow keywords as identifiers in identifier lists (e.g., CTE column aliases)
40374            // Check if it's a quoted identifier before consuming
40375            let quoted = self.check(TokenType::QuotedIdentifier);
40376            let mut name = self.expect_identifier_or_safe_keyword()?;
40377            // ClickHouse: handle dotted names in identifier lists (e.g., INSERT INTO t (n.a, n.b))
40378            // Use keyword_with_quoted to allow any keyword after dot (e.g., replace.from)
40379            if matches!(
40380                self.config.dialect,
40381                Some(crate::dialects::DialectType::ClickHouse)
40382            ) {
40383                while self.match_token(TokenType::Dot) {
40384                    let sub_id = self.expect_identifier_or_keyword_with_quoted()?;
40385                    name = format!("{}.{}", name, sub_id.name);
40386                }
40387            }
40388            let trailing_comments = self.previous_trailing_comments();
40389            identifiers.push(Identifier {
40390                name,
40391                quoted,
40392                trailing_comments,
40393                span: None,
40394            });
40395
40396            if !self.match_token(TokenType::Comma) {
40397                break;
40398            }
40399            // ClickHouse: allow trailing comma before RParen in identifier lists
40400            if matches!(
40401                self.config.dialect,
40402                Some(crate::dialects::DialectType::ClickHouse)
40403            ) && self.check(TokenType::RParen)
40404            {
40405                break;
40406            }
40407        }
40408
40409        Ok(identifiers)
40410    }
40411
40412    /// Parse a comma-separated list of column references for USING clause
40413    /// Supports qualified names like table.col but extracts only the column part
40414    fn parse_using_column_list(&mut self) -> Result<Vec<Identifier>> {
40415        let mut identifiers = Vec::new();
40416
40417        loop {
40418            // ClickHouse: USING * — wildcard in USING clause
40419            if matches!(
40420                self.config.dialect,
40421                Some(crate::dialects::DialectType::ClickHouse)
40422            ) && self.match_token(TokenType::Star)
40423            {
40424                identifiers.push(Identifier::new("*".to_string()));
40425                if !self.match_token(TokenType::Comma) {
40426                    break;
40427                }
40428                continue;
40429            }
40430            // Check if it's a quoted identifier before consuming
40431            let quoted = self.check(TokenType::QuotedIdentifier);
40432            let mut name = self.expect_identifier_or_safe_keyword()?;
40433            let mut final_quoted = quoted;
40434
40435            // Handle qualified names: table.column or schema.table.column
40436            // Keep only the final column name
40437            while self.match_token(TokenType::Dot) {
40438                final_quoted = self.check(TokenType::QuotedIdentifier);
40439                name = self.expect_identifier_or_safe_keyword()?;
40440            }
40441
40442            // ClickHouse: USING (col AS alias) — consume optional AS alias
40443            if matches!(
40444                self.config.dialect,
40445                Some(crate::dialects::DialectType::ClickHouse)
40446            ) && self.match_token(TokenType::As)
40447            {
40448                // Use the alias name instead
40449                final_quoted = self.check(TokenType::QuotedIdentifier);
40450                name = self.expect_identifier_or_safe_keyword()?;
40451            }
40452
40453            let trailing_comments = self.previous_trailing_comments();
40454            identifiers.push(Identifier {
40455                name,
40456                quoted: final_quoted,
40457                trailing_comments,
40458                span: None,
40459            });
40460
40461            if !self.match_token(TokenType::Comma) {
40462                break;
40463            }
40464        }
40465
40466        Ok(identifiers)
40467    }
40468
40469    /// Parse a comma-separated list of identifiers for index columns.
40470    /// Supports MySQL prefix lengths: col(16) and sort order: col DESC
40471    fn parse_index_identifier_list(&mut self) -> Result<Vec<Identifier>> {
40472        let mut identifiers = Vec::new();
40473
40474        loop {
40475            let quoted = self.check(TokenType::QuotedIdentifier);
40476            let name = self.expect_identifier_or_safe_keyword()?;
40477            let trailing_comments = self.previous_trailing_comments();
40478
40479            // Check for prefix length: col(16)
40480            let mut display_name = name.clone();
40481            if self.match_token(TokenType::LParen) {
40482                if self.check(TokenType::Number) {
40483                    let len = self.advance().text;
40484                    display_name = format!("{}({})", name, len);
40485                }
40486                self.expect(TokenType::RParen)?;
40487            }
40488
40489            // Check for DESC/ASC sort order
40490            if self.match_token(TokenType::Desc) {
40491                display_name = format!("{} DESC", display_name);
40492            } else if self.match_token(TokenType::Asc) {
40493                display_name = format!("{} ASC", display_name);
40494            }
40495
40496            identifiers.push(Identifier {
40497                name: display_name,
40498                quoted,
40499                trailing_comments,
40500                span: None,
40501            });
40502
40503            if !self.match_token(TokenType::Comma) {
40504                break;
40505            }
40506        }
40507
40508        Ok(identifiers)
40509    }
40510    // =============================================================================
40511    // Auto-generated Missing Parser Methods
40512    // Total: 296 methods
40513    // =============================================================================
40514
40515    /// parse_add_column - Implemented from Python _parse_add_column
40516    /// Calls: parse_column, parse_column_def_with_exists
40517    #[allow(unused_variables, unused_mut)]
40518    pub fn parse_add_column(&mut self) -> Result<Option<Expression>> {
40519        if self.match_texts(&["FIRST", "AFTER"]) {
40520            // Matched one of: FIRST, AFTER
40521            return Ok(None);
40522        }
40523        Ok(None)
40524    }
40525
40526    /// parse_alias - Parses alias for an expression
40527    /// This method parses just the alias part (AS name or just name)
40528    /// Python: _parse_alias
40529    pub fn parse_alias(&mut self) -> Result<Option<Expression>> {
40530        // Check for AS keyword (explicit alias)
40531        let _explicit = self.match_token(TokenType::Alias);
40532
40533        // Parse the alias identifier
40534        if let Some(alias_expr) = self.parse_id_var()? {
40535            let alias_ident = match alias_expr {
40536                Expression::Identifier(id) => id,
40537                _ => return Ok(None),
40538            };
40539            // Return just the alias identifier wrapped in an expression
40540            return Ok(Some(Expression::Identifier(alias_ident)));
40541        }
40542
40543        Ok(None)
40544    }
40545
40546    /// parse_alias_with_expr - Wraps an expression with an alias if present
40547    pub fn parse_alias_with_expr(
40548        &mut self,
40549        this: Option<Expression>,
40550    ) -> Result<Option<Expression>> {
40551        if this.is_none() {
40552            return Ok(None);
40553        }
40554        let expr = this.unwrap();
40555
40556        // Check for AS keyword (explicit alias)
40557        // Accept both TokenType::Alias and TokenType::As
40558        let has_as = self.match_token(TokenType::Alias) || self.match_token(TokenType::As);
40559
40560        // Check for column aliases: (col1, col2)
40561        if has_as && self.match_token(TokenType::LParen) {
40562            let mut column_aliases = Vec::new();
40563            loop {
40564                if let Some(col_expr) = self.parse_id_var()? {
40565                    if let Expression::Identifier(id) = col_expr {
40566                        column_aliases.push(id);
40567                    }
40568                } else {
40569                    break;
40570                }
40571                if !self.match_token(TokenType::Comma) {
40572                    break;
40573                }
40574            }
40575            self.match_token(TokenType::RParen);
40576
40577            if !column_aliases.is_empty() {
40578                return Ok(Some(Expression::Alias(Box::new(Alias {
40579                    this: expr,
40580                    alias: Identifier::new(String::new()), // Empty alias when only column aliases
40581                    column_aliases,
40582                    pre_alias_comments: Vec::new(),
40583                    trailing_comments: Vec::new(),
40584                    inferred_type: None,
40585                }))));
40586            }
40587        }
40588
40589        // Parse the alias identifier
40590        if let Some(alias_expr) = self.parse_id_var()? {
40591            let alias_ident = match alias_expr {
40592                Expression::Identifier(id) => id,
40593                _ => return Ok(Some(expr)),
40594            };
40595            return Ok(Some(Expression::Alias(Box::new(Alias {
40596                this: expr,
40597                alias: alias_ident,
40598                column_aliases: Vec::new(),
40599                pre_alias_comments: Vec::new(),
40600                trailing_comments: Vec::new(),
40601                inferred_type: None,
40602            }))));
40603        }
40604
40605        Ok(Some(expr))
40606    }
40607
40608    /// parse_alter_diststyle - Implemented from Python _parse_alter_diststyle
40609    #[allow(unused_variables, unused_mut)]
40610    /// parse_alter_diststyle - Parses ALTER TABLE DISTSTYLE clause (Redshift)
40611    /// Python: parser.py:7797-7802
40612    pub fn parse_alter_diststyle(&mut self) -> Result<Option<Expression>> {
40613        // Check for ALL, EVEN, AUTO
40614        if self.match_texts(&["ALL", "EVEN", "AUTO"]) {
40615            let style = self.previous().text.to_uppercase();
40616            return Ok(Some(Expression::DistStyleProperty(Box::new(
40617                DistStyleProperty {
40618                    this: Box::new(Expression::Identifier(Identifier::new(style))),
40619                },
40620            ))));
40621        }
40622
40623        // KEY DISTKEY column
40624        if self.match_text_seq(&["KEY", "DISTKEY"]) {
40625            if let Some(column) = self.parse_column()? {
40626                return Ok(Some(Expression::DistStyleProperty(Box::new(
40627                    DistStyleProperty {
40628                        this: Box::new(column),
40629                    },
40630                ))));
40631            }
40632        }
40633
40634        Ok(None)
40635    }
40636
40637    /// parse_alter_session - Parses ALTER SESSION SET/UNSET statements
40638    /// Python: parser.py:7879-7889
40639    pub fn parse_alter_session(&mut self) -> Result<Option<Expression>> {
40640        // ALTER SESSION SET var = value, ...
40641        if self.match_token(TokenType::Set) {
40642            let mut expressions = Vec::new();
40643            loop {
40644                if let Some(item) = self.parse_set_item_assignment()? {
40645                    expressions.push(item);
40646                }
40647                if !self.match_token(TokenType::Comma) {
40648                    break;
40649                }
40650            }
40651            return Ok(Some(Expression::AlterSession(Box::new(AlterSession {
40652                expressions,
40653                unset: None,
40654            }))));
40655        }
40656
40657        // ALTER SESSION UNSET var, ...
40658        if self.match_text_seq(&["UNSET"]) {
40659            let mut expressions = Vec::new();
40660            loop {
40661                if let Some(var) = self.parse_id_var()? {
40662                    // For UNSET, we just use the identifier directly
40663                    expressions.push(var);
40664                }
40665                if !self.match_token(TokenType::Comma) {
40666                    break;
40667                }
40668            }
40669            return Ok(Some(Expression::AlterSession(Box::new(AlterSession {
40670                expressions,
40671                unset: Some(Box::new(Expression::Boolean(BooleanLiteral {
40672                    value: true,
40673                }))),
40674            }))));
40675        }
40676
40677        Ok(None)
40678    }
40679
40680    /// parse_alter_sortkey - Parses ALTER TABLE SORTKEY clause (Redshift)
40681    /// Python: parser.py:7804-7816
40682    pub fn parse_alter_sortkey(&mut self) -> Result<Option<Expression>> {
40683        self.parse_alter_sortkey_impl(None)
40684    }
40685
40686    /// Implementation of parse_alter_sortkey with compound option
40687    pub fn parse_alter_sortkey_impl(
40688        &mut self,
40689        compound: Option<bool>,
40690    ) -> Result<Option<Expression>> {
40691        // For compound sortkey, match SORTKEY keyword
40692        if compound == Some(true) {
40693            self.match_text_seq(&["SORTKEY"]);
40694        }
40695
40696        // Check for (column_list) syntax
40697        if self.check(TokenType::LParen) {
40698            let wrapped = self.parse_wrapped_id_vars()?;
40699            // Extract expressions from Tuple
40700            let expressions = if let Some(Expression::Tuple(t)) = wrapped {
40701                t.expressions
40702            } else {
40703                Vec::new()
40704            };
40705            return Ok(Some(Expression::AlterSortKey(Box::new(AlterSortKey {
40706                this: None,
40707                expressions,
40708                compound: compound
40709                    .map(|c| Box::new(Expression::Boolean(BooleanLiteral { value: c }))),
40710            }))));
40711        }
40712
40713        // Check for AUTO or NONE
40714        if self.match_texts(&["AUTO", "NONE"]) {
40715            let style = self.previous().text.to_uppercase();
40716            return Ok(Some(Expression::AlterSortKey(Box::new(AlterSortKey {
40717                this: Some(Box::new(Expression::Identifier(Identifier::new(style)))),
40718                expressions: Vec::new(),
40719                compound: compound
40720                    .map(|c| Box::new(Expression::Boolean(BooleanLiteral { value: c }))),
40721            }))));
40722        }
40723
40724        Ok(None)
40725    }
40726
40727    /// parse_alter_table_add - Parses ALTER TABLE ADD clause
40728    /// Python: parser.py:7715-7751
40729    pub fn parse_alter_table_add(&mut self) -> Result<Option<Expression>> {
40730        // Check for ADD keyword (optional in some contexts)
40731        self.match_text_seq(&["ADD"]);
40732
40733        // Check for INDEX/KEY with optional FULLTEXT/SPATIAL prefix (MySQL)
40734        // Syntax: ADD [FULLTEXT|SPATIAL] {INDEX|KEY} [name] (columns) [USING {BTREE|HASH}]
40735        let kind = if self.match_identifier("FULLTEXT") {
40736            Some("FULLTEXT".to_string())
40737        } else if self.match_identifier("SPATIAL") {
40738            Some("SPATIAL".to_string())
40739        } else {
40740            None
40741        };
40742
40743        if self.check(TokenType::Index) || self.check(TokenType::Key) || kind.is_some() {
40744            // Consume INDEX or KEY keyword, track which was used
40745            let use_key_keyword = if self.match_token(TokenType::Key) {
40746                true
40747            } else {
40748                self.match_token(TokenType::Index);
40749                false
40750            };
40751
40752            // Optional index name (before the columns)
40753            let name = if !self.check(TokenType::LParen) && !self.check(TokenType::Using) {
40754                Some(self.expect_identifier_with_quoted()?)
40755            } else {
40756                None
40757            };
40758
40759            // Parse columns (with optional prefix length and DESC)
40760            self.expect(TokenType::LParen)?;
40761            let columns = self.parse_index_identifier_list()?;
40762            self.expect(TokenType::RParen)?;
40763
40764            // Parse optional USING BTREE|HASH
40765            let modifiers = self.parse_constraint_modifiers();
40766
40767            return Ok(Some(Expression::AlterTable(Box::new(AlterTable {
40768                name: TableRef::new(""),
40769                actions: vec![AlterTableAction::AddConstraint(TableConstraint::Index {
40770                    name,
40771                    columns,
40772                    kind,
40773                    modifiers,
40774                    use_key_keyword,
40775                    expression: None,
40776                    index_type: None,
40777                    granularity: None,
40778                })],
40779                if_exists: false,
40780                algorithm: None,
40781                lock: None,
40782                with_check: None,
40783                partition: None,
40784                on_cluster: None,
40785            }))));
40786        }
40787
40788        // Check for constraint keywords (PRIMARY KEY, FOREIGN KEY, UNIQUE, CHECK, CONSTRAINT)
40789        if self.check(TokenType::PrimaryKey)
40790            || self.check(TokenType::ForeignKey)
40791            || self.check(TokenType::Unique)
40792            || self.check(TokenType::Check)
40793            || self.check(TokenType::Constraint)
40794        {
40795            // Parse a single constraint and return it wrapped in Constraint
40796            if let Some(constraint) = self.parse_constraint()? {
40797                return Ok(Some(Expression::Constraint(Box::new(Constraint {
40798                    this: Box::new(constraint),
40799                    expressions: Vec::new(),
40800                }))));
40801            }
40802        }
40803
40804        // Check for COLUMNS keyword (batch column addition)
40805        if self.match_text_seq(&["COLUMNS"]) {
40806            // Parse schema or column definitions
40807            if let Some(schema) = self.parse_schema()? {
40808                return Ok(Some(schema));
40809            }
40810        }
40811
40812        // Check for IF NOT EXISTS PARTITION (must check before parse_add_column)
40813        let exists = self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
40814        if self.match_token(TokenType::Partition) {
40815            // Parse PARTITION(key = value, ...)
40816            self.expect(TokenType::LParen)?;
40817            let mut partition_exprs = Vec::new();
40818            loop {
40819                if let Some(expr) = self.parse_conjunction()? {
40820                    partition_exprs.push(expr);
40821                }
40822                if !self.match_token(TokenType::Comma) {
40823                    break;
40824                }
40825            }
40826            self.expect(TokenType::RParen)?;
40827
40828            let partition = Expression::Partition(Box::new(crate::expressions::Partition {
40829                expressions: partition_exprs,
40830                subpartition: false,
40831            }));
40832
40833            let location = if self.match_text_seq(&["LOCATION"]) {
40834                self.parse_property()?
40835            } else {
40836                None
40837            };
40838            return Ok(Some(Expression::AddPartition(Box::new(AddPartition {
40839                this: Box::new(partition),
40840                exists,
40841                location: location.map(Box::new),
40842            }))));
40843        }
40844
40845        // Try to parse column definition (after checking for PARTITION)
40846        if let Some(column) = self.parse_add_column()? {
40847            return Ok(Some(column));
40848        }
40849
40850        Ok(None)
40851    }
40852
40853    /// parse_alter_table_alter - Parses ALTER TABLE ALTER COLUMN clause
40854    /// Python: parser.py:7753-7795
40855    pub fn parse_alter_table_alter(&mut self) -> Result<Option<Expression>> {
40856        // Match optional COLUMN keyword
40857        self.match_token(TokenType::Column);
40858
40859        // Parse the column name - required for ALTER COLUMN
40860        let column = match self.parse_field()? {
40861            Some(c) => c,
40862            None => return Ok(None),
40863        };
40864
40865        // DROP DEFAULT
40866        if self.match_keywords(&[TokenType::Drop, TokenType::Default]) {
40867            return Ok(Some(Expression::AlterColumn(Box::new(AlterColumn {
40868                this: Box::new(column),
40869                dtype: None,
40870                collate: None,
40871                using: None,
40872                default: None,
40873                drop: Some(Box::new(Expression::Boolean(BooleanLiteral {
40874                    value: true,
40875                }))),
40876                allow_null: None,
40877                comment: None,
40878                visible: None,
40879                rename_to: None,
40880            }))));
40881        }
40882
40883        // SET DEFAULT expr
40884        if self.match_keywords(&[TokenType::Set, TokenType::Default]) {
40885            let default_val = self.parse_disjunction()?;
40886            return Ok(Some(Expression::AlterColumn(Box::new(AlterColumn {
40887                this: Box::new(column),
40888                dtype: None,
40889                collate: None,
40890                using: None,
40891                default: default_val.map(Box::new),
40892                drop: None,
40893                allow_null: None,
40894                comment: None,
40895                visible: None,
40896                rename_to: None,
40897            }))));
40898        }
40899
40900        // COMMENT 'string'
40901        if self.match_token(TokenType::Comment) {
40902            let comment_val = self.parse_string()?;
40903            return Ok(Some(Expression::AlterColumn(Box::new(AlterColumn {
40904                this: Box::new(column),
40905                dtype: None,
40906                collate: None,
40907                using: None,
40908                default: None,
40909                drop: None,
40910                allow_null: None,
40911                comment: comment_val.map(Box::new),
40912                visible: None,
40913                rename_to: None,
40914            }))));
40915        }
40916
40917        // DROP NOT NULL
40918        if self.match_text_seq(&["DROP", "NOT", "NULL"]) {
40919            return Ok(Some(Expression::AlterColumn(Box::new(AlterColumn {
40920                this: Box::new(column),
40921                dtype: None,
40922                collate: None,
40923                using: None,
40924                default: None,
40925                drop: Some(Box::new(Expression::Boolean(BooleanLiteral {
40926                    value: true,
40927                }))),
40928                allow_null: Some(Box::new(Expression::Boolean(BooleanLiteral {
40929                    value: true,
40930                }))),
40931                comment: None,
40932                visible: None,
40933                rename_to: None,
40934            }))));
40935        }
40936
40937        // SET NOT NULL
40938        if self.match_text_seq(&["SET", "NOT", "NULL"]) {
40939            return Ok(Some(Expression::AlterColumn(Box::new(AlterColumn {
40940                this: Box::new(column),
40941                dtype: None,
40942                collate: None,
40943                using: None,
40944                default: None,
40945                drop: None,
40946                allow_null: Some(Box::new(Expression::Boolean(BooleanLiteral {
40947                    value: false,
40948                }))),
40949                comment: None,
40950                visible: None,
40951                rename_to: None,
40952            }))));
40953        }
40954
40955        // SET VISIBLE
40956        if self.match_text_seq(&["SET", "VISIBLE"]) {
40957            return Ok(Some(Expression::AlterColumn(Box::new(AlterColumn {
40958                this: Box::new(column),
40959                dtype: None,
40960                collate: None,
40961                using: None,
40962                default: None,
40963                drop: None,
40964                allow_null: None,
40965                comment: None,
40966                visible: Some(Box::new(Expression::Identifier(Identifier::new(
40967                    "VISIBLE".to_string(),
40968                )))),
40969                rename_to: None,
40970            }))));
40971        }
40972
40973        // SET INVISIBLE
40974        if self.match_text_seq(&["SET", "INVISIBLE"]) {
40975            return Ok(Some(Expression::AlterColumn(Box::new(AlterColumn {
40976                this: Box::new(column),
40977                dtype: None,
40978                collate: None,
40979                using: None,
40980                default: None,
40981                drop: None,
40982                allow_null: None,
40983                comment: None,
40984                visible: Some(Box::new(Expression::Identifier(Identifier::new(
40985                    "INVISIBLE".to_string(),
40986                )))),
40987                rename_to: None,
40988            }))));
40989        }
40990
40991        // [SET DATA] TYPE type [COLLATE collation] [USING expr]
40992        self.match_text_seq(&["SET", "DATA"]);
40993        self.match_text_seq(&["TYPE"]);
40994
40995        let dtype = self.parse_types()?;
40996        let collate = if self.match_token(TokenType::Collate) {
40997            self.parse_term()?
40998        } else {
40999            None
41000        };
41001        let using = if self.match_token(TokenType::Using) {
41002            self.parse_disjunction()?
41003        } else {
41004            None
41005        };
41006
41007        Ok(Some(Expression::AlterColumn(Box::new(AlterColumn {
41008            this: Box::new(column),
41009            dtype: dtype.map(Box::new),
41010            collate: collate.map(Box::new),
41011            using: using.map(Box::new),
41012            default: None,
41013            drop: None,
41014            allow_null: None,
41015            comment: None,
41016            visible: None,
41017            rename_to: None,
41018        }))))
41019    }
41020
41021    /// Parse ALTER TABLE DROP action
41022    /// Note: Main ALTER TABLE DROP logic is implemented inline in parse_alter_table
41023    /// This method provides a separate entry point for the same functionality
41024    pub fn parse_alter_table_drop(&mut self) -> Result<Option<Expression>> {
41025        // Check for IF EXISTS before PARTITION
41026        let exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
41027
41028        // Check if this is DROP PARTITION
41029        if self.check(TokenType::Partition) {
41030            return self.parse_drop_partition_with_exists(exists);
41031        }
41032
41033        // Check for DROP FOREIGN KEY (Oracle/MySQL)
41034        if self.match_keywords(&[TokenType::ForeignKey, TokenType::Key]) {
41035            let name = self.expect_identifier_with_quoted()?;
41036            return Ok(Some(Expression::AlterTable(Box::new(AlterTable {
41037                name: TableRef::new(""),
41038                actions: vec![AlterTableAction::DropForeignKey { name }],
41039                if_exists: false,
41040                algorithm: None,
41041                lock: None,
41042                with_check: None,
41043                partition: None,
41044                on_cluster: None,
41045            }))));
41046        }
41047
41048        // Check for DROP COLUMNS (col1, col2, ...) syntax (Spark/Databricks)
41049        if self.check_identifier("COLUMNS") && self.check_next(TokenType::LParen) {
41050            self.advance(); // consume COLUMNS
41051            self.expect(TokenType::LParen)?;
41052            let mut columns = Vec::new();
41053            loop {
41054                if let Some(col) = self.parse_identifier()? {
41055                    columns.push(col);
41056                }
41057                if !self.match_token(TokenType::Comma) {
41058                    break;
41059                }
41060            }
41061            self.expect(TokenType::RParen)?;
41062            if columns.is_empty() {
41063                return Ok(None);
41064            } else if columns.len() == 1 {
41065                return Ok(Some(columns.remove(0)));
41066            } else {
41067                return Ok(Some(Expression::Tuple(Box::new(Tuple {
41068                    expressions: columns,
41069                }))));
41070            }
41071        }
41072
41073        // Otherwise, parse as DROP COLUMN(s)
41074        let mut columns = Vec::new();
41075
41076        // Parse first column
41077        if let Some(col) = self.parse_drop_column()? {
41078            columns.push(col);
41079        }
41080
41081        // Parse additional columns (comma-separated)
41082        while self.match_token(TokenType::Comma) {
41083            // Match optional DROP keyword before next column
41084            self.match_token(TokenType::Drop);
41085            if let Some(col) = self.parse_drop_column()? {
41086                columns.push(col);
41087            }
41088        }
41089
41090        if columns.is_empty() {
41091            Ok(None)
41092        } else if columns.len() == 1 {
41093            Ok(Some(columns.remove(0)))
41094        } else {
41095            // Multiple columns - wrap in a Tuple
41096            Ok(Some(Expression::Tuple(Box::new(Tuple {
41097                expressions: columns,
41098            }))))
41099        }
41100    }
41101
41102    /// parse_alter_table_rename - Parses ALTER TABLE RENAME clause
41103    /// Python: parser.py:7828-7841
41104    pub fn parse_alter_table_rename(&mut self) -> Result<Option<Expression>> {
41105        // RENAME COLUMN old_name TO new_name
41106        if self.match_token(TokenType::Column) {
41107            let exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
41108            let old_column = match self.parse_column()? {
41109                Some(c) => c,
41110                None => return Ok(None),
41111            };
41112
41113            if !self.match_text_seq(&["TO"]) {
41114                return Ok(None);
41115            }
41116
41117            let new_column = self.parse_column()?;
41118
41119            return Ok(Some(Expression::RenameColumn(Box::new(RenameColumn {
41120                this: Box::new(old_column),
41121                to: new_column.map(Box::new),
41122                exists,
41123            }))));
41124        }
41125
41126        // RENAME TO new_table_name
41127        if self.match_text_seq(&["TO"]) {
41128            // Return the table expression directly - the caller will handle it as a rename target
41129            let new_table = self.parse_table()?;
41130            return Ok(new_table);
41131        }
41132
41133        // SQLite allows: RENAME old_name TO new_name (without COLUMN keyword)
41134        // Try to parse as column rename if followed by identifier and TO
41135        if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
41136            let old_column = match self.parse_column()? {
41137                Some(c) => c,
41138                None => return Ok(None),
41139            };
41140
41141            if self.match_text_seq(&["TO"]) {
41142                let new_column = self.parse_column()?;
41143                return Ok(Some(Expression::RenameColumn(Box::new(RenameColumn {
41144                    this: Box::new(old_column),
41145                    to: new_column.map(Box::new),
41146                    exists: false,
41147                }))));
41148            } else {
41149                // Not TO after identifier - put it back and return error
41150                return Err(self.parse_error("Expected COLUMN or TO after RENAME"));
41151            }
41152        }
41153
41154        Ok(None)
41155    }
41156
41157    /// parse_alter_table_set - Parses ALTER TABLE SET clause
41158    /// Python: parser.py:7843-7877
41159    pub fn parse_alter_table_set(&mut self) -> Result<Option<Expression>> {
41160        let mut alter_set = AlterSet {
41161            expressions: Vec::new(),
41162            option: None,
41163            tablespace: None,
41164            access_method: None,
41165            file_format: None,
41166            copy_options: None,
41167            tag: None,
41168            location: None,
41169            serde: None,
41170        };
41171
41172        // SET AUTHORIZATION [ROLE] user
41173        if self.match_token(TokenType::Authorization) {
41174            let mut auth_text = "AUTHORIZATION ".to_string();
41175            if self.match_texts(&["ROLE"]) {
41176                auth_text.push_str("ROLE ");
41177            }
41178            let user = self.expect_identifier()?;
41179            auth_text.push_str(&user);
41180            alter_set.option = Some(Box::new(Expression::Identifier(Identifier::new(auth_text))));
41181            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
41182        }
41183
41184        // SET PROPERTIES prop = value, ...
41185        if self.match_text_seq(&["PROPERTIES"]) {
41186            let mut assignments = Vec::new();
41187            loop {
41188                // Parse property name (could be identifier or string literal)
41189                let key = if self.check(TokenType::String) {
41190                    self.parse_string()?.unwrap_or(Expression::Null(Null))
41191                } else {
41192                    let name = self.expect_identifier()?;
41193                    Expression::Identifier(Identifier::new(name))
41194                };
41195                self.expect(TokenType::Eq)?;
41196                // Parse value (could be DEFAULT or an expression)
41197                let value = if self.match_token(TokenType::Default) {
41198                    Expression::Identifier(Identifier::new("DEFAULT".to_string()))
41199                } else {
41200                    self.parse_expression()?
41201                };
41202                assignments.push(Expression::Eq(Box::new(BinaryOp {
41203                    left: key,
41204                    right: value,
41205                    left_comments: Vec::new(),
41206                    operator_comments: Vec::new(),
41207                    trailing_comments: Vec::new(),
41208                    inferred_type: None,
41209                })));
41210                if !self.match_token(TokenType::Comma) {
41211                    break;
41212                }
41213            }
41214            alter_set.expressions = assignments;
41215            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
41216        }
41217
41218        // SET (properties) or SET TABLE PROPERTIES (properties)
41219        if self.check(TokenType::LParen) || self.match_text_seq(&["TABLE", "PROPERTIES"]) {
41220            let assignments = self.parse_wrapped_csv_assignments()?;
41221            alter_set.expressions = assignments;
41222            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
41223        }
41224
41225        // SET FILESTREAM_ON = value
41226        if self.match_text_seq(&["FILESTREAM_ON"]) {
41227            if let Some(assignment) = self.parse_assignment()? {
41228                alter_set.expressions = vec![assignment];
41229            }
41230            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
41231        }
41232
41233        // SET LOGGED or SET UNLOGGED
41234        if self.match_texts(&["LOGGED", "UNLOGGED"]) {
41235            let option = self.previous().text.to_uppercase();
41236            alter_set.option = Some(Box::new(Expression::Identifier(Identifier::new(option))));
41237            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
41238        }
41239
41240        // SET WITHOUT CLUSTER or SET WITHOUT OIDS
41241        if self.match_text_seq(&["WITHOUT"]) {
41242            if self.match_texts(&["CLUSTER", "OIDS"]) {
41243                let option = format!("WITHOUT {}", self.previous().text.to_uppercase());
41244                alter_set.option = Some(Box::new(Expression::Identifier(Identifier::new(option))));
41245                return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
41246            }
41247        }
41248
41249        // SET LOCATION path
41250        if self.match_text_seq(&["LOCATION"]) {
41251            let loc = self.parse_field()?;
41252            alter_set.location = loc.map(Box::new);
41253            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
41254        }
41255
41256        // SET ACCESS METHOD method
41257        if self.match_text_seq(&["ACCESS", "METHOD"]) {
41258            let method = self.parse_field()?;
41259            alter_set.access_method = method.map(Box::new);
41260            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
41261        }
41262
41263        // SET TABLESPACE name
41264        if self.match_text_seq(&["TABLESPACE"]) {
41265            let tablespace = self.parse_field()?;
41266            alter_set.tablespace = tablespace.map(Box::new);
41267            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
41268        }
41269
41270        // SET FILE FORMAT format or SET FILEFORMAT format
41271        if self.match_text_seq(&["FILE", "FORMAT"]) || self.match_text_seq(&["FILEFORMAT"]) {
41272            let format = self.parse_field()?;
41273            alter_set.file_format = format.map(Box::new);
41274            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
41275        }
41276
41277        // SET STAGE_FILE_FORMAT = (options)
41278        if self.match_text_seq(&["STAGE_FILE_FORMAT"]) {
41279            let options = self.parse_wrapped_options()?;
41280            alter_set.file_format = options.map(Box::new);
41281            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
41282        }
41283
41284        // SET STAGE_COPY_OPTIONS = (options)
41285        if self.match_text_seq(&["STAGE_COPY_OPTIONS"]) {
41286            let options = self.parse_wrapped_options()?;
41287            alter_set.copy_options = options.map(Box::new);
41288            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
41289        }
41290
41291        // SET TAG or SET TAGS
41292        if self.match_text_seq(&["TAG"]) || self.match_text_seq(&["TAGS"]) {
41293            let mut tags = Vec::new();
41294            loop {
41295                if let Some(assignment) = self.parse_assignment()? {
41296                    tags.push(assignment);
41297                }
41298                if !self.match_token(TokenType::Comma) {
41299                    break;
41300                }
41301            }
41302            if !tags.is_empty() {
41303                alter_set.tag = Some(Box::new(Expression::Tuple(Box::new(Tuple {
41304                    expressions: tags,
41305                }))));
41306            }
41307            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
41308        }
41309
41310        // SET SERDE 'class' [WITH SERDEPROPERTIES (...)]
41311        if self.match_text_seq(&["SERDE"]) {
41312            let serde = self.parse_field()?;
41313            alter_set.serde = serde.map(Box::new);
41314
41315            // Parse optional properties
41316            let properties = self.parse_wrapped()?;
41317            if let Some(props) = properties {
41318                alter_set.expressions = vec![props];
41319            }
41320            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
41321        }
41322
41323        Ok(None)
41324    }
41325
41326    /// Helper to parse wrapped CSV of assignments
41327    fn parse_wrapped_csv_assignments(&mut self) -> Result<Vec<Expression>> {
41328        if !self.match_token(TokenType::LParen) {
41329            return Ok(Vec::new());
41330        }
41331        let mut assignments = Vec::new();
41332        loop {
41333            if let Some(assignment) = self.parse_assignment()? {
41334                assignments.push(assignment);
41335            }
41336            if !self.match_token(TokenType::Comma) {
41337                break;
41338            }
41339        }
41340        self.expect(TokenType::RParen)?;
41341        Ok(assignments)
41342    }
41343
41344    /// parse_analyze - Implemented from Python _parse_analyze
41345    /// Calls: parse_table_parts, parse_number, parse_table
41346    #[allow(unused_variables, unused_mut)]
41347    /// parse_analyze - Parses ANALYZE statement
41348    /// Python: parser.py:7937-7999
41349    pub fn parse_analyze(&mut self) -> Result<Option<Expression>> {
41350        // If no more tokens, return empty Analyze
41351        if self.is_at_end() {
41352            return Ok(Some(Expression::Analyze(Box::new(Analyze {
41353                kind: None,
41354                this: None,
41355                options: Vec::new(),
41356                mode: None,
41357                partition: None,
41358                expression: None,
41359                properties: Vec::new(),
41360                columns: Vec::new(),
41361            }))));
41362        }
41363
41364        // Parse options (VERBOSE, SKIP_LOCKED, etc.)
41365        // StarRocks uses FULL and SAMPLE as options
41366        let mut options = Vec::new();
41367        let analyze_styles = [
41368            "VERBOSE",
41369            "SKIP_LOCKED",
41370            "BUFFER_USAGE_LIMIT",
41371            "FULL",
41372            "SAMPLE",
41373        ];
41374        while self.match_texts(&analyze_styles) {
41375            let style = self.previous().text.to_uppercase();
41376            if style == "BUFFER_USAGE_LIMIT" {
41377                // Parse number after BUFFER_USAGE_LIMIT
41378                if let Some(num) = self.parse_number()? {
41379                    options.push(Expression::Identifier(Identifier::new(format!(
41380                        "BUFFER_USAGE_LIMIT {}",
41381                        if let Expression::Literal(Literal::Number(n)) = &num {
41382                            n.clone()
41383                        } else {
41384                            String::new()
41385                        }
41386                    ))));
41387                }
41388            } else {
41389                options.push(Expression::Identifier(Identifier::new(style)));
41390            }
41391        }
41392
41393        let mut this: Option<Expression> = None;
41394        let mut kind: Option<String> = None;
41395        let mut inner_expression: Option<Expression> = None;
41396
41397        // Parse TABLE or INDEX
41398        if self.match_token(TokenType::Table) {
41399            kind = Some("TABLE".to_string());
41400            this = self.parse_table_parts()?;
41401        } else if self.match_token(TokenType::Index) {
41402            kind = Some("INDEX".to_string());
41403            this = self.parse_table_parts()?;
41404        } else if self.match_text_seq(&["TABLES"]) {
41405            kind = Some("TABLES".to_string());
41406            if self.match_token(TokenType::From) || self.match_token(TokenType::In) {
41407                let dir = self.previous().text.to_uppercase();
41408                kind = Some(format!("TABLES {}", dir));
41409                // Parse database name as identifier
41410                let db_name = self.expect_identifier()?;
41411                this = Some(Expression::Identifier(Identifier::new(db_name)));
41412            }
41413        } else if self.match_text_seq(&["DATABASE"]) {
41414            kind = Some("DATABASE".to_string());
41415            this = self.parse_table_parts()?;
41416        } else if self.match_text_seq(&["CLUSTER"]) {
41417            kind = Some("CLUSTER".to_string());
41418            this = self.parse_table_parts()?;
41419        } else if self.match_texts(&["LOCAL", "NO_WRITE_TO_BINLOG"]) {
41420            // MySQL: ANALYZE LOCAL TABLE tbl / ANALYZE NO_WRITE_TO_BINLOG TABLE tbl
41421            let opt_text = self.previous().text.to_uppercase();
41422            options.push(Expression::Identifier(Identifier::new(opt_text)));
41423            if self.match_token(TokenType::Table) {
41424                kind = Some("TABLE".to_string());
41425            }
41426            this = self.parse_table_parts()?;
41427        } else if self.match_text_seq(&["COMPUTE"]) {
41428            // Check ANALYZE_EXPRESSION_PARSERS keywords before fallback to parse_table_parts
41429            // Python: elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS)
41430            inner_expression = self.parse_analyze_statistics()?;
41431        } else if self.match_text_seq(&["DELETE"]) {
41432            inner_expression = self.parse_analyze_delete()?;
41433        } else if self.match_text_seq(&["VALIDATE"]) {
41434            inner_expression = self.parse_analyze_validate()?;
41435        } else if self.match_text_seq(&["LIST"]) {
41436            inner_expression = self.parse_analyze_list()?;
41437        } else if self.match_text_seq(&["DROP"]) {
41438            inner_expression = self.parse_analyze_histogram()?;
41439        } else if self.match_text_seq(&["UPDATE"]) {
41440            inner_expression = self.parse_analyze_histogram()?;
41441        } else if self.match_texts(&["ALL", "PREDICATE"]) {
41442            inner_expression = self.parse_analyze_columns()?;
41443        } else {
41444            // Try to parse table directly (empty kind - https://prestodb.io/docs/current/sql/analyze.html)
41445            this = self.parse_table_parts()?;
41446        }
41447
41448        // Parse optional column list: ANALYZE tbl(col1, col2) (PostgreSQL)
41449        let columns = if this.is_some() && self.match_token(TokenType::LParen) {
41450            let mut cols = Vec::new();
41451            loop {
41452                cols.push(self.expect_identifier_or_keyword()?);
41453                if !self.match_token(TokenType::Comma) {
41454                    break;
41455                }
41456            }
41457            self.expect(TokenType::RParen)?;
41458            cols
41459        } else {
41460            Vec::new()
41461        };
41462
41463        // Parse optional PARTITION
41464        let partition = self.parse_partition()?;
41465
41466        // Parse optional WITH SYNC/ASYNC MODE or WITH (prop=val, ...) for Presto
41467        let mut mode = None;
41468        let mut properties = Vec::new();
41469
41470        if self.match_text_seq(&["WITH", "SYNC", "MODE"]) {
41471            mode = Some(Box::new(Expression::Identifier(Identifier::new(
41472                "WITH SYNC MODE".to_string(),
41473            ))));
41474        } else if self.match_text_seq(&["WITH", "ASYNC", "MODE"]) {
41475            mode = Some(Box::new(Expression::Identifier(Identifier::new(
41476                "WITH ASYNC MODE".to_string(),
41477            ))));
41478        } else if self.match_text_seq(&["WITH"]) {
41479            // Presto syntax: ANALYZE tbl WITH (prop1=val1, prop2=val2)
41480            if self.match_token(TokenType::LParen) {
41481                loop {
41482                    // Parse key=value pairs
41483                    let key = self.parse_id_var()?;
41484                    if key.is_none() {
41485                        break;
41486                    }
41487
41488                    // Expect = sign
41489                    if self.match_token(TokenType::Eq) {
41490                        // Parse the value
41491                        let value = self.parse_primary()?;
41492                        if let Some(k) = key {
41493                            properties.push(Expression::Property(Box::new(Property {
41494                                this: Box::new(k),
41495                                value: Some(Box::new(value)),
41496                            })));
41497                        }
41498                    } else if let Some(k) = key {
41499                        // Key without value
41500                        properties.push(Expression::Property(Box::new(Property {
41501                            this: Box::new(k),
41502                            value: None,
41503                        })));
41504                    }
41505
41506                    if !self.match_token(TokenType::Comma) {
41507                        break;
41508                    }
41509                }
41510                self.expect(TokenType::RParen)?;
41511            }
41512        }
41513
41514        // Parse optional inner expressions (COMPUTE, DELETE, etc.)
41515        // Only if inner_expression wasn't already set (for cases like ANALYZE TABLE tbl VALIDATE...)
41516        if inner_expression.is_none() {
41517            if self.match_text_seq(&["COMPUTE"]) {
41518                inner_expression = self.parse_analyze_statistics()?;
41519            } else if self.match_text_seq(&["DELETE"]) {
41520                inner_expression = self.parse_analyze_delete()?;
41521            } else if self.match_text_seq(&["VALIDATE"]) {
41522                inner_expression = self.parse_analyze_validate()?;
41523            } else if self.match_text_seq(&["LIST"]) {
41524                inner_expression = self.parse_analyze_list()?;
41525            } else if self.match_text_seq(&["DROP"]) {
41526                inner_expression = self.parse_analyze_histogram()?;
41527            } else if self.match_text_seq(&["UPDATE"]) {
41528                inner_expression = self.parse_analyze_histogram()?;
41529            } else if self.match_texts(&["ALL", "PREDICATE"]) {
41530                // Redshift: ANALYZE TBL ALL COLUMNS / ANALYZE TBL PREDICATE COLUMNS
41531                inner_expression = self.parse_analyze_columns()?;
41532            }
41533        }
41534
41535        // Parse optional properties (if not already parsed from WITH clause)
41536        // StarRocks syntax: ANALYZE TABLE TBL PROPERTIES ('prop1'=val1, 'prop2'=val2)
41537        if properties.is_empty() && self.match_text_seq(&["PROPERTIES"]) {
41538            if self.match_token(TokenType::LParen) {
41539                loop {
41540                    // Parse key (can be a string literal or identifier)
41541                    let key = if self.check(TokenType::String) {
41542                        self.advance();
41543                        let key_str = self.previous().text.clone();
41544                        Expression::Literal(Literal::String(key_str))
41545                    } else {
41546                        self.parse_id_var()?
41547                            .unwrap_or(Expression::Identifier(Identifier::new(String::new())))
41548                    };
41549
41550                    // Expect = sign
41551                    if self.match_token(TokenType::Eq) {
41552                        // Parse the value
41553                        let value = self.parse_primary()?;
41554                        properties.push(Expression::Property(Box::new(Property {
41555                            this: Box::new(key),
41556                            value: Some(Box::new(value)),
41557                        })));
41558                    } else {
41559                        // Key without value
41560                        properties.push(Expression::Property(Box::new(Property {
41561                            this: Box::new(key),
41562                            value: None,
41563                        })));
41564                    }
41565
41566                    if !self.match_token(TokenType::Comma) {
41567                        break;
41568                    }
41569                }
41570                self.expect(TokenType::RParen)?;
41571            }
41572        }
41573
41574        Ok(Some(Expression::Analyze(Box::new(Analyze {
41575            kind,
41576            this: this.map(Box::new),
41577            options,
41578            mode,
41579            partition: partition.map(Box::new),
41580            expression: inner_expression.map(Box::new),
41581            properties,
41582            columns,
41583        }))))
41584    }
41585
41586    /// parse_analyze_columns - Parses ANALYZE ... COLUMNS
41587    /// Python: parser.py:8055-8059
41588    /// Note: AnalyzeColumns not in expressions.rs, using Identifier instead
41589    pub fn parse_analyze_columns(&mut self) -> Result<Option<Expression>> {
41590        let prev_text = self.previous().text.to_uppercase();
41591        if self.match_text_seq(&["COLUMNS"]) {
41592            return Ok(Some(Expression::Identifier(Identifier::new(format!(
41593                "{} COLUMNS",
41594                prev_text
41595            )))));
41596        }
41597        Ok(None)
41598    }
41599
41600    /// parse_analyze_delete - Parses ANALYZE DELETE STATISTICS
41601    /// Python: parser.py:8061-8065
41602    pub fn parse_analyze_delete(&mut self) -> Result<Option<Expression>> {
41603        let kind = if self.match_text_seq(&["SYSTEM"]) {
41604            Some("SYSTEM".to_string())
41605        } else {
41606            None
41607        };
41608
41609        if self.match_text_seq(&["STATISTICS"]) {
41610            return Ok(Some(Expression::AnalyzeDelete(Box::new(AnalyzeDelete {
41611                kind,
41612            }))));
41613        }
41614
41615        Ok(None)
41616    }
41617
41618    /// parse_analyze_histogram - Parses ANALYZE ... HISTOGRAM ON
41619    /// Python: parser.py:8073-8108
41620    pub fn parse_analyze_histogram(&mut self) -> Result<Option<Expression>> {
41621        let action = self.previous().text.to_uppercase(); // DROP or UPDATE
41622        let mut expressions = Vec::new();
41623        let mut update_options: Option<Box<Expression>> = None;
41624        let mut expression: Option<Box<Expression>> = None;
41625
41626        if !self.match_text_seq(&["HISTOGRAM", "ON"]) {
41627            return Ok(None);
41628        }
41629
41630        // Parse column references
41631        loop {
41632            if let Some(col) = self.parse_column_reference()? {
41633                expressions.push(col);
41634            } else {
41635                break;
41636            }
41637            if !self.match_token(TokenType::Comma) {
41638                break;
41639            }
41640        }
41641
41642        // Parse USING DATA 'json_data' (MySQL) - must check before WITH
41643        if self.match_text_seq(&["USING", "DATA"]) {
41644            if self.check(TokenType::String) {
41645                let tok = self.advance();
41646                expression = Some(Box::new(Expression::Identifier(Identifier::new(format!(
41647                    "USING DATA '{}'",
41648                    tok.text
41649                )))));
41650            } else {
41651                expression = Some(Box::new(Expression::Identifier(Identifier::new(
41652                    "USING DATA".to_string(),
41653                ))));
41654            }
41655        }
41656
41657        // Parse WITH options - can have two WITH clauses:
41658        // 1. WITH SYNC/ASYNC MODE (optional)
41659        // 2. WITH n BUCKETS (optional)
41660        // StarRocks syntax: WITH SYNC MODE WITH 5 BUCKETS
41661        let mut mode_str: Option<String> = None;
41662        let mut buckets_str: Option<String> = None;
41663
41664        if self.match_token(TokenType::With) {
41665            if self.match_texts(&["SYNC", "ASYNC"]) {
41666                let mode = self.previous().text.to_uppercase();
41667                if self.match_text_seq(&["MODE"]) {
41668                    mode_str = Some(format!("WITH {} MODE", mode));
41669                }
41670                // Check for second WITH clause for buckets
41671                if self.match_token(TokenType::With) {
41672                    if let Some(num) = self.parse_number()? {
41673                        if self.match_text_seq(&["BUCKETS"]) {
41674                            let num_str = if let Expression::Literal(Literal::Number(n)) = &num {
41675                                n.clone()
41676                            } else {
41677                                String::new()
41678                            };
41679                            buckets_str = Some(format!("WITH {} BUCKETS", num_str));
41680                        }
41681                    }
41682                }
41683            } else if let Some(num) = self.parse_number()? {
41684                if self.match_text_seq(&["BUCKETS"]) {
41685                    let num_str = if let Expression::Literal(Literal::Number(n)) = &num {
41686                        n.clone()
41687                    } else {
41688                        String::new()
41689                    };
41690                    buckets_str = Some(format!("WITH {} BUCKETS", num_str));
41691                }
41692            }
41693        }
41694
41695        // Combine mode and buckets into expression
41696        match (mode_str, buckets_str) {
41697            (Some(m), Some(b)) => {
41698                expression = Some(Box::new(Expression::Identifier(Identifier::new(format!(
41699                    "{} {}",
41700                    m, b
41701                )))));
41702            }
41703            (Some(m), None) => {
41704                expression = Some(Box::new(Expression::Identifier(Identifier::new(m))));
41705            }
41706            (None, Some(b)) => {
41707                expression = Some(Box::new(Expression::Identifier(Identifier::new(b))));
41708            }
41709            (None, None) => {}
41710        }
41711
41712        // Parse AUTO UPDATE or MANUAL UPDATE (MySQL 8.0.27+)
41713        if self.match_texts(&["MANUAL", "AUTO"]) {
41714            let mode = self.previous().text.to_uppercase();
41715            if self.check(TokenType::Update) {
41716                update_options = Some(Box::new(Expression::Identifier(Identifier::new(mode))));
41717                self.advance(); // consume UPDATE
41718            }
41719        }
41720
41721        Ok(Some(Expression::AnalyzeHistogram(Box::new(
41722            AnalyzeHistogram {
41723                this: Box::new(Expression::Identifier(Identifier::new(action))),
41724                expressions,
41725                expression,
41726                update_options,
41727            },
41728        ))))
41729    }
41730
41731    /// parse_analyze_list - Parses ANALYZE LIST CHAINED ROWS
41732    /// Python: parser.py:8067-8070
41733    pub fn parse_analyze_list(&mut self) -> Result<Option<Expression>> {
41734        if self.match_text_seq(&["CHAINED", "ROWS"]) {
41735            let expression = self.parse_into()?.map(Box::new);
41736            return Ok(Some(Expression::AnalyzeListChainedRows(Box::new(
41737                AnalyzeListChainedRows { expression },
41738            ))));
41739        }
41740        Ok(None)
41741    }
41742
41743    /// parse_analyze_statistics - Parses ANALYZE ... STATISTICS
41744    /// Python: parser.py:8002-8031
41745    pub fn parse_analyze_statistics(&mut self) -> Result<Option<Expression>> {
41746        let kind = self.previous().text.to_uppercase();
41747        let option = if self.match_text_seq(&["DELTA"]) {
41748            Some(Box::new(Expression::Identifier(Identifier::new(
41749                "DELTA".to_string(),
41750            ))))
41751        } else {
41752            None
41753        };
41754
41755        // Expect STATISTICS keyword
41756        if !self.match_text_seq(&["STATISTICS"]) {
41757            return Ok(None);
41758        }
41759
41760        let mut this: Option<Box<Expression>> = None;
41761        let mut expressions = Vec::new();
41762
41763        if self.match_text_seq(&["NOSCAN"]) {
41764            this = Some(Box::new(Expression::Identifier(Identifier::new(
41765                "NOSCAN".to_string(),
41766            ))));
41767        } else if self.match_token(TokenType::For) {
41768            if self.match_text_seq(&["ALL", "COLUMNS"]) {
41769                this = Some(Box::new(Expression::Identifier(Identifier::new(
41770                    "FOR ALL COLUMNS".to_string(),
41771                ))));
41772            } else if self.match_text_seq(&["COLUMNS"]) {
41773                this = Some(Box::new(Expression::Identifier(Identifier::new(
41774                    "FOR COLUMNS".to_string(),
41775                ))));
41776                // Parse column list
41777                loop {
41778                    if let Some(col) = self.parse_column_reference()? {
41779                        expressions.push(col);
41780                    } else {
41781                        break;
41782                    }
41783                    if !self.match_token(TokenType::Comma) {
41784                        break;
41785                    }
41786                }
41787            }
41788        } else if self.match_text_seq(&["SAMPLE"]) {
41789            // Parse SAMPLE number [PERCENT]
41790            if let Some(sample) = self.parse_number()? {
41791                let sample_kind = if self.match_token(TokenType::Percent) {
41792                    Some("PERCENT".to_string())
41793                } else {
41794                    None
41795                };
41796                expressions.push(Expression::AnalyzeSample(Box::new(AnalyzeSample {
41797                    kind: sample_kind.unwrap_or_default(),
41798                    sample: Some(Box::new(sample)),
41799                })));
41800            }
41801        }
41802
41803        Ok(Some(Expression::AnalyzeStatistics(Box::new(
41804            AnalyzeStatistics {
41805                kind,
41806                option,
41807                this,
41808                expressions,
41809            },
41810        ))))
41811    }
41812
41813    /// parse_analyze_validate - Parses ANALYZE VALIDATE
41814    /// Python: parser.py:8034-8053
41815    pub fn parse_analyze_validate(&mut self) -> Result<Option<Expression>> {
41816        let mut kind = String::new();
41817        let mut this: Option<Box<Expression>> = None;
41818        let mut expression: Option<Box<Expression>> = None;
41819
41820        if self.match_text_seq(&["REF", "UPDATE"]) {
41821            kind = "REF".to_string();
41822            this = Some(Box::new(Expression::Identifier(Identifier::new(
41823                "UPDATE".to_string(),
41824            ))));
41825            if self.match_text_seq(&["SET", "DANGLING", "TO", "NULL"]) {
41826                this = Some(Box::new(Expression::Identifier(Identifier::new(
41827                    "UPDATE SET DANGLING TO NULL".to_string(),
41828                ))));
41829            }
41830        } else if self.match_text_seq(&["STRUCTURE"]) {
41831            kind = "STRUCTURE".to_string();
41832            if self.match_text_seq(&["CASCADE", "FAST"]) {
41833                this = Some(Box::new(Expression::Identifier(Identifier::new(
41834                    "CASCADE FAST".to_string(),
41835                ))));
41836            } else if self.match_text_seq(&["CASCADE", "COMPLETE"]) {
41837                if self.match_texts(&["ONLINE", "OFFLINE"]) {
41838                    let mode = self.previous().text.to_uppercase();
41839                    this = Some(Box::new(Expression::Identifier(Identifier::new(format!(
41840                        "CASCADE COMPLETE {}",
41841                        mode
41842                    )))));
41843                    expression = self.parse_into()?.map(Box::new);
41844                }
41845            }
41846        }
41847
41848        if kind.is_empty() {
41849            return Ok(None);
41850        }
41851
41852        Ok(Some(Expression::AnalyzeValidate(Box::new(
41853            AnalyzeValidate {
41854                kind,
41855                this,
41856                expression,
41857            },
41858        ))))
41859    }
41860
41861    /// parse_attach_detach - Parses ATTACH/DETACH statements (DuckDB)
41862    /// Python: DuckDB._parse_attach_detach
41863    pub fn parse_attach_detach(&mut self, is_attach: bool) -> Result<Expression> {
41864        // ATTACH [DATABASE] [IF NOT EXISTS] 'path' [AS alias] [(options)]
41865        // DETACH [DATABASE] [IF EXISTS] name
41866        // DATABASE can be tokenized as TokenType::Database (keyword), not just Var
41867        let _ = self.match_identifier("DATABASE") || self.match_token(TokenType::Database);
41868
41869        let exists = if is_attach {
41870            self.match_text_seq(&["IF", "NOT", "EXISTS"])
41871        } else {
41872            self.match_text_seq(&["IF", "EXISTS"])
41873        };
41874
41875        // Parse the expression (can be a path string, identifier, or expression like 'foo' || '.foo2'
41876        // or NOT EXISTS(subquery) for conditional attach)
41877        let this_expr = self.parse_expression()?;
41878
41879        // Check for AS alias
41880        let this = if self.match_token(TokenType::As) {
41881            let alias = self.expect_identifier_or_keyword_with_quoted()?;
41882            Expression::Alias(Box::new(Alias {
41883                this: this_expr,
41884                alias,
41885                column_aliases: Vec::new(),
41886                pre_alias_comments: Vec::new(),
41887                trailing_comments: Vec::new(),
41888                inferred_type: None,
41889            }))
41890        } else {
41891            this_expr
41892        };
41893
41894        if is_attach {
41895            // Parse optional (options)
41896            let expressions = if self.match_token(TokenType::LParen) {
41897                let mut opts = Vec::new();
41898                loop {
41899                    // Parse option: KEY [VALUE]
41900                    let key_name = self.advance().text.to_uppercase();
41901                    let key = Expression::Identifier(Identifier::new(key_name));
41902                    let value = if !self.check(TokenType::Comma) && !self.check(TokenType::RParen) {
41903                        // The value can be an identifier, string, boolean, etc.
41904                        let val_token = self.advance();
41905                        let val_expr = if val_token.token_type == TokenType::String {
41906                            Expression::Literal(Literal::String(val_token.text.clone()))
41907                        } else if val_token.token_type == TokenType::True {
41908                            Expression::Boolean(BooleanLiteral { value: true })
41909                        } else if val_token.token_type == TokenType::False {
41910                            Expression::Boolean(BooleanLiteral { value: false })
41911                        } else {
41912                            Expression::Identifier(Identifier::new(val_token.text.clone()))
41913                        };
41914                        Some(Box::new(val_expr))
41915                    } else {
41916                        None
41917                    };
41918                    opts.push(Expression::AttachOption(Box::new(AttachOption {
41919                        this: Box::new(key),
41920                        expression: value,
41921                    })));
41922                    if !self.match_token(TokenType::Comma) {
41923                        break;
41924                    }
41925                }
41926                self.expect(TokenType::RParen)?;
41927                opts
41928            } else {
41929                Vec::new()
41930            };
41931
41932            Ok(Expression::Attach(Box::new(Attach {
41933                this: Box::new(this),
41934                exists,
41935                expressions,
41936            })))
41937        } else {
41938            Ok(Expression::Detach(Box::new(Detach {
41939                this: Box::new(this),
41940                exists,
41941            })))
41942        }
41943    }
41944
41945    /// parse_install - Parses INSTALL statement (DuckDB)
41946    /// Python: DuckDB._parse_install
41947    pub fn parse_install(&mut self, force: bool) -> Result<Expression> {
41948        // INSTALL extension [FROM source]
41949        let name = self.expect_identifier_or_keyword()?;
41950        let this = Expression::Identifier(Identifier::new(name));
41951
41952        let from_ = if self.match_token(TokenType::From) {
41953            // FROM can be followed by a string or identifier
41954            Some(Box::new(self.parse_primary()?))
41955        } else {
41956            None
41957        };
41958
41959        Ok(Expression::Install(Box::new(Install {
41960            this: Box::new(this),
41961            from_,
41962            force: if force {
41963                Some(Box::new(Expression::Boolean(BooleanLiteral {
41964                    value: true,
41965                })))
41966            } else {
41967                None
41968            },
41969        })))
41970    }
41971
41972    /// parse_force_statement - Parses FORCE INSTALL/CHECKPOINT (DuckDB)
41973    /// Python: DuckDB._parse_force
41974    pub fn parse_force_statement(&mut self) -> Result<Expression> {
41975        if self.match_identifier("INSTALL") {
41976            return self.parse_install(true);
41977        }
41978        // FORCE CHECKPOINT or other: fallback to command
41979        self.parse_as_command()?
41980            .ok_or_else(|| self.parse_error("Failed to parse FORCE statement"))
41981    }
41982
41983    /// parse_summarize_statement - Parses SUMMARIZE statement (DuckDB)
41984    /// Python: DuckDB parser for SUMMARIZE
41985    pub fn parse_summarize_statement(&mut self) -> Result<Expression> {
41986        // SUMMARIZE [TABLE] expression
41987        let is_table = self.match_token(TokenType::Table);
41988
41989        // Try to parse a SELECT statement, string, or table reference
41990        let this = if self.check(TokenType::Select) || self.check(TokenType::With) {
41991            self.parse_select()?
41992        } else if self.check(TokenType::String) {
41993            self.parse_primary()?
41994        } else {
41995            // Parse as table name
41996            self.parse_table_parts()?
41997                .unwrap_or(Expression::Identifier(Identifier::new(String::new())))
41998        };
41999
42000        Ok(Expression::Summarize(Box::new(Summarize {
42001            this: Box::new(this),
42002            table: if is_table {
42003                Some(Box::new(Expression::Boolean(BooleanLiteral {
42004                    value: true,
42005                })))
42006            } else {
42007                None
42008            },
42009        })))
42010    }
42011
42012    /// parse_deallocate_prepare - Parses DEALLOCATE PREPARE <name>
42013    /// Presto/Trino syntax for deallocating prepared statements
42014    pub fn parse_deallocate_prepare(&mut self) -> Result<Expression> {
42015        self.advance(); // consume DEALLOCATE
42016
42017        // Check for PREPARE keyword
42018        if self.match_identifier("PREPARE") {
42019            // Parse the statement name
42020            let name = if !self.is_at_end() && !self.check(TokenType::Semicolon) {
42021                self.advance().text.clone()
42022            } else {
42023                String::new()
42024            };
42025
42026            // Build the command text
42027            let command_text = if name.is_empty() {
42028                "DEALLOCATE PREPARE".to_string()
42029            } else {
42030                format!("DEALLOCATE PREPARE {}", name)
42031            };
42032
42033            Ok(Expression::Command(Box::new(Command {
42034                this: command_text,
42035            })))
42036        } else {
42037            // Just DEALLOCATE without PREPARE - consume rest as command
42038            let mut parts = vec!["DEALLOCATE".to_string()];
42039            while !self.is_at_end() && !self.check(TokenType::Semicolon) {
42040                let token = self.advance();
42041                parts.push(token.text.clone());
42042            }
42043            Ok(Expression::Command(Box::new(Command {
42044                this: parts.join(" "),
42045            })))
42046        }
42047    }
42048
42049    /// parse_as_command - Creates Command expression
42050    #[allow(unused_variables, unused_mut)]
42051    /// parse_as_command - Parses remaining tokens as a raw command
42052    /// Python: _parse_as_command
42053    /// Used as fallback when specific parsing fails
42054    pub fn parse_as_command(&mut self) -> Result<Option<Expression>> {
42055        // Get the starting token text
42056        let start_text = if self.current > 0 {
42057            self.tokens
42058                .get(self.current - 1)
42059                .map(|t| t.text.clone())
42060                .unwrap_or_default()
42061        } else {
42062            String::new()
42063        };
42064
42065        // Consume all remaining tokens, storing both text and type
42066        let mut tokens_info: Vec<(String, TokenType)> = Vec::new();
42067        while !self.is_at_end() {
42068            let token = self.advance();
42069            tokens_info.push((token.text.clone(), token.token_type.clone()));
42070        }
42071
42072        // Join tokens intelligently, avoiding spaces around punctuation
42073        let mut expression = String::new();
42074        for (i, (text, token_type)) in tokens_info.iter().enumerate() {
42075            if i > 0 {
42076                // Check if we should add a space before this token
42077                let prev_type = &tokens_info[i - 1].1;
42078                let needs_space = !Self::is_punctuation_token(prev_type)
42079                    && !Self::is_punctuation_token(token_type);
42080                if needs_space {
42081                    expression.push(' ');
42082                }
42083            }
42084            expression.push_str(text);
42085        }
42086
42087        Ok(Some(Expression::Command(Box::new(Command {
42088            this: if expression.is_empty() {
42089                start_text
42090            } else {
42091                format!("{} {}", start_text, expression)
42092            },
42093        }))))
42094    }
42095
42096    /// Helper to determine if a token type is punctuation that shouldn't have spaces around it
42097    fn is_punctuation_token(token_type: &TokenType) -> bool {
42098        matches!(
42099            token_type,
42100            TokenType::Dot | TokenType::Colon | TokenType::DColon
42101        )
42102    }
42103
42104    /// Fallback to Command expression from a saved position.
42105    /// Extracts verbatim SQL text from source if available, consuming tokens until semicolon/EOF.
42106    fn fallback_to_command(&mut self, start_pos: usize) -> Result<Expression> {
42107        let start_span = self.tokens[start_pos].span.start;
42108        // Consume until semicolon or end
42109        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
42110            self.advance();
42111        }
42112        let command_text = if let Some(ref source) = self.source {
42113            let end_span = if self.current > 0 {
42114                self.tokens[self.current - 1].span.end
42115            } else {
42116                start_span
42117            };
42118            source[start_span..end_span].trim().to_string()
42119        } else {
42120            // Fallback: join token texts
42121            let mut parts = Vec::new();
42122            for i in start_pos..self.current {
42123                if self.tokens[i].token_type == TokenType::String {
42124                    parts.push(format!("'{}'", self.tokens[i].text.replace('\'', "''")));
42125                } else {
42126                    parts.push(self.tokens[i].text.clone());
42127                }
42128            }
42129            parts.join(" ")
42130        };
42131        Ok(Expression::Command(Box::new(Command {
42132            this: command_text,
42133        })))
42134    }
42135
42136    /// parse_assignment - Parses assignment expressions (variable := value)
42137    /// Python: _parse_assignment
42138    pub fn parse_assignment(&mut self) -> Result<Option<Expression>> {
42139        // First parse a disjunction (left side of potential assignment)
42140        let mut this = self.parse_disjunction()?;
42141
42142        // Handle := assignment operator
42143        while self.match_token(TokenType::ColonEq) {
42144            if let Some(left) = this {
42145                let right = self.parse_assignment()?;
42146                if let Some(right_expr) = right {
42147                    this = Some(Expression::PropertyEQ(Box::new(BinaryOp {
42148                        left,
42149                        right: right_expr,
42150                        left_comments: Vec::new(),
42151                        operator_comments: Vec::new(),
42152                        trailing_comments: Vec::new(),
42153                        inferred_type: None,
42154                    })));
42155                } else {
42156                    this = Some(left);
42157                    break;
42158                }
42159            } else {
42160                break;
42161            }
42162        }
42163
42164        // ClickHouse ternary operator: condition ? true_value : false_value
42165        // Parsed as: If(this=condition, true=true_value, false=false_value)
42166        if matches!(
42167            self.config.dialect,
42168            Some(crate::dialects::DialectType::ClickHouse)
42169        ) {
42170            if let Some(condition) = this {
42171                if self.match_token(TokenType::Parameter) {
42172                    if self.check(TokenType::Colon) {
42173                        return Err(self.parse_error(
42174                            "Expected true expression after ? in ClickHouse ternary",
42175                        ));
42176                    }
42177                    let true_value = self.parse_assignment()?.ok_or_else(|| {
42178                        self.parse_error("Expected true expression after ? in ClickHouse ternary")
42179                    })?;
42180                    let false_value = if self.match_token(TokenType::Colon) {
42181                        self.parse_assignment()?.unwrap_or(Expression::Null(Null))
42182                    } else {
42183                        Expression::Null(Null)
42184                    };
42185                    return Ok(Some(Expression::IfFunc(Box::new(IfFunc {
42186                        original_name: None,
42187                        condition,
42188                        true_value,
42189                        false_value: Some(false_value),
42190                        inferred_type: None,
42191                    }))));
42192                }
42193                this = Some(condition);
42194            }
42195        }
42196
42197        Ok(this)
42198    }
42199
42200    /// parse_auto_increment - Implemented from Python _parse_auto_increment
42201    /// Calls: parse_bitwise
42202    #[allow(unused_variables, unused_mut)]
42203    pub fn parse_auto_increment(&mut self) -> Result<Option<Expression>> {
42204        if self.match_text_seq(&["START"]) {
42205            return Ok(Some(Expression::GeneratedAsIdentityColumnConstraint(
42206                Box::new(GeneratedAsIdentityColumnConstraint {
42207                    this: None,
42208                    expression: None,
42209                    on_null: None,
42210                    start: None,
42211                    increment: None,
42212                    minvalue: None,
42213                    maxvalue: None,
42214                    cycle: None,
42215                    order: None,
42216                }),
42217            )));
42218        }
42219        if self.match_text_seq(&["INCREMENT"]) {
42220            // Matched: INCREMENT
42221            return Ok(None);
42222        }
42223        if self.match_text_seq(&["ORDER"]) {
42224            // Matched: ORDER
42225            return Ok(None);
42226        }
42227        Ok(None)
42228    }
42229
42230    /// parse_auto_property - Implemented from Python _parse_auto_property
42231    #[allow(unused_variables, unused_mut)]
42232    pub fn parse_auto_property(&mut self) -> Result<Option<Expression>> {
42233        if self.match_text_seq(&["REFRESH"]) {
42234            // Matched: REFRESH
42235            return Ok(None);
42236        }
42237        Ok(None)
42238    }
42239
42240    /// parse_between - Implemented from Python _parse_between
42241    #[allow(unused_variables, unused_mut)]
42242    pub fn parse_between(&mut self) -> Result<Option<Expression>> {
42243        if self.match_text_seq(&["SYMMETRIC"]) {
42244            // Matched: SYMMETRIC
42245            return Ok(None);
42246        }
42247        if self.match_text_seq(&["ASYMMETRIC"]) {
42248            // Matched: ASYMMETRIC
42249            return Ok(None);
42250        }
42251        Ok(None)
42252    }
42253
42254    /// parse_bitwise - Parses bitwise OR/XOR/AND expressions
42255    /// Python: _parse_bitwise
42256    /// Delegates to the existing parse_bitwise_or in the operator precedence chain
42257    pub fn parse_bitwise(&mut self) -> Result<Option<Expression>> {
42258        let start = self.current;
42259        match self.parse_bitwise_or() {
42260            Ok(expr) => Ok(Some(expr)),
42261            Err(_err) if self.current == start => Ok(None),
42262            Err(err) => Err(err),
42263        }
42264    }
42265
42266    /// parse_blockcompression - Implemented from Python _parse_blockcompression
42267    #[allow(unused_variables, unused_mut)]
42268    pub fn parse_blockcompression(&mut self) -> Result<Option<Expression>> {
42269        if self.match_text_seq(&["ALWAYS"]) {
42270            return Ok(Some(Expression::BlockCompressionProperty(Box::new(
42271                BlockCompressionProperty {
42272                    autotemp: None,
42273                    always: None,
42274                    default: None,
42275                    manual: None,
42276                    never: None,
42277                },
42278            ))));
42279        }
42280        if self.match_text_seq(&["MANUAL"]) {
42281            // Matched: MANUAL
42282            return Ok(None);
42283        }
42284        Ok(None)
42285    }
42286
42287    /// parse_boolean - Parse boolean literal (TRUE/FALSE)
42288    /// Python: if self._match(TokenType.TRUE): return exp.Boolean(this=True)
42289    pub fn parse_boolean(&mut self) -> Result<Option<Expression>> {
42290        if self.match_token(TokenType::True) {
42291            return Ok(Some(Expression::Boolean(BooleanLiteral { value: true })));
42292        }
42293        if self.match_token(TokenType::False) {
42294            return Ok(Some(Expression::Boolean(BooleanLiteral { value: false })));
42295        }
42296        Ok(None)
42297    }
42298
42299    /// parse_bracket - Ported from Python _parse_bracket
42300    /// Parses bracket expressions: array[index], array literal [1,2,3], or struct {key: value}
42301    #[allow(unused_variables, unused_mut)]
42302    pub fn parse_bracket(&mut self) -> Result<Option<Expression>> {
42303        self.parse_bracket_with_expr(None)
42304    }
42305
42306    /// parse_bracket_with_expr - Parses bracket with optional left-side expression
42307    fn parse_bracket_with_expr(&mut self, this: Option<Expression>) -> Result<Option<Expression>> {
42308        // Check for [ or {
42309        let is_bracket = self.match_token(TokenType::LBracket);
42310        let is_brace = if !is_bracket {
42311            self.match_token(TokenType::LBrace)
42312        } else {
42313            false
42314        };
42315
42316        if !is_bracket && !is_brace {
42317            return Ok(this);
42318        }
42319
42320        // Parse comma-separated expressions inside brackets
42321        let mut expressions: Vec<Expression> = Vec::new();
42322
42323        if is_bracket && !self.check(TokenType::RBracket) {
42324            // Check for slice syntax at the start: [:...] or [:-...]
42325            // This needs to be detected before parse_bracket_key_value which calls parse_primary
42326            // and parse_primary would consume : as a parameter prefix
42327            let first_expr = if self.check(TokenType::Colon) {
42328                // This is slice syntax like [:] or [:-1] or [::step]
42329                // Parse it using slice parser with no 'this'
42330                if let Some(slice) = self.parse_slice()? {
42331                    slice
42332                } else {
42333                    self.parse_expression()?
42334                }
42335            } else if let Ok(Some(expr)) = self.parse_bracket_key_value() {
42336                expr
42337            } else {
42338                // Parse regular expression and check for slice
42339                let expr = self.parse_expression()?;
42340                // Check if followed by colon (slice syntax like [start:end])
42341                if self.check(TokenType::Colon) {
42342                    if let Some(slice) = self.parse_slice_with_this(Some(expr))? {
42343                        slice
42344                    } else {
42345                        return Err(self.parse_error("Failed to parse slice"));
42346                    }
42347                } else {
42348                    expr
42349                }
42350            };
42351
42352            // Check for comprehension syntax: [expr FOR var IN iterator [IF condition]]
42353            if self.match_token(TokenType::For) {
42354                // Parse loop variable - typically a simple identifier like 'x'
42355                let loop_var = self.parse_primary()?;
42356
42357                // Parse optional position (second variable after comma)
42358                let position = if self.match_token(TokenType::Comma) {
42359                    Some(self.parse_primary()?)
42360                } else {
42361                    None
42362                };
42363
42364                // Expect IN keyword
42365                if !self.match_token(TokenType::In) {
42366                    return Err(self.parse_error("Expected IN in comprehension"));
42367                }
42368
42369                // Parse iterator expression
42370                let iterator = self.parse_expression()?;
42371
42372                // Parse optional condition after IF
42373                let condition = if self.match_token(TokenType::If) {
42374                    Some(self.parse_expression()?)
42375                } else {
42376                    None
42377                };
42378
42379                // Expect closing bracket
42380                self.expect(TokenType::RBracket)?;
42381
42382                // Return Comprehension wrapped in an expression
42383                return Ok(Some(Expression::Comprehension(Box::new(Comprehension {
42384                    this: Box::new(first_expr),
42385                    expression: Box::new(loop_var),
42386                    position: position.map(Box::new),
42387                    iterator: Some(Box::new(iterator)),
42388                    condition: condition.map(Box::new),
42389                }))));
42390            }
42391
42392            expressions.push(first_expr);
42393
42394            // Continue parsing remaining expressions
42395            while self.match_token(TokenType::Comma) {
42396                if let Ok(Some(expr)) = self.parse_bracket_key_value() {
42397                    expressions.push(expr);
42398                } else {
42399                    match self.parse_expression() {
42400                        Ok(expr) => expressions.push(expr),
42401                        Err(_) => break,
42402                    }
42403                }
42404            }
42405        } else if is_brace && !self.check(TokenType::RBrace) {
42406            loop {
42407                if let Ok(Some(expr)) = self.parse_bracket_key_value() {
42408                    expressions.push(expr);
42409                } else {
42410                    match self.parse_expression() {
42411                        Ok(expr) => expressions.push(expr),
42412                        Err(_) => break,
42413                    }
42414                }
42415                if !self.match_token(TokenType::Comma) {
42416                    break;
42417                }
42418            }
42419        }
42420
42421        // Expect closing bracket
42422        if is_bracket {
42423            self.expect(TokenType::RBracket)?;
42424        } else if is_brace {
42425            self.expect(TokenType::RBrace)?;
42426        }
42427
42428        // Build the result
42429        if is_brace {
42430            // Struct literal: {key: value, ...}
42431            // Convert expressions to (Option<name>, expr) pairs
42432            let fields: Vec<(Option<String>, Expression)> =
42433                expressions.into_iter().map(|e| (None, e)).collect();
42434            Ok(Some(Expression::Struct(Box::new(Struct { fields }))))
42435        } else if let Some(base_expr) = this {
42436            // Subscript access: base[index]
42437            if expressions.len() == 1 {
42438                Ok(Some(Expression::Subscript(Box::new(Subscript {
42439                    this: base_expr,
42440                    index: expressions.remove(0),
42441                }))))
42442            } else {
42443                // Multiple indices - create nested subscripts or array
42444                let mut result = base_expr;
42445                for expr in expressions {
42446                    result = Expression::Subscript(Box::new(Subscript {
42447                        this: result,
42448                        index: expr,
42449                    }));
42450                }
42451                Ok(Some(result))
42452            }
42453        } else {
42454            // Array literal: [1, 2, 3]
42455            Ok(Some(Expression::Array(Box::new(Array { expressions }))))
42456        }
42457    }
42458
42459    /// parse_bracket_key_value - Ported from Python _parse_bracket_key_value
42460    /// Parses key-value pairs in brackets: key: value or key => value
42461    #[allow(unused_variables, unused_mut)]
42462    pub fn parse_bracket_key_value(&mut self) -> Result<Option<Expression>> {
42463        let saved_pos = self.current;
42464
42465        // Try to parse as key: value or key => value
42466        if let Ok(key) = self.parse_primary() {
42467            // Check for : or =>
42468            if self.match_token(TokenType::Colon) || self.match_text_seq(&["=>"]) {
42469                match self.parse_expression() {
42470                    Ok(value) => {
42471                        // Return as NamedArgument for key-value pair
42472                        // Extract the name from the key (identifier or string literal)
42473                        let name = match &key {
42474                            Expression::Identifier(id) => id.clone(),
42475                            Expression::Literal(crate::expressions::Literal::String(s)) => {
42476                                Identifier::new(s.clone())
42477                            }
42478                            _ => Identifier::new("".to_string()),
42479                        };
42480                        return Ok(Some(Expression::NamedArgument(Box::new(NamedArgument {
42481                            name,
42482                            value,
42483                            separator: NamedArgSeparator::DArrow, // Using DArrow for colon-style key: value
42484                        }))));
42485                    }
42486                    Err(_) => {
42487                        self.current = saved_pos;
42488                        return Ok(None);
42489                    }
42490                }
42491            }
42492            self.current = saved_pos;
42493        }
42494
42495        Ok(None)
42496    }
42497
42498    /// parse_ceil_floor - Implemented from Python _parse_ceil_floor
42499    /// Calls: parse_lambda, parse_var
42500    #[allow(unused_variables, unused_mut)]
42501    pub fn parse_ceil_floor(&mut self) -> Result<Option<Expression>> {
42502        if self.match_text_seq(&["TO"]) {
42503            // Matched: TO
42504            return Ok(None);
42505        }
42506        Ok(None)
42507    }
42508
42509    /// parse_changes - Implemented from Python _parse_changes
42510    /// Parses: CHANGES(INFORMATION => var) AT|BEFORE(...) END(...)
42511    pub fn parse_changes(&mut self) -> Result<Option<Expression>> {
42512        // Match: CHANGES(INFORMATION =>
42513        if !self.match_text_seq(&["CHANGES", "(", "INFORMATION", "=>"]) {
42514            return Ok(None);
42515        }
42516
42517        // Parse information (any token as var)
42518        let information = self.parse_var()?.map(Box::new);
42519
42520        // Match closing paren
42521        self.match_token(TokenType::RParen);
42522
42523        // Parse at_before (Snowflake AT/BEFORE clause)
42524        let at_before = self.parse_historical_data()?.map(Box::new);
42525
42526        // Parse end (optional second historical data clause)
42527        let end = self.parse_historical_data()?.map(Box::new);
42528
42529        Ok(Some(Expression::Changes(Box::new(Changes {
42530            information,
42531            at_before,
42532            end,
42533        }))))
42534    }
42535
42536    /// parse_char - Parses CHAR/CHR function with optional USING charset
42537    /// Python: CHAR(args...) [USING charset]
42538    /// MySQL: CHAR(n1, n2, ... USING charset)
42539    pub fn parse_char(&mut self) -> Result<Option<Expression>> {
42540        // Parse expressions inside CHAR()
42541        let mut args = Vec::new();
42542        loop {
42543            let expr = self.parse_expression()?;
42544            args.push(expr);
42545            if !self.match_token(TokenType::Comma) {
42546                break;
42547            }
42548        }
42549
42550        // Check for USING charset
42551        let charset = if self.match_token(TokenType::Using) {
42552            self.parse_var()?.map(|v| {
42553                if let Expression::Identifier(id) = v {
42554                    id.name
42555                } else {
42556                    String::new()
42557                }
42558            })
42559        } else {
42560            None
42561        };
42562
42563        if args.is_empty() {
42564            return Ok(None);
42565        }
42566
42567        // If there's a charset or multiple args, use CharFunc (MySQL-style)
42568        // Otherwise use simple Chr for single-arg CHR function
42569        if charset.is_some() || args.len() > 1 {
42570            Ok(Some(Expression::CharFunc(Box::new(
42571                crate::expressions::CharFunc {
42572                    args,
42573                    charset,
42574                    name: None, // defaults to CHAR
42575                },
42576            ))))
42577        } else {
42578            Ok(Some(Expression::Chr(Box::new(UnaryFunc::new(
42579                args.into_iter().next().unwrap(),
42580            )))))
42581        }
42582    }
42583
42584    /// parse_character_set - Ported from Python _parse_character_set
42585    #[allow(unused_variables, unused_mut)]
42586    /// parse_character_set - Parses CHARACTER SET property
42587    /// Example: CHARACTER SET = utf8 or CHARACTER SET utf8mb4
42588    pub fn parse_character_set(&mut self) -> Result<Option<Expression>> {
42589        // Optional = sign
42590        self.match_token(TokenType::Eq);
42591
42592        // Parse the charset name (variable or string)
42593        let charset = self.parse_var_or_string()?;
42594        if charset.is_none() {
42595            return Ok(None);
42596        }
42597
42598        Ok(Some(Expression::CharacterSetProperty(Box::new(
42599            CharacterSetProperty {
42600                this: Box::new(charset.unwrap()),
42601                default: None,
42602            },
42603        ))))
42604    }
42605
42606    /// parse_checksum - Implemented from Python _parse_checksum
42607    #[allow(unused_variables, unused_mut)]
42608    pub fn parse_checksum(&mut self) -> Result<Option<Expression>> {
42609        if self.match_text_seq(&["OFF"]) {
42610            return Ok(Some(Expression::ChecksumProperty(Box::new(
42611                ChecksumProperty {
42612                    on: None,
42613                    default: None,
42614                },
42615            ))));
42616        }
42617        Ok(None)
42618    }
42619
42620    /// parse_cluster - CLUSTER BY clause for Hive/Spark-style queries
42621    /// Parses a list of ordered expressions (columns with optional ASC/DESC)
42622    #[allow(unused_variables, unused_mut)]
42623    pub fn parse_cluster(&mut self) -> Result<Option<Expression>> {
42624        let mut expressions: Vec<Ordered> = Vec::new();
42625
42626        loop {
42627            // Parse an ordered expression (column with optional direction)
42628            if let Some(ordered) = self.parse_ordered_item()? {
42629                expressions.push(ordered);
42630            } else {
42631                break;
42632            }
42633
42634            if !self.match_token(TokenType::Comma) {
42635                break;
42636            }
42637        }
42638
42639        if expressions.is_empty() {
42640            return Ok(None);
42641        }
42642
42643        Ok(Some(Expression::ClusterBy(Box::new(ClusterBy {
42644            expressions,
42645        }))))
42646    }
42647
42648    /// parse_clustered_by - Implemented from Python _parse_clustered_by
42649    #[allow(unused_variables, unused_mut)]
42650    pub fn parse_clustered_by(&mut self) -> Result<Option<Expression>> {
42651        if self.match_text_seq(&["BY"]) {
42652            return Ok(Some(Expression::ClusteredByProperty(Box::new(
42653                ClusteredByProperty {
42654                    expressions: Vec::new(),
42655                    sorted_by: None,
42656                    buckets: None,
42657                },
42658            ))));
42659        }
42660        if self.match_text_seq(&["SORTED", "BY"]) {
42661            // Matched: SORTED BY
42662            return Ok(None);
42663        }
42664        Ok(None)
42665    }
42666
42667    /// Parse Snowflake colon JSON path extraction: data:field or data:field.subfield
42668    /// Python: def _parse_colon_as_variant_extract(self, this)
42669    pub fn parse_colon_as_variant_extract(
42670        &mut self,
42671        this: Expression,
42672    ) -> Result<Option<Expression>> {
42673        // Build a JSON path from colon-separated identifiers
42674        // Track whether each segment was quoted (needs bracket notation for spaces/special chars)
42675        let mut json_path: Vec<(String, bool)> = Vec::new();
42676
42677        while self.match_token(TokenType::Colon) {
42678            // Parse the path segment (field name)
42679            if let Some(field) = self.parse_identifier()? {
42680                if let Expression::Identifier(ident) = field {
42681                    json_path.push((
42682                        ident.name.clone(),
42683                        ident.quoted || ident.name.contains(' ') || ident.name.contains('\''),
42684                    ));
42685                }
42686            }
42687
42688            // Check for dot-separated sub-paths
42689            while self.match_token(TokenType::Dot) {
42690                if let Some(subfield) = self.parse_identifier()? {
42691                    if let Expression::Identifier(ident) = subfield {
42692                        json_path.push((
42693                            ident.name.clone(),
42694                            ident.quoted || ident.name.contains(' ') || ident.name.contains('\''),
42695                        ));
42696                    }
42697                }
42698            }
42699        }
42700
42701        if json_path.is_empty() {
42702            return Ok(Some(this));
42703        }
42704
42705        // Build the JSON path expression string
42706        // Use bracket notation for segments with spaces/special chars: a["b c"]
42707        // Use dot notation for simple segments: a.b.c
42708        let mut path_str = String::new();
42709        for (i, (segment, needs_bracket)) in json_path.iter().enumerate() {
42710            if *needs_bracket {
42711                // Bracket notation: ["key with spaces"]
42712                path_str.push('[');
42713                path_str.push('"');
42714                path_str.push_str(segment);
42715                path_str.push('"');
42716                path_str.push(']');
42717            } else {
42718                if i > 0 {
42719                    path_str.push('.');
42720                }
42721                path_str.push_str(segment);
42722            }
42723        }
42724
42725        Ok(Some(Expression::JSONExtract(Box::new(JSONExtract {
42726            this: Box::new(this),
42727            expression: Box::new(Expression::Literal(Literal::String(path_str))),
42728            only_json_types: None,
42729            expressions: Vec::new(),
42730            variant_extract: Some(Box::new(Expression::Boolean(BooleanLiteral {
42731                value: true,
42732            }))),
42733            json_query: None,
42734            option: None,
42735            quote: None,
42736            on_condition: None,
42737            requires_json: None,
42738        }))))
42739    }
42740
42741    /// parse_column - Parse column expression
42742    /// Python: this = self._parse_column_reference(); return self._parse_column_ops(this)
42743    pub fn parse_column(&mut self) -> Result<Option<Expression>> {
42744        // Parse column reference (field name that becomes a Column expression)
42745        let column_ref = self.parse_column_reference()?;
42746        if column_ref.is_some() {
42747            // Apply column ops (bracket subscript, property access with dots, casts)
42748            return self.parse_column_ops_with_expr(column_ref);
42749        }
42750        // Try parsing bracket directly if no column reference
42751        self.parse_bracket()
42752    }
42753
42754    /// parse_column_constraint - Ported from Python _parse_column_constraint
42755    /// Parses column-level constraints like NOT NULL, PRIMARY KEY, UNIQUE, DEFAULT, CHECK, etc.
42756    #[allow(unused_variables, unused_mut)]
42757    pub fn parse_column_constraint(&mut self) -> Result<Option<Expression>> {
42758        // Check for optional CONSTRAINT keyword and name
42759        let constraint_name = if self.match_token(TokenType::Constraint) {
42760            self.parse_id_var()?.and_then(|e| {
42761                if let Expression::Identifier(id) = e {
42762                    Some(id)
42763                } else {
42764                    None
42765                }
42766            })
42767        } else {
42768            None
42769        };
42770
42771        // NOT NULL
42772        if self.match_text_seq(&["NOT", "NULL"]) {
42773            return Ok(Some(Expression::NotNullColumnConstraint(Box::new(
42774                NotNullColumnConstraint { allow_null: None },
42775            ))));
42776        }
42777
42778        // NOT FOR REPLICATION (SQL Server) - must be before NULL check
42779        if self.match_text_seq(&["NOT", "FOR", "REPLICATION"]) {
42780            return Ok(Some(Expression::Property(Box::new(
42781                crate::expressions::Property {
42782                    this: Box::new(Expression::Identifier(Identifier::new(
42783                        "NOT FOR REPLICATION".to_string(),
42784                    ))),
42785                    value: None,
42786                },
42787            ))));
42788        }
42789
42790        // NULL
42791        if self.match_text_seq(&["NULL"]) {
42792            return Ok(Some(Expression::NotNullColumnConstraint(Box::new(
42793                NotNullColumnConstraint {
42794                    allow_null: Some(Box::new(Expression::Boolean(BooleanLiteral {
42795                        value: true,
42796                    }))),
42797                },
42798            ))));
42799        }
42800
42801        // PRIMARY KEY
42802        if self.match_text_seq(&["PRIMARY", "KEY"]) {
42803            return Ok(Some(Expression::PrimaryKeyColumnConstraint(Box::new(
42804                PrimaryKeyColumnConstraint {
42805                    desc: None,
42806                    options: Vec::new(),
42807                },
42808            ))));
42809        }
42810
42811        // UNIQUE
42812        if self.match_text_seq(&["UNIQUE"]) {
42813            // Check for optional KEY/INDEX
42814            let _ = self.match_texts(&["KEY", "INDEX"]);
42815            // Check for NULLS NOT DISTINCT (PostgreSQL 15+ feature)
42816            let nulls = if self.match_text_seq(&["NULLS", "NOT", "DISTINCT"]) {
42817                Some(Box::new(Expression::Boolean(BooleanLiteral {
42818                    value: true,
42819                })))
42820            } else {
42821                None
42822            };
42823            return Ok(Some(Expression::UniqueColumnConstraint(Box::new(
42824                UniqueColumnConstraint {
42825                    this: None,
42826                    index_type: None,
42827                    on_conflict: None,
42828                    nulls,
42829                    options: Vec::new(),
42830                },
42831            ))));
42832        }
42833
42834        // DEFAULT
42835        if self.match_text_seq(&["DEFAULT"]) {
42836            let default_value = self.parse_select_or_expression()?;
42837            if let Some(val) = default_value {
42838                return Ok(Some(Expression::DefaultColumnConstraint(Box::new(
42839                    DefaultColumnConstraint {
42840                        this: Box::new(val),
42841                    },
42842                ))));
42843            }
42844            return Ok(None);
42845        }
42846
42847        // CHECK
42848        if self.match_text_seq(&["CHECK"]) {
42849            if self.match_token(TokenType::LParen) {
42850                let expr = self.parse_select_or_expression()?;
42851                self.match_token(TokenType::RParen);
42852                if let Some(check_expr) = expr {
42853                    return Ok(Some(Expression::CheckColumnConstraint(Box::new(
42854                        CheckColumnConstraint {
42855                            this: Box::new(check_expr),
42856                            enforced: None,
42857                        },
42858                    ))));
42859                }
42860            }
42861            return Ok(None);
42862        }
42863
42864        // REFERENCES (foreign key)
42865        if self.match_text_seq(&["REFERENCES"]) {
42866            let table = self.parse_table_parts()?;
42867            let columns = if self.match_token(TokenType::LParen) {
42868                let mut cols = Vec::new();
42869                loop {
42870                    if let Some(col) = self.parse_id_var()? {
42871                        cols.push(col);
42872                    }
42873                    if !self.match_token(TokenType::Comma) {
42874                        break;
42875                    }
42876                }
42877                self.match_token(TokenType::RParen);
42878                cols
42879            } else {
42880                Vec::new()
42881            };
42882
42883            return Ok(Some(Expression::ForeignKey(Box::new(ForeignKey {
42884                expressions: columns,
42885                reference: table.map(Box::new),
42886                delete: None,
42887                update: None,
42888                options: Vec::new(),
42889            }))));
42890        }
42891
42892        // AUTO_INCREMENT / AUTOINCREMENT / IDENTITY
42893        if self.match_texts(&["AUTO_INCREMENT", "AUTOINCREMENT", "IDENTITY"]) {
42894            // Check for IDENTITY(start, increment) or IDENTITY START x INCREMENT y syntax
42895            let mut start = None;
42896            let mut increment = None;
42897
42898            if self.match_token(TokenType::LParen) {
42899                // Parse (start, increment)
42900                start = self.parse_bitwise()?;
42901                if self.match_token(TokenType::Comma) {
42902                    increment = self.parse_bitwise()?;
42903                }
42904                self.expect(TokenType::RParen)?;
42905            } else if self.match_text_seq(&["START"]) {
42906                // Parse START x INCREMENT y
42907                start = self.parse_bitwise()?;
42908                if self.match_text_seq(&["INCREMENT"]) {
42909                    increment = self.parse_bitwise()?;
42910                }
42911            }
42912
42913            if start.is_some() || increment.is_some() {
42914                return Ok(Some(Expression::GeneratedAsIdentityColumnConstraint(
42915                    Box::new(GeneratedAsIdentityColumnConstraint {
42916                        this: Some(Box::new(Expression::Boolean(BooleanLiteral {
42917                            value: false,
42918                        }))),
42919                        expression: None,
42920                        on_null: None,
42921                        start: start.map(Box::new),
42922                        increment: increment.map(Box::new),
42923                        minvalue: None,
42924                        maxvalue: None,
42925                        cycle: None,
42926                        order: None,
42927                    }),
42928                )));
42929            }
42930            return Ok(Some(Expression::AutoIncrementColumnConstraint(
42931                AutoIncrementColumnConstraint,
42932            )));
42933        }
42934
42935        // COMMENT 'text' - CommentColumnConstraint is a unit struct, use a different expression
42936        if self.match_text_seq(&["COMMENT"]) {
42937            if let Some(comment) = self.parse_string()? {
42938                // Use CommentColumnConstraint unit struct
42939                return Ok(Some(Expression::CommentColumnConstraint(
42940                    CommentColumnConstraint,
42941                )));
42942            }
42943            return Ok(None);
42944        }
42945
42946        // COLLATE collation_name - use CollateProperty instead
42947        if self.match_text_seq(&["COLLATE"]) {
42948            if let Some(collation) = self.parse_id_var()? {
42949                return Ok(Some(Expression::CollateProperty(Box::new(
42950                    CollateProperty {
42951                        this: Box::new(collation),
42952                        default: None,
42953                    },
42954                ))));
42955            }
42956            return Ok(None);
42957        }
42958
42959        // ClickHouse dictionary column attributes: HIERARCHICAL, IS_OBJECT_ID, INJECTIVE
42960        if matches!(
42961            self.config.dialect,
42962            Some(crate::dialects::DialectType::ClickHouse)
42963        ) {
42964            if self.match_texts(&["HIERARCHICAL", "IS_OBJECT_ID", "INJECTIVE"]) {
42965                let attr_name = self.previous().text.to_uppercase();
42966                return Ok(Some(Expression::Property(Box::new(
42967                    crate::expressions::Property {
42968                        this: Box::new(Expression::Identifier(Identifier::new(attr_name))),
42969                        value: None,
42970                    },
42971                ))));
42972            }
42973            // ClickHouse EXPRESSION expr and ALIAS expr (dictionary column attributes)
42974            if self.match_texts(&["EXPRESSION"]) {
42975                let expr = self.parse_expression()?;
42976                return Ok(Some(Expression::DefaultColumnConstraint(Box::new(
42977                    DefaultColumnConstraint {
42978                        this: Box::new(expr),
42979                    },
42980                ))));
42981            }
42982        }
42983
42984        // GENERATED ... AS IDENTITY
42985        if self.match_text_seq(&["GENERATED"]) {
42986            let always = self.match_text_seq(&["ALWAYS"]);
42987            if !always {
42988                self.match_text_seq(&["BY", "DEFAULT"]);
42989            }
42990            let on_null = self.match_text_seq(&["ON", "NULL"]);
42991            if self.match_text_seq(&["AS", "IDENTITY"]) {
42992                return Ok(Some(Expression::GeneratedAsIdentityColumnConstraint(
42993                    Box::new(GeneratedAsIdentityColumnConstraint {
42994                        this: None,
42995                        expression: None,
42996                        on_null: if on_null {
42997                            Some(Box::new(Expression::Boolean(BooleanLiteral {
42998                                value: true,
42999                            })))
43000                        } else {
43001                            None
43002                        },
43003                        start: None,
43004                        increment: None,
43005                        minvalue: None,
43006                        maxvalue: None,
43007                        cycle: None,
43008                        order: None,
43009                    }),
43010                )));
43011            }
43012            return Ok(None);
43013        }
43014
43015        // PATH 'xpath' - for XMLTABLE/JSON_TABLE columns
43016        if self.match_text_seq(&["PATH"]) {
43017            if let Some(path_expr) = self.parse_string()? {
43018                return Ok(Some(Expression::PathColumnConstraint(Box::new(
43019                    PathColumnConstraint {
43020                        this: Box::new(path_expr),
43021                    },
43022                ))));
43023            }
43024            return Ok(None);
43025        }
43026
43027        // Return the constraint name if we matched CONSTRAINT but no actual constraint
43028        if let Some(name) = constraint_name {
43029            return Ok(Some(Expression::Identifier(name)));
43030        }
43031
43032        Ok(None)
43033    }
43034
43035    /// parse_column_def_with_exists - Ported from Python _parse_column_def_with_exists
43036    /// Parses a column definition with optional IF [NOT] EXISTS clause
43037    #[allow(unused_variables, unused_mut)]
43038    pub fn parse_column_def_with_exists(&mut self) -> Result<Option<Expression>> {
43039        let start = self.current;
43040
43041        // Optionally match COLUMN keyword
43042        let _ = self.match_text_seq(&["COLUMN"]);
43043
43044        // Check for IF NOT EXISTS
43045        let not_exists = self.match_text_seq(&["IF", "NOT", "EXISTS"]);
43046        let exists = if !not_exists {
43047            self.match_text_seq(&["IF", "EXISTS"])
43048        } else {
43049            false
43050        };
43051
43052        // Parse the field definition
43053        let expression = self.parse_field_def()?;
43054
43055        if expression.is_none() {
43056            self.current = start;
43057            return Ok(None);
43058        }
43059
43060        // If it's a ColumnDef, we're good
43061        if let Some(Expression::ColumnDef(ref _col_def)) = expression {
43062            // The exists flag would be set on the ColumnDef, but our struct doesn't have that field
43063            // Just return the expression as-is
43064            return Ok(expression);
43065        }
43066
43067        // Not a ColumnDef, backtrack
43068        self.current = start;
43069        Ok(None)
43070    }
43071
43072    /// parse_column_ops - Parses column operations (stub for compatibility)
43073    pub fn parse_column_ops(&mut self) -> Result<Option<Expression>> {
43074        self.parse_column_ops_with_expr(None)
43075    }
43076
43077    /// parse_column_ops_with_expr - Parses column operations (dot access, brackets, casts)
43078    /// Python: _parse_column_ops(this)
43079    pub fn parse_column_ops_with_expr(
43080        &mut self,
43081        this: Option<Expression>,
43082    ) -> Result<Option<Expression>> {
43083        // First apply any bracket subscripts
43084        let mut result = if let Some(expr) = this {
43085            if self.match_token(TokenType::LBracket) {
43086                let index = self.parse_disjunction()?;
43087                self.match_token(TokenType::RBracket);
43088                if let Some(idx) = index {
43089                    Some(Expression::Subscript(Box::new(Subscript {
43090                        this: expr,
43091                        index: idx,
43092                    })))
43093                } else {
43094                    Some(expr)
43095                }
43096            } else {
43097                Some(expr)
43098            }
43099        } else {
43100            None
43101        };
43102
43103        // Handle DOT for qualified column names: table.column or schema.table.column
43104        while self.match_token(TokenType::Dot) {
43105            if result.is_none() {
43106                break;
43107            }
43108            // Handle .* (qualified star) with modifiers
43109            if self.match_token(TokenType::Star) {
43110                // Determine table name from the expression
43111                let table_name = match &result {
43112                    Some(Expression::Column(col)) if col.table.is_none() => Some(col.name.clone()),
43113                    Some(Expression::Dot(dot)) => {
43114                        // For deep qualified names like schema.table.*, use the whole expression name
43115                        fn dot_to_name(expr: &Expression) -> String {
43116                            match expr {
43117                                Expression::Column(col) => {
43118                                    if let Some(ref table) = col.table {
43119                                        format!("{}.{}", table.name, col.name.name)
43120                                    } else {
43121                                        col.name.name.clone()
43122                                    }
43123                                }
43124                                Expression::Dot(d) => {
43125                                    format!("{}.{}", dot_to_name(&d.this), d.field.name)
43126                                }
43127                                _ => String::new(),
43128                            }
43129                        }
43130                        Some(Identifier::new(dot_to_name(&Expression::Dot(dot.clone()))))
43131                    }
43132                    _ => None,
43133                };
43134                let star = self.parse_star_modifiers(table_name)?;
43135                result = Some(Expression::Star(star));
43136                break;
43137            }
43138            // Parse the field identifier - use is_identifier_or_keyword_token to allow keywords
43139            // like "schema" as field names in dot access
43140            // ClickHouse: also allow numeric tuple index access like expr.1, expr.2
43141            if self.is_identifier_or_keyword_token()
43142                || self.check(TokenType::QuotedIdentifier)
43143                || (matches!(
43144                    self.config.dialect,
43145                    Some(crate::dialects::DialectType::ClickHouse)
43146                ) && self.check(TokenType::Number))
43147            {
43148                let token = self.advance();
43149                let field_ident = Identifier {
43150                    name: token.text,
43151                    quoted: token.token_type == TokenType::QuotedIdentifier,
43152                    trailing_comments: Vec::new(),
43153                    span: None,
43154                };
43155                result = Some(Expression::Dot(Box::new(DotAccess {
43156                    this: result.take().unwrap(),
43157                    field: field_ident,
43158                })));
43159            } else {
43160                break;
43161            }
43162        }
43163
43164        // Handle EXCLAMATION for Snowflake model attribute syntax: model!PREDICT(...)
43165        if self.match_token(TokenType::Exclamation) {
43166            if let Some(expr) = result.take() {
43167                // Parse the attribute/function after the exclamation mark
43168                // This can be either a simple identifier (model!admin) or a function call (model!PREDICT(1))
43169                let attr = self.parse_unary()?;
43170                result = Some(Expression::ModelAttribute(Box::new(ModelAttribute {
43171                    this: Box::new(expr),
43172                    expression: Box::new(attr),
43173                })));
43174            }
43175        }
43176
43177        // Handle DCOLON for casts (PostgreSQL syntax: column::type)
43178        if self.match_token(TokenType::DColon) {
43179            if let Some(type_expr) = self.parse_types()? {
43180                if let Some(expr) = result {
43181                    // Extract DataType from the expression
43182                    let data_type = match type_expr {
43183                        Expression::DataType(dt) => dt,
43184                        _ => {
43185                            result = Some(expr);
43186                            return Ok(result);
43187                        }
43188                    };
43189                    result = Some(Expression::Cast(Box::new(Cast {
43190                        this: expr,
43191                        to: data_type,
43192                        trailing_comments: Vec::new(),
43193                        double_colon_syntax: true,
43194                        format: None,
43195                        default: None,
43196                        inferred_type: None,
43197                    })));
43198                }
43199            }
43200        }
43201
43202        // Teradata: (FORMAT '...') phrase after a column/expression
43203        if matches!(
43204            self.config.dialect,
43205            Some(crate::dialects::DialectType::Teradata)
43206        ) && self.check(TokenType::LParen)
43207            && self.check_next(TokenType::Format)
43208        {
43209            self.advance(); // consume (
43210            self.advance(); // consume FORMAT
43211            let format = self.expect_string()?;
43212            self.expect(TokenType::RParen)?;
43213            if let Some(expr) = result.take() {
43214                result = Some(Expression::FormatPhrase(Box::new(FormatPhrase {
43215                    this: Box::new(expr),
43216                    format,
43217                })));
43218            }
43219        }
43220
43221        Ok(result)
43222    }
43223
43224    /// parse_column_reference - Parse column reference (field -> Column)
43225    /// Python: this = self._parse_field(); if isinstance(this, exp.Identifier): return exp.Column(this=this)
43226    pub fn parse_column_reference(&mut self) -> Result<Option<Expression>> {
43227        // Parse the field (identifier or literal)
43228        if let Some(field) = self.parse_field()? {
43229            // If it's an identifier, wrap it in a Column expression
43230            match &field {
43231                Expression::Identifier(id) => {
43232                    return Ok(Some(Expression::Column(Column {
43233                        name: id.clone(),
43234                        table: None,
43235                        join_mark: false,
43236                        trailing_comments: Vec::new(),
43237                        span: None,
43238                        inferred_type: None,
43239                    })));
43240                }
43241                // If it's already something else (like a literal), return as-is
43242                _ => return Ok(Some(field)),
43243            }
43244        }
43245        Ok(None)
43246    }
43247
43248    /// parse_command - Parses a generic SQL command
43249    /// Python: _parse_command
43250    /// Used for commands that we don't have specific parsing for
43251    pub fn parse_command(&mut self) -> Result<Option<Expression>> {
43252        // Get the command keyword from the previous token
43253        let command_text = self.previous().text.to_uppercase();
43254
43255        // Collect remaining tokens as the command expression (until statement end)
43256        // Use (text, token_type) tuples for smart spacing with join_command_tokens
43257        let mut tokens: Vec<(String, TokenType)> = vec![(command_text, TokenType::Var)];
43258        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
43259            let token = self.advance();
43260            // Preserve quotes for quoted identifiers and strings
43261            let text = if token.token_type == TokenType::QuotedIdentifier {
43262                // Re-add the identifier quote characters
43263                // Use backticks as default; this handles MySQL backtick-quoted identifiers
43264                // and double-quoted identifiers for other dialects
43265                let quote_char = if self.config.dialect == Some(crate::dialects::DialectType::MySQL)
43266                    || self.config.dialect == Some(crate::dialects::DialectType::SingleStore)
43267                    || self.config.dialect == Some(crate::dialects::DialectType::Doris)
43268                    || self.config.dialect == Some(crate::dialects::DialectType::StarRocks)
43269                {
43270                    '`'
43271                } else {
43272                    '"'
43273                };
43274                format!("{}{}{}", quote_char, token.text, quote_char)
43275            } else if token.token_type == TokenType::String {
43276                format!("'{}'", token.text)
43277            } else {
43278                token.text.clone()
43279            };
43280            tokens.push((text, token.token_type));
43281        }
43282
43283        Ok(Some(Expression::Command(Box::new(Command {
43284            this: self.join_command_tokens(tokens),
43285        }))))
43286    }
43287
43288    /// parse_commit_or_rollback - Implemented from Python _parse_commit_or_rollback
43289    #[allow(unused_variables, unused_mut)]
43290    pub fn parse_commit_or_rollback(&mut self) -> Result<Option<Expression>> {
43291        if self.match_text_seq(&["TO"]) {
43292            return Ok(Some(Expression::Rollback(Box::new(Rollback {
43293                savepoint: None,
43294                this: None,
43295            }))));
43296        }
43297        if self.match_text_seq(&["SAVEPOINT"]) {
43298            // Matched: SAVEPOINT
43299            return Ok(None);
43300        }
43301        Ok(None)
43302    }
43303
43304    /// parse_composite_key_property - Implemented from Python _parse_composite_key_property
43305    #[allow(unused_variables, unused_mut)]
43306    pub fn parse_composite_key_property(&mut self) -> Result<Option<Expression>> {
43307        if self.match_text_seq(&["KEY"]) {
43308            // Matched: KEY
43309            return Ok(None);
43310        }
43311        Ok(None)
43312    }
43313
43314    /// parse_comprehension - Implemented from Python _parse_comprehension
43315    /// Parses list comprehension: expr FOR var [, position] IN iterator [IF condition]
43316    pub fn parse_comprehension(&mut self, this: Option<Expression>) -> Result<Option<Expression>> {
43317        let start_index = self.current;
43318
43319        // Parse expression (column)
43320        let expression = self.parse_column()?;
43321
43322        // Parse optional position (if comma follows)
43323        let position = if self.match_token(TokenType::Comma) {
43324            self.parse_column()?.map(Box::new)
43325        } else {
43326            None
43327        };
43328
43329        // Must have IN keyword
43330        if !self.match_token(TokenType::In) {
43331            // Backtrack
43332            self.current = start_index.saturating_sub(1);
43333            return Ok(None);
43334        }
43335
43336        // Parse iterator
43337        let iterator = self.parse_column()?.map(Box::new);
43338
43339        // Parse optional condition (IF followed by expression)
43340        let condition = if self.match_text_seq(&["IF"]) {
43341            self.parse_disjunction()?.map(Box::new)
43342        } else {
43343            None
43344        };
43345
43346        // Build the comprehension expression
43347        match (this, expression) {
43348            (Some(t), Some(e)) => Ok(Some(Expression::Comprehension(Box::new(Comprehension {
43349                this: Box::new(t),
43350                expression: Box::new(e),
43351                position,
43352                iterator,
43353                condition,
43354            })))),
43355            _ => Ok(None),
43356        }
43357    }
43358
43359    /// parse_compress - Parses COMPRESS column constraint (Teradata)
43360    /// Python: _parse_compress
43361    /// Format: COMPRESS or COMPRESS (value1, value2, ...)
43362    pub fn parse_compress(&mut self) -> Result<Option<Expression>> {
43363        // Check if it's a parenthesized list of values
43364        if self.check(TokenType::LParen) {
43365            // Parse wrapped CSV of bitwise expressions
43366            self.advance(); // consume LParen
43367            let mut expressions = Vec::new();
43368            loop {
43369                if let Some(expr) = self.parse_bitwise()? {
43370                    expressions.push(expr);
43371                } else {
43372                    break;
43373                }
43374                if !self.match_token(TokenType::Comma) {
43375                    break;
43376                }
43377            }
43378            self.expect(TokenType::RParen)?;
43379
43380            // Wrap in a Tuple if multiple values
43381            let this = if expressions.len() == 1 {
43382                Some(Box::new(expressions.into_iter().next().unwrap()))
43383            } else if expressions.is_empty() {
43384                None
43385            } else {
43386                Some(Box::new(Expression::Tuple(Box::new(Tuple { expressions }))))
43387            };
43388
43389            Ok(Some(Expression::CompressColumnConstraint(Box::new(
43390                CompressColumnConstraint { this },
43391            ))))
43392        } else {
43393            // Single value or no value
43394            let this = self.parse_bitwise()?.map(Box::new);
43395            Ok(Some(Expression::CompressColumnConstraint(Box::new(
43396                CompressColumnConstraint { this },
43397            ))))
43398        }
43399    }
43400
43401    /// parse_conjunction - Parses AND expressions
43402    /// Python: _parse_conjunction
43403    /// Delegates to the existing parse_and in the operator precedence chain
43404    pub fn parse_conjunction(&mut self) -> Result<Option<Expression>> {
43405        match self.parse_and() {
43406            Ok(expr) => Ok(Some(expr)),
43407            Err(_) => Ok(None),
43408        }
43409    }
43410
43411    /// parse_connect_with_prior - Parses expression in CONNECT BY context with PRIOR support
43412    /// Python: _parse_connect_with_prior
43413    /// This method temporarily treats PRIOR as a prefix operator while parsing the expression
43414    pub fn parse_connect_with_prior(&mut self) -> Result<Option<Expression>> {
43415        // parse_connect_expression already handles PRIOR as a prefix operator
43416        let connect = self.parse_connect_expression()?;
43417        Ok(Some(connect))
43418    }
43419
43420    /// parse_constraint - Parses named or unnamed constraint
43421    /// Python: _parse_constraint
43422    pub fn parse_constraint(&mut self) -> Result<Option<Expression>> {
43423        // Check for CONSTRAINT keyword (named constraint)
43424        if !self.match_token(TokenType::Constraint) {
43425            // Try to parse an unnamed constraint
43426            return self.parse_unnamed_constraint();
43427        }
43428
43429        // Parse the constraint name
43430        let name = self.parse_id_var()?;
43431        if name.is_none() {
43432            return Ok(None);
43433        }
43434
43435        // Parse the constraint expressions (PRIMARY KEY, UNIQUE, FOREIGN KEY, CHECK, etc.)
43436        let expressions = self.parse_unnamed_constraints()?;
43437
43438        Ok(Some(Expression::Constraint(Box::new(Constraint {
43439            this: Box::new(name.unwrap()),
43440            expressions,
43441        }))))
43442    }
43443
43444    /// parse_unnamed_constraints - Parses multiple unnamed constraints
43445    /// Python: _parse_unnamed_constraints
43446    pub fn parse_unnamed_constraints(&mut self) -> Result<Vec<Expression>> {
43447        let mut constraints = Vec::new();
43448
43449        loop {
43450            if let Some(constraint) = self.parse_unnamed_constraint()? {
43451                constraints.push(constraint);
43452            } else {
43453                break;
43454            }
43455        }
43456
43457        Ok(constraints)
43458    }
43459
43460    /// parse_unnamed_constraint - Parses a single unnamed constraint
43461    /// Python: _parse_unnamed_constraint
43462    pub fn parse_unnamed_constraint(&mut self) -> Result<Option<Expression>> {
43463        // Try PRIMARY KEY
43464        if self.match_text_seq(&["PRIMARY", "KEY"]) {
43465            // ClickHouse: PRIMARY KEY expr (without parens) in schema = table-level PK expression
43466            if matches!(
43467                self.config.dialect,
43468                Some(crate::dialects::DialectType::ClickHouse)
43469            ) && !self.check(TokenType::LParen)
43470            {
43471                let expr = self.parse_expression()?;
43472                return Ok(Some(Expression::Raw(Raw {
43473                    sql: format!("PRIMARY KEY {}", expr),
43474                })));
43475            }
43476            return self.parse_primary_key();
43477        }
43478
43479        // Try UNIQUE
43480        if self.match_texts(&["UNIQUE"]) {
43481            return self.parse_unique();
43482        }
43483
43484        // Try FOREIGN KEY
43485        if self.match_text_seq(&["FOREIGN", "KEY"]) {
43486            return self.parse_foreign_key();
43487        }
43488
43489        // Try CHECK
43490        if self.match_texts(&["CHECK"]) {
43491            let expr = self.parse_wrapped()?;
43492            if let Some(check_expr) = expr {
43493                return Ok(Some(Expression::CheckColumnConstraint(Box::new(
43494                    CheckColumnConstraint {
43495                        this: Box::new(check_expr),
43496                        enforced: None,
43497                    },
43498                ))));
43499            }
43500        }
43501
43502        // Try NOT NULL
43503        if self.match_text_seq(&["NOT", "NULL"]) {
43504            return Ok(Some(Expression::NotNullColumnConstraint(Box::new(
43505                NotNullColumnConstraint {
43506                    allow_null: None, // NOT NULL means allow_null is not set
43507                },
43508            ))));
43509        }
43510
43511        // Try NULL (allow null)
43512        if self.match_texts(&["NULL"]) {
43513            return Ok(Some(Expression::NotNullColumnConstraint(Box::new(
43514                NotNullColumnConstraint {
43515                    allow_null: Some(Box::new(Expression::Boolean(BooleanLiteral {
43516                        value: true,
43517                    }))),
43518                },
43519            ))));
43520        }
43521
43522        // Try DEFAULT
43523        if self.match_token(TokenType::Default) {
43524            let default_value = self.parse_bitwise()?;
43525            if let Some(val) = default_value {
43526                return Ok(Some(Expression::DefaultColumnConstraint(Box::new(
43527                    DefaultColumnConstraint {
43528                        this: Box::new(val),
43529                    },
43530                ))));
43531            }
43532        }
43533
43534        // Try REFERENCES (inline foreign key)
43535        if self.match_texts(&["REFERENCES"]) {
43536            return self.parse_references();
43537        }
43538
43539        // ClickHouse: INDEX name expr TYPE type_name [GRANULARITY n]
43540        if matches!(
43541            self.config.dialect,
43542            Some(crate::dialects::DialectType::ClickHouse)
43543        ) && self.match_token(TokenType::Index)
43544        {
43545            let name = self.expect_identifier_or_keyword_with_quoted()?;
43546            // Use parse_conjunction to handle comparisons like c0 < (SELECT _table)
43547            let expression = self.parse_conjunction()?.ok_or_else(|| {
43548                self.parse_error("Expected expression in ClickHouse INDEX definition")
43549            })?;
43550            let index_type = if self.match_token(TokenType::Type) {
43551                if let Some(func) = self.parse_function()? {
43552                    Some(Box::new(func))
43553                } else if !self.is_at_end() {
43554                    let type_name = self.advance().text.clone();
43555                    if self.check(TokenType::LParen) {
43556                        self.advance();
43557                        let mut args = Vec::new();
43558                        if !self.check(TokenType::RParen) {
43559                            args.push(self.parse_expression()?);
43560                            while self.match_token(TokenType::Comma) {
43561                                args.push(self.parse_expression()?);
43562                            }
43563                        }
43564                        self.expect(TokenType::RParen)?;
43565                        Some(Box::new(Expression::Function(Box::new(Function::new(
43566                            type_name, args,
43567                        )))))
43568                    } else {
43569                        Some(Box::new(Expression::Identifier(Identifier::new(type_name))))
43570                    }
43571                } else {
43572                    None
43573                }
43574            } else {
43575                None
43576            };
43577            let _granularity = if self.match_identifier("GRANULARITY") {
43578                let _ = self.parse_expression()?;
43579                true
43580            } else {
43581                false
43582            };
43583            // Return as a raw SQL expression preserving the INDEX definition
43584            let mut sql = format!("INDEX {} ", name.name);
43585            if let Some(ref idx_type) = index_type {
43586                sql.push_str(&format!("{} TYPE {} ", expression, idx_type));
43587            }
43588            return Ok(Some(Expression::Raw(Raw {
43589                sql: sql.trim().to_string(),
43590            })));
43591        }
43592
43593        // ClickHouse: PROJECTION name (SELECT ...) or PROJECTION name INDEX expr TYPE type_name
43594        if matches!(
43595            self.config.dialect,
43596            Some(crate::dialects::DialectType::ClickHouse)
43597        ) && self.check_identifier("PROJECTION")
43598        {
43599            self.advance(); // consume PROJECTION
43600            let name = self.expect_identifier_or_keyword_with_quoted()?;
43601            // Parse the projection body - either (SELECT ...) or INDEX expr TYPE type_name
43602            if self.match_token(TokenType::LParen) {
43603                let mut depth = 1i32;
43604                let start = self.current;
43605                while !self.is_at_end() && depth > 0 {
43606                    if self.check(TokenType::LParen) {
43607                        depth += 1;
43608                    }
43609                    if self.check(TokenType::RParen) {
43610                        depth -= 1;
43611                        if depth == 0 {
43612                            break;
43613                        }
43614                    }
43615                    self.advance();
43616                }
43617                let body_sql = self.tokens_to_sql(start, self.current);
43618                self.expect(TokenType::RParen)?;
43619                return Ok(Some(Expression::Raw(Raw {
43620                    sql: format!("PROJECTION {} ({})", name.name, body_sql),
43621                })));
43622            }
43623            // PROJECTION name INDEX expr TYPE type_name
43624            if self.match_token(TokenType::Index) {
43625                let expr = self.parse_bitwise()?.ok_or_else(|| {
43626                    self.parse_error(
43627                        "Expected expression in ClickHouse PROJECTION INDEX definition",
43628                    )
43629                })?;
43630                let type_str = if self.match_token(TokenType::Type) {
43631                    if !self.is_at_end() {
43632                        let t = self.advance().text.clone();
43633                        format!(" TYPE {}", t)
43634                    } else {
43635                        String::new()
43636                    }
43637                } else {
43638                    String::new()
43639                };
43640                return Ok(Some(Expression::Raw(Raw {
43641                    sql: format!("PROJECTION {} INDEX {}{}", name.name, expr, type_str),
43642                })));
43643            }
43644            return Ok(Some(Expression::Raw(Raw {
43645                sql: format!("PROJECTION {}", name.name),
43646            })));
43647        }
43648
43649        Ok(None)
43650    }
43651
43652    /// parse_contains_property - Implemented from Python _parse_contains_property
43653    #[allow(unused_variables, unused_mut)]
43654    pub fn parse_contains_property(&mut self) -> Result<Option<Expression>> {
43655        if self.match_text_seq(&["SQL"]) {
43656            // Matched: SQL
43657            return Ok(None);
43658        }
43659        Ok(None)
43660    }
43661
43662    /// parse_convert - Ported from Python _parse_convert
43663    /// Parses CONVERT function: CONVERT(expr USING charset) or CONVERT(expr, type)
43664    #[allow(unused_variables, unused_mut)]
43665    pub fn parse_convert(&mut self) -> Result<Option<Expression>> {
43666        // Parse the expression to convert
43667        let this = match self.parse_bitwise() {
43668            Ok(Some(expr)) => expr,
43669            Ok(None) => return Ok(None),
43670            Err(e) => return Err(e),
43671        };
43672
43673        // Check for USING charset (CONVERT(x USING utf8))
43674        if self.match_token(TokenType::Using) {
43675            let _ = self.parse_var(); // charset
43676                                      // Return as Cast with charset
43677            return Ok(Some(Expression::Cast(Box::new(Cast {
43678                this,
43679                to: DataType::Char { length: None },
43680                trailing_comments: Vec::new(),
43681                double_colon_syntax: false,
43682                format: None,
43683                default: None,
43684                inferred_type: None,
43685            }))));
43686        }
43687
43688        // Check for comma then type (CONVERT(x, INT))
43689        if self.match_token(TokenType::Comma) {
43690            let data_type = self.parse_data_type()?;
43691            return Ok(Some(Expression::Cast(Box::new(Cast {
43692                this,
43693                to: data_type,
43694                trailing_comments: Vec::new(),
43695                double_colon_syntax: false,
43696                format: None,
43697                default: None,
43698                inferred_type: None,
43699            }))));
43700        }
43701
43702        // No type specified, return as-is wrapped in Cast
43703        Ok(Some(Expression::Cast(Box::new(Cast {
43704            this,
43705            to: DataType::Char { length: None },
43706            trailing_comments: Vec::new(),
43707            double_colon_syntax: false,
43708            format: None,
43709            default: None,
43710            inferred_type: None,
43711        }))))
43712    }
43713
43714    /// parse_copy_parameters - Implemented from Python _parse_copy_parameters
43715    /// parse_copy_parameters - Parses COPY statement parameters
43716    /// Returns a tuple of CopyParameter expressions
43717    pub fn parse_copy_parameters(&mut self) -> Result<Option<Expression>> {
43718        let mut options = Vec::new();
43719
43720        while !self.is_at_end() && !self.check(TokenType::RParen) {
43721            // Parse option name as var
43722            let option = self.parse_var()?;
43723            if option.is_none() {
43724                break;
43725            }
43726
43727            let option_name = match &option {
43728                Some(Expression::Var(v)) => v.this.to_uppercase(),
43729                Some(Expression::Identifier(id)) => id.name.to_uppercase(),
43730                _ => String::new(),
43731            };
43732
43733            // Options and values may be separated by whitespace, "=" or "AS"
43734            self.match_token(TokenType::Eq);
43735            self.match_token(TokenType::Alias);
43736
43737            // Parse value based on option type
43738            let (expression, expressions) = if (option_name == "FILE_FORMAT"
43739                || option_name == "FORMAT_OPTIONS")
43740                && self.check(TokenType::LParen)
43741            {
43742                // Parse wrapped options for FILE_FORMAT
43743                let wrapped = self.parse_wrapped_options()?;
43744                let exprs = match wrapped {
43745                    Some(Expression::Tuple(t)) => t.expressions,
43746                    Some(e) => vec![e],
43747                    None => Vec::new(),
43748                };
43749                (None, exprs)
43750            } else if option_name == "FILE_FORMAT" {
43751                // T-SQL external file format case
43752                let field = self.parse_field()?;
43753                (field, Vec::new())
43754            } else if option_name == "FORMAT"
43755                && self.previous().token_type == TokenType::Alias
43756                && self.match_texts(&["AVRO", "JSON"])
43757            {
43758                // FORMAT AS AVRO/JSON
43759                let format_type = self.previous().text.to_uppercase();
43760                let field = self.parse_field()?;
43761                (
43762                    Some(Expression::Var(Box::new(Var {
43763                        this: format!("FORMAT AS {}", format_type),
43764                    }))),
43765                    field.map_or(Vec::new(), |f| vec![f]),
43766                )
43767            } else {
43768                // Parse unquoted field or bracket
43769                let expr = self
43770                    .parse_unquoted_field()?
43771                    .or_else(|| self.parse_bracket().ok().flatten());
43772                (expr, Vec::new())
43773            };
43774
43775            options.push(Expression::CopyParameter(Box::new(CopyParameter {
43776                name: option_name,
43777                value: expression,
43778                values: expressions,
43779                eq: true,
43780            })));
43781
43782            // Optional comma separator (dialect-specific)
43783            self.match_token(TokenType::Comma);
43784        }
43785
43786        if options.is_empty() {
43787            Ok(None)
43788        } else {
43789            Ok(Some(Expression::Tuple(Box::new(Tuple {
43790                expressions: options,
43791            }))))
43792        }
43793    }
43794
43795    /// parse_copy_property - Implemented from Python _parse_copy_property
43796    #[allow(unused_variables, unused_mut)]
43797    pub fn parse_copy_property(&mut self) -> Result<Option<Expression>> {
43798        if self.match_text_seq(&["GRANTS"]) {
43799            // Matched: GRANTS
43800            return Ok(None);
43801        }
43802        Ok(None)
43803    }
43804
43805    /// parse_create_like - Implemented from Python _parse_create_like
43806    /// Calls: parse_id_var
43807    #[allow(unused_variables, unused_mut)]
43808    pub fn parse_create_like(&mut self) -> Result<Option<Expression>> {
43809        if self.match_texts(&["INCLUDING", "EXCLUDING"]) {
43810            // Matched one of: INCLUDING, EXCLUDING
43811            return Ok(None);
43812        }
43813        Ok(None)
43814    }
43815
43816    /// parse_credentials - Implemented from Python _parse_credentials
43817    #[allow(unused_variables, unused_mut)]
43818    pub fn parse_credentials(&mut self) -> Result<Option<Expression>> {
43819        if self.match_text_seq(&["STORAGE_INTEGRATION", "="]) {
43820            return Ok(Some(Expression::Credentials(Box::new(Credentials {
43821                credentials: Vec::new(),
43822                encryption: None,
43823                storage: None,
43824            }))));
43825        }
43826        if self.match_text_seq(&["CREDENTIALS"]) {
43827            // Matched: CREDENTIALS
43828            return Ok(None);
43829        }
43830        Ok(None)
43831    }
43832
43833    /// parse_csv - Parses comma-separated expressions
43834    /// Python: _parse_csv
43835    /// In Python this takes a parse_method callback, but in Rust we use parse_expression_list
43836    pub fn parse_csv(&mut self) -> Result<Option<Expression>> {
43837        let expressions = self.parse_expression_list()?;
43838        if expressions.is_empty() {
43839            return Ok(None);
43840        }
43841        Ok(Some(Expression::Tuple(Box::new(Tuple { expressions }))))
43842    }
43843
43844    /// parse_cte - Implemented from Python _parse_cte
43845    /// Calls: parse_wrapped_id_vars
43846    #[allow(unused_variables, unused_mut)]
43847    pub fn parse_cte(&mut self) -> Result<Option<Expression>> {
43848        if self.match_text_seq(&["USING", "KEY"]) {
43849            return Ok(Some(Expression::Values(Box::new(Values {
43850                expressions: Vec::new(),
43851                alias: None,
43852                column_aliases: Vec::new(),
43853            }))));
43854        }
43855        if self.match_text_seq(&["NOT", "MATERIALIZED"]) {
43856            // Matched: NOT MATERIALIZED
43857            return Ok(None);
43858        }
43859        if self.match_text_seq(&["MATERIALIZED"]) {
43860            // Matched: MATERIALIZED
43861            return Ok(None);
43862        }
43863        Ok(None)
43864    }
43865
43866    /// parse_cube_or_rollup - Ported from Python _parse_cube_or_rollup
43867    /// Parses CUBE(...) or ROLLUP(...) expressions in GROUP BY
43868    #[allow(unused_variables, unused_mut)]
43869    pub fn parse_cube_or_rollup(&mut self) -> Result<Option<Expression>> {
43870        // Check for CUBE or ROLLUP keyword
43871        let is_cube = self.match_texts(&["CUBE"]);
43872        let is_rollup = if !is_cube {
43873            self.match_texts(&["ROLLUP"])
43874        } else {
43875            false
43876        };
43877
43878        if !is_cube && !is_rollup {
43879            return Ok(None);
43880        }
43881
43882        // Parse wrapped expressions
43883        self.expect(TokenType::LParen)?;
43884        let mut expressions = Vec::new();
43885        if !self.check(TokenType::RParen) {
43886            loop {
43887                match self.parse_bitwise() {
43888                    Ok(Some(expr)) => expressions.push(expr),
43889                    Ok(None) => break,
43890                    Err(e) => return Err(e),
43891                }
43892                if !self.match_token(TokenType::Comma) {
43893                    break;
43894                }
43895            }
43896        }
43897        self.expect(TokenType::RParen)?;
43898
43899        if is_cube {
43900            Ok(Some(Expression::Cube(Box::new(Cube { expressions }))))
43901        } else {
43902            Ok(Some(Expression::Rollup(Box::new(Rollup { expressions }))))
43903        }
43904    }
43905
43906    /// parse_data_deletion_property - Implemented from Python _parse_data_deletion_property
43907    /// Calls: parse_column, parse_retention_period
43908    #[allow(unused_variables, unused_mut)]
43909    pub fn parse_data_deletion_property(&mut self) -> Result<Option<Expression>> {
43910        if self.match_text_seq(&["ON"]) {
43911            // Matched: ON
43912            return Ok(None);
43913        }
43914        if self.match_text_seq(&["OFF"]) {
43915            // Matched: OFF
43916            return Ok(None);
43917        }
43918        if self.match_text_seq(&["FILTER_COLUMN", "="]) {
43919            // Matched: FILTER_COLUMN =
43920            return Ok(None);
43921        }
43922        Ok(None)
43923    }
43924
43925    /// parse_datablocksize - Implemented from Python _parse_datablocksize
43926    /// Calls: parse_number
43927    #[allow(unused_variables, unused_mut)]
43928    pub fn parse_datablocksize(&mut self) -> Result<Option<Expression>> {
43929        if self.match_texts(&["BYTES", "KBYTES", "KILOBYTES"]) {
43930            // Matched one of: BYTES, KBYTES, KILOBYTES
43931            return Ok(None);
43932        }
43933        Ok(None)
43934    }
43935
43936    /// parse_dcolon - Delegates to parse_types
43937    #[allow(unused_variables, unused_mut)]
43938    pub fn parse_dcolon(&mut self) -> Result<Option<Expression>> {
43939        self.parse_types()
43940    }
43941
43942    /// parse_ddl_select - Ported from Python _parse_ddl_select
43943    /// Parses a SELECT statement in DDL context (CREATE TABLE AS SELECT, INSERT INTO ... SELECT)
43944    #[allow(unused_variables, unused_mut)]
43945    pub fn parse_ddl_select(&mut self) -> Result<Option<Expression>> {
43946        // Parse a nested SELECT statement
43947        let select = self.parse_select_query()?;
43948
43949        if select.is_none() {
43950            return Ok(None);
43951        }
43952
43953        // Apply set operations (UNION, INTERSECT, EXCEPT)
43954        let with_set_ops = self.parse_set_operations_with_expr(select)?;
43955
43956        // Return the result (query modifiers would be applied by parse_select_query already)
43957        Ok(with_set_ops)
43958    }
43959
43960    /// parse_for_in - BigQuery procedural FOR...IN...DO loop
43961    /// Python: BigQuery._parse_for_in
43962    /// Format: FOR variable IN (query) DO statement(s) END FOR
43963    /// Example: FOR record IN (SELECT * FROM t) DO SELECT record.col
43964    pub fn parse_for_in(&mut self) -> Result<Expression> {
43965        // Parse: variable IN (query)
43966        // This is handled by parse_range which produces an In expression
43967        let this = self
43968            .parse_range()?
43969            .ok_or_else(|| self.parse_error("Expected expression after FOR"))?;
43970
43971        // Match DO keyword
43972        self.match_text_seq(&["DO"]);
43973
43974        // Parse the body statement
43975        let expression = self.parse_statement()?;
43976
43977        Ok(Expression::ForIn(Box::new(ForIn {
43978            this: Box::new(this),
43979            expression: Box::new(expression),
43980        })))
43981    }
43982
43983    /// parse_declare - Parses DECLARE statement
43984    /// Python: _parse_declare
43985    /// Format: DECLARE var1 type [DEFAULT expr], var2 type [DEFAULT expr], ...
43986    pub fn parse_declare(&mut self) -> Result<Option<Expression>> {
43987        // Try to parse comma-separated declare items
43988        let mut expressions = Vec::new();
43989
43990        // BigQuery multi-variable DECLARE: DECLARE X, Y, Z INT64 [DEFAULT expr]
43991        // Detect by looking ahead: if we see identifier, comma, identifier pattern
43992        // before a data type keyword, collect all names then parse type once.
43993        let saved = self.current;
43994        let mut multi_names: Vec<Expression> = Vec::new();
43995        if let Some(first_var) = self.parse_id_var()? {
43996            // Check if next is a comma (BigQuery multi-var syntax)
43997            if self.check(TokenType::Comma) && !self.check_identifier("CURSOR") {
43998                // Speculatively collect comma-separated identifiers
43999                multi_names.push(first_var);
44000                while self.match_token(TokenType::Comma) {
44001                    if let Some(next_var) = self.parse_id_var()? {
44002                        multi_names.push(next_var);
44003                    } else {
44004                        break;
44005                    }
44006                }
44007                // Now check if we're at a data type (not comma, not @, not semicolon)
44008                // If so, this is BigQuery multi-var syntax
44009                if multi_names.len() > 1 && !self.is_at_end() && !self.check(TokenType::Semicolon) {
44010                    let data_type = self.parse_data_type()?;
44011                    let kind_str = self.data_type_to_sql(&data_type);
44012                    let default = if self.match_token(TokenType::Default)
44013                        || self.match_token(TokenType::Eq)
44014                    {
44015                        Some(Box::new(self.parse_expression()?))
44016                    } else {
44017                        None
44018                    };
44019                    let first_name = multi_names.remove(0);
44020                    expressions.push(Expression::DeclareItem(Box::new(DeclareItem {
44021                        this: Box::new(first_name),
44022                        kind: Some(kind_str),
44023                        default,
44024                        has_as: false,
44025                        additional_names: multi_names,
44026                    })));
44027                    return Ok(Some(Expression::Declare(Box::new(Declare { expressions }))));
44028                }
44029            }
44030        }
44031        // Reset and parse normally
44032        self.current = saved;
44033
44034        loop {
44035            if let Some(item) = self.parse_declareitem()? {
44036                expressions.push(item);
44037            } else {
44038                break;
44039            }
44040            if !self.match_token(TokenType::Comma) {
44041                break;
44042            }
44043        }
44044
44045        // If we successfully parsed at least one item, return the Declare
44046        if !expressions.is_empty() {
44047            return Ok(Some(Expression::Declare(Box::new(Declare { expressions }))));
44048        }
44049
44050        Ok(None)
44051    }
44052
44053    /// parse_declareitem - Parse a DECLARE item (variable declaration)
44054    /// TSQL format: @var AS type [= expr] or @var type [= expr]
44055    /// Also handles: DECLARE name CURSOR FOR SELECT ...
44056    /// Also handles: DECLARE @var TABLE (col_defs)
44057    #[allow(unused_variables, unused_mut)]
44058    pub fn parse_declareitem(&mut self) -> Result<Option<Expression>> {
44059        // Consume optional VAR or VARIABLE keyword (Spark/Databricks)
44060        if self.check_identifier("VAR") || self.check_identifier("VARIABLE") {
44061            self.advance();
44062        }
44063
44064        // Parse the variable name (starts with @ or is a cursor name)
44065        let var = if let Some(v) = self.parse_id_var()? {
44066            v
44067        } else {
44068            return Ok(None);
44069        };
44070
44071        // Check for CURSOR FOR syntax: DECLARE name CURSOR FOR SELECT ...
44072        if self.check_identifier("CURSOR") {
44073            self.advance(); // consume CURSOR
44074                            // Parse optional cursor options before FOR (e.g., SCROLL, INSENSITIVE, etc.)
44075                            // For now just look for FOR
44076            if self.match_token(TokenType::For) {
44077                // Capture the remaining tokens as the cursor query using tokens_to_sql for proper spacing
44078                let start = self.current;
44079                while !self.is_at_end() && !self.check(TokenType::Semicolon) {
44080                    self.advance();
44081                }
44082                let query_str = self.tokens_to_sql_uppercased(start, self.current);
44083                let kind_str = format!("CURSOR FOR {}", query_str);
44084                return Ok(Some(Expression::DeclareItem(Box::new(DeclareItem {
44085                    this: Box::new(var),
44086                    kind: Some(kind_str),
44087                    default: None,
44088                    has_as: false,
44089                    additional_names: Vec::new(),
44090                }))));
44091            } else {
44092                return Ok(Some(Expression::DeclareItem(Box::new(DeclareItem {
44093                    this: Box::new(var),
44094                    kind: Some("CURSOR".to_string()),
44095                    default: None,
44096                    has_as: false,
44097                    additional_names: Vec::new(),
44098                }))));
44099            }
44100        }
44101
44102        // Parse optional AS keyword
44103        let has_as = self.match_token(TokenType::As);
44104
44105        // Check for TABLE type with column definitions
44106        if self.check(TokenType::Table) {
44107            self.advance(); // consume TABLE
44108            if self.match_token(TokenType::LParen) {
44109                // Parse the TABLE column definitions using tokens_to_sql for proper spacing
44110                let start = self.current;
44111                let mut depth = 1;
44112                while depth > 0 && !self.is_at_end() {
44113                    if self.check(TokenType::LParen) {
44114                        depth += 1;
44115                    }
44116                    if self.check(TokenType::RParen) {
44117                        depth -= 1;
44118                        if depth == 0 {
44119                            break;
44120                        }
44121                    }
44122                    self.advance();
44123                }
44124                let col_defs_str = self.tokens_to_sql_uppercased(start, self.current);
44125                self.expect(TokenType::RParen)?;
44126                let kind_str = format!("TABLE ({})", col_defs_str);
44127                return Ok(Some(Expression::DeclareItem(Box::new(DeclareItem {
44128                    this: Box::new(var),
44129                    kind: Some(kind_str),
44130                    default: None,
44131                    has_as,
44132                    additional_names: Vec::new(),
44133                }))));
44134            } else {
44135                return Ok(Some(Expression::DeclareItem(Box::new(DeclareItem {
44136                    this: Box::new(var),
44137                    kind: Some("TABLE".to_string()),
44138                    default: None,
44139                    has_as,
44140                    additional_names: Vec::new(),
44141                }))));
44142            }
44143        }
44144
44145        // Parse the data type
44146        let data_type = self.parse_data_type()?;
44147        let kind_str = self.data_type_to_sql(&data_type);
44148
44149        // Parse optional DEFAULT value or = value (TSQL uses =)
44150        let default = if self.match_token(TokenType::Default) || self.match_token(TokenType::Eq) {
44151            Some(Box::new(self.parse_expression()?))
44152        } else {
44153            None
44154        };
44155
44156        Ok(Some(Expression::DeclareItem(Box::new(DeclareItem {
44157            this: Box::new(var),
44158            kind: Some(kind_str),
44159            default,
44160            has_as,
44161            additional_names: Vec::new(),
44162        }))))
44163    }
44164
44165    /// Convert a DataType to its SQL string representation
44166    fn data_type_to_sql(&self, dt: &DataType) -> String {
44167        match dt {
44168            DataType::Boolean => "BOOLEAN".to_string(),
44169            DataType::TinyInt { length } => {
44170                if let Some(n) = length {
44171                    format!("TINYINT({})", n)
44172                } else {
44173                    "TINYINT".to_string()
44174                }
44175            }
44176            DataType::SmallInt { length } => {
44177                if let Some(n) = length {
44178                    format!("SMALLINT({})", n)
44179                } else {
44180                    "SMALLINT".to_string()
44181                }
44182            }
44183            DataType::Int {
44184                length,
44185                integer_spelling,
44186            } => {
44187                if let Some(n) = length {
44188                    if *integer_spelling {
44189                        format!("INTEGER({})", n)
44190                    } else {
44191                        format!("INT({})", n)
44192                    }
44193                } else if *integer_spelling {
44194                    "INTEGER".to_string()
44195                } else {
44196                    "INT".to_string()
44197                }
44198            }
44199            DataType::BigInt { length } => {
44200                if let Some(n) = length {
44201                    format!("BIGINT({})", n)
44202                } else {
44203                    "BIGINT".to_string()
44204                }
44205            }
44206            DataType::Float {
44207                precision, scale, ..
44208            } => match (precision, scale) {
44209                (Some(p), Some(s)) => format!("FLOAT({}, {})", p, s),
44210                (Some(p), None) => format!("FLOAT({})", p),
44211                _ => "FLOAT".to_string(),
44212            },
44213            DataType::Double { precision, scale } => match (precision, scale) {
44214                (Some(p), Some(s)) => format!("DOUBLE({}, {})", p, s),
44215                (Some(p), None) => format!("DOUBLE({})", p),
44216                _ => "DOUBLE".to_string(),
44217            },
44218            DataType::Decimal { precision, scale } => match (precision, scale) {
44219                (Some(p), Some(s)) => format!("DECIMAL({}, {})", p, s),
44220                (Some(p), None) => format!("DECIMAL({})", p),
44221                _ => "DECIMAL".to_string(),
44222            },
44223            DataType::Char { length } => {
44224                if let Some(n) = length {
44225                    format!("CHAR({})", n)
44226                } else {
44227                    "CHAR".to_string()
44228                }
44229            }
44230            DataType::VarChar { length, .. } => {
44231                if let Some(n) = length {
44232                    format!("VARCHAR({})", n)
44233                } else {
44234                    "VARCHAR".to_string()
44235                }
44236            }
44237            DataType::Text => "TEXT".to_string(),
44238            DataType::Date => "DATE".to_string(),
44239            DataType::Time { precision, .. } => {
44240                if let Some(p) = precision {
44241                    format!("TIME({})", p)
44242                } else {
44243                    "TIME".to_string()
44244                }
44245            }
44246            DataType::Timestamp { precision, .. } => {
44247                if let Some(p) = precision {
44248                    format!("TIMESTAMP({})", p)
44249                } else {
44250                    "TIMESTAMP".to_string()
44251                }
44252            }
44253            DataType::Binary { length } => {
44254                if let Some(n) = length {
44255                    format!("BINARY({})", n)
44256                } else {
44257                    "BINARY".to_string()
44258                }
44259            }
44260            DataType::VarBinary { length } => {
44261                if let Some(n) = length {
44262                    format!("VARBINARY({})", n)
44263                } else {
44264                    "VARBINARY".to_string()
44265                }
44266            }
44267            DataType::Blob => "BLOB".to_string(),
44268            DataType::Json => "JSON".to_string(),
44269            DataType::Uuid => "UUID".to_string(),
44270            DataType::Custom { name } => name.clone(), // Custom types (INT64, FLOAT64, etc.)
44271            _ => format!("{:?}", dt),                  // Fallback for unknown types
44272        }
44273    }
44274
44275    /// parse_decode - Ported from Python _parse_decode
44276    /// Parses Oracle-style DECODE or simple DECODE function
44277    /// If 3+ args: Oracle DECODE(expr, search1, result1, ..., default)
44278    /// If 2 args: character set decode (expr, charset)
44279    #[allow(unused_variables, unused_mut)]
44280    pub fn parse_decode(&mut self) -> Result<Option<Expression>> {
44281        // Parse comma-separated arguments
44282        let mut args: Vec<Expression> = Vec::new();
44283        loop {
44284            match self.parse_expression() {
44285                Ok(expr) => args.push(expr),
44286                Err(_) => break,
44287            }
44288            if !self.match_token(TokenType::Comma) {
44289                break;
44290            }
44291        }
44292
44293        if args.len() < 3 {
44294            // Simple decode with charset
44295            return Ok(Some(Expression::DecodeCase(Box::new(DecodeCase {
44296                expressions: args,
44297            }))));
44298        }
44299
44300        // Oracle DECODE: first arg is the expression being compared
44301        // Remaining args are search/result pairs, with optional default at end
44302        Ok(Some(Expression::DecodeCase(Box::new(DecodeCase {
44303            expressions: args,
44304        }))))
44305    }
44306
44307    /// parse_definer - MySQL DEFINER property
44308    /// Parses: DEFINER = user@host
44309    #[allow(unused_variables, unused_mut)]
44310    pub fn parse_definer(&mut self) -> Result<Option<Expression>> {
44311        // Optionally consume = sign
44312        self.match_token(TokenType::Eq);
44313
44314        // Parse the user part
44315        let user = self.parse_id_var()?;
44316        if user.is_none() {
44317            return Ok(None);
44318        }
44319
44320        // Expect @ symbol
44321        if !self.match_token(TokenType::DAt) {
44322            return Ok(None);
44323        }
44324
44325        // Parse the host part (can be identifier or % wildcard)
44326        let host = if let Some(id) = self.parse_id_var()? {
44327            id
44328        } else if self.match_token(TokenType::Mod) {
44329            // % wildcard for any host
44330            Expression::Identifier(Identifier::new(self.previous().text.clone()))
44331        } else {
44332            return Ok(None);
44333        };
44334
44335        // Combine user@host into a string
44336        let user_str = match &user {
44337            Some(Expression::Identifier(id)) => id.name.clone(),
44338            _ => "".to_string(),
44339        };
44340        let host_str = match &host {
44341            Expression::Identifier(id) => id.name.clone(),
44342            _ => "".to_string(),
44343        };
44344
44345        let definer_str = format!("{}@{}", user_str, host_str);
44346
44347        Ok(Some(Expression::DefinerProperty(Box::new(
44348            DefinerProperty {
44349                this: Box::new(Expression::Literal(Literal::String(definer_str))),
44350            },
44351        ))))
44352    }
44353
44354    /// parse_derived_table_values - Implemented from Python _parse_derived_table_values
44355    #[allow(unused_variables, unused_mut)]
44356    pub fn parse_derived_table_values(&mut self) -> Result<Option<Expression>> {
44357        if self.match_text_seq(&["VALUES"]) {
44358            return Ok(Some(Expression::Values(Box::new(Values {
44359                expressions: Vec::new(),
44360                alias: None,
44361                column_aliases: Vec::new(),
44362            }))));
44363        }
44364        if self.match_text_seq(&["FORMAT", "VALUES"]) {
44365            // Matched: FORMAT VALUES
44366            return Ok(None);
44367        }
44368        Ok(None)
44369    }
44370
44371    /// parse_dict_property - ClickHouse dictionary property
44372    /// Parses: property_name(kind(key1 value1, key2 value2, ...))
44373    /// property_name should be the already matched property keyword (LAYOUT, SOURCE, etc.)
44374    #[allow(unused_variables, unused_mut)]
44375    pub fn parse_dict_property(&mut self, property_name: &str) -> Result<Option<Expression>> {
44376        // Expect opening paren
44377        if !self.match_token(TokenType::LParen) {
44378            return Ok(None);
44379        }
44380
44381        // Parse the kind (e.g., HASHED, FLAT, CLICKHOUSE, CACHE, etc.)
44382        // Accept Var, Identifier, or keyword tokens as the kind name
44383        let kind_str = if self.is_identifier_token() || self.check_keyword() {
44384            self.advance().text.clone()
44385        } else {
44386            String::new()
44387        };
44388        if kind_str.is_empty() {
44389            return Err(self.parse_error("Expected dictionary property kind"));
44390        }
44391
44392        // Parse optional settings in nested parens
44393        let settings = if self.match_token(TokenType::LParen) {
44394            let mut setting_pairs = Vec::new();
44395            loop {
44396                let key = if let Some(k) = self.parse_id_var()? {
44397                    Some(k)
44398                } else if self.is_safe_keyword_as_identifier() || self.check_keyword() {
44399                    let name = self.advance().text.clone();
44400                    Some(Expression::Identifier(Identifier::new(name)))
44401                } else if !self.check(TokenType::RParen) && !self.check(TokenType::Comma) {
44402                    let name = self.advance().text.clone();
44403                    Some(Expression::Identifier(Identifier::new(name)))
44404                } else {
44405                    None
44406                };
44407                // ClickHouse: STRUCTURE (...) contains column defs without commas — consume balanced parens
44408                let is_structure = key.as_ref().map_or(false, |k| {
44409                    matches!(k, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("STRUCTURE"))
44410                });
44411                let value = if is_structure && self.check(TokenType::LParen) {
44412                    let mut raw = String::new();
44413                    let mut depth = 0i32;
44414                    while !self.is_at_end() {
44415                        let tok = self.advance();
44416                        match tok.token_type {
44417                            TokenType::LParen => {
44418                                depth += 1;
44419                                raw.push('(');
44420                            }
44421                            TokenType::RParen => {
44422                                depth -= 1;
44423                                if depth == 0 {
44424                                    raw.push(')');
44425                                    break;
44426                                }
44427                                raw.push(')');
44428                            }
44429                            _ => {
44430                                if !raw.is_empty() && !raw.ends_with('(') {
44431                                    raw.push(' ');
44432                                }
44433                                raw.push_str(&tok.text);
44434                            }
44435                        }
44436                    }
44437                    Some(Expression::Var(Box::new(Var { this: raw })))
44438                } else {
44439                    self.parse_primary_or_var()?
44440                };
44441                if key.is_none() && value.is_none() {
44442                    break;
44443                }
44444                if let (Some(k), Some(v)) = (key, value) {
44445                    // Store as a tuple-like expression
44446                    setting_pairs.push(Expression::Tuple(Box::new(Tuple {
44447                        expressions: vec![k, v],
44448                    })));
44449                }
44450                // ClickHouse dict properties are space-separated, not comma-separated
44451                // e.g. SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() DB 'test'))
44452                // Accept optional comma but don't require it
44453                self.match_token(TokenType::Comma);
44454                // Break if we see RParen (end of settings)
44455                if self.check(TokenType::RParen) {
44456                    break;
44457                }
44458            }
44459            self.expect(TokenType::RParen)?;
44460            if !setting_pairs.is_empty() {
44461                Some(Box::new(Expression::Tuple(Box::new(Tuple {
44462                    expressions: setting_pairs,
44463                }))))
44464            } else {
44465                None
44466            }
44467        } else {
44468            None
44469        };
44470
44471        self.expect(TokenType::RParen)?;
44472
44473        Ok(Some(Expression::DictProperty(Box::new(DictProperty {
44474            this: Box::new(Expression::Identifier(Identifier::new(
44475                property_name.to_string(),
44476            ))),
44477            kind: kind_str,
44478            settings,
44479        }))))
44480    }
44481
44482    /// parse_dict_range - Implemented from Python _parse_dict_range
44483    /// Parses dictionary range specification: (MIN min_val MAX max_val) or (max_val)
44484    pub fn parse_dict_range(&mut self, property_name: &str) -> Result<Option<Expression>> {
44485        // Expect opening paren
44486        self.expect(TokenType::LParen)?;
44487
44488        // Prefer id/var first for dictionary bounds to avoid function-keyword ambiguity
44489        // such as `MIN discount_start_date MAX discount_end_date`.
44490        let parse_bound = |parser: &mut Parser| -> Result<Option<Expression>> {
44491            // Handle negative numbers: -1, -100, etc.
44492            if parser.check(TokenType::Dash)
44493                && parser
44494                    .peek_nth(1)
44495                    .is_some_and(|t| t.token_type == TokenType::Number)
44496            {
44497                parser.advance(); // consume -
44498                let num = parser.advance().text.clone();
44499                return Ok(Some(Expression::Literal(Literal::Number(format!(
44500                    "-{}",
44501                    num
44502                )))));
44503            }
44504            if let Some(id) = parser.parse_id_var()? {
44505                return Ok(Some(id));
44506            }
44507            parser.parse_primary_or_var()
44508        };
44509
44510        let (min_val, max_val) = if self.peek().text.eq_ignore_ascii_case("MIN") {
44511            self.advance(); // consume MIN
44512            let min = parse_bound(self)?;
44513            if self.peek().text.eq_ignore_ascii_case("MAX") {
44514                self.advance(); // consume MAX
44515            }
44516            let max = parse_bound(self)?;
44517            (min, max)
44518        } else {
44519            let max = parse_bound(self)?;
44520            let min = Some(Expression::Literal(Literal::Number("0".to_string())));
44521            (min, max)
44522        };
44523
44524        // Match closing paren
44525        self.expect(TokenType::RParen)?;
44526
44527        Ok(Some(Expression::DictRange(Box::new(DictRange {
44528            this: Box::new(Expression::Var(Box::new(Var {
44529                this: property_name.to_string(),
44530            }))),
44531            min: min_val.map(Box::new),
44532            max: max_val.map(Box::new),
44533        }))))
44534    }
44535
44536    /// parse_disjunction - Parses OR expressions
44537    /// Python: _parse_disjunction
44538    /// Delegates to the existing parse_or in the operator precedence chain
44539    pub fn parse_disjunction(&mut self) -> Result<Option<Expression>> {
44540        match self.parse_or() {
44541            Ok(expr) => Ok(Some(expr)),
44542            Err(_) => Ok(None),
44543        }
44544    }
44545
44546    /// parse_distkey - Redshift DISTKEY property for distribution key
44547    /// Parses: DISTKEY(column_name)
44548    #[allow(unused_variables, unused_mut)]
44549    pub fn parse_distkey(&mut self) -> Result<Option<Expression>> {
44550        // Parse wrapped column identifier (in parentheses)
44551        if !self.match_token(TokenType::LParen) {
44552            return Ok(None);
44553        }
44554
44555        let column = self.parse_id_var()?;
44556        if column.is_none() {
44557            return Ok(None);
44558        }
44559
44560        self.match_token(TokenType::RParen);
44561
44562        Ok(Some(Expression::DistKeyProperty(Box::new(
44563            DistKeyProperty {
44564                this: Box::new(column.unwrap()),
44565            },
44566        ))))
44567    }
44568
44569    /// parse_distributed_property - Implemented from Python _parse_distributed_property
44570    #[allow(unused_variables, unused_mut)]
44571    /// parse_distributed_property - Parses DISTRIBUTED BY property
44572    /// Python: parser.py:2462-2481
44573    pub fn parse_distributed_property(&mut self) -> Result<Option<Expression>> {
44574        let mut kind = "HASH".to_string();
44575        let mut expressions = Vec::new();
44576
44577        if self.match_text_seq(&["BY", "HASH"]) {
44578            // Parse column list: (col1, col2, ...)
44579            if let Some(wrapped) = self.parse_wrapped_id_vars()? {
44580                if let Expression::Tuple(t) = wrapped {
44581                    expressions = t.expressions;
44582                }
44583            }
44584        } else if self.match_text_seq(&["BY", "RANDOM"]) {
44585            kind = "RANDOM".to_string();
44586        } else {
44587            return Ok(None);
44588        }
44589
44590        // Parse optional BUCKETS
44591        let buckets = if self.match_text_seq(&["BUCKETS"]) {
44592            if !self.match_text_seq(&["AUTO"]) {
44593                self.parse_number()?
44594            } else {
44595                None
44596            }
44597        } else {
44598            None
44599        };
44600
44601        // Parse optional ORDER BY
44602        let order = self.parse_order()?;
44603
44604        Ok(Some(Expression::DistributedByProperty(Box::new(
44605            DistributedByProperty {
44606                expressions,
44607                kind,
44608                buckets: buckets.map(Box::new),
44609                order: order.map(Box::new),
44610            },
44611        ))))
44612    }
44613
44614    /// Parse DROP COLUMN in ALTER TABLE
44615    /// Note: Main ALTER TABLE DROP COLUMN logic is in parse_alter_table -> AlterTableAction::DropColumn
44616    pub fn parse_drop_column(&mut self) -> Result<Option<Expression>> {
44617        // Optionally match COLUMN keyword
44618        self.match_token(TokenType::Column);
44619
44620        // Parse IF EXISTS
44621        let _if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
44622
44623        // Parse the column identifier
44624        if let Some(column) = self.parse_identifier()? {
44625            // Check for CASCADE
44626            let _cascade = self.match_text_seq(&["CASCADE"]);
44627            // Return the column as an identifier (the caller handles the drop semantics)
44628            Ok(Some(column))
44629        } else {
44630            Ok(None)
44631        }
44632    }
44633
44634    /// Parse DROP PARTITION in ALTER TABLE
44635    /// Note: Main ALTER TABLE DROP PARTITION logic is in parse_alter_table -> AlterTableAction::DropPartition
44636    pub fn parse_drop_partition(&mut self) -> Result<Option<Expression>> {
44637        self.parse_drop_partition_with_exists(false)
44638    }
44639
44640    /// Parse DROP PARTITION with exists flag
44641    pub fn parse_drop_partition_with_exists(&mut self, exists: bool) -> Result<Option<Expression>> {
44642        // Parse one or more partitions
44643        let mut partitions = Vec::new();
44644
44645        loop {
44646            // Parse PARTITION (key = value, ...)
44647            if self.match_token(TokenType::Partition) {
44648                if self.match_token(TokenType::LParen) {
44649                    // Parse partition expressions
44650                    let mut exprs = Vec::new();
44651                    loop {
44652                        let expr = self.parse_expression()?;
44653                        exprs.push(expr);
44654                        if !self.match_token(TokenType::Comma) {
44655                            break;
44656                        }
44657                    }
44658                    self.match_token(TokenType::RParen);
44659                    partitions.push(Expression::Tuple(Box::new(Tuple { expressions: exprs })));
44660                }
44661            } else {
44662                break;
44663            }
44664
44665            if !self.match_token(TokenType::Comma) {
44666                break;
44667            }
44668        }
44669
44670        if partitions.is_empty() {
44671            Ok(None)
44672        } else {
44673            Ok(Some(Expression::DropPartition(Box::new(DropPartition {
44674                expressions: partitions,
44675                exists,
44676            }))))
44677        }
44678    }
44679
44680    /// parse_equality - Parses comparison/equality expressions (= <> < > <= >=)
44681    /// Python: _parse_equality
44682    /// Delegates to the existing parse_comparison in the operator precedence chain
44683    pub fn parse_equality(&mut self) -> Result<Option<Expression>> {
44684        match self.parse_comparison() {
44685            Ok(expr) => Ok(Some(expr)),
44686            Err(_) => Ok(None),
44687        }
44688    }
44689
44690    /// parse_escape - Parses ESCAPE clause for LIKE patterns
44691    /// Python: _parse_escape
44692    /// Returns the escape character/expression if ESCAPE keyword is found
44693    pub fn parse_escape(&mut self) -> Result<Option<Expression>> {
44694        if !self.match_token(TokenType::Escape) {
44695            return Ok(None);
44696        }
44697
44698        // Parse escape character (usually a string like '\')
44699        if let Some(escape_char) = self.parse_string()? {
44700            return Ok(Some(escape_char));
44701        }
44702
44703        // Or parse NULL
44704        if let Some(null_expr) = self.parse_null()? {
44705            return Ok(Some(null_expr));
44706        }
44707
44708        Ok(None)
44709    }
44710
44711    /// parse_exists - Implemented from Python _parse_exists
44712    #[allow(unused_variables, unused_mut)]
44713    pub fn parse_exists(&mut self) -> Result<Option<Expression>> {
44714        if self.match_text_seq(&["IF"]) {
44715            // Matched: IF
44716            return Ok(None);
44717        }
44718        Ok(None)
44719    }
44720
44721    /// parse_exponent - Parses exponent/power expressions
44722    /// Python: _parse_exponent
44723    /// In most dialects, EXPONENT is empty, so this delegates to parse_unary
44724    pub fn parse_exponent(&mut self) -> Result<Option<Expression>> {
44725        match self.parse_unary() {
44726            Ok(expr) => Ok(Some(expr)),
44727            Err(_) => Ok(None),
44728        }
44729    }
44730
44731    /// parse_expressions - Parse comma-separated expressions
44732    /// Returns a Tuple containing all expressions, or None if empty
44733    #[allow(unused_variables, unused_mut)]
44734    pub fn parse_expressions(&mut self) -> Result<Option<Expression>> {
44735        let expressions = self.parse_expression_list()?;
44736        if expressions.is_empty() {
44737            return Ok(None);
44738        }
44739        if expressions.len() == 1 {
44740            return Ok(expressions.into_iter().next());
44741        }
44742        Ok(Some(Expression::Tuple(Box::new(Tuple { expressions }))))
44743    }
44744
44745    /// parse_extract - Ported from Python _parse_extract
44746    /// Parses EXTRACT(field FROM expression) function
44747    #[allow(unused_variables, unused_mut)]
44748    pub fn parse_extract(&mut self) -> Result<Option<Expression>> {
44749        // Parse the field (YEAR, MONTH, DAY, HOUR, etc.)
44750        let field_name = if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
44751            let token = self.advance();
44752            token.text.to_uppercase()
44753        } else {
44754            return Ok(None);
44755        };
44756
44757        // Convert field name to DateTimeField
44758        let field = match field_name.as_str() {
44759            "YEAR" => DateTimeField::Year,
44760            "MONTH" => DateTimeField::Month,
44761            "DAY" => DateTimeField::Day,
44762            "HOUR" => DateTimeField::Hour,
44763            "MINUTE" => DateTimeField::Minute,
44764            "SECOND" => DateTimeField::Second,
44765            "MILLISECOND" | "MILLISECONDS" | "MS" => DateTimeField::Millisecond,
44766            "MICROSECOND" | "MICROSECONDS" | "US" => DateTimeField::Microsecond,
44767            "DOW" | "DAYOFWEEK" => DateTimeField::DayOfWeek,
44768            "DOY" | "DAYOFYEAR" => DateTimeField::DayOfYear,
44769            "WEEK" => DateTimeField::Week,
44770            "QUARTER" => DateTimeField::Quarter,
44771            "EPOCH" => DateTimeField::Epoch,
44772            "TIMEZONE" => DateTimeField::Timezone,
44773            "TIMEZONE_HOUR" => DateTimeField::TimezoneHour,
44774            "TIMEZONE_MINUTE" => DateTimeField::TimezoneMinute,
44775            "DATE" => DateTimeField::Date,
44776            "TIME" => DateTimeField::Time,
44777            other => DateTimeField::Custom(other.to_string()),
44778        };
44779
44780        // Expect FROM or comma
44781        if !self.match_token(TokenType::From) && !self.match_token(TokenType::Comma) {
44782            return Err(self.parse_error("Expected FROM or comma after EXTRACT field"));
44783        }
44784
44785        // Parse the expression to extract from
44786        let expression = self.parse_bitwise()?;
44787        let this = match expression {
44788            Some(expr) => self.try_clickhouse_func_arg_alias(expr),
44789            None => return Err(self.parse_error("Expected expression after FROM in EXTRACT")),
44790        };
44791
44792        Ok(Some(Expression::Extract(Box::new(ExtractFunc {
44793            this,
44794            field,
44795        }))))
44796    }
44797
44798    /// parse_factor - Parses multiplication/division expressions (* / % operators)
44799    /// Python: _parse_factor
44800    /// Delegates to the existing parse_multiplication in the operator precedence chain
44801    pub fn parse_factor(&mut self) -> Result<Option<Expression>> {
44802        // Delegate to the existing multiplication parsing
44803        match self.parse_multiplication() {
44804            Ok(expr) => Ok(Some(expr)),
44805            Err(_) => Ok(None),
44806        }
44807    }
44808
44809    /// parse_fallback - Implemented from Python _parse_fallback
44810    #[allow(unused_variables, unused_mut)]
44811    pub fn parse_fallback(&mut self) -> Result<Option<Expression>> {
44812        if self.match_text_seq(&["PROTECTION"]) {
44813            return Ok(Some(Expression::FallbackProperty(Box::new(
44814                FallbackProperty {
44815                    no: None,
44816                    protection: None,
44817                },
44818            ))));
44819        }
44820        Ok(None)
44821    }
44822
44823    /// parse_field - Parse a field (column name, literal, or expression)
44824    /// Python: field = self._parse_primary() or self._parse_function() or self._parse_id_var()
44825    pub fn parse_field(&mut self) -> Result<Option<Expression>> {
44826        // Try parsing literals first
44827        if let Some(expr) = self.parse_string()? {
44828            return Ok(Some(expr));
44829        }
44830        if let Some(expr) = self.parse_number()? {
44831            return Ok(Some(expr));
44832        }
44833        if let Some(expr) = self.parse_boolean()? {
44834            return Ok(Some(expr));
44835        }
44836        if let Some(expr) = self.parse_null()? {
44837            return Ok(Some(expr));
44838        }
44839        if let Some(expr) = self.parse_star()? {
44840            return Ok(Some(expr));
44841        }
44842        // Try parsing identifier
44843        if let Some(expr) = self.parse_identifier()? {
44844            return Ok(Some(expr));
44845        }
44846        // Try parsing a variable/identifier
44847        if let Some(expr) = self.parse_var()? {
44848            return Ok(Some(expr));
44849        }
44850        // Allow keywords as identifiers in field context (e.g., "schema" as a field name)
44851        if self.check_keyword() {
44852            let token = self.advance();
44853            return Ok(Some(Expression::Identifier(Identifier {
44854                name: token.text,
44855                quoted: false,
44856                trailing_comments: Vec::new(),
44857                span: None,
44858            })));
44859        }
44860        Ok(None)
44861    }
44862
44863    /// parse_field_def - Ported from Python _parse_field_def
44864    /// Parses a field definition (column name + type + optional constraints)
44865    #[allow(unused_variables, unused_mut)]
44866    pub fn parse_field_def(&mut self) -> Result<Option<Expression>> {
44867        // First parse the field name (identifier)
44868        let field = self.parse_field()?;
44869
44870        if field.is_none() {
44871            return Ok(None);
44872        }
44873
44874        // Parse the column definition with the field as the name
44875        self.parse_column_def_with_field(field)
44876    }
44877
44878    /// Helper to parse a column definition with a pre-parsed field name
44879    fn parse_column_def_with_field(
44880        &mut self,
44881        field: Option<Expression>,
44882    ) -> Result<Option<Expression>> {
44883        if field.is_none() {
44884            return Ok(None);
44885        }
44886
44887        let this = field.unwrap();
44888
44889        // Get the identifier from the expression and preserve quoted-identifier state.
44890        let name_ident = match &this {
44891            Expression::Column(col) => col.name.clone(),
44892            Expression::Identifier(id) => id.clone(),
44893            Expression::Var(v) => Identifier::new(v.this.clone()),
44894            _ => return Ok(None),
44895        };
44896
44897        // Parse the data type using parse_data_type_optional (which handles unknown types gracefully)
44898        let data_type = match self.parse_data_type_optional()? {
44899            Some(dt) => dt,
44900            None => DataType::Unknown,
44901        };
44902
44903        // Create ColumnDef with default values
44904        let mut col_def = ColumnDef::new(name_ident.name.clone(), data_type);
44905        col_def.name = name_ident;
44906
44907        // Check for FOR ORDINALITY (JSON table columns)
44908        if self.match_text_seq(&["FOR", "ORDINALITY"]) {
44909            return Ok(Some(Expression::ColumnDef(Box::new(col_def))));
44910        }
44911
44912        // Parse constraints and extract specific constraint values
44913        loop {
44914            if let Some(constraint) = self.parse_column_constraint()? {
44915                // Check specific constraint types
44916                match &constraint {
44917                    Expression::NotNullColumnConstraint(_) => {
44918                        col_def.nullable = Some(false);
44919                        col_def.constraints.push(ColumnConstraint::NotNull);
44920                    }
44921                    Expression::PrimaryKeyColumnConstraint(_) => {
44922                        col_def.primary_key = true;
44923                        col_def.constraints.push(ColumnConstraint::PrimaryKey);
44924                    }
44925                    Expression::UniqueColumnConstraint(_) => {
44926                        col_def.unique = true;
44927                        col_def.constraints.push(ColumnConstraint::Unique);
44928                    }
44929                    Expression::DefaultColumnConstraint(dc) => {
44930                        col_def.default = Some((*dc.this).clone());
44931                        col_def
44932                            .constraints
44933                            .push(ColumnConstraint::Default((*dc.this).clone()));
44934                    }
44935                    Expression::AutoIncrementColumnConstraint(_) => {
44936                        col_def.auto_increment = true;
44937                    }
44938                    Expression::CommentColumnConstraint(_) => {
44939                        // Comment is a unit struct, we'd need the actual comment text
44940                    }
44941                    Expression::CheckColumnConstraint(cc) => {
44942                        col_def
44943                            .constraints
44944                            .push(ColumnConstraint::Check((*cc.this).clone()));
44945                    }
44946                    Expression::PathColumnConstraint(pc) => {
44947                        col_def
44948                            .constraints
44949                            .push(ColumnConstraint::Path((*pc.this).clone()));
44950                        col_def.constraint_order.push(ConstraintType::Path);
44951                    }
44952                    _ => {}
44953                }
44954            } else if matches!(
44955                self.config.dialect,
44956                Some(crate::dialects::DialectType::ClickHouse)
44957            ) && self.match_identifier("ALIAS")
44958            {
44959                // ClickHouse: ALIAS expr
44960                let expr = self.parse_or()?;
44961                col_def.alias_expr = Some(Box::new(expr));
44962            } else if matches!(
44963                self.config.dialect,
44964                Some(crate::dialects::DialectType::ClickHouse)
44965            ) && self.check(TokenType::Materialized)
44966                && !self.check_next(TokenType::View)
44967            {
44968                // ClickHouse: MATERIALIZED expr
44969                self.advance(); // consume MATERIALIZED
44970                let expr = self.parse_or()?;
44971                col_def.materialized_expr = Some(Box::new(expr));
44972            } else if matches!(
44973                self.config.dialect,
44974                Some(crate::dialects::DialectType::ClickHouse)
44975            ) && self.match_identifier("EPHEMERAL")
44976            {
44977                // ClickHouse: EPHEMERAL [expr]
44978                if !self.check(TokenType::Comma)
44979                    && !self.check(TokenType::RParen)
44980                    && !self.is_at_end()
44981                    && !self.check_identifier("CODEC")
44982                    && !self.check_identifier("TTL")
44983                    && !self.check(TokenType::Comment)
44984                {
44985                    let expr = self.parse_bitwise()?.unwrap_or(Expression::Null(Null));
44986                    col_def.ephemeral = Some(Some(Box::new(expr)));
44987                } else {
44988                    col_def.ephemeral = Some(None);
44989                }
44990            } else if matches!(
44991                self.config.dialect,
44992                Some(crate::dialects::DialectType::ClickHouse)
44993            ) && self.check_identifier("CODEC")
44994            {
44995                // ClickHouse: CODEC(LZ4HC(9), ZSTD, DELTA)
44996                self.advance(); // consume CODEC
44997                self.expect(TokenType::LParen)?;
44998                let start = self.current;
44999                let mut depth = 1;
45000                while !self.is_at_end() && depth > 0 {
45001                    if self.check(TokenType::LParen) {
45002                        depth += 1;
45003                    }
45004                    if self.check(TokenType::RParen) {
45005                        depth -= 1;
45006                        if depth == 0 {
45007                            break;
45008                        }
45009                    }
45010                    self.advance();
45011                }
45012                let codec_text = self.tokens_to_sql(start, self.current);
45013                self.expect(TokenType::RParen)?;
45014                col_def.codec = Some(codec_text);
45015            } else if matches!(
45016                self.config.dialect,
45017                Some(crate::dialects::DialectType::ClickHouse)
45018            ) && self.match_identifier("TTL")
45019            {
45020                // ClickHouse: TTL expr
45021                let expr = self.parse_expression()?;
45022                col_def.ttl_expr = Some(Box::new(expr));
45023            } else {
45024                break;
45025            }
45026        }
45027
45028        Ok(Some(Expression::ColumnDef(Box::new(col_def))))
45029    }
45030
45031    /// parse_foreign_key - Implemented from Python _parse_foreign_key
45032    /// Calls: parse_key_constraint_options, parse_wrapped_id_vars, parse_references
45033    #[allow(unused_variables, unused_mut)]
45034    pub fn parse_foreign_key(&mut self) -> Result<Option<Expression>> {
45035        if self.match_text_seq(&["NO", "ACTION"]) {
45036            return Ok(Some(Expression::ForeignKey(Box::new(ForeignKey {
45037                expressions: Vec::new(),
45038                reference: None,
45039                delete: None,
45040                update: None,
45041                options: Vec::new(),
45042            }))));
45043        }
45044        Ok(None)
45045    }
45046
45047    /// parse_format_json - Implemented from Python _parse_format_json
45048    #[allow(unused_variables, unused_mut)]
45049    pub fn parse_format_json(&mut self) -> Result<Option<Expression>> {
45050        if self.match_text_seq(&["FORMAT", "JSON"]) {
45051            // Matched: FORMAT JSON
45052            return Ok(None);
45053        }
45054        Ok(None)
45055    }
45056
45057    /// parse_format_name - Snowflake FILE_FORMAT = format_name property
45058    /// Parses: format_name (string or identifier)
45059    #[allow(unused_variables, unused_mut)]
45060    pub fn parse_format_name(&mut self) -> Result<Option<Expression>> {
45061        // Try to parse a string first, then fall back to table parts
45062        let value = if let Some(s) = self.parse_string()? {
45063            s
45064        } else if let Some(tp) = self.parse_table_parts()? {
45065            tp
45066        } else {
45067            return Ok(None);
45068        };
45069
45070        Ok(Some(Expression::Property(Box::new(Property {
45071            this: Box::new(Expression::Identifier(Identifier::new(
45072                "FORMAT_NAME".to_string(),
45073            ))),
45074            value: Some(Box::new(value)),
45075        }))))
45076    }
45077
45078    /// parse_freespace - Teradata FREESPACE property
45079    /// Parses: FREESPACE = number [PERCENT]
45080    #[allow(unused_variables, unused_mut)]
45081    pub fn parse_freespace(&mut self) -> Result<Option<Expression>> {
45082        // Optionally consume = sign
45083        self.match_token(TokenType::Eq);
45084
45085        // Parse the number value
45086        let this = self.parse_number()?;
45087        if this.is_none() {
45088            return Ok(None);
45089        }
45090
45091        // Check for PERCENT keyword
45092        let percent = if self.match_token(TokenType::Percent) {
45093            Some(Box::new(Expression::Boolean(BooleanLiteral {
45094                value: true,
45095            })))
45096        } else {
45097            None
45098        };
45099
45100        Ok(Some(Expression::FreespaceProperty(Box::new(
45101            FreespaceProperty {
45102                this: Box::new(this.unwrap()),
45103                percent,
45104            },
45105        ))))
45106    }
45107
45108    /// parse_function - Ported from Python _parse_function
45109    /// Parses function calls like func_name(args) or {fn func_name(args)} (ODBC syntax)
45110    pub fn parse_function(&mut self) -> Result<Option<Expression>> {
45111        // Check for ODBC escape syntax: {fn function_call}
45112        let fn_syntax = if self.check(TokenType::LBrace) {
45113            if let Some(next) = self.tokens.get(self.current + 1) {
45114                if next.text.to_uppercase() == "FN" {
45115                    self.advance(); // consume {
45116                    self.advance(); // consume FN
45117                    true
45118                } else {
45119                    false
45120                }
45121            } else {
45122                false
45123            }
45124        } else {
45125            false
45126        };
45127
45128        let func = self.parse_function_call()?;
45129
45130        if fn_syntax {
45131            self.match_token(TokenType::RBrace);
45132        }
45133
45134        Ok(func)
45135    }
45136
45137    /// parse_function_args - Ported from Python _parse_function_args
45138    /// Parses the arguments inside a function call, handling aliases and key-value pairs
45139    pub fn parse_function_args_list(&mut self) -> Result<Vec<Expression>> {
45140        let mut args = Vec::new();
45141
45142        if self.check(TokenType::RParen) {
45143            return Ok(args);
45144        }
45145
45146        loop {
45147            // Try to parse expression with optional alias
45148            if let Some(expr) = self.parse_assignment()? {
45149                // Handle explicit AS alias inside function args (e.g. `tuple(1 AS "a", 2 AS "b")`)
45150                if self.match_token(TokenType::As) {
45151                    let alias_token = self.advance();
45152                    let alias_name = if alias_token.token_type == TokenType::QuotedIdentifier {
45153                        // Preserve quoted identifiers
45154                        let raw = alias_token.text.clone();
45155                        let mut ident = Identifier::new(raw);
45156                        ident.quoted = true;
45157                        ident
45158                    } else {
45159                        Identifier::new(alias_token.text.clone())
45160                    };
45161                    args.push(Expression::Alias(Box::new(crate::expressions::Alias {
45162                        this: expr,
45163                        alias: alias_name,
45164                        column_aliases: Vec::new(),
45165                        pre_alias_comments: Vec::new(),
45166                        trailing_comments: Vec::new(),
45167                        inferred_type: None,
45168                    })));
45169                } else {
45170                    args.push(expr);
45171                }
45172            }
45173
45174            if !self.match_token(TokenType::Comma) {
45175                break;
45176            }
45177        }
45178
45179        Ok(args)
45180    }
45181
45182    /// parse_function_call - Ported from Python _parse_function_call
45183    /// Parses a function call expression like func_name(arg1, arg2, ...)
45184    pub fn parse_function_call(&mut self) -> Result<Option<Expression>> {
45185        if self.is_at_end() {
45186            return Ok(None);
45187        }
45188
45189        let token = self.peek().clone();
45190        let token_type = token.token_type.clone();
45191        let name = token.text.clone();
45192        let _upper_name = name.to_uppercase();
45193
45194        // Check for no-paren functions like CURRENT_DATE, CURRENT_TIMESTAMP
45195        if self.is_no_paren_function() {
45196            // Check if next token is NOT a paren (so it's used without parens)
45197            if !self.check_next(TokenType::LParen) {
45198                self.advance();
45199                return Ok(Some(Expression::Function(Box::new(Function {
45200                    name, // Preserve original case; generator handles normalization
45201                    args: Vec::new(),
45202                    distinct: false,
45203                    trailing_comments: Vec::new(),
45204                    use_bracket_syntax: false,
45205                    no_parens: true,
45206                    quoted: false,
45207                    span: None,
45208                    inferred_type: None,
45209                }))));
45210            }
45211        }
45212
45213        // Must be followed by left paren
45214        if !self.check_next(TokenType::LParen) {
45215            return Ok(None);
45216        }
45217
45218        // Token must be a valid function name token
45219        let is_valid_func_token = matches!(
45220            token_type,
45221            TokenType::Identifier
45222                | TokenType::Var
45223                | TokenType::If
45224                | TokenType::Left
45225                | TokenType::Right
45226                | TokenType::Insert
45227                | TokenType::Replace
45228                | TokenType::Row
45229                | TokenType::Index
45230        );
45231        if !is_valid_func_token {
45232            return Ok(None);
45233        }
45234
45235        self.advance(); // consume function name
45236        self.advance(); // consume (
45237
45238        // Check for DISTINCT keyword
45239        let distinct = self.match_token(TokenType::Distinct);
45240
45241        // Parse arguments
45242        let args = self.parse_function_args_list()?;
45243
45244        self.match_token(TokenType::RParen);
45245
45246        // Handle window specifications
45247        let func_expr = Expression::Function(Box::new(Function {
45248            name, // Preserve original case; generator handles normalization
45249            args,
45250            distinct,
45251            trailing_comments: Vec::new(),
45252            use_bracket_syntax: false,
45253            no_parens: false,
45254            quoted: false,
45255            span: None,
45256            inferred_type: None,
45257        }));
45258
45259        // Check for OVER clause (window function)
45260        if self.match_token(TokenType::Over) {
45261            // Parse window spec - create a simple WindowSpec
45262            if self.match_token(TokenType::LParen) {
45263                // Use parse_window_spec_inner to handle DISTRIBUTE BY/SORT BY (Hive)
45264                let spec = self.parse_window_spec_inner()?;
45265                self.expect(TokenType::RParen)?;
45266
45267                if let Some(spec_expr) = spec {
45268                    return Ok(Some(spec_expr));
45269                }
45270            }
45271        }
45272
45273        Ok(Some(func_expr))
45274    }
45275
45276    /// parse_function_parameter - Ported from Python _parse_function_parameter
45277    /// Parses a function parameter in CREATE FUNCTION (name type [DEFAULT expr])
45278    pub fn parse_function_parameter(&mut self) -> Result<Option<Expression>> {
45279        // Parse optional parameter mode (IN, OUT, INOUT)
45280        let _mode = if self.match_texts(&["IN"]) {
45281            if self.match_texts(&["OUT"]) {
45282                Some(ParameterMode::InOut)
45283            } else {
45284                Some(ParameterMode::In)
45285            }
45286        } else if self.match_texts(&["OUT"]) {
45287            Some(ParameterMode::Out)
45288        } else if self.match_texts(&["INOUT"]) {
45289            Some(ParameterMode::InOut)
45290        } else {
45291            None
45292        };
45293
45294        // Parse parameter name (optional in some dialects)
45295        let name_expr = self.parse_id_var()?;
45296        let name = name_expr.and_then(|n| match n {
45297            Expression::Identifier(id) => Some(id),
45298            _ => None,
45299        });
45300
45301        // Parse data type - returns Result<DataType>, not Result<Option<DataType>>
45302        // We need to handle the case where we can't parse a data type
45303        let data_type_result = self.parse_data_type();
45304        let _data_type = match data_type_result {
45305            Ok(dt) => dt,
45306            Err(_) => return Ok(None),
45307        };
45308
45309        // Parse optional DEFAULT value
45310        let _default = if self.match_token(TokenType::Default) || self.match_texts(&["="]) {
45311            self.parse_disjunction()?
45312        } else {
45313            None
45314        };
45315
45316        // Return the name as a Column expression
45317        Ok(Some(Expression::Column(Column {
45318            name: Identifier {
45319                name: name.map(|n| n.name).unwrap_or_default(),
45320                quoted: false,
45321                trailing_comments: Vec::new(),
45322                span: None,
45323            },
45324            table: None,
45325            join_mark: false,
45326            trailing_comments: Vec::new(),
45327            span: None,
45328            inferred_type: None,
45329        })))
45330    }
45331
45332    /// parse_gap_fill - Ported from Python _parse_gap_fill
45333    #[allow(unused_variables, unused_mut)]
45334    /// parse_gap_fill - Parses GAP_FILL function for time series
45335    /// Example: GAP_FILL(TABLE t, ts_column, bucket_width, partitioning_columns, value_columns)
45336    pub fn parse_gap_fill(&mut self) -> Result<Option<Expression>> {
45337        // Optional TABLE keyword
45338        self.match_token(TokenType::Table);
45339
45340        // Parse the table reference
45341        let this = self.parse_table()?;
45342        if this.is_none() {
45343            return Ok(None);
45344        }
45345
45346        // Parse comma-separated arguments
45347        self.match_token(TokenType::Comma);
45348        let mut args = self.parse_expression_list()?;
45349
45350        // Extract arguments by position
45351        let ts_column = args.get(0).cloned().map(Box::new);
45352        let bucket_width = args.get(1).cloned().map(Box::new);
45353        let partitioning_columns = args.get(2).cloned().map(Box::new);
45354        let value_columns = args.get(3).cloned().map(Box::new);
45355
45356        Ok(Some(Expression::GapFill(Box::new(GapFill {
45357            this: Box::new(this.unwrap()),
45358            ts_column,
45359            bucket_width,
45360            partitioning_columns,
45361            value_columns,
45362            origin: None,
45363            ignore_nulls: None,
45364        }))))
45365    }
45366
45367    /// parse_semantic_view - Parse Snowflake SEMANTIC_VIEW function
45368    /// Example: SEMANTIC_VIEW(foo METRICS a.b, a.c DIMENSIONS a.b, a.c WHERE a.b > '1995-01-01')
45369    pub fn parse_semantic_view(&mut self) -> Result<Expression> {
45370        // Parse the table/view reference as a primary expression (identifier or qualified name)
45371        let this = self.parse_primary()?;
45372
45373        let mut metrics = None;
45374        let mut dimensions = None;
45375        let mut facts = None;
45376        let mut where_clause = None;
45377
45378        // Parse optional clauses: METRICS, DIMENSIONS, FACTS, WHERE
45379        while !self.check(TokenType::RParen) && !self.is_at_end() {
45380            if self.match_identifier("METRICS") {
45381                // Parse comma-separated expressions until next keyword or )
45382                let exprs = self.parse_semantic_view_list()?;
45383                metrics = Some(Box::new(Expression::Tuple(Box::new(Tuple {
45384                    expressions: exprs,
45385                }))));
45386            } else if self.match_identifier("DIMENSIONS") {
45387                let exprs = self.parse_semantic_view_list()?;
45388                dimensions = Some(Box::new(Expression::Tuple(Box::new(Tuple {
45389                    expressions: exprs,
45390                }))));
45391            } else if self.match_identifier("FACTS") {
45392                let exprs = self.parse_semantic_view_list()?;
45393                facts = Some(Box::new(Expression::Tuple(Box::new(Tuple {
45394                    expressions: exprs,
45395                }))));
45396            } else if self.match_token(TokenType::Where) {
45397                // Parse the WHERE expression
45398                where_clause = Some(Box::new(self.parse_expression()?));
45399                // WHERE is the last clause, break after parsing it
45400                break;
45401            } else {
45402                // Unknown token
45403                break;
45404            }
45405        }
45406
45407        Ok(Expression::SemanticView(Box::new(SemanticView {
45408            this: Box::new(this),
45409            metrics,
45410            dimensions,
45411            facts,
45412            where_: where_clause,
45413        })))
45414    }
45415
45416    /// Helper to parse comma-separated expression list for SEMANTIC_VIEW clauses
45417    /// Stops at METRICS, DIMENSIONS, FACTS, WHERE, or )
45418    /// Each element can have an optional AS alias: expr AS name
45419    fn parse_semantic_view_list(&mut self) -> Result<Vec<Expression>> {
45420        let first = self.parse_semantic_view_element()?;
45421        let mut exprs = vec![first];
45422        while self.match_token(TokenType::Comma) {
45423            // Check if next token is a keyword that starts a new clause
45424            if self.check_identifier("METRICS")
45425                || self.check_identifier("DIMENSIONS")
45426                || self.check_identifier("FACTS")
45427                || self.check(TokenType::Where)
45428                || self.check(TokenType::RParen)
45429            {
45430                break;
45431            }
45432            exprs.push(self.parse_semantic_view_element()?);
45433        }
45434        Ok(exprs)
45435    }
45436
45437    /// Parse a single SEMANTIC_VIEW element: expression [AS alias]
45438    fn parse_semantic_view_element(&mut self) -> Result<Expression> {
45439        let expr = self.parse_disjunction()?.ok_or_else(|| {
45440            self.parse_error("Expected expression in SEMANTIC_VIEW clause")
45441        })?;
45442        // Check for optional explicit AS alias
45443        if self.match_token(TokenType::As) {
45444            let alias = self.expect_identifier_or_keyword_with_quoted()?;
45445            Ok(Expression::Alias(Box::new(crate::expressions::Alias {
45446                this: expr,
45447                alias,
45448                column_aliases: Vec::new(),
45449                pre_alias_comments: Vec::new(),
45450                trailing_comments: Vec::new(),
45451                inferred_type: None,
45452            })))
45453        } else {
45454            Ok(expr)
45455        }
45456    }
45457
45458    /// parse_grant_principal - Implemented from Python _parse_grant_principal
45459    /// Calls: parse_id_var
45460    #[allow(unused_variables, unused_mut)]
45461    pub fn parse_grant_principal(&mut self) -> Result<Option<Expression>> {
45462        if self.match_texts(&["ROLE", "GROUP"]) {
45463            // Matched one of: ROLE, GROUP
45464            return Ok(None);
45465        }
45466        Ok(None)
45467    }
45468
45469    /// parse_grant_privilege - Parse a single privilege in GRANT/REVOKE
45470    /// Parses: SELECT, INSERT, UPDATE(col1, col2), DELETE, etc.
45471    #[allow(unused_variables, unused_mut)]
45472    pub fn parse_grant_privilege(&mut self) -> Result<Option<Expression>> {
45473        // Collect privilege keywords (SELECT, INSERT, UPDATE, DELETE, ALL PRIVILEGES, etc.)
45474        let mut privilege_parts = Vec::new();
45475
45476        // Keep consuming keywords until we hit a follow token
45477        // Follow tokens are: comma, ON, left paren
45478        while !self.is_at_end() {
45479            // Check if we've hit a follow token
45480            if self.check(TokenType::Comma)
45481                || self.check(TokenType::On)
45482                || self.check(TokenType::LParen)
45483            {
45484                break;
45485            }
45486
45487            // Get the current token text
45488            let text = self.peek().text.to_uppercase();
45489            privilege_parts.push(text);
45490            self.advance();
45491        }
45492
45493        if privilege_parts.is_empty() {
45494            return Ok(None);
45495        }
45496
45497        let privilege_str = privilege_parts.join(" ");
45498
45499        // Check for column list in parentheses (e.g., UPDATE(col1, col2))
45500        let expressions = if self.match_token(TokenType::LParen) {
45501            let mut columns = Vec::new();
45502            loop {
45503                if let Some(col) = self.parse_column()? {
45504                    columns.push(col);
45505                } else {
45506                    break;
45507                }
45508                if !self.match_token(TokenType::Comma) {
45509                    break;
45510                }
45511            }
45512            self.match_token(TokenType::RParen);
45513            columns
45514        } else {
45515            Vec::new()
45516        };
45517
45518        Ok(Some(Expression::GrantPrivilege(Box::new(GrantPrivilege {
45519            this: Box::new(Expression::Identifier(Identifier::new(privilege_str))),
45520            expressions,
45521        }))))
45522    }
45523
45524    /// parse_grant_revoke_common - Parses common parts of GRANT/REVOKE statements
45525    /// Python: _parse_grant_revoke_common
45526    /// Returns a Tuple containing (privileges, kind, securable)
45527    pub fn parse_grant_revoke_common(&mut self) -> Result<Option<Expression>> {
45528        // Parse privileges (CSV of grant privileges)
45529        let mut privileges = Vec::new();
45530        loop {
45531            if let Some(priv_expr) = self.parse_grant_privilege()? {
45532                privileges.push(priv_expr);
45533            }
45534            if !self.match_token(TokenType::Comma) {
45535                break;
45536            }
45537        }
45538
45539        // Match ON keyword
45540        self.match_token(TokenType::On);
45541
45542        // Parse kind (TABLE, VIEW, SCHEMA, DATABASE, etc.)
45543        let kind = if self.match_texts(&[
45544            "TABLE",
45545            "VIEW",
45546            "SCHEMA",
45547            "DATABASE",
45548            "SEQUENCE",
45549            "FUNCTION",
45550            "PROCEDURE",
45551            "INDEX",
45552            "TYPE",
45553            "TABLESPACE",
45554            "ROLE",
45555            "USER",
45556        ]) {
45557            let kind_text = self.previous().text.to_uppercase();
45558            Some(Expression::Var(Box::new(Var { this: kind_text })))
45559        } else {
45560            None
45561        };
45562
45563        // Try to parse securable (table parts)
45564        let securable = self.parse_table_parts()?;
45565
45566        // Return as Tuple with three elements: privileges_list, kind, securable
45567        let privileges_expr = Expression::Tuple(Box::new(Tuple {
45568            expressions: privileges,
45569        }));
45570
45571        let mut result_exprs = vec![privileges_expr];
45572
45573        if let Some(k) = kind {
45574            result_exprs.push(k);
45575        } else {
45576            result_exprs.push(Expression::Null(Null));
45577        }
45578
45579        if let Some(s) = securable {
45580            result_exprs.push(s);
45581        } else {
45582            result_exprs.push(Expression::Null(Null));
45583        }
45584
45585        Ok(Some(Expression::Tuple(Box::new(Tuple {
45586            expressions: result_exprs,
45587        }))))
45588    }
45589
45590    /// parse_group - Parse GROUP BY clause
45591    /// Python: if not self._match(TokenType.GROUP_BY): return None; expressions = self._parse_csv(self._parse_disjunction)
45592    pub fn parse_group(&mut self) -> Result<Option<Expression>> {
45593        // Check for GROUP BY token (which should be parsed as Group + By tokens)
45594        if !self.match_token(TokenType::Group) {
45595            return Ok(None);
45596        }
45597        // Consume BY if present
45598        self.match_token(TokenType::By);
45599
45600        // Check for optional ALL/DISTINCT
45601        // Some(true) = ALL, Some(false) = DISTINCT, None = no modifier
45602        let all = if self.match_token(TokenType::All) {
45603            Some(true)
45604        } else if self.match_token(TokenType::Distinct) {
45605            Some(false)
45606        } else {
45607            None
45608        };
45609
45610        // Parse comma-separated expressions
45611        let mut expressions = Vec::new();
45612        loop {
45613            match self.parse_expression() {
45614                Ok(expr) => expressions.push(expr),
45615                Err(_) => break,
45616            }
45617            if !self.match_token(TokenType::Comma) {
45618                break;
45619            }
45620        }
45621
45622        // Handle TOTALS (ClickHouse)
45623        let totals = if self.match_text_seq(&["WITH", "TOTALS"]) {
45624            Some(Box::new(Expression::Boolean(BooleanLiteral {
45625                value: true,
45626            })))
45627        } else if self.match_text_seq(&["TOTALS"]) {
45628            Some(Box::new(Expression::Boolean(BooleanLiteral {
45629                value: true,
45630            })))
45631        } else {
45632            None
45633        };
45634
45635        Ok(Some(Expression::Group(Box::new(Group {
45636            expressions,
45637            grouping_sets: None,
45638            cube: None,
45639            rollup: None,
45640            totals,
45641            all,
45642        }))))
45643    }
45644
45645    /// parse_group_concat - Ported from Python _parse_group_concat
45646    #[allow(unused_variables, unused_mut)]
45647    /// parse_group_concat - Parses MySQL GROUP_CONCAT function
45648    /// Example: GROUP_CONCAT(DISTINCT col ORDER BY col SEPARATOR ',')
45649    pub fn parse_group_concat(&mut self) -> Result<Option<Expression>> {
45650        // Check for DISTINCT
45651        let distinct = self.match_token(TokenType::Distinct);
45652
45653        // Parse expression(s)
45654        let expr = self.parse_expression()?;
45655
45656        // Parse optional ORDER BY
45657        let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
45658            let mut orderings = Vec::new();
45659            loop {
45660                let order_expr = self.parse_expression()?;
45661                let desc = if self.match_token(TokenType::Desc) {
45662                    true
45663                } else {
45664                    self.match_token(TokenType::Asc);
45665                    false
45666                };
45667                let nulls_first = if self.match_keywords(&[TokenType::Nulls, TokenType::First]) {
45668                    Some(true)
45669                } else if self.match_keywords(&[TokenType::Nulls, TokenType::Last]) {
45670                    Some(false)
45671                } else {
45672                    None
45673                };
45674                orderings.push(Ordered {
45675                    this: order_expr,
45676                    desc,
45677                    nulls_first,
45678                    explicit_asc: !desc,
45679                    with_fill: None,
45680                });
45681                if !self.match_token(TokenType::Comma) {
45682                    break;
45683                }
45684            }
45685            Some(orderings)
45686        } else {
45687            None
45688        };
45689
45690        // Parse optional SEPARATOR
45691        let separator = if self.match_token(TokenType::Separator) {
45692            self.parse_string()?
45693        } else {
45694            None
45695        };
45696
45697        Ok(Some(Expression::GroupConcat(Box::new(GroupConcatFunc {
45698            this: expr,
45699            separator,
45700            order_by,
45701            distinct,
45702            filter: None,
45703            inferred_type: None,
45704        }))))
45705    }
45706
45707    /// parse_grouping_set - Delegates to parse_grouping_sets
45708    #[allow(unused_variables, unused_mut)]
45709    pub fn parse_grouping_set(&mut self) -> Result<Option<Expression>> {
45710        self.parse_grouping_sets()
45711    }
45712
45713    /// parse_grouping_sets - Ported from Python _parse_grouping_sets
45714    /// Parses GROUPING SETS ((...), (...)) in GROUP BY
45715    #[allow(unused_variables, unused_mut)]
45716    pub fn parse_grouping_sets(&mut self) -> Result<Option<Expression>> {
45717        // Check for GROUPING SETS keyword
45718        if !self.match_text_seq(&["GROUPING", "SETS"]) {
45719            return Ok(None);
45720        }
45721
45722        // Parse wrapped grouping sets
45723        self.expect(TokenType::LParen)?;
45724        let mut expressions = Vec::new();
45725
45726        if !self.check(TokenType::RParen) {
45727            loop {
45728                // Each grouping set can be:
45729                // - A nested GROUPING SETS
45730                // - CUBE or ROLLUP
45731                // - A parenthesized list
45732                // - A single expression
45733                if let Some(nested) = self.parse_grouping_sets()? {
45734                    expressions.push(nested);
45735                } else if let Some(cube_rollup) = self.parse_cube_or_rollup()? {
45736                    expressions.push(cube_rollup);
45737                } else if self.match_token(TokenType::LParen) {
45738                    // Parenthesized group
45739                    let mut group = Vec::new();
45740                    if !self.check(TokenType::RParen) {
45741                        loop {
45742                            match self.parse_bitwise() {
45743                                Ok(Some(expr)) => group.push(expr),
45744                                Ok(None) => break,
45745                                Err(e) => return Err(e),
45746                            }
45747                            if !self.match_token(TokenType::Comma) {
45748                                break;
45749                            }
45750                        }
45751                    }
45752                    self.expect(TokenType::RParen)?;
45753                    expressions.push(Expression::Tuple(Box::new(Tuple { expressions: group })));
45754                } else {
45755                    // Single expression
45756                    match self.parse_bitwise() {
45757                        Ok(Some(expr)) => expressions.push(expr),
45758                        Ok(None) => break,
45759                        Err(e) => return Err(e),
45760                    }
45761                }
45762
45763                if !self.match_token(TokenType::Comma) {
45764                    break;
45765                }
45766            }
45767        }
45768
45769        self.expect(TokenType::RParen)?;
45770
45771        Ok(Some(Expression::GroupingSets(Box::new(GroupingSets {
45772            expressions,
45773        }))))
45774    }
45775
45776    /// parse_having - Parse HAVING clause
45777    /// Python: if not self._match(TokenType.HAVING): return None; return exp.Having(this=self._parse_disjunction())
45778    pub fn parse_having(&mut self) -> Result<Option<Expression>> {
45779        if !self.match_token(TokenType::Having) {
45780            return Ok(None);
45781        }
45782        // Parse the condition expression
45783        let condition = self.parse_expression()?;
45784        Ok(Some(Expression::Having(Box::new(Having {
45785            this: condition,
45786            comments: Vec::new(),
45787        }))))
45788    }
45789
45790    /// parse_having_max - Implemented from Python _parse_having_max
45791    /// Calls: parse_column
45792    #[allow(unused_variables, unused_mut)]
45793    pub fn parse_having_max(&mut self) -> Result<Option<Expression>> {
45794        if self.match_texts(&["MAX", "MIN"]) {
45795            // Matched one of: MAX, MIN
45796            return Ok(None);
45797        }
45798        Ok(None)
45799    }
45800
45801    /// parse_heredoc - Implemented from Python _parse_heredoc
45802    /// Parses dollar-quoted strings: $$content$$, $tag$content$tag$
45803    pub fn parse_heredoc(&mut self) -> Result<Option<Expression>> {
45804        // Check if current token is a HEREDOC_STRING type
45805        if self.match_token(TokenType::HeredocString) {
45806            let text = self.previous().text.clone();
45807            return Ok(Some(Expression::Heredoc(Box::new(Heredoc {
45808                this: Box::new(Expression::Literal(Literal::String(text))),
45809                tag: None,
45810            }))));
45811        }
45812
45813        // Try to parse $...$ or $tag$...$tag$
45814        if !self.match_text_seq(&["$"]) {
45815            return Ok(None);
45816        }
45817
45818        // Collect the tag text (if any) and the closing marker
45819        let mut tags = vec!["$".to_string()];
45820        let mut tag_text: Option<String> = None;
45821
45822        // Check if next token is connected (no whitespace) and collect tag
45823        if !self.is_at_end() {
45824            let next_text = self.peek().text.to_uppercase();
45825            if next_text == "$" {
45826                // Simple $$ ... $$ case
45827                self.advance();
45828                tags.push("$".to_string());
45829            } else {
45830                // $tag$ ... $tag$ case
45831                self.advance();
45832                tag_text = Some(next_text.clone());
45833                tags.push(next_text);
45834
45835                // Expect closing $
45836                if self.match_text_seq(&["$"]) {
45837                    tags.push("$".to_string());
45838                } else {
45839                    return Err(self.parse_error("No closing $ found"));
45840                }
45841            }
45842        }
45843
45844        // Now collect content until we find the closing tags
45845        let mut content_parts = Vec::new();
45846        let closing_tag = tags.join("");
45847
45848        while !self.is_at_end() {
45849            // Build current sequence to check for closing tag
45850            let current_text = self.peek().text.clone();
45851
45852            // Check if we've reached the closing tag
45853            if current_text == "$" || current_text.to_uppercase() == closing_tag {
45854                // Try to match the full closing sequence
45855                let start_pos = self.current;
45856                let mut matched = true;
45857                for expected in &tags {
45858                    if self.is_at_end()
45859                        || self.peek().text.to_uppercase() != expected.to_uppercase()
45860                    {
45861                        matched = false;
45862                        break;
45863                    }
45864                    self.advance();
45865                }
45866                if matched {
45867                    // Found the closing tag
45868                    let content = content_parts.join(" ");
45869                    return Ok(Some(Expression::Heredoc(Box::new(Heredoc {
45870                        this: Box::new(Expression::Literal(Literal::String(content))),
45871                        tag: tag_text.map(|t| Box::new(Expression::Literal(Literal::String(t)))),
45872                    }))));
45873                }
45874                // Not the closing tag, backtrack and add to content
45875                self.current = start_pos;
45876            }
45877
45878            content_parts.push(self.advance().text.clone());
45879        }
45880
45881        Err(self.parse_error(&format!("No closing {} found", closing_tag)))
45882    }
45883
45884    /// parse_hint_body - Delegates to parse_hint_fallback_to_string
45885    #[allow(unused_variables, unused_mut)]
45886    pub fn parse_hint_body(&mut self) -> Result<Option<Expression>> {
45887        self.parse_hint_fallback_to_string()
45888    }
45889
45890    /// parse_hint_fallback_to_string - Parses remaining hint tokens as a raw string
45891    /// Python: _parse_hint_fallback_to_string
45892    /// Used when structured hint parsing fails - collects all remaining tokens
45893    pub fn parse_hint_fallback_to_string(&mut self) -> Result<Option<Expression>> {
45894        // Collect all remaining tokens as a string
45895        let mut parts = Vec::new();
45896        while !self.is_at_end() {
45897            let token = self.advance();
45898            parts.push(token.text.clone());
45899        }
45900
45901        if parts.is_empty() {
45902            return Ok(None);
45903        }
45904
45905        let hint_text = parts.join(" ");
45906        Ok(Some(Expression::Hint(Box::new(Hint {
45907            expressions: vec![HintExpression::Raw(hint_text)],
45908        }))))
45909    }
45910
45911    /// parse_hint_function_call - Delegates to parse_function_call
45912    #[allow(unused_variables, unused_mut)]
45913    pub fn parse_hint_function_call(&mut self) -> Result<Option<Expression>> {
45914        self.parse_function_call()
45915    }
45916
45917    /// parse_historical_data - Snowflake AT/BEFORE time travel clauses
45918    /// Parses: AT(TIMESTAMP => expr) or BEFORE(STATEMENT => 'id') etc.
45919    /// Reference: https://docs.snowflake.com/en/sql-reference/constructs/at-before
45920    #[allow(unused_variables, unused_mut)]
45921    pub fn parse_historical_data(&mut self) -> Result<Option<Expression>> {
45922        // Save position for backtracking
45923        let start_index = self.current;
45924
45925        // Check for AT, BEFORE, or END keywords
45926        let this = if self.match_texts(&["AT", "BEFORE", "END"]) {
45927            self.previous().text.to_uppercase()
45928        } else {
45929            return Ok(None);
45930        };
45931
45932        // Expect opening paren and kind (OFFSET, STATEMENT, STREAM, TIMESTAMP, VERSION)
45933        if !self.match_token(TokenType::LParen) {
45934            // Backtrack if not the right pattern
45935            self.current = start_index;
45936            return Ok(None);
45937        }
45938
45939        let kind = if self.match_texts(&["OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"]) {
45940            self.previous().text.to_uppercase()
45941        } else {
45942            // Backtrack if not the right pattern
45943            self.current = start_index;
45944            return Ok(None);
45945        };
45946
45947        // Expect => and expression
45948        if !self.match_token(TokenType::FArrow) {
45949            self.current = start_index;
45950            return Ok(None);
45951        }
45952
45953        let expression = self.parse_bitwise()?;
45954        if expression.is_none() {
45955            self.current = start_index;
45956            return Ok(None);
45957        }
45958
45959        self.match_token(TokenType::RParen); // Consume closing paren
45960
45961        Ok(Some(Expression::HistoricalData(Box::new(HistoricalData {
45962            this: Box::new(Expression::Identifier(Identifier::new(this))),
45963            kind,
45964            expression: Box::new(expression.unwrap()),
45965        }))))
45966    }
45967
45968    /// parse_id_var - Ported from Python _parse_id_var
45969    /// Parses an identifier or variable (more permissive than parse_identifier)
45970    #[allow(unused_variables, unused_mut)]
45971    pub fn parse_id_var(&mut self) -> Result<Option<Expression>> {
45972        // First try to parse a regular identifier
45973        if let Some(ident) = self.parse_identifier()? {
45974            return Ok(Some(ident));
45975        }
45976
45977        // Try to match Var token type
45978        if self.match_token(TokenType::Var) {
45979            let text = self.previous().text.clone();
45980            return Ok(Some(Expression::Identifier(Identifier {
45981                name: text,
45982                quoted: false,
45983                trailing_comments: Vec::new(),
45984                span: None,
45985            })));
45986        }
45987
45988        // Try to match string as identifier (some dialects allow this)
45989        if self.match_token(TokenType::String) {
45990            let text = self.previous().text.clone();
45991            return Ok(Some(Expression::Identifier(Identifier {
45992                name: text,
45993                quoted: true,
45994                trailing_comments: Vec::new(),
45995                span: None,
45996            })));
45997        }
45998
45999        // Accept keywords as identifiers in some contexts
46000        if self.check(TokenType::Select)
46001            || self.check(TokenType::From)
46002            || self.check(TokenType::Where)
46003            || self.check(TokenType::And)
46004            || self.check(TokenType::Or)
46005            || self.check(TokenType::Not)
46006            || self.check(TokenType::True)
46007            || self.check(TokenType::False)
46008            || self.check(TokenType::Null)
46009        {
46010            // Don't consume keywords as identifiers in parse_id_var
46011            return Ok(None);
46012        }
46013
46014        Ok(None)
46015    }
46016
46017    /// parse_identifier - Parse quoted identifier
46018    /// Python: if self._match(TokenType.IDENTIFIER): return self._identifier_expression(quoted=True)
46019    pub fn parse_identifier(&mut self) -> Result<Option<Expression>> {
46020        // Match quoted identifiers (e.g., "column_name" or `column_name`)
46021        if self.match_token(TokenType::QuotedIdentifier) || self.match_token(TokenType::Identifier)
46022        {
46023            let text = self.previous().text.clone();
46024            let quoted = self.previous().token_type == TokenType::QuotedIdentifier;
46025            return Ok(Some(Expression::Identifier(Identifier {
46026                name: text,
46027                quoted,
46028                trailing_comments: Vec::new(),
46029                span: None,
46030            })));
46031        }
46032        Ok(None)
46033    }
46034
46035    /// Parse IF expression
46036    /// IF(condition, true_value, false_value) - function style
46037    /// IF condition THEN true_value ELSE false_value END - statement style
46038    pub fn parse_if(&mut self) -> Result<Option<Expression>> {
46039        // TSQL/Fabric: IF (cond) BEGIN ... END is a statement, not a function.
46040        // Parse condition, strip outer parens, then capture rest as command.
46041        if matches!(
46042            self.config.dialect,
46043            Some(crate::dialects::DialectType::TSQL) | Some(crate::dialects::DialectType::Fabric)
46044        ) && self.check(TokenType::LParen) {
46045            // Parse the parenthesized condition using balanced paren matching
46046            let cond_start = self.current;
46047            self.advance(); // consume opening (
46048            let mut depth = 1;
46049            while depth > 0 && !self.is_at_end() {
46050                if self.check(TokenType::LParen) {
46051                    depth += 1;
46052                } else if self.check(TokenType::RParen) {
46053                    depth -= 1;
46054                    if depth == 0 {
46055                        break;
46056                    }
46057                }
46058                self.advance();
46059            }
46060            // Extract condition text from source (inside outer parens)
46061            let cond_text = if let Some(ref source) = self.source {
46062                let inner_start = self.tokens[cond_start + 1].span.start;
46063                let inner_end = self.tokens[self.current].span.start;
46064                source[inner_start..inner_end].trim().to_string()
46065            } else {
46066                self.tokens_to_sql(cond_start + 1, self.current)
46067            };
46068            self.advance(); // consume closing )
46069
46070            // Now collect the rest (BEGIN...END) as raw text
46071            let body_start = self.current;
46072            while !self.is_at_end() && !self.check(TokenType::Semicolon) {
46073                self.advance();
46074            }
46075            let body_text = if let Some(ref source) = self.source {
46076                let start_span = self.tokens[body_start].span.start;
46077                let end_span = if self.current > 0 { self.tokens[self.current - 1].span.end } else { start_span };
46078                source[start_span..end_span].trim().to_string()
46079            } else {
46080                self.tokens_to_sql(body_start, self.current)
46081            };
46082            let command_text = format!("IF {} {}", cond_text, body_text);
46083            return Ok(Some(Expression::Command(Box::new(crate::expressions::Command {
46084                this: command_text,
46085            }))));
46086        }
46087
46088        // Function style: IF(cond, true, false)
46089        if self.match_token(TokenType::LParen) {
46090            // ClickHouse: if() with zero args is valid (used in test queries)
46091            if self.check(TokenType::RParen) {
46092                self.advance(); // consume RParen
46093                return Ok(Some(Expression::Function(Box::new(Function {
46094                    name: "IF".to_string(),
46095                    args: vec![],
46096                    distinct: false,
46097                    trailing_comments: Vec::new(),
46098                    use_bracket_syntax: false,
46099                    no_parens: false,
46100                    quoted: false,
46101                    span: None,
46102                    inferred_type: None,
46103                }))));
46104            }
46105            let args = self.parse_expression_list()?;
46106            self.expect(TokenType::RParen)?;
46107
46108            if args.len() == 3 {
46109                return Ok(Some(Expression::IfFunc(Box::new(IfFunc {
46110                    original_name: None,
46111                    condition: args[0].clone(),
46112                    true_value: args[1].clone(),
46113                    false_value: Some(args[2].clone()),
46114                    inferred_type: None,
46115                }))));
46116            } else if args.len() == 2 {
46117                return Ok(Some(Expression::IfFunc(Box::new(IfFunc {
46118                    original_name: None,
46119                    condition: args[0].clone(),
46120                    true_value: args[1].clone(),
46121                    false_value: None,
46122                    inferred_type: None,
46123                }))));
46124            } else if args.len() == 1 {
46125                return Ok(Some(Expression::Function(Box::new(Function {
46126                    name: "IF".to_string(),
46127                    args,
46128                    distinct: false,
46129                    trailing_comments: Vec::new(),
46130                    use_bracket_syntax: false,
46131                    no_parens: false,
46132                    quoted: false,
46133                    span: None,
46134                    inferred_type: None,
46135                }))));
46136            } else {
46137                return Err(self.parse_error("IF function requires 2 or 3 arguments"));
46138            }
46139        }
46140
46141        // TSQL: IF OBJECT_ID(...) IS NOT NULL [BEGIN] DROP TABLE x [; END] -> DROP TABLE IF EXISTS x
46142        if matches!(
46143            self.config.dialect,
46144            Some(crate::dialects::DialectType::TSQL) | Some(crate::dialects::DialectType::Fabric)
46145        ) {
46146            let saved = self.current;
46147            if self.match_text_seq(&["OBJECT_ID"]) {
46148                // Capture the OBJECT_ID arguments text for TSQL round-trip
46149                let object_id_args_text = if self.match_token(TokenType::LParen) {
46150                    let args_start = self.current;
46151                    let args = self.parse_expression_list()?;
46152                    // Reconstruct args text from source
46153                    let args_text = if let Some(ref source) = self.source {
46154                        let start_span = self.tokens[args_start].span.start;
46155                        let end_span = self.tokens[self.current].span.start;
46156                        source[start_span..end_span].trim().to_string()
46157                    } else {
46158                        // Fallback: generate from parsed expressions
46159                        args.iter()
46160                            .map(|a| format!("{:?}", a))
46161                            .collect::<Vec<_>>()
46162                            .join(", ")
46163                    };
46164                    let _ = self.match_token(TokenType::RParen);
46165                    Some(args_text)
46166                } else {
46167                    None
46168                };
46169                if self.match_text_seq(&["IS", "NOT", "NULL"]) {
46170                    // Check for DROP directly or BEGIN ... DROP ... END
46171                    let has_begin = self.match_token(TokenType::Begin);
46172                    if self.check(TokenType::Drop) {
46173                        // Parse DROP TABLE, forcing if_exists = true
46174                        self.advance(); // consume DROP
46175                        if self.match_token(TokenType::Table) {
46176                            // Parse table names
46177                            let mut names = Vec::new();
46178                            loop {
46179                                names.push(self.parse_table_ref()?);
46180                                if !self.match_token(TokenType::Comma) {
46181                                    break;
46182                                }
46183                            }
46184                            // If we had BEGIN, consume optional ; and END
46185                            if has_begin {
46186                                let _ = self.match_token(TokenType::Semicolon);
46187                                let _ = self.match_token(TokenType::End);
46188                            }
46189                            return Ok(Some(Expression::DropTable(Box::new(
46190                                crate::expressions::DropTable {
46191                                    names,
46192                                    if_exists: true,
46193                                    cascade: false,
46194                                    cascade_constraints: false,
46195                                    purge: false,
46196                                    leading_comments: Vec::new(),
46197                                    object_id_args: object_id_args_text,
46198                                },
46199                            ))));
46200                        }
46201                    }
46202                }
46203                // Retreat if pattern didn't match
46204                self.current = saved;
46205            }
46206        }
46207
46208        // Statement style: IF cond THEN true [ELSE false] END/ENDIF
46209        // Use parse_disjunction (parse_or) for condition - same as Python sqlglot
46210        // This ensures we stop at THEN rather than consuming too much
46211        let condition = match self.parse_disjunction()? {
46212            Some(c) => c,
46213            None => return Ok(None),
46214        };
46215
46216        if !self.match_token(TokenType::Then) {
46217            // Not statement style, return as just the expression parsed
46218            return Ok(Some(condition));
46219        }
46220
46221        // Parse true value - use parse_disjunction to stop at ELSE/END
46222        let true_value = match self.parse_disjunction()? {
46223            Some(v) => v,
46224            None => return Err(self.parse_error("Expected expression after THEN")),
46225        };
46226
46227        let false_value = if self.match_token(TokenType::Else) {
46228            match self.parse_disjunction()? {
46229                Some(v) => Some(v),
46230                None => return Err(self.parse_error("Expected expression after ELSE")),
46231            }
46232        } else {
46233            None
46234        };
46235
46236        // Consume END or ENDIF (Exasol tokenizes ENDIF as END)
46237        self.match_token(TokenType::End);
46238
46239        Ok(Some(Expression::IfFunc(Box::new(IfFunc {
46240            original_name: None,
46241            condition,
46242            true_value,
46243            false_value,
46244            inferred_type: None,
46245        }))))
46246    }
46247
46248    /// parse_in - Ported from Python _parse_in
46249    /// Parses IN expression: expr IN (values...) or expr IN (subquery)
46250    /// Can also parse standalone IN list after IN keyword has been matched
46251    #[allow(unused_variables, unused_mut)]
46252    pub fn parse_in(&mut self) -> Result<Option<Expression>> {
46253        // If we're at IN keyword, parse what follows
46254        if self.match_token(TokenType::In) {
46255            return Err(self.parse_error("Expected expression before IN"));
46256        }
46257
46258        // Try to parse as a complete expression: left IN (...)
46259        let saved_pos = self.current;
46260
46261        // Parse the left side expression
46262        match self.parse_bitwise() {
46263            Ok(Some(left_expr)) => {
46264                // Check for optional NOT
46265                let negate = self.match_token(TokenType::Not);
46266
46267                // Expect IN keyword
46268                if self.match_token(TokenType::In) {
46269                    let in_result = self.parse_in_with_expr(Some(left_expr))?;
46270                    return Ok(Some(if negate {
46271                        Expression::Not(Box::new(UnaryOp {
46272                            this: in_result,
46273                            inferred_type: None,
46274                        }))
46275                    } else {
46276                        in_result
46277                    }));
46278                }
46279
46280                // Not an IN expression, restore position
46281                self.current = saved_pos;
46282                Ok(None)
46283            }
46284            Ok(None) => {
46285                self.current = saved_pos;
46286                Ok(None)
46287            }
46288            Err(_) => {
46289                self.current = saved_pos;
46290                Ok(None)
46291            }
46292        }
46293    }
46294
46295    /// parse_index - Implemented from Python _parse_index
46296    /// Calls: parse_index_params, parse_id_var
46297    #[allow(unused_variables, unused_mut)]
46298    pub fn parse_index(&mut self) -> Result<Option<Expression>> {
46299        if self.match_text_seq(&["PRIMARY"]) {
46300            return Ok(Some(Expression::Index(Box::new(Index {
46301                this: None,
46302                table: None,
46303                unique: false,
46304                primary: None,
46305                amp: None,
46306                params: Vec::new(),
46307            }))));
46308        }
46309        if self.match_text_seq(&["AMP"]) {
46310            // Matched: AMP
46311            return Ok(None);
46312        }
46313        Ok(None)
46314    }
46315
46316    /// parse_index_params - Implemented from Python _parse_index_params
46317    /// Calls: parse_where, parse_wrapped_properties, parse_wrapped_id_vars
46318    #[allow(unused_variables, unused_mut)]
46319    pub fn parse_index_params(&mut self) -> Result<Option<Expression>> {
46320        if self.match_text_seq(&["INCLUDE"]) {
46321            return Ok(Some(Expression::IndexParameters(Box::new(
46322                IndexParameters {
46323                    using: None,
46324                    include: None,
46325                    columns: Vec::new(),
46326                    with_storage: None,
46327                    partition_by: None,
46328                    tablespace: None,
46329                    where_: None,
46330                    on: None,
46331                },
46332            ))));
46333        }
46334        if self.match_text_seq(&["USING", "INDEX", "TABLESPACE"]) {
46335            // Matched: USING INDEX TABLESPACE
46336            return Ok(None);
46337        }
46338        Ok(None)
46339    }
46340
46341    /// parse_initcap - Ported from Python _parse_initcap
46342    #[allow(unused_variables, unused_mut)]
46343    /// parse_initcap - Parses INITCAP function
46344    /// Example: INITCAP(str) or INITCAP(str, delimiter)
46345    pub fn parse_initcap(&mut self) -> Result<Option<Expression>> {
46346        // Parse the first argument (string to capitalize)
46347        let args = self.parse_expression_list()?;
46348
46349        if args.is_empty() {
46350            return Ok(None);
46351        }
46352
46353        // Initcap is a UnaryFunc
46354        Ok(Some(Expression::Initcap(Box::new(UnaryFunc::new(
46355            args.into_iter().next().unwrap(),
46356        )))))
46357    }
46358
46359    /// parse_inline - Implemented from Python _parse_inline
46360    #[allow(unused_variables, unused_mut)]
46361    pub fn parse_inline(&mut self) -> Result<Option<Expression>> {
46362        if self.match_text_seq(&["LENGTH"]) {
46363            // Matched: LENGTH
46364            return Ok(None);
46365        }
46366        Ok(None)
46367    }
46368
46369    /// parse_insert_table - Parse table reference for INSERT statement
46370    /// Parses: table_name [schema] [partition] [alias]
46371    /// This method is a simple wrapper around parse_table for INSERT context
46372    #[allow(unused_variables, unused_mut)]
46373    pub fn parse_insert_table(&mut self) -> Result<Option<Expression>> {
46374        // Parse the table reference - parse_table handles aliases
46375        self.parse_table()
46376    }
46377
46378    /// parse_interpolate - Implemented from Python _parse_interpolate
46379    /// Parses INTERPOLATE clause for ClickHouse ORDER BY WITH FILL
46380    pub fn parse_interpolate(&mut self) -> Result<Option<Expression>> {
46381        if !self.match_text_seq(&["INTERPOLATE"]) {
46382            return Ok(None);
46383        }
46384
46385        // Parse wrapped CSV of name-as-expression pairs
46386        if self.match_token(TokenType::LParen) {
46387            let mut expressions = Vec::new();
46388            loop {
46389                if let Some(expr) = self.parse_name_as_expression()? {
46390                    expressions.push(expr);
46391                }
46392                if !self.match_token(TokenType::Comma) {
46393                    break;
46394                }
46395            }
46396            self.match_token(TokenType::RParen);
46397
46398            if expressions.is_empty() {
46399                return Ok(None);
46400            }
46401
46402            return Ok(Some(Expression::Tuple(Box::new(Tuple { expressions }))));
46403        }
46404
46405        Ok(None)
46406    }
46407
46408    /// parse_interval - Creates Interval expression
46409    /// Parses INTERVAL expressions: INTERVAL '1 day', INTERVAL 1 MONTH, etc.
46410    #[allow(unused_variables, unused_mut)]
46411    pub fn parse_interval(&mut self) -> Result<Option<Expression>> {
46412        // Delegate to the existing try_parse_interval method
46413        self.try_parse_interval()
46414    }
46415
46416    /// parse_interval_span - Implemented from Python _parse_interval_span
46417    /// Calls: parse_function
46418    #[allow(unused_variables, unused_mut)]
46419    pub fn parse_interval_span(&mut self) -> Result<Option<Expression>> {
46420        if self.match_text_seq(&["TO"]) {
46421            return Ok(Some(Expression::Var(Box::new(Var {
46422                this: String::new(),
46423            }))));
46424        }
46425        if self.match_text_seq(&["TO"]) {
46426            // Matched: TO
46427            return Ok(None);
46428        }
46429        Ok(None)
46430    }
46431
46432    /// parse_into - Implemented from Python _parse_into
46433    /// Parses: INTO [TEMPORARY] [UNLOGGED] [TABLE] table_name
46434    /// Returns the table expression for the INTO clause
46435    #[allow(unused_variables, unused_mut)]
46436    pub fn parse_into(&mut self) -> Result<Option<Expression>> {
46437        if !self.match_token(TokenType::Into) {
46438            return Ok(None);
46439        }
46440
46441        // Optional TEMPORARY
46442        let _temp = self.match_token(TokenType::Temporary);
46443
46444        // Optional UNLOGGED
46445        let _unlogged = self.match_text_seq(&["UNLOGGED"]);
46446
46447        // Optional TABLE keyword
46448        let _ = self.match_token(TokenType::Table);
46449
46450        // Parse the table name
46451        self.parse_table_parts()
46452    }
46453
46454    /// parse_introducer - Parses MySQL introducer expression (_charset'string')
46455    /// Python: _parse_introducer
46456    /// Format: _charset 'literal'
46457    pub fn parse_introducer(&mut self) -> Result<Option<Expression>> {
46458        // We expect to have already consumed the introducer token (e.g., _utf8)
46459        let token = self.previous().clone();
46460
46461        // Try to parse a primary expression (usually a string literal)
46462        // parse_primary returns Expression (not Option), so we use it directly
46463        let literal = self.parse_primary()?;
46464
46465        // Check if it's a null expression (indicating nothing was parsed)
46466        match &literal {
46467            Expression::Null(_) => {
46468                // Just return as an identifier
46469                Ok(Some(Expression::Identifier(Identifier {
46470                    name: token.text.clone(),
46471                    quoted: false,
46472                    trailing_comments: Vec::new(),
46473                    span: None,
46474                })))
46475            }
46476            _ => Ok(Some(Expression::Introducer(Box::new(Introducer {
46477                this: Box::new(Expression::Identifier(Identifier {
46478                    name: token.text.clone(),
46479                    quoted: false,
46480                    trailing_comments: Vec::new(),
46481                    span: None,
46482                })),
46483                expression: Box::new(literal),
46484            })))),
46485        }
46486    }
46487
46488    /// parse_is - Implemented from Python _parse_is
46489    /// Calls: parse_null, parse_bitwise
46490    #[allow(unused_variables, unused_mut)]
46491    pub fn parse_is(&mut self) -> Result<Option<Expression>> {
46492        if self.match_text_seq(&["DISTINCT", "FROM"]) {
46493            return Ok(Some(Expression::JSON(Box::new(JSON {
46494                this: None,
46495                with_: None,
46496                unique: false,
46497            }))));
46498        }
46499        if self.match_text_seq(&["WITH"]) {
46500            // Matched: WITH
46501            return Ok(None);
46502        }
46503        if self.match_text_seq(&["WITHOUT"]) {
46504            // Matched: WITHOUT
46505            return Ok(None);
46506        }
46507        Ok(None)
46508    }
46509
46510    /// parse_join - Ported from Python _parse_join
46511    /// Parses a single JOIN clause: [method] [side] [kind] JOIN table [ON condition | USING (columns)]
46512    /// Returns the Join wrapped in an Expression, or None if no join is found
46513    #[allow(unused_variables, unused_mut)]
46514    pub fn parse_join(&mut self) -> Result<Option<Expression>> {
46515        // Check for comma-style implicit join
46516        if self.match_token(TokenType::Comma) {
46517            if let Ok(Some(table)) = self.parse_table() {
46518                return Ok(Some(Expression::Join(Box::new(Join {
46519                    this: table,
46520                    on: None,
46521                    using: Vec::new(),
46522                    kind: JoinKind::Implicit,
46523                    use_inner_keyword: false,
46524                    use_outer_keyword: false,
46525                    deferred_condition: false,
46526                    join_hint: None,
46527                    match_condition: None,
46528                    pivots: Vec::new(),
46529                    comments: Vec::new(),
46530                    nesting_group: 0,
46531                    directed: false,
46532                }))));
46533            }
46534            return Ok(None);
46535        }
46536
46537        // Try to parse join kind (INNER, LEFT, RIGHT, FULL, CROSS, etc.)
46538        let saved_pos = self.current;
46539        if let Some((kind, needs_join_keyword, use_inner_keyword, use_outer_keyword, join_hint)) =
46540            self.try_parse_join_kind()
46541        {
46542            // Collect comments from tokens consumed by try_parse_join_kind
46543            let mut join_comments = Vec::new();
46544            for i in saved_pos..self.current {
46545                if i < self.tokens.len() {
46546                    join_comments.extend(self.tokens[i].trailing_comments.iter().cloned());
46547                }
46548            }
46549
46550            // If kind requires JOIN keyword, expect it
46551            if needs_join_keyword && !self.match_token(TokenType::Join) {
46552                self.current = saved_pos;
46553                return Ok(None);
46554            }
46555
46556            // Parse the table being joined
46557            let table = self.parse_table_expression()?;
46558
46559            // Parse ON or USING condition
46560            let (on, using) = if self.match_token(TokenType::On) {
46561                (Some(self.parse_expression()?), Vec::new())
46562            } else if self.match_token(TokenType::Using) {
46563                let has_parens = self.match_token(TokenType::LParen);
46564                // Use parse_using_column_list to handle qualified names like t1.col
46565                let cols = self.parse_using_column_list()?;
46566                if has_parens {
46567                    self.expect(TokenType::RParen)?;
46568                }
46569                (None, cols)
46570            } else {
46571                (None, Vec::new())
46572            };
46573
46574            return Ok(Some(Expression::Join(Box::new(Join {
46575                this: table,
46576                on,
46577                using,
46578                kind,
46579                use_inner_keyword,
46580                use_outer_keyword,
46581                deferred_condition: false,
46582                join_hint,
46583                match_condition: None,
46584                pivots: Vec::new(),
46585                comments: join_comments,
46586                nesting_group: 0,
46587                directed: false,
46588            }))));
46589        }
46590
46591        // Check for CROSS APPLY / OUTER APPLY (SQL Server)
46592        if self.match_text_seq(&["CROSS", "APPLY"]) || self.match_text_seq(&["OUTER", "APPLY"]) {
46593            let is_outer = self.previous().text.eq_ignore_ascii_case("OUTER");
46594            let table = self.parse_table_expression()?;
46595            return Ok(Some(Expression::Join(Box::new(Join {
46596                this: table,
46597                on: None,
46598                using: Vec::new(),
46599                kind: if is_outer {
46600                    JoinKind::Outer
46601                } else {
46602                    JoinKind::Cross
46603                },
46604                use_inner_keyword: false,
46605                use_outer_keyword: is_outer,
46606                deferred_condition: false,
46607                join_hint: None,
46608                match_condition: None,
46609                pivots: Vec::new(),
46610                comments: Vec::new(),
46611                nesting_group: 0,
46612                directed: false,
46613            }))));
46614        }
46615
46616        Ok(None)
46617    }
46618
46619    /// parse_join_hint - Spark/Hive join hints (BROADCAST, MERGE, SHUFFLE_HASH, etc.)
46620    /// Parses: HINT_NAME(table1, table2, ...)
46621    /// hint_name should be the already matched hint keyword (BROADCAST, MAPJOIN, etc.)
46622    #[allow(unused_variables, unused_mut)]
46623    pub fn parse_join_hint(&mut self, hint_name: &str) -> Result<Option<Expression>> {
46624        // Parse comma-separated list of tables
46625        let mut tables = Vec::new();
46626        loop {
46627            if let Some(table) = self.parse_table()? {
46628                tables.push(table);
46629            } else {
46630                break;
46631            }
46632            if !self.match_token(TokenType::Comma) {
46633                break;
46634            }
46635        }
46636
46637        Ok(Some(Expression::JoinHint(Box::new(JoinHint {
46638            this: Box::new(Expression::Identifier(Identifier::new(
46639                hint_name.to_uppercase(),
46640            ))),
46641            expressions: tables,
46642        }))))
46643    }
46644
46645    /// parse_join_parts - Ported from Python _parse_join_parts
46646    /// Returns (method, side, kind) where each is an optional string
46647    /// method: ASOF, NATURAL, POSITIONAL
46648    /// side: LEFT, RIGHT, FULL
46649    /// kind: ANTI, CROSS, INNER, OUTER, SEMI
46650    pub fn parse_join_parts(&mut self) -> (Option<String>, Option<String>, Option<String>) {
46651        // Parse join method (ASOF, NATURAL, POSITIONAL)
46652        let method = if self.match_texts(&["ASOF", "NATURAL", "POSITIONAL"]) {
46653            Some(self.previous().text.to_uppercase())
46654        } else {
46655            None
46656        };
46657
46658        // Parse join side (LEFT, RIGHT, FULL)
46659        let side = if self.match_texts(&["LEFT", "RIGHT", "FULL"]) {
46660            Some(self.previous().text.to_uppercase())
46661        } else {
46662            None
46663        };
46664
46665        // Parse join kind (ANTI, CROSS, INNER, OUTER, SEMI)
46666        let kind = if self.match_texts(&["ANTI", "CROSS", "INNER", "OUTER", "SEMI"]) {
46667            Some(self.previous().text.to_uppercase())
46668        } else if self.match_token(TokenType::StraightJoin) {
46669            Some("STRAIGHT_JOIN".to_string())
46670        } else {
46671            None
46672        };
46673
46674        (method, side, kind)
46675    }
46676
46677    /// parse_journal - Parses JOURNAL property (Teradata)
46678    /// Python: _parse_journal
46679    /// Creates a JournalProperty expression
46680    pub fn parse_journal(&mut self) -> Result<Option<Expression>> {
46681        self.parse_journal_impl(false, false, false, false, false)
46682    }
46683
46684    /// Implementation of parse_journal with options
46685    pub fn parse_journal_impl(
46686        &mut self,
46687        no: bool,
46688        dual: bool,
46689        before: bool,
46690        local: bool,
46691        after: bool,
46692    ) -> Result<Option<Expression>> {
46693        Ok(Some(Expression::JournalProperty(Box::new(
46694            JournalProperty {
46695                no: if no {
46696                    Some(Box::new(Expression::Boolean(BooleanLiteral {
46697                        value: true,
46698                    })))
46699                } else {
46700                    None
46701                },
46702                dual: if dual {
46703                    Some(Box::new(Expression::Boolean(BooleanLiteral {
46704                        value: true,
46705                    })))
46706                } else {
46707                    None
46708                },
46709                before: if before {
46710                    Some(Box::new(Expression::Boolean(BooleanLiteral {
46711                        value: true,
46712                    })))
46713                } else {
46714                    None
46715                },
46716                local: if local {
46717                    Some(Box::new(Expression::Boolean(BooleanLiteral {
46718                        value: true,
46719                    })))
46720                } else {
46721                    None
46722                },
46723                after: if after {
46724                    Some(Box::new(Expression::Boolean(BooleanLiteral {
46725                        value: true,
46726                    })))
46727                } else {
46728                    None
46729                },
46730            },
46731        ))))
46732    }
46733
46734    /// parse_json_column_def - Implemented from Python _parse_json_column_def
46735    /// Calls: parse_string, parse_json_schema, parse_id_var
46736    #[allow(unused_variables, unused_mut)]
46737    pub fn parse_json_column_def(&mut self) -> Result<Option<Expression>> {
46738        if self.match_text_seq(&["NESTED"]) {
46739            return Ok(Some(Expression::JSONColumnDef(Box::new(JSONColumnDef {
46740                this: None,
46741                kind: None,
46742                path: None,
46743                nested_schema: None,
46744                ordinality: None,
46745            }))));
46746        }
46747        if self.match_text_seq(&["PATH"]) {
46748            // Matched: PATH
46749            return Ok(None);
46750        }
46751        Ok(None)
46752    }
46753
46754    /// parse_json_key_value - Implemented from Python _parse_json_key_value
46755    #[allow(unused_variables, unused_mut)]
46756    /// parse_json_key_value - Parses a JSON key-value pair
46757    /// Python: _parse_json_key_value
46758    /// Format: [KEY] key [: | VALUE] value
46759    pub fn parse_json_key_value(&mut self) -> Result<Option<Expression>> {
46760        // Optional KEY keyword
46761        self.match_text_seq(&["KEY"]);
46762
46763        // Parse the key expression
46764        let key = self.parse_column()?;
46765
46766        // Match separator (colon, comma, or VALUE keyword)
46767        let _ = self.match_token(TokenType::Colon)
46768            || self.match_token(TokenType::Comma)
46769            || self.match_text_seq(&["VALUE"]);
46770
46771        // Optional VALUE keyword
46772        self.match_text_seq(&["VALUE"]);
46773
46774        // Parse the value expression
46775        let value = self.parse_bitwise()?;
46776
46777        // If neither key nor value, return None
46778        match (key, value) {
46779            (None, None) => Ok(None),
46780            (Some(k), None) => Ok(Some(Expression::JSONKeyValue(Box::new(JSONKeyValue {
46781                this: Box::new(k),
46782                expression: Box::new(Expression::Null(Null)),
46783            })))),
46784            (None, Some(v)) => Ok(Some(Expression::JSONKeyValue(Box::new(JSONKeyValue {
46785                this: Box::new(Expression::Null(Null)),
46786                expression: Box::new(v),
46787            })))),
46788            (Some(k), Some(v)) => Ok(Some(Expression::JSONKeyValue(Box::new(JSONKeyValue {
46789                this: Box::new(k),
46790                expression: Box::new(v),
46791            })))),
46792        }
46793    }
46794
46795    /// parse_json_object - Parses JSON_OBJECT function
46796    /// Python: _parse_json_object
46797    /// Handles both JSON_OBJECT and JSON_OBJECTAGG
46798    pub fn parse_json_object(&mut self) -> Result<Option<Expression>> {
46799        self.parse_json_object_impl(false)
46800    }
46801
46802    /// Implementation of JSON object parsing with aggregate flag
46803    pub fn parse_json_object_impl(&mut self, agg: bool) -> Result<Option<Expression>> {
46804        // Try to parse a star expression
46805        let star = self.parse_star()?;
46806
46807        // Parse expressions: either star or comma-separated key-value pairs
46808        let expressions = if let Some(star_expr) = star {
46809            vec![star_expr]
46810        } else {
46811            // Parse comma-separated JSON key-value pairs
46812            let mut exprs = Vec::new();
46813            loop {
46814                if let Some(kv) = self.parse_json_key_value()? {
46815                    // Wrap with FORMAT JSON if specified
46816                    if self.match_text_seq(&["FORMAT", "JSON"]) {
46817                        exprs.push(Expression::JSONFormat(Box::new(JSONFormat {
46818                            this: Some(Box::new(kv)),
46819                            options: Vec::new(),
46820                            is_json: None,
46821                            to_json: None,
46822                        })));
46823                    } else {
46824                        exprs.push(kv);
46825                    }
46826                } else {
46827                    break;
46828                }
46829                if !self.match_token(TokenType::Comma) {
46830                    break;
46831                }
46832            }
46833            exprs
46834        };
46835
46836        // Parse NULL handling: NULL ON NULL or ABSENT ON NULL
46837        let null_handling = self.parse_json_on_null_handling()?;
46838
46839        // Parse UNIQUE KEYS option
46840        let unique_keys = if self.match_text_seq(&["WITH", "UNIQUE"]) {
46841            self.match_text_seq(&["KEYS"]);
46842            Some(Box::new(Expression::Boolean(BooleanLiteral {
46843                value: true,
46844            })))
46845        } else if self.match_text_seq(&["WITHOUT", "UNIQUE"]) {
46846            self.match_text_seq(&["KEYS"]);
46847            Some(Box::new(Expression::Boolean(BooleanLiteral {
46848                value: false,
46849            })))
46850        } else {
46851            None
46852        };
46853
46854        // Consume optional KEYS keyword
46855        self.match_text_seq(&["KEYS"]);
46856
46857        // Parse RETURNING clause
46858        let return_type = if self.match_text_seq(&["RETURNING"]) {
46859            let type_expr = self.parse_type()?;
46860            // Wrap with FORMAT JSON if specified
46861            if self.match_text_seq(&["FORMAT", "JSON"]) {
46862                type_expr.map(|t| {
46863                    Box::new(Expression::JSONFormat(Box::new(JSONFormat {
46864                        this: Some(Box::new(t)),
46865                        options: Vec::new(),
46866                        is_json: None,
46867                        to_json: None,
46868                    })))
46869                })
46870            } else {
46871                type_expr.map(Box::new)
46872            }
46873        } else {
46874            None
46875        };
46876
46877        // Parse ENCODING option
46878        let encoding = if self.match_text_seq(&["ENCODING"]) {
46879            self.parse_var()?.map(Box::new)
46880        } else {
46881            None
46882        };
46883
46884        if agg {
46885            Ok(Some(Expression::JSONObjectAgg(Box::new(JSONObjectAgg {
46886                expressions,
46887                null_handling,
46888                unique_keys,
46889                return_type,
46890                encoding,
46891            }))))
46892        } else {
46893            Ok(Some(Expression::JSONObject(Box::new(JSONObject {
46894                expressions,
46895                null_handling,
46896                unique_keys,
46897                return_type,
46898                encoding,
46899            }))))
46900        }
46901    }
46902
46903    /// Parse JSON NULL handling clause: NULL ON NULL or ABSENT ON NULL
46904    fn parse_json_on_null_handling(&mut self) -> Result<Option<Box<Expression>>> {
46905        if self.match_text_seq(&["NULL", "ON", "NULL"]) {
46906            Ok(Some(Box::new(Expression::Var(Box::new(Var {
46907                this: "NULL ON NULL".to_string(),
46908            })))))
46909        } else if self.match_text_seq(&["ABSENT", "ON", "NULL"]) {
46910            Ok(Some(Box::new(Expression::Var(Box::new(Var {
46911                this: "ABSENT ON NULL".to_string(),
46912            })))))
46913        } else {
46914            Ok(None)
46915        }
46916    }
46917
46918    /// parse_json_schema - Implemented from Python _parse_json_schema
46919    #[allow(unused_variables, unused_mut)]
46920    pub fn parse_json_schema(&mut self) -> Result<Option<Expression>> {
46921        if self.match_text_seq(&["COLUMNS"]) {
46922            return Ok(Some(Expression::JSONSchema(Box::new(JSONSchema {
46923                expressions: Vec::new(),
46924            }))));
46925        }
46926        Ok(None)
46927    }
46928
46929    /// Parse JSON_TABLE COLUMNS clause: COLUMNS (column_def, column_def, ...) or COLUMNS column_def
46930    /// Column definitions can be:
46931    /// - name type PATH 'json_path'
46932    /// - name FOR ORDINALITY
46933    /// - NESTED [PATH] 'json_path' COLUMNS (...)
46934    pub fn parse_json_table_columns(&mut self) -> Result<Option<Expression>> {
46935        if !self.match_text_seq(&["COLUMNS"]) {
46936            return Ok(None);
46937        }
46938
46939        // Check for opening paren - Oracle supports both COLUMNS(...) and COLUMNS col PATH '...'
46940        let has_parens = self.match_token(TokenType::LParen);
46941
46942        let mut columns = Vec::new();
46943
46944        // Parse column definitions
46945        if has_parens {
46946            // COLUMNS(col1, col2, ...)
46947            if !self.check(TokenType::RParen) {
46948                loop {
46949                    if let Some(col_def) = self.parse_json_table_column_def()? {
46950                        columns.push(col_def);
46951                    }
46952                    if !self.match_token(TokenType::Comma) {
46953                        break;
46954                    }
46955                }
46956            }
46957            // Expect closing paren for COLUMNS(...)
46958            self.expect(TokenType::RParen)?;
46959        } else {
46960            // COLUMNS col PATH '...' (single column without parens)
46961            if let Some(col_def) = self.parse_json_table_column_def()? {
46962                columns.push(col_def);
46963            }
46964        }
46965
46966        Ok(Some(Expression::JSONSchema(Box::new(JSONSchema {
46967            expressions: columns,
46968        }))))
46969    }
46970
46971    /// Parse a single JSON_TABLE column definition
46972    /// Formats:
46973    /// - name [FOR ORDINALITY] [type] [PATH 'path']
46974    /// - NESTED [PATH] 'path' COLUMNS (...)
46975    pub fn parse_json_table_column_def(&mut self) -> Result<Option<Expression>> {
46976        // Check for NESTED column
46977        if self.match_text_seq(&["NESTED"]) {
46978            // NESTED [PATH] 'json_path' COLUMNS (...)
46979            self.match_text_seq(&["PATH"]); // Optional PATH keyword
46980            let path = self.parse_string()?;
46981            let nested_schema = self.parse_json_table_columns()?;
46982
46983            return Ok(Some(Expression::JSONColumnDef(Box::new(JSONColumnDef {
46984                this: None,
46985                kind: None,
46986                path: path.map(Box::new),
46987                nested_schema: nested_schema.map(Box::new),
46988                ordinality: None,
46989            }))));
46990        }
46991
46992        // Regular column: name [FOR ORDINALITY] [type] [PATH 'path']
46993        let name = self.parse_id_var()?;
46994        if name.is_none() {
46995            return Ok(None);
46996        }
46997
46998        // Check for FOR ORDINALITY
46999        let ordinality = if self.match_text_seq(&["FOR", "ORDINALITY"]) {
47000            Some(Box::new(Expression::Boolean(BooleanLiteral {
47001                value: true,
47002            })))
47003        } else {
47004            None
47005        };
47006
47007        // Parse data type (if not FOR ORDINALITY, type is expected)
47008        let kind = if ordinality.is_none() {
47009            // Try to parse a data type
47010            let data_type = self.parse_data_type_optional()?;
47011            data_type.map(|dt| self.data_type_to_string(&dt))
47012        } else {
47013            None
47014        };
47015
47016        // Parse PATH 'json_path'
47017        let path = if self.match_text_seq(&["PATH"]) {
47018            self.parse_string()?
47019        } else {
47020            None
47021        };
47022
47023        Ok(Some(Expression::JSONColumnDef(Box::new(JSONColumnDef {
47024            this: name.map(Box::new),
47025            kind,
47026            path: path.map(Box::new),
47027            nested_schema: None,
47028            ordinality,
47029        }))))
47030    }
47031
47032    /// Parse JSON_TABLE function
47033    /// JSON_TABLE(expr, path COLUMNS (...)) [ON ERROR ...] [ON EMPTY ...]
47034    pub fn parse_json_table(&mut self) -> Result<Option<Expression>> {
47035        // Parse the JSON expression
47036        let this = self.parse_expression()?;
47037
47038        // Optional path after comma
47039        let path = if self.match_token(TokenType::Comma) {
47040            if let Some(s) = self.parse_string()? {
47041                Some(Box::new(s))
47042            } else {
47043                None
47044            }
47045        } else {
47046            None
47047        };
47048
47049        // Parse error handling: ON ERROR NULL or ON ERROR ERROR
47050        let error_handling = if self.match_text_seq(&["ON", "ERROR"]) {
47051            if self.match_text_seq(&["NULL"]) {
47052                Some(Box::new(Expression::Var(Box::new(Var {
47053                    this: "NULL".to_string(),
47054                }))))
47055            } else if self.match_text_seq(&["ERROR"]) {
47056                Some(Box::new(Expression::Var(Box::new(Var {
47057                    this: "ERROR".to_string(),
47058                }))))
47059            } else {
47060                None
47061            }
47062        } else {
47063            None
47064        };
47065
47066        // Parse empty handling: ON EMPTY NULL or ON EMPTY ERROR
47067        let empty_handling = if self.match_text_seq(&["ON", "EMPTY"]) {
47068            if self.match_text_seq(&["NULL"]) {
47069                Some(Box::new(Expression::Var(Box::new(Var {
47070                    this: "NULL".to_string(),
47071                }))))
47072            } else if self.match_text_seq(&["ERROR"]) {
47073                Some(Box::new(Expression::Var(Box::new(Var {
47074                    this: "ERROR".to_string(),
47075                }))))
47076            } else {
47077                None
47078            }
47079        } else {
47080            None
47081        };
47082
47083        // Parse COLUMNS clause
47084        let schema = self.parse_json_schema()?;
47085
47086        Ok(Some(Expression::JSONTable(Box::new(JSONTable {
47087            this: Box::new(this),
47088            schema: schema.map(Box::new),
47089            path,
47090            error_handling,
47091            empty_handling,
47092        }))))
47093    }
47094
47095    /// parse_json_value - Ported from Python _parse_json_value
47096    #[allow(unused_variables, unused_mut)]
47097    /// parse_json_value - Parses JSON_VALUE function
47098    /// Example: JSON_VALUE(json, '$.path' RETURNING type)
47099    pub fn parse_json_value(&mut self) -> Result<Option<Expression>> {
47100        // Parse the JSON expression
47101        let this = self.parse_expression()?;
47102
47103        // Parse path (after comma)
47104        self.match_token(TokenType::Comma);
47105        let path = self.parse_expression()?;
47106
47107        // Parse optional RETURNING type
47108        let returning = if self.match_token(TokenType::Returning) {
47109            Some(Box::new(self.parse_expression()?))
47110        } else {
47111            None
47112        };
47113
47114        // Parse optional ON condition (ON ERROR, ON EMPTY)
47115        let on_condition = if self.check(TokenType::On) {
47116            self.parse_on_condition()?
47117        } else {
47118            None
47119        };
47120
47121        Ok(Some(Expression::JSONValue(Box::new(JSONValue {
47122            this: Box::new(this),
47123            path: Some(Box::new(path)),
47124            returning,
47125            on_condition: on_condition.map(Box::new),
47126        }))))
47127    }
47128
47129    /// parse_key_constraint_options - Implemented from Python _parse_key_constraint_options
47130    #[allow(unused_variables, unused_mut)]
47131    pub fn parse_key_constraint_options(&mut self) -> Result<Option<Expression>> {
47132        if self.match_text_seq(&["NO", "ACTION"]) {
47133            // Matched: NO ACTION
47134            return Ok(None);
47135        }
47136        if self.match_text_seq(&["CASCADE"]) {
47137            // Matched: CASCADE
47138            return Ok(None);
47139        }
47140        if self.match_text_seq(&["RESTRICT"]) {
47141            // Matched: RESTRICT
47142            return Ok(None);
47143        }
47144        Ok(None)
47145    }
47146
47147    /// parse_lambda - Ported from Python _parse_lambda
47148    /// Parses lambda expressions: x -> x + 1 or (x, y) -> x + y
47149    /// Also supports DuckDB syntax: LAMBDA x : x + 1
47150    #[allow(unused_variables, unused_mut)]
47151    pub fn parse_lambda(&mut self) -> Result<Option<Expression>> {
47152        let start_index = self.current;
47153
47154        // Check for DuckDB's LAMBDA keyword syntax: LAMBDA x : expr
47155        // ClickHouse doesn't use LAMBDA keyword — lambda is just a function name there
47156        if !matches!(
47157            self.config.dialect,
47158            Some(crate::dialects::DialectType::ClickHouse)
47159        ) && self.match_token(TokenType::Lambda)
47160        {
47161            // Parse lambda parameters (comma-separated identifiers)
47162            let mut params = Vec::new();
47163            loop {
47164                // Use is_identifier_token which handles Identifier, QuotedIdentifier, and Var
47165                if self.is_identifier_token() {
47166                    let token = self.advance();
47167                    let quoted = token.token_type == TokenType::QuotedIdentifier;
47168                    params.push(Identifier {
47169                        name: token.text,
47170                        quoted,
47171                        trailing_comments: Vec::new(),
47172                        span: None,
47173                    });
47174                } else {
47175                    break;
47176                }
47177                if !self.match_token(TokenType::Comma) {
47178                    break;
47179                }
47180            }
47181
47182            // Must have at least one parameter
47183            if params.is_empty() {
47184                return Err(self.parse_error("LAMBDA requires at least one parameter"));
47185            }
47186
47187            // Expect colon separator
47188            if !self.match_token(TokenType::Colon) {
47189                return Err(self.parse_error("Expected ':' after LAMBDA parameters"));
47190            }
47191
47192            let body = self.parse_expression()?;
47193            return Ok(Some(Expression::Lambda(Box::new(LambdaExpr {
47194                parameters: params,
47195                body,
47196                colon: true,
47197                parameter_types: Vec::new(),
47198            }))));
47199        }
47200
47201        // Try to parse lambda parameters
47202        let parameters = if self.match_token(TokenType::LParen) {
47203            // Parenthesized parameters: (x, y) -> ...
47204            let mut params = Vec::new();
47205            if !self.check(TokenType::RParen) {
47206                loop {
47207                    if let Some(ident) = self.parse_identifier()? {
47208                        if let Expression::Identifier(id) = ident {
47209                            params.push(id);
47210                        }
47211                    }
47212                    if !self.match_token(TokenType::Comma) {
47213                        break;
47214                    }
47215                }
47216            }
47217            if !self.match_token(TokenType::RParen) {
47218                // Not a lambda, retreat
47219                self.current = start_index;
47220                return Ok(None);
47221            }
47222            params
47223        } else {
47224            // Single parameter: x -> ...
47225            if let Some(ident) = self.parse_identifier()? {
47226                if let Expression::Identifier(id) = ident {
47227                    vec![id]
47228                } else {
47229                    self.current = start_index;
47230                    return Ok(None);
47231                }
47232            } else {
47233                return Ok(None);
47234            }
47235        };
47236
47237        // Check for arrow operator
47238        if self.match_token(TokenType::Arrow) || self.match_token(TokenType::FArrow) {
47239            // Parse lambda body
47240            let body = self.parse_expression()?;
47241            Ok(Some(Expression::Lambda(Box::new(LambdaExpr {
47242                parameters,
47243                body,
47244                colon: false,
47245                parameter_types: Vec::new(),
47246            }))))
47247        } else {
47248            // Not a lambda, retreat
47249            self.current = start_index;
47250            Ok(None)
47251        }
47252    }
47253
47254    /// parse_lambda_arg - Delegates to parse_id_var
47255    #[allow(unused_variables, unused_mut)]
47256    pub fn parse_lambda_arg(&mut self) -> Result<Option<Expression>> {
47257        self.parse_id_var()
47258    }
47259
47260    /// parse_lateral - Parse LATERAL subquery or table function
47261    /// Python: if self._match(TokenType.LATERAL): return exp.Lateral(this=..., view=..., outer=...)
47262    pub fn parse_lateral(&mut self) -> Result<Option<Expression>> {
47263        // Check for CROSS APPLY / OUTER APPLY (handled by join parsing in try_parse_join_kind)
47264        // This method focuses on LATERAL keyword parsing
47265
47266        if !self.match_token(TokenType::Lateral) {
47267            return Ok(None);
47268        }
47269
47270        // Check for LATERAL VIEW (Hive/Spark syntax)
47271        let view = self.match_token(TokenType::View);
47272        let outer = if view {
47273            self.match_token(TokenType::Outer)
47274        } else {
47275            false
47276        };
47277
47278        // Parse the lateral expression (subquery, function call, or table reference)
47279        let this = if self.check(TokenType::LParen) {
47280            // Could be a subquery: LATERAL (SELECT ...)
47281            self.expect(TokenType::LParen)?;
47282            let inner = self.parse_statement()?;
47283            self.expect(TokenType::RParen)?;
47284            inner
47285        } else {
47286            // Could be a function or table reference: LATERAL unnest(...)
47287            self.parse_primary()?
47288        };
47289
47290        // Parse optional alias
47291        let alias = if self.match_token(TokenType::As) {
47292            Some(self.expect_identifier()?)
47293        } else if self.check(TokenType::Identifier) && !self.check_keyword() {
47294            Some(self.expect_identifier()?)
47295        } else {
47296            None
47297        };
47298
47299        // Parse optional column aliases: AS alias(col1, col2, ...)
47300        let column_aliases = if alias.is_some() && self.match_token(TokenType::LParen) {
47301            let mut cols = Vec::new();
47302            loop {
47303                if self.check(TokenType::RParen) {
47304                    break;
47305                }
47306                let col = self.expect_identifier()?;
47307                cols.push(col);
47308                if !self.match_token(TokenType::Comma) {
47309                    break;
47310                }
47311            }
47312            self.expect(TokenType::RParen)?;
47313            cols
47314        } else {
47315            Vec::new()
47316        };
47317
47318        Ok(Some(Expression::Lateral(Box::new(Lateral {
47319            this: Box::new(this),
47320            view: if view {
47321                Some(Box::new(Expression::Boolean(BooleanLiteral {
47322                    value: true,
47323                })))
47324            } else {
47325                None
47326            },
47327            outer: if outer {
47328                Some(Box::new(Expression::Boolean(BooleanLiteral {
47329                    value: true,
47330                })))
47331            } else {
47332                None
47333            },
47334            alias,
47335            alias_quoted: false,
47336            cross_apply: None,
47337            ordinality: None,
47338            column_aliases,
47339        }))))
47340    }
47341
47342    /// parse_limit - Parse LIMIT clause
47343    /// Python: if self._match(TokenType.LIMIT): return exp.Limit(this=self._parse_term())
47344    pub fn parse_limit(&mut self) -> Result<Option<Expression>> {
47345        if !self.match_token(TokenType::Limit) {
47346            return Ok(None);
47347        }
47348        // Parse the limit expression (usually a number)
47349        let limit_expr = self.parse_expression()?;
47350        Ok(Some(Expression::Limit(Box::new(Limit {
47351            this: limit_expr,
47352            percent: false,
47353            comments: Vec::new(),
47354        }))))
47355    }
47356
47357    /// parse_limit_by - Implemented from Python _parse_limit_by
47358    #[allow(unused_variables, unused_mut)]
47359    pub fn parse_limit_by(&mut self) -> Result<Option<Expression>> {
47360        if self.match_text_seq(&["BY"]) {
47361            // Matched: BY
47362            return Ok(None);
47363        }
47364        Ok(None)
47365    }
47366
47367    /// parse_limit_options - Implemented from Python _parse_limit_options
47368    #[allow(unused_variables, unused_mut)]
47369    pub fn parse_limit_options(&mut self) -> Result<Option<Expression>> {
47370        if self.match_text_seq(&["ONLY"]) {
47371            return Ok(Some(Expression::LimitOptions(Box::new(LimitOptions {
47372                percent: None,
47373                rows: None,
47374                with_ties: None,
47375            }))));
47376        }
47377        if self.match_text_seq(&["WITH", "TIES"]) {
47378            // Matched: WITH TIES
47379            return Ok(None);
47380        }
47381        Ok(None)
47382    }
47383
47384    /// parse_load - Implemented from Python _parse_load
47385    #[allow(unused_variables, unused_mut)]
47386    pub fn parse_load(&mut self) -> Result<Option<Expression>> {
47387        if self.match_text_seq(&["DATA"]) {
47388            return Ok(Some(Expression::Command(Box::new(Command {
47389                this: String::new(),
47390            }))));
47391        }
47392        if self.match_text_seq(&["LOCAL"]) {
47393            // Matched: LOCAL
47394            return Ok(None);
47395        }
47396        Ok(None)
47397    }
47398
47399    /// parse_locking - Implemented from Python _parse_locking
47400    /// Calls: parse_table_parts
47401    #[allow(unused_variables, unused_mut)]
47402    pub fn parse_locking(&mut self) -> Result<Option<Expression>> {
47403        let kind = if self.match_token(TokenType::Table) {
47404            Some("TABLE")
47405        } else if self.match_token(TokenType::View) {
47406            Some("VIEW")
47407        } else if self.match_token(TokenType::Row) {
47408            Some("ROW")
47409        } else if self.match_token(TokenType::Database) || self.match_identifier("DATABASE") {
47410            Some("DATABASE")
47411        } else {
47412            None
47413        };
47414
47415        let kind = match kind {
47416            Some(k) => k.to_string(),
47417            None => return Ok(None),
47418        };
47419
47420        let this = if matches!(kind.as_str(), "DATABASE" | "TABLE" | "VIEW") {
47421            self.parse_table_parts()?
47422        } else {
47423            None
47424        };
47425
47426        let for_or_in = if self.match_token(TokenType::For) {
47427            Some("FOR")
47428        } else if self.match_token(TokenType::In) {
47429            Some("IN")
47430        } else {
47431            None
47432        };
47433
47434        let lock_type = if self.match_identifier("ACCESS") {
47435            Some("ACCESS")
47436        } else if self.match_texts(&["EXCL", "EXCLUSIVE"]) {
47437            Some("EXCLUSIVE")
47438        } else if self.match_identifier("SHARE") {
47439            Some("SHARE")
47440        } else if self.match_identifier("READ") {
47441            Some("READ")
47442        } else if self.match_identifier("WRITE") {
47443            Some("WRITE")
47444        } else if self.match_identifier("CHECKSUM") {
47445            Some("CHECKSUM")
47446        } else {
47447            None
47448        };
47449
47450        let override_ = if self.match_identifier("OVERRIDE") {
47451            Some(Box::new(Expression::Boolean(BooleanLiteral {
47452                value: true,
47453            })))
47454        } else {
47455            None
47456        };
47457
47458        Ok(Some(Expression::LockingProperty(Box::new(
47459            LockingProperty {
47460                this: this.map(Box::new),
47461                kind,
47462                for_or_in: for_or_in.map(|v| {
47463                    Box::new(Expression::Var(Box::new(Var {
47464                        this: v.to_string(),
47465                    })))
47466                }),
47467                lock_type: lock_type.map(|v| {
47468                    Box::new(Expression::Var(Box::new(Var {
47469                        this: v.to_string(),
47470                    })))
47471                }),
47472                override_,
47473            },
47474        ))))
47475    }
47476
47477    /// Parse Teradata LOCKING statement: LOCKING <property> SELECT ...
47478    fn parse_locking_statement(&mut self) -> Result<Expression> {
47479        self.expect(TokenType::Lock)?;
47480        let locking = self
47481            .parse_locking()?
47482            .ok_or_else(|| self.parse_error("Expected LOCKING clause"))?;
47483        let query = if self.check(TokenType::With) {
47484            self.parse_statement()?
47485        } else {
47486            self.parse_select()?
47487        };
47488        Ok(Expression::LockingStatement(Box::new(LockingStatement {
47489            this: Box::new(locking),
47490            expression: Box::new(query),
47491        })))
47492    }
47493
47494    /// parse_log - Parses LOG property (Teradata)
47495    /// Python: _parse_log
47496    /// Creates a LogProperty expression
47497    pub fn parse_log(&mut self) -> Result<Option<Expression>> {
47498        self.parse_log_impl(false)
47499    }
47500
47501    /// Implementation of parse_log with no flag
47502    pub fn parse_log_impl(&mut self, no: bool) -> Result<Option<Expression>> {
47503        Ok(Some(Expression::LogProperty(Box::new(LogProperty {
47504            no: if no {
47505                Some(Box::new(Expression::Boolean(BooleanLiteral {
47506                    value: true,
47507                })))
47508            } else {
47509                None
47510            },
47511        }))))
47512    }
47513
47514    /// parse_match_against - Parses MATCH(columns) AGAINST(pattern)
47515    /// Python: parser.py:7125-7153
47516    #[allow(unused_variables, unused_mut)]
47517    pub fn parse_match_against(&mut self) -> Result<Option<Expression>> {
47518        // Parse column expressions or TABLE syntax
47519        let expressions = if self.match_text_seq(&["TABLE"]) {
47520            // SingleStore TABLE syntax
47521            if let Some(table) = self.parse_table()? {
47522                vec![table]
47523            } else {
47524                Vec::new()
47525            }
47526        } else {
47527            // Regular column list
47528            let mut cols = Vec::new();
47529            loop {
47530                if let Some(col) = self.parse_column()? {
47531                    cols.push(col);
47532                }
47533                if !self.match_token(TokenType::Comma) {
47534                    break;
47535                }
47536            }
47537            cols
47538        };
47539
47540        // Match ) AGAINST (
47541        self.match_text_seq(&[")", "AGAINST", "("]);
47542
47543        // Parse the search pattern
47544        let this = self.parse_string()?;
47545
47546        // Parse modifier
47547        let modifier = if self.match_text_seq(&["IN", "NATURAL", "LANGUAGE", "MODE"]) {
47548            if self.match_text_seq(&["WITH", "QUERY", "EXPANSION"]) {
47549                Some(Box::new(Expression::Var(Box::new(Var {
47550                    this: "IN NATURAL LANGUAGE MODE WITH QUERY EXPANSION".to_string(),
47551                }))))
47552            } else {
47553                Some(Box::new(Expression::Var(Box::new(Var {
47554                    this: "IN NATURAL LANGUAGE MODE".to_string(),
47555                }))))
47556            }
47557        } else if self.match_text_seq(&["IN", "BOOLEAN", "MODE"]) {
47558            Some(Box::new(Expression::Var(Box::new(Var {
47559                this: "IN BOOLEAN MODE".to_string(),
47560            }))))
47561        } else if self.match_text_seq(&["WITH", "QUERY", "EXPANSION"]) {
47562            Some(Box::new(Expression::Var(Box::new(Var {
47563                this: "WITH QUERY EXPANSION".to_string(),
47564            }))))
47565        } else {
47566            None
47567        };
47568
47569        match this {
47570            Some(t) => Ok(Some(Expression::MatchAgainst(Box::new(MatchAgainst {
47571                this: Box::new(t),
47572                expressions,
47573                modifier,
47574            })))),
47575            None => Ok(None),
47576        }
47577    }
47578
47579    /// parse_match_recognize_measure - Implemented from Python _parse_match_recognize_measure
47580    /// Parses a MEASURES expression in MATCH_RECOGNIZE: [FINAL|RUNNING] expression
47581    pub fn parse_match_recognize_measure(&mut self) -> Result<Option<Expression>> {
47582        // Check for optional FINAL or RUNNING keyword
47583        let window_frame = if self.match_texts(&["FINAL", "RUNNING"]) {
47584            let text = self.previous().text.to_uppercase();
47585            Some(if text == "FINAL" {
47586                MatchRecognizeSemantics::Final
47587            } else {
47588                MatchRecognizeSemantics::Running
47589            })
47590        } else {
47591            None
47592        };
47593
47594        // Parse the expression
47595        let this = self.parse_expression()?;
47596
47597        Ok(Some(Expression::MatchRecognizeMeasure(Box::new(
47598            MatchRecognizeMeasure { this, window_frame },
47599        ))))
47600    }
47601
47602    /// parse_max_min_by - MAX_BY / MIN_BY / ARG_MAX / ARG_MIN aggregate functions
47603    /// Parses: MAX_BY(value, key [, n]) or MIN_BY(value, key [, n])
47604    /// is_max: true for MAX_BY/ARG_MAX, false for MIN_BY/ARG_MIN
47605    #[allow(unused_variables, unused_mut)]
47606    pub fn parse_max_min_by(&mut self, is_max: bool) -> Result<Option<Expression>> {
47607        let mut args = Vec::new();
47608
47609        // Handle optional DISTINCT
47610        let distinct = if self.match_token(TokenType::Distinct) {
47611            let lambda_expr = self.parse_lambda()?;
47612            if let Some(expr) = lambda_expr {
47613                args.push(expr);
47614            }
47615            self.match_token(TokenType::Comma);
47616            true
47617        } else {
47618            false
47619        };
47620
47621        // Parse remaining arguments
47622        loop {
47623            if let Some(arg) = self.parse_lambda()? {
47624                args.push(arg);
47625            } else {
47626                break;
47627            }
47628            if !self.match_token(TokenType::Comma) {
47629                break;
47630            }
47631        }
47632
47633        let this = args
47634            .get(0)
47635            .cloned()
47636            .map(Box::new)
47637            .unwrap_or_else(|| Box::new(Expression::Null(Null)));
47638        let expression = args
47639            .get(1)
47640            .cloned()
47641            .map(Box::new)
47642            .unwrap_or_else(|| Box::new(Expression::Null(Null)));
47643        let count = args.get(2).cloned().map(Box::new);
47644
47645        if is_max {
47646            Ok(Some(Expression::ArgMax(Box::new(ArgMax {
47647                this,
47648                expression,
47649                count,
47650            }))))
47651        } else {
47652            Ok(Some(Expression::ArgMin(Box::new(ArgMin {
47653                this,
47654                expression,
47655                count,
47656            }))))
47657        }
47658    }
47659
47660    /// Parse MERGE statement
47661    /// Python: def _parse_merge(self) -> exp.Merge
47662    pub fn parse_merge(&mut self) -> Result<Option<Expression>> {
47663        // Optional INTO keyword
47664        self.match_token(TokenType::Into);
47665
47666        // Parse target table using parse_table_ref
47667        let mut target = Expression::Table(self.parse_table_ref()?);
47668
47669        // Parse optional TSQL table hints: WITH (HOLDLOCK), WITH (TABLOCK), etc.
47670        if self.check(TokenType::With) && self.check_next(TokenType::LParen) {
47671            if let Expression::Table(ref mut table) = target {
47672                if let Some(hint_expr) = self.parse_table_hints()? {
47673                    match hint_expr {
47674                        Expression::Tuple(tuple) => {
47675                            table.hints = tuple.expressions;
47676                        }
47677                        other => {
47678                            table.hints = vec![other];
47679                        }
47680                    }
47681                }
47682            }
47683        }
47684
47685        // Parse optional alias for target table
47686        // Try to get an identifier as alias if AS is present or there's an identifier
47687        // Use parse_id_var instead of parse_identifier to handle Var tokens (e.g. T)
47688        if self.match_token(TokenType::As) {
47689            if let Some(alias_expr) = self.parse_id_var()? {
47690                // Extract identifier from the expression
47691                if let Expression::Identifier(ident) = alias_expr {
47692                    target = Expression::Alias(Box::new(Alias {
47693                        this: target,
47694                        alias: ident,
47695                        column_aliases: Vec::new(),
47696                        pre_alias_comments: Vec::new(),
47697                        trailing_comments: Vec::new(),
47698                        inferred_type: None,
47699                    }));
47700                }
47701            }
47702        } else if !self.check(TokenType::Using) {
47703            // Try to parse alias without AS keyword (e.g., MERGE t1 T USING ...)
47704            // Use parse_id_var to handle both Identifier and Var tokens
47705            if let Some(alias_expr) = self.parse_id_var()? {
47706                if let Expression::Identifier(ident) = alias_expr {
47707                    target = Expression::Alias(Box::new(Alias {
47708                        this: target,
47709                        alias: ident,
47710                        column_aliases: Vec::new(),
47711                        pre_alias_comments: Vec::new(),
47712                        trailing_comments: Vec::new(),
47713                        inferred_type: None,
47714                    }));
47715                }
47716            }
47717        }
47718
47719        // USING clause
47720        if !self.match_token(TokenType::Using) {
47721            return Err(self.parse_error("Expected USING in MERGE statement"));
47722        }
47723
47724        // Parse source table or subquery
47725        let mut using = if self.match_token(TokenType::LParen) {
47726            // Subquery: USING (SELECT ...) AS alias
47727            let query = self.parse_statement()?;
47728            self.expect(TokenType::RParen)?;
47729            let trailing = self.previous_trailing_comments();
47730            let mut subq = Subquery {
47731                this: query,
47732                alias: None,
47733                column_aliases: Vec::new(),
47734                order_by: None,
47735                limit: None,
47736                offset: None,
47737                distribute_by: None,
47738                sort_by: None,
47739                cluster_by: None,
47740                lateral: false,
47741                modifiers_inside: false,
47742                trailing_comments: trailing,
47743                inferred_type: None,
47744            };
47745            // Parse optional alias: (SELECT ...) AS y(col1, col2)
47746            if self.match_token(TokenType::As) {
47747                let alias_name = self.expect_identifier_or_keyword()?;
47748                subq.alias = Some(Identifier::new(alias_name));
47749                // Parse optional column aliases: AS alias(col1, col2)
47750                if self.match_token(TokenType::LParen) {
47751                    let mut cols = Vec::new();
47752                    loop {
47753                        let col_name = self.expect_identifier_or_keyword()?;
47754                        cols.push(Identifier::new(col_name));
47755                        if !self.match_token(TokenType::Comma) {
47756                            break;
47757                        }
47758                    }
47759                    self.expect(TokenType::RParen)?;
47760                    subq.column_aliases = cols;
47761                }
47762            } else if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
47763                // Implicit alias without AS
47764                let alias_name = self.expect_identifier_or_keyword()?;
47765                subq.alias = Some(Identifier::new(alias_name));
47766                // Parse optional column aliases: alias(col1, col2)
47767                if self.match_token(TokenType::LParen) {
47768                    let mut cols = Vec::new();
47769                    loop {
47770                        let col_name = self.expect_identifier_or_keyword()?;
47771                        cols.push(Identifier::new(col_name));
47772                        if !self.match_token(TokenType::Comma) {
47773                            break;
47774                        }
47775                    }
47776                    self.expect(TokenType::RParen)?;
47777                    subq.column_aliases = cols;
47778                }
47779            }
47780            Expression::Subquery(Box::new(subq))
47781        } else {
47782            Expression::Table(self.parse_table_ref()?)
47783        };
47784
47785        // Parse optional alias for source (if not already parsed for subquery)
47786        if matches!(&using, Expression::Table(_)) {
47787            if self.match_token(TokenType::As) {
47788                if let Some(alias_expr) = self.parse_id_var()? {
47789                    if let Expression::Identifier(ident) = alias_expr {
47790                        using = Expression::Alias(Box::new(Alias {
47791                            this: using,
47792                            alias: ident,
47793                            column_aliases: Vec::new(),
47794                            pre_alias_comments: Vec::new(),
47795                            trailing_comments: Vec::new(),
47796                            inferred_type: None,
47797                        }));
47798                    }
47799                }
47800            } else if !self.check(TokenType::On) {
47801                // Try to parse alias without AS keyword
47802                // Use parse_id_var to handle both Identifier and Var tokens (e.g., S, T)
47803                if let Some(alias_expr) = self.parse_id_var()? {
47804                    if let Expression::Identifier(ident) = alias_expr {
47805                        using = Expression::Alias(Box::new(Alias {
47806                            this: using,
47807                            alias: ident,
47808                            column_aliases: Vec::new(),
47809                            pre_alias_comments: Vec::new(),
47810                            trailing_comments: Vec::new(),
47811                            inferred_type: None,
47812                        }));
47813                    }
47814                }
47815            }
47816        }
47817
47818        // ON clause with condition
47819        let on = if self.match_token(TokenType::On) {
47820            Some(Box::new(self.parse_expression()?))
47821        } else {
47822            None
47823        };
47824
47825        // Optional additional USING clause for key columns (DuckDB: USING (col1, col2))
47826        let using_cond = if self.match_token(TokenType::Using) {
47827            // Parse comma-separated identifiers wrapped in parentheses
47828            if self.match_token(TokenType::LParen) {
47829                let mut idents = Vec::new();
47830                loop {
47831                    // Use parse_id_var to handle Var tokens (unquoted identifiers)
47832                    if let Some(ident) = self.parse_id_var()? {
47833                        idents.push(ident);
47834                    } else {
47835                        break;
47836                    }
47837                    if !self.match_token(TokenType::Comma) {
47838                        break;
47839                    }
47840                }
47841                self.match_token(TokenType::RParen);
47842                if !idents.is_empty() {
47843                    Some(Box::new(Expression::Tuple(Box::new(Tuple {
47844                        expressions: idents,
47845                    }))))
47846                } else {
47847                    None
47848                }
47849            } else {
47850                // Also support without parentheses for backwards compatibility
47851                let mut idents = Vec::new();
47852                loop {
47853                    if let Some(ident) = self.parse_id_var()? {
47854                        idents.push(ident);
47855                    } else {
47856                        break;
47857                    }
47858                    if !self.match_token(TokenType::Comma) {
47859                        break;
47860                    }
47861                }
47862                if !idents.is_empty() {
47863                    Some(Box::new(Expression::Tuple(Box::new(Tuple {
47864                        expressions: idents,
47865                    }))))
47866                } else {
47867                    None
47868                }
47869            }
47870        } else {
47871            None
47872        };
47873
47874        // Parse WHEN MATCHED clauses
47875        let whens = self.parse_when_matched_clauses()?;
47876
47877        // Parse optional RETURNING clause (PostgreSQL) or OUTPUT clause (TSQL)
47878        let returning = if let Some(ret) = self.parse_returning()? {
47879            Some(ret)
47880        } else if self.match_token(TokenType::Output) {
47881            // TSQL OUTPUT clause: OUTPUT $action, Inserted.col, Deleted.col [INTO target]
47882            let output = self.parse_output_clause()?;
47883            Some(Expression::Returning(Box::new(Returning {
47884                expressions: output.columns,
47885                into: output.into_table.map(Box::new),
47886            })))
47887        } else {
47888            None
47889        };
47890
47891        Ok(Some(Expression::Merge(Box::new(Merge {
47892            this: Box::new(target),
47893            using: Box::new(using),
47894            on,
47895            using_cond,
47896            whens: whens.map(Box::new),
47897            with_: None,
47898            returning: returning.map(Box::new),
47899        }))))
47900    }
47901
47902    /// Parse multiple WHEN [NOT] MATCHED clauses for MERGE
47903    fn parse_when_matched_clauses(&mut self) -> Result<Option<Expression>> {
47904        let mut whens = Vec::new();
47905
47906        while self.match_token(TokenType::When) {
47907            // Check for NOT MATCHED
47908            let matched = !self.match_token(TokenType::Not);
47909            self.match_text_seq(&["MATCHED"]);
47910
47911            // Check for BY TARGET or BY SOURCE
47912            let source = if self.match_text_seq(&["BY", "TARGET"]) {
47913                Some(Box::new(Expression::Boolean(BooleanLiteral {
47914                    value: false,
47915                })))
47916            } else if self.match_text_seq(&["BY", "SOURCE"]) {
47917                Some(Box::new(Expression::Boolean(BooleanLiteral {
47918                    value: true,
47919                })))
47920            } else {
47921                None
47922            };
47923
47924            // Optional AND condition
47925            let condition = if self.match_token(TokenType::And) {
47926                Some(Box::new(self.parse_expression()?))
47927            } else {
47928                None
47929            };
47930
47931            // THEN action
47932            if !self.match_token(TokenType::Then) {
47933                return Err(self.parse_error("Expected THEN in WHEN clause"));
47934            }
47935
47936            // Parse the action: INSERT, UPDATE, DELETE, or other keywords (DO NOTHING, etc.)
47937            let then: Expression = if self.match_token(TokenType::Insert) {
47938                // INSERT action - use Tuple to represent it
47939                let mut elements = vec![Expression::Var(Box::new(Var {
47940                    this: "INSERT".to_string(),
47941                }))];
47942
47943                // Spark/Databricks: INSERT * (insert all columns)
47944                if self.match_token(TokenType::Star) {
47945                    elements.push(Expression::Star(crate::expressions::Star {
47946                        table: None,
47947                        except: None,
47948                        replace: None,
47949                        rename: None,
47950                        trailing_comments: Vec::new(),
47951                        span: None,
47952                    }));
47953                } else
47954                // Parse column list (optional)
47955                if self.match_token(TokenType::LParen) {
47956                    let mut columns: Vec<Expression> = Vec::new();
47957                    loop {
47958                        if let Some(col) = self.parse_id_var()? {
47959                            // Handle qualified column references (e.g., target.a)
47960                            let col = if self.match_token(TokenType::Dot) {
47961                                if let Expression::Identifier(table_ident) = col {
47962                                    if let Some(col_expr) = self.parse_id_var()? {
47963                                        if let Expression::Identifier(col_ident) = col_expr {
47964                                            Expression::Column(Column {
47965                                                name: col_ident,
47966                                                table: Some(table_ident),
47967                                                join_mark: false,
47968                                                trailing_comments: Vec::new(),
47969                                                span: None,
47970                                                inferred_type: None,
47971                                            })
47972                                        } else {
47973                                            col_expr
47974                                        }
47975                                    } else {
47976                                        return Err(self.parse_error(
47977                                            "Expected column name after dot in MERGE INSERT",
47978                                        ));
47979                                    }
47980                                } else {
47981                                    col
47982                                }
47983                            } else {
47984                                col
47985                            };
47986                            columns.push(col);
47987                        } else {
47988                            break;
47989                        }
47990                        if !self.match_token(TokenType::Comma) {
47991                            break;
47992                        }
47993                    }
47994                    self.match_token(TokenType::RParen);
47995                    if !columns.is_empty() {
47996                        elements.push(Expression::Tuple(Box::new(Tuple {
47997                            expressions: columns,
47998                        })));
47999                    }
48000                }
48001
48002                // Parse VALUES clause
48003                if self.match_text_seq(&["VALUES"]) {
48004                    if let Some(values) = self.parse_value()? {
48005                        elements.push(values);
48006                    }
48007                } else if self.match_text_seq(&["ROW"]) {
48008                    elements.push(Expression::Var(Box::new(Var {
48009                        this: "ROW".to_string(),
48010                    })));
48011                }
48012
48013                if elements.len() == 1 {
48014                    elements[0].clone()
48015                } else {
48016                    Expression::Tuple(Box::new(Tuple {
48017                        expressions: elements,
48018                    }))
48019                }
48020            } else if self.match_token(TokenType::Update) {
48021                // UPDATE action - use Tuple to represent SET assignments
48022                let mut elements = vec![Expression::Var(Box::new(Var {
48023                    this: "UPDATE".to_string(),
48024                }))];
48025
48026                // Spark/Databricks: UPDATE * (update all columns)
48027                if self.match_token(TokenType::Star) {
48028                    elements.push(Expression::Star(crate::expressions::Star {
48029                        table: None,
48030                        except: None,
48031                        replace: None,
48032                        rename: None,
48033                        trailing_comments: Vec::new(),
48034                        span: None,
48035                    }));
48036                } else if self.match_token(TokenType::Set) {
48037                    // Parse col = value assignments manually
48038                    let mut assignments: Vec<Expression> = Vec::new();
48039                    loop {
48040                        // Parse: column = expression (column can be qualified like x.a)
48041                        if let Some(col) = self.parse_id_var()? {
48042                            // Handle qualified column references (e.g., x.a = y.b)
48043                            let col = if self.match_token(TokenType::Dot) {
48044                                // We have a qualified column reference
48045                                if let Expression::Identifier(table_ident) = col {
48046                                    // Parse the column part after the dot
48047                                    if let Some(col_expr) = self.parse_id_var()? {
48048                                        if let Expression::Identifier(col_ident) = col_expr {
48049                                            Expression::Column(Column {
48050                                                name: col_ident,
48051                                                table: Some(table_ident),
48052                                                join_mark: false,
48053                                                trailing_comments: Vec::new(),
48054                                                span: None,
48055                                                inferred_type: None,
48056                                            })
48057                                        } else {
48058                                            col_expr
48059                                        }
48060                                    } else {
48061                                        return Err(
48062                                            self.parse_error("Expected column name after dot")
48063                                        );
48064                                    }
48065                                } else {
48066                                    col
48067                                }
48068                            } else {
48069                                col
48070                            };
48071                            if self.match_token(TokenType::Eq) {
48072                                let value = self.parse_expression()?;
48073                                // Create assignment as EQ expression
48074                                let assignment = Expression::Eq(Box::new(BinaryOp {
48075                                    left: col,
48076                                    right: value,
48077                                    left_comments: Vec::new(),
48078                                    operator_comments: Vec::new(),
48079                                    trailing_comments: Vec::new(),
48080                                    inferred_type: None,
48081                                }));
48082                                assignments.push(assignment);
48083                            }
48084                        }
48085                        if !self.match_token(TokenType::Comma) {
48086                            break;
48087                        }
48088                    }
48089                    if !assignments.is_empty() {
48090                        elements.push(Expression::Tuple(Box::new(Tuple {
48091                            expressions: assignments,
48092                        })));
48093                    }
48094                }
48095
48096                if elements.len() == 1 {
48097                    elements[0].clone()
48098                } else {
48099                    Expression::Tuple(Box::new(Tuple {
48100                        expressions: elements,
48101                    }))
48102                }
48103            } else if self.match_token(TokenType::Delete) {
48104                // DELETE action
48105                Expression::Var(Box::new(Var {
48106                    this: "DELETE".to_string(),
48107                }))
48108            } else if self.match_identifier("DO") {
48109                // DO NOTHING action (PostgreSQL)
48110                if self.match_identifier("NOTHING") {
48111                    Expression::Var(Box::new(Var {
48112                        this: "DO NOTHING".to_string(),
48113                    }))
48114                } else {
48115                    return Err(self.parse_error("Expected NOTHING after DO"));
48116                }
48117            } else {
48118                // Other action
48119                if let Some(var) = self.parse_var()? {
48120                    var
48121                } else {
48122                    return Err(
48123                        self.parse_error("Expected INSERT, UPDATE, DELETE, or action keyword")
48124                    );
48125                }
48126            };
48127
48128            whens.push(Expression::When(Box::new(When {
48129                matched: Some(Box::new(Expression::Boolean(BooleanLiteral {
48130                    value: matched,
48131                }))),
48132                source,
48133                condition,
48134                then: Box::new(then),
48135            })));
48136        }
48137
48138        if whens.is_empty() {
48139            Ok(None)
48140        } else {
48141            Ok(Some(Expression::Whens(Box::new(Whens {
48142                expressions: whens,
48143            }))))
48144        }
48145    }
48146
48147    /// parse_mergeblockratio - Parses MERGEBLOCKRATIO property (Teradata)
48148    /// Python: _parse_mergeblockratio
48149    /// Format: MERGEBLOCKRATIO = number [PERCENT] or NO MERGEBLOCKRATIO or DEFAULT MERGEBLOCKRATIO
48150    pub fn parse_mergeblockratio(&mut self) -> Result<Option<Expression>> {
48151        self.parse_mergeblockratio_impl(false, false)
48152    }
48153
48154    /// Implementation of parse_mergeblockratio with options
48155    pub fn parse_mergeblockratio_impl(
48156        &mut self,
48157        no: bool,
48158        default: bool,
48159    ) -> Result<Option<Expression>> {
48160        // Check for = followed by a number
48161        if self.match_token(TokenType::Eq) {
48162            let this = self.parse_number()?;
48163            let percent = self.match_token(TokenType::Percent);
48164
48165            Ok(Some(Expression::MergeBlockRatioProperty(Box::new(
48166                MergeBlockRatioProperty {
48167                    this: this.map(Box::new),
48168                    no: None,
48169                    default: None,
48170                    percent: if percent {
48171                        Some(Box::new(Expression::Boolean(BooleanLiteral {
48172                            value: true,
48173                        })))
48174                    } else {
48175                        None
48176                    },
48177                },
48178            ))))
48179        } else {
48180            // NO or DEFAULT variant
48181            Ok(Some(Expression::MergeBlockRatioProperty(Box::new(
48182                MergeBlockRatioProperty {
48183                    this: None,
48184                    no: if no {
48185                        Some(Box::new(Expression::Boolean(BooleanLiteral {
48186                            value: true,
48187                        })))
48188                    } else {
48189                        None
48190                    },
48191                    default: if default {
48192                        Some(Box::new(Expression::Boolean(BooleanLiteral {
48193                            value: true,
48194                        })))
48195                    } else {
48196                        None
48197                    },
48198                    percent: None,
48199                },
48200            ))))
48201        }
48202    }
48203
48204    /// parse_modifies_property - Implemented from Python _parse_modifies_property
48205    #[allow(unused_variables, unused_mut)]
48206    pub fn parse_modifies_property(&mut self) -> Result<Option<Expression>> {
48207        if self.match_text_seq(&["SQL", "DATA"]) {
48208            // Matched: SQL DATA
48209            return Ok(None);
48210        }
48211        Ok(None)
48212    }
48213
48214    /// parse_multitable_inserts - Parses Oracle's multi-table INSERT (INSERT ALL/FIRST)
48215    /// Python: _parse_multitable_inserts
48216    /// Syntax: INSERT ALL|FIRST [WHEN cond THEN] INTO table [(cols)] [VALUES(...)] ... SELECT ...
48217    pub fn parse_multitable_inserts(
48218        &mut self,
48219        leading_comments: Vec<String>,
48220    ) -> Result<Option<Expression>> {
48221        // Get kind from previous token (ALL or FIRST)
48222        let kind = self.previous().text.to_uppercase();
48223
48224        let mut expressions = Vec::new();
48225
48226        // Helper closure to parse a single conditional insert
48227        // Returns None when no more INTO clauses found
48228        loop {
48229            // Check for WHEN condition
48230            let condition = if self.match_token(TokenType::When) {
48231                let cond = self.parse_or()?;
48232                self.match_token(TokenType::Then);
48233                Some(cond)
48234            } else {
48235                None
48236            };
48237
48238            // Check for ELSE (used in INSERT FIRST ... ELSE INTO ...)
48239            let is_else = self.match_token(TokenType::Else);
48240
48241            // Must have INTO keyword to continue
48242            if !self.match_token(TokenType::Into) {
48243                break;
48244            }
48245
48246            // Parse table with optional schema (using parse_table_parts for proper schema.table parsing)
48247            let table_expr = self.parse_table_parts()?;
48248
48249            // Extract TableRef from the table expression
48250            let table_ref = if let Some(Expression::Table(t)) = table_expr {
48251                t
48252            } else {
48253                // Fallback: create empty table ref (shouldn't happen)
48254                TableRef::new("")
48255            };
48256
48257            // Parse optional column list: (col1, col2, ...)
48258            let columns = if self.match_token(TokenType::LParen) {
48259                let cols = self.parse_identifier_list()?;
48260                self.expect(TokenType::RParen)?;
48261                cols
48262            } else {
48263                Vec::new()
48264            };
48265
48266            // Parse optional VALUES clause
48267            let values = if self.match_token(TokenType::Values) {
48268                self.expect(TokenType::LParen)?;
48269                let row = self.parse_expression_list()?;
48270                self.expect(TokenType::RParen)?;
48271                vec![row]
48272            } else {
48273                Vec::new()
48274            };
48275
48276            // Create Insert expression for this INTO clause
48277            let insert_expr = Expression::Insert(Box::new(Insert {
48278                table: table_ref,
48279                columns,
48280                values,
48281                query: None,
48282                overwrite: false,
48283                partition: Vec::new(),
48284                directory: None,
48285                returning: Vec::new(),
48286                output: None,
48287                on_conflict: None,
48288                leading_comments: Vec::new(),
48289                if_exists: false,
48290                with: None,
48291                ignore: false,
48292                source_alias: None,
48293                alias: None,
48294                alias_explicit_as: false,
48295                default_values: false,
48296                by_name: false,
48297                conflict_action: None,
48298                is_replace: false,
48299                replace_where: None,
48300                source: None,
48301                hint: None,
48302                function_target: None,
48303                partition_by: None,
48304                settings: Vec::new(),
48305            }));
48306
48307            // Wrap in ConditionalInsert
48308            let conditional_insert = Expression::ConditionalInsert(Box::new(ConditionalInsert {
48309                this: Box::new(insert_expr),
48310                expression: condition.map(Box::new),
48311                else_: if is_else {
48312                    Some(Box::new(Expression::Boolean(BooleanLiteral {
48313                        value: true,
48314                    })))
48315                } else {
48316                    None
48317                },
48318            }));
48319
48320            expressions.push(conditional_insert);
48321        }
48322
48323        // Parse the source SELECT statement (or subquery)
48324        let source = self.parse_statement()?;
48325
48326        Ok(Some(Expression::MultitableInserts(Box::new(
48327            MultitableInserts {
48328                kind,
48329                expressions,
48330                source: Some(Box::new(source)),
48331                leading_comments,
48332            },
48333        ))))
48334    }
48335
48336    /// parse_name_as_expression - Parse identifier that can be aliased
48337    /// Parses: identifier [AS expression]
48338    #[allow(unused_variables, unused_mut)]
48339    pub fn parse_name_as_expression(&mut self) -> Result<Option<Expression>> {
48340        // Parse the identifier
48341        let this = self.parse_id_var()?;
48342        if this.is_none() {
48343            return Ok(None);
48344        }
48345
48346        // Check for AS alias
48347        if self.match_token(TokenType::Alias) {
48348            let expression = self.parse_disjunction()?;
48349            if expression.is_none() {
48350                return Ok(this);
48351            }
48352
48353            // Extract the identifier for the alias
48354            let alias_ident =
48355                match this.ok_or_else(|| self.parse_error("Expected identifier for alias"))? {
48356                    Expression::Identifier(id) => id,
48357                    _ => Identifier::new(String::new()),
48358                };
48359
48360            return Ok(Some(Expression::Alias(Box::new(Alias {
48361                this: expression.ok_or_else(|| self.parse_error("Expected expression after AS"))?,
48362                alias: alias_ident,
48363                column_aliases: Vec::new(),
48364                pre_alias_comments: Vec::new(),
48365                trailing_comments: Vec::new(),
48366                inferred_type: None,
48367            }))));
48368        }
48369
48370        Ok(this)
48371    }
48372
48373    /// parse_named_window - Ported from Python _parse_named_window
48374    /// Parses a named window definition: name AS (spec)
48375    #[allow(unused_variables, unused_mut)]
48376    pub fn parse_named_window(&mut self) -> Result<Option<Expression>> {
48377        // Parse window name
48378        let name = self.parse_id_var()?;
48379        if name.is_none() {
48380            return Ok(None);
48381        }
48382
48383        // Expect AS
48384        if !self.match_token(TokenType::As) {
48385            return Ok(name); // Just the name, no spec
48386        }
48387
48388        // Parse window spec (parenthesized)
48389        self.expect(TokenType::LParen)?;
48390        let spec = self.parse_window_spec_inner()?;
48391        self.expect(TokenType::RParen)?;
48392
48393        if let (Some(name_expr), Some(spec_expr)) = (name, spec) {
48394            // Create an Alias expression wrapping the spec with the name
48395            let alias_ident = if let Expression::Identifier(id) = name_expr {
48396                id
48397            } else {
48398                Identifier::new("window")
48399            };
48400            Ok(Some(Expression::Alias(Box::new(Alias {
48401                this: spec_expr,
48402                alias: alias_ident,
48403                column_aliases: Vec::new(),
48404                pre_alias_comments: Vec::new(),
48405                trailing_comments: Vec::new(),
48406                inferred_type: None,
48407            }))))
48408        } else {
48409            Ok(None)
48410        }
48411    }
48412
48413    /// parse_next_value_for - Parses NEXT VALUE FOR sequence_name
48414    /// Python: parser.py:6752-6761
48415    #[allow(unused_variables, unused_mut)]
48416    pub fn parse_next_value_for(&mut self) -> Result<Option<Expression>> {
48417        if !self.match_text_seq(&["VALUE", "FOR"]) {
48418            // Retreat if we consumed a token
48419            if self.current > 0 {
48420                self.current -= 1;
48421            }
48422            return Ok(None);
48423        }
48424
48425        // Parse the sequence name as a dotted identifier (db.schema.sequence_name)
48426        // Manually parse identifier parts separated by dots
48427        let first = self
48428            .parse_id_var()?
48429            .ok_or_else(|| self.parse_error("Expected sequence name after NEXT VALUE FOR"))?;
48430        let first_id = match first {
48431            Expression::Identifier(id) => id,
48432            Expression::Var(v) => Identifier {
48433                name: v.this,
48434                quoted: false,
48435                trailing_comments: Vec::new(),
48436                span: None,
48437            },
48438            _ => Identifier {
48439                name: String::new(),
48440                quoted: false,
48441                trailing_comments: Vec::new(),
48442                span: None,
48443            },
48444        };
48445
48446        // Check for dotted parts (db.schema.sequence_name)
48447        let mut parts = vec![first_id];
48448        while self.match_token(TokenType::Dot) {
48449            if self.is_identifier_or_keyword_token() {
48450                let token = self.advance();
48451                parts.push(Identifier {
48452                    name: token.text,
48453                    quoted: token.token_type == TokenType::QuotedIdentifier,
48454                    trailing_comments: Vec::new(),
48455                    span: None,
48456                });
48457            } else {
48458                break;
48459            }
48460        }
48461
48462        // Build a Column expression from the parts
48463        let this = if parts.len() == 1 {
48464            Expression::Column(Column {
48465                name: parts.remove(0),
48466                table: None,
48467                join_mark: false,
48468                trailing_comments: Vec::new(),
48469                span: None,
48470                inferred_type: None,
48471            })
48472        } else if parts.len() == 2 {
48473            Expression::Column(Column {
48474                name: parts.remove(1),
48475                table: Some(parts.remove(0)),
48476                join_mark: false,
48477                trailing_comments: Vec::new(),
48478                span: None,
48479                inferred_type: None,
48480            })
48481        } else {
48482            // For 3+ parts, build nested Dot expressions
48483            let mut expr = Expression::Identifier(parts.remove(0));
48484            for part in parts.drain(..) {
48485                expr = Expression::Dot(Box::new(DotAccess {
48486                    this: expr,
48487                    field: part,
48488                }));
48489            }
48490            expr
48491        };
48492
48493        // Parse optional OVER (ORDER BY ...) clause
48494        let order = if self.match_token(TokenType::Over) {
48495            if self.match_token(TokenType::LParen) {
48496                let ord = self.parse_order()?;
48497                self.expect(TokenType::RParen)?;
48498                ord.map(Box::new)
48499            } else {
48500                None
48501            }
48502        } else {
48503            None
48504        };
48505
48506        Ok(Some(Expression::NextValueFor(Box::new(NextValueFor {
48507            this: Box::new(this),
48508            order,
48509        }))))
48510    }
48511
48512    /// parse_no_property - Implemented from Python _parse_no_property
48513    #[allow(unused_variables, unused_mut)]
48514    pub fn parse_no_property(&mut self) -> Result<Option<Expression>> {
48515        if self.match_text_seq(&["PRIMARY", "INDEX"]) {
48516            // Matched: PRIMARY INDEX
48517            return Ok(None);
48518        }
48519        if self.match_text_seq(&["SQL"]) {
48520            // Matched: SQL
48521            return Ok(None);
48522        }
48523        Ok(None)
48524    }
48525
48526    /// parse_normalize - Ported from Python _parse_normalize
48527    #[allow(unused_variables, unused_mut)]
48528    /// parse_normalize - Parses NORMALIZE(expr [, form])
48529    /// Python: NORMALIZE(expr, form) where form is NFC/NFD/NFKC/NFKD
48530    pub fn parse_normalize(&mut self) -> Result<Option<Expression>> {
48531        // Parse the expression to normalize
48532        let this = self.parse_expression()?;
48533
48534        // Check for optional form argument
48535        let form = if self.match_token(TokenType::Comma) {
48536            self.parse_var()?.map(Box::new)
48537        } else {
48538            None
48539        };
48540
48541        Ok(Some(Expression::Normalize(Box::new(Normalize {
48542            this: Box::new(this),
48543            form,
48544            is_casefold: None,
48545        }))))
48546    }
48547
48548    /// parse_not_constraint - Implemented from Python _parse_not_constraint
48549    /// Parses constraints that start with NOT: NOT NULL, NOT CASESPECIFIC
48550    pub fn parse_not_constraint(&mut self) -> Result<Option<Expression>> {
48551        // NOT NULL constraint
48552        if self.match_text_seq(&["NULL"]) {
48553            return Ok(Some(Expression::NotNullColumnConstraint(Box::new(
48554                NotNullColumnConstraint { allow_null: None },
48555            ))));
48556        }
48557        // NOT CASESPECIFIC constraint (Teradata)
48558        if self.match_text_seq(&["CASESPECIFIC"]) {
48559            return Ok(Some(Expression::CaseSpecificColumnConstraint(Box::new(
48560                CaseSpecificColumnConstraint {
48561                    not_: Some(Box::new(Expression::Boolean(BooleanLiteral {
48562                        value: true,
48563                    }))),
48564                },
48565            ))));
48566        }
48567        // NOT FOR REPLICATION (SQL Server) - consume the tokens and return as a property
48568        if self.match_token(TokenType::For) && self.match_identifier("REPLICATION") {
48569            return Ok(Some(Expression::Property(Box::new(
48570                crate::expressions::Property {
48571                    this: Box::new(Expression::Identifier(Identifier::new(
48572                        "NOT FOR REPLICATION".to_string(),
48573                    ))),
48574                    value: None,
48575                },
48576            ))));
48577        }
48578        Ok(None)
48579    }
48580
48581    /// parse_null - Parse NULL literal
48582    /// Python: if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): return exp.Null
48583    pub fn parse_null(&mut self) -> Result<Option<Expression>> {
48584        if self.match_token(TokenType::Null) {
48585            return Ok(Some(Expression::Null(Null)));
48586        }
48587        // UNKNOWN is treated as NULL in some dialects
48588        if self.match_token(TokenType::Unknown) {
48589            return Ok(Some(Expression::Null(Null)));
48590        }
48591        Ok(None)
48592    }
48593
48594    /// parse_number - Parse numeric literal
48595    /// Python: TokenType.NUMBER -> exp.Literal(this=token.text, is_string=False)
48596    /// Handles Hive/Spark numeric suffixes encoded as "number::TYPE" by the tokenizer
48597    pub fn parse_number(&mut self) -> Result<Option<Expression>> {
48598        if self.match_token(TokenType::Number) {
48599            let text = self.previous().text.clone();
48600            // Check for numeric literal suffix encoded as "number::TYPE"
48601            if let Some(sep_pos) = text.find("::") {
48602                let num_part = &text[..sep_pos];
48603                let type_name = &text[sep_pos + 2..];
48604                // Create a TryCast expression: TRY_CAST(number AS TYPE)
48605                let num_expr = Expression::Literal(Literal::Number(num_part.to_string()));
48606                let data_type = match type_name {
48607                    "BIGINT" => crate::expressions::DataType::BigInt { length: None },
48608                    "SMALLINT" => crate::expressions::DataType::SmallInt { length: None },
48609                    "TINYINT" => crate::expressions::DataType::TinyInt { length: None },
48610                    "DOUBLE" => crate::expressions::DataType::Double {
48611                        precision: None,
48612                        scale: None,
48613                    },
48614                    "FLOAT" => crate::expressions::DataType::Float {
48615                        precision: None,
48616                        scale: None,
48617                        real_spelling: false,
48618                    },
48619                    "DECIMAL" => crate::expressions::DataType::Decimal {
48620                        precision: None,
48621                        scale: None,
48622                    },
48623                    _ => crate::expressions::DataType::Custom {
48624                        name: type_name.to_string(),
48625                    },
48626                };
48627                return Ok(Some(Expression::TryCast(Box::new(
48628                    crate::expressions::Cast {
48629                        this: num_expr,
48630                        to: data_type,
48631                        trailing_comments: Vec::new(),
48632                        double_colon_syntax: false,
48633                        format: None,
48634                        default: None,
48635                        inferred_type: None,
48636                    },
48637                ))));
48638            }
48639            return Ok(Some(Expression::Literal(Literal::Number(text))));
48640        }
48641        Ok(None)
48642    }
48643
48644    /// parse_odbc_datetime_literal - Ported from Python _parse_odbc_datetime_literal
48645    #[allow(unused_variables, unused_mut)]
48646    /// parse_odbc_datetime_literal - Parses ODBC datetime literals
48647    /// Examples: {d'2023-01-01'}, {t'12:00:00'}, {ts'2023-01-01 12:00:00'}
48648    pub fn parse_odbc_datetime_literal(&mut self) -> Result<Option<Expression>> {
48649        // Match the type indicator (d, t, ts)
48650        if !self.match_token(TokenType::Var) {
48651            return Ok(None);
48652        }
48653        let type_indicator = self.previous().text.to_lowercase();
48654
48655        // Parse the string value
48656        let value = self.parse_string()?;
48657        if value.is_none() {
48658            return Ok(None);
48659        }
48660
48661        // Expect closing brace
48662        self.expect(TokenType::RBrace)?;
48663
48664        // Return appropriate expression based on type
48665        let value = value
48666            .ok_or_else(|| self.parse_error("Expected string value in ODBC datetime literal"))?;
48667        match type_indicator.as_str() {
48668            "d" => Ok(Some(Expression::Date(Box::new(UnaryFunc::new(value))))),
48669            "t" => Ok(Some(Expression::Time(Box::new(UnaryFunc::new(value))))),
48670            "ts" => Ok(Some(Expression::Timestamp(Box::new(TimestampFunc {
48671                this: Some(Box::new(value)),
48672                zone: None,
48673                with_tz: None,
48674                safe: None,
48675            })))),
48676            _ => Ok(Some(value)),
48677        }
48678    }
48679
48680    /// parse_offset - Parse OFFSET clause
48681    /// Python: if self._match(TokenType.OFFSET): return exp.Offset(this=self._parse_term())
48682    pub fn parse_offset(&mut self) -> Result<Option<Expression>> {
48683        if !self.match_token(TokenType::Offset) {
48684            return Ok(None);
48685        }
48686        // Parse the offset expression (usually a number)
48687        let offset_expr = self.parse_expression()?;
48688        Ok(Some(Expression::Offset(Box::new(Offset {
48689            this: offset_expr,
48690            rows: None,
48691        }))))
48692    }
48693
48694    /// parse_on_condition - Ported from Python _parse_on_condition
48695    #[allow(unused_variables, unused_mut)]
48696    /// parse_on_condition - Parses ON EMPTY/ERROR/NULL conditions
48697    /// Example: NULL ON EMPTY, ERROR ON ERROR
48698    pub fn parse_on_condition(&mut self) -> Result<Option<Expression>> {
48699        // Parse ON EMPTY
48700        let empty = if self.match_text_seq(&["NULL", "ON", "EMPTY"]) {
48701            Some(Box::new(Expression::Identifier(Identifier::new(
48702                "NULL".to_string(),
48703            ))))
48704        } else if self.match_text_seq(&["ERROR", "ON", "EMPTY"]) {
48705            Some(Box::new(Expression::Identifier(Identifier::new(
48706                "ERROR".to_string(),
48707            ))))
48708        } else if self.match_text_seq(&["DEFAULT"]) {
48709            let default_val = self.parse_expression()?;
48710            if self.match_text_seq(&["ON", "EMPTY"]) {
48711                Some(Box::new(default_val))
48712            } else {
48713                None
48714            }
48715        } else {
48716            None
48717        };
48718
48719        // Parse ON ERROR
48720        let error = if self.match_text_seq(&["NULL", "ON", "ERROR"]) {
48721            Some(Box::new(Expression::Identifier(Identifier::new(
48722                "NULL".to_string(),
48723            ))))
48724        } else if self.match_text_seq(&["ERROR", "ON", "ERROR"]) {
48725            Some(Box::new(Expression::Identifier(Identifier::new(
48726                "ERROR".to_string(),
48727            ))))
48728        } else if self.match_text_seq(&["DEFAULT"]) {
48729            let default_val = self.parse_expression()?;
48730            if self.match_text_seq(&["ON", "ERROR"]) {
48731                Some(Box::new(default_val))
48732            } else {
48733                None
48734            }
48735        } else {
48736            None
48737        };
48738
48739        // Parse ON NULL
48740        let null = if self.match_text_seq(&["NULL", "ON", "NULL"]) {
48741            Some(Box::new(Expression::Identifier(Identifier::new(
48742                "NULL".to_string(),
48743            ))))
48744        } else {
48745            None
48746        };
48747
48748        if empty.is_none() && error.is_none() && null.is_none() {
48749            return Ok(None);
48750        }
48751
48752        Ok(Some(Expression::OnCondition(Box::new(OnCondition {
48753            empty,
48754            error,
48755            null,
48756        }))))
48757    }
48758
48759    /// parse_on_handling - Implemented from Python _parse_on_handling
48760    /// Calls: parse_bitwise
48761    #[allow(unused_variables, unused_mut)]
48762    pub fn parse_on_handling(&mut self) -> Result<Option<Expression>> {
48763        if self.match_text_seq(&["ON"]) {
48764            // Matched: ON
48765            return Ok(None);
48766        }
48767        if self.match_text_seq(&["ON"]) {
48768            // Matched: ON
48769            return Ok(None);
48770        }
48771        Ok(None)
48772    }
48773
48774    /// parse_on_property - Implemented from Python _parse_on_property
48775    #[allow(unused_variables, unused_mut)]
48776    pub fn parse_on_property(&mut self) -> Result<Option<Expression>> {
48777        if self.match_text_seq(&["COMMIT", "PRESERVE", "ROWS"]) {
48778            return Ok(Some(Expression::OnCommitProperty(Box::new(
48779                OnCommitProperty { delete: None },
48780            ))));
48781        }
48782        if self.match_text_seq(&["COMMIT", "DELETE", "ROWS"]) {
48783            // Matched: COMMIT DELETE ROWS
48784            return Ok(None);
48785        }
48786        Ok(None)
48787    }
48788
48789    /// parse_opclass - Ported from Python _parse_opclass
48790    #[allow(unused_variables, unused_mut)]
48791    /// parse_opclass - Parses PostgreSQL operator class in index expressions
48792    /// Example: column_name text_pattern_ops
48793    pub fn parse_opclass(&mut self) -> Result<Option<Expression>> {
48794        // Parse the expression first
48795        let this = self.parse_expression()?;
48796
48797        // Check for keywords that would indicate this is not an opclass
48798        // (e.g., ASC, DESC, NULLS, etc.)
48799        if self.check(TokenType::Asc)
48800            || self.check(TokenType::Desc)
48801            || self.check(TokenType::Nulls)
48802            || self.check(TokenType::Comma)
48803            || self.check(TokenType::RParen)
48804        {
48805            return Ok(Some(this));
48806        }
48807
48808        // Try to parse an operator class name (table parts)
48809        if let Some(opclass_name) = self.parse_table()? {
48810            return Ok(Some(Expression::Opclass(Box::new(Opclass {
48811                this: Box::new(this),
48812                expression: Box::new(opclass_name),
48813            }))));
48814        }
48815
48816        Ok(Some(this))
48817    }
48818
48819    /// parse_open_json - Parses SQL Server OPENJSON function
48820    /// Example: OPENJSON(json, '$.path') WITH (col1 type '$.path' AS JSON, ...)
48821    pub fn parse_open_json(&mut self) -> Result<Option<Expression>> {
48822        // Parse the JSON expression
48823        let this = self.parse_expression()?;
48824
48825        // Parse optional path
48826        let path = if self.match_token(TokenType::Comma) {
48827            self.parse_string()?.map(Box::new)
48828        } else {
48829            None
48830        };
48831
48832        // Check for closing paren and WITH clause
48833        let expressions = if self.match_token(TokenType::RParen)
48834            && self.match_token(TokenType::With)
48835        {
48836            self.expect(TokenType::LParen)?;
48837            let mut cols = Vec::new();
48838            loop {
48839                // Parse column definition: name type 'path' [AS JSON]
48840                let col_name = self.parse_field()?;
48841                if col_name.is_none() {
48842                    break;
48843                }
48844                let col_type = self.parse_data_type()?;
48845                let col_path = self.parse_string()?.map(Box::new);
48846                let as_json = if self.match_token(TokenType::As) && self.match_identifier("JSON") {
48847                    Some(Box::new(Expression::Boolean(BooleanLiteral {
48848                        value: true,
48849                    })))
48850                } else {
48851                    None
48852                };
48853                cols.push(Expression::OpenJSONColumnDef(Box::new(OpenJSONColumnDef {
48854                    this: Box::new(col_name.ok_or_else(|| {
48855                        self.parse_error("Expected column name in OPENJSON WITH clause")
48856                    })?),
48857                    kind: String::new(), // kept for backwards compat, use data_type instead
48858                    path: col_path,
48859                    as_json,
48860                    data_type: Some(col_type),
48861                })));
48862                if !self.match_token(TokenType::Comma) {
48863                    break;
48864                }
48865            }
48866            self.expect(TokenType::RParen)?;
48867            cols
48868        } else {
48869            Vec::new()
48870        };
48871
48872        Ok(Some(Expression::OpenJSON(Box::new(OpenJSON {
48873            this: Box::new(this),
48874            path,
48875            expressions,
48876        }))))
48877    }
48878
48879    /// parse_operator - Ported from Python _parse_operator
48880    #[allow(unused_variables, unused_mut)]
48881    /// parse_operator - Parses PostgreSQL OPERATOR(op) syntax
48882    /// Example: col1 OPERATOR(~>) col2
48883    pub fn parse_operator(&mut self, this: Option<Expression>) -> Result<Option<Expression>> {
48884        let mut result = this;
48885
48886        // Parse OPERATOR(op) expressions
48887        while self.match_token(TokenType::LParen) {
48888            // Collect the operator text between parens
48889            let mut op_text = String::new();
48890            while !self.check(TokenType::RParen) && !self.is_at_end() {
48891                op_text.push_str(&self.peek().text);
48892                self.advance();
48893            }
48894            self.expect(TokenType::RParen)?;
48895
48896            // Parse the right-hand side expression
48897            let rhs = self.parse_expression()?;
48898
48899            result = Some(Expression::Operator(Box::new(Operator {
48900                this: Box::new(result.unwrap_or_else(|| Expression::Null(Null))),
48901                operator: Some(Box::new(Expression::Identifier(Identifier::new(op_text)))),
48902                expression: Box::new(rhs),
48903                comments: Vec::new(),
48904            })));
48905
48906            // Check if there's another OPERATOR keyword
48907            if !self.match_token(TokenType::Operator) {
48908                break;
48909            }
48910        }
48911
48912        Ok(result)
48913    }
48914
48915    /// parse_order - Parse ORDER BY clause
48916    /// Python: if not self._match(TokenType.ORDER_BY): return this; return exp.Order(expressions=self._parse_csv(self._parse_ordered))
48917    pub fn parse_order(&mut self) -> Result<Option<Expression>> {
48918        if !self.match_token(TokenType::Order) {
48919            return Ok(None);
48920        }
48921        // Consume BY if present
48922        self.match_token(TokenType::By);
48923
48924        // Parse comma-separated ordered expressions
48925        let mut expressions = Vec::new();
48926        loop {
48927            if let Some(ordered) = self.parse_ordered_item()? {
48928                expressions.push(ordered);
48929            } else {
48930                break;
48931            }
48932            if !self.match_token(TokenType::Comma) {
48933                break;
48934            }
48935        }
48936
48937        Ok(Some(Expression::OrderBy(Box::new(OrderBy {
48938            expressions,
48939            siblings: false,
48940            comments: Vec::new(),
48941        }))))
48942    }
48943
48944    /// parse_ordered_item - Parse a single ORDER BY item (expr [ASC|DESC] [NULLS FIRST|LAST])
48945    fn parse_ordered_item(&mut self) -> Result<Option<Ordered>> {
48946        // Parse the expression to order by
48947        let expr = match self.parse_expression() {
48948            Ok(e) => e,
48949            Err(_) => return Ok(None),
48950        };
48951
48952        // Check for ASC/DESC
48953        let mut desc = false;
48954        let mut explicit_asc = false;
48955        if self.match_token(TokenType::Asc) {
48956            explicit_asc = true;
48957        } else if self.match_token(TokenType::Desc) {
48958            desc = true;
48959        }
48960
48961        // Check for NULLS FIRST/LAST
48962        let nulls_first = if self.match_text_seq(&["NULLS", "FIRST"]) {
48963            Some(true)
48964        } else if self.match_text_seq(&["NULLS", "LAST"]) {
48965            Some(false)
48966        } else {
48967            None
48968        };
48969
48970        // Parse optional WITH FILL clause (ClickHouse)
48971        let with_fill = if self.match_text_seq(&["WITH", "FILL"]) {
48972            let from_ = if self.match_token(TokenType::From) {
48973                Some(Box::new(self.parse_or()?))
48974            } else {
48975                None
48976            };
48977            let to = if self.match_text_seq(&["TO"]) {
48978                Some(Box::new(self.parse_or()?))
48979            } else {
48980                None
48981            };
48982            let step = if self.match_text_seq(&["STEP"]) {
48983                Some(Box::new(self.parse_or()?))
48984            } else {
48985                None
48986            };
48987            let staleness = if self.match_text_seq(&["STALENESS"]) {
48988                Some(Box::new(self.parse_or()?))
48989            } else {
48990                None
48991            };
48992            let interpolate = if self.match_text_seq(&["INTERPOLATE"]) {
48993                if self.match_token(TokenType::LParen) {
48994                    let exprs = self.parse_expression_list()?;
48995                    self.expect(TokenType::RParen)?;
48996                    if exprs.len() == 1 {
48997                        Some(Box::new(exprs.into_iter().next().unwrap()))
48998                    } else {
48999                        Some(Box::new(Expression::Tuple(Box::new(
49000                            crate::expressions::Tuple { expressions: exprs },
49001                        ))))
49002                    }
49003                } else {
49004                    None
49005                }
49006            } else {
49007                None
49008            };
49009            Some(Box::new(WithFill {
49010                from_,
49011                to,
49012                step,
49013                staleness,
49014                interpolate,
49015            }))
49016        } else {
49017            None
49018        };
49019
49020        Ok(Some(Ordered {
49021            this: expr,
49022            desc,
49023            nulls_first,
49024            explicit_asc,
49025            with_fill,
49026        }))
49027    }
49028
49029    /// parse_ordered - Implemented from Python _parse_ordered (wrapper for parse_ordered_item)
49030    #[allow(unused_variables, unused_mut)]
49031    pub fn parse_ordered(&mut self) -> Result<Option<Expression>> {
49032        if let Some(ordered) = self.parse_ordered_item()? {
49033            return Ok(Some(Expression::Ordered(Box::new(ordered))));
49034        }
49035        if self.match_text_seq(&["NULLS", "FIRST"]) {
49036            return Ok(Some(Expression::WithFill(Box::new(WithFill {
49037                from_: None,
49038                to: None,
49039                step: None,
49040                staleness: None,
49041                interpolate: None,
49042            }))));
49043        }
49044        if self.match_text_seq(&["NULLS", "LAST"]) {
49045            // Matched: NULLS LAST
49046            return Ok(None);
49047        }
49048        if self.match_text_seq(&["WITH", "FILL"]) {
49049            // Matched: WITH FILL
49050            return Ok(None);
49051        }
49052        Ok(None)
49053    }
49054
49055    /// parse_overlay - Ported from Python _parse_overlay
49056    /// Parses OVERLAY function: OVERLAY(string PLACING replacement FROM position [FOR length])
49057    #[allow(unused_variables, unused_mut)]
49058    pub fn parse_overlay(&mut self) -> Result<Option<Expression>> {
49059        // Parse the string to be modified
49060        let this = match self.parse_bitwise() {
49061            Ok(Some(expr)) => expr,
49062            Ok(None) => return Ok(None),
49063            Err(e) => return Err(e),
49064        };
49065
49066        // Parse PLACING replacement (or comma then replacement)
49067        let replacement = if self.match_text_seq(&["PLACING"]) || self.match_token(TokenType::Comma)
49068        {
49069            match self.parse_bitwise() {
49070                Ok(Some(expr)) => expr,
49071                Ok(None) => {
49072                    return Err(self.parse_error("Expected replacement expression in OVERLAY"))
49073                }
49074                Err(e) => return Err(e),
49075            }
49076        } else {
49077            return Err(self.parse_error("Expected PLACING in OVERLAY function"));
49078        };
49079
49080        // Parse FROM position (or comma then position)
49081        let from = if self.match_token(TokenType::From) || self.match_token(TokenType::Comma) {
49082            match self.parse_bitwise() {
49083                Ok(Some(expr)) => expr,
49084                Ok(None) => return Err(self.parse_error("Expected position expression in OVERLAY")),
49085                Err(e) => return Err(e),
49086            }
49087        } else {
49088            return Err(self.parse_error("Expected FROM in OVERLAY function"));
49089        };
49090
49091        // Parse optional FOR length (or comma then length)
49092        let length = if self.match_token(TokenType::For) || self.match_token(TokenType::Comma) {
49093            match self.parse_bitwise() {
49094                Ok(Some(expr)) => Some(expr),
49095                Ok(None) => None,
49096                Err(_) => None,
49097            }
49098        } else {
49099            None
49100        };
49101
49102        Ok(Some(Expression::Overlay(Box::new(OverlayFunc {
49103            this,
49104            replacement,
49105            from,
49106            length,
49107        }))))
49108    }
49109
49110    /// parse_parameter - Parse named parameter (@name or :name)
49111    /// Python: this = self._parse_identifier() or self._parse_primary_or_var(); return exp.Parameter(this=this)
49112    pub fn parse_parameter(&mut self) -> Result<Option<Expression>> {
49113        // Check for parameter token types
49114        if self.match_token(TokenType::Parameter) {
49115            let text = self.previous().text.clone();
49116            return Ok(Some(Expression::Parameter(Box::new(Parameter {
49117                name: Some(text),
49118                index: None,
49119                style: ParameterStyle::Colon,
49120                quoted: false,
49121                string_quoted: false,
49122                expression: None,
49123            }))));
49124        }
49125
49126        // Check for session parameter (@@name)
49127        if self.match_token(TokenType::SessionParameter) {
49128            let text = self.previous().text.clone();
49129            return Ok(Some(Expression::SessionParameter(Box::new(
49130                SessionParameter {
49131                    this: Box::new(Expression::Identifier(Identifier::new(text))),
49132                    kind: None,
49133                },
49134            ))));
49135        }
49136
49137        Ok(None)
49138    }
49139
49140    /// parse_paren - Ported from Python _parse_paren
49141    /// Parses parenthesized expressions: (expr), (select ...), or (a, b, c)
49142    #[allow(unused_variables, unused_mut)]
49143    pub fn parse_paren(&mut self) -> Result<Option<Expression>> {
49144        if !self.match_token(TokenType::LParen) {
49145            return Ok(None);
49146        }
49147
49148        // Check for empty tuple ()
49149        if self.match_token(TokenType::RParen) {
49150            return Ok(Some(Expression::Tuple(Box::new(Tuple {
49151                expressions: Vec::new(),
49152            }))));
49153        }
49154
49155        // Try to parse as subquery first
49156        // ClickHouse also allows (EXPLAIN ...) as subquery
49157        if self.check(TokenType::Select)
49158            || self.check(TokenType::With)
49159            || (matches!(
49160                self.config.dialect,
49161                Some(crate::dialects::DialectType::ClickHouse)
49162            ) && self.check(TokenType::Var)
49163                && self.peek().text.eq_ignore_ascii_case("EXPLAIN"))
49164        {
49165            let query = self.parse_statement()?;
49166            self.expect(TokenType::RParen)?;
49167            return Ok(Some(Expression::Subquery(Box::new(Subquery {
49168                this: query,
49169                alias: None,
49170                column_aliases: Vec::new(),
49171                order_by: None,
49172                limit: None,
49173                offset: None,
49174                lateral: false,
49175                modifiers_inside: true,
49176                trailing_comments: Vec::new(),
49177                distribute_by: None,
49178                sort_by: None,
49179                cluster_by: None,
49180                inferred_type: None,
49181            }))));
49182        }
49183
49184        // Parse comma-separated expressions
49185        let mut expressions = Vec::new();
49186        let mut trailing_comma = false;
49187        loop {
49188            match self.parse_expression() {
49189                Ok(expr) => expressions.push(expr),
49190                Err(_) => break,
49191            }
49192            if !self.match_token(TokenType::Comma) {
49193                break;
49194            }
49195            // ClickHouse: trailing comma makes a single-element tuple, e.g., (1,)
49196            if self.check(TokenType::RParen) {
49197                trailing_comma = true;
49198                break;
49199            }
49200        }
49201
49202        self.expect(TokenType::RParen)?;
49203
49204        // Single expression with trailing comma → tuple, e.g., (1,)
49205        if trailing_comma && expressions.len() == 1 {
49206            return Ok(Some(Expression::Tuple(Box::new(Tuple { expressions }))));
49207        }
49208
49209        // Single expression - return the unwrapped Paren
49210        if expressions.len() == 1 {
49211            return Ok(Some(Expression::Paren(Box::new(Paren {
49212                this: expressions.remove(0),
49213                trailing_comments: Vec::new(),
49214            }))));
49215        }
49216
49217        // Multiple expressions - return as tuple
49218        Ok(Some(Expression::Tuple(Box::new(Tuple { expressions }))))
49219    }
49220
49221    /// parse_partition - Parses PARTITION/SUBPARTITION clause
49222    /// Python: _parse_partition
49223    pub fn parse_partition(&mut self) -> Result<Option<Expression>> {
49224        // PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}
49225        if !self.match_texts(&["PARTITION", "SUBPARTITION"]) {
49226            return Ok(None);
49227        }
49228
49229        let subpartition = self.previous().text.to_uppercase() == "SUBPARTITION";
49230
49231        // Parse wrapped CSV of disjunction expressions
49232        if !self.match_token(TokenType::LParen) {
49233            // Without parentheses, still return a Partition with empty expressions
49234            return Ok(Some(Expression::Partition(Box::new(Partition {
49235                expressions: Vec::new(),
49236                subpartition,
49237            }))));
49238        }
49239
49240        let mut expressions = Vec::new();
49241        loop {
49242            if let Some(expr) = self.parse_disjunction()? {
49243                expressions.push(expr);
49244            } else {
49245                break;
49246            }
49247
49248            if !self.match_token(TokenType::Comma) {
49249                break;
49250            }
49251        }
49252
49253        self.match_token(TokenType::RParen);
49254
49255        Ok(Some(Expression::Partition(Box::new(Partition {
49256            expressions,
49257            subpartition,
49258        }))))
49259    }
49260
49261    /// parse_partition_and_order - Delegates to parse_partition_by
49262    #[allow(unused_variables, unused_mut)]
49263    pub fn parse_partition_and_order(&mut self) -> Result<Option<Expression>> {
49264        self.parse_partition_by()
49265    }
49266
49267    /// parse_partition_bound_spec - Implemented from Python _parse_partition_bound_spec
49268    /// Calls: parse_bitwise, parse_number
49269    #[allow(unused_variables, unused_mut)]
49270    pub fn parse_partition_bound_spec_legacy(&mut self) -> Result<Option<Expression>> {
49271        if self.match_text_seq(&["MINVALUE"]) {
49272            return Ok(Some(Expression::PartitionBoundSpec(Box::new(
49273                PartitionBoundSpec {
49274                    this: None,
49275                    expression: None,
49276                    from_expressions: None,
49277                    to_expressions: None,
49278                },
49279            ))));
49280        }
49281        if self.match_text_seq(&["MAXVALUE"]) {
49282            // Matched: MAXVALUE
49283            return Ok(None);
49284        }
49285        if self.match_text_seq(&["TO"]) {
49286            // Matched: TO
49287            return Ok(None);
49288        }
49289        Ok(None)
49290    }
49291
49292    /// parse_partition_by - Ported from Python _parse_partition_by
49293    /// Parses PARTITION BY expression list
49294    #[allow(unused_variables, unused_mut)]
49295    pub fn parse_partition_by(&mut self) -> Result<Option<Expression>> {
49296        if !self.match_keywords(&[TokenType::Partition, TokenType::By]) {
49297            return Ok(None);
49298        }
49299        let expressions = self.parse_expression_list()?;
49300        Ok(Some(Expression::Tuple(Box::new(Tuple { expressions }))))
49301    }
49302
49303    /// parse_partitioned_by - Parses PARTITIONED BY clause
49304    /// Python: _parse_partitioned_by
49305    pub fn parse_partitioned_by(&mut self) -> Result<Option<Expression>> {
49306        // Optionally match '='
49307        self.match_token(TokenType::Eq);
49308
49309        // Try to parse a schema first
49310        if let Some(schema) = self.parse_schema()? {
49311            return Ok(Some(Expression::PartitionedByProperty(Box::new(
49312                PartitionedByProperty {
49313                    this: Box::new(schema),
49314                },
49315            ))));
49316        }
49317
49318        // Fall back to bracket(field)
49319        if let Some(bracket) = self.parse_bracket()? {
49320            return Ok(Some(Expression::PartitionedByProperty(Box::new(
49321                PartitionedByProperty {
49322                    this: Box::new(bracket),
49323                },
49324            ))));
49325        }
49326
49327        // Try to parse a field directly
49328        if let Some(field) = self.parse_field()? {
49329            return Ok(Some(Expression::PartitionedByProperty(Box::new(
49330                PartitionedByProperty {
49331                    this: Box::new(field),
49332                },
49333            ))));
49334        }
49335
49336        Ok(None)
49337    }
49338
49339    /// parse_partitioned_by_bucket_or_truncate - Parses BUCKET or TRUNCATE partition transforms
49340    /// Python: _parse_partitioned_by_bucket_or_truncate
49341    /// Syntax: BUCKET(col, num_buckets) or TRUNCATE(col, width)
49342    /// Handles both Hive (num, col) and Trino (col, num) ordering, normalizes to (col, num)
49343    pub fn parse_partitioned_by_bucket_or_truncate(&mut self) -> Result<Option<Expression>> {
49344        // If no L_PAREN follows, this should be parsed as an identifier, not a function call
49345        if !self.check(TokenType::LParen) {
49346            // Retreat: go back one token (previous was BUCKET or TRUNCATE)
49347            if self.current > 0 {
49348                self.current -= 1;
49349            }
49350            return Ok(None);
49351        }
49352
49353        // Determine if it's BUCKET or TRUNCATE based on previous token
49354        let is_bucket = self.previous().text.to_uppercase() == "BUCKET";
49355
49356        // Parse wrapped arguments
49357        self.expect(TokenType::LParen)?;
49358        let mut args = Vec::new();
49359
49360        if !self.check(TokenType::RParen) {
49361            loop {
49362                // Try to parse primary or column
49363                if let Some(expr) = self.parse_primary_or_var()? {
49364                    args.push(expr);
49365                } else if let Some(col) = self.parse_column()? {
49366                    args.push(col);
49367                }
49368
49369                if !self.match_token(TokenType::Comma) {
49370                    break;
49371                }
49372            }
49373        }
49374        self.match_token(TokenType::RParen);
49375
49376        // Get first two arguments
49377        let (mut this, mut expr) = (args.get(0).cloned(), args.get(1).cloned());
49378
49379        // Normalize: if first arg is a Literal, swap (Hive uses (num, col), Trino uses (col, num))
49380        // We canonicalize to (col, num)
49381        if let Some(Expression::Literal(_)) = &this {
49382            std::mem::swap(&mut this, &mut expr);
49383        }
49384
49385        // Ensure we have both arguments
49386        let this_expr = this.unwrap_or(Expression::Null(Null));
49387        let expr_expr = expr.unwrap_or(Expression::Null(Null));
49388
49389        if is_bucket {
49390            Ok(Some(Expression::PartitionedByBucket(Box::new(
49391                PartitionedByBucket {
49392                    this: Box::new(this_expr),
49393                    expression: Box::new(expr_expr),
49394                },
49395            ))))
49396        } else {
49397            Ok(Some(Expression::PartitionByTruncate(Box::new(
49398                PartitionByTruncate {
49399                    this: Box::new(this_expr),
49400                    expression: Box::new(expr_expr),
49401                },
49402            ))))
49403        }
49404    }
49405
49406    /// parse_doris_partition_by_range_or_list - Parses Doris PARTITION BY RANGE/LIST syntax
49407    /// Handles:
49408    ///   PARTITION BY RANGE (`col`) (PARTITION name VALUES LESS THAN (val), ...)
49409    ///   PARTITION BY RANGE (`col`) (PARTITION name VALUES [(val1), (val2)), ...)
49410    ///   PARTITION BY RANGE (`col`) (FROM ('start') TO ('end') INTERVAL n UNIT)
49411    ///   PARTITION BY LIST (`col`) (PARTITION name VALUES IN (val1, val2), ...)
49412    fn parse_doris_partition_by_range_or_list(&mut self, kind: &str) -> Result<Expression> {
49413        // Parse partition column expressions: (`col1`, `col2`, ...) or (STR2DATE(col, fmt))
49414        // Use parse_wrapped_csv to handle function calls in partition columns
49415        let partition_expressions = self.parse_wrapped_csv()?;
49416
49417        // Check for partition definitions in parentheses
49418        let create_expressions = if self.check(TokenType::LParen) {
49419            self.advance(); // consume (
49420
49421            if kind == "LIST" {
49422                // Parse LIST partition definitions: PARTITION name VALUES IN (val1, val2), ...
49423                let partitions = self.parse_doris_list_partition_definitions()?;
49424                self.expect(TokenType::RParen)?;
49425                Some(Box::new(Expression::Tuple(Box::new(Tuple {
49426                    expressions: partitions,
49427                }))))
49428            } else {
49429                // RANGE: check for FROM (dynamic), START (StarRocks dynamic), or PARTITION (static)
49430                if self.check(TokenType::From) {
49431                    // Dynamic: FROM ('start') TO ('end') INTERVAL n UNIT
49432                    let dynamic_expr = self.parse_doris_dynamic_partition()?;
49433                    self.expect(TokenType::RParen)?;
49434                    Some(Box::new(dynamic_expr))
49435                } else if self.check(TokenType::Start) {
49436                    // StarRocks dynamic: START ('val') END ('val') EVERY (expr), ...
49437                    let mut dynamics = Vec::new();
49438                    loop {
49439                        if !self.check(TokenType::Start) {
49440                            break;
49441                        }
49442                        let dynamic_expr = self.parse_starrocks_start_end_every()?;
49443                        dynamics.push(dynamic_expr);
49444                        if !self.match_token(TokenType::Comma) {
49445                            break;
49446                        }
49447                    }
49448                    self.expect(TokenType::RParen)?;
49449                    Some(Box::new(Expression::Tuple(Box::new(Tuple {
49450                        expressions: dynamics,
49451                    }))))
49452                } else if self.check(TokenType::Partition) {
49453                    // Static: PARTITION name VALUES LESS THAN (val) or VALUES [(val1), (val2))
49454                    let partitions = self.parse_doris_range_partition_definitions()?;
49455                    self.expect(TokenType::RParen)?;
49456                    Some(Box::new(Expression::Tuple(Box::new(Tuple {
49457                        expressions: partitions,
49458                    }))))
49459                } else {
49460                    self.expect(TokenType::RParen)?;
49461                    None
49462                }
49463            }
49464        } else {
49465            None
49466        };
49467
49468        if kind == "LIST" {
49469            Ok(Expression::PartitionByListProperty(Box::new(
49470                PartitionByListProperty {
49471                    partition_expressions: partition_expressions.map(Box::new),
49472                    create_expressions,
49473                },
49474            )))
49475        } else {
49476            Ok(Expression::PartitionByRangeProperty(Box::new(
49477                PartitionByRangeProperty {
49478                    partition_expressions: partition_expressions.map(Box::new),
49479                    create_expressions,
49480                },
49481            )))
49482        }
49483    }
49484
49485    /// Parse Doris LIST partition definitions: PARTITION name VALUES IN (val1, val2), ...
49486    fn parse_doris_list_partition_definitions(&mut self) -> Result<Vec<Expression>> {
49487        let mut partitions = Vec::new();
49488        loop {
49489            if !self.match_token(TokenType::Partition) {
49490                break;
49491            }
49492            let name = self.parse_id_var()?.unwrap_or(Expression::Null(Null));
49493            self.match_text_seq(&["VALUES", "IN"]);
49494            let values = self.parse_wrapped_csv_expressions()?;
49495
49496            let part_list = Expression::PartitionList(Box::new(PartitionList {
49497                this: Box::new(name),
49498                expressions: values,
49499            }));
49500            partitions.push(Expression::Partition(Box::new(Partition {
49501                expressions: vec![part_list],
49502                subpartition: false,
49503            })));
49504
49505            if !self.match_token(TokenType::Comma) {
49506                break;
49507            }
49508        }
49509        Ok(partitions)
49510    }
49511
49512    /// Parse Doris RANGE partition definitions
49513    fn parse_doris_range_partition_definitions(&mut self) -> Result<Vec<Expression>> {
49514        let mut partitions = Vec::new();
49515        loop {
49516            if !self.match_token(TokenType::Partition) {
49517                break;
49518            }
49519            let name = self.parse_id_var()?.unwrap_or(Expression::Null(Null));
49520            self.match_text_seq(&["VALUES"]);
49521
49522            let part_range = if self.match_text_seq(&["LESS", "THAN"]) {
49523                // VALUES LESS THAN (val) or VALUES LESS THAN (MAXVALUE)
49524                let values = self.parse_wrapped_csv_expressions()?;
49525                Expression::PartitionRange(Box::new(PartitionRange {
49526                    this: Box::new(name),
49527                    expression: None,
49528                    expressions: values,
49529                }))
49530            } else if self.check(TokenType::LBracket) {
49531                // VALUES [(val1), (val2)) - note asymmetric brackets
49532                self.advance(); // consume [
49533                let mut value_tuples = Vec::new();
49534                loop {
49535                    let vals = self.parse_wrapped_csv_expressions()?;
49536                    // Wrap in a Tuple for each (val)
49537                    value_tuples.push(Expression::Tuple(Box::new(Tuple { expressions: vals })));
49538                    if !self.match_token(TokenType::Comma) {
49539                        break;
49540                    }
49541                }
49542                // Expect ) to close the asymmetric bracket
49543                self.expect(TokenType::RParen)?;
49544                Expression::PartitionRange(Box::new(PartitionRange {
49545                    this: Box::new(name),
49546                    expression: None,
49547                    expressions: value_tuples,
49548                }))
49549            } else {
49550                // Fallback: no values
49551                Expression::PartitionRange(Box::new(PartitionRange {
49552                    this: Box::new(name),
49553                    expression: None,
49554                    expressions: Vec::new(),
49555                }))
49556            };
49557
49558            partitions.push(Expression::Partition(Box::new(Partition {
49559                expressions: vec![part_range],
49560                subpartition: false,
49561            })));
49562
49563            if !self.match_token(TokenType::Comma) {
49564                break;
49565            }
49566        }
49567        Ok(partitions)
49568    }
49569
49570    /// Parse Doris dynamic partition: FROM ('start') TO ('end') INTERVAL n UNIT
49571    fn parse_doris_dynamic_partition(&mut self) -> Result<Expression> {
49572        self.expect(TokenType::From)?;
49573        let start = self.parse_wrapped_expression()?;
49574        self.expect(TokenType::To)?;
49575        let end = self.parse_wrapped_expression()?;
49576
49577        // Parse INTERVAL n UNIT
49578        let every = if self.match_token(TokenType::Interval) {
49579            let number = self.parse_expression()?;
49580            let unit = if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
49581                let unit_text = self.advance().text.to_uppercase();
49582                // Convert unit text to IntervalUnit
49583                let interval_unit = match unit_text.as_str() {
49584                    "YEAR" | "YEARS" => crate::expressions::IntervalUnit::Year,
49585                    "MONTH" | "MONTHS" => crate::expressions::IntervalUnit::Month,
49586                    "DAY" | "DAYS" => crate::expressions::IntervalUnit::Day,
49587                    "HOUR" | "HOURS" => crate::expressions::IntervalUnit::Hour,
49588                    "MINUTE" | "MINUTES" => crate::expressions::IntervalUnit::Minute,
49589                    "SECOND" | "SECONDS" => crate::expressions::IntervalUnit::Second,
49590                    _ => crate::expressions::IntervalUnit::Day, // Default fallback
49591                };
49592                Some(crate::expressions::IntervalUnitSpec::Simple {
49593                    unit: interval_unit,
49594                    use_plural: unit_text.ends_with('S'),
49595                })
49596            } else {
49597                None
49598            };
49599            Some(Box::new(Expression::Interval(Box::new(Interval {
49600                this: Some(number),
49601                unit,
49602            }))))
49603        } else {
49604            None
49605        };
49606
49607        Ok(Expression::PartitionByRangePropertyDynamic(Box::new(
49608            PartitionByRangePropertyDynamic {
49609                this: None,
49610                start: Some(Box::new(start)),
49611                end: Some(Box::new(end)),
49612                every,
49613                use_start_end: false,
49614            },
49615        )))
49616    }
49617
49618    /// Parse StarRocks START ('val') END ('val') EVERY (expr) syntax
49619    fn parse_starrocks_start_end_every(&mut self) -> Result<Expression> {
49620        self.expect(TokenType::Start)?;
49621        let start = self.parse_wrapped_expression()?;
49622        self.expect(TokenType::End)?;
49623        let end = self.parse_wrapped_expression()?;
49624
49625        // Parse EVERY (expr)
49626        let every = if self.match_identifier("EVERY") {
49627            self.expect(TokenType::LParen)?;
49628            let expr = self.parse_expression()?;
49629            self.expect(TokenType::RParen)?;
49630            Some(Box::new(expr))
49631        } else {
49632            None
49633        };
49634
49635        Ok(Expression::PartitionByRangePropertyDynamic(Box::new(
49636            PartitionByRangePropertyDynamic {
49637                this: None,
49638                start: Some(Box::new(start)),
49639                end: Some(Box::new(end)),
49640                every,
49641                use_start_end: true,
49642            },
49643        )))
49644    }
49645
49646    /// Parse wrapped comma-separated expressions: (expr, expr, ...)
49647    fn parse_wrapped_csv_expressions(&mut self) -> Result<Vec<Expression>> {
49648        self.expect(TokenType::LParen)?;
49649        let mut exprs = Vec::new();
49650        if !self.check(TokenType::RParen) {
49651            loop {
49652                // Check for MAXVALUE special keyword
49653                if self.match_token(TokenType::Maxvalue) {
49654                    exprs.push(Expression::Var(Box::new(Var {
49655                        this: "MAXVALUE".to_string(),
49656                    })));
49657                } else {
49658                    exprs.push(self.parse_expression()?);
49659                }
49660                if !self.match_token(TokenType::Comma) {
49661                    break;
49662                }
49663            }
49664        }
49665        self.expect(TokenType::RParen)?;
49666        Ok(exprs)
49667    }
49668
49669    /// Parse a single wrapped expression: (expr)
49670    fn parse_wrapped_expression(&mut self) -> Result<Expression> {
49671        self.expect(TokenType::LParen)?;
49672        let expr = self.parse_expression()?;
49673        self.expect(TokenType::RParen)?;
49674        Ok(expr)
49675    }
49676
49677    /// parse_partitioned_of - Implemented from Python _parse_partitioned_of
49678    #[allow(unused_variables, unused_mut)]
49679    pub fn parse_partitioned_of(&mut self) -> Result<Option<Expression>> {
49680        if self.match_text_seq(&["OF"]) {
49681            return Ok(Some(Expression::PartitionBoundSpec(Box::new(
49682                PartitionBoundSpec {
49683                    this: None,
49684                    expression: None,
49685                    from_expressions: None,
49686                    to_expressions: None,
49687                },
49688            ))));
49689        }
49690        if self.match_text_seq(&["FOR", "VALUES"]) {
49691            // Matched: FOR VALUES
49692            return Ok(None);
49693        }
49694        Ok(None)
49695    }
49696
49697    /// parse_period_for_system_time - Parses PERIOD FOR SYSTEM_TIME constraint
49698    /// Python: _parse_period_for_system_time
49699    /// Syntax: PERIOD FOR SYSTEM_TIME (start_col, end_col)
49700    pub fn parse_period_for_system_time(&mut self) -> Result<Option<Expression>> {
49701        // Check for SYSTEM_TIME / TIMESTAMP_SNAPSHOT token
49702        if !self.match_token(TokenType::TimestampSnapshot) {
49703            // Retreat: go back one token
49704            if self.current > 0 {
49705                self.current -= 1;
49706            }
49707            return Ok(None);
49708        }
49709
49710        // Parse wrapped id vars (two column names)
49711        let id_vars = self.parse_wrapped_id_vars()?;
49712
49713        // Extract the two columns from the tuple
49714        let (this, expression) = if let Some(Expression::Tuple(tuple)) = id_vars {
49715            let exprs = &tuple.expressions;
49716            (
49717                exprs.get(0).cloned().unwrap_or(Expression::Null(Null)),
49718                exprs.get(1).cloned().unwrap_or(Expression::Null(Null)),
49719            )
49720        } else {
49721            return Ok(None);
49722        };
49723
49724        Ok(Some(Expression::PeriodForSystemTimeConstraint(Box::new(
49725            PeriodForSystemTimeConstraint {
49726                this: Box::new(this),
49727                expression: Box::new(expression),
49728            },
49729        ))))
49730    }
49731
49732    /// parse_pipe_syntax_aggregate - Implemented from Python _parse_pipe_syntax_aggregate
49733    #[allow(unused_variables, unused_mut)]
49734    pub fn parse_pipe_syntax_aggregate(&mut self) -> Result<Option<Expression>> {
49735        if self.match_text_seq(&["AGGREGATE"]) {
49736            return Ok(Some(Expression::Select(Box::new(Select {
49737                expressions: Vec::new(),
49738                from: None,
49739                joins: Vec::new(),
49740                lateral_views: Vec::new(),
49741                prewhere: None,
49742                where_clause: None,
49743                group_by: None,
49744                having: None,
49745                qualify: None,
49746                order_by: None,
49747                distribute_by: None,
49748                cluster_by: None,
49749                sort_by: None,
49750                limit: None,
49751                offset: None,
49752                limit_by: None,
49753                fetch: None,
49754                distinct: false,
49755                distinct_on: None,
49756                top: None,
49757                with: None,
49758                sample: None,
49759                settings: None,
49760                format: None,
49761                windows: None,
49762                hint: None,
49763                connect: None,
49764                into: None,
49765                locks: Vec::new(),
49766                for_xml: Vec::new(),
49767                leading_comments: Vec::new(),
49768                post_select_comments: Vec::new(),
49769                kind: None,
49770                operation_modifiers: Vec::new(),
49771                qualify_after_window: false,
49772                option: None,
49773                exclude: None,
49774            }))));
49775        }
49776        if self.match_text_seq(&["GROUP", "AND"]) {
49777            // Matched: GROUP AND
49778            return Ok(None);
49779        }
49780        Ok(None)
49781    }
49782
49783    /// parse_pipe_syntax_aggregate_fields - Implemented from Python _parse_pipe_syntax_aggregate_fields
49784    /// Calls: parse_disjunction
49785    #[allow(unused_variables, unused_mut)]
49786    pub fn parse_pipe_syntax_aggregate_fields(&mut self) -> Result<Option<Expression>> {
49787        if self.match_text_seq(&["GROUP", "AND"]) {
49788            // Matched: GROUP AND
49789            return Ok(None);
49790        }
49791        Ok(None)
49792    }
49793
49794    /// parse_pipe_syntax_aggregate_group_order_by - Parses pipe syntax aggregate fields with grouping and ordering
49795    /// Python: _parse_pipe_syntax_aggregate_group_order_by
49796    /// Parses comma-separated aggregate fields and separates them into aggregates/groups and ORDER BY specs
49797    /// Returns a Tuple with two elements: (aggregates_and_groups, order_by_specs)
49798    pub fn parse_pipe_syntax_aggregate_group_order_by(&mut self) -> Result<Option<Expression>> {
49799        // Parse CSV of pipe syntax aggregate fields
49800        let mut aggregates_or_groups = Vec::new();
49801        let mut orders = Vec::new();
49802
49803        loop {
49804            if let Some(element) = self.parse_pipe_syntax_aggregate_fields()? {
49805                // Check if it's an Ordered expression (ORDER BY spec)
49806                match &element {
49807                    Expression::Ordered(ordered) => {
49808                        // Extract the inner expression, potentially adjusting for alias
49809                        let this = match &ordered.this {
49810                            Expression::Alias(alias) => {
49811                                // Use the alias name as an Identifier expression
49812                                Expression::Identifier(alias.alias.clone())
49813                            }
49814                            other => other.clone(),
49815                        };
49816                        // Add modified Ordered to orders
49817                        orders.push(Expression::Ordered(Box::new(Ordered {
49818                            this: this.clone(),
49819                            desc: ordered.desc,
49820                            nulls_first: ordered.nulls_first,
49821                            explicit_asc: ordered.explicit_asc,
49822                            with_fill: ordered.with_fill.clone(),
49823                        })));
49824                        aggregates_or_groups.push(this);
49825                    }
49826                    _ => {
49827                        aggregates_or_groups.push(element);
49828                    }
49829                }
49830            }
49831
49832            if !self.match_token(TokenType::Comma) {
49833                break;
49834            }
49835        }
49836
49837        if aggregates_or_groups.is_empty() && orders.is_empty() {
49838            return Ok(None);
49839        }
49840
49841        // Return a tuple with (aggregates_or_groups, orders)
49842        Ok(Some(Expression::Tuple(Box::new(Tuple {
49843            expressions: vec![
49844                Expression::Tuple(Box::new(Tuple {
49845                    expressions: aggregates_or_groups,
49846                })),
49847                Expression::Tuple(Box::new(Tuple {
49848                    expressions: orders,
49849                })),
49850            ],
49851        }))))
49852    }
49853
49854    /// parse_pipe_syntax_extend - Implemented from Python _parse_pipe_syntax_extend
49855    #[allow(unused_variables, unused_mut)]
49856    pub fn parse_pipe_syntax_extend(&mut self) -> Result<Option<Expression>> {
49857        if self.match_text_seq(&["EXTEND"]) {
49858            return Ok(Some(Expression::Select(Box::new(Select {
49859                expressions: Vec::new(),
49860                from: None,
49861                joins: Vec::new(),
49862                lateral_views: Vec::new(),
49863                prewhere: None,
49864                where_clause: None,
49865                group_by: None,
49866                having: None,
49867                qualify: None,
49868                order_by: None,
49869                distribute_by: None,
49870                cluster_by: None,
49871                sort_by: None,
49872                limit: None,
49873                offset: None,
49874                limit_by: None,
49875                fetch: None,
49876                distinct: false,
49877                distinct_on: None,
49878                top: None,
49879                with: None,
49880                sample: None,
49881                settings: None,
49882                format: None,
49883                windows: None,
49884                hint: None,
49885                connect: None,
49886                into: None,
49887                locks: Vec::new(),
49888                for_xml: Vec::new(),
49889                leading_comments: Vec::new(),
49890                post_select_comments: Vec::new(),
49891                kind: None,
49892                operation_modifiers: Vec::new(),
49893                qualify_after_window: false,
49894                option: None,
49895                exclude: None,
49896            }))));
49897        }
49898        Ok(None)
49899    }
49900
49901    /// parse_pipe_syntax_join - Parses JOIN in BigQuery pipe syntax
49902    /// Python: _parse_pipe_syntax_join
49903    /// Format: |> JOIN table ON condition
49904    pub fn parse_pipe_syntax_join(&mut self) -> Result<Option<Expression>> {
49905        // Parse the JOIN clause
49906        self.parse_join()
49907    }
49908
49909    /// parse_pipe_syntax_limit - Parses LIMIT/OFFSET in BigQuery pipe syntax
49910    /// Python: _parse_pipe_syntax_limit
49911    /// Format: |> LIMIT n [OFFSET m]
49912    pub fn parse_pipe_syntax_limit(&mut self) -> Result<Option<Expression>> {
49913        // Parse the LIMIT clause
49914        let limit = self.parse_limit()?;
49915
49916        // Parse optional OFFSET
49917        let offset = self.parse_offset()?;
49918
49919        // Combine into a tuple if both present
49920        match (limit, offset) {
49921            (Some(l), Some(o)) => Ok(Some(Expression::Tuple(Box::new(Tuple {
49922                expressions: vec![l, o],
49923            })))),
49924            (Some(l), None) => Ok(Some(l)),
49925            (None, Some(o)) => Ok(Some(o)),
49926            (None, None) => Ok(None),
49927        }
49928    }
49929
49930    /// parse_pipe_syntax_pivot - Parses PIVOT in BigQuery pipe syntax
49931    /// Python: _parse_pipe_syntax_pivot
49932    /// Format: |> PIVOT (agg_function FOR column IN (values))
49933    pub fn parse_pipe_syntax_pivot(&mut self) -> Result<Option<Expression>> {
49934        // For pipe syntax, we don't have a source yet - return pivot aggregation
49935        // The actual pivot parsing will be done in the query transformer
49936        self.parse_pivot_aggregation()
49937    }
49938
49939    /// parse_pipe_syntax_query - Parses a query with pipe syntax transformations
49940    /// Python: _parse_pipe_syntax_query
49941    /// Handles queries like: FROM table |> WHERE ... |> SELECT ... |> AGGREGATE ...
49942    pub fn parse_pipe_syntax_query(&mut self) -> Result<Option<Expression>> {
49943        // Start with a base query (could be a FROM clause or subquery)
49944        let mut query = self.parse_select_query()?;
49945
49946        if query.is_none() {
49947            return Ok(None);
49948        }
49949
49950        // Process pipe syntax chain: |> transform1 |> transform2 |> ...
49951        while self.match_token(TokenType::PipeGt) {
49952            let start_pos = self.current;
49953            let operator_text = self.peek().text.to_uppercase();
49954
49955            // Try to match known pipe syntax transforms
49956            let transform_result = match operator_text.as_str() {
49957                "WHERE" => {
49958                    self.advance();
49959                    self.parse_where()?
49960                }
49961                "SELECT" => {
49962                    self.advance();
49963                    self.parse_pipe_syntax_select()?
49964                }
49965                "AGGREGATE" => {
49966                    self.advance();
49967                    self.parse_pipe_syntax_aggregate()?
49968                }
49969                "EXTEND" => {
49970                    self.advance();
49971                    self.parse_pipe_syntax_extend()?
49972                }
49973                "LIMIT" => {
49974                    self.advance();
49975                    self.parse_pipe_syntax_limit()?
49976                }
49977                "JOIN" | "LEFT" | "RIGHT" | "INNER" | "OUTER" | "CROSS" | "FULL" => {
49978                    self.parse_pipe_syntax_join()?
49979                }
49980                "UNION" | "INTERSECT" | "EXCEPT" => self.parse_pipe_syntax_set_operator()?,
49981                "PIVOT" => {
49982                    self.advance();
49983                    self.parse_pipe_syntax_pivot()?
49984                }
49985                "TABLESAMPLE" => {
49986                    self.advance();
49987                    self.parse_pipe_syntax_tablesample()?
49988                }
49989                _ => {
49990                    // Try set operator or join as fallback
49991                    let set_op = self.parse_pipe_syntax_set_operator()?;
49992                    if set_op.is_some() {
49993                        set_op
49994                    } else {
49995                        let join_op = self.parse_pipe_syntax_join()?;
49996                        if join_op.is_some() {
49997                            join_op
49998                        } else {
49999                            // Unsupported operator, retreat and break
50000                            self.current = start_pos;
50001                            break;
50002                        }
50003                    }
50004                }
50005            };
50006
50007            // Apply transform to query
50008            if let Some(transform) = transform_result {
50009                // Wrap current query with transform in a PipeOperator
50010                let current_query = query.ok_or_else(|| {
50011                    self.parse_error("Expected base query before pipe syntax transform")
50012                })?;
50013                query = Some(Expression::PipeOperator(Box::new(PipeOperator {
50014                    this: current_query,
50015                    expression: transform,
50016                })));
50017            }
50018        }
50019
50020        Ok(query)
50021    }
50022
50023    /// parse_pipe_syntax_select - Parses SELECT in BigQuery pipe syntax
50024    /// Python: _parse_pipe_syntax_select
50025    /// Format: |> SELECT expressions
50026    pub fn parse_pipe_syntax_select(&mut self) -> Result<Option<Expression>> {
50027        // Parse the SELECT expressions without consuming the pipe
50028        let expressions = self.parse_expressions()?;
50029
50030        match expressions {
50031            Some(expr) => Ok(Some(expr)),
50032            None => Ok(Some(Expression::Star(Star {
50033                table: None,
50034                except: None,
50035                replace: None,
50036                rename: None,
50037                trailing_comments: Vec::new(),
50038                span: None,
50039            }))),
50040        }
50041    }
50042
50043    /// parse_pipe_syntax_set_operator - Parses set operation in BigQuery pipe syntax
50044    /// Python: _parse_pipe_syntax_set_operator
50045    /// Format: |> UNION ALL/INTERSECT/EXCEPT (subquery1, subquery2, ...)
50046    pub fn parse_pipe_syntax_set_operator(&mut self) -> Result<Option<Expression>> {
50047        // Try to parse as a set operation (UNION, INTERSECT, EXCEPT)
50048        if let Some(set_op) = self.parse_set_operations()? {
50049            Ok(Some(set_op))
50050        } else {
50051            Ok(None)
50052        }
50053    }
50054
50055    /// parse_pipe_syntax_tablesample - Parses TABLESAMPLE in BigQuery pipe syntax
50056    /// Python: _parse_pipe_syntax_tablesample
50057    /// Format: |> TABLESAMPLE SYSTEM (percent PERCENT)
50058    pub fn parse_pipe_syntax_tablesample(&mut self) -> Result<Option<Expression>> {
50059        // Parse the TABLESAMPLE clause
50060        self.parse_table_sample()
50061    }
50062
50063    /// parse_pivot_aggregation - Ported from Python _parse_pivot_aggregation
50064    /// Parses an aggregation function in PIVOT clause, optionally with alias
50065    #[allow(unused_variables, unused_mut)]
50066    pub fn parse_pivot_aggregation(&mut self) -> Result<Option<Expression>> {
50067        // Parse a function
50068        let func = self.parse_function()?;
50069
50070        if func.is_none() {
50071            // If previous token was a comma, silently return None
50072            if self.previous().token_type == TokenType::Comma {
50073                return Ok(None);
50074            }
50075            // Otherwise this could be an error, but we'll just return None
50076            return Ok(None);
50077        }
50078
50079        // Try to parse an alias for the function
50080        self.parse_alias_with_expr(func)
50081    }
50082
50083    /// parse_pivot_in - Parses the IN clause of a PIVOT
50084    /// Python: _parse_pivot_in
50085    /// Format: column IN (value1 [AS alias1], value2 [AS alias2], ...)
50086    pub fn parse_pivot_in(&mut self) -> Result<Option<Expression>> {
50087        // Parse the column being pivoted
50088        let value = self.parse_column()?;
50089        let value_expr = value.unwrap_or(Expression::Null(Null));
50090
50091        // Expect IN keyword
50092        if !self.match_token(TokenType::In) {
50093            return Err(self.parse_error("Expecting IN"));
50094        }
50095
50096        // Check if it's a parenthesized list or a field reference
50097        if self.match_token(TokenType::LParen) {
50098            // Check for ANY keyword
50099            let expressions = if self.match_text_seq(&["ANY"]) {
50100                // Parse PivotAny with optional ORDER BY
50101                let order = self.parse_order()?;
50102                vec![Expression::PivotAny(Box::new(PivotAny {
50103                    this: order.map(Box::new),
50104                }))]
50105            } else {
50106                // Parse comma-separated list of expressions, optionally aliased
50107                let mut exprs = Vec::new();
50108                loop {
50109                    if let Some(expr) = self.parse_select_or_expression()? {
50110                        // Check for alias
50111                        let final_expr = if self.match_token(TokenType::Alias) {
50112                            if let Some(alias) = self.parse_bitwise()? {
50113                                // Store the alias expression directly
50114                                Expression::PivotAlias(Box::new(PivotAlias { this: expr, alias }))
50115                            } else {
50116                                expr
50117                            }
50118                        } else {
50119                            expr
50120                        };
50121                        exprs.push(final_expr);
50122                    } else {
50123                        break;
50124                    }
50125                    if !self.match_token(TokenType::Comma) {
50126                        break;
50127                    }
50128                }
50129                exprs
50130            };
50131
50132            self.expect(TokenType::RParen)?;
50133
50134            Ok(Some(Expression::In(Box::new(In {
50135                this: value_expr,
50136                expressions,
50137                query: None,
50138                not: false,
50139                global: false,
50140                unnest: None,
50141                is_field: false,
50142            }))))
50143        } else {
50144            // Parse as a field reference: IN field_name
50145            let field = self.parse_id_var()?;
50146            // Convert field to expression and add to expressions
50147            let expressions = if let Some(f) = field {
50148                vec![f]
50149            } else {
50150                Vec::new()
50151            };
50152            Ok(Some(Expression::In(Box::new(In {
50153                this: value_expr,
50154                expressions,
50155                query: None,
50156                not: false,
50157                global: false,
50158                unnest: None,
50159                is_field: true,
50160            }))))
50161        }
50162    }
50163
50164    /// parse_pivots - Ported from Python _parse_pivots
50165    /// Parses one or more PIVOT/UNPIVOT clauses attached to a source expression
50166    /// Uses the existing parse_pivot/parse_unpivot methods
50167    pub fn parse_pivots_for_source(&mut self, source: Expression) -> Result<Option<Expression>> {
50168        let mut result = source;
50169
50170        loop {
50171            if self.match_token(TokenType::Pivot) {
50172                result = self.parse_pivot(result)?;
50173            } else if self.match_texts(&["UNPIVOT"]) {
50174                result = self.parse_unpivot(result)?;
50175            } else {
50176                break;
50177            }
50178        }
50179
50180        // Return None if no pivots were parsed
50181        if matches!(result, Expression::Null(_)) {
50182            Ok(None)
50183        } else {
50184            Ok(Some(result))
50185        }
50186    }
50187
50188    /// parse_placeholder - Parse placeholder token (? or :name)
50189    /// Python: if self._match_set(self.PLACEHOLDER_PARSERS): return placeholder
50190    pub fn parse_placeholder(&mut self) -> Result<Option<Expression>> {
50191        // Match positional placeholder (?)
50192        if self.match_token(TokenType::Placeholder) {
50193            return Ok(Some(Expression::Placeholder(Placeholder { index: None })));
50194        }
50195        // Match colon placeholder (:name) - handled by Parameter token
50196        if self.match_token(TokenType::Parameter) {
50197            let text = self.previous().text.clone();
50198            return Ok(Some(Expression::Parameter(Box::new(Parameter {
50199                name: Some(text),
50200                index: None,
50201                style: ParameterStyle::Colon,
50202                quoted: false,
50203                string_quoted: false,
50204                expression: None,
50205            }))));
50206        }
50207        Ok(None)
50208    }
50209
50210    /// Parse ClickHouse query parameter syntax: {name: Type}
50211    fn parse_clickhouse_braced_parameter(&mut self) -> Result<Option<Expression>> {
50212        if !matches!(
50213            self.config.dialect,
50214            Some(crate::dialects::DialectType::ClickHouse)
50215        ) {
50216            return Ok(None);
50217        }
50218        if !self.check(TokenType::LBrace) {
50219            return Ok(None);
50220        }
50221
50222        let start = self.current;
50223        self.advance(); // consume {
50224
50225        if !(self.is_identifier_token() || self.is_safe_keyword_as_identifier()) {
50226            self.current = start;
50227            return Ok(None);
50228        }
50229        let name = self.advance().text.clone();
50230
50231        if !self.match_token(TokenType::Colon) {
50232            self.current = start;
50233            return Ok(None);
50234        }
50235
50236        let kind_start = self.current;
50237        let mut paren_depth = 0usize;
50238        let mut bracket_depth = 0usize;
50239
50240        while !self.is_at_end() {
50241            let token_type = self.peek().token_type;
50242            match token_type {
50243                TokenType::LParen => {
50244                    paren_depth += 1;
50245                    self.advance();
50246                }
50247                TokenType::RParen => {
50248                    if paren_depth == 0 {
50249                        break;
50250                    }
50251                    paren_depth -= 1;
50252                    self.advance();
50253                }
50254                TokenType::LBracket => {
50255                    bracket_depth += 1;
50256                    self.advance();
50257                }
50258                TokenType::RBracket => {
50259                    if bracket_depth == 0 {
50260                        break;
50261                    }
50262                    bracket_depth -= 1;
50263                    self.advance();
50264                }
50265                TokenType::RBrace => {
50266                    if paren_depth == 0 && bracket_depth == 0 {
50267                        break;
50268                    }
50269                    self.advance();
50270                }
50271                _ => {
50272                    self.advance();
50273                }
50274            }
50275        }
50276
50277        if self.current <= kind_start || !self.match_token(TokenType::RBrace) {
50278            return Err(self.parse_error("Expected } in ClickHouse query parameter"));
50279        }
50280
50281        let kind = self
50282            .tokens_to_sql(kind_start, self.current - 1)
50283            .trim()
50284            .to_string();
50285        if kind.is_empty() {
50286            return Err(self.parse_error("Expected parameter kind in ClickHouse query parameter"));
50287        }
50288
50289        Ok(Some(Expression::Parameter(Box::new(Parameter {
50290            name: Some(name),
50291            index: None,
50292            style: ParameterStyle::Brace,
50293            quoted: false,
50294            string_quoted: false,
50295            expression: Some(kind),
50296        }))))
50297    }
50298
50299    /// parse_position - Ported from Python _parse_position
50300    /// Parses POSITION function: POSITION(substr IN str) or POSITION(needle, haystack, start)
50301    #[allow(unused_variables, unused_mut)]
50302    pub fn parse_position(&mut self) -> Result<Option<Expression>> {
50303        // Parse comma-separated arguments first
50304        let mut args: Vec<Expression> = Vec::new();
50305
50306        match self.parse_bitwise() {
50307            Ok(Some(expr)) => {
50308                let expr = self.maybe_clickhouse_alias(expr);
50309                let expr = self.try_clickhouse_func_arg_alias(expr);
50310                args.push(expr);
50311            }
50312            Ok(None) => return Ok(None),
50313            Err(e) => return Err(e),
50314        }
50315
50316        // Check for IN keyword (SQL standard syntax: POSITION(substr IN str))
50317        if self.match_token(TokenType::In) {
50318            match self.parse_bitwise() {
50319                Ok(Some(haystack)) => {
50320                    let haystack = self.maybe_clickhouse_alias(haystack);
50321                    let haystack = self.try_clickhouse_func_arg_alias(haystack);
50322                    return Ok(Some(Expression::StrPosition(Box::new(StrPosition {
50323                        this: Box::new(haystack),
50324                        substr: Some(Box::new(args.remove(0))),
50325                        position: None,
50326                        occurrence: None,
50327                    }))));
50328                }
50329                Ok(None) => {
50330                    return Err(self.parse_error("Expected expression after IN in POSITION"))
50331                }
50332                Err(e) => return Err(e),
50333            }
50334        }
50335
50336        // Parse comma-separated additional arguments
50337        while self.match_token(TokenType::Comma) {
50338            match self.parse_bitwise() {
50339                Ok(Some(expr)) => {
50340                    let expr = self.maybe_clickhouse_alias(expr);
50341                    let expr = self.try_clickhouse_func_arg_alias(expr);
50342                    args.push(expr);
50343                }
50344                Ok(None) => break,
50345                Err(e) => return Err(e),
50346            }
50347        }
50348
50349        // Function syntax: POSITION(needle, haystack, start?) or ClickHouse POSITION(haystack, needle, start?)
50350        let position = args.get(2).cloned();
50351        let (haystack, needle) = if matches!(
50352            self.config.dialect,
50353            Some(crate::dialects::DialectType::ClickHouse)
50354        ) {
50355            (args.get(0).cloned(), args.get(1).cloned())
50356        } else {
50357            (args.get(1).cloned(), args.get(0).cloned())
50358        };
50359
50360        Ok(Some(Expression::StrPosition(Box::new(StrPosition {
50361            this: Box::new(
50362                haystack.unwrap_or_else(|| Expression::Literal(Literal::String("".to_string()))),
50363            ),
50364            substr: needle.map(Box::new),
50365            position: position.map(Box::new),
50366            occurrence: None,
50367        }))))
50368    }
50369
50370    /// parse_prewhere - Ported from Python _parse_prewhere
50371    /// Parses PREWHERE clause (ClickHouse specific)
50372    #[allow(unused_variables, unused_mut)]
50373    pub fn parse_prewhere(&mut self) -> Result<Option<Expression>> {
50374        if !self.match_token(TokenType::Prewhere) {
50375            return Ok(None);
50376        }
50377        // Parse the condition expression
50378        let condition = self.parse_expression()?;
50379        Ok(Some(Expression::PreWhere(Box::new(PreWhere {
50380            this: condition,
50381        }))))
50382    }
50383
50384    /// parse_primary_key - Parses PRIMARY KEY constraint
50385    /// Python: _parse_primary_key
50386    /// Can return either PrimaryKeyColumnConstraint (column-level) or PrimaryKey (table-level)
50387    pub fn parse_primary_key(&mut self) -> Result<Option<Expression>> {
50388        self.parse_primary_key_impl(false, false)
50389    }
50390
50391    /// Implementation of parse_primary_key with options
50392    pub fn parse_primary_key_impl(
50393        &mut self,
50394        wrapped_optional: bool,
50395        in_props: bool,
50396    ) -> Result<Option<Expression>> {
50397        // Check for ASC/DESC
50398        let desc = if self.match_token(TokenType::Asc) {
50399            false
50400        } else if self.match_token(TokenType::Desc) {
50401            true
50402        } else {
50403            false
50404        };
50405
50406        // Parse optional constraint name (if current token is identifier and next is L_PAREN)
50407        let this = if (self.check(TokenType::Identifier) || self.check(TokenType::Var))
50408            && self.check_next(TokenType::LParen)
50409        {
50410            self.parse_id_var()?
50411        } else {
50412            None
50413        };
50414
50415        // If not in_props and no L_PAREN ahead, return column-level constraint
50416        if !in_props && !self.check(TokenType::LParen) {
50417            let options = self.parse_key_constraint_options_list()?;
50418            return Ok(Some(Expression::PrimaryKeyColumnConstraint(Box::new(
50419                PrimaryKeyColumnConstraint {
50420                    desc: if desc {
50421                        Some(Box::new(Expression::Boolean(BooleanLiteral {
50422                            value: true,
50423                        })))
50424                    } else {
50425                        None
50426                    },
50427                    options,
50428                },
50429            ))));
50430        }
50431
50432        // Parse table-level PRIMARY KEY (column_list)
50433        let expressions = if self.match_token(TokenType::LParen) {
50434            let mut exprs = Vec::new();
50435            loop {
50436                if let Some(part) = self.parse_primary_key_part()? {
50437                    exprs.push(part);
50438                }
50439                if !self.match_token(TokenType::Comma) {
50440                    break;
50441                }
50442            }
50443            self.expect(TokenType::RParen)?;
50444            exprs
50445        } else if wrapped_optional {
50446            Vec::new()
50447        } else {
50448            return Err(self.parse_error("Expected '(' for PRIMARY KEY column list"));
50449        };
50450
50451        // Parse INCLUDE clause for covering index
50452        let include = self.parse_index_params()?;
50453
50454        // Parse constraint options
50455        let options = self.parse_key_constraint_options_list()?;
50456
50457        Ok(Some(Expression::PrimaryKey(Box::new(PrimaryKey {
50458            this: this.map(Box::new),
50459            expressions,
50460            options,
50461            include: include.map(Box::new),
50462        }))))
50463    }
50464
50465    /// Parse key constraint options as a list of expressions
50466    fn parse_key_constraint_options_list(&mut self) -> Result<Vec<Expression>> {
50467        let mut options = Vec::new();
50468
50469        loop {
50470            if self.is_at_end() {
50471                break;
50472            }
50473
50474            if self.match_token(TokenType::On) {
50475                // Parse ON DELETE/UPDATE action
50476                let on_what = if !self.is_at_end() {
50477                    let token = self.advance();
50478                    token.text.clone()
50479                } else {
50480                    break;
50481                };
50482
50483                let action = if self.match_text_seq(&["NO", "ACTION"]) {
50484                    "NO ACTION"
50485                } else if self.match_text_seq(&["CASCADE"]) {
50486                    "CASCADE"
50487                } else if self.match_text_seq(&["RESTRICT"]) {
50488                    "RESTRICT"
50489                } else if self.match_token(TokenType::Set) && self.match_token(TokenType::Null) {
50490                    "SET NULL"
50491                } else if self.match_token(TokenType::Set) && self.match_token(TokenType::Default) {
50492                    "SET DEFAULT"
50493                } else {
50494                    break;
50495                };
50496
50497                options.push(Expression::Var(Box::new(Var {
50498                    this: format!("ON {} {}", on_what, action),
50499                })));
50500            } else if self.match_text_seq(&["NOT", "ENFORCED"]) {
50501                options.push(Expression::Var(Box::new(Var {
50502                    this: "NOT ENFORCED".to_string(),
50503                })));
50504            } else if self.match_text_seq(&["DEFERRABLE"]) {
50505                options.push(Expression::Var(Box::new(Var {
50506                    this: "DEFERRABLE".to_string(),
50507                })));
50508            } else if self.match_text_seq(&["INITIALLY", "DEFERRED"]) {
50509                options.push(Expression::Var(Box::new(Var {
50510                    this: "INITIALLY DEFERRED".to_string(),
50511                })));
50512            } else if self.match_text_seq(&["NORELY"]) {
50513                options.push(Expression::Var(Box::new(Var {
50514                    this: "NORELY".to_string(),
50515                })));
50516            } else if self.match_text_seq(&["RELY"]) {
50517                options.push(Expression::Var(Box::new(Var {
50518                    this: "RELY".to_string(),
50519                })));
50520            } else {
50521                break;
50522            }
50523        }
50524
50525        Ok(options)
50526    }
50527
50528    /// parse_primary_key_part - Delegates to parse_field
50529    #[allow(unused_variables, unused_mut)]
50530    pub fn parse_primary_key_part(&mut self) -> Result<Option<Expression>> {
50531        // ClickHouse: PRIMARY KEY can contain full expressions (e.g., t.a, c0 IN (SELECT 1))
50532        if matches!(
50533            self.config.dialect,
50534            Some(crate::dialects::DialectType::ClickHouse)
50535        ) {
50536            return self.parse_expression().map(Some);
50537        }
50538        if (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
50539            && self.check_next(TokenType::LParen)
50540        {
50541            return self.parse_expression().map(Some);
50542        }
50543        if let Some(field) = self.parse_field()? {
50544            Ok(Some(field))
50545        } else {
50546            self.parse_expression().map(Some)
50547        }
50548    }
50549
50550    /// parse_primary_or_var - Parses a primary expression or variable
50551    /// Python: _parse_primary_or_var
50552    /// Returns: parse_primary() or parse_var(any_token=True)
50553    pub fn parse_primary_or_var(&mut self) -> Result<Option<Expression>> {
50554        // First try to parse a primary expression
50555        let saved_pos = self.current;
50556        match self.parse_primary() {
50557            Ok(expr) => return Ok(Some(expr)),
50558            Err(_) => {
50559                // Reset position and try parse_var
50560                self.current = saved_pos;
50561            }
50562        }
50563
50564        // Fall back to parsing a variable
50565        self.parse_var()
50566    }
50567
50568    /// parse_procedure_option - Implemented from Python _parse_procedure_option
50569    #[allow(unused_variables, unused_mut)]
50570    pub fn parse_procedure_option(&mut self) -> Result<Option<Expression>> {
50571        if self.match_text_seq(&["EXECUTE", "AS"]) {
50572            // Matched: EXECUTE AS
50573            return Ok(None);
50574        }
50575        Ok(None)
50576    }
50577
50578    /// parse_projections - Delegates to parse_expressions
50579    #[allow(unused_variables, unused_mut)]
50580    pub fn parse_projections(&mut self) -> Result<Option<Expression>> {
50581        self.parse_expressions()
50582    }
50583
50584    /// parse_properties - Parses table/column properties
50585    /// Python: _parse_properties
50586    /// Collects a list of properties using parse_property
50587    pub fn parse_properties(&mut self) -> Result<Option<Expression>> {
50588        self.parse_properties_impl(None)
50589    }
50590
50591    /// Implementation of parse_properties with before option
50592    pub fn parse_properties_impl(&mut self, before: Option<bool>) -> Result<Option<Expression>> {
50593        let mut properties = Vec::new();
50594
50595        loop {
50596            let prop = if before == Some(true) {
50597                self.parse_property_before()?
50598            } else {
50599                self.parse_property()?
50600            };
50601
50602            if let Some(p) = prop {
50603                properties.push(p);
50604            } else {
50605                break;
50606            }
50607        }
50608
50609        if properties.is_empty() {
50610            Ok(None)
50611        } else {
50612            Ok(Some(Expression::Properties(Box::new(Properties {
50613                expressions: properties,
50614            }))))
50615        }
50616    }
50617
50618    /// parse_property - Implemented from Python _parse_property
50619    /// Calls: parse_bitwise, parse_column, parse_sequence_properties
50620    #[allow(unused_variables, unused_mut)]
50621    pub fn parse_property(&mut self) -> Result<Option<Expression>> {
50622        if self.match_text_seq(&["COMPOUND", "SORTKEY"]) {
50623            return Ok(Some(Expression::Identifier(Identifier {
50624                name: String::new(),
50625                quoted: false,
50626                trailing_comments: Vec::new(),
50627                span: None,
50628            })));
50629        }
50630        if self.match_text_seq(&["SQL", "SECURITY"]) {
50631            // Matched: SQL SECURITY
50632            return Ok(None);
50633        }
50634        if self.match_texts(&["DEFINER", "INVOKER"]) {
50635            // Matched one of: DEFINER, INVOKER
50636            return Ok(None);
50637        }
50638        Ok(None)
50639    }
50640
50641    /// parse_on_cluster_clause - Parse ClickHouse ON CLUSTER clause
50642    fn parse_on_cluster_clause(&mut self) -> Result<Option<OnCluster>> {
50643        if !matches!(
50644            self.config.dialect,
50645            Some(crate::dialects::DialectType::ClickHouse)
50646        ) {
50647            return Ok(None);
50648        }
50649
50650        let start = self.current;
50651        if !self.match_token(TokenType::On) {
50652            return Ok(None);
50653        }
50654
50655        if !self.match_token(TokenType::Cluster) {
50656            self.current = start;
50657            return Ok(None);
50658        }
50659
50660        let this = if self.check(TokenType::String) {
50661            let value = self.expect_string()?;
50662            Expression::Literal(Literal::String(value))
50663        } else if let Some(id_expr) = self.parse_id_var()? {
50664            id_expr
50665        } else if self.is_safe_keyword_as_identifier() {
50666            let name = self.advance().text;
50667            Expression::Identifier(Identifier {
50668                name,
50669                quoted: false,
50670                trailing_comments: Vec::new(),
50671                span: None,
50672            })
50673        } else {
50674            return Err(self.parse_error("Expected cluster name after ON CLUSTER"));
50675        };
50676
50677        Ok(Some(OnCluster {
50678            this: Box::new(this),
50679        }))
50680    }
50681
50682    /// parse_clickhouse_table_properties - Parse ClickHouse table properties after column defs
50683    fn parse_clickhouse_table_properties(
50684        &mut self,
50685        properties: &mut Vec<Expression>,
50686    ) -> Result<()> {
50687        loop {
50688            if self.match_identifier("ENGINE") {
50689                self.match_token(TokenType::Eq);
50690                let engine = self.parse_clickhouse_engine_expression()?;
50691                properties.push(Expression::EngineProperty(Box::new(EngineProperty {
50692                    this: Box::new(engine),
50693                })));
50694                continue;
50695            }
50696
50697            if self.match_token(TokenType::Order) {
50698                self.expect(TokenType::By)?;
50699                let order_by = if matches!(
50700                    self.config.dialect,
50701                    Some(crate::dialects::DialectType::ClickHouse)
50702                ) && self.match_token(TokenType::LParen)
50703                {
50704                    // ClickHouse: ORDER BY (col1 [ASC|DESC], col2 [ASC|DESC], ...)
50705                    // or ORDER BY () for no ordering
50706                    if self.check(TokenType::RParen) {
50707                        self.advance();
50708                        OrderBy {
50709                            expressions: vec![Ordered::asc(Expression::Tuple(Box::new(Tuple {
50710                                expressions: Vec::new(),
50711                            })))],
50712                            siblings: false,
50713                            comments: Vec::new(),
50714                        }
50715                    } else {
50716                        // Parse all expressions inside the parentheses
50717                        let mut inner_exprs = Vec::new();
50718                        loop {
50719                            let expr = self.parse_expression()?;
50720                            inner_exprs.push(expr);
50721                            if !self.match_token(TokenType::Comma) {
50722                                break;
50723                            }
50724                        }
50725                        self.expect(TokenType::RParen)?;
50726                        // Wrap in a Tuple for multi-expr, Paren for single-expr
50727                        let wrapper = if inner_exprs.len() == 1 {
50728                            Expression::Paren(Box::new(Paren {
50729                                this: inner_exprs.into_iter().next().unwrap(),
50730                                trailing_comments: Vec::new(),
50731                            }))
50732                        } else {
50733                            Expression::Tuple(Box::new(Tuple {
50734                                expressions: inner_exprs,
50735                            }))
50736                        };
50737                        OrderBy {
50738                            expressions: vec![Ordered::asc(wrapper)],
50739                            siblings: false,
50740                            comments: Vec::new(),
50741                        }
50742                    }
50743                } else {
50744                    self.parse_order_by()?
50745                };
50746                properties.push(Expression::OrderBy(Box::new(order_by)));
50747                continue;
50748            }
50749
50750            if self.match_token(TokenType::Partition) {
50751                self.expect(TokenType::By)?;
50752                if self.check(TokenType::Order) && self.check_next(TokenType::By) {
50753                    return Err(self.parse_error("Expected expression after PARTITION BY"));
50754                }
50755                let expr = self
50756                    .parse_assignment()?
50757                    .ok_or_else(|| self.parse_error("Expected expression after PARTITION BY"))?;
50758                properties.push(Expression::PartitionedByProperty(Box::new(
50759                    PartitionedByProperty {
50760                        this: Box::new(expr),
50761                    },
50762                )));
50763                continue;
50764            }
50765
50766            if self.match_token(TokenType::PrimaryKey) {
50767                // ClickHouse supports PRIMARY KEY id and PRIMARY KEY (id, ...)
50768                let _ = self.match_token(TokenType::Key);
50769                if self.check(TokenType::LParen) {
50770                    if let Some(pk) = self.parse_primary_key_impl(false, true)? {
50771                        properties.push(pk);
50772                    }
50773                } else if let Some(expr) = self.parse_conjunction()? {
50774                    // ClickHouse: PRIMARY KEY expr (e.g., PRIMARY KEY tuple(), PRIMARY KEY id)
50775                    let mut exprs = vec![expr];
50776                    while self.match_token(TokenType::Comma) {
50777                        if let Some(next_expr) = self.parse_field()? {
50778                            exprs.push(next_expr);
50779                        } else {
50780                            break;
50781                        }
50782                    }
50783                    properties.push(Expression::PrimaryKey(Box::new(PrimaryKey {
50784                        this: None,
50785                        expressions: exprs,
50786                        options: Vec::new(),
50787                        include: None,
50788                    })));
50789                } else {
50790                    return Err(self.parse_error("Expected expression after PRIMARY KEY"));
50791                }
50792                continue;
50793            }
50794
50795            if self.match_token(TokenType::Sample) {
50796                let _ = self.match_token(TokenType::By);
50797                let expr = self.parse_expression()?;
50798                properties.push(Expression::SampleProperty(Box::new(SampleProperty {
50799                    this: Box::new(expr),
50800                })));
50801                continue;
50802            }
50803
50804            if self.match_token(TokenType::Settings) {
50805                let mut settings = Vec::new();
50806                loop {
50807                    settings.push(self.parse_expression()?);
50808                    if !self.match_token(TokenType::Comma) {
50809                        break;
50810                    }
50811                }
50812                properties.push(Expression::SettingsProperty(Box::new(SettingsProperty {
50813                    expressions: settings,
50814                })));
50815                continue;
50816            }
50817
50818            if self.match_token(TokenType::Comment) {
50819                let comment_expr = if self.check(TokenType::String) {
50820                    Expression::Literal(Literal::String(self.expect_string()?))
50821                } else {
50822                    self.parse_expression()?
50823                };
50824                properties.push(Expression::SchemaCommentProperty(Box::new(
50825                    SchemaCommentProperty {
50826                        this: Box::new(comment_expr),
50827                    },
50828                )));
50829                continue;
50830            }
50831
50832            // TTL time_column + INTERVAL '1' MONTH [DELETE|RECOMPRESS|TO DISK|TO VOLUME] [WHERE ...]
50833            if self.match_identifier("TTL") {
50834                if let Some(ttl_expr) = self.parse_ttl()? {
50835                    properties.push(ttl_expr);
50836                }
50837                continue;
50838            }
50839
50840            if self.match_identifier("SOURCE") {
50841                if let Some(prop) = self.parse_dict_property("SOURCE")? {
50842                    properties.push(prop);
50843                }
50844                continue;
50845            }
50846
50847            if self.match_identifier("LAYOUT") {
50848                if let Some(prop) = self.parse_dict_property("LAYOUT")? {
50849                    properties.push(prop);
50850                }
50851                continue;
50852            }
50853
50854            if self.match_identifier("LIFETIME") {
50855                if let Some(range) = self.parse_dict_range("LIFETIME")? {
50856                    properties.push(range);
50857                }
50858                continue;
50859            }
50860
50861            if self.match_identifier("RANGE") || self.match_token(TokenType::Range) {
50862                if let Some(range) = self.parse_dict_range("RANGE")? {
50863                    properties.push(range);
50864                }
50865                continue;
50866            }
50867
50868            break;
50869        }
50870
50871        Ok(())
50872    }
50873
50874    /// ClickHouse implicit alias in function arguments: `expr identifier` (without AS keyword).
50875    /// The token after the alias must be a delimiter (comma, RParen, FROM, FOR, AS).
50876    fn try_clickhouse_implicit_alias(&mut self, expr: Expression) -> Expression {
50877        if !matches!(
50878            self.config.dialect,
50879            Some(crate::dialects::DialectType::ClickHouse)
50880        ) {
50881            return expr;
50882        }
50883        if self.check(TokenType::Var) || self.check(TokenType::Identifier) {
50884            let next_after = self.peek_nth(1).map(|t| t.token_type);
50885            let is_delimiter = matches!(
50886                next_after,
50887                Some(TokenType::Comma)
50888                    | Some(TokenType::RParen)
50889                    | Some(TokenType::From)
50890                    | Some(TokenType::For)
50891                    | Some(TokenType::As)
50892            );
50893            if is_delimiter {
50894                let alias_token = self.advance();
50895                let alias_name = alias_token.text.clone();
50896                return Expression::Alias(Box::new(crate::expressions::Alias::new(
50897                    expr,
50898                    Identifier::new(alias_name),
50899                )));
50900            }
50901        }
50902        expr
50903    }
50904
50905    /// ClickHouse alias in function arguments: handles both implicit (`expr identifier`)
50906    /// and explicit (`expr AS identifier`) aliases. Use this in special function parsers
50907    /// (SUBSTRING, TRIM, EXTRACT) but NOT in CAST (which has its own AS handling).
50908    /// Normalize TSQL date part aliases (e.g., dd -> DAY, yy -> YEAR, etc.)
50909    fn normalize_tsql_date_part(&self, expr: Expression) -> Expression {
50910        let name = match &expr {
50911            Expression::Var(v) => Some(v.this.to_uppercase()),
50912            Expression::Column(c) if c.table.is_none() => Some(c.name.name.to_uppercase()),
50913            Expression::Identifier(id) => Some(id.name.to_uppercase()),
50914            _ => None,
50915        };
50916        if let Some(name) = name {
50917            let mapped = match name.as_str() {
50918                "YY" | "YYY" | "YYYY" | "YR" | "YEARS" | "YRS" => "YEAR",
50919                "MM" | "MON" | "MONS" | "MONTHS" | "M" => "MONTH",
50920                "D" | "DD" | "DAYS" | "DAYOFMONTH" => "DAY",
50921                "DOW" | "DW" | "WEEKDAY" => "DAYOFWEEK",
50922                "DOY" | "DY" | "Y" => "DAYOFYEAR",
50923                "W" | "WK" | "WEEKOFYEAR" | "WOY" | "WY" | "WW" => "WEEK",
50924                "Q" | "QTR" | "QTRS" | "QUARTERS" | "QQ" => "QUARTER",
50925                "H" | "HH" | "HR" | "HOURS" | "HRS" => "HOUR",
50926                "MI" | "MIN" | "MINUTES" | "MINS" | "N" => "MINUTE",
50927                "S" | "SEC" | "SECONDS" | "SECS" | "SS" => "SECOND",
50928                "MS" | "MSEC" | "MSECS" | "MSECOND" | "MSECONDS" | "MILLISEC" | "MILLISECS"
50929                | "MILLISECON" | "MILLISECONDS" => "MILLISECOND",
50930                "US" | "USEC" | "USECS" | "MICROSEC" | "MICROSECS" | "USECOND" | "USECONDS"
50931                | "MICROSECONDS" | "MCS" => "MICROSECOND",
50932                "NS" | "NSEC" | "NANOSEC" | "NSECOND" | "NSECONDS" | "NANOSECS" => "NANOSECOND",
50933                "TZH" => "TIMEZONE_HOUR",
50934                "TZM" | "TZOFFSET" | "TZ" => "TIMEZONE_MINUTE",
50935                "DEC" | "DECS" | "DECADES" => "DECADE",
50936                "MIL" | "MILS" | "MILLENIA" => "MILLENNIUM",
50937                "C" | "CENT" | "CENTS" | "CENTURIES" => "CENTURY",
50938                "ISOWK" | "ISOWW" | "ISO_WEEK" | "WEEKOFYEARISO" | "WEEKOFYEAR_ISO"
50939                | "WEEK_ISO" => "WEEKISO",
50940                _ => return expr, // No mapping, return as-is
50941            };
50942            return Expression::Var(Box::new(Var {
50943                this: mapped.to_string(),
50944            }));
50945        }
50946        expr
50947    }
50948
50949    fn try_parse_date_part_unit_expr(&self, expr: &Expression) -> Option<IntervalUnit> {
50950        let upper = self.date_part_expr_name(expr)?.to_uppercase();
50951        let canonical = match upper.as_str() {
50952            // Year
50953            "Y" | "YY" | "YYY" | "YYYY" | "YR" | "YEARS" | "YRS" => "YEAR",
50954            // Quarter
50955            "Q" | "QTR" | "QTRS" | "QUARTERS" | "QQ" => "QUARTER",
50956            // Month
50957            "MM" | "MON" | "MONS" | "MONTHS" | "M" => "MONTH",
50958            // Week
50959            "W" | "WK" | "WEEKOFYEAR" | "WOY" | "WY" | "WW" | "WEEKS" => "WEEK",
50960            // Day
50961            "D" | "DD" | "DAYS" | "DAYOFMONTH" => "DAY",
50962            // Hour
50963            "H" | "HH" | "HR" | "HOURS" | "HRS" => "HOUR",
50964            // Minute
50965            "MI" | "MIN" | "MINUTES" | "MINS" | "N" => "MINUTE",
50966            // Second
50967            "S" | "SEC" | "SECONDS" | "SECS" | "SS" => "SECOND",
50968            // Millisecond
50969            "MS" | "MSEC" | "MSECS" | "MSECOND" | "MSECONDS" | "MILLISEC" | "MILLISECS"
50970            | "MILLISECON" | "MILLISECONDS" => "MILLISECOND",
50971            // Microsecond
50972            "US" | "USEC" | "USECS" | "MICROSEC" | "MICROSECS" | "USECOND" | "USECONDS"
50973            | "MICROSECONDS" | "MCS" => "MICROSECOND",
50974            // Nanosecond
50975            "NS" | "NSEC" | "NANOSEC" | "NSECOND" | "NSECONDS" | "NANOSECS" => "NANOSECOND",
50976            _ => upper.as_str(),
50977        };
50978
50979        Self::parse_interval_unit_from_string(canonical)
50980    }
50981
50982    fn try_parse_date_part_unit_identifier_expr(&self, expr: &Expression) -> Option<IntervalUnit> {
50983        let upper = self.date_part_identifier_expr_name(expr)?.to_uppercase();
50984        let canonical = match upper.as_str() {
50985            "Y" | "YY" | "YYY" | "YYYY" | "YR" | "YEARS" | "YRS" => "YEAR",
50986            "Q" | "QTR" | "QTRS" | "QUARTERS" | "QQ" => "QUARTER",
50987            "MM" | "MON" | "MONS" | "MONTHS" | "M" => "MONTH",
50988            "W" | "WK" | "WEEKOFYEAR" | "WOY" | "WY" | "WW" | "WEEKS" => "WEEK",
50989            "D" | "DD" | "DAYS" | "DAYOFMONTH" => "DAY",
50990            "H" | "HH" | "HR" | "HOURS" | "HRS" => "HOUR",
50991            "MI" | "MIN" | "MINUTES" | "MINS" | "N" => "MINUTE",
50992            "S" | "SEC" | "SECONDS" | "SECS" | "SS" => "SECOND",
50993            "MS" | "MSEC" | "MSECS" | "MSECOND" | "MSECONDS" | "MILLISEC" | "MILLISECS"
50994            | "MILLISECON" | "MILLISECONDS" => "MILLISECOND",
50995            "US" | "USEC" | "USECS" | "MICROSEC" | "MICROSECS" | "USECOND" | "USECONDS"
50996            | "MICROSECONDS" | "MCS" => "MICROSECOND",
50997            "NS" | "NSEC" | "NANOSEC" | "NSECOND" | "NSECONDS" | "NANOSECS" => "NANOSECOND",
50998            _ => upper.as_str(),
50999        };
51000
51001        Self::parse_interval_unit_from_string(canonical)
51002    }
51003
51004    fn try_parse_date_part_field_identifier_expr(
51005        &self,
51006        expr: &Expression,
51007    ) -> Option<DateTimeField> {
51008        let upper = self.date_part_identifier_expr_name(expr)?.to_uppercase();
51009        Some(match upper.as_str() {
51010            "YEAR" | "Y" | "YY" | "YYY" | "YYYY" | "YR" | "YEARS" | "YRS" => DateTimeField::Year,
51011            "MONTH" | "MM" | "MON" | "MONS" | "MONTHS" => DateTimeField::Month,
51012            "DAY" | "D" | "DD" | "DAYS" | "DAYOFMONTH" => DateTimeField::Day,
51013            "HOUR" | "H" | "HH" | "HR" | "HOURS" | "HRS" => DateTimeField::Hour,
51014            "MINUTE" | "MI" | "MIN" | "MINUTES" | "MINS" => DateTimeField::Minute,
51015            "SECOND" | "S" | "SEC" | "SECONDS" | "SECS" => DateTimeField::Second,
51016            "MILLISECOND" | "MS" | "MSEC" | "MILLISECONDS" => DateTimeField::Millisecond,
51017            "MICROSECOND" | "US" | "USEC" | "MICROSECONDS" => DateTimeField::Microsecond,
51018            "DOW" | "DAYOFWEEK" | "DW" => DateTimeField::DayOfWeek,
51019            "DOY" | "DAYOFYEAR" | "DY" => DateTimeField::DayOfYear,
51020            "WEEK" | "W" | "WK" | "WEEKOFYEAR" | "WOY" | "WW" => DateTimeField::Week,
51021            "QUARTER" | "Q" | "QTR" | "QTRS" | "QUARTERS" => DateTimeField::Quarter,
51022            "EPOCH" | "EPOCH_SECOND" | "EPOCH_SECONDS" => DateTimeField::Epoch,
51023            "TIMEZONE" => DateTimeField::Timezone,
51024            "TIMEZONE_HOUR" | "TZH" => DateTimeField::TimezoneHour,
51025            "TIMEZONE_MINUTE" | "TZM" => DateTimeField::TimezoneMinute,
51026            "DATE" => DateTimeField::Date,
51027            "TIME" => DateTimeField::Time,
51028            other => DateTimeField::Custom(other.to_string()),
51029        })
51030    }
51031
51032    fn convert_date_part_identifier_expr_to_var(&self, expr: Expression) -> Expression {
51033        match expr {
51034            Expression::Var(_) => expr,
51035            Expression::Column(c) if c.table.is_none() => {
51036                Expression::Var(Box::new(Var { this: c.name.name }))
51037            }
51038            Expression::Identifier(id) => Expression::Var(Box::new(Var { this: id.name })),
51039            _ => expr,
51040        }
51041    }
51042
51043    fn date_part_identifier_expr_name<'a>(&self, expr: &'a Expression) -> Option<&'a str> {
51044        match expr {
51045            Expression::Var(v) => Some(v.this.as_str()),
51046            Expression::Column(c) if c.table.is_none() => Some(c.name.name.as_str()),
51047            Expression::Identifier(id) => Some(id.name.as_str()),
51048            _ => None,
51049        }
51050    }
51051
51052    fn date_part_expr_name<'a>(&self, expr: &'a Expression) -> Option<&'a str> {
51053        self.date_part_identifier_expr_name(expr).or(match expr {
51054            Expression::Literal(Literal::String(s)) => Some(s.as_str()),
51055            _ => None,
51056        })
51057    }
51058
51059    fn try_clickhouse_func_arg_alias(&mut self, expr: Expression) -> Expression {
51060        if !matches!(
51061            self.config.dialect,
51062            Some(crate::dialects::DialectType::ClickHouse)
51063        ) {
51064            return expr;
51065        }
51066        // Try implicit alias first
51067        if self.check(TokenType::Var) || self.check(TokenType::Identifier) {
51068            let next_after = self.peek_nth(1).map(|t| t.token_type);
51069            let is_delimiter = matches!(
51070                next_after,
51071                Some(TokenType::Comma)
51072                    | Some(TokenType::RParen)
51073                    | Some(TokenType::From)
51074                    | Some(TokenType::For)
51075                    | Some(TokenType::As)
51076            );
51077            if is_delimiter {
51078                let alias_token = self.advance();
51079                let alias_name = alias_token.text.clone();
51080                return Expression::Alias(Box::new(crate::expressions::Alias::new(
51081                    expr,
51082                    Identifier::new(alias_name),
51083                )));
51084            }
51085        }
51086        // Try explicit AS alias
51087        if self.check(TokenType::As) {
51088            let next_idx = self.current + 1;
51089            let after_alias_idx = self.current + 2;
51090            let is_alias_token = next_idx < self.tokens.len()
51091                && matches!(
51092                    self.tokens[next_idx].token_type,
51093                    TokenType::Identifier | TokenType::Var | TokenType::QuotedIdentifier
51094                );
51095            let is_delimiter = is_alias_token
51096                && after_alias_idx < self.tokens.len()
51097                && matches!(
51098                    self.tokens[after_alias_idx].token_type,
51099                    TokenType::Comma
51100                        | TokenType::RParen
51101                        | TokenType::From
51102                        | TokenType::For
51103                        | TokenType::As
51104                );
51105            if is_delimiter {
51106                self.advance(); // consume AS
51107                let alias_token = self.advance();
51108                let alias_name = if alias_token.token_type == TokenType::QuotedIdentifier {
51109                    let mut ident = Identifier::new(alias_token.text.clone());
51110                    ident.quoted = true;
51111                    ident
51112                } else {
51113                    Identifier::new(alias_token.text.clone())
51114                };
51115                return Expression::Alias(Box::new(crate::expressions::Alias::new(
51116                    expr, alias_name,
51117                )));
51118            }
51119        }
51120        expr
51121    }
51122
51123    /// parse_clickhouse_engine_expression - Parse ENGINE expression with optional args
51124    fn parse_clickhouse_engine_expression(&mut self) -> Result<Expression> {
51125        if self.is_at_end() {
51126            return Err(self.parse_error("Expected engine name after ENGINE"));
51127        }
51128
51129        let token = self.advance();
51130        let quoted = matches!(token.token_type, TokenType::QuotedIdentifier);
51131        let name = token.text.clone();
51132
51133        let ident = Expression::Identifier(Identifier {
51134            name,
51135            quoted,
51136            trailing_comments: Vec::new(),
51137            span: None,
51138        });
51139
51140        if self.match_token(TokenType::LParen) {
51141            let args = if self.check(TokenType::RParen) {
51142                Vec::new()
51143            } else {
51144                self.parse_expression_list()?
51145            };
51146            self.expect(TokenType::RParen)?;
51147            Ok(Expression::Anonymous(Box::new(Anonymous {
51148                this: Box::new(ident),
51149                expressions: args,
51150            })))
51151        } else {
51152            Ok(ident)
51153        }
51154    }
51155
51156    /// parse_property_assignment - Ported from Python _parse_property_assignment
51157    /// Parses a property assignment: optionally = or AS, then a value
51158    #[allow(unused_variables, unused_mut)]
51159    pub fn parse_property_assignment(&mut self) -> Result<Option<Expression>> {
51160        // Optionally match = or AS
51161        let _ = self.match_token(TokenType::Eq);
51162        let _ = self.match_token(TokenType::Alias);
51163
51164        // Parse the value as an unquoted field
51165        let value = self.parse_unquoted_field()?;
51166
51167        Ok(value)
51168    }
51169
51170    /// parse_property_before - Implemented from Python _parse_property_before
51171    #[allow(unused_variables, unused_mut)]
51172    pub fn parse_property_before(&mut self) -> Result<Option<Expression>> {
51173        if self.match_text_seq(&["NO"]) {
51174            // Matched: NO
51175            return Ok(None);
51176        }
51177        if self.match_text_seq(&["DUAL"]) {
51178            // Matched: DUAL
51179            return Ok(None);
51180        }
51181        if self.match_text_seq(&["BEFORE"]) {
51182            // Matched: BEFORE
51183            return Ok(None);
51184        }
51185        if self.match_texts(&["MIN", "MINIMUM"]) {
51186            // Matched one of: MIN, MINIMUM
51187            return Ok(None);
51188        }
51189        if self.match_texts(&["MAX", "MAXIMUM"]) {
51190            // Matched one of: MAX, MAXIMUM
51191            return Ok(None);
51192        }
51193        Ok(None)
51194    }
51195
51196    /// parse_qualify - Parse QUALIFY clause (Snowflake, BigQuery)
51197    /// Python: if not self._match(TokenType.QUALIFY): return None; return exp.Qualify(this=self._parse_disjunction())
51198    pub fn parse_qualify(&mut self) -> Result<Option<Expression>> {
51199        if !self.match_token(TokenType::Qualify) {
51200            return Ok(None);
51201        }
51202        let condition = self.parse_expression()?;
51203        Ok(Some(Expression::Qualify(Box::new(Qualify {
51204            this: condition,
51205        }))))
51206    }
51207
51208    /// parse_range - Parses range expressions (BETWEEN, LIKE, IN, IS, etc.)
51209    /// Python: _parse_range
51210    pub fn parse_range(&mut self) -> Result<Option<Expression>> {
51211        // First parse a bitwise expression as the left side
51212        let mut this = self.parse_bitwise()?;
51213        if this.is_none() {
51214            return Ok(None);
51215        }
51216
51217        // Check for NOT (for NOT LIKE, NOT IN, NOT BETWEEN, etc.)
51218        let negate = self.match_token(TokenType::Not);
51219
51220        // BETWEEN
51221        if self.match_token(TokenType::Between) {
51222            let between = self.parse_between_with_expr(this.clone(), negate)?;
51223            this = Some(between);
51224            return Ok(this);
51225        }
51226
51227        // LIKE
51228        if self.match_token(TokenType::Like) {
51229            let left = this.clone().expect("left expression checked above");
51230            let right = self
51231                .parse_bitwise()?
51232                .ok_or_else(|| self.parse_error("Expected expression after LIKE"))?;
51233            let escape = self.parse_escape()?;
51234            let like = Expression::Like(Box::new(LikeOp {
51235                left,
51236                right,
51237                escape,
51238                quantifier: None,
51239                inferred_type: None,
51240            }));
51241            this = if negate {
51242                Some(Expression::Not(Box::new(UnaryOp {
51243                    this: like,
51244                    inferred_type: None,
51245                })))
51246            } else {
51247                Some(like)
51248            };
51249            return Ok(this);
51250        }
51251
51252        // ILIKE
51253        if self.match_token(TokenType::ILike) {
51254            let left = this.clone().expect("left expression checked above");
51255            let right = self
51256                .parse_bitwise()?
51257                .ok_or_else(|| self.parse_error("Expected expression after ILIKE"))?;
51258            let escape = self.parse_escape()?;
51259            let ilike = Expression::ILike(Box::new(LikeOp {
51260                left,
51261                right,
51262                escape,
51263                quantifier: None,
51264                inferred_type: None,
51265            }));
51266            this = if negate {
51267                Some(Expression::Not(Box::new(UnaryOp {
51268                    this: ilike,
51269                    inferred_type: None,
51270                })))
51271            } else {
51272                Some(ilike)
51273            };
51274            return Ok(this);
51275        }
51276
51277        // IN
51278        if self.match_token(TokenType::In) {
51279            let in_expr = self.parse_in_with_expr(this.clone())?;
51280            this = if negate {
51281                Some(Expression::Not(Box::new(UnaryOp {
51282                    this: in_expr,
51283                    inferred_type: None,
51284                })))
51285            } else {
51286                Some(in_expr)
51287            };
51288            return Ok(this);
51289        }
51290
51291        // IS [NOT] NULL / IS [NOT] TRUE / IS [NOT] FALSE
51292        if self.match_token(TokenType::Is) {
51293            let is_expr = self.parse_is_with_expr(this.clone())?;
51294            this = Some(is_expr);
51295            return Ok(this);
51296        }
51297
51298        // Handle standalone NOT with NULL (for NOT NULL pattern after negate)
51299        if negate && self.match_token(TokenType::Null) {
51300            if let Some(left) = this {
51301                let is_null = Expression::Is(Box::new(BinaryOp {
51302                    left,
51303                    right: Expression::Null(Null),
51304                    left_comments: Vec::new(),
51305                    operator_comments: Vec::new(),
51306                    trailing_comments: Vec::new(),
51307                    inferred_type: None,
51308                }));
51309                return Ok(Some(Expression::Not(Box::new(UnaryOp {
51310                    this: is_null,
51311                    inferred_type: None,
51312                }))));
51313            }
51314        }
51315
51316        Ok(this)
51317    }
51318
51319    /// parse_between_with_expr - Parses BETWEEN expression with given left side
51320    fn parse_between_with_expr(
51321        &mut self,
51322        this: Option<Expression>,
51323        negate: bool,
51324    ) -> Result<Expression> {
51325        let this_expr = match this {
51326            Some(e) => e,
51327            None => return Err(self.parse_error("Expected expression before BETWEEN")),
51328        };
51329
51330        // Check for SYMMETRIC/ASYMMETRIC qualifier
51331        let symmetric = if self.match_texts(&["SYMMETRIC"]) {
51332            Some(true)
51333        } else if self.match_texts(&["ASYMMETRIC"]) {
51334            Some(false)
51335        } else {
51336            None
51337        };
51338
51339        let low = self
51340            .parse_bitwise()?
51341            .ok_or_else(|| self.parse_error("Expected low expression after BETWEEN"))?;
51342
51343        if !self.match_token(TokenType::And) {
51344            return Err(self.parse_error("Expected AND in BETWEEN expression"));
51345        }
51346
51347        let high = self
51348            .parse_bitwise()?
51349            .ok_or_else(|| self.parse_error("Expected high expression after AND in BETWEEN"))?;
51350
51351        Ok(Expression::Between(Box::new(Between {
51352            this: this_expr,
51353            low,
51354            high,
51355            not: negate,
51356            symmetric,
51357        })))
51358    }
51359
51360    /// parse_in_with_expr - Parses IN expression with given left side
51361    fn parse_in_with_expr(&mut self, this: Option<Expression>) -> Result<Expression> {
51362        let this_expr = match this {
51363            Some(e) => e,
51364            None => return Err(self.parse_error("Expected expression before IN")),
51365        };
51366
51367        // BigQuery: IN UNNEST(expr) — UNNEST without wrapping parentheses
51368        if self.check_identifier("UNNEST") {
51369            self.advance(); // consume UNNEST
51370            self.expect(TokenType::LParen)?;
51371            let unnest_expr = self.parse_expression()?;
51372            self.expect(TokenType::RParen)?;
51373            return Ok(Expression::In(Box::new(In {
51374                this: this_expr,
51375                expressions: Vec::new(),
51376                query: None,
51377                not: false,
51378                global: false,
51379                unnest: Some(Box::new(unnest_expr)),
51380                is_field: false,
51381            })));
51382        }
51383
51384        // Parse the IN list (subquery or value list)
51385        if !self.match_token(TokenType::LParen) {
51386            // DuckDB: IN without parentheses for array/list membership: 'red' IN tbl.flags
51387            // Try to parse as a single expression (column/array reference)
51388            if let Ok(expr) = self.parse_primary() {
51389                return Ok(Expression::In(Box::new(In {
51390                    this: this_expr,
51391                    expressions: vec![expr],
51392                    query: None,
51393                    not: false,
51394                    global: false,
51395                    unnest: None,
51396                    is_field: true,
51397                })));
51398            }
51399            return Err(self.parse_error("Expected expression or parenthesized list after IN"));
51400        }
51401
51402        // Check if it's a subquery
51403        if self.check(TokenType::Select) {
51404            let subquery = self.parse_select()?;
51405            self.expect(TokenType::RParen)?;
51406            return Ok(Expression::In(Box::new(In {
51407                this: this_expr,
51408                expressions: Vec::new(),
51409                query: Some(subquery),
51410                not: false,
51411                global: false,
51412                unnest: None,
51413                is_field: false,
51414            })));
51415        }
51416
51417        // Parse value list. Pre-size for large IN lists to reduce reallocations.
51418        let capacity_hint = self.estimate_expression_list_capacity_until_rparen();
51419        let expressions = self.parse_expression_list_with_capacity(capacity_hint)?;
51420        self.expect(TokenType::RParen)?;
51421
51422        if expressions.is_empty() {
51423            return Err(self.parse_error("Expected expression list after IN"));
51424        }
51425
51426        Ok(Expression::In(Box::new(In {
51427            this: this_expr,
51428            expressions,
51429            query: None,
51430            not: false,
51431            global: false,
51432            unnest: None,
51433            is_field: false,
51434        })))
51435    }
51436
51437    /// parse_is_with_expr - Parses IS expression with given left side
51438    fn parse_is_with_expr(&mut self, this: Option<Expression>) -> Result<Expression> {
51439        let this_expr = match this {
51440            Some(e) => e,
51441            None => return Err(self.parse_error("Expected expression before IS")),
51442        };
51443
51444        let negate = self.match_token(TokenType::Not);
51445
51446        // IS NULL
51447        if self.match_token(TokenType::Null) {
51448            let is_null = Expression::Is(Box::new(BinaryOp {
51449                left: this_expr,
51450                right: Expression::Null(Null),
51451                left_comments: Vec::new(),
51452                operator_comments: Vec::new(),
51453                trailing_comments: Vec::new(),
51454                inferred_type: None,
51455            }));
51456            return if negate {
51457                Ok(Expression::Not(Box::new(UnaryOp {
51458                    this: is_null,
51459                    inferred_type: None,
51460                })))
51461            } else {
51462                Ok(is_null)
51463            };
51464        }
51465
51466        // IS TRUE
51467        if self.match_texts(&["TRUE"]) {
51468            let is_true = Expression::Is(Box::new(BinaryOp {
51469                left: this_expr,
51470                right: Expression::Boolean(BooleanLiteral { value: true }),
51471                left_comments: Vec::new(),
51472                operator_comments: Vec::new(),
51473                trailing_comments: Vec::new(),
51474                inferred_type: None,
51475            }));
51476            return if negate {
51477                Ok(Expression::Not(Box::new(UnaryOp {
51478                    this: is_true,
51479                    inferred_type: None,
51480                })))
51481            } else {
51482                Ok(is_true)
51483            };
51484        }
51485
51486        // IS FALSE
51487        if self.match_texts(&["FALSE"]) {
51488            let is_false = Expression::Is(Box::new(BinaryOp {
51489                left: this_expr,
51490                right: Expression::Boolean(BooleanLiteral { value: false }),
51491                left_comments: Vec::new(),
51492                operator_comments: Vec::new(),
51493                trailing_comments: Vec::new(),
51494                inferred_type: None,
51495            }));
51496            return if negate {
51497                Ok(Expression::Not(Box::new(UnaryOp {
51498                    this: is_false,
51499                    inferred_type: None,
51500                })))
51501            } else {
51502                Ok(is_false)
51503            };
51504        }
51505
51506        // IS JSON [VALUE|SCALAR|OBJECT|ARRAY] [WITH UNIQUE KEYS|WITHOUT UNIQUE KEYS|UNIQUE KEYS]
51507        if self.match_texts(&["JSON"]) {
51508            // Parse optional JSON type
51509            let json_type = if self.match_texts(&["VALUE"]) {
51510                Some("VALUE".to_string())
51511            } else if self.match_texts(&["SCALAR"]) {
51512                Some("SCALAR".to_string())
51513            } else if self.match_texts(&["OBJECT"]) {
51514                Some("OBJECT".to_string())
51515            } else if self.match_texts(&["ARRAY"]) {
51516                Some("ARRAY".to_string())
51517            } else {
51518                None
51519            };
51520
51521            // Parse optional key uniqueness constraint
51522            let unique_keys = if self.match_text_seq(&["WITH", "UNIQUE", "KEYS"]) {
51523                Some(JsonUniqueKeys::With)
51524            } else if self.match_text_seq(&["WITHOUT", "UNIQUE", "KEYS"]) {
51525                Some(JsonUniqueKeys::Without)
51526            } else if self.match_text_seq(&["UNIQUE", "KEYS"]) {
51527                // Shorthand for WITH UNIQUE KEYS
51528                Some(JsonUniqueKeys::Shorthand)
51529            } else {
51530                None
51531            };
51532
51533            return Ok(Expression::IsJson(Box::new(IsJson {
51534                this: this_expr,
51535                json_type,
51536                unique_keys,
51537                negated: negate,
51538            })));
51539        }
51540
51541        // IS DISTINCT FROM / IS NOT DISTINCT FROM
51542        if self.match_text_seq(&["DISTINCT", "FROM"]) {
51543            let right = self.parse_bitwise()?;
51544            if let Some(right_expr) = right {
51545                // IS DISTINCT FROM is semantically "not equal with null handling"
51546                // Use NullSafeNeq for IS DISTINCT FROM
51547                // If negate was set (IS NOT DISTINCT FROM), use NullSafeEq
51548                let expr = if negate {
51549                    Expression::NullSafeEq(Box::new(BinaryOp {
51550                        left: this_expr,
51551                        right: right_expr,
51552                        left_comments: Vec::new(),
51553                        operator_comments: Vec::new(),
51554                        trailing_comments: Vec::new(),
51555                        inferred_type: None,
51556                    }))
51557                } else {
51558                    Expression::NullSafeNeq(Box::new(BinaryOp {
51559                        left: this_expr,
51560                        right: right_expr,
51561                        left_comments: Vec::new(),
51562                        operator_comments: Vec::new(),
51563                        trailing_comments: Vec::new(),
51564                        inferred_type: None,
51565                    }))
51566                };
51567                return Ok(expr);
51568            }
51569            return Err(self.parse_error("Expected expression after IS DISTINCT FROM"));
51570        }
51571
51572        Err(self.parse_error("Expected NULL, TRUE, FALSE, JSON, or DISTINCT FROM after IS"))
51573    }
51574
51575    /// parse_reads_property - Implemented from Python _parse_reads_property
51576    #[allow(unused_variables, unused_mut)]
51577    pub fn parse_reads_property(&mut self) -> Result<Option<Expression>> {
51578        if self.match_text_seq(&["SQL", "DATA"]) {
51579            // Matched: SQL DATA
51580            return Ok(None);
51581        }
51582        Ok(None)
51583    }
51584
51585    /// parse_recursive_with_search - Parse SEARCH/CYCLE clause for recursive CTEs (PostgreSQL)
51586    /// Syntax: SEARCH BREADTH|DEPTH FIRST BY column SET column [USING column]
51587    ///     or: CYCLE column SET column USING column
51588    #[allow(unused_variables, unused_mut)]
51589    pub fn parse_recursive_with_search(&mut self) -> Result<Option<Box<Expression>>> {
51590        // Check for SEARCH or CYCLE keyword
51591        let kind = if self.match_text_seq(&["SEARCH"]) {
51592            // SEARCH BREADTH|DEPTH FIRST BY ...
51593            let search_kind = if self.match_text_seq(&["BREADTH"]) {
51594                "BREADTH"
51595            } else if self.match_text_seq(&["DEPTH"]) {
51596                "DEPTH"
51597            } else {
51598                return Ok(None);
51599            };
51600            // Consume "FIRST BY"
51601            self.match_text_seq(&["FIRST"]);
51602            self.match_text_seq(&["BY"]);
51603            search_kind.to_string()
51604        } else if self.match_token(TokenType::Cycle) {
51605            "CYCLE".to_string()
51606        } else {
51607            return Ok(None);
51608        };
51609
51610        // Parse the column(s) - for CYCLE this is typically a single column
51611        let this = self.expect_identifier()?;
51612        let this_expr = Expression::Identifier(Identifier::new(this));
51613
51614        // SET column
51615        let expression = if self.match_text_seq(&["SET"]) {
51616            let set_col = self.expect_identifier()?;
51617            Expression::Identifier(Identifier::new(set_col))
51618        } else {
51619            return Err(self.parse_error("Expected SET in CYCLE/SEARCH clause"));
51620        };
51621
51622        // USING column (optional for SEARCH, required for CYCLE)
51623        let using = if self.match_token(TokenType::Using) {
51624            let using_col = self.expect_identifier()?;
51625            Some(Box::new(Expression::Identifier(Identifier::new(using_col))))
51626        } else {
51627            None
51628        };
51629
51630        Ok(Some(Box::new(Expression::RecursiveWithSearch(Box::new(
51631            RecursiveWithSearch {
51632                kind,
51633                this: Box::new(this_expr),
51634                expression: Box::new(expression),
51635                using,
51636            },
51637        )))))
51638    }
51639
51640    /// parse_references - Ported from Python _parse_references
51641    /// Parses REFERENCES clause for foreign key constraints
51642    #[allow(unused_variables, unused_mut)]
51643    pub fn parse_references(&mut self) -> Result<Option<Expression>> {
51644        if !self.match_token(TokenType::References) {
51645            return Ok(None);
51646        }
51647
51648        // Parse referenced table
51649        let this = self.parse_table()?;
51650        if this.is_none() {
51651            return Err(self.parse_error("Expected table name after REFERENCES"));
51652        }
51653
51654        // Parse optional column list (table(col1, col2))
51655        let expressions = if self.match_token(TokenType::LParen) {
51656            let cols = self.parse_identifier_list()?;
51657            self.expect(TokenType::RParen)?;
51658            cols.into_iter()
51659                .map(|id| Expression::Identifier(id))
51660                .collect()
51661        } else {
51662            Vec::new()
51663        };
51664
51665        // Parse optional constraint options (ON DELETE, ON UPDATE, etc.)
51666        let options = self.parse_fk_constraint_options()?;
51667
51668        Ok(Some(Expression::Reference(Box::new(Reference {
51669            this: Box::new(this.unwrap()),
51670            expressions,
51671            options,
51672        }))))
51673    }
51674
51675    /// Parse key constraint options (ON DELETE CASCADE, ON UPDATE SET NULL, etc.)
51676    fn parse_fk_constraint_options(&mut self) -> Result<Vec<Expression>> {
51677        let mut options = Vec::new();
51678
51679        while self.match_token(TokenType::On) {
51680            let kind = if self.match_token(TokenType::Delete) {
51681                "DELETE"
51682            } else if self.match_token(TokenType::Update) {
51683                "UPDATE"
51684            } else {
51685                break;
51686            };
51687
51688            let action = if self.match_text_seq(&["NO", "ACTION"]) {
51689                "NO ACTION"
51690            } else if self.match_text_seq(&["SET", "NULL"]) {
51691                "SET NULL"
51692            } else if self.match_text_seq(&["SET", "DEFAULT"]) {
51693                "SET DEFAULT"
51694            } else if self.match_token(TokenType::Cascade) {
51695                "CASCADE"
51696            } else if self.match_token(TokenType::Restrict) {
51697                "RESTRICT"
51698            } else {
51699                continue;
51700            };
51701
51702            // Store as simple identifier with the full action description
51703            options.push(Expression::Identifier(Identifier {
51704                name: format!("ON {} {}", kind, action),
51705                quoted: false,
51706                trailing_comments: Vec::new(),
51707                span: None,
51708            }));
51709        }
51710
51711        // Parse MATCH option
51712        if self.match_token(TokenType::Match) {
51713            let match_type = if self.match_identifier("FULL") {
51714                "FULL"
51715            } else if self.match_identifier("PARTIAL") {
51716                "PARTIAL"
51717            } else if self.match_identifier("SIMPLE") {
51718                "SIMPLE"
51719            } else {
51720                ""
51721            };
51722            if !match_type.is_empty() {
51723                options.push(Expression::Identifier(Identifier {
51724                    name: format!("MATCH {}", match_type),
51725                    quoted: false,
51726                    trailing_comments: Vec::new(),
51727                    span: None,
51728                }));
51729            }
51730        }
51731
51732        Ok(options)
51733    }
51734
51735    /// parse_refresh - Implemented from Python _parse_refresh
51736    #[allow(unused_variables, unused_mut)]
51737    /// parse_refresh - Parses REFRESH TABLE or REFRESH MATERIALIZED VIEW
51738    /// Python: parser.py:7656-7668
51739    pub fn parse_refresh(&mut self) -> Result<Option<Expression>> {
51740        let kind = if self.match_token(TokenType::Table) {
51741            "TABLE".to_string()
51742        } else if self.match_text_seq(&["MATERIALIZED", "VIEW"]) {
51743            "MATERIALIZED VIEW".to_string()
51744        } else {
51745            String::new()
51746        };
51747
51748        // Parse the object name (string literal or table name)
51749        // First try a string literal, then fall back to table reference
51750        if let Some(s) = self.parse_string()? {
51751            return Ok(Some(Expression::Refresh(Box::new(Refresh {
51752                this: Box::new(s),
51753                kind,
51754            }))));
51755        }
51756
51757        // Parse as a table reference (schema.table format)
51758        let table_ref = self.parse_table_ref()?;
51759        let table_expr = Expression::Table(table_ref);
51760
51761        Ok(Some(Expression::Refresh(Box::new(Refresh {
51762            this: Box::new(table_expr),
51763            kind,
51764        }))))
51765    }
51766
51767    /// parse_refresh_trigger_property - Doris REFRESH clause for materialized views
51768    /// Syntax: REFRESH method ON kind [EVERY n UNIT] [STARTS 'datetime']
51769    /// Examples:
51770    ///   REFRESH COMPLETE ON MANUAL
51771    ///   REFRESH AUTO ON COMMIT
51772    ///   REFRESH AUTO ON SCHEDULE EVERY 5 MINUTE STARTS '2025-01-01 00:00:00'
51773    pub fn parse_refresh_trigger_property(&mut self) -> Result<RefreshTriggerProperty> {
51774        // Parse method: COMPLETE or AUTO
51775        let method = self.expect_identifier_or_keyword()?.to_uppercase();
51776
51777        // Parse ON
51778        self.expect(TokenType::On)?;
51779
51780        // Parse kind: MANUAL, COMMIT, or SCHEDULE
51781        let kind_text = self.expect_identifier_or_keyword()?.to_uppercase();
51782        let kind = Some(kind_text.clone());
51783
51784        // For SCHEDULE, parse EVERY n UNIT [STARTS 'datetime']
51785        let (every, unit, starts) = if kind_text == "SCHEDULE" {
51786            // EVERY n UNIT
51787            let every = if self.match_identifier("EVERY") {
51788                // parse_number returns Option<Expression> with Expression::Literal(Literal::Number(...))
51789                self.parse_number()?.map(Box::new)
51790            } else {
51791                None
51792            };
51793
51794            // Unit: MINUTE, HOUR, DAY, etc.
51795            let unit = if every.is_some() {
51796                Some(self.expect_identifier_or_keyword()?.to_uppercase())
51797            } else {
51798                None
51799            };
51800
51801            // STARTS 'datetime'
51802            let starts = if self.match_identifier("STARTS") {
51803                let s = self.expect_string()?;
51804                Some(Box::new(Expression::Literal(Literal::String(s))))
51805            } else {
51806                None
51807            };
51808
51809            (every, unit, starts)
51810        } else {
51811            (None, None, None)
51812        };
51813
51814        Ok(RefreshTriggerProperty {
51815            method,
51816            kind,
51817            every,
51818            unit,
51819            starts,
51820        })
51821    }
51822
51823    /// parse_remote_with_connection - Implemented from Python _parse_remote_with_connection
51824    #[allow(unused_variables, unused_mut)]
51825    pub fn parse_remote_with_connection(&mut self) -> Result<Option<Expression>> {
51826        if self.match_text_seq(&["WITH", "CONNECTION"]) {
51827            // Matched: WITH CONNECTION
51828            return Ok(None);
51829        }
51830        Ok(None)
51831    }
51832
51833    /// parse_respect_or_ignore_nulls - Implemented from Python _parse_respect_or_ignore_nulls
51834    #[allow(unused_variables, unused_mut)]
51835    pub fn parse_respect_or_ignore_nulls(&mut self) -> Result<Option<Expression>> {
51836        if self.match_text_seq(&["IGNORE", "NULLS"]) {
51837            // Matched: IGNORE NULLS
51838            return Ok(None);
51839        }
51840        if self.match_text_seq(&["RESPECT", "NULLS"]) {
51841            // Matched: RESPECT NULLS
51842            return Ok(None);
51843        }
51844        Ok(None)
51845    }
51846
51847    /// parse_retention_period - Parses HISTORY_RETENTION_PERIOD (TSQL)
51848    /// Python: _parse_retention_period
51849    /// Format: INFINITE | <number> DAY | DAYS | MONTH | MONTHS | YEAR | YEARS
51850    pub fn parse_retention_period(&mut self) -> Result<Option<Expression>> {
51851        // Try to parse a number first
51852        let number = self.parse_number()?;
51853        let number_str = number
51854            .map(|n| match n {
51855                Expression::Literal(Literal::Number(s)) => format!("{} ", s),
51856                _ => String::new(),
51857            })
51858            .unwrap_or_default();
51859
51860        // Parse the unit (any token as a variable)
51861        let unit = self.parse_var_any_token()?;
51862        let unit_str = unit
51863            .map(|u| match u {
51864                Expression::Var(v) => v.this.clone(),
51865                _ => String::new(),
51866            })
51867            .unwrap_or_default();
51868
51869        let result = format!("{}{}", number_str, unit_str);
51870        Ok(Some(Expression::Var(Box::new(Var { this: result }))))
51871    }
51872
51873    /// parse_var_any_token - Parses any token as a Var (for flexible parsing)
51874    fn parse_var_any_token(&mut self) -> Result<Option<Expression>> {
51875        if !self.is_at_end() {
51876            let token = self.advance();
51877            Ok(Some(Expression::Var(Box::new(Var {
51878                this: token.text.clone(),
51879            }))))
51880        } else {
51881            Ok(None)
51882        }
51883    }
51884
51885    /// parse_returning - Creates Returning expression
51886    /// Parses RETURNING clause (PostgreSQL) for INSERT/UPDATE/DELETE
51887    #[allow(unused_variables, unused_mut)]
51888    pub fn parse_returning(&mut self) -> Result<Option<Expression>> {
51889        if !self.match_token(TokenType::Returning) {
51890            return Ok(None);
51891        }
51892
51893        // Parse expressions (column list or *)
51894        let expressions = self.parse_expression_list()?;
51895
51896        // Check for INTO target_table (Oracle style)
51897        let into = if self.match_token(TokenType::Into) {
51898            self.parse_table()?.map(Box::new)
51899        } else {
51900            None
51901        };
51902
51903        Ok(Some(Expression::Returning(Box::new(Returning {
51904            expressions,
51905            into,
51906        }))))
51907    }
51908
51909    /// parse_output_clause - Parses OUTPUT clause (TSQL)
51910    /// Used in INSERT/UPDATE/DELETE and MERGE statements
51911    /// Supports expressions with optional AS aliases: OUTPUT col1, col2 AS alias, col3
51912    pub fn parse_output_clause(&mut self) -> Result<OutputClause> {
51913        // Parse comma-separated list of columns/expressions with optional aliases
51914        let mut columns = Vec::new();
51915        loop {
51916            let expr = self.parse_expression()?;
51917            // Check for optional AS alias
51918            let expr = if self.match_token(TokenType::As) {
51919                let alias = self.expect_identifier_or_keyword_with_quoted()?;
51920                Expression::Alias(Box::new(Alias {
51921                    this: expr,
51922                    alias,
51923                    column_aliases: Vec::new(),
51924                    pre_alias_comments: Vec::new(),
51925                    trailing_comments: Vec::new(),
51926                    inferred_type: None,
51927                }))
51928            } else {
51929                expr
51930            };
51931            columns.push(expr);
51932            if !self.match_token(TokenType::Comma) {
51933                break;
51934            }
51935        }
51936
51937        // Check for INTO target
51938        let into_table = if self.match_token(TokenType::Into) {
51939            Some(self.parse_expression()?)
51940        } else {
51941            None
51942        };
51943
51944        Ok(OutputClause {
51945            columns,
51946            into_table,
51947        })
51948    }
51949
51950    /// parse_returns - Implemented from Python _parse_returns
51951    /// Calls: parse_types
51952    #[allow(unused_variables, unused_mut)]
51953    pub fn parse_returns(&mut self) -> Result<Option<Expression>> {
51954        if self.match_text_seq(&["NULL", "ON", "NULL", "INPUT"]) {
51955            return Ok(Some(Expression::Schema(Box::new(Schema {
51956                this: None,
51957                expressions: Vec::new(),
51958            }))));
51959        }
51960        Ok(None)
51961    }
51962
51963    /// parse_row - Parses ROW FORMAT clause
51964    /// Returns RowFormatSerdeProperty or RowFormatDelimitedProperty
51965    pub fn parse_row(&mut self) -> Result<Option<Expression>> {
51966        // Python: if not self._match(TokenType.FORMAT): return None
51967        if !self.match_token(TokenType::Format) {
51968            return Ok(None);
51969        }
51970        self.parse_row_format()
51971    }
51972
51973    /// parse_row_format - Implemented from Python _parse_row_format
51974    /// Parses SERDE or DELIMITED row format specifications
51975    pub fn parse_row_format(&mut self) -> Result<Option<Expression>> {
51976        // Check for SERDE row format
51977        if self.match_text_seq(&["SERDE"]) {
51978            let this = self.parse_string()?;
51979            let serde_properties = self.parse_serde_properties(false)?;
51980
51981            return Ok(Some(Expression::RowFormatSerdeProperty(Box::new(
51982                RowFormatSerdeProperty {
51983                    this: Box::new(this.unwrap_or(Expression::Null(Null))),
51984                    serde_properties: serde_properties.map(Box::new),
51985                },
51986            ))));
51987        }
51988
51989        // Check for DELIMITED row format
51990        self.match_text_seq(&["DELIMITED"]);
51991
51992        let mut fields = None;
51993        let mut escaped = None;
51994        let mut collection_items = None;
51995        let mut map_keys = None;
51996        let mut lines = None;
51997        let mut null = None;
51998
51999        // Parse FIELDS TERMINATED BY
52000        if self.match_text_seq(&["FIELDS", "TERMINATED", "BY"]) {
52001            fields = self.parse_string()?.map(Box::new);
52002            // Parse optional ESCAPED BY
52003            if self.match_text_seq(&["ESCAPED", "BY"]) {
52004                escaped = self.parse_string()?.map(Box::new);
52005            }
52006        }
52007
52008        // Parse COLLECTION ITEMS TERMINATED BY
52009        if self.match_text_seq(&["COLLECTION", "ITEMS", "TERMINATED", "BY"]) {
52010            collection_items = self.parse_string()?.map(Box::new);
52011        }
52012
52013        // Parse MAP KEYS TERMINATED BY
52014        if self.match_text_seq(&["MAP", "KEYS", "TERMINATED", "BY"]) {
52015            map_keys = self.parse_string()?.map(Box::new);
52016        }
52017
52018        // Parse LINES TERMINATED BY
52019        if self.match_text_seq(&["LINES", "TERMINATED", "BY"]) {
52020            lines = self.parse_string()?.map(Box::new);
52021        }
52022
52023        // Parse NULL DEFINED AS
52024        if self.match_text_seq(&["NULL", "DEFINED", "AS"]) {
52025            null = self.parse_string()?.map(Box::new);
52026        }
52027
52028        // Parse optional WITH SERDEPROPERTIES
52029        let serde = self.parse_serde_properties(false)?.map(Box::new);
52030
52031        Ok(Some(Expression::RowFormatDelimitedProperty(Box::new(
52032            RowFormatDelimitedProperty {
52033                fields,
52034                escaped,
52035                collection_items,
52036                map_keys,
52037                lines,
52038                null,
52039                serde,
52040            },
52041        ))))
52042    }
52043
52044    /// parse_schema - Ported from Python _parse_schema
52045    /// Parses schema definition: (col1 type1, col2 type2, ...)
52046    /// Used for CREATE TABLE column definitions
52047    #[allow(unused_variables, unused_mut)]
52048    pub fn parse_schema(&mut self) -> Result<Option<Expression>> {
52049        self.parse_schema_with_this(None)
52050    }
52051
52052    /// parse_schema_with_this - Parses schema with optional table reference
52053    fn parse_schema_with_this(&mut self, this: Option<Expression>) -> Result<Option<Expression>> {
52054        // Check for opening parenthesis
52055        if !self.match_token(TokenType::LParen) {
52056            return Ok(this.map(|e| e));
52057        }
52058
52059        // Check if this is a subquery (SELECT, WITH, etc.) not a schema
52060        if self.check(TokenType::Select) || self.check(TokenType::With) {
52061            // Retreat - put back the LParen
52062            self.current -= 1;
52063            return Ok(this.map(|e| e));
52064        }
52065
52066        // Parse column definitions and constraints
52067        let mut expressions = Vec::new();
52068        if !self.check(TokenType::RParen) {
52069            loop {
52070                // Try to parse constraint first, then field definition
52071                if let Some(constraint) = self.parse_constraint()? {
52072                    expressions.push(constraint);
52073                } else if let Some(field_def) = self.parse_field_def()? {
52074                    expressions.push(field_def);
52075                } else {
52076                    break;
52077                }
52078
52079                if !self.match_token(TokenType::Comma) {
52080                    break;
52081                }
52082            }
52083        }
52084
52085        self.expect(TokenType::RParen)?;
52086
52087        Ok(Some(Expression::Schema(Box::new(Schema {
52088            this: this.map(Box::new),
52089            expressions,
52090        }))))
52091    }
52092
52093    /// Parse schema identifier: name or name(columns)
52094    /// Used for TSQL ON filegroup (partition_column) syntax
52095    fn parse_schema_identifier(&mut self) -> Result<Expression> {
52096        // Parse the identifier (filegroup name)
52097        let name = self.expect_identifier_with_quoted()?;
52098        let name_expr = Expression::Identifier(name);
52099
52100        // Check for optional parenthesized columns
52101        if self.match_token(TokenType::LParen) {
52102            let mut columns = Vec::new();
52103            loop {
52104                let col = self.expect_identifier_with_quoted()?;
52105                columns.push(Expression::Identifier(col));
52106                if !self.match_token(TokenType::Comma) {
52107                    break;
52108                }
52109            }
52110            self.expect(TokenType::RParen)?;
52111            Ok(Expression::Schema(Box::new(Schema {
52112                this: Some(Box::new(name_expr)),
52113                expressions: columns,
52114            })))
52115        } else {
52116            // Just the identifier, no columns
52117            Ok(name_expr)
52118        }
52119    }
52120
52121    /// parse_security - Implemented from Python _parse_security
52122    #[allow(unused_variables, unused_mut)]
52123    pub fn parse_security(&mut self) -> Result<Option<Expression>> {
52124        if self.match_texts(&["NONE", "DEFINER", "INVOKER"]) {
52125            // Matched one of: NONE, DEFINER, INVOKER
52126            return Ok(None);
52127        }
52128        Ok(None)
52129    }
52130
52131    /// parse_select_or_expression - Parses either a SELECT statement or an expression
52132    /// Python: _parse_select_or_expression
52133    pub fn parse_select_or_expression(&mut self) -> Result<Option<Expression>> {
52134        // Save position for potential backtracking
52135        let start_pos = self.current;
52136
52137        // First try to parse a SELECT statement if we're at a SELECT keyword
52138        if self.check(TokenType::Select) {
52139            return Ok(Some(self.parse_select()?));
52140        }
52141
52142        // Otherwise try to parse an expression (assignment)
52143        if let Some(expr) = self.parse_disjunction()? {
52144            return Ok(Some(expr));
52145        }
52146
52147        // Backtrack if nothing worked
52148        self.current = start_pos;
52149
52150        Ok(None)
52151    }
52152
52153    /// parse_select_query - Implemented from Python _parse_select_query
52154    /// Calls: parse_string, parse_table, parse_describe
52155    #[allow(unused_variables, unused_mut)]
52156    pub fn parse_select_query(&mut self) -> Result<Option<Expression>> {
52157        if self.match_texts(&["STRUCT", "VALUE"]) {
52158            // Matched one of: STRUCT, VALUE
52159            return Ok(None);
52160        }
52161        Ok(None)
52162    }
52163
52164    /// parse_sequence_properties - Implemented from Python _parse_sequence_properties
52165    /// Calls: parse_number, parse_term, parse_column
52166    #[allow(unused_variables, unused_mut)]
52167    pub fn parse_sequence_properties(&mut self) -> Result<Option<Expression>> {
52168        if self.match_text_seq(&["INCREMENT"]) {
52169            return Ok(Some(Expression::SequenceProperties(Box::new(
52170                SequenceProperties {
52171                    increment: None,
52172                    minvalue: None,
52173                    maxvalue: None,
52174                    cache: None,
52175                    start: None,
52176                    owned: None,
52177                    options: Vec::new(),
52178                },
52179            ))));
52180        }
52181        if self.match_text_seq(&["BY"]) {
52182            // Matched: BY
52183            return Ok(None);
52184        }
52185        if self.match_text_seq(&["="]) {
52186            // Matched: =
52187            return Ok(None);
52188        }
52189        Ok(None)
52190    }
52191
52192    /// parse_serde_properties - Implemented from Python _parse_serde_properties
52193    /// Parses SERDEPROPERTIES clause: [WITH] SERDEPROPERTIES (key=value, ...)
52194    pub fn parse_serde_properties(&mut self, with_: bool) -> Result<Option<Expression>> {
52195        let start_index = self.current;
52196        let has_with = with_ || self.match_text_seq(&["WITH"]);
52197
52198        // Check for SERDEPROPERTIES keyword
52199        if !self.match_token(TokenType::SerdeProperties) {
52200            self.current = start_index;
52201            return Ok(None);
52202        }
52203
52204        // Parse wrapped properties manually since parse_property doesn't handle 'key'='value' syntax
52205        let mut expressions = Vec::new();
52206        if self.match_token(TokenType::LParen) {
52207            loop {
52208                if self.check(TokenType::RParen) {
52209                    break;
52210                }
52211                // Parse 'key'='value' or key=value
52212                let key = self.parse_primary()?;
52213                if self.match_token(TokenType::Eq) {
52214                    let value = self.parse_primary()?;
52215                    expressions.push(Expression::Eq(Box::new(BinaryOp::new(key, value))));
52216                } else {
52217                    expressions.push(key);
52218                }
52219                if !self.match_token(TokenType::Comma) {
52220                    break;
52221                }
52222            }
52223            self.expect(TokenType::RParen)?;
52224        }
52225
52226        Ok(Some(Expression::SerdeProperties(Box::new(
52227            SerdeProperties {
52228                expressions,
52229                with_: if has_with {
52230                    Some(Box::new(Expression::Boolean(BooleanLiteral {
52231                        value: true,
52232                    })))
52233                } else {
52234                    None
52235                },
52236            },
52237        ))))
52238    }
52239
52240    /// parse_session_parameter - Ported from Python _parse_session_parameter
52241    #[allow(unused_variables, unused_mut)]
52242    /// parse_session_parameter - Parses session parameters (@@var or @@session.var)
52243    /// Example: @@session.sql_mode, @@global.autocommit
52244    pub fn parse_session_parameter(&mut self) -> Result<Option<Expression>> {
52245        // Parse the first identifier or primary
52246        let first = if let Some(id) = self.parse_id_var()? {
52247            id
52248        } else if let Some(primary) = self.parse_primary_or_var()? {
52249            primary
52250        } else {
52251            return Ok(None);
52252        };
52253
52254        // Check for dot notation (kind.name)
52255        let (kind, this) = if self.match_token(TokenType::Dot) {
52256            // kind is the first part, parse the second
52257            let kind_name = match &first {
52258                Expression::Identifier(id) => Some(id.name.clone()),
52259                _ => None,
52260            };
52261            let second = self
52262                .parse_var()?
52263                .or_else(|| self.parse_primary_or_var().ok().flatten());
52264            (kind_name, second.unwrap_or(first))
52265        } else {
52266            (None, first)
52267        };
52268
52269        Ok(Some(Expression::SessionParameter(Box::new(
52270            SessionParameter {
52271                this: Box::new(this),
52272                kind,
52273            },
52274        ))))
52275    }
52276
52277    /// parse_set_item - Ported from Python _parse_set_item
52278    /// Parses an item in a SET statement (GLOBAL, LOCAL, SESSION prefixes, or assignment)
52279    #[allow(unused_variables, unused_mut)]
52280    pub fn parse_set_item(&mut self) -> Result<Option<Expression>> {
52281        // Check for specific prefixes
52282        let kind = if self.match_text_seq(&["GLOBAL"]) {
52283            Some("GLOBAL".to_string())
52284        } else if self.match_text_seq(&["LOCAL"]) {
52285            Some("LOCAL".to_string())
52286        } else if self.match_text_seq(&["SESSION"]) {
52287            Some("SESSION".to_string())
52288        } else {
52289            None
52290        };
52291
52292        // Delegate to set_item_assignment
52293        self.parse_set_item_assignment()
52294    }
52295
52296    /// parse_set_item_assignment - Implemented from Python _parse_set_item_assignment
52297    /// Parses SET variable = value assignments
52298    pub fn parse_set_item_assignment(&mut self) -> Result<Option<Expression>> {
52299        let start_index = self.current;
52300
52301        // Try to parse as TRANSACTION
52302        if self.match_text_seq(&["TRANSACTION"]) {
52303            // This is handled by parse_set_transaction
52304            return Ok(Some(Expression::SetItem(Box::new(SetItem {
52305                name: Expression::Var(Box::new(Var {
52306                    this: "TRANSACTION".to_string(),
52307                })),
52308                value: Expression::Null(Null),
52309                kind: None,
52310                no_equals: false,
52311            }))));
52312        }
52313
52314        // Parse left side: primary or column
52315        let left = self
52316            .parse_primary_or_var()?
52317            .or_else(|| self.parse_column().ok().flatten());
52318
52319        if left.is_none() {
52320            self.current = start_index;
52321            return Ok(None);
52322        }
52323
52324        // Check for assignment delimiter (= or TO or :=)
52325        if !self.match_texts(&["=", "TO", ":="]) {
52326            self.current = start_index;
52327            return Ok(None);
52328        }
52329
52330        // Parse right side: value
52331        // First try string literals (preserve quoting), then booleans/numbers, then identifiers
52332        let right_val = if self.check(TokenType::String) {
52333            let text = self.advance().text.clone();
52334            Expression::Literal(Literal::String(text))
52335        } else if self.check(TokenType::False) {
52336            self.advance();
52337            Expression::Boolean(BooleanLiteral { value: false })
52338        } else if self.check(TokenType::True) {
52339            self.advance();
52340            Expression::Boolean(BooleanLiteral { value: true })
52341        } else {
52342            let right = self
52343                .parse_id_var()?
52344                .or_else(|| self.parse_primary_or_var().ok().flatten());
52345            // Convert Column/Identifier to Var
52346            match right {
52347                Some(Expression::Column(col)) => Expression::Var(Box::new(Var {
52348                    this: col.name.name.clone(),
52349                })),
52350                Some(Expression::Identifier(id)) => Expression::Var(Box::new(Var {
52351                    this: id.name.clone(),
52352                })),
52353                Some(other) => other,
52354                None => Expression::Null(Null),
52355            }
52356        };
52357
52358        Ok(Some(Expression::SetItem(Box::new(SetItem {
52359            name: left
52360                .ok_or_else(|| self.parse_error("Expected variable name in SET statement"))?,
52361            value: right_val,
52362            kind: None,
52363            no_equals: false,
52364        }))))
52365    }
52366
52367    /// parse_set_operations - Parses UNION/INTERSECT/EXCEPT operations
52368    /// This version parses from current position (expects to be at set operator)
52369    /// Python: _parse_set_operations
52370    pub fn parse_set_operations(&mut self) -> Result<Option<Expression>> {
52371        // Parse a SELECT or subquery first
52372        let left = if self.check(TokenType::Select) {
52373            Some(self.parse_select()?)
52374        } else if self.match_token(TokenType::LParen) {
52375            let inner = self.parse_select()?;
52376            self.match_token(TokenType::RParen);
52377            Some(inner)
52378        } else {
52379            None
52380        };
52381
52382        if left.is_none() {
52383            return Ok(None);
52384        }
52385
52386        self.parse_set_operations_with_expr(left)
52387    }
52388
52389    /// parse_set_operations_with_expr - Parses set operations with a left expression
52390    pub fn parse_set_operations_with_expr(
52391        &mut self,
52392        this: Option<Expression>,
52393    ) -> Result<Option<Expression>> {
52394        let mut result = this;
52395
52396        while result.is_some() {
52397            if let Some(setop) = self.parse_set_operation_with_expr(result.clone())? {
52398                result = Some(setop);
52399            } else {
52400                break;
52401            }
52402        }
52403
52404        Ok(result)
52405    }
52406
52407    /// parse_set_operation_with_expr - Parses a single set operation (UNION, INTERSECT, EXCEPT)
52408    fn parse_set_operation_with_expr(
52409        &mut self,
52410        left: Option<Expression>,
52411    ) -> Result<Option<Expression>> {
52412        let left_expr = match left {
52413            Some(e) => e,
52414            None => return Ok(None),
52415        };
52416
52417        // Check for UNION, INTERSECT, EXCEPT
52418        let op_type = if self.match_token(TokenType::Union) {
52419            "UNION"
52420        } else if self.match_token(TokenType::Intersect) {
52421            "INTERSECT"
52422        } else if self.match_token(TokenType::Except) {
52423            "EXCEPT"
52424        } else {
52425            return Ok(Some(left_expr));
52426        };
52427
52428        // Check for ALL or DISTINCT
52429        let (all, distinct) = if self.match_token(TokenType::All) {
52430            (true, false)
52431        } else {
52432            let d = self.match_token(TokenType::Distinct);
52433            (false, d)
52434        };
52435
52436        // DuckDB: UNION [ALL] BY NAME SELECT ...
52437        let by_name = self.match_token(TokenType::By) && self.match_identifier("NAME");
52438
52439        // Parse the right side (SELECT or subquery)
52440        let right = if self.check(TokenType::Select) {
52441            self.parse_select()?
52442        } else if self.match_token(TokenType::LParen) {
52443            let inner = self.parse_select()?;
52444            self.match_token(TokenType::RParen);
52445            inner
52446        } else {
52447            return Ok(Some(left_expr));
52448        };
52449
52450        // Create the appropriate set operation expression
52451        match op_type {
52452            "UNION" => Ok(Some(Expression::Union(Box::new(Union {
52453                left: left_expr,
52454                right,
52455                all,
52456                distinct,
52457                with: None,
52458                order_by: None,
52459                limit: None,
52460                offset: None,
52461                distribute_by: None,
52462                sort_by: None,
52463                cluster_by: None,
52464                by_name,
52465                side: None,
52466                kind: None,
52467                corresponding: false,
52468                strict: false,
52469                on_columns: Vec::new(),
52470            })))),
52471            "INTERSECT" => Ok(Some(Expression::Intersect(Box::new(Intersect {
52472                left: left_expr,
52473                right,
52474                all,
52475                distinct,
52476                with: None,
52477                order_by: None,
52478                limit: None,
52479                offset: None,
52480                distribute_by: None,
52481                sort_by: None,
52482                cluster_by: None,
52483                by_name,
52484                side: None,
52485                kind: None,
52486                corresponding: false,
52487                strict: false,
52488                on_columns: Vec::new(),
52489            })))),
52490            "EXCEPT" => Ok(Some(Expression::Except(Box::new(Except {
52491                left: left_expr,
52492                right,
52493                all,
52494                distinct,
52495                with: None,
52496                order_by: None,
52497                limit: None,
52498                offset: None,
52499                distribute_by: None,
52500                sort_by: None,
52501                cluster_by: None,
52502                by_name,
52503                side: None,
52504                kind: None,
52505                corresponding: false,
52506                strict: false,
52507                on_columns: Vec::new(),
52508            })))),
52509            _ => Ok(Some(left_expr)),
52510        }
52511    }
52512
52513    /// parse_set_transaction - Implemented from Python _parse_set_transaction
52514    #[allow(unused_variables, unused_mut)]
52515    pub fn parse_set_transaction(&mut self) -> Result<Option<Expression>> {
52516        if self.match_text_seq(&["TRANSACTION"]) {
52517            // Matched: TRANSACTION
52518            return Ok(None);
52519        }
52520        Ok(None)
52521    }
52522
52523    /// Helper to consume an optional ClickHouse SETTINGS clause
52524    /// Used in SHOW, CHECK TABLE, and other ClickHouse statements
52525    fn parse_clickhouse_settings_clause(&mut self) -> Result<()> {
52526        if self.match_token(TokenType::Settings) {
52527            let _ = self.parse_settings_property()?;
52528        }
52529        Ok(())
52530    }
52531
52532    /// parse_settings_property - Parses SETTINGS property (ClickHouse)
52533    /// Python: _parse_settings_property
52534    /// Format: SETTINGS key=value, key=value, ...
52535    pub fn parse_settings_property(&mut self) -> Result<Option<Expression>> {
52536        // Parse comma-separated assignment expressions
52537        let mut expressions = Vec::new();
52538        loop {
52539            if let Some(assignment) = self.parse_assignment()? {
52540                expressions.push(assignment);
52541            } else {
52542                break;
52543            }
52544            if !self.match_token(TokenType::Comma) {
52545                break;
52546            }
52547        }
52548
52549        Ok(Some(Expression::SettingsProperty(Box::new(
52550            SettingsProperty { expressions },
52551        ))))
52552    }
52553
52554    /// parse_simplified_pivot - Ported from Python _parse_simplified_pivot
52555    /// Handles DuckDB simplified PIVOT/UNPIVOT syntax:
52556    ///   PIVOT table ON columns [IN (...)] USING agg_func [AS alias], ... [GROUP BY ...]
52557    ///   UNPIVOT table ON columns [INTO NAME col VALUE col, ...]
52558    #[allow(unused_variables, unused_mut)]
52559    pub fn parse_simplified_pivot(&mut self, is_unpivot: bool) -> Result<Option<Expression>> {
52560        // Parse the source table (can be a subquery like (SELECT 1 AS col1, 2 AS col2))
52561        let this = if self.check(TokenType::LParen) {
52562            // Could be parenthesized subquery
52563            self.advance(); // consume (
52564            if self.check(TokenType::Select) || self.check(TokenType::With) {
52565                let inner = self.parse_statement()?;
52566                self.expect(TokenType::RParen)?;
52567                Some(Expression::Subquery(Box::new(Subquery {
52568                    this: inner,
52569                    alias: None,
52570                    column_aliases: Vec::new(),
52571                    order_by: None,
52572                    limit: None,
52573                    offset: None,
52574                    lateral: false,
52575                    modifiers_inside: false,
52576                    trailing_comments: Vec::new(),
52577                    distribute_by: None,
52578                    sort_by: None,
52579                    cluster_by: None,
52580                    inferred_type: None,
52581                })))
52582            } else {
52583                // Not a subquery, retreat and parse as expression in parens
52584                self.current -= 1; // un-consume the (
52585                Some(self.parse_primary()?)
52586            }
52587        } else {
52588            // Parse table reference (e.g., Cities, schema.table, duckdb_functions())
52589            Some(self.parse_primary()?)
52590        };
52591
52592        // Parse ON columns
52593        let expressions = if self.match_text_seq(&["ON"]) {
52594            let mut on_exprs = Vec::new();
52595            loop {
52596                // Parse ON expression - use parse_bitwise to handle complex expressions like Country || '_' || Name
52597                let on_expr = self.parse_bitwise()?;
52598                if on_expr.is_none() {
52599                    break;
52600                }
52601                let mut expr = on_expr.unwrap();
52602
52603                // Check for IN clause on this column
52604                if self.match_token(TokenType::In) {
52605                    if self.match_token(TokenType::LParen) {
52606                        let mut in_exprs = Vec::new();
52607                        loop {
52608                            if self.check(TokenType::RParen) {
52609                                break;
52610                            }
52611                            if let Some(val) = self.parse_select_or_expression()? {
52612                                in_exprs.push(val);
52613                            }
52614                            if !self.match_token(TokenType::Comma) {
52615                                break;
52616                            }
52617                        }
52618                        self.expect(TokenType::RParen)?;
52619                        expr = Expression::In(Box::new(In {
52620                            this: expr,
52621                            expressions: in_exprs,
52622                            query: None,
52623                            not: false,
52624                            global: false,
52625                            unnest: None,
52626                            is_field: false,
52627                        }));
52628                    }
52629                }
52630                // Check for alias (UNPIVOT ON (jan, feb, mar) AS q1, ...)
52631                else if self.match_token(TokenType::As) {
52632                    let alias_name = self.expect_identifier()?;
52633                    expr =
52634                        Expression::Alias(Box::new(Alias::new(expr, Identifier::new(alias_name))));
52635                }
52636
52637                on_exprs.push(expr);
52638
52639                // Continue if comma
52640                if !self.match_token(TokenType::Comma) {
52641                    break;
52642                }
52643            }
52644            on_exprs
52645        } else {
52646            Vec::new()
52647        };
52648
52649        // Parse INTO for UNPIVOT columns (INTO NAME col VALUE col, ...)
52650        let into = self.parse_unpivot_columns()?;
52651
52652        // Parse USING clause (aggregation functions with optional aliases)
52653        // e.g., USING SUM(Population), USING SUM(Population) AS total, MAX(Population) AS max
52654        // e.g., USING CAST(AVG(LENGTH(function_name)) AS INT)
52655        let using = if self.match_text_seq(&["USING"]) {
52656            let mut using_exprs = Vec::new();
52657            loop {
52658                // Stop if we hit GROUP BY or end of input
52659                if self.is_at_end() || self.check(TokenType::Group) || self.check(TokenType::RParen)
52660                {
52661                    break;
52662                }
52663                // Parse the primary expression (function call, possibly with cast :: operator)
52664                let func = self.parse_primary()?;
52665                // Check for :: cast operator (e.g., SUM(Population)::INTEGER)
52666                let expr = if self.match_token(TokenType::DColon) {
52667                    let data_type = self.parse_data_type()?;
52668                    Expression::Cast(Box::new(Cast {
52669                        this: func,
52670                        to: data_type,
52671                        trailing_comments: Vec::new(),
52672                        double_colon_syntax: true,
52673                        format: None,
52674                        default: None,
52675                        inferred_type: None,
52676                    }))
52677                } else {
52678                    func
52679                };
52680                // Try to parse alias (AS alias)
52681                if self.match_token(TokenType::As) {
52682                    let alias_name = self.expect_identifier()?;
52683                    using_exprs.push(Expression::Alias(Box::new(Alias::new(
52684                        expr,
52685                        Identifier::new(alias_name),
52686                    ))));
52687                } else {
52688                    using_exprs.push(expr);
52689                }
52690                if !self.match_token(TokenType::Comma) {
52691                    break;
52692                }
52693            }
52694            using_exprs
52695        } else {
52696            Vec::new()
52697        };
52698
52699        // Parse optional GROUP BY
52700        let group = self.parse_group()?;
52701
52702        let source = this.unwrap();
52703
52704        Ok(Some(Expression::Pivot(Box::new(Pivot {
52705            this: source,
52706            expressions,
52707            fields: Vec::new(),
52708            using,
52709            group: group.map(Box::new),
52710            unpivot: is_unpivot,
52711            into: into.map(Box::new),
52712            alias: None,
52713            include_nulls: None,
52714            default_on_null: None,
52715            with: None,
52716        }))))
52717    }
52718
52719    /// parse_slice - Parses array slice syntax [start:end:step]
52720    /// Python: _parse_slice
52721    /// Takes an optional 'this' expression (the start of the slice)
52722    pub fn parse_slice(&mut self) -> Result<Option<Expression>> {
52723        self.parse_slice_with_this(None)
52724    }
52725
52726    /// Implementation of parse_slice with 'this' parameter
52727    pub fn parse_slice_with_this(
52728        &mut self,
52729        this: Option<Expression>,
52730    ) -> Result<Option<Expression>> {
52731        // Check for colon - if not found, return this as-is
52732        if !self.match_token(TokenType::Colon) {
52733            return Ok(this);
52734        }
52735
52736        // Parse end expression
52737        // Handle special case: -: which means -1 (from end)
52738        let end = if self.check(TokenType::Dash) && self.check_next(TokenType::Colon) {
52739            // -: pattern means -1 (from end)
52740            self.advance(); // consume dash
52741            Some(Expression::Neg(Box::new(UnaryOp::new(
52742                Expression::Literal(Literal::Number("1".to_string())),
52743            ))))
52744        } else if self.check(TokenType::Colon) || self.check(TokenType::RBracket) {
52745            // Empty end like [start::step] or [start:]
52746            None
52747        } else {
52748            Some(self.parse_unary()?)
52749        };
52750
52751        // Parse optional step expression after second colon
52752        let step = if self.match_token(TokenType::Colon) {
52753            if self.check(TokenType::RBracket) {
52754                None
52755            } else {
52756                Some(self.parse_unary()?)
52757            }
52758        } else {
52759            None
52760        };
52761
52762        Ok(Some(Expression::Slice(Box::new(Slice {
52763            this: this.map(Box::new),
52764            expression: end.map(Box::new),
52765            step: step.map(Box::new),
52766        }))))
52767    }
52768
52769    /// Parse a slice element (start, end, or step in array slicing)
52770    /// This uses parse_unary to avoid interpreting : as parameter syntax
52771    /// Returns None for empty elements (e.g., [:] or [::step])
52772    fn parse_slice_element(&mut self) -> Result<Option<Expression>> {
52773        // Check for empty element (next is : or ])
52774        if self.check(TokenType::Colon) || self.check(TokenType::RBracket) {
52775            return Ok(None);
52776        }
52777        // Handle special case: -: means -1 (from the end)
52778        // This is used in slicing like [:-:-1] where the first -: means end=-1
52779        if self.check(TokenType::Dash) && self.check_next(TokenType::Colon) {
52780            self.advance(); // consume dash
52781                            // Don't consume the colon - let the caller handle it
52782            return Ok(Some(Expression::Neg(Box::new(UnaryOp::new(
52783                Expression::Literal(Literal::Number("1".to_string())),
52784            )))));
52785        }
52786        // Parse full expression (including binary ops like y - 1) but stop at : or ]
52787        let expr = self.parse_disjunction()?;
52788        Ok(expr)
52789    }
52790
52791    /// parse_sort - Ported from Python _parse_sort
52792    /// Parses SORT BY clause (Hive/Spark)
52793    #[allow(unused_variables, unused_mut)]
52794    pub fn parse_sort(&mut self) -> Result<Option<Expression>> {
52795        // Check for SORT BY token
52796        if !self.match_keywords(&[TokenType::Sort, TokenType::By]) {
52797            return Ok(None);
52798        }
52799
52800        // Parse comma-separated ordered expressions
52801        let mut expressions = Vec::new();
52802        loop {
52803            if let Some(ordered) = self.parse_ordered_item()? {
52804                expressions.push(ordered);
52805            } else {
52806                break;
52807            }
52808            if !self.match_token(TokenType::Comma) {
52809                break;
52810            }
52811        }
52812
52813        Ok(Some(Expression::SortBy(Box::new(SortBy { expressions }))))
52814    }
52815
52816    /// parse_cluster_by_clause - Parses CLUSTER BY clause (Hive/Spark)
52817    #[allow(unused_variables, unused_mut)]
52818    pub fn parse_cluster_by_clause(&mut self) -> Result<Option<Expression>> {
52819        if !self.match_keywords(&[TokenType::Cluster, TokenType::By]) {
52820            return Ok(None);
52821        }
52822
52823        // Parse comma-separated ordered expressions
52824        let mut expressions: Vec<Ordered> = Vec::new();
52825        loop {
52826            if let Some(ordered) = self.parse_ordered_item()? {
52827                expressions.push(ordered);
52828            } else {
52829                break;
52830            }
52831            if !self.match_token(TokenType::Comma) {
52832                break;
52833            }
52834        }
52835        Ok(Some(Expression::ClusterBy(Box::new(ClusterBy {
52836            expressions,
52837        }))))
52838    }
52839
52840    /// parse_distribute_by_clause - Parses DISTRIBUTE BY clause (Hive/Spark)
52841    #[allow(unused_variables, unused_mut)]
52842    pub fn parse_distribute_by_clause(&mut self) -> Result<Option<Expression>> {
52843        if !self.match_keywords(&[TokenType::Distribute, TokenType::By]) {
52844            return Ok(None);
52845        }
52846
52847        let expressions = self.parse_expression_list()?;
52848        Ok(Some(Expression::DistributeBy(Box::new(DistributeBy {
52849            expressions,
52850        }))))
52851    }
52852
52853    /// parse_sortkey - Redshift/PostgreSQL SORTKEY property
52854    /// Parses SORTKEY(column1, column2, ...) with optional COMPOUND modifier
52855    #[allow(unused_variables, unused_mut)]
52856    pub fn parse_sortkey(&mut self) -> Result<Option<Expression>> {
52857        // Parse the wrapped list of columns/identifiers
52858        let this = if self.match_token(TokenType::LParen) {
52859            let mut columns = Vec::new();
52860            loop {
52861                if let Some(id) = self.parse_id_var()? {
52862                    columns.push(id);
52863                } else {
52864                    break;
52865                }
52866                if !self.match_token(TokenType::Comma) {
52867                    break;
52868                }
52869            }
52870            self.match_token(TokenType::RParen);
52871
52872            if columns.is_empty() {
52873                return Ok(None);
52874            }
52875
52876            if columns.len() == 1 {
52877                columns.into_iter().next().unwrap()
52878            } else {
52879                Expression::Tuple(Box::new(Tuple {
52880                    expressions: columns,
52881                }))
52882            }
52883        } else {
52884            // Single column without parens
52885            if let Some(id) = self.parse_id_var()? {
52886                id
52887            } else {
52888                return Ok(None);
52889            }
52890        };
52891
52892        Ok(Some(Expression::SortKeyProperty(Box::new(
52893            SortKeyProperty {
52894                this: Box::new(this),
52895                compound: None, // compound is set by caller if COMPOUND keyword was matched
52896            },
52897        ))))
52898    }
52899
52900    /// parse_star - Parse STAR (*) token with optional EXCEPT/REPLACE/RENAME
52901    /// Python: if self._match(TokenType.STAR): return self._parse_star_ops()
52902    pub fn parse_star(&mut self) -> Result<Option<Expression>> {
52903        if !self.match_token(TokenType::Star) {
52904            return Ok(None);
52905        }
52906
52907        // Parse optional EXCEPT/EXCLUDE columns
52908        let except = self.parse_star_except()?;
52909
52910        // Parse optional REPLACE expressions
52911        let replace = self.parse_star_replace()?;
52912
52913        // Parse optional RENAME columns
52914        let rename = self.parse_star_rename()?;
52915
52916        Ok(Some(Expression::Star(Star {
52917            table: None,
52918            except,
52919            replace,
52920            rename,
52921            trailing_comments: Vec::new(),
52922            span: None,
52923        })))
52924    }
52925
52926    /// try_parse_identifier - Try to parse an identifier, returning None if not found
52927    fn try_parse_identifier(&mut self) -> Option<Identifier> {
52928        if self.is_identifier_token() {
52929            let token = self.advance();
52930            let quoted = token.token_type == TokenType::QuotedIdentifier;
52931            Some(Identifier {
52932                name: token.text,
52933                quoted,
52934                trailing_comments: Vec::new(),
52935                span: None,
52936            })
52937        } else {
52938            None
52939        }
52940    }
52941
52942    /// parse_star_except - Parse EXCEPT/EXCLUDE clause for Star
52943    /// Example: * EXCEPT (col1, col2)
52944    fn parse_star_except(&mut self) -> Result<Option<Vec<Identifier>>> {
52945        if !self.match_texts(&["EXCEPT", "EXCLUDE"]) {
52946            return Ok(None);
52947        }
52948
52949        // Parse (col1, col2, ...)
52950        if self.match_token(TokenType::LParen) {
52951            let mut columns = Vec::new();
52952            loop {
52953                if let Some(id) = self.try_parse_identifier() {
52954                    columns.push(id);
52955                } else if self.is_safe_keyword_as_identifier() {
52956                    // ClickHouse: allow keywords like 'key' as column names in EXCEPT
52957                    let token = self.advance();
52958                    columns.push(Identifier {
52959                        name: token.text,
52960                        quoted: false,
52961                        trailing_comments: Vec::new(),
52962                        span: None,
52963                    });
52964                } else {
52965                    break;
52966                }
52967                if !self.match_token(TokenType::Comma) {
52968                    break;
52969                }
52970            }
52971            self.match_token(TokenType::RParen);
52972            return Ok(Some(columns));
52973        }
52974
52975        // Single column without parens
52976        if let Some(id) = self.try_parse_identifier() {
52977            return Ok(Some(vec![id]));
52978        }
52979
52980        Ok(None)
52981    }
52982
52983    /// parse_star_replace - Parse REPLACE clause for Star
52984    /// Example: * REPLACE (col1 AS alias1, col2 AS alias2)
52985    fn parse_star_replace(&mut self) -> Result<Option<Vec<Alias>>> {
52986        if !self.match_texts(&["REPLACE"]) {
52987            return Ok(None);
52988        }
52989
52990        if self.match_token(TokenType::LParen) {
52991            let mut aliases = Vec::new();
52992            loop {
52993                // Parse expression AS alias
52994                if let Some(expr) = self.parse_disjunction()? {
52995                    let alias_name = if self.match_token(TokenType::As) {
52996                        self.try_parse_identifier()
52997                    } else {
52998                        None
52999                    };
53000
53001                    aliases.push(Alias {
53002                        this: expr,
53003                        alias: alias_name.unwrap_or_else(|| Identifier::new("")),
53004                        column_aliases: Vec::new(),
53005                        pre_alias_comments: Vec::new(),
53006                        trailing_comments: Vec::new(),
53007                        inferred_type: None,
53008                    });
53009                } else {
53010                    break;
53011                }
53012                if !self.match_token(TokenType::Comma) {
53013                    break;
53014                }
53015            }
53016            self.match_token(TokenType::RParen);
53017            return Ok(Some(aliases));
53018        }
53019
53020        Ok(None)
53021    }
53022
53023    /// parse_star_rename - Parse RENAME clause for Star
53024    /// Example: * RENAME (old_col AS new_col, ...)
53025    fn parse_star_rename(&mut self) -> Result<Option<Vec<(Identifier, Identifier)>>> {
53026        if !self.match_texts(&["RENAME"]) {
53027            return Ok(None);
53028        }
53029
53030        if self.match_token(TokenType::LParen) {
53031            let mut renames = Vec::new();
53032            loop {
53033                // Parse old_name AS new_name
53034                if let Some(old_name) = self.try_parse_identifier() {
53035                    if self.match_token(TokenType::As) {
53036                        if let Some(new_name) = self.try_parse_identifier() {
53037                            renames.push((old_name, new_name));
53038                        }
53039                    }
53040                } else {
53041                    break;
53042                }
53043                if !self.match_token(TokenType::Comma) {
53044                    break;
53045                }
53046            }
53047            self.match_token(TokenType::RParen);
53048            return Ok(Some(renames));
53049        }
53050
53051        Ok(None)
53052    }
53053
53054    /// parse_star_op - Helper to parse EXCEPT/REPLACE/RENAME with keywords
53055    /// Returns list of expressions if keywords match
53056    pub fn parse_star_op(&mut self, keywords: &[&str]) -> Result<Option<Vec<Expression>>> {
53057        if !self.match_texts(keywords) {
53058            return Ok(None);
53059        }
53060
53061        // If followed by paren, parse wrapped CSV
53062        if self.match_token(TokenType::LParen) {
53063            let expressions = self.parse_expression_list()?;
53064            self.match_token(TokenType::RParen);
53065            return Ok(Some(expressions));
53066        }
53067
53068        // Otherwise parse single aliased expression
53069        if let Some(expr) = self.parse_disjunction()? {
53070            // Try to parse explicit alias
53071            let result = if self.match_token(TokenType::As) {
53072                if let Some(alias_name) = self.try_parse_identifier() {
53073                    Expression::Alias(Box::new(Alias {
53074                        this: expr,
53075                        alias: alias_name,
53076                        column_aliases: Vec::new(),
53077                        pre_alias_comments: Vec::new(),
53078                        trailing_comments: Vec::new(),
53079                        inferred_type: None,
53080                    }))
53081                } else {
53082                    expr
53083                }
53084            } else {
53085                expr
53086            };
53087            return Ok(Some(vec![result]));
53088        }
53089
53090        Ok(None)
53091    }
53092
53093    /// parse_star_ops - Implemented from Python _parse_star_ops
53094    /// Creates a Star expression with EXCEPT/REPLACE/RENAME clauses
53095    /// Also handles * COLUMNS(pattern) syntax for DuckDB column selection
53096    pub fn parse_star_ops(&mut self) -> Result<Option<Expression>> {
53097        // Handle * COLUMNS(pattern) function (DuckDB)
53098        // This parses patterns like: * COLUMNS(c ILIKE '%suffix')
53099        if self.match_text_seq(&["COLUMNS"]) && self.check(TokenType::LParen) {
53100            // Parse the COLUMNS function arguments
53101            self.expect(TokenType::LParen)?;
53102            let this = self.parse_expression()?;
53103            self.expect(TokenType::RParen)?;
53104
53105            // Return a Columns expression with unpack=true (since it came from * COLUMNS())
53106            return Ok(Some(Expression::Columns(Box::new(Columns {
53107                this: Box::new(this),
53108                unpack: Some(Box::new(Expression::Boolean(BooleanLiteral {
53109                    value: true,
53110                }))),
53111            }))));
53112        }
53113
53114        // Parse EXCEPT/EXCLUDE
53115        let except_exprs = self.parse_star_op(&["EXCEPT", "EXCLUDE"])?;
53116        let except = except_exprs.map(|exprs| {
53117            exprs
53118                .into_iter()
53119                .filter_map(|e| match e {
53120                    Expression::Identifier(id) => Some(id),
53121                    Expression::Column(col) => Some(col.name),
53122                    _ => None,
53123                })
53124                .collect()
53125        });
53126
53127        // Parse REPLACE
53128        let replace_exprs = self.parse_star_op(&["REPLACE"])?;
53129        let replace = replace_exprs.map(|exprs| {
53130            exprs
53131                .into_iter()
53132                .filter_map(|e| match e {
53133                    Expression::Alias(a) => Some(*a),
53134                    _ => None,
53135                })
53136                .collect()
53137        });
53138
53139        // Parse RENAME
53140        let _rename_exprs = self.parse_star_op(&["RENAME"])?;
53141        let rename: Option<Vec<(Identifier, Identifier)>> = None; // Complex to extract from expressions
53142
53143        Ok(Some(Expression::Star(Star {
53144            table: None,
53145            except,
53146            replace,
53147            rename,
53148            trailing_comments: Vec::new(),
53149            span: None,
53150        })))
53151    }
53152
53153    /// parse_stored - Implemented from Python _parse_stored
53154    #[allow(unused_variables, unused_mut)]
53155    pub fn parse_stored(&mut self) -> Result<Option<Expression>> {
53156        if self.match_text_seq(&["BY"]) {
53157            return Ok(Some(Expression::InputOutputFormat(Box::new(
53158                InputOutputFormat {
53159                    input_format: None,
53160                    output_format: None,
53161                },
53162            ))));
53163        }
53164        if self.match_text_seq(&["INPUTFORMAT"]) {
53165            // Matched: INPUTFORMAT
53166            return Ok(None);
53167        }
53168        Ok(None)
53169    }
53170
53171    /// parse_stream - Implemented from Python _parse_stream
53172    #[allow(unused_variables, unused_mut)]
53173    pub fn parse_stream(&mut self) -> Result<Option<Expression>> {
53174        if self.match_text_seq(&["STREAM"]) {
53175            // Matched: STREAM
53176            return Ok(None);
53177        }
53178        Ok(None)
53179    }
53180
53181    /// parse_string - Parse string literal
53182    /// Python: if self._match_set(self.STRING_PARSERS): return STRING_PARSERS[token_type](...)
53183    pub fn parse_string(&mut self) -> Result<Option<Expression>> {
53184        // Regular string literal
53185        if self.match_token(TokenType::String) {
53186            let text = self.previous().text.clone();
53187            return Ok(Some(Expression::Literal(Literal::String(text))));
53188        }
53189        // National string (N'...')
53190        if self.match_token(TokenType::NationalString) {
53191            let text = self.previous().text.clone();
53192            return Ok(Some(Expression::Literal(Literal::NationalString(text))));
53193        }
53194        // Raw string (r"..." or r'...')
53195        if self.match_token(TokenType::RawString) {
53196            let text = self.previous().text.clone();
53197            return Ok(Some(Expression::Literal(Literal::RawString(text))));
53198        }
53199        // Heredoc string
53200        if self.match_token(TokenType::HeredocString) {
53201            let text = self.previous().text.clone();
53202            return Ok(Some(Expression::Literal(Literal::String(text))));
53203        }
53204        // Hex string (X'...' or 0x...)
53205        if self.match_token(TokenType::HexString) {
53206            let text = self.previous().text.clone();
53207            return Ok(Some(Expression::Literal(Literal::HexString(text))));
53208        }
53209        // Bit string (B'...')
53210        if self.match_token(TokenType::BitString) {
53211            let text = self.previous().text.clone();
53212            return Ok(Some(Expression::Literal(Literal::BitString(text))));
53213        }
53214        // Byte string (b"..." - BigQuery style)
53215        if self.match_token(TokenType::ByteString) {
53216            let text = self.previous().text.clone();
53217            return Ok(Some(Expression::Literal(Literal::ByteString(text))));
53218        }
53219        Ok(None)
53220    }
53221
53222    /// parse_string_agg - Parses STRING_AGG function arguments
53223    /// Python: parser.py:6849-6899
53224    /// Handles DISTINCT, separator, ORDER BY, ON OVERFLOW, WITHIN GROUP
53225    #[allow(unused_variables, unused_mut)]
53226    pub fn parse_string_agg(&mut self) -> Result<Option<Expression>> {
53227        // Check for DISTINCT
53228        let distinct = self.match_token(TokenType::Distinct);
53229
53230        // Parse main expression
53231        let this = self.parse_disjunction()?;
53232        if this.is_none() {
53233            return Ok(None);
53234        }
53235
53236        // Parse optional separator
53237        let separator = if self.match_token(TokenType::Comma) {
53238            self.parse_disjunction()?
53239        } else {
53240            None
53241        };
53242
53243        // Parse ON OVERFLOW clause
53244        let on_overflow = if self.match_text_seq(&["ON", "OVERFLOW"]) {
53245            if self.match_text_seq(&["ERROR"]) {
53246                Some(Box::new(Expression::Var(Box::new(Var {
53247                    this: "ERROR".to_string(),
53248                }))))
53249            } else {
53250                self.match_text_seq(&["TRUNCATE"]);
53251                let truncate_str = self.parse_string()?;
53252                let with_count = if self.match_text_seq(&["WITH", "COUNT"]) {
53253                    Some(true)
53254                } else if self.match_text_seq(&["WITHOUT", "COUNT"]) {
53255                    Some(false)
53256                } else {
53257                    None
53258                };
53259                Some(Box::new(Expression::OverflowTruncateBehavior(Box::new(
53260                    OverflowTruncateBehavior {
53261                        this: truncate_str.map(Box::new),
53262                        with_count: with_count
53263                            .map(|c| Box::new(Expression::Boolean(BooleanLiteral { value: c }))),
53264                    },
53265                ))))
53266            }
53267        } else {
53268            None
53269        };
53270
53271        // Parse ORDER BY or WITHIN GROUP
53272        let order_by = if self.match_token(TokenType::OrderBy) {
53273            Some(self.parse_expression_list()?)
53274        } else if self.match_text_seq(&["WITHIN", "GROUP"]) {
53275            self.match_token(TokenType::LParen);
53276            let order = self.parse_order()?;
53277            self.match_token(TokenType::RParen);
53278            order.map(|o| vec![o])
53279        } else {
53280            None
53281        };
53282
53283        // Return as GroupConcat (which is the canonical form for STRING_AGG)
53284        Ok(Some(Expression::GroupConcat(Box::new(GroupConcatFunc {
53285            this: this.unwrap(),
53286            separator: separator,
53287            order_by: None,
53288            distinct,
53289            filter: None,
53290            inferred_type: None,
53291        }))))
53292    }
53293
53294    /// parse_string_as_identifier - Parses a string literal as a quoted identifier
53295    /// Python: _parse_string_as_identifier
53296    /// Used for cases where a string can be used as an identifier (e.g., MySQL)
53297    pub fn parse_string_as_identifier(&mut self) -> Result<Option<Expression>> {
53298        if self.match_token(TokenType::String) {
53299            let text = self.previous().text.clone();
53300            // Remove quotes if present
53301            let name = if text.starts_with('\'') && text.ends_with('\'') && text.len() >= 2 {
53302                text[1..text.len() - 1].to_string()
53303            } else if text.starts_with('"') && text.ends_with('"') && text.len() >= 2 {
53304                text[1..text.len() - 1].to_string()
53305            } else {
53306                text
53307            };
53308
53309            Ok(Some(Expression::Identifier(Identifier {
53310                name,
53311                quoted: true,
53312                trailing_comments: Vec::new(),
53313                span: None,
53314            })))
53315        } else {
53316            Ok(None)
53317        }
53318    }
53319
53320    /// parse_struct_types - Delegates to parse_types
53321    #[allow(unused_variables, unused_mut)]
53322    pub fn parse_struct_types(&mut self) -> Result<Option<Expression>> {
53323        self.parse_types()
53324    }
53325
53326    /// parse_subquery - Ported from Python _parse_subquery
53327    /// Parses a parenthesized SELECT as subquery: (SELECT ...)
53328    #[allow(unused_variables, unused_mut)]
53329    pub fn parse_subquery(&mut self) -> Result<Option<Expression>> {
53330        // Check for opening paren
53331        if !self.match_token(TokenType::LParen) {
53332            return Ok(None);
53333        }
53334
53335        // Check if it's a SELECT or WITH statement
53336        if !self.check(TokenType::Select) && !self.check(TokenType::With) {
53337            // Not a subquery, retreat
53338            self.current -= 1;
53339            return Ok(None);
53340        }
53341
53342        // Parse the query
53343        let query = self.parse_statement()?;
53344        self.expect(TokenType::RParen)?;
53345
53346        // Parse optional table alias
53347        let alias = self.parse_table_alias_if_present()?;
53348
53349        Ok(Some(Expression::Subquery(Box::new(Subquery {
53350            this: query,
53351            alias,
53352            column_aliases: Vec::new(),
53353            order_by: None,
53354            limit: None,
53355            offset: None,
53356            lateral: false,
53357            modifiers_inside: false,
53358            trailing_comments: Vec::new(),
53359            distribute_by: None,
53360            sort_by: None,
53361            cluster_by: None,
53362            inferred_type: None,
53363        }))))
53364    }
53365
53366    /// Helper to parse table alias if present
53367    fn parse_table_alias_if_present(&mut self) -> Result<Option<Identifier>> {
53368        // Check for AS keyword
53369        let explicit_as = self.match_token(TokenType::As);
53370
53371        // ClickHouse: keywords can be used as table aliases when AS is explicit
53372        let is_keyword_alias = explicit_as
53373            && matches!(
53374                self.config.dialect,
53375                Some(crate::dialects::DialectType::ClickHouse)
53376            )
53377            && self.peek().token_type.is_keyword();
53378
53379        // Try to parse identifier
53380        if self.check(TokenType::Identifier)
53381            || self.check(TokenType::QuotedIdentifier)
53382            || is_keyword_alias
53383        {
53384            if is_keyword_alias
53385                && !self.check(TokenType::Identifier)
53386                && !self.check(TokenType::QuotedIdentifier)
53387            {
53388                let token = self.advance();
53389                return Ok(Some(Identifier::new(token.text)));
53390            }
53391            if let Some(Expression::Identifier(id)) = self.parse_identifier()? {
53392                return Ok(Some(id));
53393            }
53394        } else if explicit_as {
53395            // AS was present but no identifier follows - this is an error
53396            return Err(self.parse_error("Expected identifier after AS"));
53397        }
53398
53399        Ok(None)
53400    }
53401
53402    /// parse_substring - Ported from Python _parse_substring
53403    /// Parses SUBSTRING function with two syntax variants:
53404    /// 1. Standard SQL: SUBSTRING(str FROM start [FOR length])
53405    /// 2. Function style: SUBSTRING(str, start, length)
53406    #[allow(unused_variables, unused_mut)]
53407    pub fn parse_substring(&mut self) -> Result<Option<Expression>> {
53408        // Parse initial comma-separated arguments
53409        let mut args: Vec<Expression> = Vec::new();
53410
53411        // Parse first argument (the string)
53412        match self.parse_bitwise() {
53413            Ok(Some(expr)) => {
53414                let expr = self.try_clickhouse_func_arg_alias(expr);
53415                args.push(expr);
53416            }
53417            Ok(None) => return Ok(None),
53418            Err(e) => return Err(e),
53419        }
53420
53421        // Check for comma-separated additional arguments
53422        while self.match_token(TokenType::Comma) {
53423            match self.parse_bitwise() {
53424                Ok(Some(expr)) => {
53425                    let expr = self.try_clickhouse_func_arg_alias(expr);
53426                    args.push(expr);
53427                }
53428                Ok(None) => break,
53429                Err(e) => return Err(e),
53430            }
53431        }
53432
53433        // Check for FROM/FOR syntax (SQL standard)
53434        let mut start: Option<Expression> = None;
53435        let mut length: Option<Expression> = None;
53436        let mut from_for_syntax = false;
53437
53438        loop {
53439            if self.match_token(TokenType::From) {
53440                from_for_syntax = true;
53441                match self.parse_bitwise() {
53442                    Ok(Some(expr)) => {
53443                        let expr = self.try_clickhouse_func_arg_alias(expr);
53444                        start = Some(expr);
53445                    }
53446                    Ok(None) => {}
53447                    Err(e) => return Err(e),
53448                }
53449            } else if self.match_token(TokenType::For) {
53450                from_for_syntax = true;
53451                // If no start specified yet, default to 1
53452                if start.is_none() {
53453                    start = Some(Expression::Literal(Literal::Number("1".to_string())));
53454                }
53455                match self.parse_bitwise() {
53456                    Ok(Some(expr)) => {
53457                        let expr = self.try_clickhouse_func_arg_alias(expr);
53458                        length = Some(expr);
53459                    }
53460                    Ok(None) => {}
53461                    Err(e) => return Err(e),
53462                }
53463            } else {
53464                break;
53465            }
53466        }
53467
53468        // Build the substring expression
53469        if args.is_empty() {
53470            return Ok(None);
53471        }
53472
53473        let this = args.remove(0);
53474
53475        // Determine start and length
53476        let final_start = if let Some(s) = start {
53477            s
53478        } else if !args.is_empty() {
53479            args.remove(0)
53480        } else {
53481            Expression::Literal(Literal::Number("1".to_string()))
53482        };
53483
53484        let final_length = if length.is_some() {
53485            length
53486        } else if !args.is_empty() {
53487            Some(args.remove(0))
53488        } else {
53489            None
53490        };
53491
53492        Ok(Some(Expression::Substring(Box::new(SubstringFunc {
53493            this,
53494            start: final_start,
53495            length: final_length,
53496            from_for_syntax,
53497        }))))
53498    }
53499
53500    /// parse_system_versioning_property - Implemented from Python _parse_system_versioning_property
53501    /// Calls: parse_table_parts, parse_retention_period
53502    #[allow(unused_variables, unused_mut)]
53503    pub fn parse_system_versioning_property(&mut self) -> Result<Option<Expression>> {
53504        if self.match_text_seq(&["OFF"]) {
53505            return Ok(Some(Expression::WithSystemVersioningProperty(Box::new(
53506                WithSystemVersioningProperty {
53507                    on: None,
53508                    this: None,
53509                    data_consistency: None,
53510                    retention_period: None,
53511                    with_: None,
53512                },
53513            ))));
53514        }
53515        if self.match_text_seq(&["HISTORY_TABLE", "="]) {
53516            // Matched: HISTORY_TABLE =
53517            return Ok(None);
53518        }
53519        if self.match_text_seq(&["DATA_CONSISTENCY_CHECK", "="]) {
53520            // Matched: DATA_CONSISTENCY_CHECK =
53521            return Ok(None);
53522        }
53523        Ok(None)
53524    }
53525
53526    /// Parse PostgreSQL ROWS FROM syntax:
53527    /// ROWS FROM (func1(args) AS alias1(col1 type1, col2 type2), func2(...) AS alias2(...)) [WITH ORDINALITY] [AS outer_alias(...)]
53528    fn parse_rows_from(&mut self) -> Result<Expression> {
53529        // Expect opening paren
53530        self.expect(TokenType::LParen)?;
53531
53532        let mut expressions = Vec::new();
53533
53534        loop {
53535            // Parse each function expression inside ROWS FROM
53536            // Each element is: func_name(args) [AS alias(col1 type1, col2 type2, ...)]
53537            let func_expr = self.parse_rows_from_function()?;
53538            expressions.push(func_expr);
53539
53540            if !self.match_token(TokenType::Comma) {
53541                break;
53542            }
53543        }
53544
53545        self.expect(TokenType::RParen)?;
53546
53547        // Check for WITH ORDINALITY
53548        let ordinality =
53549            if self.match_token(TokenType::With) && self.match_token(TokenType::Ordinality) {
53550                true
53551            } else {
53552                false
53553            };
53554
53555        // Check for outer alias: AS alias(col1 type1, col2 type2, ...)
53556        let alias = if self.match_token(TokenType::As) {
53557            Some(Box::new(self.parse_rows_from_alias()?))
53558        } else {
53559            None
53560        };
53561
53562        Ok(Expression::RowsFrom(Box::new(RowsFrom {
53563            expressions,
53564            ordinality,
53565            alias,
53566        })))
53567    }
53568
53569    /// Parse a single function in ROWS FROM: func_name(args) [AS alias(col1 type1, ...)]
53570    fn parse_rows_from_function(&mut self) -> Result<Expression> {
53571        // Parse function name
53572        let func_name = self.expect_identifier_or_keyword()?;
53573
53574        // Parse function arguments
53575        self.expect(TokenType::LParen)?;
53576        let args = if self.check(TokenType::RParen) {
53577            Vec::new()
53578        } else {
53579            self.parse_function_arguments()?
53580        };
53581        self.expect(TokenType::RParen)?;
53582
53583        let func_expr = Expression::Function(Box::new(Function {
53584            name: func_name,
53585            args,
53586            distinct: false,
53587            trailing_comments: Vec::new(),
53588            use_bracket_syntax: false,
53589            no_parens: false,
53590            quoted: false,
53591            span: None,
53592            inferred_type: None,
53593        }));
53594
53595        // Check for AS alias(col1 type1, col2 type2, ...)
53596        // Return a Tuple(function, TableAlias) so the generator can output: FUNC() AS alias(col type)
53597        if self.match_token(TokenType::As) {
53598            let alias_expr = self.parse_rows_from_alias()?;
53599            Ok(Expression::Tuple(Box::new(Tuple {
53600                expressions: vec![func_expr, alias_expr],
53601            })))
53602        } else {
53603            Ok(func_expr)
53604        }
53605    }
53606
53607    /// Parse ROWS FROM alias with typed columns: alias_name(col1 type1, col2 type2, ...)
53608    fn parse_rows_from_alias(&mut self) -> Result<Expression> {
53609        let alias_name = self.expect_identifier_or_keyword_with_quoted()?;
53610
53611        // Check for column definitions: (col1 type1, col2 type2, ...)
53612        let columns = if self.match_token(TokenType::LParen) {
53613            let mut cols = Vec::new();
53614            loop {
53615                if self.check(TokenType::RParen) {
53616                    break;
53617                }
53618                // Parse column name (can be quoted)
53619                let col_name = self.expect_identifier_or_keyword_with_quoted()?;
53620                // Parse column type
53621                let col_type = self.parse_data_type()?;
53622                // Create ColumnDef expression, preserving the quoted status
53623                let mut col_def = ColumnDef::new(col_name.name.clone(), col_type);
53624                col_def.name = col_name; // Preserve the full identifier with quoted flag
53625                cols.push(Expression::ColumnDef(Box::new(col_def)));
53626
53627                if !self.match_token(TokenType::Comma) {
53628                    break;
53629                }
53630            }
53631            self.expect(TokenType::RParen)?;
53632            cols
53633        } else {
53634            Vec::new()
53635        };
53636
53637        Ok(Expression::TableAlias(Box::new(TableAlias {
53638            this: Some(Box::new(Expression::Identifier(alias_name))),
53639            columns,
53640        })))
53641    }
53642
53643    /// parse_table - Implemented from Python _parse_table
53644    /// Calls: parse_table_hints, parse_unnest, parse_partition
53645    #[allow(unused_variables, unused_mut)]
53646    pub fn parse_table(&mut self) -> Result<Option<Expression>> {
53647        if self.match_text_seq(&["ROWS", "FROM"]) {
53648            // ROWS FROM is handled by parse_rows_from() in parse_table_expression()
53649            return Ok(None);
53650        }
53651        if self.match_text_seq(&["*"]) {
53652            // Matched: *
53653            return Ok(None);
53654        }
53655        if self.match_text_seq(&["NOT", "INDEXED"]) {
53656            // Matched: NOT INDEXED
53657            return Ok(None);
53658        }
53659        Ok(None)
53660    }
53661
53662    /// parse_table_alias - Ported from Python _parse_table_alias
53663    /// Parses table alias: AS alias [(col1, col2, ...)]
53664    #[allow(unused_variables, unused_mut)]
53665    pub fn parse_table_alias(&mut self) -> Result<Option<Expression>> {
53666        // Check for AS keyword (optional in most dialects)
53667        let has_as = self.match_token(TokenType::As);
53668
53669        // Handle AS (col1, col2) - no alias name, just column aliases
53670        if has_as && self.check(TokenType::LParen) {
53671            // Parse (col1, col2, ...)
53672            self.advance(); // consume LParen
53673            let mut cols = Vec::new();
53674            loop {
53675                if self.check(TokenType::RParen) {
53676                    break;
53677                }
53678                if let Ok(Some(col)) = self.parse_id_var() {
53679                    cols.push(col);
53680                }
53681                if !self.match_token(TokenType::Comma) {
53682                    break;
53683                }
53684            }
53685            self.expect(TokenType::RParen)?;
53686            return Ok(Some(Expression::TableAlias(Box::new(TableAlias {
53687                this: None,
53688                columns: cols,
53689            }))));
53690        }
53691
53692        // Parse the alias identifier
53693        // ClickHouse: keywords can be used as table aliases (e.g., AS select, AS from)
53694        let is_keyword_alias = has_as
53695            && matches!(
53696                self.config.dialect,
53697                Some(crate::dialects::DialectType::ClickHouse)
53698            )
53699            && self.peek().token_type.is_keyword();
53700        if !self.check(TokenType::Identifier)
53701            && !self.check(TokenType::QuotedIdentifier)
53702            && !self.check(TokenType::Var)
53703            && !is_keyword_alias
53704        {
53705            if has_as {
53706                return Err(self.parse_error("Expected identifier after AS"));
53707            }
53708            return Ok(None);
53709        }
53710
53711        let alias_token = self.advance();
53712        let is_quoted = alias_token.token_type == TokenType::QuotedIdentifier;
53713        let mut alias_ident = Identifier::new(alias_token.text.clone());
53714        if is_quoted {
53715            alias_ident.quoted = true;
53716        }
53717        let alias = Expression::Identifier(alias_ident);
53718
53719        // Check for column list: (col1, col2, ...)
53720        let columns = if self.match_token(TokenType::LParen) {
53721            let mut cols = Vec::new();
53722            loop {
53723                if self.check(TokenType::RParen) {
53724                    break;
53725                }
53726                if let Ok(Some(col)) = self.parse_id_var() {
53727                    cols.push(col);
53728                }
53729                if !self.match_token(TokenType::Comma) {
53730                    break;
53731                }
53732            }
53733            self.expect(TokenType::RParen)?;
53734            cols
53735        } else {
53736            Vec::new()
53737        };
53738
53739        Ok(Some(Expression::TableAlias(Box::new(TableAlias {
53740            this: Some(Box::new(alias)),
53741            columns,
53742        }))))
53743    }
53744
53745    /// parse_table_hints - Ported from Python _parse_table_hints
53746    /// Parses table hints (SQL Server WITH (...) or MySQL USE/IGNORE/FORCE INDEX)
53747    #[allow(unused_variables, unused_mut)]
53748    pub fn parse_table_hints(&mut self) -> Result<Option<Expression>> {
53749        let mut hints = Vec::new();
53750
53751        // SQL Server style: WITH (hint1, hint2, ...)
53752        if self.match_text_seq(&["WITH"]) && self.match_token(TokenType::LParen) {
53753            let mut expressions = Vec::new();
53754            loop {
53755                // Parse function or variable as hint
53756                if let Some(func) = self.parse_function()? {
53757                    expressions.push(func);
53758                } else if let Some(var) = self.parse_var()? {
53759                    expressions.push(var);
53760                } else {
53761                    break;
53762                }
53763                if !self.match_token(TokenType::Comma) {
53764                    break;
53765                }
53766            }
53767            self.match_token(TokenType::RParen);
53768
53769            if !expressions.is_empty() {
53770                hints.push(Expression::WithTableHint(Box::new(WithTableHint {
53771                    expressions,
53772                })));
53773            }
53774        } else {
53775            // MySQL style: USE INDEX, IGNORE INDEX, FORCE INDEX
53776            while self.match_texts(&["USE", "IGNORE", "FORCE"]) {
53777                let hint_type = self.previous().text.to_uppercase();
53778
53779                // Match INDEX or KEY
53780                let _ = self.match_texts(&["INDEX", "KEY"]);
53781
53782                // Check for optional FOR clause: FOR JOIN, FOR ORDER BY, FOR GROUP BY
53783                let target = if self.match_text_seq(&["FOR"]) {
53784                    let target_token = self.advance();
53785                    let target_text = target_token.text.to_uppercase();
53786                    // For ORDER BY and GROUP BY, combine into a single target name
53787                    let full_target = if (target_text == "ORDER" || target_text == "GROUP")
53788                        && self.check(TokenType::By)
53789                    {
53790                        self.advance(); // consume BY
53791                        format!("{} BY", target_text)
53792                    } else {
53793                        target_text
53794                    };
53795                    Some(Box::new(Expression::Identifier(Identifier {
53796                        name: full_target,
53797                        quoted: false,
53798                        trailing_comments: Vec::new(),
53799                        span: None,
53800                    })))
53801                } else {
53802                    None
53803                };
53804
53805                // Parse wrapped identifiers (index names)
53806                let expressions = if self.match_token(TokenType::LParen) {
53807                    let mut ids = Vec::new();
53808                    loop {
53809                        if let Some(id) = self.parse_id_var()? {
53810                            ids.push(id);
53811                        }
53812                        if !self.match_token(TokenType::Comma) {
53813                            break;
53814                        }
53815                    }
53816                    self.match_token(TokenType::RParen);
53817                    ids
53818                } else {
53819                    Vec::new()
53820                };
53821
53822                hints.push(Expression::IndexTableHint(Box::new(IndexTableHint {
53823                    this: Box::new(Expression::Identifier(Identifier {
53824                        name: hint_type,
53825                        quoted: false,
53826                        trailing_comments: Vec::new(),
53827                        span: None,
53828                    })),
53829                    expressions,
53830                    target,
53831                })));
53832            }
53833        }
53834
53835        if hints.is_empty() {
53836            return Ok(None);
53837        }
53838
53839        // Return as a Tuple containing hints
53840        Ok(Some(Expression::Tuple(Box::new(Tuple {
53841            expressions: hints,
53842        }))))
53843    }
53844
53845    /// Parse TSQL TRUNCATE table hints: WITH (PARTITIONS(1, 2 TO 5, 10 TO 20, 84))
53846    /// Unlike regular table hints, PARTITIONS arguments can contain TO ranges.
53847    pub fn parse_truncate_table_hints(&mut self) -> Result<Option<Expression>> {
53848        if !self.match_text_seq(&["WITH"]) || !self.match_token(TokenType::LParen) {
53849            return Ok(None);
53850        }
53851
53852        let mut hints = Vec::new();
53853
53854        // Check for PARTITIONS specifically
53855        if self.check_identifier("PARTITIONS") {
53856            self.advance(); // consume PARTITIONS
53857            self.expect(TokenType::LParen)?;
53858
53859            // Parse partition ranges: 1, 2 TO 5, 10 TO 20, 84
53860            let mut parts = Vec::new();
53861            loop {
53862                if self.check(TokenType::RParen) {
53863                    break;
53864                }
53865                let low = self.parse_primary()?;
53866                if self.match_text_seq(&["TO"]) {
53867                    let high = self.parse_primary()?;
53868                    parts.push(Expression::PartitionRange(Box::new(PartitionRange {
53869                        this: Box::new(low),
53870                        expression: Some(Box::new(high)),
53871                        expressions: Vec::new(),
53872                    })));
53873                } else {
53874                    parts.push(low);
53875                }
53876                if !self.match_token(TokenType::Comma) {
53877                    break;
53878                }
53879            }
53880            self.expect(TokenType::RParen)?; // close PARTITIONS(...)
53881
53882            // Create an Anonymous function for PARTITIONS(...)
53883            hints.push(Expression::Anonymous(Box::new(Anonymous {
53884                this: Box::new(Expression::Identifier(Identifier {
53885                    name: "PARTITIONS".to_string(),
53886                    quoted: false,
53887                    trailing_comments: Vec::new(),
53888                    span: None,
53889                })),
53890                expressions: parts,
53891            })));
53892        } else {
53893            // Fall back to regular hint parsing (function or var)
53894            loop {
53895                if let Some(func) = self.parse_function()? {
53896                    hints.push(func);
53897                } else if let Some(var) = self.parse_var()? {
53898                    hints.push(var);
53899                } else {
53900                    break;
53901                }
53902                if !self.match_token(TokenType::Comma) {
53903                    break;
53904                }
53905            }
53906        }
53907
53908        self.expect(TokenType::RParen)?; // close WITH(...)
53909
53910        if hints.is_empty() {
53911            return Ok(None);
53912        }
53913
53914        // Wrap in WithTableHint then Tuple (same as parse_table_hints)
53915        let hint = Expression::WithTableHint(Box::new(WithTableHint { expressions: hints }));
53916
53917        Ok(Some(Expression::Tuple(Box::new(Tuple {
53918            expressions: vec![hint],
53919        }))))
53920    }
53921
53922    /// parse_table_part - Parse a single part of a table reference
53923    /// Tries: identifier, string as identifier, placeholder
53924    #[allow(unused_variables, unused_mut)]
53925    pub fn parse_table_part(&mut self) -> Result<Option<Expression>> {
53926        // Try to parse an identifier
53927        if let Some(id) = self.parse_id_var()? {
53928            return Ok(Some(id));
53929        }
53930
53931        // Try to parse a string as identifier
53932        if let Some(str_id) = self.parse_string_as_identifier()? {
53933            return Ok(Some(str_id));
53934        }
53935
53936        // Try to parse a placeholder
53937        if let Some(placeholder) = self.parse_placeholder()? {
53938            return Ok(Some(placeholder));
53939        }
53940
53941        // Accept keywords as identifiers in table part context (e.g., db.cluster where "cluster" is a keyword)
53942        // This mirrors Python sqlglot's ID_VAR_TOKENS which includes many keyword types
53943        if self.check_keyword_as_identifier() {
53944            let text = self.peek().text.clone();
53945            self.advance();
53946            return Ok(Some(Expression::Identifier(Identifier {
53947                name: text,
53948                quoted: false,
53949                trailing_comments: Vec::new(),
53950                span: None,
53951            })));
53952        }
53953
53954        Ok(None)
53955    }
53956
53957    /// Check if the current token is a keyword that can be used as an identifier in certain contexts
53958    /// This includes many SQL keywords like CLUSTER, TABLE, INDEX, etc.
53959    fn check_keyword_as_identifier(&self) -> bool {
53960        if self.is_at_end() {
53961            return false;
53962        }
53963        let token_type = self.peek().token_type;
53964        // Keywords that can be used as identifiers (similar to Python's ID_VAR_TOKENS)
53965        matches!(
53966            token_type,
53967            TokenType::Cluster
53968                | TokenType::Table
53969                | TokenType::Index
53970                | TokenType::View
53971                | TokenType::Database
53972                | TokenType::Schema
53973                | TokenType::Column
53974                | TokenType::Function
53975                | TokenType::Procedure
53976                | TokenType::Constraint
53977                | TokenType::Sequence
53978                | TokenType::Type
53979                | TokenType::Partition
53980                | TokenType::Comment
53981                | TokenType::Cache
53982                | TokenType::Commit
53983                | TokenType::Begin
53984                | TokenType::End
53985                | TokenType::Set
53986                | TokenType::Show
53987                | TokenType::Describe
53988                | TokenType::Use
53989                | TokenType::Execute
53990                | TokenType::Delete
53991                | TokenType::Update
53992                | TokenType::Merge
53993                | TokenType::Load
53994                | TokenType::Copy
53995                | TokenType::Truncate
53996                | TokenType::Replace
53997                | TokenType::Refresh
53998                | TokenType::Rename
53999                | TokenType::Filter
54000                | TokenType::Format
54001                | TokenType::First
54002                | TokenType::Next
54003                | TokenType::Last
54004                | TokenType::Keep
54005                | TokenType::Match
54006                | TokenType::Over
54007                | TokenType::Range
54008                | TokenType::Rows
54009                | TokenType::Row
54010                | TokenType::Offset
54011                | TokenType::Limit
54012                | TokenType::Top
54013                | TokenType::Cube
54014                | TokenType::Rollup
54015                | TokenType::Pivot
54016                | TokenType::Unpivot
54017                | TokenType::Window
54018                | TokenType::Recursive
54019                | TokenType::Unique
54020                | TokenType::Temporary
54021                | TokenType::Volatile
54022                | TokenType::References
54023                | TokenType::Natural
54024                | TokenType::Left
54025                | TokenType::Right
54026                | TokenType::Full
54027                | TokenType::Semi
54028                | TokenType::Anti
54029                | TokenType::Apply
54030                | TokenType::All
54031                | TokenType::Asc
54032                | TokenType::Desc
54033                | TokenType::Analyze
54034        )
54035    }
54036
54037    /// parse_table_parts - Parse catalog.schema.table or schema.table or table
54038    /// Returns a Table expression with all parts
54039    #[allow(unused_variables, unused_mut)]
54040    pub fn parse_table_parts(&mut self) -> Result<Option<Expression>> {
54041        // Parse the first part
54042        let first = self.parse_table_part()?;
54043        if first.is_none() {
54044            return Ok(None);
54045        }
54046
54047        let mut parts = vec![first.unwrap()];
54048
54049        // Parse additional dot-separated parts
54050        while self.match_token(TokenType::Dot) {
54051            if let Some(part) = self.parse_table_part()? {
54052                parts.push(part);
54053            } else {
54054                break;
54055            }
54056        }
54057
54058        // Convert parts to Table expression
54059        // Last part is table name, second-to-last is schema, third-to-last is catalog
54060        let (catalog, schema, name) = match parts.len() {
54061            1 => (None, None, parts.pop().unwrap()),
54062            2 => {
54063                let table = parts.pop().unwrap();
54064                let schema = parts.pop().unwrap();
54065                (None, Some(schema), table)
54066            }
54067            _ => {
54068                let table = parts.pop().unwrap();
54069                let schema = parts.pop().unwrap();
54070                let catalog = parts.pop();
54071                (catalog, Some(schema), table)
54072            }
54073        };
54074
54075        // Extract identifier from Expression
54076        let name_ident = match name {
54077            Expression::Identifier(id) => id,
54078            _ => Identifier::new(String::new()),
54079        };
54080        let schema_ident = schema.map(|s| match s {
54081            Expression::Identifier(id) => id,
54082            _ => Identifier::new(String::new()),
54083        });
54084        let catalog_ident = catalog.map(|c| match c {
54085            Expression::Identifier(id) => id,
54086            _ => Identifier::new(String::new()),
54087        });
54088
54089        Ok(Some(Expression::Table(TableRef {
54090            name: name_ident,
54091            schema: schema_ident,
54092            catalog: catalog_ident,
54093            alias: None,
54094            alias_explicit_as: false,
54095            column_aliases: Vec::new(),
54096            trailing_comments: Vec::new(),
54097            when: None,
54098            only: false,
54099            final_: false,
54100            table_sample: None,
54101            hints: Vec::new(),
54102            system_time: None,
54103            partitions: Vec::new(),
54104            identifier_func: None,
54105            changes: None,
54106            version: None,
54107            span: None,
54108        })))
54109    }
54110
54111    /// parse_table_sample - Implemented from Python _parse_table_sample
54112    /// Calls: parse_number, parse_factor, parse_placeholder
54113    #[allow(unused_variables, unused_mut)]
54114    pub fn parse_table_sample(&mut self) -> Result<Option<Expression>> {
54115        if self.match_text_seq(&["USING", "SAMPLE"]) {
54116            return Ok(Some(Expression::TableSample(Box::new(TableSample {
54117                this: None,
54118                sample: None,
54119                expressions: Vec::new(),
54120                method: None,
54121                bucket_numerator: None,
54122                bucket_denominator: None,
54123                bucket_field: None,
54124                percent: None,
54125                rows: None,
54126                size: None,
54127                seed: None,
54128            }))));
54129        }
54130        if self.match_text_seq(&["BUCKET"]) {
54131            // Matched: BUCKET
54132            return Ok(None);
54133        }
54134        if self.match_text_seq(&["OUT", "OF"]) {
54135            // Matched: OUT OF
54136            return Ok(None);
54137        }
54138        if self.match_texts(&["SEED", "REPEATABLE"]) {
54139            // Matched one of: SEED, REPEATABLE
54140            return Ok(None);
54141        }
54142        Ok(None)
54143    }
54144
54145    /// parse_term - Parses addition/subtraction expressions (+ - operators)
54146    /// Python: _parse_term
54147    /// Delegates to the existing parse_addition in the operator precedence chain
54148    pub fn parse_term(&mut self) -> Result<Option<Expression>> {
54149        // Delegate to the existing addition parsing
54150        match self.parse_addition() {
54151            Ok(expr) => Ok(Some(expr)),
54152            Err(_) => Ok(None),
54153        }
54154    }
54155
54156    /// parse_to_table - ClickHouse TO table property
54157    /// Parses: TO table_name
54158    #[allow(unused_variables, unused_mut)]
54159    pub fn parse_to_table(&mut self) -> Result<Option<Expression>> {
54160        // Parse the table reference
54161        let table = self.parse_table_parts()?;
54162        if table.is_none() {
54163            return Ok(None);
54164        }
54165
54166        Ok(Some(Expression::ToTableProperty(Box::new(
54167            ToTableProperty {
54168                this: Box::new(table.unwrap()),
54169            },
54170        ))))
54171    }
54172
54173    /// parse_tokens - Operator precedence parser
54174    #[allow(unused_variables, unused_mut)]
54175    pub fn parse_tokens(&mut self) -> Result<Option<Expression>> {
54176        // Uses operator precedence parsing pattern
54177        Ok(None)
54178    }
54179
54180    /// parse_trim - Ported from Python _parse_trim
54181    /// Parses TRIM function: TRIM([BOTH|LEADING|TRAILING] chars FROM str) or TRIM(str, chars)
54182    #[allow(unused_variables, unused_mut)]
54183    pub fn parse_trim(&mut self) -> Result<Option<Expression>> {
54184        // Check for position keyword (BOTH, LEADING, TRAILING)
54185        let (position, position_explicit) = if self.match_texts(&["BOTH"]) {
54186            (TrimPosition::Both, true)
54187        } else if self.match_texts(&["LEADING"]) {
54188            (TrimPosition::Leading, true)
54189        } else if self.match_texts(&["TRAILING"]) {
54190            (TrimPosition::Trailing, true)
54191        } else {
54192            (TrimPosition::Both, false)
54193        };
54194
54195        // Parse first expression
54196        let first = match self.parse_bitwise() {
54197            Ok(Some(expr)) => self.try_clickhouse_func_arg_alias(expr),
54198            Ok(None) => return Ok(None),
54199            Err(e) => return Err(e),
54200        };
54201
54202        // Check for FROM or comma to see if there's a second expression
54203        let (this, characters, sql_standard_syntax) = if self.match_token(TokenType::From) {
54204            // SQL standard syntax: TRIM([position] chars FROM str)
54205            let second = match self.parse_bitwise() {
54206                Ok(Some(expr)) => self.try_clickhouse_func_arg_alias(expr),
54207                Ok(None) => return Err(self.parse_error("Expected expression after FROM in TRIM")),
54208                Err(e) => return Err(e),
54209            };
54210            // In SQL standard syntax: first is characters, second is the string
54211            (second, Some(first), true)
54212        } else if self.match_token(TokenType::Comma) {
54213            // Function syntax: TRIM(a, b)
54214            let second = match self.parse_bitwise() {
54215                Ok(Some(expr)) => Some(expr),
54216                Ok(None) => None,
54217                Err(e) => return Err(e),
54218            };
54219            // In Spark, comma syntax is TRIM(chars, str) - pattern first
54220            // In other dialects, comma syntax is TRIM(str, chars) - string first
54221            let trim_pattern_first = matches!(
54222                self.config.dialect,
54223                Some(crate::dialects::DialectType::Spark)
54224            );
54225            if trim_pattern_first && second.is_some() {
54226                // first=chars, second=str
54227                (second.unwrap(), Some(first), false)
54228            } else {
54229                (first, second, false)
54230            }
54231        } else {
54232            // Single argument: TRIM(str)
54233            (first, None, false)
54234        };
54235
54236        Ok(Some(Expression::Trim(Box::new(TrimFunc {
54237            this,
54238            characters,
54239            position,
54240            sql_standard_syntax,
54241            position_explicit,
54242        }))))
54243    }
54244
54245    /// parse_truncate_table - Implemented from Python _parse_truncate_table
54246    /// Calls: parse_on_property, parse_partition, parse_function
54247    #[allow(unused_variables, unused_mut)]
54248    pub fn parse_truncate_table(&mut self) -> Result<Option<Expression>> {
54249        if self.match_text_seq(&["RESTART", "IDENTITY"]) {
54250            return Ok(Some(Expression::TruncateTable(Box::new(TruncateTable {
54251                expressions: Vec::new(),
54252                is_database: None,
54253                exists: false,
54254                only: None,
54255                cluster: None,
54256                identity: None,
54257                option: None,
54258                partition: None,
54259            }))));
54260        }
54261        if self.match_text_seq(&["CONTINUE", "IDENTITY"]) {
54262            // Matched: CONTINUE IDENTITY
54263            return Ok(None);
54264        }
54265        if self.match_text_seq(&["CASCADE"]) {
54266            // Matched: CASCADE
54267            return Ok(None);
54268        }
54269        Ok(None)
54270    }
54271
54272    /// parse_ttl - Implemented from Python _parse_ttl
54273    /// Parses ClickHouse TTL expression with optional DELETE, RECOMPRESS, TO DISK/VOLUME
54274    pub fn parse_ttl(&mut self) -> Result<Option<Expression>> {
54275        // Parse CSV of TTL actions
54276        let mut expressions = Vec::new();
54277
54278        loop {
54279            // Parse the base expression
54280            let base_start = self.current;
54281            let this = match self.parse_bitwise() {
54282                Ok(Some(expr)) => expr,
54283                _ => {
54284                    self.current = base_start;
54285                    let mut paren_depth = 0usize;
54286                    while !self.is_at_end() {
54287                        if paren_depth == 0
54288                            && (self.check(TokenType::Comma)
54289                                || self.peek().text.eq_ignore_ascii_case("DELETE")
54290                                || self.peek().text.eq_ignore_ascii_case("RECOMPRESS")
54291                                || self.peek().text.eq_ignore_ascii_case("TO")
54292                                || self.peek().text.eq_ignore_ascii_case("WHERE")
54293                                || self.peek().text.eq_ignore_ascii_case("GROUP")
54294                                || self.peek().text.eq_ignore_ascii_case("SET"))
54295                        {
54296                            break;
54297                        }
54298                        if self.check(TokenType::LParen) {
54299                            paren_depth += 1;
54300                        } else if self.check(TokenType::RParen) {
54301                            if paren_depth == 0 {
54302                                break;
54303                            }
54304                            paren_depth -= 1;
54305                        }
54306                        self.advance();
54307                    }
54308                    if self.current == base_start {
54309                        break;
54310                    }
54311                    let raw = self
54312                        .tokens_to_sql(base_start, self.current)
54313                        .trim()
54314                        .to_string();
54315                    Expression::Var(Box::new(Var { this: raw }))
54316                }
54317            };
54318
54319            // Check for TTL action
54320            let action = if self.match_text_seq(&["DELETE"]) {
54321                Expression::MergeTreeTTLAction(Box::new(MergeTreeTTLAction {
54322                    this: Box::new(this),
54323                    delete: Some(Box::new(Expression::Boolean(BooleanLiteral {
54324                        value: true,
54325                    }))),
54326                    recompress: None,
54327                    to_disk: None,
54328                    to_volume: None,
54329                }))
54330            } else if self.match_text_seq(&["RECOMPRESS"]) {
54331                let recompress = if self.match_identifier("CODEC") {
54332                    self.expect(TokenType::LParen)?;
54333                    let mut args = Vec::new();
54334                    if !self.check(TokenType::RParen) {
54335                        args.push(self.parse_expression()?);
54336                        while self.match_token(TokenType::Comma) {
54337                            args.push(self.parse_expression()?);
54338                        }
54339                    }
54340                    self.expect(TokenType::RParen)?;
54341                    Some(Box::new(Expression::Function(Box::new(Function::new(
54342                        "CODEC".to_string(),
54343                        args,
54344                    )))))
54345                } else {
54346                    self.parse_bitwise()?.map(Box::new)
54347                };
54348                Expression::MergeTreeTTLAction(Box::new(MergeTreeTTLAction {
54349                    this: Box::new(this),
54350                    delete: None,
54351                    recompress,
54352                    to_disk: None,
54353                    to_volume: None,
54354                }))
54355            } else if self.match_text_seq(&["TO", "DISK"]) {
54356                let to_disk = self.parse_string()?.map(Box::new);
54357                Expression::MergeTreeTTLAction(Box::new(MergeTreeTTLAction {
54358                    this: Box::new(this),
54359                    delete: None,
54360                    recompress: None,
54361                    to_disk,
54362                    to_volume: None,
54363                }))
54364            } else if self.match_text_seq(&["TO", "VOLUME"]) {
54365                let to_volume = self.parse_string()?.map(Box::new);
54366                Expression::MergeTreeTTLAction(Box::new(MergeTreeTTLAction {
54367                    this: Box::new(this),
54368                    delete: None,
54369                    recompress: None,
54370                    to_disk: None,
54371                    to_volume,
54372                }))
54373            } else {
54374                this
54375            };
54376
54377            expressions.push(action);
54378
54379            if !self.match_token(TokenType::Comma) {
54380                break;
54381            }
54382        }
54383
54384        // Parse optional top-level WHERE clause (for backwards compatibility)
54385        let where_ = self.parse_where()?.map(Box::new);
54386
54387        // Parse optional GROUP BY
54388        let group = if self.match_token(TokenType::Group) {
54389            self.expect(TokenType::By)?;
54390            let mut exprs = Vec::new();
54391            exprs.push(self.parse_expression()?);
54392            while self.match_token(TokenType::Comma) {
54393                exprs.push(self.parse_expression()?);
54394            }
54395            Some(Box::new(Expression::Group(Box::new(Group {
54396                expressions: exprs,
54397                grouping_sets: None,
54398                cube: None,
54399                rollup: None,
54400                totals: None,
54401                all: None,
54402            }))))
54403        } else {
54404            None
54405        };
54406
54407        // Parse optional SET (aggregates) after GROUP BY
54408        let aggregates = if group.is_some() && self.match_token(TokenType::Set) {
54409            let mut aggs = Vec::new();
54410            loop {
54411                aggs.push(self.parse_expression()?);
54412                if !self.match_token(TokenType::Comma) {
54413                    break;
54414                }
54415            }
54416            if aggs.is_empty() {
54417                None
54418            } else {
54419                Some(Box::new(Expression::Tuple(Box::new(Tuple {
54420                    expressions: aggs,
54421                }))))
54422            }
54423        } else {
54424            None
54425        };
54426
54427        Ok(Some(Expression::MergeTreeTTL(Box::new(MergeTreeTTL {
54428            expressions,
54429            where_,
54430            group,
54431            aggregates,
54432        }))))
54433    }
54434
54435    /// parse_type - Parses a data type expression
54436    /// Python: _parse_type
54437    pub fn parse_type(&mut self) -> Result<Option<Expression>> {
54438        // First try to parse an interval
54439        if let Some(interval) = self.parse_interval()? {
54440            return self.parse_column_ops_with_expr(Some(interval));
54441        }
54442
54443        // Try to parse a data type
54444        let data_type = self.parse_types()?;
54445
54446        if let Some(dt) = data_type {
54447            // If it's a Cast (BigQuery inline constructor), apply column ops
54448            if matches!(dt, Expression::Cast(_)) {
54449                return self.parse_column_ops_with_expr(Some(dt));
54450            }
54451
54452            // Try to parse a primary expression after the type
54453            let start_pos = self.current;
54454            if let Some(primary) = self.parse_primary_or_var()? {
54455                // If it's a literal, this might be a type cast like DATE '2020-01-01'
54456                if let Expression::Literal(_) = &primary {
54457                    let result = self.parse_column_ops_with_expr(Some(primary))?;
54458                    if let Some(value) = result {
54459                        // Create a Cast expression
54460                        if let Expression::DataType(data_type_struct) = dt {
54461                            return Ok(Some(Expression::Cast(Box::new(Cast {
54462                                this: value,
54463                                to: data_type_struct,
54464                                trailing_comments: Vec::new(),
54465                                double_colon_syntax: false,
54466                                format: None,
54467                                default: None,
54468                                inferred_type: None,
54469                            }))));
54470                        }
54471                    }
54472                }
54473                // Backtrack if not a type-literal pattern
54474                self.current = start_pos;
54475            }
54476
54477            return Ok(Some(dt));
54478        }
54479
54480        Ok(None)
54481    }
54482
54483    /// parse_type_size - Ported from Python _parse_type_size
54484    /// Parses type size parameters like 10 in VARCHAR(10) or 10, 2 in DECIMAL(10, 2)
54485    #[allow(unused_variables, unused_mut)]
54486    pub fn parse_type_size(&mut self) -> Result<Option<Expression>> {
54487        // First try to parse a type - this handles both numeric literals and type names
54488        let this = self.parse_type()?;
54489
54490        if this.is_none() {
54491            return Ok(None);
54492        }
54493
54494        let mut result = this.unwrap();
54495
54496        // If it's a Column with no table, convert it to an Identifier (var)
54497        // This handles cases like CHAR in VARCHAR(100 CHAR)
54498        if let Expression::Column(ref col) = result {
54499            if col.table.is_none() {
54500                result = Expression::Identifier(col.name.clone());
54501            }
54502        }
54503
54504        // Check for optional expression after the type (e.g., "CHAR" in "100 CHAR")
54505        // This is for byte/char length specifiers in some dialects
54506        if let Some(var_token) = self.parse_var()? {
54507            // We have an additional specifier, combine them
54508            // For now, just return the original result since Rust doesn't have DataTypeParam
54509            // The var expression would be attached as an expression in Python
54510        }
54511
54512        Ok(Some(result))
54513    }
54514
54515    /// parse_types - Implemented from Python _parse_types
54516    /// Calls: parse_string
54517    #[allow(unused_variables, unused_mut)]
54518    pub fn parse_types(&mut self) -> Result<Option<Expression>> {
54519        if self.match_text_seq(&["SYSUDTLIB", "."]) {
54520            return Ok(Some(Expression::Identifier(Identifier {
54521                name: String::new(),
54522                quoted: false,
54523                trailing_comments: Vec::new(),
54524                span: None,
54525            })));
54526        }
54527        if self.match_text_seq(&["WITH", "TIME", "ZONE"]) {
54528            // Matched: WITH TIME ZONE
54529            return Ok(None);
54530        }
54531        if self.match_text_seq(&["WITH", "LOCAL", "TIME", "ZONE"]) {
54532            // Matched: WITH LOCAL TIME ZONE
54533            return Ok(None);
54534        }
54535        Ok(None)
54536    }
54537
54538    /// parse_unique - Implemented from Python _parse_unique
54539    /// Parses UNIQUE [KEY|INDEX] [NULLS NOT DISTINCT] [(columns)] [USING index_type]
54540    #[allow(unused_variables, unused_mut)]
54541    pub fn parse_unique(&mut self) -> Result<Option<Expression>> {
54542        // Check for optional KEY/INDEX
54543        let _ = self.match_texts(&["KEY", "INDEX"]);
54544
54545        // Check for NULLS NOT DISTINCT (PostgreSQL 15+ feature)
54546        let nulls = if self.match_text_seq(&["NULLS", "NOT", "DISTINCT"]) {
54547            Some(Box::new(Expression::Boolean(BooleanLiteral {
54548                value: true,
54549            })))
54550        } else {
54551            None
54552        };
54553
54554        // Parse the optional key name and schema (column list)
54555        let unique_key = self.parse_unique_key()?;
54556        let this = self.parse_schema_with_this(unique_key)?;
54557
54558        // Parse optional USING index_type
54559        let index_type = if self.match_token(TokenType::Using) {
54560            self.advance();
54561            Some(Box::new(Expression::Var(Box::new(Var {
54562                this: self.previous().text.clone(),
54563            }))))
54564        } else {
54565            None
54566        };
54567
54568        Ok(Some(Expression::UniqueColumnConstraint(Box::new(
54569            UniqueColumnConstraint {
54570                this: this.map(Box::new),
54571                index_type,
54572                on_conflict: None,
54573                nulls,
54574                options: Vec::new(),
54575            },
54576        ))))
54577    }
54578
54579    /// parse_unique_key - Parse the key/index name for UNIQUE constraint
54580    /// Simply parses an identifier
54581    #[allow(unused_variables, unused_mut)]
54582    pub fn parse_unique_key(&mut self) -> Result<Option<Expression>> {
54583        self.parse_id_var()
54584    }
54585
54586    /// parse_unnest - Ported from Python _parse_unnest
54587    /// Parses UNNEST(array_expr) [WITH ORDINALITY] [AS alias]
54588    #[allow(unused_variables, unused_mut)]
54589    pub fn parse_unnest(&mut self) -> Result<Option<Expression>> {
54590        // Check for UNNEST keyword
54591        if !self.match_texts(&["UNNEST"]) {
54592            return Ok(None);
54593        }
54594
54595        // Expect opening parenthesis
54596        if !self.match_token(TokenType::LParen) {
54597            return Ok(None);
54598        }
54599
54600        // Parse comma-separated array expression(s): UNNEST(arr1, arr2, ...)
54601        let this = match self.parse_expression() {
54602            Ok(expr) => expr,
54603            Err(e) => return Err(e),
54604        };
54605
54606        let mut extra_expressions = Vec::new();
54607        while self.match_token(TokenType::Comma) {
54608            let expr = self.parse_expression()?;
54609            extra_expressions.push(expr);
54610        }
54611
54612        // Expect closing parenthesis
54613        self.expect(TokenType::RParen)?;
54614
54615        // Check for WITH ORDINALITY
54616        let with_ordinality = self.match_text_seq(&["WITH", "ORDINALITY"]);
54617
54618        // Parse optional alias
54619        let alias = if self.match_token(TokenType::As)
54620            || self.check(TokenType::Identifier)
54621            || self.check(TokenType::QuotedIdentifier)
54622        {
54623            if self.check(TokenType::Identifier) || self.check(TokenType::QuotedIdentifier) {
54624                let is_quoted = self.check(TokenType::QuotedIdentifier);
54625                let token = self.advance();
54626                let mut ident = Identifier::new(token.text.clone());
54627                if is_quoted {
54628                    ident.quoted = true;
54629                }
54630                Some(ident)
54631            } else {
54632                None
54633            }
54634        } else {
54635            None
54636        };
54637
54638        Ok(Some(Expression::Unnest(Box::new(UnnestFunc {
54639            this,
54640            expressions: extra_expressions,
54641            with_ordinality,
54642            alias,
54643            offset_alias: None,
54644        }))))
54645    }
54646
54647    /// parse_unpivot_columns - Implemented from Python _parse_unpivot_columns
54648    /// Python: parser.py:4454-4462
54649    /// Parses INTO NAME column VALUE col1, col2, ...
54650    #[allow(unused_variables, unused_mut)]
54651    pub fn parse_unpivot_columns(&mut self) -> Result<Option<Expression>> {
54652        // Must match INTO keyword
54653        if !self.match_token(TokenType::Into) {
54654            return Ok(None);
54655        }
54656
54657        // Parse NAME column
54658        let this = if self.match_text_seq(&["NAME"]) {
54659            self.parse_column()?
54660        } else {
54661            None
54662        };
54663
54664        // Parse VALUE columns
54665        let expressions = if self.match_text_seq(&["VALUE"]) {
54666            let mut cols = Vec::new();
54667            loop {
54668                if let Some(col) = self.parse_column()? {
54669                    cols.push(col);
54670                }
54671                if !self.match_token(TokenType::Comma) {
54672                    break;
54673                }
54674            }
54675            cols
54676        } else {
54677            Vec::new()
54678        };
54679
54680        // If we have either this or expressions, return an UnpivotColumns
54681        if this.is_some() || !expressions.is_empty() {
54682            Ok(Some(Expression::UnpivotColumns(Box::new(UnpivotColumns {
54683                this: Box::new(this.unwrap_or(Expression::Null(Null))),
54684                expressions,
54685            }))))
54686        } else {
54687            Ok(None)
54688        }
54689    }
54690
54691    /// parse_unquoted_field - Parses a field and converts unquoted identifiers to Var
54692    /// Python: _parse_unquoted_field
54693    pub fn parse_unquoted_field(&mut self) -> Result<Option<Expression>> {
54694        let field = self.parse_field()?;
54695
54696        // If field is an unquoted identifier, convert it to a Var
54697        match field {
54698            Some(Expression::Identifier(id)) if !id.quoted => {
54699                Ok(Some(Expression::Var(Box::new(Var { this: id.name }))))
54700            }
54701            other => Ok(other),
54702        }
54703    }
54704
54705    /// parse_user_defined_function - Parses user-defined function call
54706    /// Python: _parse_user_defined_function
54707    /// Parses: schema.function_name(param1, param2, ...)
54708    pub fn parse_user_defined_function(&mut self) -> Result<Option<Expression>> {
54709        // Parse table parts (potentially schema-qualified function name)
54710        let this = self.parse_table_parts()?;
54711        if this.is_none() {
54712            return Ok(None);
54713        }
54714
54715        // If no L_PAREN, return just the table parts (not a function call)
54716        if !self.match_token(TokenType::LParen) {
54717            return Ok(this);
54718        }
54719
54720        // Parse function parameters
54721        let mut expressions = Vec::new();
54722        if !self.check(TokenType::RParen) {
54723            loop {
54724                if let Some(param) = self.parse_function_parameter()? {
54725                    expressions.push(param);
54726                }
54727                if !self.match_token(TokenType::Comma) {
54728                    break;
54729                }
54730            }
54731        }
54732
54733        self.match_token(TokenType::RParen);
54734
54735        Ok(Some(Expression::UserDefinedFunction(Box::new(
54736            UserDefinedFunction {
54737                this: Box::new(this.unwrap()),
54738                expressions,
54739                wrapped: Some(Box::new(Expression::Boolean(BooleanLiteral {
54740                    value: true,
54741                }))),
54742            },
54743        ))))
54744    }
54745
54746    /// parse_user_defined_function_expression - Parse user-defined function expression
54747    #[allow(unused_variables, unused_mut)]
54748    pub fn parse_user_defined_function_expression(&mut self) -> Result<Option<Expression>> {
54749        // Parse a statement and wrap in Some if successful
54750        match self.parse_statement() {
54751            Ok(stmt) => Ok(Some(stmt)),
54752            Err(_) => Ok(None),
54753        }
54754    }
54755
54756    /// parse_user_defined_type - Parses a user-defined type reference
54757    /// Python: _parse_user_defined_type
54758    /// Format: schema.type_name or just type_name
54759    pub fn parse_user_defined_type(
54760        &mut self,
54761        identifier: Identifier,
54762    ) -> Result<Option<Expression>> {
54763        let mut type_name = identifier.name.clone();
54764
54765        // Handle dotted names (schema.type_name)
54766        while self.match_token(TokenType::Dot) {
54767            if !self.is_at_end() {
54768                let token = self.advance();
54769                type_name = format!("{}.{}", type_name, token.text);
54770            } else {
54771                break;
54772            }
54773        }
54774
54775        // Return as a custom data type
54776        Ok(Some(Expression::DataType(DataType::Custom {
54777            name: type_name,
54778        })))
54779    }
54780
54781    /// parse_using_identifiers - Ported from Python _parse_using_identifiers
54782    /// Parses (col1, col2, ...) for JOIN USING clause
54783    #[allow(unused_variables, unused_mut)]
54784    pub fn parse_using_identifiers(&mut self) -> Result<Option<Expression>> {
54785        // Optionally expect opening paren
54786        let has_paren = self.match_token(TokenType::LParen);
54787
54788        let mut identifiers = Vec::new();
54789        loop {
54790            // Parse column as identifier
54791            if let Some(expr) = self.parse_identifier()? {
54792                identifiers.push(expr);
54793            } else {
54794                break;
54795            }
54796            if !self.match_token(TokenType::Comma) {
54797                break;
54798            }
54799        }
54800
54801        // Match closing paren if we matched opening
54802        if has_paren {
54803            self.expect(TokenType::RParen)?;
54804        }
54805
54806        if identifiers.is_empty() {
54807            Ok(None)
54808        } else {
54809            Ok(Some(Expression::Tuple(Box::new(Tuple {
54810                expressions: identifiers,
54811            }))))
54812        }
54813    }
54814
54815    /// parse_value - Parses a value tuple for INSERT VALUES clause
54816    /// Python: _parse_value
54817    /// Syntax: (expr1, expr2, ...) or just expr (single value)
54818    pub fn parse_value(&mut self) -> Result<Option<Expression>> {
54819        // Check for parenthesized list of expressions
54820        if self.match_token(TokenType::LParen) {
54821            let mut expressions = Vec::new();
54822
54823            if !self.check(TokenType::RParen) {
54824                loop {
54825                    // Support DEFAULT keyword in VALUES
54826                    if self.match_texts(&["DEFAULT"]) {
54827                        let text = self.previous().text.to_uppercase();
54828                        expressions.push(Expression::Var(Box::new(Var { this: text })));
54829                    } else {
54830                        // Try to parse an expression
54831                        let saved_pos = self.current;
54832                        match self.parse_expression() {
54833                            Ok(expr) => expressions.push(expr),
54834                            Err(_) => {
54835                                self.current = saved_pos;
54836                            }
54837                        }
54838                    }
54839
54840                    if !self.match_token(TokenType::Comma) {
54841                        break;
54842                    }
54843                }
54844            }
54845
54846            self.match_token(TokenType::RParen);
54847            return Ok(Some(Expression::Tuple(Box::new(Tuple { expressions }))));
54848        }
54849
54850        // Single value without parentheses (some dialects support VALUES 1, 2)
54851        let saved_pos = self.current;
54852        match self.parse_expression() {
54853            Ok(expr) => {
54854                return Ok(Some(Expression::Tuple(Box::new(Tuple {
54855                    expressions: vec![expr],
54856                }))));
54857            }
54858            Err(_) => {
54859                self.current = saved_pos;
54860            }
54861        }
54862
54863        Ok(None)
54864    }
54865
54866    /// parse_var - Parse variable reference (unquoted identifier)
54867    /// Python: if self._match(TokenType.VAR): return exp.Var(this=self._prev.text)
54868    pub fn parse_var(&mut self) -> Result<Option<Expression>> {
54869        if self.match_token(TokenType::Var) {
54870            let text = self.previous().text.clone();
54871            return Ok(Some(Expression::Var(Box::new(Var { this: text }))));
54872        }
54873        // Fall back to placeholder parsing
54874        self.parse_placeholder()
54875    }
54876
54877    /// parse_var_from_options - Ported from Python _parse_var_from_options
54878    /// Parses a variable/identifier from a predefined set of options
54879    #[allow(unused_variables, unused_mut)]
54880    pub fn parse_var_from_options(&mut self) -> Result<Option<Expression>> {
54881        // Without the options dict, we just try to parse an identifier
54882        if self.is_at_end() {
54883            return Ok(None);
54884        }
54885
54886        // Get current token text as the option
54887        let token = self.peek().clone();
54888        if token.token_type == TokenType::Identifier || token.token_type == TokenType::Var {
54889            self.advance();
54890            return Ok(Some(Expression::Var(Box::new(Var {
54891                this: token.text.to_uppercase(),
54892            }))));
54893        }
54894
54895        Ok(None)
54896    }
54897
54898    /// parse_var_or_string - Delegates to parse_string
54899    #[allow(unused_variables, unused_mut)]
54900    /// parse_var_or_string - Parses a string literal or a variable
54901    /// Python: parser.py:7506-7507
54902    pub fn parse_var_or_string(&mut self) -> Result<Option<Expression>> {
54903        // Try string first, then var
54904        if let Some(s) = self.parse_string()? {
54905            return Ok(Some(s));
54906        }
54907        self.parse_var_any_token()
54908    }
54909
54910    /// parse_vector_expressions - Transforms vector type parameters
54911    /// Python: _parse_vector_expressions
54912    /// In Python, this transforms a list of expressions where the first element (identifier)
54913    /// is converted to a DataType. In Rust, since VECTOR type parsing is handled inline in
54914    /// parse_data_type, this method parses vector expressions (element_type, dimension) from
54915    /// the current position and returns them as a Tuple.
54916    pub fn parse_vector_expressions(&mut self) -> Result<Option<Expression>> {
54917        let mut expressions = Vec::new();
54918
54919        // Parse element type - convert identifier to DataType
54920        if let Some(type_expr) = self.parse_type()? {
54921            expressions.push(type_expr);
54922        } else {
54923            return Ok(None);
54924        }
54925
54926        // Parse optional dimension or additional parameters
54927        while self.match_token(TokenType::Comma) {
54928            if let Some(expr) = self.parse_primary_or_var()? {
54929                expressions.push(expr);
54930            }
54931        }
54932
54933        if expressions.is_empty() {
54934            return Ok(None);
54935        }
54936
54937        Ok(Some(Expression::Tuple(Box::new(Tuple { expressions }))))
54938    }
54939
54940    /// parse_version - Implemented from Python _parse_version
54941    /// Python: parser.py:4266-4295
54942    /// Parses FOR SYSTEM_TIME AS OF, VERSIONS BETWEEN, etc.
54943    #[allow(unused_variables, unused_mut)]
54944    pub fn parse_version(&mut self) -> Result<Option<Expression>> {
54945        // Check for TIMESTAMP or VERSION snapshot token
54946        let this = if self.match_token(TokenType::TimestampSnapshot) {
54947            "TIMESTAMP".to_string()
54948        } else if self.match_token(TokenType::VersionSnapshot) {
54949            "VERSION".to_string()
54950        } else {
54951            return Ok(None);
54952        };
54953
54954        // Parse the kind and expression
54955        let (kind, expression) = if self.match_texts(&["FROM", "BETWEEN"]) {
54956            // FROM start TO end or BETWEEN start AND end
54957            let kind_str = self.previous().text.to_uppercase();
54958            let start = self.parse_bitwise()?;
54959            self.match_texts(&["TO", "AND"]);
54960            let end = self.parse_bitwise()?;
54961            let tuple = Expression::Tuple(Box::new(Tuple {
54962                expressions: vec![
54963                    start.unwrap_or(Expression::Null(Null)),
54964                    end.unwrap_or(Expression::Null(Null)),
54965                ],
54966            }));
54967            (kind_str, Some(Box::new(tuple)))
54968        } else if self.match_text_seq(&["CONTAINED", "IN"]) {
54969            // CONTAINED IN (values)
54970            let expressions = if self.match_token(TokenType::LParen) {
54971                let exprs = self.parse_expression_list()?;
54972                self.expect(TokenType::RParen)?;
54973                exprs
54974            } else {
54975                Vec::new()
54976            };
54977            (
54978                "CONTAINED IN".to_string(),
54979                Some(Box::new(Expression::Tuple(Box::new(Tuple { expressions })))),
54980            )
54981        } else if self.match_token(TokenType::All) {
54982            // ALL
54983            ("ALL".to_string(), None)
54984        } else {
54985            // AS OF
54986            self.match_text_seq(&["AS", "OF"]);
54987            let type_expr = self.parse_type()?;
54988            ("AS OF".to_string(), type_expr.map(Box::new))
54989        };
54990
54991        Ok(Some(Expression::Version(Box::new(Version {
54992            this: Box::new(Expression::Var(Box::new(Var { this }))),
54993            kind,
54994            expression,
54995        }))))
54996    }
54997
54998    /// parse_volatile_property - Parses VOLATILE property
54999    /// Python: _parse_volatile_property
55000    /// Returns VolatileProperty for table volatility or StabilityProperty for function stability
55001    pub fn parse_volatile_property(&mut self) -> Result<Option<Expression>> {
55002        // Check the token before VOLATILE to determine context
55003        // In SQL, VOLATILE can mean:
55004        // 1. Table volatility (CREATE VOLATILE TABLE)
55005        // 2. Function stability (CREATE FUNCTION ... VOLATILE)
55006
55007        // Look back to see if this is in a table context
55008        // PRE_VOLATILE_TOKENS typically include: CREATE, REPLACE, GLOBAL, etc.
55009        let is_table_context = if self.current >= 2 {
55010            let pre_token = &self.tokens[self.current - 2];
55011            matches!(
55012                pre_token.token_type,
55013                TokenType::Create | TokenType::Global | TokenType::Temporary | TokenType::Replace
55014            )
55015        } else {
55016            false
55017        };
55018
55019        if is_table_context {
55020            Ok(Some(Expression::VolatileProperty(Box::new(
55021                VolatileProperty { this: None },
55022            ))))
55023        } else {
55024            // Function stability - return StabilityProperty with "VOLATILE" literal
55025            Ok(Some(Expression::StabilityProperty(Box::new(
55026                StabilityProperty {
55027                    this: Box::new(Expression::Literal(Literal::String("VOLATILE".to_string()))),
55028                },
55029            ))))
55030        }
55031    }
55032
55033    /// parse_when_matched - Implemented from Python _parse_when_matched
55034    /// Calls: parse_disjunction, parse_star, parse_value
55035    #[allow(unused_variables, unused_mut)]
55036    /// Parse WHEN [NOT] MATCHED clauses for MERGE statements
55037    /// This is the public entry point that calls parse_when_matched_clauses
55038    pub fn parse_when_matched(&mut self) -> Result<Option<Expression>> {
55039        self.parse_when_matched_clauses()
55040    }
55041
55042    /// parse_where - Parse WHERE clause
55043    /// Python: if not self._match(TokenType.WHERE): return None; return exp.Where(this=self._parse_disjunction())
55044    pub fn parse_where(&mut self) -> Result<Option<Expression>> {
55045        if !self.match_token(TokenType::Where) {
55046            return Ok(None);
55047        }
55048        // Parse the condition expression
55049        let condition = self.parse_expression()?;
55050        Ok(Some(Expression::Where(Box::new(Where { this: condition }))))
55051    }
55052
55053    /// parse_window - Implemented from Python _parse_window
55054    /// Calls: parse_window_spec, parse_partition_and_order
55055    #[allow(unused_variables, unused_mut)]
55056    pub fn parse_window(&mut self) -> Result<Option<Expression>> {
55057        if self.match_text_seq(&["WITHIN", "GROUP"]) {
55058            return Ok(Some(Expression::WindowSpec(Box::new(WindowSpec {
55059                partition_by: Vec::new(),
55060                order_by: Vec::new(),
55061                frame: None,
55062            }))));
55063        }
55064        if self.match_text_seq(&["LAST"]) {
55065            // Matched: LAST
55066            return Ok(None);
55067        }
55068        if self.match_text_seq(&["EXCLUDE"]) {
55069            // Matched: EXCLUDE
55070            return Ok(None);
55071        }
55072        Ok(None)
55073    }
55074
55075    /// parse_window_clause - Ported from Python _parse_window_clause
55076    /// Parses WINDOW named_window_definition [, named_window_definition, ...]
55077    #[allow(unused_variables, unused_mut)]
55078    pub fn parse_window_clause(&mut self) -> Result<Option<Expression>> {
55079        if !self.match_token(TokenType::Window) {
55080            return Ok(None);
55081        }
55082
55083        // Parse comma-separated named window definitions
55084        let mut windows = Vec::new();
55085        loop {
55086            // Parse window name
55087            let name = self.parse_identifier()?;
55088            if name.is_none() {
55089                break;
55090            }
55091
55092            // Expect AS
55093            self.expect(TokenType::As)?;
55094
55095            // Parse window specification (parenthesized)
55096            self.expect(TokenType::LParen)?;
55097            let spec = self.parse_window_spec_inner()?;
55098            self.expect(TokenType::RParen)?;
55099
55100            if let (Some(name_expr), Some(spec_expr)) = (name, spec) {
55101                // Create an Alias expression wrapping the spec with the name
55102                let alias_ident = if let Expression::Identifier(id) = name_expr {
55103                    id
55104                } else {
55105                    Identifier::new("window")
55106                };
55107                windows.push(Expression::Alias(Box::new(Alias {
55108                    this: spec_expr,
55109                    alias: alias_ident,
55110                    column_aliases: Vec::new(),
55111                    pre_alias_comments: Vec::new(),
55112                    trailing_comments: Vec::new(),
55113                    inferred_type: None,
55114                })));
55115            }
55116
55117            if !self.match_token(TokenType::Comma) {
55118                break;
55119            }
55120        }
55121
55122        if windows.is_empty() {
55123            Ok(None)
55124        } else {
55125            Ok(Some(Expression::Tuple(Box::new(Tuple {
55126                expressions: windows,
55127            }))))
55128        }
55129    }
55130
55131    /// Parse window spec inner (without parentheses)
55132    fn parse_window_spec_inner(&mut self) -> Result<Option<Expression>> {
55133        // Parse optional base window name (identifier not followed by PARTITION or ORDER or DISTRIBUTE or SORT)
55134        let _base = if (self.check(TokenType::Identifier)
55135            || self.check(TokenType::QuotedIdentifier))
55136            && !self.check(TokenType::Partition)
55137            && !self.check(TokenType::Order)
55138            && !self.check(TokenType::Distribute)
55139            && !self.check(TokenType::Sort)
55140        {
55141            self.parse_identifier()?
55142        } else {
55143            None
55144        };
55145
55146        // Parse PARTITION BY or DISTRIBUTE BY (Hive uses DISTRIBUTE BY in window specs)
55147        let partition_by = if self.match_keywords(&[TokenType::Partition, TokenType::By]) {
55148            self.parse_expression_list()?
55149        } else if self.match_keywords(&[TokenType::Distribute, TokenType::By]) {
55150            // Hive: DISTRIBUTE BY is equivalent to PARTITION BY in window specs
55151            self.parse_expression_list()?
55152        } else {
55153            Vec::new()
55154        };
55155
55156        // Parse ORDER BY or SORT BY (Hive uses SORT BY in window specs)
55157        let order_by = if self.match_token(TokenType::Order) {
55158            self.match_token(TokenType::By);
55159            let mut orders = Vec::new();
55160            loop {
55161                if let Some(ordered) = self.parse_ordered_item()? {
55162                    orders.push(ordered);
55163                } else {
55164                    break;
55165                }
55166                if !self.match_token(TokenType::Comma) {
55167                    break;
55168                }
55169            }
55170            orders
55171        } else if self.match_token(TokenType::Sort) {
55172            // Hive: SORT BY is equivalent to ORDER BY in window specs
55173            self.match_token(TokenType::By);
55174            let mut orders = Vec::new();
55175            loop {
55176                if let Some(ordered) = self.parse_ordered_item()? {
55177                    orders.push(ordered);
55178                } else {
55179                    break;
55180                }
55181                if !self.match_token(TokenType::Comma) {
55182                    break;
55183                }
55184            }
55185            orders
55186        } else {
55187            Vec::new()
55188        };
55189
55190        // Parse frame specification (ROWS/RANGE/GROUPS BETWEEN ... AND ...)
55191        let frame = self.parse_window_frame()?;
55192
55193        Ok(Some(Expression::WindowSpec(Box::new(WindowSpec {
55194            partition_by,
55195            order_by,
55196            frame,
55197        }))))
55198    }
55199
55200    /// parse_window_spec - Implemented from Python _parse_window_spec
55201    #[allow(unused_variables, unused_mut)]
55202    pub fn parse_window_spec(&mut self) -> Result<Option<Expression>> {
55203        if self.match_text_seq(&["UNBOUNDED"]) {
55204            // Matched: UNBOUNDED
55205            return Ok(None);
55206        }
55207        if self.match_text_seq(&["CURRENT", "ROW"]) {
55208            // Matched: CURRENT ROW
55209            return Ok(None);
55210        }
55211        Ok(None)
55212    }
55213
55214    /// parse_with_operator - Parse column with operator class (PostgreSQL)
55215    /// Parses: ordered_expression [WITH operator]
55216    #[allow(unused_variables, unused_mut)]
55217    pub fn parse_with_operator(&mut self) -> Result<Option<Expression>> {
55218        // First parse an ordered expression with optional operator class
55219        let this = if let Some(opclass) = self.parse_opclass()? {
55220            opclass
55221        } else if let Some(ordered) = self.parse_ordered()? {
55222            ordered
55223        } else {
55224            return Ok(None);
55225        };
55226
55227        // Check for WITH operator
55228        if !self.match_token(TokenType::With) {
55229            return Ok(Some(this));
55230        }
55231
55232        // Parse the operator
55233        let op = self.parse_var()?;
55234        let op_str = match op {
55235            Some(Expression::Identifier(id)) => id.name,
55236            Some(Expression::Var(v)) => v.this.clone(),
55237            _ => String::new(),
55238        };
55239
55240        Ok(Some(Expression::WithOperator(Box::new(WithOperator {
55241            this: Box::new(this),
55242            op: op_str,
55243        }))))
55244    }
55245
55246    /// parse_with_property - Implemented from Python _parse_with_property
55247    /// Calls: parse_withjournaltable, parse_withisolatedloading, parse_wrapped_properties
55248    #[allow(unused_variables, unused_mut)]
55249    pub fn parse_with_property(&mut self) -> Result<Option<Expression>> {
55250        if self.match_text_seq(&["(", "SYSTEM_VERSIONING"]) {
55251            return Ok(Some(Expression::WithProcedureOptions(Box::new(
55252                WithProcedureOptions {
55253                    expressions: Vec::new(),
55254                },
55255            ))));
55256        }
55257        if self.match_text_seq(&["JOURNAL"]) {
55258            // Matched: JOURNAL
55259            return Ok(None);
55260        }
55261        if self.match_text_seq(&["DATA"]) {
55262            // Matched: DATA
55263            return Ok(None);
55264        }
55265        Ok(None)
55266    }
55267
55268    /// parse_withdata - Implemented from Python _parse_withdata
55269    #[allow(unused_variables, unused_mut)]
55270    pub fn parse_withdata(&mut self) -> Result<Option<Expression>> {
55271        if self.match_text_seq(&["AND", "STATISTICS"]) {
55272            return Ok(Some(Expression::WithDataProperty(Box::new(
55273                WithDataProperty {
55274                    no: None,
55275                    statistics: None,
55276                },
55277            ))));
55278        }
55279        if self.match_text_seq(&["AND", "NO", "STATISTICS"]) {
55280            // Matched: AND NO STATISTICS
55281            return Ok(None);
55282        }
55283        Ok(None)
55284    }
55285
55286    /// parse_withisolatedloading - Implemented from Python _parse_withisolatedloading
55287    #[allow(unused_variables, unused_mut)]
55288    pub fn parse_withisolatedloading(&mut self) -> Result<Option<Expression>> {
55289        if self.match_text_seq(&["NO"]) {
55290            return Ok(Some(Expression::IsolatedLoadingProperty(Box::new(
55291                IsolatedLoadingProperty {
55292                    no: None,
55293                    concurrent: None,
55294                    target: None,
55295                },
55296            ))));
55297        }
55298        if self.match_text_seq(&["CONCURRENT"]) {
55299            // Matched: CONCURRENT
55300            return Ok(None);
55301        }
55302        Ok(None)
55303    }
55304
55305    /// parse_withjournaltable - Teradata WITH JOURNAL TABLE property
55306    /// Parses: WITH JOURNAL TABLE = table_name
55307    #[allow(unused_variables, unused_mut)]
55308    pub fn parse_withjournaltable(&mut self) -> Result<Option<Expression>> {
55309        // Optionally consume TABLE keyword
55310        self.match_token(TokenType::Table);
55311
55312        // Optionally consume = sign
55313        self.match_token(TokenType::Eq);
55314
55315        // Parse the table reference
55316        let table = self.parse_table_parts()?;
55317        if table.is_none() {
55318            return Ok(None);
55319        }
55320
55321        Ok(Some(Expression::WithJournalTableProperty(Box::new(
55322            WithJournalTableProperty {
55323                this: Box::new(table.unwrap()),
55324            },
55325        ))))
55326    }
55327
55328    /// parse_wrapped - Parses an expression wrapped in parentheses
55329    /// Python: _parse_wrapped(parse_method)
55330    /// This version parses a disjunction (expression) inside parentheses
55331    pub fn parse_wrapped(&mut self) -> Result<Option<Expression>> {
55332        if !self.match_token(TokenType::LParen) {
55333            return Ok(None);
55334        }
55335
55336        let result = self.parse_disjunction()?;
55337        self.match_token(TokenType::RParen);
55338
55339        Ok(result)
55340    }
55341
55342    /// parse_wrapped_csv - Parses comma-separated expressions wrapped in parentheses
55343    /// Python: _parse_wrapped_csv(parse_method)
55344    pub fn parse_wrapped_csv(&mut self) -> Result<Option<Expression>> {
55345        if !self.match_token(TokenType::LParen) {
55346            return Ok(None);
55347        }
55348
55349        let expressions = self.parse_expression_list()?;
55350        self.match_token(TokenType::RParen);
55351
55352        if expressions.is_empty() {
55353            return Ok(None);
55354        }
55355
55356        Ok(Some(Expression::Tuple(Box::new(Tuple { expressions }))))
55357    }
55358
55359    /// parse_wrapped_id_vars - Parses comma-separated identifiers wrapped in parentheses
55360    /// Python: _parse_wrapped_id_vars
55361    pub fn parse_wrapped_id_vars(&mut self) -> Result<Option<Expression>> {
55362        if !self.match_token(TokenType::LParen) {
55363            return Ok(None);
55364        }
55365
55366        let mut identifiers = Vec::new();
55367        loop {
55368            if let Some(id) = self.parse_id_var()? {
55369                identifiers.push(id);
55370            } else {
55371                break;
55372            }
55373            if !self.match_token(TokenType::Comma) {
55374                break;
55375            }
55376        }
55377
55378        self.match_token(TokenType::RParen);
55379
55380        if identifiers.is_empty() {
55381            return Ok(None);
55382        }
55383
55384        Ok(Some(Expression::Tuple(Box::new(Tuple {
55385            expressions: identifiers,
55386        }))))
55387    }
55388
55389    /// parse_wrapped_options - Implemented from Python _parse_wrapped_options
55390    /// Parses space-separated properties wrapped in parentheses (for Snowflake STAGE_FILE_FORMAT, etc.)
55391    /// Format: = (KEY=VALUE KEY2=VALUE2 ...)
55392    pub fn parse_wrapped_options(&mut self) -> Result<Option<Expression>> {
55393        // Match optional = before opening paren
55394        self.match_token(TokenType::Eq);
55395
55396        // Expect opening paren
55397        if !self.match_token(TokenType::LParen) {
55398            return Ok(None);
55399        }
55400
55401        // Parse space-separated properties (no comma required between them)
55402        let mut properties = Vec::new();
55403        while !self.check(TokenType::RParen) && !self.is_at_end() {
55404            // Try to parse a property: KEY=VALUE
55405            if let Some(prop) = self.parse_option_property()? {
55406                properties.push(prop);
55407            } else {
55408                break;
55409            }
55410        }
55411
55412        // Expect closing paren
55413        self.match_token(TokenType::RParen);
55414
55415        if properties.is_empty() {
55416            Ok(None)
55417        } else {
55418            Ok(Some(Expression::Tuple(Box::new(Tuple {
55419                expressions: properties,
55420            }))))
55421        }
55422    }
55423
55424    /// Parse a single option property: KEY=VALUE
55425    /// Handles various value types: identifiers, strings, numbers, nested parens like ('') or (val1, val2)
55426    fn parse_option_property(&mut self) -> Result<Option<Expression>> {
55427        // Save position to retreat if this isn't a property
55428        let index = self.current;
55429
55430        // Parse the key (identifier/column name)
55431        // For Snowflake options, keys are identifiers like TYPE, FIELD_DELIMITER, NULL_IF, etc.
55432        let key = if self.check(TokenType::Identifier)
55433            || self.check(TokenType::Var)
55434            || self
55435                .peek()
55436                .text
55437                .chars()
55438                .all(|c| c.is_ascii_alphanumeric() || c == '_')
55439        {
55440            let name = self.peek().text.clone();
55441            self.advance();
55442            Some(Expression::Var(Box::new(Var { this: name })))
55443        } else {
55444            None
55445        };
55446
55447        let key = match key {
55448            Some(k) => k,
55449            None => {
55450                self.current = index;
55451                return Ok(None);
55452            }
55453        };
55454
55455        // Expect =
55456        if !self.match_token(TokenType::Eq) {
55457            self.current = index;
55458            return Ok(None);
55459        }
55460
55461        // Parse the value - can be:
55462        // - Simple identifier: CSV, SKIP_FILE, BASE64, TRUE, FALSE, CASE_SENSITIVE
55463        // - String literal: '|', '"', 'TZHTZM YYYY-MM-DD HH24:MI:SS.FF9'
55464        // - Number: 5
55465        // - Nested parens for tuple: ('')
55466        let value = if self.check(TokenType::LParen) {
55467            // Parse nested parenthesized value like NULL_IF=('')
55468            self.advance(); // consume (
55469            let mut inner_exprs = Vec::new();
55470            while !self.check(TokenType::RParen) && !self.is_at_end() {
55471                if let Some(expr) = self.parse_primary_for_option()? {
55472                    inner_exprs.push(expr);
55473                }
55474                // Allow comma between nested values
55475                self.match_token(TokenType::Comma);
55476            }
55477            self.match_token(TokenType::RParen);
55478            Expression::Tuple(Box::new(Tuple {
55479                expressions: inner_exprs,
55480            }))
55481        } else if let Some(primary) = self.parse_primary_for_option()? {
55482            primary
55483        } else {
55484            // Fallback: try to parse as a var
55485            let text = self.peek().text.clone();
55486            self.advance();
55487            Expression::Var(Box::new(Var { this: text }))
55488        };
55489
55490        // Return as a Property expression (KEY=VALUE)
55491        Ok(Some(Expression::Property(Box::new(Property {
55492            this: Box::new(key),
55493            value: Some(Box::new(value)),
55494        }))))
55495    }
55496
55497    /// Parse a primary value for option properties
55498    /// Handles strings, numbers, identifiers, TRUE/FALSE
55499    fn parse_primary_for_option(&mut self) -> Result<Option<Expression>> {
55500        // String literal
55501        if self.check(TokenType::String) {
55502            let text = self.peek().text.clone();
55503            self.advance();
55504            return Ok(Some(Expression::Literal(Literal::String(text))));
55505        }
55506
55507        // Number
55508        if self.check(TokenType::Number) {
55509            let text = self.peek().text.clone();
55510            self.advance();
55511            return Ok(Some(Expression::Literal(Literal::Number(text))));
55512        }
55513
55514        // TRUE/FALSE
55515        if self.check(TokenType::True) {
55516            self.advance();
55517            return Ok(Some(Expression::Boolean(BooleanLiteral { value: true })));
55518        }
55519        if self.check(TokenType::False) {
55520            self.advance();
55521            return Ok(Some(Expression::Boolean(BooleanLiteral { value: false })));
55522        }
55523
55524        // Identifier or keyword used as value (CSV, SKIP_FILE, BASE64, etc.)
55525        if self.check(TokenType::Identifier)
55526            || self.check(TokenType::Var)
55527            || (!self.check(TokenType::RParen)
55528                && !self.check(TokenType::Comma)
55529                && !self.check(TokenType::Eq)
55530                && !self.is_at_end())
55531        {
55532            let text = self.peek().text.clone();
55533            // Don't consume if it's a closing paren or could be the next property key followed by =
55534            if self.check(TokenType::RParen) {
55535                return Ok(None);
55536            }
55537            // Check if this is the start of next property (followed by =)
55538            if self.check_next(TokenType::Eq) {
55539                return Ok(None);
55540            }
55541            self.advance();
55542            return Ok(Some(Expression::Var(Box::new(Var { this: text }))));
55543        }
55544
55545        Ok(None)
55546    }
55547
55548    /// parse_options_list - Parses BigQuery-style OPTIONS list: (key=value, key=value, ...)
55549    /// Parses key=value assignments where values can be complex expressions
55550    pub fn parse_options_list(&mut self) -> Result<Vec<Expression>> {
55551        // Expect opening paren
55552        if !self.match_token(TokenType::LParen) {
55553            return Ok(Vec::new());
55554        }
55555
55556        // Parse comma-separated key=value pairs
55557        let mut options = Vec::new();
55558        loop {
55559            // Check for empty OPTIONS () or end of list
55560            if self.check(TokenType::RParen) {
55561                break;
55562            }
55563
55564            // Parse key=value using parse_assignment which handles EQ operations
55565            if let Some(opt) = self.parse_assignment()? {
55566                options.push(opt);
55567            } else {
55568                break;
55569            }
55570
55571            if !self.match_token(TokenType::Comma) {
55572                break;
55573            }
55574        }
55575
55576        // Expect closing paren
55577        self.expect(TokenType::RParen)?;
55578
55579        Ok(options)
55580    }
55581
55582    /// Parse BigQuery PARTITION BY property and return a typed AST node.
55583    fn parse_bigquery_partition_by_property(&mut self) -> Result<Option<Expression>> {
55584        let start = self.current;
55585        let matched_partition = if self.match_token(TokenType::PartitionBy) {
55586            true
55587        } else if self.match_token(TokenType::Partition) {
55588            self.match_token(TokenType::By)
55589        } else {
55590            false
55591        };
55592
55593        if !matched_partition {
55594            self.current = start;
55595            return Ok(None);
55596        }
55597
55598        let mut expressions = Vec::new();
55599        while !self.is_at_end()
55600            && !self.check(TokenType::Cluster)
55601            && !self.check(TokenType::As)
55602            && !self.check(TokenType::Semicolon)
55603            && !self.check(TokenType::RParen)
55604            && !self.check_identifier("OPTIONS")
55605        {
55606            match self.parse_expression() {
55607                Ok(expr) => expressions.push(expr),
55608                Err(_) => {
55609                    // Fall back to generic/raw parsing if typed parsing can't consume this form.
55610                    self.current = start;
55611                    return Ok(None);
55612                }
55613            }
55614
55615            if !self.match_token(TokenType::Comma) {
55616                break;
55617            }
55618        }
55619
55620        if expressions.is_empty() {
55621            self.current = start;
55622            return Ok(None);
55623        }
55624
55625        Ok(Some(Expression::PartitionByProperty(Box::new(
55626            PartitionByProperty { expressions },
55627        ))))
55628    }
55629
55630    /// Parse BigQuery CLUSTER BY property and return a typed AST node.
55631    fn parse_bigquery_cluster_by_property(&mut self) -> Result<Option<Expression>> {
55632        let start = self.current;
55633        if !self.match_keywords(&[TokenType::Cluster, TokenType::By]) {
55634            self.current = start;
55635            return Ok(None);
55636        }
55637
55638        let mut columns = Vec::new();
55639        loop {
55640            if let Some(Expression::Identifier(id)) = self.parse_identifier()? {
55641                columns.push(id);
55642            } else if self.is_identifier_or_keyword_token() {
55643                let name = self.advance().text;
55644                columns.push(Identifier {
55645                    name,
55646                    quoted: false,
55647                    trailing_comments: Vec::new(),
55648                    span: None,
55649                });
55650            } else {
55651                // Fall back to generic/raw parsing if typed parsing can't consume this form.
55652                self.current = start;
55653                return Ok(None);
55654            }
55655
55656            if !self.match_token(TokenType::Comma) {
55657                break;
55658            }
55659        }
55660
55661        if columns.is_empty() {
55662            self.current = start;
55663            return Ok(None);
55664        }
55665
55666        Ok(Some(Expression::ClusterByColumnsProperty(Box::new(
55667            ClusterByColumnsProperty { columns },
55668        ))))
55669    }
55670
55671    /// Parse BigQuery OPTIONS (...) clause into typed entries when possible.
55672    /// Falls back to generic `Properties` when options are not simple key/value assignments.
55673    fn parse_bigquery_options_property(&mut self) -> Result<Option<Expression>> {
55674        let start = self.current;
55675        if !self.match_identifier("OPTIONS") {
55676            self.current = start;
55677            return Ok(None);
55678        }
55679
55680        let options = self.parse_options_list()?;
55681        if options.is_empty() {
55682            return Ok(Some(Expression::OptionsProperty(Box::new(
55683                OptionsProperty {
55684                    entries: Vec::new(),
55685                },
55686            ))));
55687        }
55688
55689        let mut entries = Vec::new();
55690        for option_expr in &options {
55691            let Some(entry) = Self::option_entry_from_expression(option_expr) else {
55692                return Ok(Some(Expression::Properties(Box::new(Properties {
55693                    expressions: options,
55694                }))));
55695            };
55696            entries.push(entry);
55697        }
55698
55699        Ok(Some(Expression::OptionsProperty(Box::new(
55700            OptionsProperty { entries },
55701        ))))
55702    }
55703
55704    fn option_entry_from_expression(expr: &Expression) -> Option<OptionEntry> {
55705        let Expression::Eq(eq) = expr else {
55706            return None;
55707        };
55708
55709        let key = match &eq.left {
55710            Expression::Column(col) if col.table.is_none() => col.name.clone(),
55711            Expression::Identifier(id) => id.clone(),
55712            Expression::Var(var) => Identifier {
55713                name: var.this.clone(),
55714                quoted: false,
55715                trailing_comments: Vec::new(),
55716                span: None,
55717            },
55718            _ => return None,
55719        };
55720
55721        Some(OptionEntry {
55722            key,
55723            value: eq.right.clone(),
55724        })
55725    }
55726
55727    /// parse_environment_list - Parses Databricks ENVIRONMENT list: (dependencies = '...', environment_version = '...')
55728    /// Parses key=value assignments where values can be string literals
55729    pub fn parse_environment_list(&mut self) -> Result<Vec<Expression>> {
55730        // Expect opening paren
55731        if !self.match_token(TokenType::LParen) {
55732            return Ok(Vec::new());
55733        }
55734
55735        // Parse comma-separated key=value pairs
55736        let mut env_items = Vec::new();
55737        loop {
55738            // Check for empty ENVIRONMENT () or end of list
55739            if self.check(TokenType::RParen) {
55740                break;
55741            }
55742
55743            // Parse key=value using parse_assignment which handles EQ operations
55744            if let Some(opt) = self.parse_assignment()? {
55745                env_items.push(opt);
55746            } else {
55747                break;
55748            }
55749
55750            if !self.match_token(TokenType::Comma) {
55751                break;
55752            }
55753        }
55754
55755        // Expect closing paren
55756        self.expect(TokenType::RParen)?;
55757
55758        Ok(env_items)
55759    }
55760
55761    /// parse_wrapped_properties - Ported from Python _parse_wrapped_properties
55762    /// Parses properties wrapped in parentheses
55763    #[allow(unused_variables, unused_mut)]
55764    pub fn parse_wrapped_properties(&mut self) -> Result<Option<Expression>> {
55765        // Parse wrapped list of properties: (prop1, prop2, ...)
55766        if !self.match_token(TokenType::LParen) {
55767            return Ok(None);
55768        }
55769
55770        let mut props = Vec::new();
55771        loop {
55772            if let Some(prop) = self.parse_property()? {
55773                props.push(prop);
55774            }
55775            if !self.match_token(TokenType::Comma) {
55776                break;
55777            }
55778        }
55779
55780        self.match_token(TokenType::RParen);
55781
55782        if props.is_empty() {
55783            return Ok(None);
55784        }
55785
55786        // Return as a Properties expression
55787        Ok(Some(Expression::Properties(Box::new(Properties {
55788            expressions: props,
55789        }))))
55790    }
55791
55792    /// parse_wrapped_select - Ported from Python _parse_wrapped_select
55793    /// Parses wrapped select statements including PIVOT/UNPIVOT and FROM-first syntax
55794    #[allow(unused_variables, unused_mut)]
55795    pub fn parse_wrapped_select(&mut self, table: bool) -> Result<Option<Expression>> {
55796        // Check for PIVOT/UNPIVOT
55797        let is_unpivot = self.check(TokenType::Unpivot);
55798        if self.match_token(TokenType::Pivot) || self.match_token(TokenType::Unpivot) {
55799            // Call simplified pivot parser
55800            return self.parse_simplified_pivot(is_unpivot);
55801        }
55802
55803        // Check for FROM (DuckDB FROM-first syntax)
55804        if self.match_token(TokenType::From) {
55805            // Parse the FROM clause (table reference)
55806            let from_expr = self.parse_table()?;
55807
55808            // Try to parse a full SELECT
55809            let select = self.parse_select_query()?;
55810
55811            if let Some(sel) = select {
55812                // Apply set operations and query modifiers
55813                let with_ops = self.parse_set_operations_with_expr(Some(sel))?;
55814                return Ok(with_ops);
55815            } else if let Some(from_table) = from_expr {
55816                // Create a SELECT * FROM <table>
55817                let mut select_struct = Select::new();
55818                select_struct.expressions = vec![Expression::Star(Star {
55819                    table: None,
55820                    except: None,
55821                    replace: None,
55822                    rename: None,
55823                    trailing_comments: Vec::new(),
55824                    span: None,
55825                })];
55826                select_struct.from = Some(From {
55827                    expressions: vec![from_table],
55828                });
55829                let select_all = Expression::Select(Box::new(select_struct));
55830                let with_ops = self.parse_set_operations_with_expr(Some(select_all))?;
55831                return Ok(with_ops);
55832            }
55833            return Ok(None);
55834        }
55835
55836        // Regular case: parse table or nested select
55837        let this = if table {
55838            self.parse_table()?
55839        } else {
55840            // Parse nested select without set operations
55841            self.parse_select_query()?
55842        };
55843
55844        if this.is_none() {
55845            return Ok(None);
55846        }
55847
55848        // Apply set operations and query modifiers
55849        let with_ops = self.parse_set_operations_with_expr(this)?;
55850        Ok(with_ops)
55851    }
55852
55853    /// Helper for parse_wrapped_select with default table=false
55854    pub fn parse_wrapped_select_default(&mut self) -> Result<Option<Expression>> {
55855        self.parse_wrapped_select(false)
55856    }
55857
55858    /// parse_xml_element - Implemented from Python _parse_xml_element
55859    /// Python: parser.py:6917-6931
55860    /// Parses XMLELEMENT(NAME name [, expr, ...]) or XMLELEMENT(EVALNAME expr [, expr, ...])
55861    #[allow(unused_variables, unused_mut)]
55862    pub fn parse_xml_element(&mut self) -> Result<Option<Expression>> {
55863        let (this, evalname) = if self.match_text_seq(&["EVALNAME"]) {
55864            // EVALNAME - parse expression for dynamic element name
55865            let expr = self.parse_bitwise()?;
55866            (
55867                expr,
55868                Some(Box::new(Expression::Boolean(BooleanLiteral {
55869                    value: true,
55870                }))),
55871            )
55872        } else {
55873            // NAME - parse static element name
55874            self.match_text_seq(&["NAME"]);
55875            let id = self.parse_id_var()?;
55876            (id, None)
55877        };
55878
55879        // Parse optional expressions (comma-separated content/attributes)
55880        let expressions = if self.match_token(TokenType::Comma) {
55881            self.parse_expression_list()?
55882        } else {
55883            Vec::new()
55884        };
55885
55886        match this {
55887            Some(t) => Ok(Some(Expression::XMLElement(Box::new(XMLElement {
55888                this: Box::new(t),
55889                expressions,
55890                evalname,
55891            })))),
55892            None => Ok(None),
55893        }
55894    }
55895
55896    /// parse_xml_namespace - Ported from Python _parse_xml_namespace
55897    /// Parses XML namespace declarations
55898    #[allow(unused_variables, unused_mut)]
55899    pub fn parse_xml_namespace(&mut self) -> Result<Option<Expression>> {
55900        let mut namespaces = Vec::new();
55901
55902        loop {
55903            // Check for DEFAULT namespace
55904            let is_default = self.match_text_seq(&["DEFAULT"]);
55905
55906            // Parse the URI string
55907            let uri = if is_default {
55908                self.parse_string()?
55909            } else {
55910                // Parse URI with optional alias (AS name)
55911                let uri_expr = self.parse_string()?;
55912                if let Some(u) = uri_expr {
55913                    self.parse_alias_with_expr(Some(u))?
55914                } else {
55915                    None
55916                }
55917            };
55918
55919            if let Some(u) = uri {
55920                namespaces.push(u);
55921            }
55922
55923            // Continue if comma
55924            if !self.match_token(TokenType::Comma) {
55925                break;
55926            }
55927        }
55928
55929        if namespaces.is_empty() {
55930            return Ok(None);
55931        }
55932
55933        // Return as a Tuple (list of namespaces)
55934        Ok(Some(Expression::Tuple(Box::new(Tuple {
55935            expressions: namespaces,
55936        }))))
55937    }
55938
55939    /// parse_xml_table - Implemented from Python _parse_xml_table
55940    /// Python: parser.py:6933-6961
55941    /// Parses XMLTABLE(xpath_expr PASSING xml_doc COLUMNS ...)
55942    #[allow(unused_variables, unused_mut)]
55943    pub fn parse_xml_table(&mut self) -> Result<Option<Expression>> {
55944        // Parse optional XMLNAMESPACES clause
55945        let namespaces = if self.match_text_seq(&["XMLNAMESPACES", "("]) {
55946            let ns = self.parse_xml_namespace()?;
55947            self.match_text_seq(&[")", ","]);
55948            ns.map(Box::new)
55949        } else {
55950            None
55951        };
55952
55953        // Parse XPath expression (string)
55954        let this = self.parse_string()?;
55955        if this.is_none() {
55956            return Ok(None);
55957        }
55958
55959        // Parse PASSING clause
55960        let passing = if self.match_text_seq(&["PASSING"]) {
55961            // BY VALUE is optional
55962            self.match_text_seq(&["BY", "VALUE"]);
55963            // Parse comma-separated expressions.
55964            // Oracle XMLTABLE PASSING accepts full expressions (including function calls),
55965            // not just column references.
55966            // We need to stop before COLUMNS, RETURNING, or )
55967            let mut cols = Vec::new();
55968            loop {
55969                // Check for stop keywords before parsing a column
55970                if !self.is_at_end() {
55971                    let next_text = self.peek().text.to_uppercase();
55972                    if next_text == "COLUMNS" || next_text == "RETURNING" {
55973                        break;
55974                    }
55975                    if self.check(TokenType::RParen) {
55976                        break;
55977                    }
55978                }
55979                if let Some(col) = self.parse_assignment()? {
55980                    cols.push(col);
55981                } else {
55982                    break;
55983                }
55984                if !self.match_token(TokenType::Comma) {
55985                    break;
55986                }
55987            }
55988            if cols.is_empty() {
55989                None
55990            } else {
55991                Some(Box::new(Expression::Tuple(Box::new(Tuple {
55992                    expressions: cols,
55993                }))))
55994            }
55995        } else {
55996            None
55997        };
55998
55999        // Parse optional RETURNING SEQUENCE BY REF
56000        let by_ref = if self.match_text_seq(&["RETURNING", "SEQUENCE", "BY", "REF"]) {
56001            Some(Box::new(Expression::Boolean(BooleanLiteral {
56002                value: true,
56003            })))
56004        } else {
56005            None
56006        };
56007
56008        // Parse COLUMNS clause
56009        let columns = if self.match_text_seq(&["COLUMNS"]) {
56010            let mut cols = Vec::new();
56011            loop {
56012                // Stop if we hit the closing paren
56013                if self.check(TokenType::RParen) {
56014                    break;
56015                }
56016                // Be permissive with leading commas in multiline XMLTABLE COLUMNS lists.
56017                if self.match_token(TokenType::Comma) {
56018                    continue;
56019                }
56020                if let Some(col_def) = self.parse_field_def()? {
56021                    cols.push(col_def);
56022                } else {
56023                    break;
56024                }
56025                if !self.match_token(TokenType::Comma) {
56026                    break;
56027                }
56028            }
56029            cols
56030        } else {
56031            Vec::new()
56032        };
56033
56034        Ok(Some(Expression::XMLTable(Box::new(XMLTable {
56035            this: Box::new(this.unwrap()),
56036            namespaces,
56037            passing,
56038            columns,
56039            by_ref,
56040        }))))
56041    }
56042
56043    /// Parse UNLOAD statement (Athena/Presto/Redshift)
56044    /// UNLOAD (SELECT ...) TO 'location' WITH (options)
56045    fn parse_unload(&mut self) -> Result<Expression> {
56046        // Collect entire statement as a Command
56047        let mut parts = Vec::new();
56048        parts.push(self.advance().text.clone()); // consume UNLOAD
56049        parts.push(" ".to_string()); // space after UNLOAD
56050
56051        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
56052            let token_type = self.peek().token_type;
56053            let token_text = self.peek().text.clone();
56054
56055            // Track string literals
56056            if token_type == TokenType::String {
56057                parts.push(format!("'{}'", token_text.replace('\'', "''")));
56058                self.advance();
56059                // Add space after string unless followed by punctuation
56060                if !self.is_at_end() {
56061                    let next_type = self.peek().token_type;
56062                    if !matches!(
56063                        next_type,
56064                        TokenType::Comma | TokenType::RParen | TokenType::Semicolon
56065                    ) {
56066                        parts.push(" ".to_string());
56067                    }
56068                }
56069                continue;
56070            }
56071
56072            // Handle ARRAY[...] syntax - no space between ARRAY and [
56073            if token_text.eq_ignore_ascii_case("ARRAY")
56074                && self
56075                    .peek_nth(1)
56076                    .is_some_and(|t| t.token_type == TokenType::LBracket)
56077            {
56078                parts.push(token_text);
56079                self.advance();
56080                // Consume [
56081                parts.push("[".to_string());
56082                self.advance();
56083                // Collect until RBracket
56084                while !self.is_at_end() && !self.check(TokenType::RBracket) {
56085                    let inner_type = self.peek().token_type;
56086                    let inner_text = self.peek().text.clone();
56087                    if inner_type == TokenType::String {
56088                        parts.push(format!("'{}'", inner_text.replace('\'', "''")));
56089                    } else {
56090                        parts.push(inner_text);
56091                    }
56092                    self.advance();
56093                    if self.check(TokenType::Comma) {
56094                        parts.push(", ".to_string());
56095                        self.advance();
56096                    }
56097                }
56098                if self.check(TokenType::RBracket) {
56099                    parts.push("]".to_string());
56100                    self.advance();
56101                }
56102                continue;
56103            }
56104
56105            parts.push(token_text);
56106            self.advance();
56107
56108            // Add space after most tokens except punctuation
56109            if !self.is_at_end() {
56110                let next_type = self.peek().token_type;
56111                let no_space_before = matches!(
56112                    next_type,
56113                    TokenType::Comma
56114                        | TokenType::RParen
56115                        | TokenType::RBracket
56116                        | TokenType::Semicolon
56117                        | TokenType::LBracket
56118                );
56119                let no_space_after = matches!(token_type, TokenType::LParen | TokenType::LBracket);
56120                if !no_space_before && !no_space_after {
56121                    parts.push(" ".to_string());
56122                }
56123            }
56124        }
56125
56126        Ok(Expression::Command(Box::new(Command {
56127            this: parts.join(""),
56128        })))
56129    }
56130
56131    /// Parse USING EXTERNAL FUNCTION statement (Athena)
56132    /// USING EXTERNAL FUNCTION name(params) RETURNS type LAMBDA 'arn' SELECT ...
56133    fn parse_using_external_function(&mut self) -> Result<Expression> {
56134        // Record start position
56135        let start_pos = self.peek().span.start;
56136
56137        // Advance through all tokens until end or semicolon
56138        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
56139            self.advance();
56140        }
56141
56142        // Get end position from the last consumed token
56143        let end_pos = if self.current > 0 {
56144            self.tokens[self.current - 1].span.end
56145        } else {
56146            start_pos
56147        };
56148
56149        // Extract exact text from source if available
56150        let command_text = if let Some(ref source) = self.source {
56151            source[start_pos..end_pos].to_string()
56152        } else {
56153            // Fallback: reconstruct from tokens (loses whitespace)
56154            let mut parts = Vec::new();
56155            for i in 0..self.current {
56156                if self.tokens[i].span.start >= start_pos && self.tokens[i].span.end <= end_pos {
56157                    if self.tokens[i].token_type == TokenType::String {
56158                        parts.push(format!("'{}'", self.tokens[i].text.replace('\'', "''")));
56159                    } else {
56160                        parts.push(self.tokens[i].text.clone());
56161                    }
56162                    if i + 1 < self.current {
56163                        parts.push(" ".to_string());
56164                    }
56165                }
56166            }
56167            parts.join("")
56168        };
56169
56170        Ok(Expression::Command(Box::new(Command {
56171            this: command_text,
56172        })))
56173    }
56174}
56175
56176#[cfg(test)]
56177mod tests {
56178    use super::*;
56179    use crate::traversal::ExpressionWalk;
56180
56181    #[test]
56182    fn test_comment_before_limit() {
56183        let sql = "SELECT a FROM b WHERE foo AND bla\n-- comment 3\nLIMIT 10";
56184        let result = Parser::parse_sql(sql).unwrap();
56185        let output = crate::Generator::sql(&result[0]).unwrap();
56186        assert_eq!(
56187            output,
56188            "SELECT a FROM b WHERE foo AND bla LIMIT 10 /* comment 3 */"
56189        );
56190    }
56191
56192    #[test]
56193    fn test_variadic_array_postgres() {
56194        use crate::dialects::DialectType;
56195        use crate::transpile;
56196
56197        // Test: ARRAY[10, -1, 5, 4.4] should parse correctly in Postgres
56198        let sql = "SELECT ARRAY[10, -1, 5, 4.4]";
56199        let result = transpile(sql, DialectType::PostgreSQL, DialectType::PostgreSQL).unwrap();
56200        eprintln!("Array test: {} -> {}", sql, result[0]);
56201
56202        // Test: VARIADIC ARRAY[10, -1, 5, 4.4] in function call
56203        let sql2 = "SELECT MLEAST(VARIADIC ARRAY[10, -1, 5, 4.4])";
56204        let result2 = transpile(sql2, DialectType::PostgreSQL, DialectType::PostgreSQL).unwrap();
56205        eprintln!("VARIADIC test: {} -> {}", sql2, result2[0]);
56206        assert_eq!(result2[0], sql2);
56207    }
56208
56209    #[test]
56210    fn test_parse_simple_select() {
56211        let result = Parser::parse_sql("SELECT 1").unwrap();
56212        assert_eq!(result.len(), 1);
56213        assert!(result[0].is_select());
56214    }
56215
56216    #[test]
56217    fn test_parse_select_from() {
56218        let result = Parser::parse_sql("SELECT a, b FROM t").unwrap();
56219        assert_eq!(result.len(), 1);
56220
56221        let select = result[0].as_select().unwrap();
56222        assert_eq!(select.expressions.len(), 2);
56223        assert!(select.from.is_some());
56224    }
56225
56226    #[test]
56227    fn test_parse_select_where() {
56228        let result = Parser::parse_sql("SELECT * FROM t WHERE x = 1").unwrap();
56229        let select = result[0].as_select().unwrap();
56230        assert!(select.where_clause.is_some());
56231    }
56232
56233    #[test]
56234    fn test_parse_balances_large_and_chain_depth() {
56235        let mut sql = String::from("SELECT 1 WHERE c0 = 0");
56236        for i in 1..4096 {
56237            sql.push_str(&format!(" AND c{i} = {i}"));
56238        }
56239
56240        let result = Parser::parse_sql(&sql).unwrap();
56241        let select = result[0].as_select().unwrap();
56242        let where_clause = select.where_clause.as_ref().expect("WHERE clause missing");
56243        let depth = where_clause.this.tree_depth();
56244        assert!(
56245            depth < 128,
56246            "Expected balanced boolean tree depth, got {}",
56247            depth
56248        );
56249    }
56250
56251    #[test]
56252    fn test_parse_balances_large_or_chain_depth() {
56253        let mut sql = String::from("SELECT 1 WHERE c0 = 0");
56254        for i in 1..4096 {
56255            sql.push_str(&format!(" OR c{i} = {i}"));
56256        }
56257
56258        let result = Parser::parse_sql(&sql).unwrap();
56259        let select = result[0].as_select().unwrap();
56260        let where_clause = select.where_clause.as_ref().expect("WHERE clause missing");
56261        let depth = where_clause.this.tree_depth();
56262        assert!(
56263            depth < 128,
56264            "Expected balanced boolean tree depth, got {}",
56265            depth
56266        );
56267    }
56268
56269    #[test]
56270    fn test_parse_select_join() {
56271        let result = Parser::parse_sql("SELECT * FROM a JOIN b ON a.id = b.id").unwrap();
56272        let select = result[0].as_select().unwrap();
56273        assert_eq!(select.joins.len(), 1);
56274        assert_eq!(select.joins[0].kind, JoinKind::Inner);
56275    }
56276
56277    #[test]
56278    fn test_parse_expression_precedence() {
56279        let result = Parser::parse_sql("SELECT 1 + 2 * 3").unwrap();
56280        let select = result[0].as_select().unwrap();
56281        // Should parse as 1 + (2 * 3) due to precedence
56282        assert!(matches!(select.expressions[0], Expression::Add(_)));
56283    }
56284
56285    #[test]
56286    fn test_parse_function() {
56287        // COUNT(*) is now a typed Count expression
56288        let result = Parser::parse_sql("SELECT COUNT(*)").unwrap();
56289        let select = result[0].as_select().unwrap();
56290        assert!(matches!(select.expressions[0], Expression::Count(_)));
56291
56292        // Unknown functions stay as generic Function
56293        let result = Parser::parse_sql("SELECT MY_CUSTOM_FUNC(name)").unwrap();
56294        let select = result[0].as_select().unwrap();
56295        assert!(matches!(select.expressions[0], Expression::Function(_)));
56296
56297        // Known aggregate functions are now typed
56298        let result = Parser::parse_sql("SELECT SUM(amount)").unwrap();
56299        let select = result[0].as_select().unwrap();
56300        assert!(matches!(select.expressions[0], Expression::Sum(_)));
56301    }
56302
56303    #[test]
56304    fn test_parse_window_function() {
56305        let result =
56306            Parser::parse_sql("SELECT ROW_NUMBER() OVER (PARTITION BY category ORDER BY id)")
56307                .unwrap();
56308        let select = result[0].as_select().unwrap();
56309        assert!(matches!(
56310            select.expressions[0],
56311            Expression::WindowFunction(_)
56312        ));
56313    }
56314
56315    #[test]
56316    fn test_parse_window_function_with_frame() {
56317        let result = Parser::parse_sql("SELECT SUM(amount) OVER (ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)").unwrap();
56318        let select = result[0].as_select().unwrap();
56319        assert!(matches!(
56320            select.expressions[0],
56321            Expression::WindowFunction(_)
56322        ));
56323    }
56324
56325    #[test]
56326    fn test_parse_subscript() {
56327        // Array subscript
56328        let result = Parser::parse_sql("SELECT arr[0]").unwrap();
56329        let select = result[0].as_select().unwrap();
56330        assert!(matches!(select.expressions[0], Expression::Subscript(_)));
56331
56332        // Function result subscript
56333        let result = Parser::parse_sql("SELECT SPLIT(name, ',')[0]").unwrap();
56334        let select = result[0].as_select().unwrap();
56335        assert!(matches!(select.expressions[0], Expression::Subscript(_)));
56336    }
56337
56338    #[test]
56339    fn test_parse_case() {
56340        let result = Parser::parse_sql("SELECT CASE WHEN x = 1 THEN 'a' ELSE 'b' END").unwrap();
56341        let select = result[0].as_select().unwrap();
56342        assert!(matches!(select.expressions[0], Expression::Case(_)));
56343    }
56344
56345    #[test]
56346    fn test_parse_insert() {
56347        let result = Parser::parse_sql("INSERT INTO t (a, b) VALUES (1, 2)").unwrap();
56348        assert!(matches!(result[0], Expression::Insert(_)));
56349    }
56350
56351    #[test]
56352    fn test_parse_template_variable() {
56353        // Test Databricks/Hive ${variable} syntax
56354        let result = Parser::parse_sql("SELECT ${x} FROM ${y} WHERE ${z} > 1").unwrap();
56355        let select = result[0].as_select().unwrap();
56356        // The expression should be a Parameter with DollarBrace style
56357        assert!(
56358            matches!(&select.expressions[0], Expression::Parameter(p) if p.name == Some("x".to_string()))
56359        );
56360        // Check the style is DollarBrace
56361        if let Expression::Parameter(p) = &select.expressions[0] {
56362            assert_eq!(p.style, ParameterStyle::DollarBrace);
56363        }
56364    }
56365
56366    #[test]
56367    fn test_parse_update() {
56368        let result = Parser::parse_sql("UPDATE t SET a = 1 WHERE b = 2").unwrap();
56369        assert!(matches!(result[0], Expression::Update(_)));
56370    }
56371
56372    #[test]
56373    fn test_parse_delete() {
56374        let result = Parser::parse_sql("DELETE FROM t WHERE a = 1").unwrap();
56375        assert!(matches!(result[0], Expression::Delete(_)));
56376    }
56377
56378    // DDL tests
56379    #[test]
56380    fn test_parse_create_table() {
56381        let result = Parser::parse_sql(
56382            "CREATE TABLE users (id INT PRIMARY KEY, name VARCHAR(100) NOT NULL)",
56383        )
56384        .unwrap();
56385        assert!(matches!(result[0], Expression::CreateTable(_)));
56386
56387        if let Expression::CreateTable(ct) = &result[0] {
56388            assert_eq!(ct.name.name.name, "users");
56389            assert_eq!(ct.columns.len(), 2);
56390            assert!(ct.columns[0].primary_key);
56391            assert_eq!(ct.columns[1].nullable, Some(false));
56392        }
56393    }
56394
56395    #[test]
56396    fn test_parse_create_table_if_not_exists() {
56397        let result = Parser::parse_sql("CREATE TABLE IF NOT EXISTS t (id INT)").unwrap();
56398        if let Expression::CreateTable(ct) = &result[0] {
56399            assert!(ct.if_not_exists);
56400        }
56401    }
56402
56403    #[test]
56404    fn test_parse_create_temporary_table() {
56405        let result = Parser::parse_sql("CREATE TEMPORARY TABLE t (id INT)").unwrap();
56406        if let Expression::CreateTable(ct) = &result[0] {
56407            assert!(ct.temporary);
56408        }
56409    }
56410
56411    #[test]
56412    fn test_bigquery_create_table_properties_are_typed() {
56413        use crate::DialectType;
56414
56415        let sql = "CREATE OR REPLACE TABLE `p1`.`d1`.`t1` PARTITION BY DATE_TRUNC(day, month) CLUSTER BY some_cluster_column OPTIONS(description='', labels=[('l1', 'v1'), ('l2', 'v2')]) AS SELECT CURRENT_DATE AS day, DATE_TRUNC(CURRENT_DATE(), month) AS month, 'c' AS some_cluster_column";
56416        let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
56417
56418        let create = match &parsed[0] {
56419            Expression::CreateTable(ct) => ct,
56420            other => panic!(
56421                "Expected CreateTable, got {:?}",
56422                std::mem::discriminant(other)
56423            ),
56424        };
56425
56426        assert!(
56427            create
56428                .properties
56429                .iter()
56430                .any(|p| matches!(p, Expression::PartitionByProperty(_))),
56431            "Expected typed PARTITION BY property"
56432        );
56433        assert!(
56434            create
56435                .properties
56436                .iter()
56437                .any(|p| matches!(p, Expression::ClusterByColumnsProperty(_))),
56438            "Expected typed CLUSTER BY property"
56439        );
56440        assert!(
56441            create
56442                .properties
56443                .iter()
56444                .any(|p| matches!(p, Expression::OptionsProperty(_))),
56445            "Expected typed OPTIONS property"
56446        );
56447        assert!(
56448            !create
56449                .properties
56450                .iter()
56451                .any(|p| matches!(p, Expression::Raw(_))),
56452            "BigQuery table properties should not fall back to Raw"
56453        );
56454
56455        let options = create
56456            .properties
56457            .iter()
56458            .find_map(|p| match p {
56459                Expression::OptionsProperty(o) => Some(o),
56460                _ => None,
56461            })
56462            .expect("Expected OptionsProperty");
56463        assert_eq!(options.entries.len(), 2);
56464        assert_eq!(options.entries[0].key.name, "description");
56465        assert_eq!(options.entries[1].key.name, "labels");
56466    }
56467
56468    #[test]
56469    fn test_bigquery_create_table_properties_roundtrip() {
56470        use crate::DialectType;
56471
56472        let sql = "CREATE TABLE t1 PARTITION BY DATE_TRUNC(day, month) CLUSTER BY some_cluster_column OPTIONS(description='', labels=[('l1', 'v1')]) AS SELECT 1 AS day, 1 AS month, 'c' AS some_cluster_column";
56473        let expected = "CREATE TABLE t1 PARTITION BY DATE_TRUNC(day, month) CLUSTER BY some_cluster_column OPTIONS (description='', labels=[('l1', 'v1')]) AS SELECT 1 AS day, 1 AS month, 'c' AS some_cluster_column";
56474        let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
56475        let generated = crate::generate(&parsed[0], DialectType::BigQuery).unwrap();
56476        assert_eq!(generated, expected);
56477    }
56478
56479    #[test]
56480    fn test_parse_drop_table() {
56481        let result = Parser::parse_sql("DROP TABLE IF EXISTS users CASCADE").unwrap();
56482        assert!(matches!(result[0], Expression::DropTable(_)));
56483
56484        if let Expression::DropTable(dt) = &result[0] {
56485            assert!(dt.if_exists);
56486            assert!(dt.cascade);
56487            assert_eq!(dt.names.len(), 1);
56488        }
56489    }
56490
56491    #[test]
56492    fn test_parse_alter_table_add_column() {
56493        let result = Parser::parse_sql("ALTER TABLE users ADD COLUMN email VARCHAR(255)").unwrap();
56494        assert!(matches!(result[0], Expression::AlterTable(_)));
56495
56496        if let Expression::AlterTable(at) = &result[0] {
56497            assert_eq!(at.actions.len(), 1);
56498            assert!(matches!(at.actions[0], AlterTableAction::AddColumn { .. }));
56499        }
56500    }
56501
56502    #[test]
56503    fn test_parse_alter_table_drop_column() {
56504        let result = Parser::parse_sql("ALTER TABLE users DROP COLUMN email").unwrap();
56505        if let Expression::AlterTable(at) = &result[0] {
56506            assert!(matches!(at.actions[0], AlterTableAction::DropColumn { .. }));
56507        }
56508    }
56509
56510    #[test]
56511    fn test_tsql_alter_table_set_options() {
56512        use crate::{transpile, DialectType};
56513        let tests = vec![
56514            "ALTER TABLE tbl SET (SYSTEM_VERSIONING=OFF)",
56515            "ALTER TABLE tbl SET (FILESTREAM_ON = 'test')",
56516            "ALTER TABLE tbl SET (DATA_DELETION=ON)",
56517            "ALTER TABLE tbl SET (DATA_DELETION=OFF)",
56518            "ALTER TABLE tbl SET (SYSTEM_VERSIONING=ON(HISTORY_TABLE=db.tbl, DATA_CONSISTENCY_CHECK=OFF, HISTORY_RETENTION_PERIOD=5 DAYS))",
56519            "ALTER TABLE tbl SET (SYSTEM_VERSIONING=ON(HISTORY_TABLE=db.tbl, HISTORY_RETENTION_PERIOD=INFINITE))",
56520            "ALTER TABLE tbl SET (DATA_DELETION=ON(FILTER_COLUMN=col, RETENTION_PERIOD=5 MONTHS))",
56521        ];
56522        for sql in tests {
56523            let result = transpile(sql, DialectType::TSQL, DialectType::TSQL);
56524            match result {
56525                Ok(output) => {
56526                    assert_eq!(output[0].trim(), sql, "Identity failed for: {}", sql);
56527                }
56528                Err(e) => {
56529                    panic!("Parse/generate failed for: {} -- {:?}", sql, e);
56530                }
56531            }
56532        }
56533    }
56534
56535    #[test]
56536    fn test_parse_create_index() {
56537        let result = Parser::parse_sql("CREATE UNIQUE INDEX idx_email ON users (email)").unwrap();
56538        assert!(matches!(result[0], Expression::CreateIndex(_)));
56539
56540        if let Expression::CreateIndex(ci) = &result[0] {
56541            assert!(ci.unique);
56542            assert_eq!(ci.name.name, "idx_email");
56543            assert_eq!(ci.table.name.name, "users");
56544            assert_eq!(ci.columns.len(), 1);
56545        }
56546    }
56547
56548    #[test]
56549    fn test_parse_drop_index() {
56550        let result = Parser::parse_sql("DROP INDEX IF EXISTS idx_email ON users").unwrap();
56551        assert!(matches!(result[0], Expression::DropIndex(_)));
56552
56553        if let Expression::DropIndex(di) = &result[0] {
56554            assert!(di.if_exists);
56555            assert!(di.table.is_some());
56556        }
56557    }
56558
56559    #[test]
56560    fn test_parse_create_view() {
56561        let result =
56562            Parser::parse_sql("CREATE VIEW active_users AS SELECT * FROM users WHERE active = 1")
56563                .unwrap();
56564        assert!(matches!(result[0], Expression::CreateView(_)));
56565    }
56566
56567    #[test]
56568    fn test_parse_create_materialized_view() {
56569        let result =
56570            Parser::parse_sql("CREATE MATERIALIZED VIEW stats AS SELECT COUNT(*) FROM users")
56571                .unwrap();
56572        if let Expression::CreateView(cv) = &result[0] {
56573            assert!(cv.materialized);
56574        }
56575    }
56576
56577    #[test]
56578    fn test_hive_stored_by() {
56579        use crate::{transpile, DialectType};
56580        let sql = "CREATE EXTERNAL TABLE X (y INT) STORED BY 'x'";
56581        let result = transpile(sql, DialectType::Hive, DialectType::Hive);
56582        match result {
56583            Ok(output) => {
56584                assert_eq!(output[0].trim(), sql, "Identity failed for: {}", sql);
56585            }
56586            Err(e) => {
56587                panic!("Parse/generate failed for: {} -- {:?}", sql, e);
56588            }
56589        }
56590    }
56591
56592    #[test]
56593    fn test_hive_row_format_serde() {
56594        use crate::{transpile, DialectType};
56595
56596        // Test various Hive CREATE TABLE syntax
56597        let test_cases = vec![
56598            (
56599                "CREATE TABLE my_table (a7 ARRAY<DATE>)",
56600                "CREATE TABLE my_table (a7 ARRAY<DATE>)",
56601            ),
56602            (
56603                "CREATE EXTERNAL TABLE my_table (x INT) ROW FORMAT SERDE 'a'",
56604                "CREATE EXTERNAL TABLE my_table (x INT) ROW FORMAT SERDE 'a'",
56605            ),
56606            (
56607                "CREATE EXTERNAL TABLE my_table (x INT) STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c'",
56608                "CREATE EXTERNAL TABLE my_table (x INT) STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c'",
56609            ),
56610            (
56611                "CREATE EXTERNAL TABLE my_table (x INT) LOCATION 'd'",
56612                "CREATE EXTERNAL TABLE my_table (x INT) LOCATION 'd'",
56613            ),
56614            (
56615                "CREATE EXTERNAL TABLE my_table (x INT) TBLPROPERTIES ('e'='f')",
56616                "CREATE EXTERNAL TABLE my_table (x INT) TBLPROPERTIES ('e'='f')",
56617            ),
56618            (
56619                "CREATE EXTERNAL TABLE X (y INT) STORED BY 'x'",
56620                "CREATE EXTERNAL TABLE X (y INT) STORED BY 'x'",
56621            ),
56622        ];
56623
56624        for (sql, expected) in &test_cases {
56625            let result = transpile(sql, DialectType::Hive, DialectType::Hive);
56626            match result {
56627                Ok(output) => {
56628                    assert_eq!(output[0].trim(), *expected, "Identity failed for: {}", sql);
56629                }
56630                Err(e) => {
56631                    panic!("Parse/generate failed for: {} -- {:?}", sql, e);
56632                }
56633            }
56634        }
56635
56636        // Test full case with all Hive table properties
56637        let sql = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";
56638        let result = transpile(sql, DialectType::Hive, DialectType::Hive);
56639        match result {
56640            Ok(output) => {
56641                assert_eq!(output[0].trim(), sql, "Identity failed for: {}", sql);
56642            }
56643            Err(e) => {
56644                panic!("Parse/generate failed for: {} -- {:?}", sql, e);
56645            }
56646        }
56647    }
56648
56649    #[test]
56650    fn test_parse_drop_view() {
56651        let result = Parser::parse_sql("DROP VIEW IF EXISTS active_users").unwrap();
56652        assert!(matches!(result[0], Expression::DropView(_)));
56653    }
56654
56655    #[test]
56656    fn test_parse_truncate() {
56657        let result = Parser::parse_sql("TRUNCATE TABLE users CASCADE").unwrap();
56658        assert!(matches!(result[0], Expression::Truncate(_)));
56659
56660        if let Expression::Truncate(tr) = &result[0] {
56661            assert!(tr.cascade);
56662        }
56663    }
56664
56665    // Tests for typed aggregate functions
56666    #[test]
56667    fn test_parse_typed_aggregates() {
56668        // COUNT with DISTINCT
56669        let result = Parser::parse_sql("SELECT COUNT(DISTINCT user_id)").unwrap();
56670        let select = result[0].as_select().unwrap();
56671        if let Expression::Count(c) = &select.expressions[0] {
56672            assert!(c.distinct);
56673            assert!(!c.star);
56674        } else {
56675            panic!("Expected Count expression");
56676        }
56677
56678        // AVG
56679        let result = Parser::parse_sql("SELECT AVG(price)").unwrap();
56680        let select = result[0].as_select().unwrap();
56681        assert!(matches!(select.expressions[0], Expression::Avg(_)));
56682
56683        // MIN/MAX
56684        let result = Parser::parse_sql("SELECT MIN(a), MAX(b)").unwrap();
56685        let select = result[0].as_select().unwrap();
56686        assert!(matches!(select.expressions[0], Expression::Min(_)));
56687        assert!(matches!(select.expressions[1], Expression::Max(_)));
56688
56689        // STDDEV/VARIANCE
56690        let result = Parser::parse_sql("SELECT STDDEV(x), VARIANCE(y)").unwrap();
56691        let select = result[0].as_select().unwrap();
56692        assert!(matches!(select.expressions[0], Expression::Stddev(_)));
56693        assert!(matches!(select.expressions[1], Expression::Variance(_)));
56694    }
56695
56696    #[test]
56697    fn test_parse_typed_window_functions() {
56698        // ROW_NUMBER
56699        let result = Parser::parse_sql("SELECT ROW_NUMBER() OVER (ORDER BY id)").unwrap();
56700        let select = result[0].as_select().unwrap();
56701        if let Expression::WindowFunction(wf) = &select.expressions[0] {
56702            assert!(matches!(wf.this, Expression::RowNumber(_)));
56703        } else {
56704            panic!("Expected WindowFunction");
56705        }
56706
56707        // RANK and DENSE_RANK
56708        let result = Parser::parse_sql("SELECT RANK() OVER (), DENSE_RANK() OVER ()").unwrap();
56709        let select = result[0].as_select().unwrap();
56710        if let Expression::WindowFunction(wf) = &select.expressions[0] {
56711            assert!(matches!(wf.this, Expression::Rank(_)));
56712        }
56713        if let Expression::WindowFunction(wf) = &select.expressions[1] {
56714            assert!(matches!(wf.this, Expression::DenseRank(_)));
56715        }
56716
56717        // LEAD/LAG
56718        let result = Parser::parse_sql("SELECT LEAD(val, 1, 0) OVER (ORDER BY id)").unwrap();
56719        let select = result[0].as_select().unwrap();
56720        if let Expression::WindowFunction(wf) = &select.expressions[0] {
56721            if let Expression::Lead(f) = &wf.this {
56722                assert!(f.offset.is_some());
56723                assert!(f.default.is_some());
56724            } else {
56725                panic!("Expected Lead");
56726            }
56727        }
56728
56729        // NTILE
56730        let result = Parser::parse_sql("SELECT NTILE(4) OVER (ORDER BY score)").unwrap();
56731        let select = result[0].as_select().unwrap();
56732        if let Expression::WindowFunction(wf) = &select.expressions[0] {
56733            assert!(matches!(wf.this, Expression::NTile(_)));
56734        }
56735    }
56736
56737    #[test]
56738    fn test_parse_string_functions() {
56739        // CONTAINS, STARTS_WITH, ENDS_WITH
56740        let result = Parser::parse_sql("SELECT CONTAINS(name, 'test')").unwrap();
56741        let select = result[0].as_select().unwrap();
56742        assert!(matches!(select.expressions[0], Expression::Contains(_)));
56743
56744        let result = Parser::parse_sql("SELECT STARTS_WITH(name, 'A')").unwrap();
56745        let select = result[0].as_select().unwrap();
56746        assert!(matches!(select.expressions[0], Expression::StartsWith(_)));
56747
56748        let result = Parser::parse_sql("SELECT ENDS_WITH(name, 'z')").unwrap();
56749        let select = result[0].as_select().unwrap();
56750        assert!(matches!(select.expressions[0], Expression::EndsWith(_)));
56751    }
56752
56753    #[test]
56754    fn test_parse_math_functions() {
56755        // MOD function
56756        let result = Parser::parse_sql("SELECT MOD(10, 3)").unwrap();
56757        let select = result[0].as_select().unwrap();
56758        assert!(matches!(select.expressions[0], Expression::ModFunc(_)));
56759
56760        // RANDOM and RAND
56761        let result = Parser::parse_sql("SELECT RANDOM()").unwrap();
56762        let select = result[0].as_select().unwrap();
56763        assert!(matches!(select.expressions[0], Expression::Random(_)));
56764
56765        let result = Parser::parse_sql("SELECT RAND(42)").unwrap();
56766        let select = result[0].as_select().unwrap();
56767        assert!(matches!(select.expressions[0], Expression::Rand(_)));
56768
56769        // Trigonometric functions
56770        let result = Parser::parse_sql("SELECT SIN(x), COS(x), TAN(x)").unwrap();
56771        let select = result[0].as_select().unwrap();
56772        assert!(matches!(select.expressions[0], Expression::Sin(_)));
56773        assert!(matches!(select.expressions[1], Expression::Cos(_)));
56774        assert!(matches!(select.expressions[2], Expression::Tan(_)));
56775    }
56776
56777    #[test]
56778    fn test_parse_date_functions() {
56779        // Date part extraction functions
56780        let result =
56781            Parser::parse_sql("SELECT YEAR(date_col), MONTH(date_col), DAY(date_col)").unwrap();
56782        let select = result[0].as_select().unwrap();
56783        assert!(matches!(select.expressions[0], Expression::Year(_)));
56784        assert!(matches!(select.expressions[1], Expression::Month(_)));
56785        assert!(matches!(select.expressions[2], Expression::Day(_)));
56786
56787        // EPOCH and EPOCH_MS
56788        let result = Parser::parse_sql("SELECT EPOCH(ts), EPOCH_MS(ts)").unwrap();
56789        let select = result[0].as_select().unwrap();
56790        assert!(matches!(select.expressions[0], Expression::Epoch(_)));
56791        assert!(matches!(select.expressions[1], Expression::EpochMs(_)));
56792    }
56793
56794    #[test]
56795    fn test_parse_array_functions() {
56796        // ARRAY_LENGTH
56797        let result = Parser::parse_sql("SELECT ARRAY_LENGTH(arr)").unwrap();
56798        let select = result[0].as_select().unwrap();
56799        assert!(matches!(select.expressions[0], Expression::ArrayLength(_)));
56800
56801        // ARRAY_CONTAINS
56802        let result = Parser::parse_sql("SELECT ARRAY_CONTAINS(arr, 1)").unwrap();
56803        let select = result[0].as_select().unwrap();
56804        assert!(matches!(
56805            select.expressions[0],
56806            Expression::ArrayContains(_)
56807        ));
56808
56809        // EXPLODE
56810        let result = Parser::parse_sql("SELECT EXPLODE(arr)").unwrap();
56811        let select = result[0].as_select().unwrap();
56812        assert!(matches!(select.expressions[0], Expression::Explode(_)));
56813    }
56814
56815    #[test]
56816    fn test_parse_json_functions() {
56817        // JSON_EXTRACT
56818        let result = Parser::parse_sql("SELECT JSON_EXTRACT(data, '$.name')").unwrap();
56819        let select = result[0].as_select().unwrap();
56820        assert!(matches!(select.expressions[0], Expression::JsonExtract(_)));
56821
56822        // JSON_ARRAY_LENGTH
56823        let result = Parser::parse_sql("SELECT JSON_ARRAY_LENGTH(arr)").unwrap();
56824        let select = result[0].as_select().unwrap();
56825        assert!(matches!(
56826            select.expressions[0],
56827            Expression::JsonArrayLength(_)
56828        ));
56829
56830        // TO_JSON and PARSE_JSON
56831        let result = Parser::parse_sql("SELECT TO_JSON(obj), PARSE_JSON(str)").unwrap();
56832        let select = result[0].as_select().unwrap();
56833        assert!(matches!(select.expressions[0], Expression::ToJson(_)));
56834        assert!(matches!(select.expressions[1], Expression::ParseJson(_)));
56835
56836        // JSON literal: JSON '"foo"' -> ParseJson
56837        let result = Parser::parse_sql("SELECT JSON '\"foo\"'").unwrap();
56838        let select = result[0].as_select().unwrap();
56839        assert!(
56840            matches!(select.expressions[0], Expression::ParseJson(_)),
56841            "Expected ParseJson, got: {:?}",
56842            select.expressions[0]
56843        );
56844    }
56845
56846    #[test]
56847    fn test_parse_map_functions() {
56848        // MAP_KEYS and MAP_VALUES
56849        let result = Parser::parse_sql("SELECT MAP_KEYS(m), MAP_VALUES(m)").unwrap();
56850        let select = result[0].as_select().unwrap();
56851        assert!(matches!(select.expressions[0], Expression::MapKeys(_)));
56852        assert!(matches!(select.expressions[1], Expression::MapValues(_)));
56853
56854        // ELEMENT_AT
56855        let result = Parser::parse_sql("SELECT ELEMENT_AT(m, 'key')").unwrap();
56856        let select = result[0].as_select().unwrap();
56857        assert!(matches!(select.expressions[0], Expression::ElementAt(_)));
56858    }
56859
56860    #[test]
56861    fn test_parse_date_literals() {
56862        // DATE literal (generic mode normalizes to CAST)
56863        let result = Parser::parse_sql("SELECT DATE '2024-01-15'").unwrap();
56864        let select = result[0].as_select().unwrap();
56865        match &select.expressions[0] {
56866            Expression::Cast(cast) => {
56867                match &cast.this {
56868                    Expression::Literal(Literal::String(s)) => assert_eq!(s, "2024-01-15"),
56869                    other => panic!("Expected String literal in Cast, got {:?}", other),
56870                }
56871                assert!(matches!(cast.to, DataType::Date));
56872            }
56873            other => panic!("Expected Cast expression, got {:?}", other),
56874        }
56875
56876        // TIME literal
56877        let result = Parser::parse_sql("SELECT TIME '10:30:00'").unwrap();
56878        let select = result[0].as_select().unwrap();
56879        match &select.expressions[0] {
56880            Expression::Literal(Literal::Time(t)) => {
56881                assert_eq!(t, "10:30:00");
56882            }
56883            _ => panic!("Expected Time literal"),
56884        }
56885
56886        // TIMESTAMP literal -> CAST in generic mode
56887        let result = Parser::parse_sql("SELECT TIMESTAMP '2024-01-15 10:30:00'").unwrap();
56888        let select = result[0].as_select().unwrap();
56889        match &select.expressions[0] {
56890            Expression::Cast(cast) => {
56891                match &cast.this {
56892                    Expression::Literal(Literal::String(s)) => assert_eq!(s, "2024-01-15 10:30:00"),
56893                    other => panic!("Expected String literal inside Cast, got {:?}", other),
56894                }
56895                assert!(matches!(
56896                    &cast.to,
56897                    DataType::Timestamp {
56898                        precision: None,
56899                        timezone: false
56900                    }
56901                ));
56902            }
56903            _ => panic!("Expected Cast expression for TIMESTAMP literal"),
56904        }
56905    }
56906
56907    #[test]
56908    fn test_parse_star_exclude() {
56909        // EXCLUDE with multiple columns
56910        let result = Parser::parse_sql("SELECT * EXCLUDE (col1, col2) FROM t").unwrap();
56911        let select = result[0].as_select().unwrap();
56912        if let Expression::Star(star) = &select.expressions[0] {
56913            assert!(star.except.is_some());
56914            let except = star.except.as_ref().unwrap();
56915            assert_eq!(except.len(), 2);
56916            assert_eq!(except[0].name, "col1");
56917            assert_eq!(except[1].name, "col2");
56918        } else {
56919            panic!("Expected Star expression");
56920        }
56921
56922        // EXCEPT (BigQuery syntax)
56923        let result = Parser::parse_sql("SELECT * EXCEPT (id, created_at) FROM t").unwrap();
56924        let select = result[0].as_select().unwrap();
56925        if let Expression::Star(star) = &select.expressions[0] {
56926            assert!(star.except.is_some());
56927        } else {
56928            panic!("Expected Star expression");
56929        }
56930
56931        // table.* with EXCLUDE
56932        let result = Parser::parse_sql("SELECT t.* EXCLUDE (col1) FROM t").unwrap();
56933        let select = result[0].as_select().unwrap();
56934        if let Expression::Star(star) = &select.expressions[0] {
56935            assert!(star.table.is_some());
56936            assert_eq!(star.table.as_ref().unwrap().name, "t");
56937            assert!(star.except.is_some());
56938        } else {
56939            panic!("Expected Star expression");
56940        }
56941    }
56942
56943    #[test]
56944    fn test_parse_star_replace() {
56945        // REPLACE with single expression
56946        let result = Parser::parse_sql("SELECT * REPLACE (UPPER(name) AS name) FROM t").unwrap();
56947        let select = result[0].as_select().unwrap();
56948        if let Expression::Star(star) = &select.expressions[0] {
56949            assert!(star.replace.is_some());
56950            let replace = star.replace.as_ref().unwrap();
56951            assert_eq!(replace.len(), 1);
56952            assert_eq!(replace[0].alias.name, "name");
56953        } else {
56954            panic!("Expected Star expression");
56955        }
56956
56957        // REPLACE with multiple expressions
56958        let result = Parser::parse_sql("SELECT * REPLACE (a + 1 AS a, b * 2 AS b) FROM t").unwrap();
56959        let select = result[0].as_select().unwrap();
56960        if let Expression::Star(star) = &select.expressions[0] {
56961            let replace = star.replace.as_ref().unwrap();
56962            assert_eq!(replace.len(), 2);
56963        } else {
56964            panic!("Expected Star expression");
56965        }
56966    }
56967
56968    #[test]
56969    fn test_parse_star_rename() {
56970        // RENAME with multiple columns
56971        let result =
56972            Parser::parse_sql("SELECT * RENAME (old_col AS new_col, x AS y) FROM t").unwrap();
56973        let select = result[0].as_select().unwrap();
56974        if let Expression::Star(star) = &select.expressions[0] {
56975            assert!(star.rename.is_some());
56976            let rename = star.rename.as_ref().unwrap();
56977            assert_eq!(rename.len(), 2);
56978            assert_eq!(rename[0].0.name, "old_col");
56979            assert_eq!(rename[0].1.name, "new_col");
56980        } else {
56981            panic!("Expected Star expression");
56982        }
56983    }
56984
56985    #[test]
56986    fn test_parse_star_combined() {
56987        // EXCLUDE + REPLACE combined
56988        let result =
56989            Parser::parse_sql("SELECT * EXCLUDE (id) REPLACE (name || '!' AS name) FROM t")
56990                .unwrap();
56991        let select = result[0].as_select().unwrap();
56992        if let Expression::Star(star) = &select.expressions[0] {
56993            assert!(star.except.is_some());
56994            assert!(star.replace.is_some());
56995        } else {
56996            panic!("Expected Star expression");
56997        }
56998    }
56999
57000    #[test]
57001    fn test_parse_spatial_types() {
57002        // GEOMETRY with subtype and SRID (PostgreSQL syntax)
57003        let result = Parser::parse_sql("CREATE TABLE t (geom GEOMETRY(Point, 4326))").unwrap();
57004        if let Expression::CreateTable(ct) = &result[0] {
57005            assert_eq!(ct.columns.len(), 1);
57006            match &ct.columns[0].data_type {
57007                DataType::Geometry { subtype, srid } => {
57008                    assert_eq!(subtype.as_deref(), Some("POINT"));
57009                    assert_eq!(*srid, Some(4326));
57010                }
57011                _ => panic!("Expected Geometry type"),
57012            }
57013        }
57014
57015        // GEOGRAPHY without parameters
57016        let result = Parser::parse_sql("CREATE TABLE t (loc GEOGRAPHY)").unwrap();
57017        if let Expression::CreateTable(ct) = &result[0] {
57018            match &ct.columns[0].data_type {
57019                DataType::Geography { subtype, srid } => {
57020                    assert!(subtype.is_none());
57021                    assert!(srid.is_none());
57022                }
57023                _ => panic!("Expected Geography type"),
57024            }
57025        }
57026
57027        // GEOMETRY subtype only (no SRID)
57028        let result = Parser::parse_sql("CREATE TABLE t (geom GEOMETRY(LineString))").unwrap();
57029        if let Expression::CreateTable(ct) = &result[0] {
57030            match &ct.columns[0].data_type {
57031                DataType::Geometry { subtype, srid } => {
57032                    assert_eq!(subtype.as_deref(), Some("LINESTRING"));
57033                    assert!(srid.is_none());
57034                }
57035                _ => panic!("Expected Geometry type"),
57036            }
57037        }
57038
57039        // Simple POINT type (MySQL-style without SRID)
57040        let result = Parser::parse_sql("CREATE TABLE t (pt POINT)").unwrap();
57041        if let Expression::CreateTable(ct) = &result[0] {
57042            match &ct.columns[0].data_type {
57043                DataType::Geometry { subtype, srid } => {
57044                    assert_eq!(subtype.as_deref(), Some("POINT"));
57045                    assert!(srid.is_none());
57046                }
57047                _ => panic!("Expected Geometry type"),
57048            }
57049        }
57050    }
57051
57052    #[test]
57053    fn test_parse_duckdb_pivot_simple() {
57054        let sql = "PIVOT Cities ON Year USING SUM(Population)";
57055        let result = Parser::parse_sql(sql);
57056        assert!(
57057            result.is_ok(),
57058            "Failed to parse: {} - {:?}",
57059            sql,
57060            result.err()
57061        );
57062        let stmts = result.unwrap();
57063        assert_eq!(
57064            stmts.len(),
57065            1,
57066            "Expected 1 statement, got {}: {:?}",
57067            stmts.len(),
57068            stmts
57069        );
57070        match &stmts[0] {
57071            Expression::Pivot(p) => {
57072                assert!(!p.unpivot);
57073                assert!(!p.expressions.is_empty(), "Should have ON expressions");
57074                assert!(!p.using.is_empty(), "Should have USING expressions");
57075            }
57076            other => panic!("Expected Pivot, got {:?}", other),
57077        }
57078    }
57079
57080    #[test]
57081    fn test_parse_duckdb_pivot_with_group_by() {
57082        let sql = "PIVOT Cities ON Year USING SUM(Population) GROUP BY Country";
57083        let result = Parser::parse_sql(sql);
57084        assert!(
57085            result.is_ok(),
57086            "Failed to parse: {} - {:?}",
57087            sql,
57088            result.err()
57089        );
57090    }
57091
57092    #[test]
57093    fn test_parse_duckdb_unpivot() {
57094        let sql = "UNPIVOT monthly_sales ON jan, feb, mar INTO NAME month VALUE sales";
57095        let result = Parser::parse_sql(sql);
57096        assert!(
57097            result.is_ok(),
57098            "Failed to parse: {} - {:?}",
57099            sql,
57100            result.err()
57101        );
57102    }
57103
57104    #[test]
57105    fn test_parse_standard_pivot_in_from() {
57106        let sql = "SELECT * FROM cities PIVOT(SUM(population) FOR year IN (2000, 2010, 2020))";
57107        let result = Parser::parse_sql(sql);
57108        assert!(
57109            result.is_ok(),
57110            "Failed to parse: {} - {:?}",
57111            sql,
57112            result.err()
57113        );
57114    }
57115
57116    fn assert_pivot_roundtrip(sql: &str) {
57117        let parsed = crate::parse(sql, crate::DialectType::DuckDB);
57118        assert!(
57119            parsed.is_ok(),
57120            "Failed to parse: {} - {:?}",
57121            sql,
57122            parsed.err()
57123        );
57124        let stmts = parsed.unwrap();
57125        assert_eq!(stmts.len(), 1, "Expected 1 statement for: {}", sql);
57126        let generated = crate::generate(&stmts[0], crate::DialectType::DuckDB);
57127        assert!(
57128            generated.is_ok(),
57129            "Failed to generate: {} - {:?}",
57130            sql,
57131            generated.err()
57132        );
57133        let result = generated.unwrap();
57134        assert_eq!(result.trim(), sql, "Round-trip mismatch for: {}", sql);
57135    }
57136
57137    fn assert_pivot_roundtrip_bq(sql: &str) {
57138        let parsed = crate::parse(sql, crate::DialectType::BigQuery);
57139        assert!(
57140            parsed.is_ok(),
57141            "Failed to parse: {} - {:?}",
57142            sql,
57143            parsed.err()
57144        );
57145        let stmts = parsed.unwrap();
57146        assert_eq!(stmts.len(), 1, "Expected 1 statement for: {}", sql);
57147        let generated = crate::generate(&stmts[0], crate::DialectType::BigQuery);
57148        assert!(
57149            generated.is_ok(),
57150            "Failed to generate: {} - {:?}",
57151            sql,
57152            generated.err()
57153        );
57154        let result = generated.unwrap();
57155        assert_eq!(result.trim(), sql, "Round-trip mismatch for: {}", sql);
57156    }
57157
57158    #[test]
57159    fn test_pivot_roundtrip_duckdb_simple() {
57160        assert_pivot_roundtrip("PIVOT Cities ON Year USING SUM(Population)");
57161    }
57162
57163    #[test]
57164    fn test_pivot_roundtrip_duckdb_group_by() {
57165        assert_pivot_roundtrip("PIVOT Cities ON Year USING SUM(Population) GROUP BY Country");
57166    }
57167
57168    #[test]
57169    fn test_pivot_roundtrip_duckdb_in_clause() {
57170        assert_pivot_roundtrip(
57171            "PIVOT Cities ON Year IN (2000, 2010) USING SUM(Population) GROUP BY Country",
57172        );
57173    }
57174
57175    #[test]
57176    fn test_pivot_roundtrip_duckdb_multiple_using() {
57177        assert_pivot_roundtrip("PIVOT Cities ON Year USING SUM(Population) AS total, MAX(Population) AS max GROUP BY Country");
57178    }
57179
57180    #[test]
57181    fn test_pivot_roundtrip_duckdb_multiple_on() {
57182        assert_pivot_roundtrip("PIVOT Cities ON Country, Name USING SUM(Population)");
57183    }
57184
57185    #[test]
57186    fn test_pivot_roundtrip_duckdb_concat_on() {
57187        assert_pivot_roundtrip("PIVOT Cities ON Country || '_' || Name USING SUM(Population)");
57188    }
57189
57190    #[test]
57191    fn test_pivot_roundtrip_duckdb_multiple_group_by() {
57192        assert_pivot_roundtrip("PIVOT Cities ON Year USING SUM(Population) GROUP BY Country, Name");
57193    }
57194
57195    #[test]
57196    fn test_pivot_roundtrip_duckdb_first() {
57197        assert_pivot_roundtrip("PIVOT Cities ON Year USING FIRST(Population)");
57198    }
57199
57200    #[test]
57201    fn test_unpivot_roundtrip_duckdb_basic() {
57202        assert_pivot_roundtrip(
57203            "UNPIVOT monthly_sales ON jan, feb, mar, apr, may, jun INTO NAME month VALUE sales",
57204        );
57205    }
57206
57207    #[test]
57208    fn test_unpivot_roundtrip_duckdb_subquery() {
57209        assert_pivot_roundtrip("UNPIVOT (SELECT 1 AS col1, 2 AS col2) ON foo, bar");
57210    }
57211
57212    #[test]
57213    fn test_pivot_roundtrip_duckdb_cte() {
57214        assert_pivot_roundtrip("WITH pivot_alias AS (PIVOT Cities ON Year USING SUM(Population) GROUP BY Country) SELECT * FROM pivot_alias");
57215    }
57216
57217    #[test]
57218    fn test_pivot_roundtrip_duckdb_subquery() {
57219        assert_pivot_roundtrip("SELECT * FROM (PIVOT Cities ON Year USING SUM(Population) GROUP BY Country) AS pivot_alias");
57220    }
57221
57222    #[test]
57223    fn test_pivot_roundtrip_standard_from() {
57224        assert_pivot_roundtrip("SELECT * FROM cities PIVOT(SUM(population) FOR year IN (2000, 2010, 2020) GROUP BY country)");
57225    }
57226
57227    #[test]
57228    fn test_pivot_roundtrip_standard_bare_in() {
57229        assert_pivot_roundtrip("SELECT * FROM t PIVOT(SUM(y) FOR foo IN y_enum)");
57230    }
57231
57232    #[test]
57233    fn test_unpivot_roundtrip_bigquery() {
57234        assert_pivot_roundtrip_bq("SELECT * FROM q UNPIVOT(values FOR quarter IN (b, c))");
57235    }
57236
57237    #[test]
57238    fn test_pivot_roundtrip_bigquery_aliases() {
57239        assert_pivot_roundtrip_bq("SELECT cars, apples FROM some_table PIVOT(SUM(total_counts) FOR products IN ('general.cars' AS cars, 'food.apples' AS apples))");
57240    }
57241
57242    #[test]
57243    fn test_unpivot_roundtrip_bigquery_parens() {
57244        assert_pivot_roundtrip_bq(
57245            "SELECT * FROM (SELECT * FROM `t`) AS a UNPIVOT((c) FOR c_name IN (v1, v2))",
57246        );
57247    }
57248
57249    #[test]
57250    fn test_pivot_roundtrip_bigquery_multi_agg() {
57251        // Note: BigQuery fixture expects implicit aliases to become explicit AS
57252        let sql = "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) AS d, COUNT(*) AS e FOR c IN ('x', 'y'))";
57253        assert_pivot_roundtrip_bq(sql);
57254    }
57255
57256    // Additional fixture tests for UNPIVOT with COLUMNS and grouped ON
57257    #[test]
57258    fn test_unpivot_roundtrip_duckdb_columns_exclude() {
57259        assert_pivot_roundtrip(
57260            "UNPIVOT monthly_sales ON COLUMNS(* EXCLUDE (empid, dept)) INTO NAME month VALUE sales",
57261        );
57262    }
57263
57264    #[test]
57265    fn test_unpivot_roundtrip_duckdb_grouped_columns() {
57266        assert_pivot_roundtrip("UNPIVOT monthly_sales ON (jan, feb, mar) AS q1, (apr, may, jun) AS q2 INTO NAME quarter VALUE month_1_sales, month_2_sales, month_3_sales");
57267    }
57268
57269    #[test]
57270    fn test_unpivot_roundtrip_duckdb_cte_columns() {
57271        assert_pivot_roundtrip("WITH unpivot_alias AS (UNPIVOT monthly_sales ON COLUMNS(* EXCLUDE (empid, dept)) INTO NAME month VALUE sales) SELECT * FROM unpivot_alias");
57272    }
57273
57274    #[test]
57275    fn test_unpivot_roundtrip_duckdb_subquery_columns() {
57276        assert_pivot_roundtrip("SELECT * FROM (UNPIVOT monthly_sales ON COLUMNS(* EXCLUDE (empid, dept)) INTO NAME month VALUE sales) AS unpivot_alias");
57277    }
57278
57279    #[test]
57280    fn test_pivot_roundtrip_duckdb_cte_with_columns() {
57281        assert_pivot_roundtrip("WITH cities(country, name, year, population) AS (SELECT 'NL', 'Amsterdam', 2000, 1005 UNION ALL SELECT 'US', 'Seattle', 2020, 738) PIVOT cities ON year USING SUM(population)");
57282    }
57283
57284    #[test]
57285    fn test_pivot_roundtrip_standard_first_with_alias() {
57286        // DuckDB fixture #73: comma before FOR is dropped in expected output
57287        let sql = "SELECT * FROM t PIVOT(FIRST(t) AS t, FOR quarter IN ('Q1', 'Q2'))";
57288        let expected = "SELECT * FROM t PIVOT(FIRST(t) AS t FOR quarter IN ('Q1', 'Q2'))";
57289        let parsed = crate::parse(sql, crate::DialectType::DuckDB);
57290        assert!(
57291            parsed.is_ok(),
57292            "Failed to parse: {} - {:?}",
57293            sql,
57294            parsed.err()
57295        );
57296        let stmts = parsed.unwrap();
57297        assert_eq!(stmts.len(), 1);
57298        let generated = crate::generate(&stmts[0], crate::DialectType::DuckDB);
57299        assert!(
57300            generated.is_ok(),
57301            "Failed to generate: {} - {:?}",
57302            sql,
57303            generated.err()
57304        );
57305        let result = generated.unwrap();
57306        assert_eq!(result.trim(), expected, "Round-trip mismatch");
57307    }
57308
57309    #[test]
57310    fn test_pivot_roundtrip_bigquery_implicit_alias() {
57311        // BigQuery fixture #134: implicit aliases become explicit AS
57312        let sql = "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) d, COUNT(*) e FOR c IN ('x', 'y'))";
57313        let expected = "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) AS d, COUNT(*) AS e FOR c IN ('x', 'y'))";
57314        let parsed = crate::parse(sql, crate::DialectType::BigQuery);
57315        assert!(
57316            parsed.is_ok(),
57317            "Failed to parse: {} - {:?}",
57318            sql,
57319            parsed.err()
57320        );
57321        let stmts = parsed.unwrap();
57322        assert_eq!(stmts.len(), 1);
57323        let generated = crate::generate(&stmts[0], crate::DialectType::BigQuery);
57324        assert!(
57325            generated.is_ok(),
57326            "Failed to generate: {} - {:?}",
57327            sql,
57328            generated.err()
57329        );
57330        let result = generated.unwrap();
57331        assert_eq!(result.trim(), expected, "Round-trip mismatch");
57332    }
57333
57334    #[test]
57335    fn test_duckdb_struct_enum_union_row_types() {
57336        use crate::DialectType;
57337
57338        // Helper to test roundtrip with DuckDB dialect - runs in a thread with larger stack
57339        fn check(sql: &str, expected: Option<&str>) {
57340            let sql = sql.to_string();
57341            let expected = expected.map(|s| s.to_string());
57342            let result = std::thread::Builder::new()
57343                .stack_size(16 * 1024 * 1024) // 16MB stack
57344                .spawn(move || {
57345                    let expected_out = expected.as_deref().unwrap_or(&sql);
57346                    let parsed = crate::parse(&sql, DialectType::DuckDB);
57347                    assert!(
57348                        parsed.is_ok(),
57349                        "Failed to parse: {} - {:?}",
57350                        sql,
57351                        parsed.err()
57352                    );
57353                    let stmts = parsed.unwrap();
57354                    assert!(!stmts.is_empty(), "No statements parsed: {}", sql);
57355                    let generated = crate::generate(&stmts[0], DialectType::DuckDB);
57356                    assert!(
57357                        generated.is_ok(),
57358                        "Failed to generate: {} - {:?}",
57359                        sql,
57360                        generated.err()
57361                    );
57362                    let result = generated.unwrap();
57363                    assert_eq!(result.trim(), expected_out, "Mismatch for: {}", sql);
57364                })
57365                .expect("Failed to spawn test thread")
57366                .join();
57367            assert!(result.is_ok(), "Test thread panicked");
57368        }
57369
57370        // UNION type
57371        check("CREATE TABLE tbl1 (u UNION(num INT, str TEXT))", None);
57372        // ENUM type
57373        check(
57374            "CREATE TABLE color (name ENUM('RED', 'GREEN', 'BLUE'))",
57375            None,
57376        );
57377        // ROW type -> STRUCT
57378        check(
57379            "SELECT CAST(ROW(1, 2) AS ROW(a INTEGER, b INTEGER))",
57380            Some("SELECT CAST(ROW(1, 2) AS STRUCT(a INT, b INT))"),
57381        );
57382        // STRUCT with parens
57383        check("CAST(x AS STRUCT(number BIGINT))", None);
57384        // STRUCT with quoted field names
57385        check(
57386            "CAST({'i': 1, 's': 'foo'} AS STRUCT(\"s\" TEXT, \"i\" INT))",
57387            None,
57388        );
57389        // Nested STRUCT
57390        check(
57391            "CAST(ROW(1, ROW(1)) AS STRUCT(number BIGINT, row STRUCT(number BIGINT)))",
57392            None,
57393        );
57394        // STRUCT with array suffix - test just the type parsing part
57395        // Note: STRUCT_PACK -> struct literal transform is a separate feature
57396        check("CAST(x AS STRUCT(a BIGINT)[][])", None);
57397        check("CAST(x AS STRUCT(a BIGINT)[])", None);
57398        // Double-colon cast with STRUCT type
57399        check("CAST({'a': 'b'} AS STRUCT(a TEXT))", None);
57400    }
57401
57402    // Helper for roundtrip identity tests
57403    fn roundtrip(sql: &str) -> String {
57404        let ast =
57405            Parser::parse_sql(sql).unwrap_or_else(|e| panic!("Parse error for '{}': {}", sql, e));
57406        crate::generator::Generator::sql(&ast[0])
57407            .unwrap_or_else(|e| panic!("Generate error for '{}': {}", sql, e))
57408    }
57409
57410    fn assert_roundtrip(sql: &str) {
57411        let result = roundtrip(sql);
57412        assert_eq!(result, sql, "\n  Input:    {}\n  Output:   {}", sql, result);
57413    }
57414
57415    fn assert_roundtrip_expected(sql: &str, expected: &str) {
57416        let result = roundtrip(sql);
57417        assert_eq!(
57418            result, expected,
57419            "\n  Input:    {}\n  Expected: {}\n  Output:   {}",
57420            sql, expected, result
57421        );
57422    }
57423
57424    #[test]
57425    fn test_xmlelement_basic() {
57426        assert_roundtrip("SELECT XMLELEMENT(NAME foo)");
57427    }
57428
57429    #[test]
57430    fn test_xmlelement_with_xmlattributes() {
57431        assert_roundtrip("SELECT XMLELEMENT(NAME foo, XMLATTRIBUTES('xyz' AS bar))");
57432    }
57433
57434    #[test]
57435    fn test_xmlelement_with_multiple_attrs() {
57436        assert_roundtrip("SELECT XMLELEMENT(NAME test, XMLATTRIBUTES(a, b)) FROM test");
57437    }
57438
57439    #[test]
57440    fn test_xmlelement_with_content() {
57441        assert_roundtrip(
57442            "SELECT XMLELEMENT(NAME foo, XMLATTRIBUTES(CURRENT_DATE AS bar), 'cont', 'ent')",
57443        );
57444    }
57445
57446    #[test]
57447    fn test_xmlelement_nested() {
57448        assert_roundtrip("SELECT XMLELEMENT(NAME foo, XMLATTRIBUTES('xyz' AS bar), XMLELEMENT(NAME abc), XMLCOMMENT('test'), XMLELEMENT(NAME xyz))");
57449    }
57450
57451    #[test]
57452    fn test_on_conflict_do_update() {
57453        assert_roundtrip("INSERT INTO newtable AS t(a, b, c) VALUES (1, 2, 3) ON CONFLICT(c) DO UPDATE SET a = t.a + 1 WHERE t.a < 1");
57454    }
57455
57456    #[test]
57457    fn test_on_conflict_do_nothing() {
57458        // ON CONFLICT(id) is the canonical form (no space before paren)
57459        assert_roundtrip_expected(
57460            "INSERT INTO test (id, name) VALUES (1, 'test') ON CONFLICT (id) DO NOTHING",
57461            "INSERT INTO test (id, name) VALUES (1, 'test') ON CONFLICT(id) DO NOTHING",
57462        );
57463    }
57464
57465    #[test]
57466    fn test_truncate_restart_identity() {
57467        assert_roundtrip("TRUNCATE TABLE t1 RESTART IDENTITY");
57468    }
57469
57470    #[test]
57471    fn test_truncate_restart_identity_restrict() {
57472        assert_roundtrip("TRUNCATE TABLE t1 RESTART IDENTITY RESTRICT");
57473    }
57474
57475    #[test]
57476    fn test_insert_by_name() {
57477        assert_roundtrip("INSERT INTO x BY NAME SELECT 1 AS y");
57478    }
57479
57480    #[test]
57481    fn test_insert_default_values_returning() {
57482        assert_roundtrip("INSERT INTO t DEFAULT VALUES RETURNING (c1)");
57483    }
57484
57485    #[test]
57486    fn test_union_all_by_name() {
57487        assert_roundtrip("SELECT 1 AS x UNION ALL BY NAME SELECT 2 AS x");
57488    }
57489
57490    #[test]
57491    fn test_minus_as_except() {
57492        // MINUS is Oracle/Redshift syntax for EXCEPT
57493        assert_roundtrip_expected(
57494            "SELECT foo, bar FROM table_1 MINUS SELECT foo, bar FROM table_2",
57495            "SELECT foo, bar FROM table_1 EXCEPT SELECT foo, bar FROM table_2",
57496        );
57497    }
57498
57499    #[test]
57500    fn test_filter_without_where() {
57501        assert_roundtrip_expected(
57502            "SELECT SUM(x) FILTER (x = 1)",
57503            "SELECT SUM(x) FILTER(WHERE x = 1)",
57504        );
57505    }
57506
57507    #[test]
57508    fn test_comment_on_materialized_view() {
57509        assert_roundtrip("COMMENT ON MATERIALIZED VIEW my_view IS 'this'");
57510    }
57511
57512    #[test]
57513    fn test_create_index_concurrently() {
57514        assert_roundtrip("CREATE INDEX CONCURRENTLY idx ON t(c)");
57515    }
57516
57517    #[test]
57518    fn test_create_index_if_not_exists() {
57519        assert_roundtrip("CREATE INDEX IF NOT EXISTS idx ON t(c)");
57520    }
57521
57522    #[test]
57523    fn test_alter_table_partition_hive() {
57524        // Hive: ALTER TABLE x PARTITION(y=z) ADD COLUMN a VARCHAR(10)
57525        assert_roundtrip("ALTER TABLE x PARTITION(y = z) ADD COLUMN a VARCHAR(10)");
57526    }
57527
57528    #[test]
57529    fn test_alter_table_change_column_hive() {
57530        // Hive/MySQL: CHANGE COLUMN old_name new_name data_type
57531        assert_roundtrip("ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)");
57532    }
57533
57534    #[test]
57535    fn test_alter_table_add_columns_hive() {
57536        // Hive/Spark: ADD COLUMNS (col1 TYPE, col2 TYPE)
57537        assert_roundtrip("ALTER TABLE X ADD COLUMNS (y INT, z STRING)");
57538    }
57539
57540    #[test]
57541    fn test_alter_table_add_columns_cascade_hive() {
57542        // Hive/Spark: ADD COLUMNS (col1 TYPE, col2 TYPE) CASCADE
57543        assert_roundtrip("ALTER TABLE X ADD COLUMNS (y INT, z STRING) CASCADE");
57544    }
57545
57546    #[test]
57547    fn test_group_by_with_cube() {
57548        // Hive/MySQL: GROUP BY ... WITH CUBE
57549        let sql = "SELECT key, value FROM T1 GROUP BY key, value WITH CUBE";
57550        let result = Parser::parse_sql(sql).unwrap();
57551        let select = result[0].as_select().unwrap();
57552
57553        if let Some(group_by) = &select.group_by {
57554            // Debug: print the expressions
57555            eprintln!("GROUP BY expressions: {:?}", group_by.expressions);
57556
57557            // Check if there's a Cube expression with empty expressions
57558            let has_cube = group_by.expressions.iter().any(|e| {
57559                if let Expression::Cube(c) = e {
57560                    c.expressions.is_empty()
57561                } else {
57562                    false
57563                }
57564            });
57565            assert!(
57566                has_cube,
57567                "Should have a Cube expression with empty expressions in GROUP BY"
57568            );
57569        } else {
57570            panic!("Should have GROUP BY clause");
57571        }
57572    }
57573
57574    #[test]
57575    fn test_group_by_with_rollup() {
57576        // Hive/MySQL: GROUP BY ... WITH ROLLUP
57577        let sql = "SELECT key, value FROM T1 GROUP BY key, value WITH ROLLUP";
57578        let result = Parser::parse_sql(sql).unwrap();
57579        let select = result[0].as_select().unwrap();
57580
57581        if let Some(group_by) = &select.group_by {
57582            // Check if there's a Rollup expression with empty expressions
57583            let has_rollup = group_by.expressions.iter().any(|e| {
57584                if let Expression::Rollup(r) = e {
57585                    r.expressions.is_empty()
57586                } else {
57587                    false
57588                }
57589            });
57590            assert!(
57591                has_rollup,
57592                "Should have a Rollup expression with empty expressions in GROUP BY"
57593            );
57594        } else {
57595            panic!("Should have GROUP BY clause");
57596        }
57597    }
57598}
57599
/// Parse-only regression tests for the `(+)` join marker and for quoted
/// dot-qualified columns / `XMLTABLE`, mostly using the Oracle dialect.
#[cfg(test)]
mod join_marker_tests {
    use super::*;
    use crate::dialects::DialectType;

    /// Parses `sql` with the Oracle dialect, prints the outcome and fails the
    /// calling test if parsing returned an error.
    #[track_caller]
    fn expect_oracle_parses(sql: &str) {
        let outcome = crate::dialects::Dialect::get(DialectType::Oracle).parse(sql);
        println!("Result: {:?}", outcome);
        assert!(outcome.is_ok(), "Parse error: {:?}", outcome.err());
    }

    #[test]
    fn test_oracle_join_marker_simple() {
        // This case goes through `Parser::parse_sql` rather than the Oracle dialect.
        let outcome = Parser::parse_sql("select a.baz from a where a.baz = b.baz (+)");
        println!("Result: {:?}", outcome);
        assert!(outcome.is_ok(), "Parse error: {:?}", outcome.err());
    }

    #[test]
    fn test_oracle_join_marker_with_comma_join_and_aliases() {
        expect_oracle_parses("SELECT e1.x, e2.x FROM e e1, e e2 WHERE e1.y = e2.y (+)");
    }

    #[test]
    fn test_oracle_xmltable_with_quoted_dot_columns() {
        expect_oracle_parses(
            "SELECT warehouse_name warehouse,\n   warehouse2.\"Water\", warehouse2.\"Rail\"\n   FROM warehouses,\n   XMLTABLE('/Warehouse'\n      PASSING warehouses.warehouse_spec\n      COLUMNS\n         \"Water\" varchar2(6) PATH 'WaterAccess',\n         \"Rail\" varchar2(6) PATH 'RailAccess')\n      warehouse2",
        );
    }

    #[test]
    fn test_oracle_quoted_dot_projection() {
        expect_oracle_parses(
            "SELECT warehouse2.\"Water\", warehouse2.\"Rail\" FROM warehouses warehouse2",
        );
    }

    #[test]
    fn test_oracle_xmltable_columns_only() {
        expect_oracle_parses(
            "SELECT * FROM XMLTABLE('/Warehouse' PASSING warehouses.warehouse_spec COLUMNS \"Water\" varchar2(6) PATH 'WaterAccess', \"Rail\" varchar2(6) PATH 'RailAccess') warehouse2",
        );
    }

    #[test]
    fn test_oracle_projection_alias_then_quoted_dot() {
        expect_oracle_parses(
            "SELECT warehouse_name warehouse, warehouse2.\"Water\" FROM warehouses warehouse2",
        );
    }
}
57654
/// Parser regression tests: ClickHouse `FORMAT` / `PROJECTION` / ternary handling,
/// plus one DuckDB bare-number `INTERVAL` transpilation check.
#[cfg(test)]
mod clickhouse_parser_regression_tests {
    use crate::dialects::DialectType;

    /// `SELECT 1 FORMAT TabSeparated` must parse with the ClickHouse dialect.
    #[test]
    fn test_clickhouse_select_format_clause_not_alias() {
        let parsed = crate::dialects::Dialect::get(DialectType::ClickHouse)
            .parse("SELECT 1 FORMAT TabSeparated");
        assert!(parsed.is_ok(), "Parse error: {:?}", parsed.err());
    }

    /// `PROJECTION name (SELECT ... GROUP BY ...)` entries inside CREATE TABLE must parse.
    #[test]
    fn test_clickhouse_projection_select_group_by_parses() {
        let parsed = crate::dialects::Dialect::get(DialectType::ClickHouse).parse(
            "CREATE TABLE t (a String, b String, c UInt64, PROJECTION p1 (SELECT a, sum(c) GROUP BY a, b), PROJECTION p2 (SELECT b, sum(c) GROUP BY b)) ENGINE=MergeTree()",
        );
        assert!(parsed.is_ok(), "Parse error: {:?}", parsed.err());
    }

    /// ClickHouse ternary operator AST structure tests.
    /// Ported from Python sqlglot: tests/dialects/test_clickhouse.py::test_ternary (lines 765-778).
    /// Verifies that `x ? (y ? 1 : 2) : 3` parses into nested IfFunc nodes
    /// with the correct AST shape. Only node kinds are checked, not the
    /// identifiers or literal values they carry.
    #[test]
    fn test_clickhouse_ternary_ast_structure() {
        use crate::expressions::Expression;

        let parsed = crate::parse_one("x ? (y ? 1 : 2) : 3", DialectType::ClickHouse);
        assert!(parsed.is_ok(), "Parse error: {:?}", parsed.err());
        let root = parsed.unwrap();

        // The root node is the outer ternary.
        let Expression::IfFunc(outer) = &root else {
            panic!("Expected IfFunc, got {:?}", std::mem::discriminant(&root));
        };

        // Outer condition (`x`) is a column reference.
        assert!(
            matches!(&outer.condition, Expression::Column(_)),
            "Expected condition to be Column, got {:?}",
            std::mem::discriminant(&outer.condition)
        );

        // Outer true branch is the parenthesized inner ternary.
        assert!(
            matches!(&outer.true_value, Expression::Paren(_)),
            "Expected true_value to be Paren, got {:?}",
            std::mem::discriminant(&outer.true_value)
        );

        // Outer false branch (`3`) is a literal.
        let outer_else = outer.false_value.as_ref().expect("Expected false_value");
        assert!(
            matches!(outer_else, Expression::Literal(_)),
            "Expected false_value to be Literal, got {:?}",
            std::mem::discriminant(outer_else)
        );

        // Step into the parentheses; the Paren kind was asserted just above.
        let Expression::Paren(paren) = &outer.true_value else {
            unreachable!()
        };
        let Expression::IfFunc(inner) = &paren.this else {
            panic!(
                "Expected nested IfFunc, got {:?}",
                std::mem::discriminant(&paren.this)
            );
        };

        // Inner condition (`y`) is a column reference.
        assert!(
            matches!(&inner.condition, Expression::Column(_)),
            "Expected nested condition to be Column, got {:?}",
            std::mem::discriminant(&inner.condition)
        );

        // Inner true branch (`1`) is a literal.
        assert!(
            matches!(&inner.true_value, Expression::Literal(_)),
            "Expected nested true_value to be Literal, got {:?}",
            std::mem::discriminant(&inner.true_value)
        );

        // Inner false branch (`2`) is a literal.
        let inner_else = inner
            .false_value
            .as_ref()
            .expect("Expected nested false_value");
        assert!(
            matches!(inner_else, Expression::Literal(_)),
            "Expected nested false_value to be Literal, got {:?}",
            std::mem::discriminant(inner_else)
        );
    }

    /// Verify that `a AND b ? 1 : 2` has And as the ternary condition
    /// (AND binds tighter than ?).
    /// Ported from Python sqlglot: test_clickhouse.py line 778.
    #[test]
    fn test_clickhouse_ternary_and_precedence() {
        use crate::expressions::Expression;

        let parsed = crate::parse_one("a and b ? 1 : 2", DialectType::ClickHouse);
        assert!(parsed.is_ok(), "Parse error: {:?}", parsed.err());
        let root = parsed.unwrap();

        let Expression::IfFunc(ternary) = &root else {
            panic!("Expected IfFunc, got {:?}", std::mem::discriminant(&root));
        };

        // The whole `a and b` must be the condition, not just Column "b".
        assert!(
            matches!(&ternary.condition, Expression::And(_)),
            "Expected condition to be And, got {:?}",
            std::mem::discriminant(&ternary.condition)
        );
    }

    /// DuckDB `INTERVAL 3 DAY` (bare number) parses, and transpiles to both DuckDB
    /// and Hive with the interval value quoted.
    #[test]
    fn test_parse_interval_bare_number_duckdb() {
        use crate::dialects::{Dialect, DialectType};

        let sql = "SELECT CAST('2018-01-01 00:00:00' AS DATE) + INTERVAL 3 DAY";
        let duckdb = Dialect::get(DialectType::DuckDB);

        let statements = duckdb
            .parse(sql)
            .unwrap_or_else(|e| panic!("Failed to parse DuckDB INTERVAL 3 DAY: {}", e));
        assert!(
            !statements.is_empty(),
            "Should parse to at least one statement"
        );

        // Test transpilation to DuckDB target - should normalize number to quoted string
        let to_duckdb = duckdb.transpile_to(sql, DialectType::DuckDB).unwrap();
        assert_eq!(
            to_duckdb[0],
            "SELECT CAST('2018-01-01 00:00:00' AS DATE) + INTERVAL '3' DAY",
            "DuckDB output should have quoted interval value"
        );

        // Test transpilation to Hive target
        let to_hive = duckdb.transpile_to(sql, DialectType::Hive).unwrap();
        assert_eq!(
            to_hive[0],
            "SELECT CAST('2018-01-01 00:00:00' AS DATE) + INTERVAL '3' DAY",
            "Hive output should have quoted interval value"
        );
    }
}