Skip to main content

polyglot_sql/
transforms.rs

1//! SQL AST Transforms
2//!
3//! This module provides functions to transform SQL ASTs for dialect compatibility.
4//! These transforms are used during transpilation to convert dialect-specific features
5//! to forms that are supported by the target dialect.
6//!
7//! Based on the Python implementation in `sqlglot/transforms.py`.
8
9use crate::dialects::transform_recursive;
10use crate::dialects::{Dialect, DialectType};
11use crate::error::Result;
12use crate::expressions::{
13    Alias, BinaryOp, BooleanLiteral, Cast, DataType, Exists, Expression, From, Function,
14    Identifier, Join, JoinKind, Lateral, LateralView, Literal, NamedArgSeparator, NamedArgument,
15    Over, Select, StructField, Subquery, UnaryFunc, UnnestFunc, Where,
16};
17use std::cell::RefCell;
18
19/// Apply a chain of transforms to an expression
20///
21/// # Arguments
22/// * `expr` - The expression to transform
23/// * `transforms` - A list of transform functions to apply in order
24///
25/// # Returns
26/// The transformed expression
27pub fn preprocess<F>(expr: Expression, transforms: &[F]) -> Result<Expression>
28where
29    F: Fn(Expression) -> Result<Expression>,
30{
31    let mut result = expr;
32    for transform in transforms {
33        result = transform(result)?;
34    }
35    Ok(result)
36}
37
38/// Convert UNNEST to EXPLODE (for Spark/Hive compatibility)
39///
40/// UNNEST is standard SQL but Spark uses EXPLODE instead.
41pub fn unnest_to_explode(expr: Expression) -> Result<Expression> {
42    match expr {
43        Expression::Unnest(unnest) => {
44            Ok(Expression::Explode(Box::new(UnaryFunc::new(unnest.this))))
45        }
46        _ => Ok(expr),
47    }
48}
49
50/// Convert CROSS JOIN UNNEST to LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks.
51///
52/// This is a SELECT-level structural transformation that:
53/// 1. Converts UNNEST in FROM clause to INLINE/EXPLODE
54/// 2. Converts CROSS JOIN (LATERAL) UNNEST to LATERAL VIEW entries
55/// 3. For single-arg UNNEST: uses EXPLODE
56/// 4. For multi-arg UNNEST: uses INLINE(ARRAYS_ZIP(...))
57///
58/// Based on Python sqlglot's `unnest_to_explode` transform in transforms.py (lines 290-391).
59pub fn unnest_to_explode_select(expr: Expression) -> Result<Expression> {
60    transform_recursive(expr, &unnest_to_explode_select_inner)
61}
62
63/// Helper to determine the UDTF function for an UNNEST expression.
64/// Single-arg UNNEST → EXPLODE, multi-arg → INLINE
65fn make_udtf_expr(unnest: &UnnestFunc) -> Expression {
66    let has_multi_expr = !unnest.expressions.is_empty();
67    if has_multi_expr {
68        // Multi-arg: INLINE(ARRAYS_ZIP(arg1, arg2, ...))
69        let mut all_args = vec![unnest.this.clone()];
70        all_args.extend(unnest.expressions.iter().cloned());
71        let arrays_zip =
72            Expression::Function(Box::new(Function::new("ARRAYS_ZIP".to_string(), all_args)));
73        Expression::Function(Box::new(Function::new(
74            "INLINE".to_string(),
75            vec![arrays_zip],
76        )))
77    } else {
78        // Single-arg: EXPLODE(arg)
79        Expression::Explode(Box::new(UnaryFunc::new(unnest.this.clone())))
80    }
81}
82
83fn unnest_to_explode_select_inner(expr: Expression) -> Result<Expression> {
84    let Expression::Select(mut select) = expr else {
85        return Ok(expr);
86    };
87
88    // Process FROM clause: UNNEST items need conversion
89    if let Some(ref mut from) = select.from {
90        if from.expressions.len() >= 1 {
91            let mut new_from_exprs = Vec::new();
92            let mut new_lateral_views = Vec::new();
93            let first_is_unnest = is_unnest_expr(&from.expressions[0]);
94
95            for (idx, from_item) in from.expressions.drain(..).enumerate() {
96                if idx == 0 && first_is_unnest {
97                    // UNNEST is the first (and possibly only) item in FROM
98                    // Replace it with INLINE/EXPLODE, keeping alias
99                    let replaced = replace_from_unnest(from_item);
100                    new_from_exprs.push(replaced);
101                } else if idx > 0 && is_unnest_expr(&from_item) {
102                    // Additional UNNEST items in FROM (comma-joined) → LATERAL VIEW
103                    let (alias_name, column_aliases, unnest_func) = extract_unnest_info(from_item);
104                    let udtf = make_udtf_expr(&unnest_func);
105                    new_lateral_views.push(LateralView {
106                        this: udtf,
107                        table_alias: alias_name,
108                        column_aliases,
109                        outer: false,
110                    });
111                } else {
112                    new_from_exprs.push(from_item);
113                }
114            }
115
116            from.expressions = new_from_exprs;
117            // Append lateral views for comma-joined UNNESTs
118            select.lateral_views.extend(new_lateral_views);
119        }
120    }
121
122    // Process joins: CROSS JOIN (LATERAL) UNNEST → LATERAL VIEW
123    let mut remaining_joins = Vec::new();
124    for join in select.joins.drain(..) {
125        if matches!(join.kind, JoinKind::Cross | JoinKind::Inner) {
126            let (is_unnest, is_lateral) = check_join_unnest(&join.this);
127            if is_unnest {
128                // Extract UNNEST info from join, handling Lateral wrapper
129                let (lateral_alias, lateral_col_aliases, join_expr) = if is_lateral {
130                    if let Expression::Lateral(lat) = join.this {
131                        // Extract alias from Lateral struct
132                        let alias = lat.alias.map(|s| Identifier::new(&s));
133                        let col_aliases: Vec<Identifier> = lat
134                            .column_aliases
135                            .iter()
136                            .map(|s| Identifier::new(s))
137                            .collect();
138                        (alias, col_aliases, *lat.this)
139                    } else {
140                        (None, Vec::new(), join.this)
141                    }
142                } else {
143                    (None, Vec::new(), join.this)
144                };
145
146                let (alias_name, column_aliases, unnest_func) = extract_unnest_info(join_expr);
147
148                // Prefer Lateral's alias over UNNEST's alias
149                let final_alias = lateral_alias.or(alias_name);
150                let final_col_aliases = if !lateral_col_aliases.is_empty() {
151                    lateral_col_aliases
152                } else {
153                    column_aliases
154                };
155
156                // Use "unnest" as default alias if none provided (for single-arg case)
157                let table_alias = final_alias.or_else(|| Some(Identifier::new("unnest")));
158                let col_aliases = if final_col_aliases.is_empty() {
159                    vec![Identifier::new("unnest")]
160                } else {
161                    final_col_aliases
162                };
163
164                let udtf = make_udtf_expr(&unnest_func);
165                select.lateral_views.push(LateralView {
166                    this: udtf,
167                    table_alias,
168                    column_aliases: col_aliases,
169                    outer: false,
170                });
171            } else {
172                remaining_joins.push(join);
173            }
174        } else {
175            remaining_joins.push(join);
176        }
177    }
178    select.joins = remaining_joins;
179
180    Ok(Expression::Select(select))
181}
182
183/// Check if an expression is or wraps an UNNEST
184fn is_unnest_expr(expr: &Expression) -> bool {
185    match expr {
186        Expression::Unnest(_) => true,
187        Expression::Alias(a) => matches!(a.this, Expression::Unnest(_)),
188        _ => false,
189    }
190}
191
192/// Check if a join's expression is an UNNEST (possibly wrapped in Lateral)
193fn check_join_unnest(expr: &Expression) -> (bool, bool) {
194    match expr {
195        Expression::Unnest(_) => (true, false),
196        Expression::Alias(a) => {
197            if matches!(a.this, Expression::Unnest(_)) {
198                (true, false)
199            } else {
200                (false, false)
201            }
202        }
203        Expression::Lateral(lat) => match &*lat.this {
204            Expression::Unnest(_) => (true, true),
205            Expression::Alias(a) => {
206                if matches!(a.this, Expression::Unnest(_)) {
207                    (true, true)
208                } else {
209                    (false, true)
210                }
211            }
212            _ => (false, true),
213        },
214        _ => (false, false),
215    }
216}
217
218/// Replace an UNNEST in FROM with INLINE/EXPLODE, preserving alias structure
219fn replace_from_unnest(from_item: Expression) -> Expression {
220    match from_item {
221        Expression::Alias(mut a) => {
222            if let Expression::Unnest(unnest) = a.this {
223                a.this = make_udtf_expr(&unnest);
224            }
225            Expression::Alias(a)
226        }
227        Expression::Unnest(unnest) => make_udtf_expr(&unnest),
228        other => other,
229    }
230}
231
232/// Extract alias info and UnnestFunc from an expression (possibly wrapped in Alias)
233fn extract_unnest_info(expr: Expression) -> (Option<Identifier>, Vec<Identifier>, UnnestFunc) {
234    match expr {
235        Expression::Alias(a) => {
236            if let Expression::Unnest(unnest) = a.this {
237                (Some(a.alias), a.column_aliases, *unnest)
238            } else {
239                // Should not happen if we already checked is_unnest_expr
240                (
241                    Some(a.alias),
242                    a.column_aliases,
243                    UnnestFunc {
244                        this: a.this,
245                        expressions: Vec::new(),
246                        with_ordinality: false,
247                        alias: None,
248                        offset_alias: None,
249                    },
250                )
251            }
252        }
253        Expression::Unnest(unnest) => {
254            let alias = unnest.alias.clone();
255            (alias, Vec::new(), *unnest)
256        }
257        _ => (
258            None,
259            Vec::new(),
260            UnnestFunc {
261                this: expr,
262                expressions: Vec::new(),
263                with_ordinality: false,
264                alias: None,
265                offset_alias: None,
266            },
267        ),
268    }
269}
270
271/// Convert EXPLODE to UNNEST (for standard SQL compatibility)
272pub fn explode_to_unnest(expr: Expression) -> Result<Expression> {
273    match expr {
274        Expression::Explode(explode) => Ok(Expression::Unnest(Box::new(UnnestFunc {
275            this: explode.this,
276            expressions: Vec::new(),
277            with_ordinality: false,
278            alias: None,
279            offset_alias: None,
280        }))),
281        _ => Ok(expr),
282    }
283}
284
285/// Replace boolean literals for dialects that don't support them
286///
287/// Converts TRUE/FALSE to 1/0 for dialects like older MySQL versions
288pub fn replace_bool_with_int(expr: Expression) -> Result<Expression> {
289    match expr {
290        Expression::Boolean(b) => {
291            let value = if b.value { "1" } else { "0" };
292            Ok(Expression::Literal(Box::new(Literal::Number(
293                value.to_string(),
294            ))))
295        }
296        _ => Ok(expr),
297    }
298}
299
300/// Replace integer literals for dialects that prefer boolean
301///
302/// Converts 1/0 to TRUE/FALSE
303pub fn replace_int_with_bool(expr: Expression) -> Result<Expression> {
304    match expr {
305        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1" || n == "0") =>
306        {
307            let Literal::Number(n) = lit.as_ref() else {
308                unreachable!()
309            };
310            Ok(Expression::Boolean(BooleanLiteral { value: n == "1" }))
311        }
312        _ => Ok(expr),
313    }
314}
315
316/// Remove precision from parameterized types
317///
318/// Some dialects don't support precision parameters on certain types.
319/// This transform removes them, e.g., VARCHAR(255) → VARCHAR, DECIMAL(10,2) → DECIMAL
320pub fn remove_precision_parameterized_types(expr: Expression) -> Result<Expression> {
321    Ok(strip_type_params_recursive(expr))
322}
323
324/// Recursively strip type parameters from DataType values in an expression
325fn strip_type_params_recursive(expr: Expression) -> Expression {
326    match expr {
327        // Handle Cast expressions - strip precision from target type
328        Expression::Cast(mut cast) => {
329            cast.to = strip_data_type_params(cast.to);
330            // Also recursively process the expression being cast
331            cast.this = strip_type_params_recursive(cast.this);
332            Expression::Cast(cast)
333        }
334        // Handle TryCast expressions (uses same Cast struct)
335        Expression::TryCast(mut try_cast) => {
336            try_cast.to = strip_data_type_params(try_cast.to);
337            try_cast.this = strip_type_params_recursive(try_cast.this);
338            Expression::TryCast(try_cast)
339        }
340        // Handle SafeCast expressions (uses same Cast struct)
341        Expression::SafeCast(mut safe_cast) => {
342            safe_cast.to = strip_data_type_params(safe_cast.to);
343            safe_cast.this = strip_type_params_recursive(safe_cast.this);
344            Expression::SafeCast(safe_cast)
345        }
346        // For now, pass through other expressions
347        // A full implementation would recursively visit all nodes
348        _ => expr,
349    }
350}
351
352/// Strip precision/scale/length parameters from a DataType
353fn strip_data_type_params(dt: DataType) -> DataType {
354    match dt {
355        // Numeric types with precision/scale
356        DataType::Decimal { .. } => DataType::Decimal {
357            precision: None,
358            scale: None,
359        },
360        DataType::TinyInt { .. } => DataType::TinyInt { length: None },
361        DataType::SmallInt { .. } => DataType::SmallInt { length: None },
362        DataType::Int { .. } => DataType::Int {
363            length: None,
364            integer_spelling: false,
365        },
366        DataType::BigInt { .. } => DataType::BigInt { length: None },
367
368        // String types with length
369        DataType::Char { .. } => DataType::Char { length: None },
370        DataType::VarChar { .. } => DataType::VarChar {
371            length: None,
372            parenthesized_length: false,
373        },
374
375        // Binary types with length
376        DataType::Binary { .. } => DataType::Binary { length: None },
377        DataType::VarBinary { .. } => DataType::VarBinary { length: None },
378
379        // Bit types with length
380        DataType::Bit { .. } => DataType::Bit { length: None },
381        DataType::VarBit { .. } => DataType::VarBit { length: None },
382
383        // Time types with precision
384        DataType::Time { .. } => DataType::Time {
385            precision: None,
386            timezone: false,
387        },
388        DataType::Timestamp { timezone, .. } => DataType::Timestamp {
389            precision: None,
390            timezone,
391        },
392
393        // Array - recursively strip element type
394        DataType::Array {
395            element_type,
396            dimension,
397        } => DataType::Array {
398            element_type: Box::new(strip_data_type_params(*element_type)),
399            dimension,
400        },
401
402        // Map - recursively strip key and value types
403        DataType::Map {
404            key_type,
405            value_type,
406        } => DataType::Map {
407            key_type: Box::new(strip_data_type_params(*key_type)),
408            value_type: Box::new(strip_data_type_params(*value_type)),
409        },
410
411        // Struct - recursively strip field types
412        DataType::Struct { fields, nested } => DataType::Struct {
413            fields: fields
414                .into_iter()
415                .map(|f| {
416                    StructField::with_options(
417                        f.name,
418                        strip_data_type_params(f.data_type),
419                        f.options,
420                    )
421                })
422                .collect(),
423            nested,
424        },
425
426        // Vector - strip dimension
427        DataType::Vector { element_type, .. } => DataType::Vector {
428            element_type: element_type.map(|et| Box::new(strip_data_type_params(*et))),
429            dimension: None,
430        },
431
432        // Object - recursively strip field types
433        DataType::Object { fields, modifier } => DataType::Object {
434            fields: fields
435                .into_iter()
436                .map(|(name, ty, not_null)| (name, strip_data_type_params(ty), not_null))
437                .collect(),
438            modifier,
439        },
440
441        // Other types pass through unchanged
442        other => other,
443    }
444}
445
446/// Eliminate QUALIFY clause by converting to a subquery with WHERE filter
447///
448/// QUALIFY is supported by Snowflake, BigQuery, and DuckDB but not by most other dialects.
449///
450/// Converts:
451/// ```sql
452/// SELECT * FROM t QUALIFY ROW_NUMBER() OVER (...) = 1
453/// ```
454/// To:
455/// ```sql
456/// SELECT * FROM (SELECT *, ROW_NUMBER() OVER (...) AS _w FROM t) _t WHERE _w = 1
457/// ```
458///
459/// Reference: `transforms.py:194-255`
460pub fn eliminate_qualify(expr: Expression) -> Result<Expression> {
461    match expr {
462        Expression::Select(mut select) => {
463            if let Some(qualify) = select.qualify.take() {
464                // Python sqlglot approach:
465                // 1. Extract the window function from the qualify condition
466                // 2. Add it as _w alias to the inner select
467                // 3. Replace the window function reference with _w in the outer WHERE
468                // 4. Keep original select expressions in the outer query
469
470                let qualify_filter = qualify.this;
471                let window_alias_name = "_w".to_string();
472                let window_alias_ident = Identifier::new(window_alias_name.clone());
473
474                // Try to extract window function from comparison
475                // Pattern: WINDOW_FUNC = value -> inner adds WINDOW_FUNC AS _w, outer WHERE _w = value
476                let (window_expr, outer_where) =
477                    extract_window_from_condition(qualify_filter.clone(), &window_alias_ident);
478
479                if let Some(win_expr) = window_expr {
480                    // Add window function as _w alias to inner select
481                    let window_alias_expr =
482                        Expression::Alias(Box::new(crate::expressions::Alias {
483                            this: win_expr,
484                            alias: window_alias_ident.clone(),
485                            column_aliases: vec![],
486                            pre_alias_comments: vec![],
487                            trailing_comments: vec![],
488                            inferred_type: None,
489                        }));
490
491                    // For the outer SELECT, replace aliased expressions with just the alias reference
492                    // e.g., `1 AS other_id` in inner -> `other_id` in outer
493                    // Non-aliased expressions (columns, identifiers) stay as-is
494                    let outer_exprs: Vec<Expression> = select
495                        .expressions
496                        .iter()
497                        .map(|expr| {
498                            if let Expression::Alias(a) = expr {
499                                // Replace with just the alias identifier as a column reference
500                                Expression::Column(Box::new(crate::expressions::Column {
501                                    name: a.alias.clone(),
502                                    table: None,
503                                    join_mark: false,
504                                    trailing_comments: vec![],
505                                    span: None,
506                                    inferred_type: None,
507                                }))
508                            } else {
509                                expr.clone()
510                            }
511                        })
512                        .collect();
513                    select.expressions.push(window_alias_expr);
514
515                    // Create the inner subquery
516                    let inner_select = Expression::Select(select);
517                    let subquery = Subquery {
518                        this: inner_select,
519                        alias: Some(Identifier::new("_t".to_string())),
520                        column_aliases: vec![],
521                        order_by: None,
522                        limit: None,
523                        offset: None,
524                        distribute_by: None,
525                        sort_by: None,
526                        cluster_by: None,
527                        lateral: false,
528                        modifiers_inside: false,
529                        trailing_comments: vec![],
530                        inferred_type: None,
531                    };
532
533                    // Create the outer SELECT with alias-resolved expressions and WHERE _w <op> value
534                    let outer_select = Select {
535                        expressions: outer_exprs,
536                        from: Some(From {
537                            expressions: vec![Expression::Subquery(Box::new(subquery))],
538                        }),
539                        where_clause: Some(Where { this: outer_where }),
540                        ..Select::new()
541                    };
542
543                    return Ok(Expression::Select(Box::new(outer_select)));
544                } else {
545                    // Fallback: if we can't extract a window function, use old approach
546                    let qualify_alias = Expression::Alias(Box::new(crate::expressions::Alias {
547                        this: qualify_filter.clone(),
548                        alias: window_alias_ident.clone(),
549                        column_aliases: vec![],
550                        pre_alias_comments: vec![],
551                        trailing_comments: vec![],
552                        inferred_type: None,
553                    }));
554
555                    let original_exprs = select.expressions.clone();
556                    select.expressions.push(qualify_alias);
557
558                    let inner_select = Expression::Select(select);
559                    let subquery = Subquery {
560                        this: inner_select,
561                        alias: Some(Identifier::new("_t".to_string())),
562                        column_aliases: vec![],
563                        order_by: None,
564                        limit: None,
565                        offset: None,
566                        distribute_by: None,
567                        sort_by: None,
568                        cluster_by: None,
569                        lateral: false,
570                        modifiers_inside: false,
571                        trailing_comments: vec![],
572                        inferred_type: None,
573                    };
574
575                    let outer_select = Select {
576                        expressions: original_exprs,
577                        from: Some(From {
578                            expressions: vec![Expression::Subquery(Box::new(subquery))],
579                        }),
580                        where_clause: Some(Where {
581                            this: Expression::Column(Box::new(crate::expressions::Column {
582                                name: window_alias_ident,
583                                table: None,
584                                join_mark: false,
585                                trailing_comments: vec![],
586                                span: None,
587                                inferred_type: None,
588                            })),
589                        }),
590                        ..Select::new()
591                    };
592
593                    return Ok(Expression::Select(Box::new(outer_select)));
594                }
595            }
596            Ok(Expression::Select(select))
597        }
598        other => Ok(other),
599    }
600}
601
602/// Extract a window function from a qualify condition.
603/// Returns (window_expression, rewritten_condition) if found.
604/// The rewritten condition replaces the window function with a column reference to the alias.
605fn extract_window_from_condition(
606    condition: Expression,
607    alias: &Identifier,
608) -> (Option<Expression>, Expression) {
609    let alias_col = Expression::Column(Box::new(crate::expressions::Column {
610        name: alias.clone(),
611        table: None,
612        join_mark: false,
613        trailing_comments: vec![],
614        span: None,
615        inferred_type: None,
616    }));
617
618    // Check if condition is a simple comparison with a window function on one side
619    match condition {
620        // WINDOW_FUNC = value
621        Expression::Eq(ref op) => {
622            if is_window_expr(&op.left) {
623                (
624                    Some(op.left.clone()),
625                    Expression::Eq(Box::new(BinaryOp {
626                        left: alias_col,
627                        right: op.right.clone(),
628                        ..(**op).clone()
629                    })),
630                )
631            } else if is_window_expr(&op.right) {
632                (
633                    Some(op.right.clone()),
634                    Expression::Eq(Box::new(BinaryOp {
635                        left: op.left.clone(),
636                        right: alias_col,
637                        ..(**op).clone()
638                    })),
639                )
640            } else {
641                (None, condition)
642            }
643        }
644        Expression::Neq(ref op) => {
645            if is_window_expr(&op.left) {
646                (
647                    Some(op.left.clone()),
648                    Expression::Neq(Box::new(BinaryOp {
649                        left: alias_col,
650                        right: op.right.clone(),
651                        ..(**op).clone()
652                    })),
653                )
654            } else if is_window_expr(&op.right) {
655                (
656                    Some(op.right.clone()),
657                    Expression::Neq(Box::new(BinaryOp {
658                        left: op.left.clone(),
659                        right: alias_col,
660                        ..(**op).clone()
661                    })),
662                )
663            } else {
664                (None, condition)
665            }
666        }
667        Expression::Lt(ref op) => {
668            if is_window_expr(&op.left) {
669                (
670                    Some(op.left.clone()),
671                    Expression::Lt(Box::new(BinaryOp {
672                        left: alias_col,
673                        right: op.right.clone(),
674                        ..(**op).clone()
675                    })),
676                )
677            } else if is_window_expr(&op.right) {
678                (
679                    Some(op.right.clone()),
680                    Expression::Lt(Box::new(BinaryOp {
681                        left: op.left.clone(),
682                        right: alias_col,
683                        ..(**op).clone()
684                    })),
685                )
686            } else {
687                (None, condition)
688            }
689        }
690        Expression::Lte(ref op) => {
691            if is_window_expr(&op.left) {
692                (
693                    Some(op.left.clone()),
694                    Expression::Lte(Box::new(BinaryOp {
695                        left: alias_col,
696                        right: op.right.clone(),
697                        ..(**op).clone()
698                    })),
699                )
700            } else if is_window_expr(&op.right) {
701                (
702                    Some(op.right.clone()),
703                    Expression::Lte(Box::new(BinaryOp {
704                        left: op.left.clone(),
705                        right: alias_col,
706                        ..(**op).clone()
707                    })),
708                )
709            } else {
710                (None, condition)
711            }
712        }
713        Expression::Gt(ref op) => {
714            if is_window_expr(&op.left) {
715                (
716                    Some(op.left.clone()),
717                    Expression::Gt(Box::new(BinaryOp {
718                        left: alias_col,
719                        right: op.right.clone(),
720                        ..(**op).clone()
721                    })),
722                )
723            } else if is_window_expr(&op.right) {
724                (
725                    Some(op.right.clone()),
726                    Expression::Gt(Box::new(BinaryOp {
727                        left: op.left.clone(),
728                        right: alias_col,
729                        ..(**op).clone()
730                    })),
731                )
732            } else {
733                (None, condition)
734            }
735        }
736        Expression::Gte(ref op) => {
737            if is_window_expr(&op.left) {
738                (
739                    Some(op.left.clone()),
740                    Expression::Gte(Box::new(BinaryOp {
741                        left: alias_col,
742                        right: op.right.clone(),
743                        ..(**op).clone()
744                    })),
745                )
746            } else if is_window_expr(&op.right) {
747                (
748                    Some(op.right.clone()),
749                    Expression::Gte(Box::new(BinaryOp {
750                        left: op.left.clone(),
751                        right: alias_col,
752                        ..(**op).clone()
753                    })),
754                )
755            } else {
756                (None, condition)
757            }
758        }
759        // If the condition is just a window function (bare QUALIFY expression)
760        _ if is_window_expr(&condition) => (Some(condition), alias_col),
761        // Can't extract window function
762        _ => (None, condition),
763    }
764}
765
766/// Check if an expression is a window function
767fn is_window_expr(expr: &Expression) -> bool {
768    matches!(expr, Expression::Window(_) | Expression::WindowFunction(_))
769}
770
771/// Eliminate DISTINCT ON clause by converting to a subquery with ROW_NUMBER
772///
773/// DISTINCT ON is PostgreSQL-specific. For dialects that don't support it,
774/// this converts it to a subquery with a ROW_NUMBER() window function.
775///
776/// Converts:
777/// ```sql
778/// SELECT DISTINCT ON (a) a, b FROM t ORDER BY a, b
779/// ```
780/// To:
781/// ```sql
782/// SELECT a, b FROM (
783///     SELECT a, b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY a, b) AS _row_number
784///     FROM t
785/// ) _t WHERE _row_number = 1
786/// ```
787///
788/// Reference: `transforms.py:138-191`
789pub fn eliminate_distinct_on(expr: Expression) -> Result<Expression> {
790    eliminate_distinct_on_for_dialect(expr, None)
791}
792
793/// Eliminate DISTINCT ON with dialect-specific NULL ordering behavior.
794///
795/// For dialects where NULLs don't sort first by default in DESC ordering,
796/// we need to add explicit NULL ordering to preserve DISTINCT ON semantics.
797pub fn eliminate_distinct_on_for_dialect(
798    expr: Expression,
799    target: Option<DialectType>,
800) -> Result<Expression> {
801    use crate::expressions::Case;
802
803    // PostgreSQL and DuckDB support DISTINCT ON natively - skip elimination
804    if matches!(
805        target,
806        Some(DialectType::PostgreSQL) | Some(DialectType::DuckDB)
807    ) {
808        return Ok(expr);
809    }
810
811    // Determine NULL ordering mode based on target dialect
812    // Oracle/Redshift/Snowflake: NULLS FIRST is default for DESC -> no change needed
813    // BigQuery/Spark/Presto/Hive/etc: need explicit NULLS FIRST
814    // MySQL/StarRocks/TSQL: no NULLS FIRST syntax -> use CASE WHEN IS NULL
815    enum NullsMode {
816        None,       // Default NULLS FIRST behavior (Oracle, Redshift, Snowflake)
817        NullsFirst, // Add explicit NULLS FIRST (BigQuery, Spark, Presto, Hive, etc.)
818        CaseExpr,   // Use CASE WHEN IS NULL for NULLS FIRST simulation (MySQL, StarRocks, TSQL)
819    }
820
821    let nulls_mode = match target {
822        Some(DialectType::MySQL)
823        | Some(DialectType::StarRocks)
824        | Some(DialectType::SingleStore)
825        | Some(DialectType::TSQL)
826        | Some(DialectType::Fabric) => NullsMode::CaseExpr,
827        Some(DialectType::Oracle) | Some(DialectType::Redshift) | Some(DialectType::Snowflake) => {
828            NullsMode::None
829        }
830        // All other dialects that don't support DISTINCT ON: use NULLS FIRST
831        _ => NullsMode::NullsFirst,
832    };
833
834    match expr {
835        Expression::Select(mut select) => {
836            if let Some(distinct_cols) = select.distinct_on.take() {
837                if !distinct_cols.is_empty() {
838                    // Create ROW_NUMBER() OVER (PARTITION BY distinct_cols ORDER BY ...)
839                    let row_number_alias = Identifier::new("_row_number".to_string());
840
841                    // Get order_by expressions, or use distinct_cols as default order
842                    let order_exprs = if let Some(ref order_by) = select.order_by {
843                        let mut exprs = order_by.expressions.clone();
844                        // Add NULL ordering based on target dialect
845                        match nulls_mode {
846                            NullsMode::NullsFirst => {
847                                for ord in &mut exprs {
848                                    if ord.desc && ord.nulls_first.is_none() {
849                                        ord.nulls_first = Some(true);
850                                    }
851                                }
852                            }
853                            NullsMode::CaseExpr => {
854                                // For each DESC column without explicit nulls ordering,
855                                // prepend: CASE WHEN col IS NULL THEN 1 ELSE 0 END DESC
856                                let mut new_exprs = Vec::new();
857                                for ord in &exprs {
858                                    if ord.desc && ord.nulls_first.is_none() {
859                                        // Add CASE WHEN col IS NULL THEN 1 ELSE 0 END DESC
860                                        let null_check = Expression::Case(Box::new(Case {
861                                            operand: None,
862                                            whens: vec![(
863                                                Expression::IsNull(Box::new(
864                                                    crate::expressions::IsNull {
865                                                        this: ord.this.clone(),
866                                                        not: false,
867                                                        postfix_form: false,
868                                                    },
869                                                )),
870                                                Expression::Literal(Box::new(Literal::Number(
871                                                    "1".to_string(),
872                                                ))),
873                                            )],
874                                            else_: Some(Expression::Literal(Box::new(
875                                                Literal::Number("0".to_string()),
876                                            ))),
877                                            comments: Vec::new(),
878                                            inferred_type: None,
879                                        }));
880                                        new_exprs.push(crate::expressions::Ordered {
881                                            this: null_check,
882                                            desc: true,
883                                            nulls_first: None,
884                                            explicit_asc: false,
885                                            with_fill: None,
886                                        });
887                                    }
888                                    new_exprs.push(ord.clone());
889                                }
890                                exprs = new_exprs;
891                            }
892                            NullsMode::None => {}
893                        }
894                        exprs
895                    } else {
896                        distinct_cols
897                            .iter()
898                            .map(|e| crate::expressions::Ordered {
899                                this: e.clone(),
900                                desc: false,
901                                nulls_first: None,
902                                explicit_asc: false,
903                                with_fill: None,
904                            })
905                            .collect()
906                    };
907
908                    // Create window function: ROW_NUMBER() OVER (PARTITION BY ... ORDER BY ...)
909                    let row_number_func =
910                        Expression::WindowFunction(Box::new(crate::expressions::WindowFunction {
911                            this: Expression::RowNumber(crate::expressions::RowNumber),
912                            over: Over {
913                                partition_by: distinct_cols,
914                                order_by: order_exprs,
915                                frame: None,
916                                window_name: None,
917                                alias: None,
918                            },
919                            keep: None,
920                            inferred_type: None,
921                        }));
922
923                    // Build aliased inner expressions and outer column references
924                    // Inner: SELECT a AS a, b AS b, ROW_NUMBER() OVER (...) AS _row_number
925                    // Outer: SELECT a, b FROM (...)
926                    let mut inner_aliased_exprs = Vec::new();
927                    let mut outer_select_exprs = Vec::new();
928                    for orig_expr in &select.expressions {
929                        match orig_expr {
930                            Expression::Alias(alias) => {
931                                // Already aliased - keep as-is in inner, reference alias in outer
932                                inner_aliased_exprs.push(orig_expr.clone());
933                                outer_select_exprs.push(Expression::Column(Box::new(
934                                    crate::expressions::Column {
935                                        name: alias.alias.clone(),
936                                        table: None,
937                                        join_mark: false,
938                                        trailing_comments: vec![],
939                                        span: None,
940                                        inferred_type: None,
941                                    },
942                                )));
943                            }
944                            Expression::Column(col) => {
945                                // Wrap in alias: a AS a in inner, just a in outer
946                                inner_aliased_exprs.push(Expression::Alias(Box::new(
947                                    crate::expressions::Alias {
948                                        this: orig_expr.clone(),
949                                        alias: col.name.clone(),
950                                        column_aliases: vec![],
951                                        pre_alias_comments: vec![],
952                                        trailing_comments: vec![],
953                                        inferred_type: None,
954                                    },
955                                )));
956                                outer_select_exprs.push(Expression::Column(Box::new(
957                                    crate::expressions::Column {
958                                        name: col.name.clone(),
959                                        table: None,
960                                        join_mark: false,
961                                        trailing_comments: vec![],
962                                        span: None,
963                                        inferred_type: None,
964                                    },
965                                )));
966                            }
967                            _ => {
968                                // Complex expression without alias - include as-is in both
969                                inner_aliased_exprs.push(orig_expr.clone());
970                                outer_select_exprs.push(orig_expr.clone());
971                            }
972                        }
973                    }
974
975                    // Add ROW_NUMBER as aliased expression to inner select list
976                    let row_number_alias_expr =
977                        Expression::Alias(Box::new(crate::expressions::Alias {
978                            this: row_number_func,
979                            alias: row_number_alias.clone(),
980                            column_aliases: vec![],
981                            pre_alias_comments: vec![],
982                            trailing_comments: vec![],
983                            inferred_type: None,
984                        }));
985                    inner_aliased_exprs.push(row_number_alias_expr);
986
987                    // Replace inner select's expressions with aliased versions
988                    select.expressions = inner_aliased_exprs;
989
990                    // Remove ORDER BY from inner query (it's now in the window function)
991                    let _inner_order_by = select.order_by.take();
992
993                    // Clear DISTINCT from inner select (DISTINCT ON is replaced by ROW_NUMBER)
994                    select.distinct = false;
995
996                    // Create inner subquery
997                    let inner_select = Expression::Select(select);
998                    let subquery = Subquery {
999                        this: inner_select,
1000                        alias: Some(Identifier::new("_t".to_string())),
1001                        column_aliases: vec![],
1002                        order_by: None,
1003                        limit: None,
1004                        offset: None,
1005                        distribute_by: None,
1006                        sort_by: None,
1007                        cluster_by: None,
1008                        lateral: false,
1009                        modifiers_inside: false,
1010                        trailing_comments: vec![],
1011                        inferred_type: None,
1012                    };
1013
1014                    // Create outer SELECT with WHERE _row_number = 1
1015                    // No ORDER BY on outer query
1016                    let outer_select = Select {
1017                        expressions: outer_select_exprs,
1018                        from: Some(From {
1019                            expressions: vec![Expression::Subquery(Box::new(subquery))],
1020                        }),
1021                        where_clause: Some(Where {
1022                            this: Expression::Eq(Box::new(BinaryOp {
1023                                left: Expression::Column(Box::new(crate::expressions::Column {
1024                                    name: row_number_alias,
1025                                    table: None,
1026                                    join_mark: false,
1027                                    trailing_comments: vec![],
1028                                    span: None,
1029                                    inferred_type: None,
1030                                })),
1031                                right: Expression::Literal(Box::new(Literal::Number(
1032                                    "1".to_string(),
1033                                ))),
1034                                left_comments: vec![],
1035                                operator_comments: vec![],
1036                                trailing_comments: vec![],
1037                                inferred_type: None,
1038                            })),
1039                        }),
1040                        ..Select::new()
1041                    };
1042
1043                    return Ok(Expression::Select(Box::new(outer_select)));
1044                }
1045            }
1046            Ok(Expression::Select(select))
1047        }
1048        other => Ok(other),
1049    }
1050}
1051
1052/// Convert SEMI and ANTI joins into equivalent forms that use EXISTS instead.
1053///
1054/// For dialects that don't support SEMI/ANTI join syntax, this converts:
1055/// - `SELECT * FROM a SEMI JOIN b ON a.x = b.x` → `SELECT * FROM a WHERE EXISTS (SELECT 1 FROM b WHERE a.x = b.x)`
1056/// - `SELECT * FROM a ANTI JOIN b ON a.x = b.x` → `SELECT * FROM a WHERE NOT EXISTS (SELECT 1 FROM b WHERE a.x = b.x)`
1057///
1058/// Reference: `transforms.py:607-621`
1059pub fn eliminate_semi_and_anti_joins(expr: Expression) -> Result<Expression> {
1060    match expr {
1061        Expression::Select(mut select) => {
1062            let mut new_joins = Vec::new();
1063            let mut extra_where_conditions = Vec::new();
1064
1065            for join in select.joins.drain(..) {
1066                match join.kind {
1067                    JoinKind::Semi | JoinKind::LeftSemi => {
1068                        if let Some(on_condition) = join.on {
1069                            // Create: EXISTS (SELECT 1 FROM join_table WHERE on_condition)
1070                            let subquery_select = Select {
1071                                expressions: vec![Expression::Literal(Box::new(Literal::Number(
1072                                    "1".to_string(),
1073                                )))],
1074                                from: Some(From {
1075                                    expressions: vec![join.this],
1076                                }),
1077                                where_clause: Some(Where { this: on_condition }),
1078                                ..Select::new()
1079                            };
1080
1081                            let exists = Expression::Exists(Box::new(Exists {
1082                                this: Expression::Subquery(Box::new(Subquery {
1083                                    this: Expression::Select(Box::new(subquery_select)),
1084                                    alias: None,
1085                                    column_aliases: vec![],
1086                                    order_by: None,
1087                                    limit: None,
1088                                    offset: None,
1089                                    distribute_by: None,
1090                                    sort_by: None,
1091                                    cluster_by: None,
1092                                    lateral: false,
1093                                    modifiers_inside: false,
1094                                    trailing_comments: vec![],
1095                                    inferred_type: None,
1096                                })),
1097                                not: false,
1098                            }));
1099
1100                            extra_where_conditions.push(exists);
1101                        }
1102                    }
1103                    JoinKind::Anti | JoinKind::LeftAnti => {
1104                        if let Some(on_condition) = join.on {
1105                            // Create: NOT EXISTS (SELECT 1 FROM join_table WHERE on_condition)
1106                            let subquery_select = Select {
1107                                expressions: vec![Expression::Literal(Box::new(Literal::Number(
1108                                    "1".to_string(),
1109                                )))],
1110                                from: Some(From {
1111                                    expressions: vec![join.this],
1112                                }),
1113                                where_clause: Some(Where { this: on_condition }),
1114                                ..Select::new()
1115                            };
1116
1117                            // Use Exists with not: true for NOT EXISTS
1118                            let not_exists = Expression::Exists(Box::new(Exists {
1119                                this: Expression::Subquery(Box::new(Subquery {
1120                                    this: Expression::Select(Box::new(subquery_select)),
1121                                    alias: None,
1122                                    column_aliases: vec![],
1123                                    order_by: None,
1124                                    limit: None,
1125                                    offset: None,
1126                                    distribute_by: None,
1127                                    sort_by: None,
1128                                    cluster_by: None,
1129                                    lateral: false,
1130                                    modifiers_inside: false,
1131                                    trailing_comments: vec![],
1132                                    inferred_type: None,
1133                                })),
1134                                not: true,
1135                            }));
1136
1137                            extra_where_conditions.push(not_exists);
1138                        }
1139                    }
1140                    _ => {
1141                        // Keep other join types as-is
1142                        new_joins.push(join);
1143                    }
1144                }
1145            }
1146
1147            select.joins = new_joins;
1148
1149            // Add EXISTS conditions to WHERE clause
1150            if !extra_where_conditions.is_empty() {
1151                let combined = extra_where_conditions
1152                    .into_iter()
1153                    .reduce(|acc, cond| {
1154                        Expression::And(Box::new(BinaryOp {
1155                            left: acc,
1156                            right: cond,
1157                            left_comments: vec![],
1158                            operator_comments: vec![],
1159                            trailing_comments: vec![],
1160                            inferred_type: None,
1161                        }))
1162                    })
1163                    .unwrap();
1164
1165                select.where_clause = match select.where_clause {
1166                    Some(Where { this: existing }) => Some(Where {
1167                        this: Expression::And(Box::new(BinaryOp {
1168                            left: existing,
1169                            right: combined,
1170                            left_comments: vec![],
1171                            operator_comments: vec![],
1172                            trailing_comments: vec![],
1173                            inferred_type: None,
1174                        })),
1175                    }),
1176                    None => Some(Where { this: combined }),
1177                };
1178            }
1179
1180            Ok(Expression::Select(select))
1181        }
1182        other => Ok(other),
1183    }
1184}
1185
1186/// Convert FULL OUTER JOIN to a UNION of LEFT and RIGHT OUTER joins.
1187///
1188/// For dialects that don't support FULL OUTER JOIN, this converts:
1189/// ```sql
1190/// SELECT * FROM a FULL OUTER JOIN b ON a.x = b.x
1191/// ```
1192/// To:
1193/// ```sql
1194/// SELECT * FROM a LEFT OUTER JOIN b ON a.x = b.x
1195/// UNION ALL
1196/// SELECT * FROM a RIGHT OUTER JOIN b ON a.x = b.x
1197/// WHERE NOT EXISTS (SELECT 1 FROM a WHERE a.x = b.x)
1198/// ```
1199///
1200/// Note: This transformation currently only works for queries with a single FULL OUTER join.
1201///
1202/// Reference: `transforms.py:624-661`
1203pub fn eliminate_full_outer_join(expr: Expression) -> Result<Expression> {
1204    match expr {
1205        Expression::Select(mut select) => {
1206            // Find FULL OUTER joins
1207            let full_outer_join_idx = select.joins.iter().position(|j| j.kind == JoinKind::Full);
1208
1209            if let Some(idx) = full_outer_join_idx {
1210                // We only handle queries with a single FULL OUTER join
1211                let full_join_count = select
1212                    .joins
1213                    .iter()
1214                    .filter(|j| j.kind == JoinKind::Full)
1215                    .count();
1216                if full_join_count != 1 {
1217                    return Ok(Expression::Select(select));
1218                }
1219
1220                // Clone the query for the right side of the UNION
1221                let mut right_select = select.clone();
1222
1223                // Get the join condition from the FULL OUTER join
1224                let full_join = &select.joins[idx];
1225                let join_condition = full_join.on.clone();
1226
1227                // Left side: convert FULL to LEFT
1228                select.joins[idx].kind = JoinKind::Left;
1229
1230                // Right side: convert FULL to RIGHT and add NOT EXISTS condition
1231                right_select.joins[idx].kind = JoinKind::Right;
1232
1233                // Build NOT EXISTS for the right side to exclude rows that matched
1234                if let (Some(ref from), Some(ref join_cond)) = (&select.from, &join_condition) {
1235                    if !from.expressions.is_empty() {
1236                        let anti_subquery = Expression::Select(Box::new(Select {
1237                            expressions: vec![Expression::Literal(Box::new(Literal::Number(
1238                                "1".to_string(),
1239                            )))],
1240                            from: Some(from.clone()),
1241                            where_clause: Some(Where {
1242                                this: join_cond.clone(),
1243                            }),
1244                            ..Select::new()
1245                        }));
1246
1247                        let not_exists = Expression::Not(Box::new(crate::expressions::UnaryOp {
1248                            inferred_type: None,
1249                            this: Expression::Exists(Box::new(Exists {
1250                                this: Expression::Subquery(Box::new(Subquery {
1251                                    this: anti_subquery,
1252                                    alias: None,
1253                                    column_aliases: vec![],
1254                                    order_by: None,
1255                                    limit: None,
1256                                    offset: None,
1257                                    distribute_by: None,
1258                                    sort_by: None,
1259                                    cluster_by: None,
1260                                    lateral: false,
1261                                    modifiers_inside: false,
1262                                    trailing_comments: vec![],
1263                                    inferred_type: None,
1264                                })),
1265                                not: false,
1266                            })),
1267                        }));
1268
1269                        // Add NOT EXISTS to the WHERE clause
1270                        right_select.where_clause = Some(Where {
1271                            this: match right_select.where_clause {
1272                                Some(w) => Expression::And(Box::new(BinaryOp {
1273                                    left: w.this,
1274                                    right: not_exists,
1275                                    left_comments: vec![],
1276                                    operator_comments: vec![],
1277                                    trailing_comments: vec![],
1278                                    inferred_type: None,
1279                                })),
1280                                None => not_exists,
1281                            },
1282                        });
1283                    }
1284                }
1285
1286                // Remove WITH clause from right side (CTEs should only be on left)
1287                right_select.with = None;
1288
1289                // Remove ORDER BY from left side (will be applied after UNION)
1290                let order_by = select.order_by.take();
1291
1292                // Create UNION ALL of left and right
1293                let union = crate::expressions::Union {
1294                    left: Expression::Select(select),
1295                    right: Expression::Select(right_select),
1296                    all: true, // UNION ALL
1297                    distinct: false,
1298                    with: None,
1299                    order_by,
1300                    limit: None,
1301                    offset: None,
1302                    distribute_by: None,
1303                    sort_by: None,
1304                    cluster_by: None,
1305                    by_name: false,
1306                    side: None,
1307                    kind: None,
1308                    corresponding: false,
1309                    strict: false,
1310                    on_columns: Vec::new(),
1311                };
1312
1313                return Ok(Expression::Union(Box::new(union)));
1314            }
1315
1316            Ok(Expression::Select(select))
1317        }
1318        other => Ok(other),
1319    }
1320}
1321
1322/// Move CTEs to the top level of the query.
1323///
1324/// Some dialects (e.g., Hive, T-SQL, Spark prior to version 3) only allow CTEs to be
1325/// defined at the top-level, so for example queries like:
1326///
1327/// ```sql
1328/// SELECT * FROM (WITH t(c) AS (SELECT 1) SELECT * FROM t) AS subq
1329/// ```
1330///
1331/// are invalid in those dialects. This transformation moves all CTEs to the top level.
1332///
1333/// Reference: `transforms.py:664-700`
1334pub fn move_ctes_to_top_level(expr: Expression) -> Result<Expression> {
1335    match expr {
1336        Expression::Select(mut select) => {
1337            // Phase 1: Collect CTEs from nested subqueries (not inside CTE definitions)
1338            let mut collected_ctes: Vec<crate::expressions::Cte> = Vec::new();
1339            let mut has_recursive = false;
1340
1341            collect_nested_ctes(
1342                &Expression::Select(select.clone()),
1343                &mut collected_ctes,
1344                &mut has_recursive,
1345                true,
1346            );
1347
1348            // Phase 2: Flatten CTEs nested inside top-level CTE definitions
1349            // This handles: WITH c AS (WITH b AS (...) SELECT ...) -> WITH b AS (...), c AS (SELECT ...)
1350            let mut cte_body_collected: Vec<(String, Vec<crate::expressions::Cte>)> = Vec::new();
1351            if let Some(ref with) = select.with {
1352                for cte in &with.ctes {
1353                    let mut body_ctes: Vec<crate::expressions::Cte> = Vec::new();
1354                    collect_ctes_from_cte_body(&cte.this, &mut body_ctes, &mut has_recursive);
1355                    if !body_ctes.is_empty() {
1356                        cte_body_collected.push((cte.alias.name.clone(), body_ctes));
1357                    }
1358                }
1359            }
1360
1361            let has_subquery_ctes = !collected_ctes.is_empty();
1362            let has_body_ctes = !cte_body_collected.is_empty();
1363
1364            if has_subquery_ctes || has_body_ctes {
1365                // Strip WITH clauses from inner subqueries
1366                strip_nested_with_clauses(&mut select, true);
1367
1368                // Strip WITH clauses from CTE body definitions
1369                if has_body_ctes {
1370                    if let Some(ref mut with) = select.with {
1371                        for cte in with.ctes.iter_mut() {
1372                            strip_with_from_cte_body(&mut cte.this);
1373                        }
1374                    }
1375                }
1376
1377                let top_with = select.with.get_or_insert_with(|| crate::expressions::With {
1378                    ctes: Vec::new(),
1379                    recursive: false,
1380                    leading_comments: vec![],
1381                    search: None,
1382                });
1383
1384                if has_recursive {
1385                    top_with.recursive = true;
1386                }
1387
1388                // Insert body CTEs before their parent CTE (Python sqlglot behavior)
1389                if has_body_ctes {
1390                    let mut new_ctes: Vec<crate::expressions::Cte> = Vec::new();
1391                    for mut cte in top_with.ctes.drain(..) {
1392                        // Check if this CTE has nested CTEs to insert before it
1393                        if let Some(pos) = cte_body_collected
1394                            .iter()
1395                            .position(|(name, _)| *name == cte.alias.name)
1396                        {
1397                            let (_, mut nested) = cte_body_collected.remove(pos);
1398                            // Strip WITH from each nested CTE's body too
1399                            for nested_cte in nested.iter_mut() {
1400                                strip_with_from_cte_body(&mut nested_cte.this);
1401                            }
1402                            new_ctes.extend(nested);
1403                        }
1404                        // Also strip WITH from the parent CTE's body
1405                        strip_with_from_cte_body(&mut cte.this);
1406                        new_ctes.push(cte);
1407                    }
1408                    top_with.ctes = new_ctes;
1409                }
1410
1411                // Append collected subquery CTEs after existing ones
1412                top_with.ctes.extend(collected_ctes);
1413            }
1414
1415            Ok(Expression::Select(select))
1416        }
1417        other => Ok(other),
1418    }
1419}
1420
1421/// Recursively collect CTEs from within CTE body expressions (for deep nesting)
1422fn collect_ctes_from_cte_body(
1423    expr: &Expression,
1424    collected: &mut Vec<crate::expressions::Cte>,
1425    has_recursive: &mut bool,
1426) {
1427    if let Expression::Select(select) = expr {
1428        if let Some(ref with) = select.with {
1429            if with.recursive {
1430                *has_recursive = true;
1431            }
1432            for cte in &with.ctes {
1433                // Recursively collect from this CTE's body first (depth-first)
1434                collect_ctes_from_cte_body(&cte.this, collected, has_recursive);
1435                // Then add this CTE itself
1436                collected.push(cte.clone());
1437            }
1438        }
1439    }
1440}
1441
1442/// Strip WITH clauses from CTE body expressions
1443fn strip_with_from_cte_body(expr: &mut Expression) {
1444    if let Expression::Select(ref mut select) = expr {
1445        select.with = None;
1446    }
1447}
1448
1449/// Strip WITH clauses from nested subqueries (after hoisting to top level)
1450fn strip_nested_with_clauses(select: &mut Select, _is_top_level: bool) {
1451    // Strip WITH from FROM subqueries
1452    if let Some(ref mut from) = select.from {
1453        for expr in from.expressions.iter_mut() {
1454            strip_with_from_expr(expr);
1455        }
1456    }
1457    // Strip from JOINs
1458    for join in select.joins.iter_mut() {
1459        strip_with_from_expr(&mut join.this);
1460    }
1461    // Strip from select expressions
1462    for expr in select.expressions.iter_mut() {
1463        strip_with_from_expr(expr);
1464    }
1465    // Strip from WHERE
1466    if let Some(ref mut w) = select.where_clause {
1467        strip_with_from_expr(&mut w.this);
1468    }
1469}
1470
1471fn strip_with_from_expr(expr: &mut Expression) {
1472    match expr {
1473        Expression::Subquery(ref mut subquery) => {
1474            strip_with_from_inner_query(&mut subquery.this);
1475        }
1476        Expression::Alias(ref mut alias) => {
1477            strip_with_from_expr(&mut alias.this);
1478        }
1479        Expression::Select(ref mut select) => {
1480            // Strip WITH from this SELECT (it's nested)
1481            select.with = None;
1482            // Recurse into its subqueries
1483            strip_nested_with_clauses(select, false);
1484        }
1485        _ => {}
1486    }
1487}
1488
1489fn strip_with_from_inner_query(expr: &mut Expression) {
1490    if let Expression::Select(ref mut select) = expr {
1491        select.with = None;
1492        strip_nested_with_clauses(select, false);
1493    }
1494}
1495
1496/// Helper to recursively collect CTEs from nested subqueries
1497fn collect_nested_ctes(
1498    expr: &Expression,
1499    collected: &mut Vec<crate::expressions::Cte>,
1500    has_recursive: &mut bool,
1501    is_top_level: bool,
1502) {
1503    match expr {
1504        Expression::Select(select) => {
1505            // If this is not the top level and has a WITH clause, collect its CTEs
1506            if !is_top_level {
1507                if let Some(ref with) = select.with {
1508                    if with.recursive {
1509                        *has_recursive = true;
1510                    }
1511                    collected.extend(with.ctes.clone());
1512                }
1513            }
1514
1515            // Recurse into FROM clause
1516            if let Some(ref from) = select.from {
1517                for expr in &from.expressions {
1518                    collect_nested_ctes(expr, collected, has_recursive, false);
1519                }
1520            }
1521
1522            // Recurse into JOINs
1523            for join in &select.joins {
1524                collect_nested_ctes(&join.this, collected, has_recursive, false);
1525            }
1526
1527            // Recurse into select expressions (for subqueries in SELECT)
1528            for sel_expr in &select.expressions {
1529                collect_nested_ctes(sel_expr, collected, has_recursive, false);
1530            }
1531
1532            // Recurse into WHERE
1533            if let Some(ref where_clause) = select.where_clause {
1534                collect_nested_ctes(&where_clause.this, collected, has_recursive, false);
1535            }
1536        }
1537        Expression::Subquery(subquery) => {
1538            // Process the inner query
1539            collect_nested_ctes(&subquery.this, collected, has_recursive, false);
1540        }
1541        Expression::Alias(alias) => {
1542            collect_nested_ctes(&alias.this, collected, has_recursive, false);
1543        }
1544        // Add more expression types as needed
1545        _ => {}
1546    }
1547}
1548
1549/// Inline window definitions from WINDOW clause.
1550///
1551/// Some dialects don't support named windows. This transform inlines them:
1552///
1553/// ```sql
1554/// SELECT SUM(a) OVER w FROM t WINDOW w AS (PARTITION BY b)
1555/// ```
1556///
1557/// To:
1558///
1559/// ```sql
1560/// SELECT SUM(a) OVER (PARTITION BY b) FROM t
1561/// ```
1562///
1563/// Reference: `transforms.py:975-1003`
1564pub fn eliminate_window_clause(expr: Expression) -> Result<Expression> {
1565    match expr {
1566        Expression::Select(mut select) => {
1567            if let Some(named_windows) = select.windows.take() {
1568                // Build a map of window name -> window spec
1569                let window_map: std::collections::HashMap<String, &Over> = named_windows
1570                    .iter()
1571                    .map(|nw| (nw.name.name.to_lowercase(), &nw.spec))
1572                    .collect();
1573
1574                // Inline window references in the select expressions
1575                select.expressions = select
1576                    .expressions
1577                    .into_iter()
1578                    .map(|e| inline_window_refs(e, &window_map))
1579                    .collect();
1580            }
1581            Ok(Expression::Select(select))
1582        }
1583        other => Ok(other),
1584    }
1585}
1586
1587/// Helper function to inline window references in an expression
1588fn inline_window_refs(
1589    expr: Expression,
1590    window_map: &std::collections::HashMap<String, &Over>,
1591) -> Expression {
1592    match expr {
1593        Expression::WindowFunction(mut wf) => {
1594            // Check if this window references a named window
1595            if let Some(ref name) = wf.over.window_name {
1596                let key = name.name.to_lowercase();
1597                if let Some(named_spec) = window_map.get(&key) {
1598                    // Inherit properties from the named window
1599                    if wf.over.partition_by.is_empty() && !named_spec.partition_by.is_empty() {
1600                        wf.over.partition_by = named_spec.partition_by.clone();
1601                    }
1602                    if wf.over.order_by.is_empty() && !named_spec.order_by.is_empty() {
1603                        wf.over.order_by = named_spec.order_by.clone();
1604                    }
1605                    if wf.over.frame.is_none() && named_spec.frame.is_some() {
1606                        wf.over.frame = named_spec.frame.clone();
1607                    }
1608                    // Clear the window name reference
1609                    wf.over.window_name = None;
1610                }
1611            }
1612            Expression::WindowFunction(wf)
1613        }
1614        Expression::Alias(mut alias) => {
1615            // Recurse into aliased expressions
1616            alias.this = inline_window_refs(alias.this, window_map);
1617            Expression::Alias(alias)
1618        }
1619        // For a complete implementation, we would need to recursively visit all expressions
1620        // that can contain window functions (CASE, subqueries, etc.)
1621        other => other,
1622    }
1623}
1624
1625/// Eliminate Oracle-style (+) join marks by converting to standard JOINs.
1626///
1627/// Oracle uses (+) syntax for outer joins:
1628/// ```sql
1629/// SELECT * FROM a, b WHERE a.x = b.x(+)
1630/// ```
1631///
1632/// This is converted to standard LEFT OUTER JOIN:
1633/// ```sql
1634/// SELECT * FROM a LEFT OUTER JOIN b ON a.x = b.x
1635/// ```
1636///
1637/// Reference: `transforms.py:828-945`
1638pub fn eliminate_join_marks(expr: Expression) -> Result<Expression> {
1639    match expr {
1640        Expression::Select(mut select) => {
1641            // Check if there are any join marks in the WHERE clause
1642            let has_join_marks = select
1643                .where_clause
1644                .as_ref()
1645                .map_or(false, |w| contains_join_mark(&w.this));
1646
1647            if !has_join_marks {
1648                return Ok(Expression::Select(select));
1649            }
1650
1651            // Collect tables from FROM clause
1652            let from_tables: Vec<String> = select
1653                .from
1654                .as_ref()
1655                .map(|f| {
1656                    f.expressions
1657                        .iter()
1658                        .filter_map(|e| get_table_name(e))
1659                        .collect()
1660                })
1661                .unwrap_or_default();
1662
1663            // Extract join conditions and their marked tables from WHERE
1664            let mut join_conditions: std::collections::HashMap<String, Vec<Expression>> =
1665                std::collections::HashMap::new();
1666            let mut remaining_conditions: Vec<Expression> = Vec::new();
1667
1668            if let Some(ref where_clause) = select.where_clause {
1669                extract_join_mark_conditions(
1670                    &where_clause.this,
1671                    &mut join_conditions,
1672                    &mut remaining_conditions,
1673                );
1674            }
1675
1676            // Build new JOINs for each marked table
1677            let mut new_joins = select.joins.clone();
1678            for (table_name, conditions) in join_conditions {
1679                // Find if this table is in FROM or existing JOINs
1680                let table_in_from = from_tables.contains(&table_name);
1681
1682                if table_in_from && !conditions.is_empty() {
1683                    // Create LEFT JOIN with combined conditions
1684                    let combined_condition = conditions.into_iter().reduce(|a, b| {
1685                        Expression::And(Box::new(BinaryOp {
1686                            left: a,
1687                            right: b,
1688                            left_comments: vec![],
1689                            operator_comments: vec![],
1690                            trailing_comments: vec![],
1691                            inferred_type: None,
1692                        }))
1693                    });
1694
1695                    // Find the table in FROM and move it to a JOIN
1696                    if let Some(ref mut from) = select.from {
1697                        if let Some(pos) = from
1698                            .expressions
1699                            .iter()
1700                            .position(|e| get_table_name(e).map_or(false, |n| n == table_name))
1701                        {
1702                            if from.expressions.len() > 1 {
1703                                let join_table = from.expressions.remove(pos);
1704                                new_joins.push(crate::expressions::Join {
1705                                    this: join_table,
1706                                    kind: JoinKind::Left,
1707                                    on: combined_condition,
1708                                    using: vec![],
1709                                    use_inner_keyword: false,
1710                                    use_outer_keyword: true,
1711                                    deferred_condition: false,
1712                                    join_hint: None,
1713                                    match_condition: None,
1714                                    pivots: Vec::new(),
1715                                    comments: Vec::new(),
1716                                    nesting_group: 0,
1717                                    directed: false,
1718                                });
1719                            }
1720                        }
1721                    }
1722                }
1723            }
1724
1725            select.joins = new_joins;
1726
1727            // Update WHERE with remaining conditions
1728            if remaining_conditions.is_empty() {
1729                select.where_clause = None;
1730            } else {
1731                let combined = remaining_conditions.into_iter().reduce(|a, b| {
1732                    Expression::And(Box::new(BinaryOp {
1733                        left: a,
1734                        right: b,
1735                        left_comments: vec![],
1736                        operator_comments: vec![],
1737                        trailing_comments: vec![],
1738                        inferred_type: None,
1739                    }))
1740                });
1741                select.where_clause = combined.map(|c| Where { this: c });
1742            }
1743
1744            // Clear join marks from all columns
1745            clear_join_marks(&mut Expression::Select(select.clone()));
1746
1747            Ok(Expression::Select(select))
1748        }
1749        other => Ok(other),
1750    }
1751}
1752
1753/// Check if an expression contains any columns with join marks
1754fn contains_join_mark(expr: &Expression) -> bool {
1755    match expr {
1756        Expression::Column(col) => col.join_mark,
1757        Expression::And(op) | Expression::Or(op) => {
1758            contains_join_mark(&op.left) || contains_join_mark(&op.right)
1759        }
1760        Expression::Eq(op)
1761        | Expression::Neq(op)
1762        | Expression::Lt(op)
1763        | Expression::Lte(op)
1764        | Expression::Gt(op)
1765        | Expression::Gte(op) => contains_join_mark(&op.left) || contains_join_mark(&op.right),
1766        Expression::Not(op) => contains_join_mark(&op.this),
1767        _ => false,
1768    }
1769}
1770
1771/// Get table name from a table expression
1772fn get_table_name(expr: &Expression) -> Option<String> {
1773    match expr {
1774        Expression::Table(t) => Some(t.name.name.clone()),
1775        Expression::Alias(a) => Some(a.alias.name.clone()),
1776        _ => None,
1777    }
1778}
1779
1780/// Extract join mark conditions from WHERE clause
1781fn extract_join_mark_conditions(
1782    expr: &Expression,
1783    join_conditions: &mut std::collections::HashMap<String, Vec<Expression>>,
1784    remaining: &mut Vec<Expression>,
1785) {
1786    match expr {
1787        Expression::And(op) => {
1788            extract_join_mark_conditions(&op.left, join_conditions, remaining);
1789            extract_join_mark_conditions(&op.right, join_conditions, remaining);
1790        }
1791        _ => {
1792            if let Some(table) = get_join_mark_table(expr) {
1793                join_conditions
1794                    .entry(table)
1795                    .or_insert_with(Vec::new)
1796                    .push(expr.clone());
1797            } else {
1798                remaining.push(expr.clone());
1799            }
1800        }
1801    }
1802}
1803
1804/// Get the table name of a column with join mark in an expression
1805fn get_join_mark_table(expr: &Expression) -> Option<String> {
1806    match expr {
1807        Expression::Eq(op)
1808        | Expression::Neq(op)
1809        | Expression::Lt(op)
1810        | Expression::Lte(op)
1811        | Expression::Gt(op)
1812        | Expression::Gte(op) => {
1813            // Check both sides for join mark columns
1814            if let Expression::Column(col) = &op.left {
1815                if col.join_mark {
1816                    return col.table.as_ref().map(|t| t.name.clone());
1817                }
1818            }
1819            if let Expression::Column(col) = &op.right {
1820                if col.join_mark {
1821                    return col.table.as_ref().map(|t| t.name.clone());
1822                }
1823            }
1824            None
1825        }
1826        _ => None,
1827    }
1828}
1829
1830/// Clear join marks from all columns in an expression
1831fn clear_join_marks(expr: &mut Expression) {
1832    match expr {
1833        Expression::Column(col) => col.join_mark = false,
1834        Expression::Select(select) => {
1835            if let Some(ref mut w) = select.where_clause {
1836                clear_join_marks(&mut w.this);
1837            }
1838            for sel_expr in &mut select.expressions {
1839                clear_join_marks(sel_expr);
1840            }
1841        }
1842        Expression::And(op) | Expression::Or(op) => {
1843            clear_join_marks(&mut op.left);
1844            clear_join_marks(&mut op.right);
1845        }
1846        Expression::Eq(op)
1847        | Expression::Neq(op)
1848        | Expression::Lt(op)
1849        | Expression::Lte(op)
1850        | Expression::Gt(op)
1851        | Expression::Gte(op) => {
1852            clear_join_marks(&mut op.left);
1853            clear_join_marks(&mut op.right);
1854        }
1855        _ => {}
1856    }
1857}
1858
1859/// Add column names to recursive CTE definitions.
1860///
1861/// Uses projection output names in recursive CTE definitions to define the CTEs' columns.
1862/// This is required by some dialects that need explicit column names in recursive CTEs.
1863///
1864/// Reference: `transforms.py:576-592`
1865pub fn add_recursive_cte_column_names(expr: Expression) -> Result<Expression> {
1866    match expr {
1867        Expression::Select(mut select) => {
1868            if let Some(ref mut with) = select.with {
1869                if with.recursive {
1870                    let mut counter = 0;
1871                    for cte in &mut with.ctes {
1872                        if cte.columns.is_empty() {
1873                            // Try to get column names from the CTE's SELECT
1874                            if let Expression::Select(ref cte_select) = cte.this {
1875                                let names: Vec<Identifier> = cte_select
1876                                    .expressions
1877                                    .iter()
1878                                    .map(|e| match e {
1879                                        Expression::Alias(a) => a.alias.clone(),
1880                                        Expression::Column(c) => c.name.clone(),
1881                                        _ => {
1882                                            counter += 1;
1883                                            Identifier::new(format!("_c_{}", counter))
1884                                        }
1885                                    })
1886                                    .collect();
1887                                cte.columns = names;
1888                            }
1889                        }
1890                    }
1891                }
1892            }
1893            Ok(Expression::Select(select))
1894        }
1895        other => Ok(other),
1896    }
1897}
1898
1899/// Convert epoch string in CAST to timestamp literal.
1900///
1901/// Replaces `CAST('epoch' AS TIMESTAMP)` with `CAST('1970-01-01 00:00:00' AS TIMESTAMP)`
1902/// for dialects that don't support the 'epoch' keyword.
1903///
1904/// Reference: `transforms.py:595-604`
1905pub fn epoch_cast_to_ts(expr: Expression) -> Result<Expression> {
1906    match expr {
1907        Expression::Cast(mut cast) => {
1908            if let Expression::Literal(ref lit) = cast.this {
1909                if let Literal::String(ref s) = lit.as_ref() {
1910                    if s.to_lowercase() == "epoch" {
1911                        if is_temporal_type(&cast.to) {
1912                            cast.this = Expression::Literal(Box::new(Literal::String(
1913                                "1970-01-01 00:00:00".to_string(),
1914                            )));
1915                        }
1916                    }
1917                }
1918            }
1919            Ok(Expression::Cast(cast))
1920        }
1921        Expression::TryCast(mut try_cast) => {
1922            if let Expression::Literal(ref lit) = try_cast.this {
1923                if let Literal::String(ref s) = lit.as_ref() {
1924                    if s.to_lowercase() == "epoch" {
1925                        if is_temporal_type(&try_cast.to) {
1926                            try_cast.this = Expression::Literal(Box::new(Literal::String(
1927                                "1970-01-01 00:00:00".to_string(),
1928                            )));
1929                        }
1930                    }
1931                }
1932            }
1933            Ok(Expression::TryCast(try_cast))
1934        }
1935        other => Ok(other),
1936    }
1937}
1938
1939/// Check if a DataType is a temporal type (DATE, TIMESTAMP, etc.)
1940fn is_temporal_type(dt: &DataType) -> bool {
1941    matches!(
1942        dt,
1943        DataType::Date | DataType::Timestamp { .. } | DataType::Time { .. }
1944    )
1945}
1946
1947/// Ensure boolean values in conditions.
1948///
1949/// Converts numeric values used in conditions into explicit boolean expressions.
1950/// For dialects that require explicit booleans in WHERE clauses.
1951///
1952/// Converts:
1953/// ```sql
1954/// WHERE column
1955/// ```
1956/// To:
1957/// ```sql
1958/// WHERE column <> 0
1959/// ```
1960///
1961/// And:
1962/// ```sql
1963/// WHERE 1
1964/// ```
1965/// To:
1966/// ```sql
1967/// WHERE 1 <> 0
1968/// ```
1969///
1970/// Reference: `transforms.py:703-721`
1971pub fn ensure_bools(expr: Expression) -> Result<Expression> {
1972    // First, recursively process Case WHEN conditions throughout the expression tree
1973    let expr = ensure_bools_in_case(expr);
1974    match expr {
1975        Expression::Select(mut select) => {
1976            // Transform WHERE clause condition
1977            if let Some(ref mut where_clause) = select.where_clause {
1978                where_clause.this = ensure_bool_condition(where_clause.this.clone());
1979            }
1980            // Transform HAVING clause condition
1981            if let Some(ref mut having) = select.having {
1982                having.this = ensure_bool_condition(having.this.clone());
1983            }
1984            Ok(Expression::Select(select))
1985        }
1986        // Top-level AND/OR/NOT expressions also need ensure_bools processing
1987        Expression::And(_) | Expression::Or(_) | Expression::Not(_) => {
1988            Ok(ensure_bool_condition(expr))
1989        }
1990        other => Ok(other),
1991    }
1992}
1993
1994/// Recursively walk the expression tree to find Case expressions and apply
1995/// ensure_bool_condition to their WHEN conditions. This ensures that
1996/// `CASE WHEN TRUE` becomes `CASE WHEN (1 = 1)` etc.
1997fn ensure_bools_in_case(expr: Expression) -> Expression {
1998    match expr {
1999        Expression::Case(mut case) => {
2000            case.whens = case
2001                .whens
2002                .into_iter()
2003                .map(|(condition, result)| {
2004                    let new_condition = ensure_bool_condition(ensure_bools_in_case(condition));
2005                    let new_result = ensure_bools_in_case(result);
2006                    (new_condition, new_result)
2007                })
2008                .collect();
2009            if let Some(else_expr) = case.else_ {
2010                case.else_ = Some(ensure_bools_in_case(else_expr));
2011            }
2012            Expression::Case(Box::new(*case))
2013        }
2014        Expression::Select(mut select) => {
2015            // Recursively process expressions in the SELECT list
2016            select.expressions = select
2017                .expressions
2018                .into_iter()
2019                .map(ensure_bools_in_case)
2020                .collect();
2021            // Process WHERE/HAVING are handled by ensure_bools main function
2022            Expression::Select(select)
2023        }
2024        Expression::Alias(mut alias) => {
2025            alias.this = ensure_bools_in_case(alias.this);
2026            Expression::Alias(alias)
2027        }
2028        Expression::Paren(mut paren) => {
2029            paren.this = ensure_bools_in_case(paren.this);
2030            Expression::Paren(paren)
2031        }
2032        other => other,
2033    }
2034}
2035
2036/// Helper to check if an expression is inherently boolean (returns a boolean value).
2037/// Inherently boolean expressions include comparisons, predicates, logical operators, etc.
2038fn is_boolean_expression(expr: &Expression) -> bool {
2039    matches!(
2040        expr,
2041        Expression::Eq(_)
2042            | Expression::Neq(_)
2043            | Expression::Lt(_)
2044            | Expression::Lte(_)
2045            | Expression::Gt(_)
2046            | Expression::Gte(_)
2047            | Expression::Is(_)
2048            | Expression::IsNull(_)
2049            | Expression::IsTrue(_)
2050            | Expression::IsFalse(_)
2051            | Expression::Like(_)
2052            | Expression::ILike(_)
2053            | Expression::SimilarTo(_)
2054            | Expression::Glob(_)
2055            | Expression::RegexpLike(_)
2056            | Expression::In(_)
2057            | Expression::Between(_)
2058            | Expression::Exists(_)
2059            | Expression::And(_)
2060            | Expression::Or(_)
2061            | Expression::Not(_)
2062            | Expression::Any(_)
2063            | Expression::All(_)
2064            | Expression::EqualNull(_)
2065    )
2066}
2067
2068/// Helper to wrap a non-boolean expression with `<> 0`
2069fn wrap_neq_zero(expr: Expression) -> Expression {
2070    Expression::Neq(Box::new(BinaryOp {
2071        left: expr,
2072        right: Expression::Literal(Box::new(Literal::Number("0".to_string()))),
2073        left_comments: vec![],
2074        operator_comments: vec![],
2075        trailing_comments: vec![],
2076        inferred_type: None,
2077    }))
2078}
2079
2080/// Helper to convert a condition expression to ensure it's boolean.
2081///
2082/// In TSQL, conditions in WHERE/HAVING must be boolean expressions.
2083/// Non-boolean expressions (columns, literals, casts, function calls, etc.)
2084/// are wrapped with `<> 0`. Boolean literals are converted to `(1 = 1)` or `(1 = 0)`.
2085fn ensure_bool_condition(expr: Expression) -> Expression {
2086    match expr {
2087        // For AND/OR, recursively process children
2088        Expression::And(op) => {
2089            let new_op = BinaryOp {
2090                left: ensure_bool_condition(op.left.clone()),
2091                right: ensure_bool_condition(op.right.clone()),
2092                left_comments: op.left_comments.clone(),
2093                operator_comments: op.operator_comments.clone(),
2094                trailing_comments: op.trailing_comments.clone(),
2095                inferred_type: None,
2096            };
2097            Expression::And(Box::new(new_op))
2098        }
2099        Expression::Or(op) => {
2100            let new_op = BinaryOp {
2101                left: ensure_bool_condition(op.left.clone()),
2102                right: ensure_bool_condition(op.right.clone()),
2103                left_comments: op.left_comments.clone(),
2104                operator_comments: op.operator_comments.clone(),
2105                trailing_comments: op.trailing_comments.clone(),
2106                inferred_type: None,
2107            };
2108            Expression::Or(Box::new(new_op))
2109        }
2110        // For NOT, recursively process the inner expression
2111        Expression::Not(op) => Expression::Not(Box::new(crate::expressions::UnaryOp {
2112            this: ensure_bool_condition(op.this.clone()),
2113            inferred_type: None,
2114        })),
2115        // For Paren, recurse into inner expression
2116        Expression::Paren(paren) => Expression::Paren(Box::new(crate::expressions::Paren {
2117            this: ensure_bool_condition(paren.this.clone()),
2118            trailing_comments: paren.trailing_comments.clone(),
2119        })),
2120        // Boolean literals: true -> (1 = 1), false -> (1 = 0)
2121        Expression::Boolean(BooleanLiteral { value: true }) => {
2122            Expression::Paren(Box::new(crate::expressions::Paren {
2123                this: Expression::Eq(Box::new(BinaryOp {
2124                    left: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
2125                    right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
2126                    left_comments: vec![],
2127                    operator_comments: vec![],
2128                    trailing_comments: vec![],
2129                    inferred_type: None,
2130                })),
2131                trailing_comments: vec![],
2132            }))
2133        }
2134        Expression::Boolean(BooleanLiteral { value: false }) => {
2135            Expression::Paren(Box::new(crate::expressions::Paren {
2136                this: Expression::Eq(Box::new(BinaryOp {
2137                    left: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
2138                    right: Expression::Literal(Box::new(Literal::Number("0".to_string()))),
2139                    left_comments: vec![],
2140                    operator_comments: vec![],
2141                    trailing_comments: vec![],
2142                    inferred_type: None,
2143                })),
2144                trailing_comments: vec![],
2145            }))
2146        }
2147        // Already boolean expressions pass through unchanged
2148        ref e if is_boolean_expression(e) => expr,
2149        // Everything else (Column, Identifier, Cast, Literal::Number, function calls, etc.)
2150        // gets wrapped with <> 0
2151        _ => wrap_neq_zero(expr),
2152    }
2153}
2154
2155/// Remove table qualifiers from column references.
2156///
2157/// Converts `table.column` to just `column` throughout the expression tree.
2158///
2159/// Reference: `transforms.py:724-730`
2160pub fn unqualify_columns(expr: Expression) -> Result<Expression> {
2161    Ok(unqualify_columns_recursive(expr))
2162}
2163
2164/// Recursively remove table qualifiers from column references
2165fn unqualify_columns_recursive(expr: Expression) -> Expression {
2166    match expr {
2167        Expression::Column(mut col) => {
2168            col.table = None;
2169            Expression::Column(col)
2170        }
2171        Expression::Select(mut select) => {
2172            select.expressions = select
2173                .expressions
2174                .into_iter()
2175                .map(unqualify_columns_recursive)
2176                .collect();
2177            if let Some(ref mut where_clause) = select.where_clause {
2178                where_clause.this = unqualify_columns_recursive(where_clause.this.clone());
2179            }
2180            if let Some(ref mut having) = select.having {
2181                having.this = unqualify_columns_recursive(having.this.clone());
2182            }
2183            if let Some(ref mut group_by) = select.group_by {
2184                group_by.expressions = group_by
2185                    .expressions
2186                    .iter()
2187                    .cloned()
2188                    .map(unqualify_columns_recursive)
2189                    .collect();
2190            }
2191            if let Some(ref mut order_by) = select.order_by {
2192                order_by.expressions = order_by
2193                    .expressions
2194                    .iter()
2195                    .map(|o| crate::expressions::Ordered {
2196                        this: unqualify_columns_recursive(o.this.clone()),
2197                        desc: o.desc,
2198                        nulls_first: o.nulls_first,
2199                        explicit_asc: o.explicit_asc,
2200                        with_fill: o.with_fill.clone(),
2201                    })
2202                    .collect();
2203            }
2204            for join in &mut select.joins {
2205                if let Some(ref mut on) = join.on {
2206                    *on = unqualify_columns_recursive(on.clone());
2207                }
2208            }
2209            Expression::Select(select)
2210        }
2211        Expression::Alias(mut alias) => {
2212            alias.this = unqualify_columns_recursive(alias.this);
2213            Expression::Alias(alias)
2214        }
2215        // Binary operations
2216        Expression::And(op) => Expression::And(Box::new(unqualify_binary_op(*op))),
2217        Expression::Or(op) => Expression::Or(Box::new(unqualify_binary_op(*op))),
2218        Expression::Eq(op) => Expression::Eq(Box::new(unqualify_binary_op(*op))),
2219        Expression::Neq(op) => Expression::Neq(Box::new(unqualify_binary_op(*op))),
2220        Expression::Lt(op) => Expression::Lt(Box::new(unqualify_binary_op(*op))),
2221        Expression::Lte(op) => Expression::Lte(Box::new(unqualify_binary_op(*op))),
2222        Expression::Gt(op) => Expression::Gt(Box::new(unqualify_binary_op(*op))),
2223        Expression::Gte(op) => Expression::Gte(Box::new(unqualify_binary_op(*op))),
2224        Expression::Add(op) => Expression::Add(Box::new(unqualify_binary_op(*op))),
2225        Expression::Sub(op) => Expression::Sub(Box::new(unqualify_binary_op(*op))),
2226        Expression::Mul(op) => Expression::Mul(Box::new(unqualify_binary_op(*op))),
2227        Expression::Div(op) => Expression::Div(Box::new(unqualify_binary_op(*op))),
2228        // Functions
2229        Expression::Function(mut func) => {
2230            func.args = func
2231                .args
2232                .into_iter()
2233                .map(unqualify_columns_recursive)
2234                .collect();
2235            Expression::Function(func)
2236        }
2237        Expression::AggregateFunction(mut func) => {
2238            func.args = func
2239                .args
2240                .into_iter()
2241                .map(unqualify_columns_recursive)
2242                .collect();
2243            Expression::AggregateFunction(func)
2244        }
2245        Expression::Case(mut case) => {
2246            case.whens = case
2247                .whens
2248                .into_iter()
2249                .map(|(cond, result)| {
2250                    (
2251                        unqualify_columns_recursive(cond),
2252                        unqualify_columns_recursive(result),
2253                    )
2254                })
2255                .collect();
2256            if let Some(else_expr) = case.else_ {
2257                case.else_ = Some(unqualify_columns_recursive(else_expr));
2258            }
2259            Expression::Case(case)
2260        }
2261        // Other expressions pass through unchanged
2262        other => other,
2263    }
2264}
2265
2266/// Helper to unqualify columns in a binary operation
2267fn unqualify_binary_op(mut op: BinaryOp) -> BinaryOp {
2268    op.left = unqualify_columns_recursive(op.left);
2269    op.right = unqualify_columns_recursive(op.right);
2270    op
2271}
2272
2273/// Convert UNNEST(GENERATE_DATE_ARRAY(...)) to recursive CTE.
2274///
2275/// For dialects that don't support GENERATE_DATE_ARRAY, this converts:
2276/// ```sql
2277/// SELECT * FROM UNNEST(GENERATE_DATE_ARRAY('2024-01-01', '2024-01-31', INTERVAL 1 DAY)) AS d(date_value)
2278/// ```
2279/// To a recursive CTE:
2280/// ```sql
2281/// WITH RECURSIVE _generated_dates(date_value) AS (
2282///     SELECT CAST('2024-01-01' AS DATE) AS date_value
2283///     UNION ALL
2284///     SELECT CAST(DATE_ADD(date_value, 1, DAY) AS DATE)
2285///     FROM _generated_dates
2286///     WHERE CAST(DATE_ADD(date_value, 1, DAY) AS DATE) <= CAST('2024-01-31' AS DATE)
2287/// )
2288/// SELECT date_value FROM _generated_dates
2289/// ```
2290///
2291/// Reference: `transforms.py:68-122`
2292pub fn unnest_generate_date_array_using_recursive_cte(expr: Expression) -> Result<Expression> {
2293    match expr {
2294        Expression::Select(mut select) => {
2295            let mut cte_count = 0;
2296            let mut new_ctes: Vec<crate::expressions::Cte> = Vec::new();
2297
2298            // Process existing CTE bodies first (to handle CTE-wrapped GENERATE_DATE_ARRAY)
2299            if let Some(ref mut with) = select.with {
2300                for cte in &mut with.ctes {
2301                    process_expression_for_gda(&mut cte.this, &mut cte_count, &mut new_ctes);
2302                }
2303            }
2304
2305            // Process FROM clause
2306            if let Some(ref mut from) = select.from {
2307                for table_expr in &mut from.expressions {
2308                    if let Some((cte, replacement)) =
2309                        try_convert_generate_date_array(table_expr, &mut cte_count)
2310                    {
2311                        new_ctes.push(cte);
2312                        *table_expr = replacement;
2313                    }
2314                }
2315            }
2316
2317            // Process JOINs
2318            for join in &mut select.joins {
2319                if let Some((cte, replacement)) =
2320                    try_convert_generate_date_array(&join.this, &mut cte_count)
2321                {
2322                    new_ctes.push(cte);
2323                    join.this = replacement;
2324                }
2325            }
2326
2327            // Add collected CTEs to the WITH clause
2328            if !new_ctes.is_empty() {
2329                let with_clause = select.with.get_or_insert_with(|| crate::expressions::With {
2330                    ctes: Vec::new(),
2331                    recursive: true, // Recursive CTEs
2332                    leading_comments: vec![],
2333                    search: None,
2334                });
2335                with_clause.recursive = true;
2336
2337                // Prepend new CTEs before existing ones
2338                let mut all_ctes = new_ctes;
2339                all_ctes.append(&mut with_clause.ctes);
2340                with_clause.ctes = all_ctes;
2341            }
2342
2343            Ok(Expression::Select(select))
2344        }
2345        other => Ok(other),
2346    }
2347}
2348
2349/// Recursively process an expression tree to find and convert UNNEST(GENERATE_DATE_ARRAY)
2350/// inside CTE bodies, subqueries, etc.
2351fn process_expression_for_gda(
2352    expr: &mut Expression,
2353    cte_count: &mut usize,
2354    new_ctes: &mut Vec<crate::expressions::Cte>,
2355) {
2356    match expr {
2357        Expression::Select(ref mut select) => {
2358            // Process FROM clause
2359            if let Some(ref mut from) = select.from {
2360                for table_expr in &mut from.expressions {
2361                    if let Some((cte, replacement)) =
2362                        try_convert_generate_date_array(table_expr, cte_count)
2363                    {
2364                        new_ctes.push(cte);
2365                        *table_expr = replacement;
2366                    }
2367                }
2368            }
2369            // Process JOINs
2370            for join in &mut select.joins {
2371                if let Some((cte, replacement)) =
2372                    try_convert_generate_date_array(&join.this, cte_count)
2373                {
2374                    new_ctes.push(cte);
2375                    join.this = replacement;
2376                }
2377            }
2378        }
2379        Expression::Union(ref mut u) => {
2380            process_expression_for_gda(&mut u.left, cte_count, new_ctes);
2381            process_expression_for_gda(&mut u.right, cte_count, new_ctes);
2382        }
2383        Expression::Subquery(ref mut sq) => {
2384            process_expression_for_gda(&mut sq.this, cte_count, new_ctes);
2385        }
2386        _ => {}
2387    }
2388}
2389
2390/// Try to convert an UNNEST(GENERATE_DATE_ARRAY(...)) to a recursive CTE reference.
2391/// `column_name_override` allows the caller to specify a custom column name (from alias).
2392fn try_convert_generate_date_array(
2393    expr: &Expression,
2394    cte_count: &mut usize,
2395) -> Option<(crate::expressions::Cte, Expression)> {
2396    try_convert_generate_date_array_with_name(expr, cte_count, None)
2397}
2398
2399fn try_convert_generate_date_array_with_name(
2400    expr: &Expression,
2401    cte_count: &mut usize,
2402    column_name_override: Option<&str>,
2403) -> Option<(crate::expressions::Cte, Expression)> {
2404    // Helper: extract (start, end, step) from GENERATE_DATE_ARRAY/GenerateSeries variants
2405    fn extract_gda_args(
2406        inner: &Expression,
2407    ) -> Option<(&Expression, &Expression, Option<&Expression>)> {
2408        match inner {
2409            Expression::GenerateDateArray(gda) => {
2410                let start = gda.start.as_ref()?;
2411                let end = gda.end.as_ref()?;
2412                let step = gda.step.as_deref();
2413                Some((start, end, step))
2414            }
2415            Expression::GenerateSeries(gs) => {
2416                let start = gs.start.as_deref()?;
2417                let end = gs.end.as_deref()?;
2418                let step = gs.step.as_deref();
2419                Some((start, end, step))
2420            }
2421            Expression::Function(f) if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") => {
2422                if f.args.len() >= 2 {
2423                    let start = &f.args[0];
2424                    let end = &f.args[1];
2425                    let step = f.args.get(2);
2426                    Some((start, end, step))
2427                } else {
2428                    None
2429                }
2430            }
2431            _ => None,
2432        }
2433    }
2434
2435    // Look for UNNEST containing GENERATE_DATE_ARRAY
2436    if let Expression::Unnest(unnest) = expr {
2437        if let Some((start, end, step_opt)) = extract_gda_args(&unnest.this) {
2438            let start = start;
2439            let end = end;
2440            let step: Option<&Expression> = step_opt;
2441
2442            // Generate CTE name
2443            let cte_name = if *cte_count == 0 {
2444                "_generated_dates".to_string()
2445            } else {
2446                format!("_generated_dates_{}", cte_count)
2447            };
2448            *cte_count += 1;
2449
2450            let column_name =
2451                Identifier::new(column_name_override.unwrap_or("date_value").to_string());
2452
2453            // Helper: wrap expression in CAST(... AS DATE) unless already a date literal or CAST to DATE
2454            let cast_to_date = |expr: &Expression| -> Expression {
2455                match expr {
2456                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
2457                        // DATE '...' -> convert to CAST('...' AS DATE) to match expected output
2458                        if let Expression::Literal(lit) = expr {
2459                            if let Literal::Date(d) = lit.as_ref() {
2460                                Expression::Cast(Box::new(Cast {
2461                                    this: Expression::Literal(Box::new(Literal::String(d.clone()))),
2462                                    to: DataType::Date,
2463                                    trailing_comments: vec![],
2464                                    double_colon_syntax: false,
2465                                    format: None,
2466                                    default: None,
2467                                    inferred_type: None,
2468                                }))
2469                            } else {
2470                                expr.clone()
2471                            }
2472                        } else {
2473                            unreachable!()
2474                        }
2475                    }
2476                    Expression::Cast(c) if matches!(c.to, DataType::Date) => expr.clone(),
2477                    _ => Expression::Cast(Box::new(Cast {
2478                        this: expr.clone(),
2479                        to: DataType::Date,
2480                        trailing_comments: vec![],
2481                        double_colon_syntax: false,
2482                        format: None,
2483                        default: None,
2484                        inferred_type: None,
2485                    })),
2486                }
2487            };
2488
2489            // Build base case: SELECT CAST(start AS DATE) AS date_value
2490            let base_select = Select {
2491                expressions: vec![Expression::Alias(Box::new(crate::expressions::Alias {
2492                    this: cast_to_date(start),
2493                    alias: column_name.clone(),
2494                    column_aliases: vec![],
2495                    pre_alias_comments: vec![],
2496                    trailing_comments: vec![],
2497                    inferred_type: None,
2498                }))],
2499                ..Select::new()
2500            };
2501
2502            // Normalize interval: convert String("1") -> Number("1") so it generates without quotes
2503            let normalize_interval = |expr: &Expression| -> Expression {
2504                if let Expression::Interval(ref iv) = expr {
2505                    let mut iv_clone = iv.as_ref().clone();
2506                    if let Some(Expression::Literal(ref lit)) = iv_clone.this {
2507                        if let Literal::String(ref s) = lit.as_ref() {
2508                            // Convert numeric strings to Number literals for unquoted output
2509                            if s.parse::<f64>().is_ok() {
2510                                iv_clone.this =
2511                                    Some(Expression::Literal(Box::new(Literal::Number(s.clone()))));
2512                            }
2513                        }
2514                    }
2515                    Expression::Interval(Box::new(iv_clone))
2516                } else {
2517                    expr.clone()
2518                }
2519            };
2520
2521            // Build recursive case: DateAdd(date_value, count, unit) from CTE where result <= end
2522            // Extract interval unit and count from step expression
2523            let normalized_step = step.map(|s| normalize_interval(s)).unwrap_or_else(|| {
2524                Expression::Interval(Box::new(crate::expressions::Interval {
2525                    this: Some(Expression::Literal(Box::new(Literal::Number(
2526                        "1".to_string(),
2527                    )))),
2528                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
2529                        unit: crate::expressions::IntervalUnit::Day,
2530                        use_plural: false,
2531                    }),
2532                }))
2533            });
2534
2535            // Extract unit and count from interval expression to build DateAddFunc
2536            let (add_unit, add_count) = extract_interval_unit_and_count(&normalized_step);
2537
2538            let date_add_expr = Expression::DateAdd(Box::new(crate::expressions::DateAddFunc {
2539                this: Expression::Column(Box::new(crate::expressions::Column {
2540                    name: column_name.clone(),
2541                    table: None,
2542                    join_mark: false,
2543                    trailing_comments: vec![],
2544                    span: None,
2545                    inferred_type: None,
2546                })),
2547                interval: add_count,
2548                unit: add_unit,
2549            }));
2550
2551            let cast_date_add = Expression::Cast(Box::new(Cast {
2552                this: date_add_expr.clone(),
2553                to: DataType::Date,
2554                trailing_comments: vec![],
2555                double_colon_syntax: false,
2556                format: None,
2557                default: None,
2558                inferred_type: None,
2559            }));
2560
2561            let recursive_select = Select {
2562                expressions: vec![cast_date_add.clone()],
2563                from: Some(From {
2564                    expressions: vec![Expression::Table(Box::new(
2565                        crate::expressions::TableRef::new(&cte_name),
2566                    ))],
2567                }),
2568                where_clause: Some(Where {
2569                    this: Expression::Lte(Box::new(BinaryOp {
2570                        left: cast_date_add,
2571                        right: cast_to_date(end),
2572                        left_comments: vec![],
2573                        operator_comments: vec![],
2574                        trailing_comments: vec![],
2575                        inferred_type: None,
2576                    })),
2577                }),
2578                ..Select::new()
2579            };
2580
2581            // Build UNION ALL of base and recursive
2582            let union = crate::expressions::Union {
2583                left: Expression::Select(Box::new(base_select)),
2584                right: Expression::Select(Box::new(recursive_select)),
2585                all: true, // UNION ALL
2586                distinct: false,
2587                with: None,
2588                order_by: None,
2589                limit: None,
2590                offset: None,
2591                distribute_by: None,
2592                sort_by: None,
2593                cluster_by: None,
2594                by_name: false,
2595                side: None,
2596                kind: None,
2597                corresponding: false,
2598                strict: false,
2599                on_columns: Vec::new(),
2600            };
2601
2602            // Create CTE
2603            let cte = crate::expressions::Cte {
2604                this: Expression::Union(Box::new(union)),
2605                alias: Identifier::new(cte_name.clone()),
2606                columns: vec![column_name.clone()],
2607                materialized: None,
2608                key_expressions: Vec::new(),
2609                alias_first: true,
2610                comments: Vec::new(),
2611            };
2612
2613            // Create replacement: SELECT date_value FROM cte_name
2614            let replacement_select = Select {
2615                expressions: vec![Expression::Column(Box::new(crate::expressions::Column {
2616                    name: column_name,
2617                    table: None,
2618                    join_mark: false,
2619                    trailing_comments: vec![],
2620                    span: None,
2621                    inferred_type: None,
2622                }))],
2623                from: Some(From {
2624                    expressions: vec![Expression::Table(Box::new(
2625                        crate::expressions::TableRef::new(&cte_name),
2626                    ))],
2627                }),
2628                ..Select::new()
2629            };
2630
2631            let replacement = Expression::Subquery(Box::new(Subquery {
2632                this: Expression::Select(Box::new(replacement_select)),
2633                alias: Some(Identifier::new(cte_name)),
2634                column_aliases: vec![],
2635                order_by: None,
2636                limit: None,
2637                offset: None,
2638                distribute_by: None,
2639                sort_by: None,
2640                cluster_by: None,
2641                lateral: false,
2642                modifiers_inside: false,
2643                trailing_comments: vec![],
2644                inferred_type: None,
2645            }));
2646
2647            return Some((cte, replacement));
2648        }
2649    }
2650
2651    // Also check for aliased UNNEST like UNNEST(...) AS _q(date_week)
2652    if let Expression::Alias(alias) = expr {
2653        // Extract column name from alias column_aliases if present
2654        let col_name = alias.column_aliases.first().map(|id| id.name.as_str());
2655        if let Some((cte, replacement)) =
2656            try_convert_generate_date_array_with_name(&alias.this, cte_count, col_name)
2657        {
2658            // If we extracted a column name from the alias, don't preserve the outer alias
2659            // since the CTE now uses that column name directly
2660            if col_name.is_some() {
2661                return Some((cte, replacement));
2662            }
2663            let new_alias = Expression::Alias(Box::new(crate::expressions::Alias {
2664                this: replacement,
2665                alias: alias.alias.clone(),
2666                column_aliases: alias.column_aliases.clone(),
2667                pre_alias_comments: alias.pre_alias_comments.clone(),
2668                trailing_comments: alias.trailing_comments.clone(),
2669                inferred_type: None,
2670            }));
2671            return Some((cte, new_alias));
2672        }
2673    }
2674
2675    None
2676}
2677
2678/// Extract interval unit and count from an interval expression.
2679/// Handles both structured intervals (with separate unit field) and
2680/// string-encoded intervals like `INTERVAL '1 WEEK'` where unit is None
2681/// and the value contains both count and unit.
2682fn extract_interval_unit_and_count(
2683    expr: &Expression,
2684) -> (crate::expressions::IntervalUnit, Expression) {
2685    use crate::expressions::{IntervalUnit, IntervalUnitSpec, Literal};
2686
2687    if let Expression::Interval(ref iv) = expr {
2688        // First try: structured unit field
2689        if let Some(ref unit_spec) = iv.unit {
2690            if let IntervalUnitSpec::Simple { unit, .. } = unit_spec {
2691                let count = match &iv.this {
2692                    Some(e) => e.clone(),
2693                    None => Expression::Literal(Box::new(Literal::Number("1".to_string()))),
2694                };
2695                return (unit.clone(), count);
2696            }
2697        }
2698
2699        // Second try: parse from string value like "1 WEEK" or "1"
2700        if let Some(ref val_expr) = iv.this {
2701            match val_expr {
2702                Expression::Literal(lit)
2703                    if matches!(lit.as_ref(), Literal::String(_) | Literal::Number(_)) =>
2704                {
2705                    let s = match lit.as_ref() {
2706                        Literal::String(s) | Literal::Number(s) => s,
2707                        _ => unreachable!(),
2708                    };
2709                    // Try to parse "count unit" format like "1 WEEK", "1 MONTH"
2710                    let parts: Vec<&str> = s.trim().splitn(2, char::is_whitespace).collect();
2711                    if parts.len() == 2 {
2712                        let count_str = parts[0].trim();
2713                        let unit_str = parts[1].trim().to_uppercase();
2714                        let unit = match unit_str.as_str() {
2715                            "YEAR" | "YEARS" => IntervalUnit::Year,
2716                            "QUARTER" | "QUARTERS" => IntervalUnit::Quarter,
2717                            "MONTH" | "MONTHS" => IntervalUnit::Month,
2718                            "WEEK" | "WEEKS" => IntervalUnit::Week,
2719                            "DAY" | "DAYS" => IntervalUnit::Day,
2720                            "HOUR" | "HOURS" => IntervalUnit::Hour,
2721                            "MINUTE" | "MINUTES" => IntervalUnit::Minute,
2722                            "SECOND" | "SECONDS" => IntervalUnit::Second,
2723                            "MILLISECOND" | "MILLISECONDS" => IntervalUnit::Millisecond,
2724                            "MICROSECOND" | "MICROSECONDS" => IntervalUnit::Microsecond,
2725                            _ => IntervalUnit::Day,
2726                        };
2727                        return (
2728                            unit,
2729                            Expression::Literal(Box::new(Literal::Number(count_str.to_string()))),
2730                        );
2731                    }
2732                    // Just a number with no unit - default to Day
2733                    if s.parse::<f64>().is_ok() {
2734                        return (
2735                            IntervalUnit::Day,
2736                            Expression::Literal(Box::new(Literal::Number(s.clone()))),
2737                        );
2738                    }
2739                }
2740                _ => {}
2741            }
2742        }
2743
2744        // Fallback
2745        (
2746            IntervalUnit::Day,
2747            Expression::Literal(Box::new(Literal::Number("1".to_string()))),
2748        )
2749    } else {
2750        (
2751            IntervalUnit::Day,
2752            Expression::Literal(Box::new(Literal::Number("1".to_string()))),
2753        )
2754    }
2755}
2756
2757/// Convert ILIKE to LOWER(x) LIKE LOWER(y).
2758///
2759/// For dialects that don't support ILIKE (case-insensitive LIKE), this converts:
2760/// ```sql
2761/// SELECT * FROM t WHERE x ILIKE '%pattern%'
2762/// ```
2763/// To:
2764/// ```sql
2765/// SELECT * FROM t WHERE LOWER(x) LIKE LOWER('%pattern%')
2766/// ```
2767///
2768/// Reference: `generator.py:no_ilike_sql()`
2769pub fn no_ilike_sql(expr: Expression) -> Result<Expression> {
2770    match expr {
2771        Expression::ILike(ilike) => {
2772            // Create LOWER(left) LIKE LOWER(right)
2773            let lower_left = Expression::Function(Box::new(crate::expressions::Function {
2774                name: "LOWER".to_string(),
2775                args: vec![ilike.left],
2776                distinct: false,
2777                trailing_comments: vec![],
2778                use_bracket_syntax: false,
2779                no_parens: false,
2780                quoted: false,
2781                span: None,
2782                inferred_type: None,
2783            }));
2784
2785            let lower_right = Expression::Function(Box::new(crate::expressions::Function {
2786                name: "LOWER".to_string(),
2787                args: vec![ilike.right],
2788                distinct: false,
2789                trailing_comments: vec![],
2790                use_bracket_syntax: false,
2791                no_parens: false,
2792                quoted: false,
2793                span: None,
2794                inferred_type: None,
2795            }));
2796
2797            Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
2798                left: lower_left,
2799                right: lower_right,
2800                escape: ilike.escape,
2801                quantifier: ilike.quantifier,
2802                inferred_type: None,
2803            })))
2804        }
2805        other => Ok(other),
2806    }
2807}
2808
2809/// Convert TryCast to Cast.
2810///
2811/// For dialects that don't support TRY_CAST (safe cast that returns NULL on error),
2812/// this converts TRY_CAST to regular CAST. Note: This may cause runtime errors
2813/// for invalid casts that TRY_CAST would handle gracefully.
2814///
2815/// Reference: `generator.py:no_trycast_sql()`
2816pub fn no_trycast_sql(expr: Expression) -> Result<Expression> {
2817    match expr {
2818        Expression::TryCast(try_cast) => Ok(Expression::Cast(try_cast)),
2819        other => Ok(other),
2820    }
2821}
2822
2823/// Convert SafeCast to Cast.
2824///
2825/// For dialects that don't support SAFE_CAST (BigQuery's safe cast syntax),
2826/// this converts SAFE_CAST to regular CAST.
2827pub fn no_safe_cast_sql(expr: Expression) -> Result<Expression> {
2828    match expr {
2829        Expression::SafeCast(safe_cast) => Ok(Expression::Cast(safe_cast)),
2830        other => Ok(other),
2831    }
2832}
2833
2834/// Convert COMMENT ON statements to inline comments.
2835///
2836/// For dialects that don't support COMMENT ON syntax, this can transform
2837/// comment statements into inline comments or skip them entirely.
2838///
2839/// Reference: `generator.py:no_comment_column_constraint_sql()`
2840pub fn no_comment_column_constraint(expr: Expression) -> Result<Expression> {
2841    // For now, just pass through - comment handling is done in generator
2842    Ok(expr)
2843}
2844
2845/// Convert TABLE GENERATE_SERIES to UNNEST(GENERATE_SERIES(...)).
2846///
2847/// Some dialects use GENERATE_SERIES as a table-valued function, while others
2848/// prefer the UNNEST syntax. This converts:
2849/// ```sql
2850/// SELECT * FROM GENERATE_SERIES(1, 10) AS t(n)
2851/// ```
2852/// To:
2853/// ```sql
2854/// SELECT * FROM UNNEST(GENERATE_SERIES(1, 10)) AS _u(n)
2855/// ```
2856///
2857/// Reference: `transforms.py:125-135`
2858pub fn unnest_generate_series(expr: Expression) -> Result<Expression> {
2859    // Convert TABLE GENERATE_SERIES to UNNEST(GENERATE_SERIES(...))
2860    // This handles the case where GENERATE_SERIES is used as a table-valued function
2861    match expr {
2862        Expression::Table(ref table) => {
2863            // Check if the table name matches GENERATE_SERIES pattern
2864            // In practice, this would be Expression::GenerateSeries wrapped in a Table context
2865            if table.name.name.to_uppercase() == "GENERATE_SERIES" {
2866                // Create UNNEST wrapper
2867                let unnest = Expression::Unnest(Box::new(UnnestFunc {
2868                    this: expr.clone(),
2869                    expressions: Vec::new(),
2870                    with_ordinality: false,
2871                    alias: None,
2872                    offset_alias: None,
2873                }));
2874
2875                // If there's an alias, wrap in alias
2876                return Ok(Expression::Alias(Box::new(crate::expressions::Alias {
2877                    this: unnest,
2878                    alias: Identifier::new("_u".to_string()),
2879                    column_aliases: vec![],
2880                    pre_alias_comments: vec![],
2881                    trailing_comments: vec![],
2882                    inferred_type: None,
2883                })));
2884            }
2885            Ok(expr)
2886        }
2887        Expression::GenerateSeries(gs) => {
2888            // Wrap GenerateSeries directly in UNNEST
2889            let unnest = Expression::Unnest(Box::new(UnnestFunc {
2890                this: Expression::GenerateSeries(gs),
2891                expressions: Vec::new(),
2892                with_ordinality: false,
2893                alias: None,
2894                offset_alias: None,
2895            }));
2896            Ok(unnest)
2897        }
2898        other => Ok(other),
2899    }
2900}
2901
2902/// Convert UNNEST(GENERATE_SERIES(start, end, step)) to a subquery for PostgreSQL.
2903///
2904/// PostgreSQL's GENERATE_SERIES returns rows directly, so UNNEST wrapping is unnecessary.
2905/// Instead, convert to:
2906/// ```sql
2907/// (SELECT CAST(value AS DATE) FROM GENERATE_SERIES(start, end, step) AS _t(value)) AS _unnested_generate_series
2908/// ```
2909///
2910/// This handles the case where GENERATE_DATE_ARRAY was converted to GENERATE_SERIES
2911/// during cross-dialect normalization, but the original had UNNEST wrapping.
2912pub fn unwrap_unnest_generate_series_for_postgres(expr: Expression) -> Result<Expression> {
2913    use crate::dialects::transform_recursive;
2914    transform_recursive(expr, &unwrap_unnest_generate_series_single)
2915}
2916
2917fn unwrap_unnest_generate_series_single(expr: Expression) -> Result<Expression> {
2918    use crate::expressions::*;
2919    // Match UNNEST(GENERATE_SERIES(...)) patterns in FROM clauses
2920    match expr {
2921        Expression::Select(mut select) => {
2922            // Process FROM clause
2923            if let Some(ref mut from) = select.from {
2924                for table_expr in &mut from.expressions {
2925                    if let Some(replacement) = try_unwrap_unnest_gen_series(table_expr) {
2926                        *table_expr = replacement;
2927                    }
2928                }
2929            }
2930            // Process JOINs
2931            for join in &mut select.joins {
2932                if let Some(replacement) = try_unwrap_unnest_gen_series(&join.this) {
2933                    join.this = replacement;
2934                }
2935            }
2936            Ok(Expression::Select(select))
2937        }
2938        other => Ok(other),
2939    }
2940}
2941
2942/// Try to convert an UNNEST(GENERATE_SERIES(...)) to a PostgreSQL subquery.
2943/// Returns the replacement expression if applicable.
2944fn try_unwrap_unnest_gen_series(expr: &Expression) -> Option<Expression> {
2945    use crate::expressions::*;
2946
2947    // Match Unnest containing GenerateSeries
2948    let gen_series = match expr {
2949        Expression::Unnest(unnest) => {
2950            if let Expression::GenerateSeries(ref gs) = unnest.this {
2951                Some(gs.as_ref().clone())
2952            } else {
2953                None
2954            }
2955        }
2956        Expression::Alias(alias) => {
2957            if let Expression::Unnest(ref unnest) = alias.this {
2958                if let Expression::GenerateSeries(ref gs) = unnest.this {
2959                    Some(gs.as_ref().clone())
2960                } else {
2961                    None
2962                }
2963            } else {
2964                None
2965            }
2966        }
2967        _ => None,
2968    };
2969
2970    let gs = gen_series?;
2971
2972    // Build: (SELECT CAST(value AS DATE) FROM GENERATE_SERIES(start, end, step) AS _t(value)) AS _unnested_generate_series
2973    let value_col = Expression::boxed_column(Column {
2974        name: Identifier::new("value".to_string()),
2975        table: None,
2976        join_mark: false,
2977        trailing_comments: vec![],
2978        span: None,
2979        inferred_type: None,
2980    });
2981
2982    let cast_value = Expression::Cast(Box::new(Cast {
2983        this: value_col,
2984        to: DataType::Date,
2985        trailing_comments: vec![],
2986        double_colon_syntax: false,
2987        format: None,
2988        default: None,
2989        inferred_type: None,
2990    }));
2991
2992    let gen_series_expr = Expression::GenerateSeries(Box::new(gs));
2993
2994    // GENERATE_SERIES(...) AS _t(value)
2995    let gen_series_aliased = Expression::Alias(Box::new(Alias {
2996        this: gen_series_expr,
2997        alias: Identifier::new("_t".to_string()),
2998        column_aliases: vec![Identifier::new("value".to_string())],
2999        pre_alias_comments: vec![],
3000        trailing_comments: vec![],
3001        inferred_type: None,
3002    }));
3003
3004    let mut inner_select = Select::new();
3005    inner_select.expressions = vec![cast_value];
3006    inner_select.from = Some(From {
3007        expressions: vec![gen_series_aliased],
3008    });
3009
3010    let inner_select_expr = Expression::Select(Box::new(inner_select));
3011
3012    let subquery = Expression::Subquery(Box::new(Subquery {
3013        this: inner_select_expr,
3014        alias: None,
3015        column_aliases: vec![],
3016        order_by: None,
3017        limit: None,
3018        offset: None,
3019        distribute_by: None,
3020        sort_by: None,
3021        cluster_by: None,
3022        lateral: false,
3023        modifiers_inside: false,
3024        trailing_comments: vec![],
3025        inferred_type: None,
3026    }));
3027
3028    // Wrap in alias AS _unnested_generate_series
3029    Some(Expression::Alias(Box::new(Alias {
3030        this: subquery,
3031        alias: Identifier::new("_unnested_generate_series".to_string()),
3032        column_aliases: vec![],
3033        pre_alias_comments: vec![],
3034        trailing_comments: vec![],
3035        inferred_type: None,
3036    })))
3037}
3038
3039/// Expand BETWEEN expressions in DELETE statements to >= AND <=
3040///
3041/// Some dialects (like StarRocks) don't support BETWEEN in DELETE statements
3042/// or prefer the expanded form. This transforms:
3043///   `DELETE FROM t WHERE a BETWEEN b AND c`
3044/// to:
3045///   `DELETE FROM t WHERE a >= b AND a <= c`
3046pub fn expand_between_in_delete(expr: Expression) -> Result<Expression> {
3047    match expr {
3048        Expression::Delete(mut delete) => {
3049            // If there's a WHERE clause, expand any BETWEEN expressions in it
3050            if let Some(ref mut where_clause) = delete.where_clause {
3051                where_clause.this = expand_between_recursive(where_clause.this.clone());
3052            }
3053            Ok(Expression::Delete(delete))
3054        }
3055        other => Ok(other),
3056    }
3057}
3058
3059/// Recursively expand BETWEEN expressions to >= AND <=
3060fn expand_between_recursive(expr: Expression) -> Expression {
3061    match expr {
3062        // Expand: a BETWEEN b AND c -> a >= b AND a <= c
3063        // Expand: a NOT BETWEEN b AND c -> a < b OR a > c
3064        Expression::Between(between) => {
3065            let this = expand_between_recursive(between.this.clone());
3066            let low = expand_between_recursive(between.low);
3067            let high = expand_between_recursive(between.high);
3068
3069            if between.not {
3070                // NOT BETWEEN: a < b OR a > c
3071                Expression::Or(Box::new(BinaryOp::new(
3072                    Expression::Lt(Box::new(BinaryOp::new(this.clone(), low))),
3073                    Expression::Gt(Box::new(BinaryOp::new(this, high))),
3074                )))
3075            } else {
3076                // BETWEEN: a >= b AND a <= c
3077                Expression::And(Box::new(BinaryOp::new(
3078                    Expression::Gte(Box::new(BinaryOp::new(this.clone(), low))),
3079                    Expression::Lte(Box::new(BinaryOp::new(this, high))),
3080                )))
3081            }
3082        }
3083
3084        // Recursively process AND/OR expressions
3085        Expression::And(mut op) => {
3086            op.left = expand_between_recursive(op.left);
3087            op.right = expand_between_recursive(op.right);
3088            Expression::And(op)
3089        }
3090        Expression::Or(mut op) => {
3091            op.left = expand_between_recursive(op.left);
3092            op.right = expand_between_recursive(op.right);
3093            Expression::Or(op)
3094        }
3095        Expression::Not(mut op) => {
3096            op.this = expand_between_recursive(op.this);
3097            Expression::Not(op)
3098        }
3099
3100        // Recursively process parenthesized expressions
3101        Expression::Paren(mut paren) => {
3102            paren.this = expand_between_recursive(paren.this);
3103            Expression::Paren(paren)
3104        }
3105
3106        // Pass through everything else unchanged
3107        other => other,
3108    }
3109}
3110
3111/// Push down CTE column names into SELECT expressions.
3112///
3113/// BigQuery doesn't support column names when defining a CTE, e.g.:
3114/// `WITH vartab(v) AS (SELECT ...)` is not valid.
3115/// Instead, it expects: `WITH vartab AS (SELECT ... AS v)`.
3116///
3117/// This transform removes the CTE column aliases and adds them as
3118/// aliases on the SELECT expressions.
3119pub fn pushdown_cte_column_names(expr: Expression) -> Result<Expression> {
3120    match expr {
3121        Expression::Select(mut select) => {
3122            if let Some(ref mut with) = select.with {
3123                for cte in &mut with.ctes {
3124                    if !cte.columns.is_empty() {
3125                        // Check if the CTE body is a star query - if so, just strip column names
3126                        let is_star = matches!(&cte.this, Expression::Select(s) if
3127                            s.expressions.len() == 1 && matches!(&s.expressions[0], Expression::Star(_)));
3128
3129                        if is_star {
3130                            // Can't push down column names for star queries, just remove them
3131                            cte.columns.clear();
3132                            continue;
3133                        }
3134
3135                        // Extract column names
3136                        let column_names: Vec<Identifier> = cte.columns.drain(..).collect();
3137
3138                        // Push column names down into the SELECT expressions
3139                        if let Expression::Select(ref mut inner_select) = cte.this {
3140                            let new_exprs: Vec<Expression> = inner_select
3141                                .expressions
3142                                .drain(..)
3143                                .zip(
3144                                    column_names
3145                                        .into_iter()
3146                                        .chain(std::iter::repeat_with(|| Identifier::new(""))),
3147                                )
3148                                .map(|(expr, col_name)| {
3149                                    if col_name.name.is_empty() {
3150                                        return expr;
3151                                    }
3152                                    // If already aliased, replace the alias
3153                                    match expr {
3154                                        Expression::Alias(mut a) => {
3155                                            a.alias = col_name;
3156                                            Expression::Alias(a)
3157                                        }
3158                                        other => {
3159                                            Expression::Alias(Box::new(crate::expressions::Alias {
3160                                                this: other,
3161                                                alias: col_name,
3162                                                column_aliases: Vec::new(),
3163                                                pre_alias_comments: Vec::new(),
3164                                                trailing_comments: Vec::new(),
3165                                                inferred_type: None,
3166                                            }))
3167                                        }
3168                                    }
3169                                })
3170                                .collect();
3171                            inner_select.expressions = new_exprs;
3172                        }
3173                    }
3174                }
3175            }
3176            Ok(Expression::Select(select))
3177        }
3178        other => Ok(other),
3179    }
3180}
3181
3182/// Simplify nested parentheses around VALUES in FROM clause.
3183/// Converts `FROM ((VALUES (1)))` to `FROM (VALUES (1))` by stripping redundant wrapping.
3184/// Handles various nesting patterns: Subquery(Paren(Values)), Paren(Paren(Values)), etc.
3185pub fn simplify_nested_paren_values(expr: Expression) -> Result<Expression> {
3186    match expr {
3187        Expression::Select(mut select) => {
3188            if let Some(ref mut from) = select.from {
3189                for from_item in from.expressions.iter_mut() {
3190                    simplify_paren_values_in_from(from_item);
3191                }
3192            }
3193            Ok(Expression::Select(select))
3194        }
3195        other => Ok(other),
3196    }
3197}
3198
3199fn simplify_paren_values_in_from(expr: &mut Expression) {
3200    // Check various patterns and build replacement if needed
3201    let replacement = match expr {
3202        // Subquery(Paren(Values)) -> Subquery with Values directly
3203        Expression::Subquery(ref subquery) => {
3204            if let Expression::Paren(ref paren) = subquery.this {
3205                if matches!(&paren.this, Expression::Values(_)) {
3206                    let mut new_sub = subquery.as_ref().clone();
3207                    new_sub.this = paren.this.clone();
3208                    Some(Expression::Subquery(Box::new(new_sub)))
3209                } else {
3210                    None
3211                }
3212            } else {
3213                None
3214            }
3215        }
3216        // Paren(Subquery(Values)) -> Subquery(Values) - strip the Paren wrapper
3217        // Paren(Paren(Values)) -> Paren(Values) - strip one layer
3218        Expression::Paren(ref outer_paren) => {
3219            if let Expression::Subquery(ref subquery) = outer_paren.this {
3220                // Paren(Subquery(Values)) -> Subquery(Values) - strip outer Paren
3221                if matches!(&subquery.this, Expression::Values(_)) {
3222                    Some(outer_paren.this.clone())
3223                }
3224                // Paren(Subquery(Paren(Values))) -> Subquery(Values)
3225                else if let Expression::Paren(ref paren) = subquery.this {
3226                    if matches!(&paren.this, Expression::Values(_)) {
3227                        let mut new_sub = subquery.as_ref().clone();
3228                        new_sub.this = paren.this.clone();
3229                        Some(Expression::Subquery(Box::new(new_sub)))
3230                    } else {
3231                        None
3232                    }
3233                } else {
3234                    None
3235                }
3236            } else if let Expression::Paren(ref inner_paren) = outer_paren.this {
3237                if matches!(&inner_paren.this, Expression::Values(_)) {
3238                    Some(outer_paren.this.clone())
3239                } else {
3240                    None
3241                }
3242            } else {
3243                None
3244            }
3245        }
3246        _ => None,
3247    };
3248    if let Some(new_expr) = replacement {
3249        *expr = new_expr;
3250    }
3251}
3252
3253/// Add auto-generated table aliases (like `_t0`) for POSEXPLODE/EXPLODE in FROM clause
3254/// when the alias has column_aliases but no alias name.
3255/// This is needed for Spark target: `FROM POSEXPLODE(x) AS (a, b)` -> `FROM POSEXPLODE(x) AS _t0(a, b)`
3256pub fn add_auto_table_alias(expr: Expression) -> Result<Expression> {
3257    match expr {
3258        Expression::Select(mut select) => {
3259            // Process FROM expressions
3260            if let Some(ref mut from) = select.from {
3261                let mut counter = 0usize;
3262                for from_item in from.expressions.iter_mut() {
3263                    add_auto_alias_to_from_item(from_item, &mut counter);
3264                }
3265            }
3266            Ok(Expression::Select(select))
3267        }
3268        other => Ok(other),
3269    }
3270}
3271
3272fn add_auto_alias_to_from_item(expr: &mut Expression, counter: &mut usize) {
3273    use crate::expressions::Identifier;
3274
3275    match expr {
3276        Expression::Alias(ref mut alias) => {
3277            // If the alias name is empty and there are column_aliases, add auto-generated name
3278            if alias.alias.name.is_empty() && !alias.column_aliases.is_empty() {
3279                alias.alias = Identifier::new(format!("_t{}", counter));
3280                *counter += 1;
3281            }
3282        }
3283        _ => {}
3284    }
3285}
3286
3287/// Convert BigQuery-style UNNEST aliases to column-alias format for DuckDB/Presto/Spark.
3288///
3289/// BigQuery uses: `UNNEST(arr) AS x` where x is a column alias.
3290/// DuckDB/Presto/Spark need: `UNNEST(arr) AS _t0(x)` where _t0 is a table alias and x is the column alias.
3291///
3292/// Propagate struct field names from the first named struct in an array to subsequent unnamed structs.
3293///
3294/// In BigQuery, `[STRUCT('Alice' AS name, 85 AS score), STRUCT('Bob', 92)]` means the second struct
3295/// should inherit field names from the first: `[STRUCT('Alice' AS name, 85 AS score), STRUCT('Bob' AS name, 92 AS score)]`.
3296pub fn propagate_struct_field_names(expr: Expression) -> Result<Expression> {
3297    use crate::dialects::transform_recursive;
3298    transform_recursive(expr, &propagate_struct_names_in_expr)
3299}
3300
3301fn propagate_struct_names_in_expr(expr: Expression) -> Result<Expression> {
3302    use crate::expressions::{Alias, ArrayConstructor, Function, Identifier};
3303
3304    /// Helper to propagate struct field names within an array of expressions
3305    fn propagate_in_elements(elements: &[Expression]) -> Option<Vec<Expression>> {
3306        if elements.len() <= 1 {
3307            return None;
3308        }
3309        // Check if first element is a named STRUCT function
3310        if let Some(Expression::Function(ref first_struct)) = elements.first() {
3311            if first_struct.name.eq_ignore_ascii_case("STRUCT") {
3312                // Extract field names from first struct
3313                let field_names: Vec<Option<String>> = first_struct
3314                    .args
3315                    .iter()
3316                    .map(|arg| {
3317                        if let Expression::Alias(a) = arg {
3318                            Some(a.alias.name.clone())
3319                        } else {
3320                            None
3321                        }
3322                    })
3323                    .collect();
3324
3325                // Only propagate if first struct has at least one named field
3326                if field_names.iter().any(|n| n.is_some()) {
3327                    let mut new_elements = Vec::with_capacity(elements.len());
3328                    new_elements.push(elements[0].clone());
3329
3330                    for elem in &elements[1..] {
3331                        if let Expression::Function(ref s) = elem {
3332                            if s.name.eq_ignore_ascii_case("STRUCT")
3333                                && s.args.len() == field_names.len()
3334                            {
3335                                // Check if this struct has NO names (all unnamed)
3336                                let all_unnamed =
3337                                    s.args.iter().all(|a| !matches!(a, Expression::Alias(_)));
3338                                if all_unnamed {
3339                                    // Apply names from first struct
3340                                    let new_args: Vec<Expression> = s
3341                                        .args
3342                                        .iter()
3343                                        .zip(field_names.iter())
3344                                        .map(|(val, name)| {
3345                                            if let Some(n) = name {
3346                                                Expression::Alias(Box::new(Alias::new(
3347                                                    val.clone(),
3348                                                    Identifier::new(n.clone()),
3349                                                )))
3350                                            } else {
3351                                                val.clone()
3352                                            }
3353                                        })
3354                                        .collect();
3355                                    new_elements.push(Expression::Function(Box::new(
3356                                        Function::new("STRUCT".to_string(), new_args),
3357                                    )));
3358                                    continue;
3359                                }
3360                            }
3361                        }
3362                        new_elements.push(elem.clone());
3363                    }
3364
3365                    return Some(new_elements);
3366                }
3367            }
3368        }
3369        None
3370    }
3371
3372    // Look for Array expressions containing STRUCT function calls
3373    if let Expression::Array(ref arr) = expr {
3374        if let Some(new_elements) = propagate_in_elements(&arr.expressions) {
3375            return Ok(Expression::Array(Box::new(crate::expressions::Array {
3376                expressions: new_elements,
3377            })));
3378        }
3379    }
3380
3381    // Also handle ArrayFunc (ArrayConstructor) - bracket notation [STRUCT(...), ...]
3382    if let Expression::ArrayFunc(ref arr) = expr {
3383        if let Some(new_elements) = propagate_in_elements(&arr.expressions) {
3384            return Ok(Expression::ArrayFunc(Box::new(ArrayConstructor {
3385                expressions: new_elements,
3386                bracket_notation: arr.bracket_notation,
3387                use_list_keyword: arr.use_list_keyword,
3388            })));
3389        }
3390    }
3391
3392    Ok(expr)
3393}
3394
3395/// This walks the entire expression tree to find SELECT statements and converts UNNEST aliases
3396/// in their FROM clauses and JOINs.
3397pub fn unnest_alias_to_column_alias(expr: Expression) -> Result<Expression> {
3398    use crate::dialects::transform_recursive;
3399    transform_recursive(expr, &unnest_alias_transform_single_select)
3400}
3401
3402/// Move UNNEST items from FROM clause to CROSS JOINs without changing alias format.
3403/// Used for BigQuery -> BigQuery/Redshift where we want CROSS JOIN but not _t0(col) aliases.
3404pub fn unnest_from_to_cross_join(expr: Expression) -> Result<Expression> {
3405    use crate::dialects::transform_recursive;
3406    transform_recursive(expr, &unnest_from_to_cross_join_single_select)
3407}
3408
3409fn unnest_from_to_cross_join_single_select(expr: Expression) -> Result<Expression> {
3410    if let Expression::Select(mut select) = expr {
3411        if let Some(ref mut from) = select.from {
3412            if from.expressions.len() > 1 {
3413                let mut new_from_exprs = Vec::new();
3414                let mut new_cross_joins = Vec::new();
3415
3416                for (idx, from_item) in from.expressions.drain(..).enumerate() {
3417                    if idx == 0 {
3418                        new_from_exprs.push(from_item);
3419                    } else {
3420                        let is_unnest = match &from_item {
3421                            Expression::Unnest(_) => true,
3422                            Expression::Alias(a) => matches!(a.this, Expression::Unnest(_)),
3423                            _ => false,
3424                        };
3425
3426                        if is_unnest {
3427                            new_cross_joins.push(crate::expressions::Join {
3428                                this: from_item,
3429                                on: None,
3430                                using: Vec::new(),
3431                                kind: JoinKind::Cross,
3432                                use_inner_keyword: false,
3433                                use_outer_keyword: false,
3434                                deferred_condition: false,
3435                                join_hint: None,
3436                                match_condition: None,
3437                                pivots: Vec::new(),
3438                                comments: Vec::new(),
3439                                nesting_group: 0,
3440                                directed: false,
3441                            });
3442                        } else {
3443                            new_from_exprs.push(from_item);
3444                        }
3445                    }
3446                }
3447
3448                from.expressions = new_from_exprs;
3449                new_cross_joins.append(&mut select.joins);
3450                select.joins = new_cross_joins;
3451            }
3452        }
3453
3454        Ok(Expression::Select(select))
3455    } else {
3456        Ok(expr)
3457    }
3458}
3459
3460/// Wrap UNNEST function aliases in JOIN items from `AS name` to `AS _u(name)`
3461/// Used for PostgreSQL → Presto/Trino transpilation where GENERATE_SERIES is
3462/// converted to UNNEST(SEQUENCE) and the alias needs the column-alias format.
3463pub fn wrap_unnest_join_aliases(expr: Expression) -> Result<Expression> {
3464    use crate::dialects::transform_recursive;
3465    transform_recursive(expr, &wrap_unnest_join_aliases_single)
3466}
3467
3468fn wrap_unnest_join_aliases_single(expr: Expression) -> Result<Expression> {
3469    if let Expression::Select(mut select) = expr {
3470        // Process JOIN items
3471        for join in &mut select.joins {
3472            wrap_unnest_alias_in_join_item(&mut join.this);
3473        }
3474        Ok(Expression::Select(select))
3475    } else {
3476        Ok(expr)
3477    }
3478}
3479
3480/// If a join item is an Alias wrapping an UNNEST function, convert alias to _u(alias_name) format
3481fn wrap_unnest_alias_in_join_item(expr: &mut Expression) {
3482    use crate::expressions::Identifier;
3483    if let Expression::Alias(alias) = expr {
3484        // Check if the inner expression is a function call to UNNEST
3485        let is_unnest = match &alias.this {
3486            Expression::Function(f) => f.name.eq_ignore_ascii_case("UNNEST"),
3487            _ => false,
3488        };
3489
3490        if is_unnest && alias.column_aliases.is_empty() {
3491            // Simple alias like `AS s` -> wrap to `AS _u(s)`
3492            let original_alias_name = alias.alias.name.clone();
3493            alias.alias = Identifier {
3494                name: "_u".to_string(),
3495                quoted: false,
3496                trailing_comments: Vec::new(),
3497                span: None,
3498            };
3499            alias.column_aliases = vec![Identifier {
3500                name: original_alias_name,
3501                quoted: false,
3502                trailing_comments: Vec::new(),
3503                span: None,
3504            }];
3505        }
3506    }
3507}
3508
3509fn unnest_alias_transform_single_select(expr: Expression) -> Result<Expression> {
3510    if let Expression::Select(mut select) = expr {
3511        let mut counter = 0usize;
3512
3513        // Process FROM expressions: convert aliases AND move UNNEST items to CROSS JOIN
3514        if let Some(ref mut from) = select.from {
3515            // First pass: convert aliases in-place
3516            for from_item in from.expressions.iter_mut() {
3517                convert_unnest_alias_in_from(from_item, &mut counter);
3518            }
3519
3520            // Second pass: move UNNEST items from FROM to CROSS JOINs
3521            if from.expressions.len() > 1 {
3522                let mut new_from_exprs = Vec::new();
3523                let mut new_cross_joins = Vec::new();
3524
3525                for (idx, from_item) in from.expressions.drain(..).enumerate() {
3526                    if idx == 0 {
3527                        // First expression always stays in FROM
3528                        new_from_exprs.push(from_item);
3529                    } else {
3530                        // Check if this is UNNEST or Alias(UNNEST)
3531                        let is_unnest = match &from_item {
3532                            Expression::Unnest(_) => true,
3533                            Expression::Alias(a) => matches!(a.this, Expression::Unnest(_)),
3534                            _ => false,
3535                        };
3536
3537                        if is_unnest {
3538                            // Convert to CROSS JOIN
3539                            new_cross_joins.push(crate::expressions::Join {
3540                                this: from_item,
3541                                on: None,
3542                                using: Vec::new(),
3543                                kind: JoinKind::Cross,
3544                                use_inner_keyword: false,
3545                                use_outer_keyword: false,
3546                                deferred_condition: false,
3547                                join_hint: None,
3548                                match_condition: None,
3549                                pivots: Vec::new(),
3550                                comments: Vec::new(),
3551                                nesting_group: 0,
3552                                directed: false,
3553                            });
3554                        } else {
3555                            // Keep non-UNNEST items in FROM
3556                            new_from_exprs.push(from_item);
3557                        }
3558                    }
3559                }
3560
3561                from.expressions = new_from_exprs;
3562                // Prepend cross joins before existing joins
3563                new_cross_joins.append(&mut select.joins);
3564                select.joins = new_cross_joins;
3565            }
3566        }
3567
3568        // Process JOINs (existing joins that may have UNNEST aliases)
3569        for join in select.joins.iter_mut() {
3570            convert_unnest_alias_in_from(&mut join.this, &mut counter);
3571        }
3572
3573        Ok(Expression::Select(select))
3574    } else {
3575        Ok(expr)
3576    }
3577}
3578
3579fn convert_unnest_alias_in_from(expr: &mut Expression, counter: &mut usize) {
3580    use crate::expressions::Identifier;
3581
3582    if let Expression::Alias(ref mut alias) = expr {
3583        // Check if the inner expression is UNNEST (or EXPLODE)
3584        let is_unnest = matches!(&alias.this, Expression::Unnest(_))
3585            || matches!(&alias.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"));
3586
3587        if is_unnest && alias.column_aliases.is_empty() {
3588            // Convert: UNNEST(arr) AS x -> UNNEST(arr) AS _tN(x)
3589            let col_alias = alias.alias.clone();
3590            alias.column_aliases = vec![col_alias];
3591            alias.alias = Identifier::new(format!("_t{}", counter));
3592            *counter += 1;
3593        }
3594    }
3595}
3596
3597/// Expand POSEXPLODE in SELECT expressions for DuckDB.
3598///
3599/// Converts `SELECT POSEXPLODE(x)` to `SELECT GENERATE_SUBSCRIPTS(x, 1) - 1 AS pos, UNNEST(x) AS col`
3600/// Handles both aliased and unaliased forms:
3601/// - `SELECT POSEXPLODE(x) AS (a, b)` -> `SELECT GENERATE_SUBSCRIPTS(x, 1) - 1 AS a, UNNEST(x) AS b`
3602/// - `SELECT * FROM POSEXPLODE(x) AS (a, b)` -> `SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(x, 1) - 1 AS a, UNNEST(x) AS b)`
3603pub fn expand_posexplode_duckdb(expr: Expression) -> Result<Expression> {
3604    use crate::expressions::{Alias, Function};
3605
3606    match expr {
3607        Expression::Select(mut select) => {
3608            // Check if any SELECT expression is a POSEXPLODE function
3609            let mut new_expressions = Vec::new();
3610            let mut changed = false;
3611
3612            for sel_expr in select.expressions.drain(..) {
3613                // Check for POSEXPLODE(x) AS (a, b) - aliased form
3614                if let Expression::Alias(ref alias_box) = sel_expr {
3615                    if let Expression::Function(ref func) = alias_box.this {
3616                        if func.name.eq_ignore_ascii_case("POSEXPLODE") && func.args.len() == 1 {
3617                            let arg = func.args[0].clone();
3618                            // Get alias names: default pos, col
3619                            let (pos_name, col_name) = if alias_box.column_aliases.len() == 2 {
3620                                (
3621                                    alias_box.column_aliases[0].name.clone(),
3622                                    alias_box.column_aliases[1].name.clone(),
3623                                )
3624                            } else if !alias_box.alias.is_empty() {
3625                                // Single alias like AS x - use as col name, "pos" for position
3626                                ("pos".to_string(), alias_box.alias.name.clone())
3627                            } else {
3628                                ("pos".to_string(), "col".to_string())
3629                            };
3630
3631                            // GENERATE_SUBSCRIPTS(x, 1) - 1 AS pos_name
3632                            let gen_subscripts = Expression::Function(Box::new(Function::new(
3633                                "GENERATE_SUBSCRIPTS".to_string(),
3634                                vec![
3635                                    arg.clone(),
3636                                    Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3637                                ],
3638                            )));
3639                            let sub_one = Expression::Sub(Box::new(BinaryOp::new(
3640                                gen_subscripts,
3641                                Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3642                            )));
3643                            let pos_alias = Expression::Alias(Box::new(Alias {
3644                                this: sub_one,
3645                                alias: Identifier::new(pos_name),
3646                                column_aliases: Vec::new(),
3647                                pre_alias_comments: Vec::new(),
3648                                trailing_comments: Vec::new(),
3649                                inferred_type: None,
3650                            }));
3651
3652                            // UNNEST(x) AS col_name
3653                            let unnest = Expression::Unnest(Box::new(UnnestFunc {
3654                                this: arg,
3655                                expressions: Vec::new(),
3656                                with_ordinality: false,
3657                                alias: None,
3658                                offset_alias: None,
3659                            }));
3660                            let col_alias = Expression::Alias(Box::new(Alias {
3661                                this: unnest,
3662                                alias: Identifier::new(col_name),
3663                                column_aliases: Vec::new(),
3664                                pre_alias_comments: Vec::new(),
3665                                trailing_comments: Vec::new(),
3666                                inferred_type: None,
3667                            }));
3668
3669                            new_expressions.push(pos_alias);
3670                            new_expressions.push(col_alias);
3671                            changed = true;
3672                            continue;
3673                        }
3674                    }
3675                }
3676
3677                // Check for bare POSEXPLODE(x) - unaliased form
3678                if let Expression::Function(ref func) = sel_expr {
3679                    if func.name.eq_ignore_ascii_case("POSEXPLODE") && func.args.len() == 1 {
3680                        let arg = func.args[0].clone();
3681                        let pos_name = "pos";
3682                        let col_name = "col";
3683
3684                        // GENERATE_SUBSCRIPTS(x, 1) - 1 AS pos
3685                        let gen_subscripts = Expression::Function(Box::new(Function::new(
3686                            "GENERATE_SUBSCRIPTS".to_string(),
3687                            vec![
3688                                arg.clone(),
3689                                Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3690                            ],
3691                        )));
3692                        let sub_one = Expression::Sub(Box::new(BinaryOp::new(
3693                            gen_subscripts,
3694                            Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3695                        )));
3696                        let pos_alias = Expression::Alias(Box::new(Alias {
3697                            this: sub_one,
3698                            alias: Identifier::new(pos_name),
3699                            column_aliases: Vec::new(),
3700                            pre_alias_comments: Vec::new(),
3701                            trailing_comments: Vec::new(),
3702                            inferred_type: None,
3703                        }));
3704
3705                        // UNNEST(x) AS col
3706                        let unnest = Expression::Unnest(Box::new(UnnestFunc {
3707                            this: arg,
3708                            expressions: Vec::new(),
3709                            with_ordinality: false,
3710                            alias: None,
3711                            offset_alias: None,
3712                        }));
3713                        let col_alias = Expression::Alias(Box::new(Alias {
3714                            this: unnest,
3715                            alias: Identifier::new(col_name),
3716                            column_aliases: Vec::new(),
3717                            pre_alias_comments: Vec::new(),
3718                            trailing_comments: Vec::new(),
3719                            inferred_type: None,
3720                        }));
3721
3722                        new_expressions.push(pos_alias);
3723                        new_expressions.push(col_alias);
3724                        changed = true;
3725                        continue;
3726                    }
3727                }
3728
3729                // Not a POSEXPLODE, keep as-is
3730                new_expressions.push(sel_expr);
3731            }
3732
3733            if changed {
3734                select.expressions = new_expressions;
3735            } else {
3736                select.expressions = new_expressions;
3737            }
3738
3739            // Also handle POSEXPLODE in FROM clause:
3740            // SELECT * FROM POSEXPLODE(x) AS (a, b) -> SELECT * FROM (SELECT ...)
3741            if let Some(ref mut from) = select.from {
3742                expand_posexplode_in_from_duckdb(from)?;
3743            }
3744
3745            Ok(Expression::Select(select))
3746        }
3747        other => Ok(other),
3748    }
3749}
3750
3751/// Helper to expand POSEXPLODE in FROM clause for DuckDB
3752fn expand_posexplode_in_from_duckdb(from: &mut From) -> Result<()> {
3753    use crate::expressions::{Alias, Function};
3754
3755    let mut new_expressions = Vec::new();
3756    let mut _changed = false;
3757
3758    for table_expr in from.expressions.drain(..) {
3759        // Check for POSEXPLODE(x) AS (a, b) in FROM
3760        if let Expression::Alias(ref alias_box) = table_expr {
3761            if let Expression::Function(ref func) = alias_box.this {
3762                if func.name.eq_ignore_ascii_case("POSEXPLODE") && func.args.len() == 1 {
3763                    let arg = func.args[0].clone();
3764                    let (pos_name, col_name) = if alias_box.column_aliases.len() == 2 {
3765                        (
3766                            alias_box.column_aliases[0].name.clone(),
3767                            alias_box.column_aliases[1].name.clone(),
3768                        )
3769                    } else {
3770                        ("pos".to_string(), "col".to_string())
3771                    };
3772
3773                    // Create subquery: (SELECT GENERATE_SUBSCRIPTS(x, 1) - 1 AS a, UNNEST(x) AS b)
3774                    let gen_subscripts = Expression::Function(Box::new(Function::new(
3775                        "GENERATE_SUBSCRIPTS".to_string(),
3776                        vec![
3777                            arg.clone(),
3778                            Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3779                        ],
3780                    )));
3781                    let sub_one = Expression::Sub(Box::new(BinaryOp::new(
3782                        gen_subscripts,
3783                        Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3784                    )));
3785                    let pos_alias = Expression::Alias(Box::new(Alias {
3786                        this: sub_one,
3787                        alias: Identifier::new(&pos_name),
3788                        column_aliases: Vec::new(),
3789                        pre_alias_comments: Vec::new(),
3790                        trailing_comments: Vec::new(),
3791                        inferred_type: None,
3792                    }));
3793                    let unnest = Expression::Unnest(Box::new(UnnestFunc {
3794                        this: arg,
3795                        expressions: Vec::new(),
3796                        with_ordinality: false,
3797                        alias: None,
3798                        offset_alias: None,
3799                    }));
3800                    let col_alias = Expression::Alias(Box::new(Alias {
3801                        this: unnest,
3802                        alias: Identifier::new(&col_name),
3803                        column_aliases: Vec::new(),
3804                        pre_alias_comments: Vec::new(),
3805                        trailing_comments: Vec::new(),
3806                        inferred_type: None,
3807                    }));
3808
3809                    let mut inner_select = Select::new();
3810                    inner_select.expressions = vec![pos_alias, col_alias];
3811
3812                    let subquery = Expression::Subquery(Box::new(Subquery {
3813                        this: Expression::Select(Box::new(inner_select)),
3814                        alias: None,
3815                        column_aliases: Vec::new(),
3816                        order_by: None,
3817                        limit: None,
3818                        offset: None,
3819                        distribute_by: None,
3820                        sort_by: None,
3821                        cluster_by: None,
3822                        lateral: false,
3823                        modifiers_inside: false,
3824                        trailing_comments: Vec::new(),
3825                        inferred_type: None,
3826                    }));
3827                    new_expressions.push(subquery);
3828                    _changed = true;
3829                    continue;
3830                }
3831            }
3832        }
3833
3834        // Also check for bare POSEXPLODE(x) in FROM (no alias)
3835        if let Expression::Function(ref func) = table_expr {
3836            if func.name.eq_ignore_ascii_case("POSEXPLODE") && func.args.len() == 1 {
3837                let arg = func.args[0].clone();
3838
3839                // Create subquery: (SELECT GENERATE_SUBSCRIPTS(x, 1) - 1 AS pos, UNNEST(x) AS col)
3840                let gen_subscripts = Expression::Function(Box::new(Function::new(
3841                    "GENERATE_SUBSCRIPTS".to_string(),
3842                    vec![
3843                        arg.clone(),
3844                        Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3845                    ],
3846                )));
3847                let sub_one = Expression::Sub(Box::new(BinaryOp::new(
3848                    gen_subscripts,
3849                    Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3850                )));
3851                let pos_alias = Expression::Alias(Box::new(Alias {
3852                    this: sub_one,
3853                    alias: Identifier::new("pos"),
3854                    column_aliases: Vec::new(),
3855                    pre_alias_comments: Vec::new(),
3856                    trailing_comments: Vec::new(),
3857                    inferred_type: None,
3858                }));
3859                let unnest = Expression::Unnest(Box::new(UnnestFunc {
3860                    this: arg,
3861                    expressions: Vec::new(),
3862                    with_ordinality: false,
3863                    alias: None,
3864                    offset_alias: None,
3865                }));
3866                let col_alias = Expression::Alias(Box::new(Alias {
3867                    this: unnest,
3868                    alias: Identifier::new("col"),
3869                    column_aliases: Vec::new(),
3870                    pre_alias_comments: Vec::new(),
3871                    trailing_comments: Vec::new(),
3872                    inferred_type: None,
3873                }));
3874
3875                let mut inner_select = Select::new();
3876                inner_select.expressions = vec![pos_alias, col_alias];
3877
3878                let subquery = Expression::Subquery(Box::new(Subquery {
3879                    this: Expression::Select(Box::new(inner_select)),
3880                    alias: None,
3881                    column_aliases: Vec::new(),
3882                    order_by: None,
3883                    limit: None,
3884                    offset: None,
3885                    distribute_by: None,
3886                    sort_by: None,
3887                    cluster_by: None,
3888                    lateral: false,
3889                    modifiers_inside: false,
3890                    trailing_comments: Vec::new(),
3891                    inferred_type: None,
3892                }));
3893                new_expressions.push(subquery);
3894                _changed = true;
3895                continue;
3896            }
3897        }
3898
3899        new_expressions.push(table_expr);
3900    }
3901
3902    from.expressions = new_expressions;
3903    Ok(())
3904}
3905
3906/// Convert EXPLODE/POSEXPLODE in SELECT projections into CROSS JOIN UNNEST patterns.
3907///
3908/// This implements the `explode_projection_to_unnest` transform from Python sqlglot.
3909/// It restructures queries like:
3910///   `SELECT EXPLODE(x) FROM tbl`
3911/// into:
3912///   `SELECT IF(pos = pos_2, col, NULL) AS col FROM tbl CROSS JOIN UNNEST(...) AS pos CROSS JOIN UNNEST(x) AS col WITH OFFSET AS pos_2 WHERE ...`
3913///
3914/// The transform handles:
3915/// - EXPLODE(x) and POSEXPLODE(x) functions
3916/// - Name collision avoidance (_u, _u_2, ... and col, col_2, ...)
3917/// - Multiple EXPLODE/POSEXPLODE in one SELECT
3918/// - Queries with or without FROM clause
3919/// - Presto (index_offset=1) and BigQuery (index_offset=0) variants
3920pub fn explode_projection_to_unnest(expr: Expression, target: DialectType) -> Result<Expression> {
3921    match expr {
3922        Expression::Select(select) => explode_projection_to_unnest_impl(*select, target),
3923        other => Ok(other),
3924    }
3925}
3926
3927/// Snowflake-specific rewrite to mirror Python sqlglot's explode_projection_to_unnest behavior
3928/// when FLATTEN appears in a nested LATERAL within a SELECT projection.
3929///
3930/// This intentionally rewrites:
3931/// - `LATERAL FLATTEN(INPUT => x) alias`
3932/// into:
3933/// - `LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS alias(SEQ, KEY, PATH, INDEX, VALUE, THIS)`
3934/// and appends CROSS JOIN TABLE(FLATTEN(...)) range/entity joins plus alignment predicates
3935/// to the containing SELECT.
3936pub fn snowflake_flatten_projection_to_unnest(expr: Expression) -> Result<Expression> {
3937    match expr {
3938        Expression::Select(select) => snowflake_flatten_projection_to_unnest_impl(*select),
3939        other => Ok(other),
3940    }
3941}
3942
3943fn snowflake_flatten_projection_to_unnest_impl(mut select: Select) -> Result<Expression> {
3944    let mut flattened_inputs: Vec<Expression> = Vec::new();
3945    let mut new_selects: Vec<Expression> = Vec::with_capacity(select.expressions.len());
3946
3947    for sel_expr in select.expressions.into_iter() {
3948        let found_input: RefCell<Option<Expression>> = RefCell::new(None);
3949
3950        let rewritten = transform_recursive(sel_expr, &|e| {
3951            if let Expression::Lateral(lat) = e {
3952                if let Some(input_expr) = extract_flatten_input(&lat) {
3953                    if found_input.borrow().is_none() {
3954                        *found_input.borrow_mut() = Some(input_expr);
3955                    }
3956                    return Ok(Expression::Lateral(Box::new(rewrite_flatten_lateral(*lat))));
3957                }
3958                return Ok(Expression::Lateral(lat));
3959            }
3960            Ok(e)
3961        })?;
3962
3963        if let Some(input) = found_input.into_inner() {
3964            flattened_inputs.push(input);
3965        }
3966        new_selects.push(rewritten);
3967    }
3968
3969    if flattened_inputs.is_empty() {
3970        select.expressions = new_selects;
3971        return Ok(Expression::Select(Box::new(select)));
3972    }
3973
3974    select.expressions = new_selects;
3975
3976    for (idx, input_expr) in flattened_inputs.into_iter().enumerate() {
3977        // Match sqlglot naming: first pair is _u/_u_2 with pos/pos_2 and entity.
3978        let is_first = idx == 0;
3979        let series_alias = if is_first {
3980            "pos".to_string()
3981        } else {
3982            format!("pos_{}", idx + 1)
3983        };
3984        let series_source_alias = if is_first {
3985            "_u".to_string()
3986        } else {
3987            format!("_u_{}", idx * 2 + 1)
3988        };
3989        let unnest_source_alias = if is_first {
3990            "_u_2".to_string()
3991        } else {
3992            format!("_u_{}", idx * 2 + 2)
3993        };
3994        let pos2_alias = if is_first {
3995            "pos_2".to_string()
3996        } else {
3997            format!("{}_2", series_alias)
3998        };
3999        let entity_alias = if is_first {
4000            "entity".to_string()
4001        } else {
4002            format!("entity_{}", idx + 1)
4003        };
4004
4005        let array_size_call = Expression::Function(Box::new(Function::new(
4006            "ARRAY_SIZE".to_string(),
4007            vec![Expression::NamedArgument(Box::new(NamedArgument {
4008                name: Identifier::new("INPUT"),
4009                value: input_expr.clone(),
4010                separator: NamedArgSeparator::DArrow,
4011            }))],
4012        )));
4013
4014        let greatest = Expression::Function(Box::new(Function::new(
4015            "GREATEST".to_string(),
4016            vec![array_size_call.clone()],
4017        )));
4018
4019        let series_end = Expression::Add(Box::new(BinaryOp::new(
4020            Expression::Paren(Box::new(crate::expressions::Paren {
4021                this: Expression::Sub(Box::new(BinaryOp::new(
4022                    greatest,
4023                    Expression::Literal(Box::new(Literal::Number("1".to_string()))),
4024                ))),
4025                trailing_comments: Vec::new(),
4026            })),
4027            Expression::Literal(Box::new(Literal::Number("1".to_string()))),
4028        )));
4029
4030        let series_range = Expression::Function(Box::new(Function::new(
4031            "ARRAY_GENERATE_RANGE".to_string(),
4032            vec![
4033                Expression::Literal(Box::new(Literal::Number("0".to_string()))),
4034                series_end,
4035            ],
4036        )));
4037
4038        let series_flatten = Expression::Function(Box::new(Function::new(
4039            "FLATTEN".to_string(),
4040            vec![Expression::NamedArgument(Box::new(NamedArgument {
4041                name: Identifier::new("INPUT"),
4042                value: series_range,
4043                separator: NamedArgSeparator::DArrow,
4044            }))],
4045        )));
4046
4047        let series_table = Expression::Function(Box::new(Function::new(
4048            "TABLE".to_string(),
4049            vec![series_flatten],
4050        )));
4051
4052        let series_alias_expr = Expression::Alias(Box::new(Alias {
4053            this: series_table,
4054            alias: Identifier::new(series_source_alias.clone()),
4055            column_aliases: vec![
4056                Identifier::new("seq"),
4057                Identifier::new("key"),
4058                Identifier::new("path"),
4059                Identifier::new("index"),
4060                Identifier::new(series_alias.clone()),
4061                Identifier::new("this"),
4062            ],
4063            pre_alias_comments: Vec::new(),
4064            trailing_comments: Vec::new(),
4065            inferred_type: None,
4066        }));
4067
4068        select.joins.push(Join {
4069            this: series_alias_expr,
4070            on: None,
4071            using: Vec::new(),
4072            kind: JoinKind::Cross,
4073            use_inner_keyword: false,
4074            use_outer_keyword: false,
4075            deferred_condition: false,
4076            join_hint: None,
4077            match_condition: None,
4078            pivots: Vec::new(),
4079            comments: Vec::new(),
4080            nesting_group: 0,
4081            directed: false,
4082        });
4083
4084        let entity_flatten = Expression::Function(Box::new(Function::new(
4085            "FLATTEN".to_string(),
4086            vec![Expression::NamedArgument(Box::new(NamedArgument {
4087                name: Identifier::new("INPUT"),
4088                value: input_expr.clone(),
4089                separator: NamedArgSeparator::DArrow,
4090            }))],
4091        )));
4092
4093        let entity_table = Expression::Function(Box::new(Function::new(
4094            "TABLE".to_string(),
4095            vec![entity_flatten],
4096        )));
4097
4098        let entity_alias_expr = Expression::Alias(Box::new(Alias {
4099            this: entity_table,
4100            alias: Identifier::new(unnest_source_alias.clone()),
4101            column_aliases: vec![
4102                Identifier::new("seq"),
4103                Identifier::new("key"),
4104                Identifier::new("path"),
4105                Identifier::new(pos2_alias.clone()),
4106                Identifier::new(entity_alias.clone()),
4107                Identifier::new("this"),
4108            ],
4109            pre_alias_comments: Vec::new(),
4110            trailing_comments: Vec::new(),
4111            inferred_type: None,
4112        }));
4113
4114        select.joins.push(Join {
4115            this: entity_alias_expr,
4116            on: None,
4117            using: Vec::new(),
4118            kind: JoinKind::Cross,
4119            use_inner_keyword: false,
4120            use_outer_keyword: false,
4121            deferred_condition: false,
4122            join_hint: None,
4123            match_condition: None,
4124            pivots: Vec::new(),
4125            comments: Vec::new(),
4126            nesting_group: 0,
4127            directed: false,
4128        });
4129
4130        let pos_col =
4131            Expression::qualified_column(series_source_alias.clone(), series_alias.clone());
4132        let pos2_col =
4133            Expression::qualified_column(unnest_source_alias.clone(), pos2_alias.clone());
4134
4135        let eq = Expression::Eq(Box::new(BinaryOp::new(pos_col.clone(), pos2_col.clone())));
4136        let size_minus_1 = Expression::Paren(Box::new(crate::expressions::Paren {
4137            this: Expression::Sub(Box::new(BinaryOp::new(
4138                array_size_call,
4139                Expression::Literal(Box::new(Literal::Number("1".to_string()))),
4140            ))),
4141            trailing_comments: Vec::new(),
4142        }));
4143        let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_minus_1.clone())));
4144        let pos2_eq_size = Expression::Eq(Box::new(BinaryOp::new(pos2_col, size_minus_1)));
4145        let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos2_eq_size)));
4146        let or_cond = Expression::Or(Box::new(BinaryOp::new(
4147            eq,
4148            Expression::Paren(Box::new(crate::expressions::Paren {
4149                this: and_cond,
4150                trailing_comments: Vec::new(),
4151            })),
4152        )));
4153
4154        select.where_clause = Some(match select.where_clause.take() {
4155            Some(existing) => Where {
4156                this: Expression::And(Box::new(BinaryOp::new(existing.this, or_cond))),
4157            },
4158            None => Where { this: or_cond },
4159        });
4160    }
4161
4162    Ok(Expression::Select(Box::new(select)))
4163}
4164
4165fn extract_flatten_input(lat: &Lateral) -> Option<Expression> {
4166    let Expression::Function(f) = lat.this.as_ref() else {
4167        return None;
4168    };
4169    if !f.name.eq_ignore_ascii_case("FLATTEN") {
4170        return None;
4171    }
4172
4173    for arg in &f.args {
4174        if let Expression::NamedArgument(na) = arg {
4175            if na.name.name.eq_ignore_ascii_case("INPUT") {
4176                return Some(na.value.clone());
4177            }
4178        }
4179    }
4180    f.args.first().cloned()
4181}
4182
4183fn rewrite_flatten_lateral(mut lat: Lateral) -> Lateral {
4184    let cond = Expression::Eq(Box::new(BinaryOp::new(
4185        Expression::qualified_column("_u", "pos"),
4186        Expression::qualified_column("_u_2", "pos_2"),
4187    )));
4188    let true_expr = Expression::qualified_column("_u_2", "entity");
4189    let iff_expr = Expression::Function(Box::new(Function::new(
4190        "IFF".to_string(),
4191        vec![cond, true_expr, Expression::Null(crate::expressions::Null)],
4192    )));
4193
4194    lat.this = Box::new(iff_expr);
4195    if lat.column_aliases.is_empty() {
4196        lat.column_aliases = vec![
4197            "SEQ".to_string(),
4198            "KEY".to_string(),
4199            "PATH".to_string(),
4200            "INDEX".to_string(),
4201            "VALUE".to_string(),
4202            "THIS".to_string(),
4203        ];
4204    }
4205    lat
4206}
4207
4208/// Info about an EXPLODE/POSEXPLODE found in a SELECT projection
4209struct ExplodeInfo {
4210    /// The argument to EXPLODE/POSEXPLODE (the array expression)
4211    arg_sql: String,
4212    /// The alias for the exploded column
4213    explode_alias: String,
4214    /// The alias for the position column
4215    pos_alias: String,
4216    /// Source alias for this unnest (e.g., _u_2)
4217    unnest_source_alias: String,
4218}
4219
4220fn explode_projection_to_unnest_impl(select: Select, target: DialectType) -> Result<Expression> {
4221    let is_presto = matches!(
4222        target,
4223        DialectType::Presto | DialectType::Trino | DialectType::Athena
4224    );
4225    let is_bigquery = matches!(target, DialectType::BigQuery);
4226
4227    if !is_presto && !is_bigquery {
4228        return Ok(Expression::Select(Box::new(select)));
4229    }
4230
4231    // Check if any SELECT projection contains EXPLODE or POSEXPLODE
4232    let has_explode = select.expressions.iter().any(|e| expr_contains_explode(e));
4233    if !has_explode {
4234        return Ok(Expression::Select(Box::new(select)));
4235    }
4236
4237    // Collect taken names from existing SELECT expressions and FROM sources
4238    let mut taken_select_names = std::collections::HashSet::new();
4239    let mut taken_source_names = std::collections::HashSet::new();
4240
4241    // Collect names from existing SELECT expressions (output names)
4242    for sel in &select.expressions {
4243        if let Some(name) = get_output_name(sel) {
4244            taken_select_names.insert(name);
4245        }
4246    }
4247
4248    // Also add the explode arg name if it's a column reference
4249    for sel in &select.expressions {
4250        let explode_expr = find_explode_in_expr(sel);
4251        if let Some(arg) = explode_expr {
4252            if let Some(name) = get_output_name(&arg) {
4253                taken_select_names.insert(name);
4254            }
4255        }
4256    }
4257
4258    // Collect source names from FROM clause
4259    if let Some(ref from) = select.from {
4260        for from_expr in &from.expressions {
4261            collect_source_names(from_expr, &mut taken_source_names);
4262        }
4263    }
4264    // Also collect from JOINs
4265    for join in &select.joins {
4266        collect_source_names(&join.this, &mut taken_source_names);
4267    }
4268
4269    // Generate series alias
4270    let series_alias = new_name(&mut taken_select_names, "pos");
4271
4272    // Generate series source alias
4273    let series_source_alias = new_name(&mut taken_source_names, "_u");
4274
4275    // Get the target dialect for generating expression SQL
4276    let target_dialect = Dialect::get(target);
4277
4278    // Process each SELECT expression, collecting explode info
4279    let mut explode_infos: Vec<ExplodeInfo> = Vec::new();
4280    let mut new_projections: Vec<String> = Vec::new();
4281
4282    for (_idx, sel_expr) in select.expressions.iter().enumerate() {
4283        let explode_data = extract_explode_data(sel_expr);
4284
4285        if let Some((is_posexplode, arg_expr, explicit_alias, explicit_pos_alias)) = explode_data {
4286            // Generate the argument SQL in target dialect
4287            let arg_sql = target_dialect
4288                .generate(&arg_expr)
4289                .unwrap_or_else(|_| "NULL".to_string());
4290
4291            let unnest_source_alias = new_name(&mut taken_source_names, "_u");
4292
4293            let explode_alias = if let Some(ref ea) = explicit_alias {
4294                // Use the explicit alias directly (it was explicitly specified by the user)
4295                // Remove from taken_select_names first to avoid false collision with itself
4296                taken_select_names.remove(ea.as_str());
4297                // Now check for collision with other names
4298                let name = new_name(&mut taken_select_names, ea);
4299                name
4300            } else {
4301                new_name(&mut taken_select_names, "col")
4302            };
4303
4304            let pos_alias = if let Some(ref pa) = explicit_pos_alias {
4305                // Use the explicit pos alias directly
4306                taken_select_names.remove(pa.as_str());
4307                let name = new_name(&mut taken_select_names, pa);
4308                name
4309            } else {
4310                new_name(&mut taken_select_names, "pos")
4311            };
4312
4313            // Build the IF projection
4314            if is_presto {
4315                // Presto: IF(_u.pos = _u_2.pos_2, _u_2.col) AS col
4316                let if_col = format!(
4317                    "IF({}.{} = {}.{}, {}.{}) AS {}",
4318                    series_source_alias,
4319                    series_alias,
4320                    unnest_source_alias,
4321                    pos_alias,
4322                    unnest_source_alias,
4323                    explode_alias,
4324                    explode_alias
4325                );
4326                new_projections.push(if_col);
4327
4328                // For POSEXPLODE, also add the position projection
4329                if is_posexplode {
4330                    let if_pos = format!(
4331                        "IF({}.{} = {}.{}, {}.{}) AS {}",
4332                        series_source_alias,
4333                        series_alias,
4334                        unnest_source_alias,
4335                        pos_alias,
4336                        unnest_source_alias,
4337                        pos_alias,
4338                        pos_alias
4339                    );
4340                    new_projections.push(if_pos);
4341                }
4342            } else {
4343                // BigQuery: IF(pos = pos_2, col, NULL) AS col
4344                let if_col = format!(
4345                    "IF({} = {}, {}, NULL) AS {}",
4346                    series_alias, pos_alias, explode_alias, explode_alias
4347                );
4348                new_projections.push(if_col);
4349
4350                // For POSEXPLODE, also add the position projection
4351                if is_posexplode {
4352                    let if_pos = format!(
4353                        "IF({} = {}, {}, NULL) AS {}",
4354                        series_alias, pos_alias, pos_alias, pos_alias
4355                    );
4356                    new_projections.push(if_pos);
4357                }
4358            }
4359
4360            explode_infos.push(ExplodeInfo {
4361                arg_sql,
4362                explode_alias,
4363                pos_alias,
4364                unnest_source_alias,
4365            });
4366        } else {
4367            // Not an EXPLODE expression, generate as-is
4368            let sel_sql = target_dialect
4369                .generate(sel_expr)
4370                .unwrap_or_else(|_| "*".to_string());
4371            new_projections.push(sel_sql);
4372        }
4373    }
4374
4375    if explode_infos.is_empty() {
4376        return Ok(Expression::Select(Box::new(select)));
4377    }
4378
4379    // Build the FROM clause
4380    let mut from_parts: Vec<String> = Vec::new();
4381
4382    // Existing FROM sources
4383    if let Some(ref from) = select.from {
4384        for from_expr in &from.expressions {
4385            let from_sql = target_dialect.generate(from_expr).unwrap_or_default();
4386            from_parts.push(from_sql);
4387        }
4388    }
4389
4390    // Build the size expressions for the series generator
4391    let size_exprs: Vec<String> = explode_infos
4392        .iter()
4393        .map(|info| {
4394            if is_presto {
4395                format!("CARDINALITY({})", info.arg_sql)
4396            } else {
4397                format!("ARRAY_LENGTH({})", info.arg_sql)
4398            }
4399        })
4400        .collect();
4401
4402    let greatest_arg = if size_exprs.len() == 1 {
4403        size_exprs[0].clone()
4404    } else {
4405        format!("GREATEST({})", size_exprs.join(", "))
4406    };
4407
4408    // Build the series source
4409    // greatest_arg is already "GREATEST(...)" when multiple, or "CARDINALITY(x)" / "ARRAY_LENGTH(x)" when single
4410    let series_sql = if is_presto {
4411        // SEQUENCE(1, GREATEST(CARDINALITY(x))) for single, SEQUENCE(1, GREATEST(C(a), C(b))) for multiple
4412        if size_exprs.len() == 1 {
4413            format!(
4414                "UNNEST(SEQUENCE(1, GREATEST({}))) AS {}({})",
4415                greatest_arg, series_source_alias, series_alias
4416            )
4417        } else {
4418            // greatest_arg already has GREATEST(...) wrapper
4419            format!(
4420                "UNNEST(SEQUENCE(1, {})) AS {}({})",
4421                greatest_arg, series_source_alias, series_alias
4422            )
4423        }
4424    } else {
4425        // GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(x)) - 1) for single
4426        if size_exprs.len() == 1 {
4427            format!(
4428                "UNNEST(GENERATE_ARRAY(0, GREATEST({}) - 1)) AS {}",
4429                greatest_arg, series_alias
4430            )
4431        } else {
4432            // greatest_arg already has GREATEST(...) wrapper
4433            format!(
4434                "UNNEST(GENERATE_ARRAY(0, {} - 1)) AS {}",
4435                greatest_arg, series_alias
4436            )
4437        }
4438    };
4439
4440    // Build CROSS JOIN UNNEST clauses
4441    // Always use Presto-style (WITH ORDINALITY) for the SQL string to parse,
4442    // then convert to BigQuery-style AST after parsing if needed
4443    let mut cross_joins: Vec<String> = Vec::new();
4444
4445    for info in &explode_infos {
4446        // Always use WITH ORDINALITY syntax (which our parser handles)
4447        cross_joins.push(format!(
4448            "CROSS JOIN UNNEST({}) WITH ORDINALITY AS {}({}, {})",
4449            info.arg_sql, info.unnest_source_alias, info.explode_alias, info.pos_alias
4450        ));
4451    }
4452
4453    // Build WHERE clause
4454    let mut where_conditions: Vec<String> = Vec::new();
4455
4456    for info in &explode_infos {
4457        let size_expr = if is_presto {
4458            format!("CARDINALITY({})", info.arg_sql)
4459        } else {
4460            format!("ARRAY_LENGTH({})", info.arg_sql)
4461        };
4462
4463        let cond = if is_presto {
4464            format!(
4465                "{series_src}.{series_al} = {unnest_src}.{pos_al} OR ({series_src}.{series_al} > {size} AND {unnest_src}.{pos_al} = {size})",
4466                series_src = series_source_alias,
4467                series_al = series_alias,
4468                unnest_src = info.unnest_source_alias,
4469                pos_al = info.pos_alias,
4470                size = size_expr
4471            )
4472        } else {
4473            format!(
4474                "{series_al} = {pos_al} OR ({series_al} > ({size} - 1) AND {pos_al} = ({size} - 1))",
4475                series_al = series_alias,
4476                pos_al = info.pos_alias,
4477                size = size_expr
4478            )
4479        };
4480
4481        where_conditions.push(cond);
4482    }
4483
4484    // Combine WHERE conditions with AND (wrapped in parens if multiple)
4485    let where_sql = if where_conditions.len() == 1 {
4486        where_conditions[0].clone()
4487    } else {
4488        where_conditions
4489            .iter()
4490            .map(|c| format!("({})", c))
4491            .collect::<Vec<_>>()
4492            .join(" AND ")
4493    };
4494
4495    // Build the complete SQL
4496    let select_part = new_projections.join(", ");
4497
4498    // FROM part: if there was no original FROM, the series becomes the FROM source
4499    let from_and_joins = if from_parts.is_empty() {
4500        // No original FROM: series is the FROM source, everything else is CROSS JOIN
4501        format!("FROM {} {}", series_sql, cross_joins.join(" "))
4502    } else {
4503        format!(
4504            "FROM {} {} {}",
4505            from_parts.join(", "),
4506            format!("CROSS JOIN {}", series_sql),
4507            cross_joins.join(" ")
4508        )
4509    };
4510
4511    let full_sql = format!(
4512        "SELECT {} {} WHERE {}",
4513        select_part, from_and_joins, where_sql
4514    );
4515
4516    // Parse the constructed SQL using the Generic dialect (which handles all SQL syntax)
4517    // We use Generic instead of the target dialect to avoid parser limitations
4518    let generic_dialect = Dialect::get(DialectType::Generic);
4519    let parsed = generic_dialect.parse(&full_sql);
4520    match parsed {
4521        Ok(mut stmts) if !stmts.is_empty() => {
4522            let mut result = stmts.remove(0);
4523
4524            // For BigQuery, convert Presto-style UNNEST AST to BigQuery-style
4525            // Presto: Alias(Unnest(with_ordinality=true), alias=_u_N, column_aliases=[col, pos])
4526            // BigQuery: Unnest(with_ordinality=true, alias=col, offset_alias=pos) [no outer Alias]
4527            if is_bigquery {
4528                convert_unnest_presto_to_bigquery(&mut result);
4529            }
4530
4531            Ok(result)
4532        }
4533        _ => {
4534            // If parsing fails, return the original expression unchanged
4535            Ok(Expression::Select(Box::new(select)))
4536        }
4537    }
4538}
4539
4540/// Convert Presto-style UNNEST WITH ORDINALITY to BigQuery-style UNNEST WITH OFFSET in the AST.
4541/// Presto: Alias(Unnest(with_ordinality=true), alias=_u_N, column_aliases=[col, pos_N])
4542/// BigQuery: Unnest(with_ordinality=true, alias=col, offset_alias=pos_N)
4543fn convert_unnest_presto_to_bigquery(expr: &mut Expression) {
4544    match expr {
4545        Expression::Select(ref mut select) => {
4546            // Convert in FROM clause
4547            if let Some(ref mut from) = select.from {
4548                for from_item in from.expressions.iter_mut() {
4549                    convert_unnest_presto_to_bigquery(from_item);
4550                }
4551            }
4552            // Convert in JOINs
4553            for join in select.joins.iter_mut() {
4554                convert_unnest_presto_to_bigquery(&mut join.this);
4555            }
4556        }
4557        Expression::Alias(ref alias) => {
4558            // Check if this is Alias(Unnest(with_ordinality=true), ..., column_aliases=[col, pos])
4559            if let Expression::Unnest(ref unnest) = alias.this {
4560                if unnest.with_ordinality && alias.column_aliases.len() >= 2 {
4561                    let col_alias = alias.column_aliases[0].clone();
4562                    let pos_alias = alias.column_aliases[1].clone();
4563                    let mut new_unnest = unnest.as_ref().clone();
4564                    new_unnest.alias = Some(col_alias);
4565                    new_unnest.offset_alias = Some(pos_alias);
4566                    // Replace the Alias(Unnest) with just Unnest
4567                    *expr = Expression::Unnest(Box::new(new_unnest));
4568                }
4569            }
4570        }
4571        _ => {}
4572    }
4573}
4574
4575/// Find a new name that doesn't conflict with existing names.
4576/// Tries `base`, then `base_2`, `base_3`, etc.
4577fn new_name(names: &mut std::collections::HashSet<String>, base: &str) -> String {
4578    if !names.contains(base) {
4579        names.insert(base.to_string());
4580        return base.to_string();
4581    }
4582    let mut i = 2;
4583    loop {
4584        let candidate = format!("{}_{}", base, i);
4585        if !names.contains(&candidate) {
4586            names.insert(candidate.clone());
4587            return candidate;
4588        }
4589        i += 1;
4590    }
4591}
4592
4593/// Check if an expression contains EXPLODE or POSEXPLODE
4594fn expr_contains_explode(expr: &Expression) -> bool {
4595    match expr {
4596        Expression::Explode(_) => true,
4597        Expression::ExplodeOuter(_) => true,
4598        Expression::Function(f) => {
4599            let name = f.name.to_uppercase();
4600            name == "POSEXPLODE" || name == "POSEXPLODE_OUTER"
4601        }
4602        Expression::Alias(a) => expr_contains_explode(&a.this),
4603        _ => false,
4604    }
4605}
4606
4607/// Find the EXPLODE/POSEXPLODE expression within a select item, return the arg
4608fn find_explode_in_expr(expr: &Expression) -> Option<Expression> {
4609    match expr {
4610        Expression::Explode(uf) => Some(uf.this.clone()),
4611        Expression::ExplodeOuter(uf) => Some(uf.this.clone()),
4612        Expression::Function(f) => {
4613            let name = f.name.to_uppercase();
4614            if (name == "POSEXPLODE" || name == "POSEXPLODE_OUTER") && !f.args.is_empty() {
4615                Some(f.args[0].clone())
4616            } else {
4617                None
4618            }
4619        }
4620        Expression::Alias(a) => find_explode_in_expr(&a.this),
4621        _ => None,
4622    }
4623}
4624
4625/// Extract explode data from a SELECT expression.
4626/// Returns (is_posexplode, arg_expression, explicit_col_alias, explicit_pos_alias)
4627fn extract_explode_data(
4628    expr: &Expression,
4629) -> Option<(bool, Expression, Option<String>, Option<String>)> {
4630    match expr {
4631        // Bare EXPLODE(x) without alias
4632        Expression::Explode(uf) => Some((false, uf.this.clone(), None, None)),
4633        Expression::ExplodeOuter(uf) => Some((false, uf.this.clone(), None, None)),
4634        // Bare POSEXPLODE(x) without alias
4635        Expression::Function(f) => {
4636            let name = f.name.to_uppercase();
4637            if (name == "POSEXPLODE" || name == "POSEXPLODE_OUTER") && !f.args.is_empty() {
4638                Some((true, f.args[0].clone(), None, None))
4639            } else {
4640                None
4641            }
4642        }
4643        // Aliased: EXPLODE(x) AS col, or POSEXPLODE(x) AS (a, b)
4644        Expression::Alias(a) => {
4645            match &a.this {
4646                Expression::Explode(uf) => {
4647                    let alias = if !a.alias.is_empty() {
4648                        Some(a.alias.name.clone())
4649                    } else {
4650                        None
4651                    };
4652                    Some((false, uf.this.clone(), alias, None))
4653                }
4654                Expression::ExplodeOuter(uf) => {
4655                    let alias = if !a.alias.is_empty() {
4656                        Some(a.alias.name.clone())
4657                    } else {
4658                        None
4659                    };
4660                    Some((false, uf.this.clone(), alias, None))
4661                }
4662                Expression::Function(f) => {
4663                    let name = f.name.to_uppercase();
4664                    if (name == "POSEXPLODE" || name == "POSEXPLODE_OUTER") && !f.args.is_empty() {
4665                        // Check for column aliases: AS (a, b)
4666                        if a.column_aliases.len() == 2 {
4667                            let pos_alias = a.column_aliases[0].name.clone();
4668                            let col_alias = a.column_aliases[1].name.clone();
4669                            Some((true, f.args[0].clone(), Some(col_alias), Some(pos_alias)))
4670                        } else if !a.alias.is_empty() {
4671                            // Single alias: AS x
4672                            Some((true, f.args[0].clone(), Some(a.alias.name.clone()), None))
4673                        } else {
4674                            Some((true, f.args[0].clone(), None, None))
4675                        }
4676                    } else {
4677                        None
4678                    }
4679                }
4680                _ => None,
4681            }
4682        }
4683        _ => None,
4684    }
4685}
4686
4687/// Get the output name of a SELECT expression
4688fn get_output_name(expr: &Expression) -> Option<String> {
4689    match expr {
4690        Expression::Alias(a) => {
4691            if !a.alias.is_empty() {
4692                Some(a.alias.name.clone())
4693            } else {
4694                None
4695            }
4696        }
4697        Expression::Column(c) => Some(c.name.name.clone()),
4698        Expression::Identifier(id) => Some(id.name.clone()),
4699        _ => None,
4700    }
4701}
4702
4703/// Collect source names from a FROM/JOIN expression
4704fn collect_source_names(expr: &Expression, names: &mut std::collections::HashSet<String>) {
4705    match expr {
4706        Expression::Alias(a) => {
4707            if !a.alias.is_empty() {
4708                names.insert(a.alias.name.clone());
4709            }
4710        }
4711        Expression::Subquery(s) => {
4712            if let Some(ref alias) = s.alias {
4713                names.insert(alias.name.clone());
4714            }
4715        }
4716        Expression::Table(t) => {
4717            if let Some(ref alias) = t.alias {
4718                names.insert(alias.name.clone());
4719            } else {
4720                names.insert(t.name.name.clone());
4721            }
4722        }
4723        Expression::Column(c) => {
4724            names.insert(c.name.name.clone());
4725        }
4726        Expression::Identifier(id) => {
4727            names.insert(id.name.clone());
4728        }
4729        _ => {}
4730    }
4731}
4732
4733/// Strip UNNEST wrapping from column reference arguments for Redshift target.
4734/// BigQuery UNNEST(column_ref) -> Redshift: just column_ref
4735pub fn strip_unnest_column_refs(expr: Expression) -> Result<Expression> {
4736    use crate::dialects::transform_recursive;
4737    transform_recursive(expr, &strip_unnest_column_refs_single)
4738}
4739
4740fn strip_unnest_column_refs_single(expr: Expression) -> Result<Expression> {
4741    if let Expression::Select(mut select) = expr {
4742        // Process JOINs (UNNEST items have been moved to joins by unnest_from_to_cross_join)
4743        for join in select.joins.iter_mut() {
4744            strip_unnest_from_expr(&mut join.this);
4745        }
4746        // Process FROM items too
4747        if let Some(ref mut from) = select.from {
4748            for from_item in from.expressions.iter_mut() {
4749                strip_unnest_from_expr(from_item);
4750            }
4751        }
4752        Ok(Expression::Select(select))
4753    } else {
4754        Ok(expr)
4755    }
4756}
4757
4758/// If expr is Alias(UNNEST(column_ref), alias) where UNNEST arg is a column/dot path,
4759/// replace with Alias(column_ref, alias) to strip the UNNEST.
4760fn strip_unnest_from_expr(expr: &mut Expression) {
4761    if let Expression::Alias(ref mut alias) = expr {
4762        if let Expression::Unnest(ref unnest) = alias.this {
4763            let is_column_ref = matches!(&unnest.this, Expression::Column(_) | Expression::Dot(_));
4764            if is_column_ref {
4765                // Replace UNNEST(col_ref) with just col_ref
4766                let inner = unnest.this.clone();
4767                alias.this = inner;
4768            }
4769        }
4770    }
4771}
4772
4773/// Wrap DuckDB UNNEST of struct arrays in (SELECT UNNEST(..., max_depth => 2)) subquery.
4774/// BigQuery UNNEST of struct arrays needs this wrapping for DuckDB to properly expand struct fields.
4775pub fn wrap_duckdb_unnest_struct(expr: Expression) -> Result<Expression> {
4776    use crate::dialects::transform_recursive;
4777    transform_recursive(expr, &wrap_duckdb_unnest_struct_single)
4778}
4779
4780fn wrap_duckdb_unnest_struct_single(expr: Expression) -> Result<Expression> {
4781    if let Expression::Select(mut select) = expr {
4782        // Process FROM items
4783        if let Some(ref mut from) = select.from {
4784            for from_item in from.expressions.iter_mut() {
4785                try_wrap_unnest_in_subquery(from_item);
4786            }
4787        }
4788
4789        // Process JOINs
4790        for join in select.joins.iter_mut() {
4791            try_wrap_unnest_in_subquery(&mut join.this);
4792        }
4793
4794        Ok(Expression::Select(select))
4795    } else {
4796        Ok(expr)
4797    }
4798}
4799
4800/// Check if an expression contains struct array elements that need DuckDB UNNEST wrapping.
4801fn is_struct_array_unnest_arg(expr: &Expression) -> bool {
4802    match expr {
4803        // Array literal containing struct elements
4804        Expression::Array(arr) => arr
4805            .expressions
4806            .iter()
4807            .any(|e| matches!(e, Expression::Struct(_))),
4808        Expression::ArrayFunc(arr) => arr
4809            .expressions
4810            .iter()
4811            .any(|e| matches!(e, Expression::Struct(_))),
4812        // CAST to struct array type, e.g. CAST([] AS STRUCT(x BIGINT)[])
4813        Expression::Cast(c) => {
4814            matches!(&c.to, DataType::Array { element_type, .. } if matches!(**element_type, DataType::Struct { .. }))
4815        }
4816        _ => false,
4817    }
4818}
4819
4820/// Try to wrap an UNNEST expression in a (SELECT UNNEST(..., max_depth => 2)) subquery.
4821/// Handles both bare UNNEST and Alias(UNNEST).
4822fn try_wrap_unnest_in_subquery(expr: &mut Expression) {
4823    // Check for Alias wrapping UNNEST
4824    if let Expression::Alias(ref alias) = expr {
4825        if let Expression::Unnest(ref unnest) = alias.this {
4826            if is_struct_array_unnest_arg(&unnest.this) {
4827                let unnest_clone = (**unnest).clone();
4828                let alias_name = alias.alias.clone();
4829                let new_expr = make_unnest_subquery(unnest_clone, Some(alias_name));
4830                *expr = new_expr;
4831                return;
4832            }
4833        }
4834    }
4835
4836    // Check for bare UNNEST
4837    if let Expression::Unnest(ref unnest) = expr {
4838        if is_struct_array_unnest_arg(&unnest.this) {
4839            let unnest_clone = (**unnest).clone();
4840            let new_expr = make_unnest_subquery(unnest_clone, None);
4841            *expr = new_expr;
4842        }
4843    }
4844}
4845
4846/// Create (SELECT UNNEST(arg, max_depth => 2)) [AS alias] subquery.
4847fn make_unnest_subquery(unnest: UnnestFunc, alias: Option<Identifier>) -> Expression {
4848    // Build UNNEST function call with max_depth => 2 named argument
4849    let max_depth_arg = Expression::NamedArgument(Box::new(NamedArgument {
4850        name: Identifier::new("max_depth".to_string()),
4851        value: Expression::Literal(Box::new(Literal::Number("2".to_string()))),
4852        separator: NamedArgSeparator::DArrow,
4853    }));
4854
4855    let mut unnest_args = vec![unnest.this];
4856    unnest_args.extend(unnest.expressions);
4857    unnest_args.push(max_depth_arg);
4858
4859    let unnest_func =
4860        Expression::Function(Box::new(Function::new("UNNEST".to_string(), unnest_args)));
4861
4862    // Build SELECT UNNEST(...)
4863    let mut inner_select = Select::new();
4864    inner_select.expressions = vec![unnest_func];
4865    let inner_select = Expression::Select(Box::new(inner_select));
4866
4867    // Wrap in subquery
4868    let subquery = Subquery {
4869        this: inner_select,
4870        alias,
4871        column_aliases: Vec::new(),
4872        order_by: None,
4873        limit: None,
4874        offset: None,
4875        distribute_by: None,
4876        sort_by: None,
4877        cluster_by: None,
4878        lateral: false,
4879        modifiers_inside: false,
4880        trailing_comments: Vec::new(),
4881        inferred_type: None,
4882    };
4883
4884    Expression::Subquery(Box::new(subquery))
4885}
4886
4887/// Wrap UNION with ORDER BY/LIMIT in a subquery.
4888///
4889/// Some dialects (ClickHouse, TSQL) don't support ORDER BY/LIMIT directly on UNION.
4890/// This transform converts:
4891///   SELECT ... UNION SELECT ... ORDER BY x LIMIT n
4892/// to:
4893///   SELECT * FROM (SELECT ... UNION SELECT ...) AS _l_0 ORDER BY x LIMIT n
4894///
4895/// NOTE: Our parser may place ORDER BY/LIMIT on the right-hand SELECT rather than
4896/// the Union (unlike Python sqlglot). This function handles both cases by checking
4897/// the right-hand SELECT for trailing ORDER BY/LIMIT and moving them to the Union.
4898pub fn no_limit_order_by_union(expr: Expression) -> Result<Expression> {
4899    use crate::expressions::{Limit as LimitClause, Offset as OffsetClause, OrderBy, Star};
4900
4901    match expr {
4902        Expression::Union(mut u) => {
4903            // Check if ORDER BY/LIMIT are on the rightmost Select instead of the Union
4904            // (our parser may attach them to the right SELECT)
4905            if u.order_by.is_none() && u.limit.is_none() && u.offset.is_none() {
4906                // Find the rightmost Select and check for ORDER BY/LIMIT
4907                if let Expression::Select(ref mut right_select) = u.right {
4908                    if right_select.order_by.is_some()
4909                        || right_select.limit.is_some()
4910                        || right_select.offset.is_some()
4911                    {
4912                        // Move ORDER BY/LIMIT from right Select to Union
4913                        u.order_by = right_select.order_by.take();
4914                        u.limit = right_select.limit.take().map(|l| Box::new(l.this));
4915                        u.offset = right_select.offset.take().map(|o| Box::new(o.this));
4916                    }
4917                }
4918            }
4919
4920            let has_order_or_limit =
4921                u.order_by.is_some() || u.limit.is_some() || u.offset.is_some();
4922            if has_order_or_limit {
4923                // Extract ORDER BY, LIMIT, OFFSET from the Union
4924                let order_by: Option<OrderBy> = u.order_by.take();
4925                let union_limit: Option<Box<Expression>> = u.limit.take();
4926                let union_offset: Option<Box<Expression>> = u.offset.take();
4927
4928                // Convert Union's limit (Box<Expression>) to Select's limit (Limit struct)
4929                let select_limit: Option<LimitClause> = union_limit.map(|l| LimitClause {
4930                    this: *l,
4931                    percent: false,
4932                    comments: Vec::new(),
4933                });
4934
4935                // Convert Union's offset (Box<Expression>) to Select's offset (Offset struct)
4936                let select_offset: Option<OffsetClause> = union_offset.map(|o| OffsetClause {
4937                    this: *o,
4938                    rows: None,
4939                });
4940
4941                // Create a subquery from the Union
4942                let subquery = Subquery {
4943                    this: Expression::Union(u),
4944                    alias: Some(Identifier::new("_l_0")),
4945                    column_aliases: Vec::new(),
4946                    lateral: false,
4947                    modifiers_inside: false,
4948                    order_by: None,
4949                    limit: None,
4950                    offset: None,
4951                    distribute_by: None,
4952                    sort_by: None,
4953                    cluster_by: None,
4954                    trailing_comments: Vec::new(),
4955                    inferred_type: None,
4956                };
4957
4958                // Build SELECT * FROM (UNION) AS _l_0 ORDER BY ... LIMIT ...
4959                let mut select = Select::default();
4960                select.expressions = vec![Expression::Star(Star {
4961                    table: None,
4962                    except: None,
4963                    replace: None,
4964                    rename: None,
4965                    trailing_comments: Vec::new(),
4966                    span: None,
4967                })];
4968                select.from = Some(From {
4969                    expressions: vec![Expression::Subquery(Box::new(subquery))],
4970                });
4971                select.order_by = order_by;
4972                select.limit = select_limit;
4973                select.offset = select_offset;
4974
4975                Ok(Expression::Select(Box::new(select)))
4976            } else {
4977                Ok(Expression::Union(u))
4978            }
4979        }
4980        _ => Ok(expr),
4981    }
4982}
4983
4984/// Expand LIKE ANY / ILIKE ANY to OR chains.
4985///
4986/// For dialects that don't support quantifiers on LIKE/ILIKE (e.g. DuckDB),
4987/// expand `x LIKE ANY (('a', 'b'))` to `x LIKE 'a' OR x LIKE 'b'`.
4988///
4989/// Handles precedence: when LIKE ANY (→OR) is inside AND, wraps in parens.
4990/// When LIKE ALL (→AND) is inside OR, wraps in parens for readability.
4991pub fn expand_like_any(expr: Expression) -> Result<Expression> {
4992    use crate::expressions::{BinaryOp, LikeOp, Paren};
4993
4994    /// Sentinel comment used to mark Paren nodes created by LIKE ALL expansion.
4995    /// These markers are stripped in a cleanup pass unless they end up inside an OR parent.
4996    const LIKE_ALL_MARKER: &str = "__LIKE_ALL_EXPANSION__";
4997
4998    fn unwrap_parens(e: &Expression) -> &Expression {
4999        match e {
5000            Expression::Paren(p) => unwrap_parens(&p.this),
5001            _ => e,
5002        }
5003    }
5004
5005    fn extract_tuple_values(e: &Expression) -> Option<Vec<Expression>> {
5006        let inner = unwrap_parens(e);
5007        match inner {
5008            Expression::Tuple(t) => Some(t.expressions.clone()),
5009            // Single value in parens: treat as single-element list
5010            _ if !matches!(e, Expression::Tuple(_)) => Some(vec![inner.clone()]),
5011            _ => None,
5012        }
5013    }
5014
5015    /// Build a chain of LIKE/ILIKE conditions joined by a combiner (OR for ANY, AND for ALL).
5016    fn expand_like_quantifier(
5017        op: &LikeOp,
5018        values: Vec<Expression>,
5019        is_ilike: bool,
5020        combiner: fn(Expression, Expression) -> Expression,
5021        wrap_marker: bool,
5022    ) -> Expression {
5023        let num_values = values.len();
5024        let mut result: Option<Expression> = None;
5025        for val in values {
5026            let like = if is_ilike {
5027                Expression::ILike(Box::new(LikeOp {
5028                    left: op.left.clone(),
5029                    right: val,
5030                    escape: op.escape.clone(),
5031                    quantifier: None,
5032                    inferred_type: None,
5033                }))
5034            } else {
5035                Expression::Like(Box::new(LikeOp {
5036                    left: op.left.clone(),
5037                    right: val,
5038                    escape: op.escape.clone(),
5039                    quantifier: None,
5040                    inferred_type: None,
5041                }))
5042            };
5043            result = Some(match result {
5044                None => like,
5045                Some(prev) => combiner(prev, like),
5046            });
5047        }
5048        let expanded = result.unwrap_or_else(|| unreachable!("values is non-empty"));
5049        // For LIKE ALL (AND chain) with multiple values, wrap in a marker Paren.
5050        // The marker lets us distinguish expansion-created AND from parser-created AND
5051        // when deciding whether to keep parens inside OR.
5052        if wrap_marker && num_values > 1 {
5053            Expression::Paren(Box::new(Paren {
5054                this: expanded,
5055                trailing_comments: vec![LIKE_ALL_MARKER.to_string()],
5056            }))
5057        } else {
5058            expanded
5059        }
5060    }
5061
5062    fn or_combiner(a: Expression, b: Expression) -> Expression {
5063        Expression::Or(Box::new(BinaryOp::new(a, b)))
5064    }
5065
5066    fn and_combiner(a: Expression, b: Expression) -> Expression {
5067        Expression::And(Box::new(BinaryOp::new(a, b)))
5068    }
5069
5070    fn is_like_all_marker(p: &Paren) -> bool {
5071        p.trailing_comments.len() == 1 && p.trailing_comments[0] == LIKE_ALL_MARKER
5072    }
5073
5074    // Phase 1: Expand LIKE ANY/ALL and fix precedence in a single bottom-up pass.
5075    //
5076    // - LIKE ANY → bare Or chain
5077    // - LIKE ALL → Paren(And chain) with marker comment
5078    // - And handler: wraps bare Or children in Paren (from LIKE ANY expansion;
5079    //   parser never creates bare Or inside And)
5080    // - Or handler: converts marker Paren to clean Paren (keeps the wrapping)
5081    let result = transform_recursive(expr, &|e| {
5082        match e {
5083            // LIKE ANY -> OR chain (bare)
5084            Expression::Like(ref op) if op.quantifier.as_deref() == Some("ANY") => {
5085                if let Some(values) = extract_tuple_values(&op.right) {
5086                    if values.is_empty() {
5087                        return Ok(e);
5088                    }
5089                    Ok(expand_like_quantifier(
5090                        op,
5091                        values,
5092                        false,
5093                        or_combiner,
5094                        false,
5095                    ))
5096                } else {
5097                    Ok(e)
5098                }
5099            }
5100            // LIKE ALL -> AND chain (with marker Paren)
5101            Expression::Like(ref op) if op.quantifier.as_deref() == Some("ALL") => {
5102                if let Some(values) = extract_tuple_values(&op.right) {
5103                    if values.is_empty() {
5104                        return Ok(e);
5105                    }
5106                    Ok(expand_like_quantifier(
5107                        op,
5108                        values,
5109                        false,
5110                        and_combiner,
5111                        true,
5112                    ))
5113                } else {
5114                    Ok(e)
5115                }
5116            }
5117            // ILIKE ANY -> OR chain (bare)
5118            Expression::ILike(ref op) if op.quantifier.as_deref() == Some("ANY") => {
5119                if let Some(values) = extract_tuple_values(&op.right) {
5120                    if values.is_empty() {
5121                        return Ok(e);
5122                    }
5123                    Ok(expand_like_quantifier(op, values, true, or_combiner, false))
5124                } else {
5125                    Ok(e)
5126                }
5127            }
5128            // ILIKE ALL -> AND chain (with marker Paren)
5129            Expression::ILike(ref op) if op.quantifier.as_deref() == Some("ALL") => {
5130                if let Some(values) = extract_tuple_values(&op.right) {
5131                    if values.is_empty() {
5132                        return Ok(e);
5133                    }
5134                    Ok(expand_like_quantifier(op, values, true, and_combiner, true))
5135                } else {
5136                    Ok(e)
5137                }
5138            }
5139            // After children are expanded (bottom-up), fix And nodes:
5140            // Wrap bare Or children in Paren (from LIKE ANY expansion).
5141            // The parser never produces bare Or inside And (AND binds tighter than OR,
5142            // so explicit parens in SQL like "(a OR b) AND c" create Paren(Or(...)) in the AST).
5143            Expression::And(mut op) => {
5144                if matches!(&op.left, Expression::Or(_)) {
5145                    op.left = Expression::Paren(Box::new(Paren {
5146                        this: op.left,
5147                        trailing_comments: vec![],
5148                    }));
5149                }
5150                if matches!(&op.right, Expression::Or(_)) {
5151                    op.right = Expression::Paren(Box::new(Paren {
5152                        this: op.right,
5153                        trailing_comments: vec![],
5154                    }));
5155                }
5156                Ok(Expression::And(op))
5157            }
5158            // After children are expanded (bottom-up), fix Or nodes:
5159            // Convert marker Paren(And) to clean Paren(And) so the cleanup pass won't strip it.
5160            Expression::Or(mut op) => {
5161                if let Expression::Paren(ref mut p) = op.left {
5162                    if is_like_all_marker(p) {
5163                        p.trailing_comments.clear();
5164                    }
5165                }
5166                if let Expression::Paren(ref mut p) = op.right {
5167                    if is_like_all_marker(p) {
5168                        p.trailing_comments.clear();
5169                    }
5170                }
5171                Ok(Expression::Or(op))
5172            }
5173            _ => Ok(e),
5174        }
5175    })?;
5176
5177    // Phase 2: Strip remaining marker Paren(And) nodes that weren't inside an Or parent.
5178    // These are standalone LIKE ALL expansions (e.g., in SELECT expressions) that don't
5179    // need parentheses.
5180    transform_recursive(result, &|e| {
5181        if let Expression::Paren(p) = &e {
5182            if is_like_all_marker(p) {
5183                let Expression::Paren(p) = e else {
5184                    unreachable!()
5185                };
5186                return Ok(p.this);
5187            }
5188        }
5189        Ok(e)
5190    })
5191}
5192
5193/// Ensures all unaliased column outputs in subqueries and CTEs get self-aliases.
5194///
5195/// This is needed for TSQL which requires derived table outputs to be aliased.
5196/// For example: `SELECT c FROM t` inside a subquery becomes `SELECT c AS c FROM t`.
5197///
5198/// Mirrors Python sqlglot's `qualify_derived_table_outputs` function which is applied
5199/// as a TRANSFORMS preprocessor for Subquery and CTE expressions in the TSQL dialect.
5200pub fn qualify_derived_table_outputs(expr: Expression) -> Result<Expression> {
5201    use crate::expressions::Alias;
5202
5203    fn add_self_aliases_to_select(select: &mut Select) {
5204        let new_expressions: Vec<Expression> = select
5205            .expressions
5206            .iter()
5207            .map(|e| {
5208                match e {
5209                    // Column reference without alias -> add self-alias
5210                    Expression::Column(col) => {
5211                        let alias_name = col.name.clone();
5212                        Expression::Alias(Box::new(Alias {
5213                            this: e.clone(),
5214                            alias: alias_name,
5215                            column_aliases: Vec::new(),
5216                            pre_alias_comments: Vec::new(),
5217                            trailing_comments: Vec::new(),
5218                            inferred_type: None,
5219                        }))
5220                    }
5221                    // Already aliased or star or other -> keep as is
5222                    _ => e.clone(),
5223                }
5224            })
5225            .collect();
5226        select.expressions = new_expressions;
5227    }
5228
5229    fn walk_and_qualify(expr: &mut Expression) {
5230        match expr {
5231            Expression::Select(ref mut select) => {
5232                // Qualify subqueries in FROM
5233                if let Some(ref mut from) = select.from {
5234                    for e in from.expressions.iter_mut() {
5235                        qualify_subquery_expr(e);
5236                        walk_and_qualify(e);
5237                    }
5238                }
5239                // Qualify subqueries in JOINs
5240                for join in select.joins.iter_mut() {
5241                    qualify_subquery_expr(&mut join.this);
5242                    walk_and_qualify(&mut join.this);
5243                }
5244                // Recurse into expressions (for correlated subqueries etc.)
5245                for e in select.expressions.iter_mut() {
5246                    walk_and_qualify(e);
5247                }
5248                // Recurse into WHERE
5249                if let Some(ref mut w) = select.where_clause {
5250                    walk_and_qualify(&mut w.this);
5251                }
5252            }
5253            Expression::Subquery(ref mut subquery) => {
5254                walk_and_qualify(&mut subquery.this);
5255            }
5256            Expression::Union(ref mut u) => {
5257                walk_and_qualify(&mut u.left);
5258                walk_and_qualify(&mut u.right);
5259            }
5260            Expression::Intersect(ref mut i) => {
5261                walk_and_qualify(&mut i.left);
5262                walk_and_qualify(&mut i.right);
5263            }
5264            Expression::Except(ref mut e) => {
5265                walk_and_qualify(&mut e.left);
5266                walk_and_qualify(&mut e.right);
5267            }
5268            Expression::Cte(ref mut cte) => {
5269                walk_and_qualify(&mut cte.this);
5270            }
5271            _ => {}
5272        }
5273    }
5274
5275    fn qualify_subquery_expr(expr: &mut Expression) {
5276        match expr {
5277            Expression::Subquery(ref mut subquery) => {
5278                // Only qualify if the subquery has a table alias but no column aliases
5279                if subquery.alias.is_some() && subquery.column_aliases.is_empty() {
5280                    if let Expression::Select(ref mut inner_select) = subquery.this {
5281                        // Check the inner select doesn't use *
5282                        let has_star = inner_select
5283                            .expressions
5284                            .iter()
5285                            .any(|e| matches!(e, Expression::Star(_)));
5286                        if !has_star {
5287                            add_self_aliases_to_select(inner_select);
5288                        }
5289                    }
5290                }
5291                // Recurse into the subquery's inner query
5292                walk_and_qualify(&mut subquery.this);
5293            }
5294            Expression::Alias(ref mut alias) => {
5295                qualify_subquery_expr(&mut alias.this);
5296            }
5297            _ => {}
5298        }
5299    }
5300
5301    let mut result = expr;
5302    walk_and_qualify(&mut result);
5303
5304    // Also qualify CTE inner queries at the top level
5305    if let Expression::Select(ref mut select) = result {
5306        if let Some(ref mut with) = select.with {
5307            for cte in with.ctes.iter_mut() {
5308                // CTE with column names -> no need to qualify
5309                if cte.columns.is_empty() {
5310                    // Walk into the CTE's inner query for nested subqueries
5311                    walk_and_qualify(&mut cte.this);
5312                }
5313            }
5314        }
5315    }
5316
5317    Ok(result)
5318}
5319
5320#[cfg(test)]
5321mod tests {
5322    use super::*;
5323    use crate::dialects::{Dialect, DialectType};
5324    use crate::expressions::Column;
5325
5326    fn gen(expr: &Expression) -> String {
5327        let dialect = Dialect::get(DialectType::Generic);
5328        dialect.generate(expr).unwrap()
5329    }
5330
5331    #[test]
5332    fn test_preprocess() {
5333        let expr = Expression::Boolean(BooleanLiteral { value: true });
5334        let result = preprocess(expr, &[replace_bool_with_int]).unwrap();
5335        assert!(
5336            matches!(result, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)))
5337        );
5338    }
5339
5340    #[test]
5341    fn test_preprocess_chain() {
5342        // Test chaining multiple transforms using function pointers
5343        let expr = Expression::Boolean(BooleanLiteral { value: true });
5344        // Create array of function pointers (all same type)
5345        let transforms: Vec<fn(Expression) -> Result<Expression>> =
5346            vec![replace_bool_with_int, replace_int_with_bool];
5347        let result = preprocess(expr, &transforms).unwrap();
5348        // After replace_bool_with_int: 1
5349        // After replace_int_with_bool: true
5350        if let Expression::Boolean(b) = result {
5351            assert!(b.value);
5352        } else {
5353            panic!("Expected boolean literal");
5354        }
5355    }
5356
5357    #[test]
5358    fn test_unnest_to_explode() {
5359        let unnest = Expression::Unnest(Box::new(UnnestFunc {
5360            this: Expression::boxed_column(Column {
5361                name: Identifier::new("arr".to_string()),
5362                table: None,
5363                join_mark: false,
5364                trailing_comments: vec![],
5365                span: None,
5366                inferred_type: None,
5367            }),
5368            expressions: Vec::new(),
5369            with_ordinality: false,
5370            alias: None,
5371            offset_alias: None,
5372        }));
5373
5374        let result = unnest_to_explode(unnest).unwrap();
5375        assert!(matches!(result, Expression::Explode(_)));
5376    }
5377
5378    #[test]
5379    fn test_explode_to_unnest() {
5380        let explode = Expression::Explode(Box::new(UnaryFunc {
5381            this: Expression::boxed_column(Column {
5382                name: Identifier::new("arr".to_string()),
5383                table: None,
5384                join_mark: false,
5385                trailing_comments: vec![],
5386                span: None,
5387                inferred_type: None,
5388            }),
5389            original_name: None,
5390            inferred_type: None,
5391        }));
5392
5393        let result = explode_to_unnest(explode).unwrap();
5394        assert!(matches!(result, Expression::Unnest(_)));
5395    }
5396
5397    #[test]
5398    fn test_replace_bool_with_int() {
5399        let true_expr = Expression::Boolean(BooleanLiteral { value: true });
5400        let result = replace_bool_with_int(true_expr).unwrap();
5401        if let Expression::Literal(lit) = result {
5402            if let Literal::Number(n) = lit.as_ref() {
5403                assert_eq!(n, "1");
5404            }
5405        } else {
5406            panic!("Expected number literal");
5407        }
5408
5409        let false_expr = Expression::Boolean(BooleanLiteral { value: false });
5410        let result = replace_bool_with_int(false_expr).unwrap();
5411        if let Expression::Literal(lit) = result {
5412            if let Literal::Number(n) = lit.as_ref() {
5413                assert_eq!(n, "0");
5414            }
5415        } else {
5416            panic!("Expected number literal");
5417        }
5418    }
5419
5420    #[test]
5421    fn test_replace_int_with_bool() {
5422        let one_expr = Expression::Literal(Box::new(Literal::Number("1".to_string())));
5423        let result = replace_int_with_bool(one_expr).unwrap();
5424        if let Expression::Boolean(b) = result {
5425            assert!(b.value);
5426        } else {
5427            panic!("Expected boolean true");
5428        }
5429
5430        let zero_expr = Expression::Literal(Box::new(Literal::Number("0".to_string())));
5431        let result = replace_int_with_bool(zero_expr).unwrap();
5432        if let Expression::Boolean(b) = result {
5433            assert!(!b.value);
5434        } else {
5435            panic!("Expected boolean false");
5436        }
5437
5438        // Test that other numbers are not converted
5439        let two_expr = Expression::Literal(Box::new(Literal::Number("2".to_string())));
5440        let result = replace_int_with_bool(two_expr).unwrap();
5441        assert!(
5442            matches!(result, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)))
5443        );
5444    }
5445
5446    #[test]
5447    fn test_strip_data_type_params() {
5448        // Test Decimal
5449        let decimal = DataType::Decimal {
5450            precision: Some(10),
5451            scale: Some(2),
5452        };
5453        let stripped = strip_data_type_params(decimal);
5454        assert_eq!(
5455            stripped,
5456            DataType::Decimal {
5457                precision: None,
5458                scale: None
5459            }
5460        );
5461
5462        // Test VarChar
5463        let varchar = DataType::VarChar {
5464            length: Some(255),
5465            parenthesized_length: false,
5466        };
5467        let stripped = strip_data_type_params(varchar);
5468        assert_eq!(
5469            stripped,
5470            DataType::VarChar {
5471                length: None,
5472                parenthesized_length: false
5473            }
5474        );
5475
5476        // Test Char
5477        let char_type = DataType::Char { length: Some(10) };
5478        let stripped = strip_data_type_params(char_type);
5479        assert_eq!(stripped, DataType::Char { length: None });
5480
5481        // Test Timestamp (preserve timezone)
5482        let timestamp = DataType::Timestamp {
5483            precision: Some(6),
5484            timezone: true,
5485        };
5486        let stripped = strip_data_type_params(timestamp);
5487        assert_eq!(
5488            stripped,
5489            DataType::Timestamp {
5490                precision: None,
5491                timezone: true
5492            }
5493        );
5494
5495        // Test Array (recursive)
5496        let array = DataType::Array {
5497            element_type: Box::new(DataType::VarChar {
5498                length: Some(100),
5499                parenthesized_length: false,
5500            }),
5501            dimension: None,
5502        };
5503        let stripped = strip_data_type_params(array);
5504        assert_eq!(
5505            stripped,
5506            DataType::Array {
5507                element_type: Box::new(DataType::VarChar {
5508                    length: None,
5509                    parenthesized_length: false
5510                }),
5511                dimension: None,
5512            }
5513        );
5514
5515        // Test types without params are unchanged
5516        let text = DataType::Text;
5517        let stripped = strip_data_type_params(text);
5518        assert_eq!(stripped, DataType::Text);
5519    }
5520
5521    #[test]
5522    fn test_remove_precision_parameterized_types_cast() {
5523        // Create a CAST(1 AS DECIMAL(10, 2)) expression
5524        let cast_expr = Expression::Cast(Box::new(Cast {
5525            this: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
5526            to: DataType::Decimal {
5527                precision: Some(10),
5528                scale: Some(2),
5529            },
5530            trailing_comments: vec![],
5531            double_colon_syntax: false,
5532            format: None,
5533            default: None,
5534            inferred_type: None,
5535        }));
5536
5537        let result = remove_precision_parameterized_types(cast_expr).unwrap();
5538        if let Expression::Cast(cast) = result {
5539            assert_eq!(
5540                cast.to,
5541                DataType::Decimal {
5542                    precision: None,
5543                    scale: None
5544                }
5545            );
5546        } else {
5547            panic!("Expected Cast expression");
5548        }
5549    }
5550
5551    #[test]
5552    fn test_remove_precision_parameterized_types_varchar() {
5553        // Create a CAST('hello' AS VARCHAR(10)) expression
5554        let cast_expr = Expression::Cast(Box::new(Cast {
5555            this: Expression::Literal(Box::new(Literal::String("hello".to_string()))),
5556            to: DataType::VarChar {
5557                length: Some(10),
5558                parenthesized_length: false,
5559            },
5560            trailing_comments: vec![],
5561            double_colon_syntax: false,
5562            format: None,
5563            default: None,
5564            inferred_type: None,
5565        }));
5566
5567        let result = remove_precision_parameterized_types(cast_expr).unwrap();
5568        if let Expression::Cast(cast) = result {
5569            assert_eq!(
5570                cast.to,
5571                DataType::VarChar {
5572                    length: None,
5573                    parenthesized_length: false
5574                }
5575            );
5576        } else {
5577            panic!("Expected Cast expression");
5578        }
5579    }
5580
5581    #[test]
5582    fn test_remove_precision_direct_cast() {
5583        // Test transform on a direct Cast expression (not nested in Select)
5584        // The current implementation handles top-level Cast expressions;
5585        // a full implementation would need recursive AST traversal
5586        let cast = Expression::Cast(Box::new(Cast {
5587            this: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
5588            to: DataType::Decimal {
5589                precision: Some(10),
5590                scale: Some(2),
5591            },
5592            trailing_comments: vec![],
5593            double_colon_syntax: false,
5594            format: None,
5595            default: None,
5596            inferred_type: None,
5597        }));
5598
5599        let transformed = remove_precision_parameterized_types(cast).unwrap();
5600        let generated = gen(&transformed);
5601
5602        // Should now be DECIMAL without precision
5603        assert!(generated.contains("DECIMAL"));
5604        assert!(!generated.contains("(10"));
5605    }
5606
5607    #[test]
5608    fn test_epoch_cast_to_ts() {
5609        // Test CAST('epoch' AS TIMESTAMP) → CAST('1970-01-01 00:00:00' AS TIMESTAMP)
5610        let cast_expr = Expression::Cast(Box::new(Cast {
5611            this: Expression::Literal(Box::new(Literal::String("epoch".to_string()))),
5612            to: DataType::Timestamp {
5613                precision: None,
5614                timezone: false,
5615            },
5616            trailing_comments: vec![],
5617            double_colon_syntax: false,
5618            format: None,
5619            default: None,
5620            inferred_type: None,
5621        }));
5622
5623        let result = epoch_cast_to_ts(cast_expr).unwrap();
5624        if let Expression::Cast(cast) = result {
5625            if let Expression::Literal(lit) = cast.this {
5626                if let Literal::String(s) = lit.as_ref() {
5627                    assert_eq!(s, "1970-01-01 00:00:00");
5628                }
5629            } else {
5630                panic!("Expected string literal");
5631            }
5632        } else {
5633            panic!("Expected Cast expression");
5634        }
5635    }
5636
5637    #[test]
5638    fn test_epoch_cast_to_ts_preserves_non_epoch() {
5639        // Test that non-epoch strings are preserved
5640        let cast_expr = Expression::Cast(Box::new(Cast {
5641            this: Expression::Literal(Box::new(Literal::String("2024-01-15".to_string()))),
5642            to: DataType::Timestamp {
5643                precision: None,
5644                timezone: false,
5645            },
5646            trailing_comments: vec![],
5647            double_colon_syntax: false,
5648            format: None,
5649            default: None,
5650            inferred_type: None,
5651        }));
5652
5653        let result = epoch_cast_to_ts(cast_expr).unwrap();
5654        if let Expression::Cast(cast) = result {
5655            if let Expression::Literal(lit) = cast.this {
5656                if let Literal::String(s) = lit.as_ref() {
5657                    assert_eq!(s, "2024-01-15");
5658                }
5659            } else {
5660                panic!("Expected string literal");
5661            }
5662        } else {
5663            panic!("Expected Cast expression");
5664        }
5665    }
5666
5667    #[test]
5668    fn test_unqualify_columns() {
5669        // Test that table qualifiers are removed
5670        let col = Expression::boxed_column(Column {
5671            name: Identifier::new("id".to_string()),
5672            table: Some(Identifier::new("users".to_string())),
5673            join_mark: false,
5674            trailing_comments: vec![],
5675            span: None,
5676            inferred_type: None,
5677        });
5678
5679        let result = unqualify_columns(col).unwrap();
5680        if let Expression::Column(c) = result {
5681            assert!(c.table.is_none());
5682            assert_eq!(c.name.name, "id");
5683        } else {
5684            panic!("Expected Column expression");
5685        }
5686    }
5687
5688    #[test]
5689    fn test_is_temporal_type() {
5690        assert!(is_temporal_type(&DataType::Date));
5691        assert!(is_temporal_type(&DataType::Timestamp {
5692            precision: None,
5693            timezone: false
5694        }));
5695        assert!(is_temporal_type(&DataType::Time {
5696            precision: None,
5697            timezone: false
5698        }));
5699        assert!(!is_temporal_type(&DataType::Int {
5700            length: None,
5701            integer_spelling: false
5702        }));
5703        assert!(!is_temporal_type(&DataType::VarChar {
5704            length: None,
5705            parenthesized_length: false
5706        }));
5707    }
5708
5709    #[test]
5710    fn test_eliminate_semi_join_basic() {
5711        use crate::expressions::{Join, TableRef};
5712
5713        // Test that semi joins are converted to EXISTS
5714        let select = Expression::Select(Box::new(Select {
5715            expressions: vec![Expression::boxed_column(Column {
5716                name: Identifier::new("a".to_string()),
5717                table: None,
5718                join_mark: false,
5719                trailing_comments: vec![],
5720                span: None,
5721                inferred_type: None,
5722            })],
5723            from: Some(From {
5724                expressions: vec![Expression::Table(Box::new(TableRef::new("t1")))],
5725            }),
5726            joins: vec![Join {
5727                this: Expression::Table(Box::new(TableRef::new("t2"))),
5728                kind: JoinKind::Semi,
5729                on: Some(Expression::Eq(Box::new(BinaryOp {
5730                    left: Expression::boxed_column(Column {
5731                        name: Identifier::new("x".to_string()),
5732                        table: None,
5733                        join_mark: false,
5734                        trailing_comments: vec![],
5735                        span: None,
5736                        inferred_type: None,
5737                    }),
5738                    right: Expression::boxed_column(Column {
5739                        name: Identifier::new("y".to_string()),
5740                        table: None,
5741                        join_mark: false,
5742                        trailing_comments: vec![],
5743                        span: None,
5744                        inferred_type: None,
5745                    }),
5746                    left_comments: vec![],
5747                    operator_comments: vec![],
5748                    trailing_comments: vec![],
5749                    inferred_type: None,
5750                }))),
5751                using: vec![],
5752                use_inner_keyword: false,
5753                use_outer_keyword: false,
5754                deferred_condition: false,
5755                join_hint: None,
5756                match_condition: None,
5757                pivots: Vec::new(),
5758                comments: Vec::new(),
5759                nesting_group: 0,
5760                directed: false,
5761            }],
5762            ..Select::new()
5763        }));
5764
5765        let result = eliminate_semi_and_anti_joins(select).unwrap();
5766        if let Expression::Select(s) = result {
5767            // Semi join should be removed
5768            assert!(s.joins.is_empty());
5769            // WHERE clause should have EXISTS
5770            assert!(s.where_clause.is_some());
5771        } else {
5772            panic!("Expected Select expression");
5773        }
5774    }
5775
5776    #[test]
5777    fn test_no_ilike_sql() {
5778        use crate::expressions::LikeOp;
5779
5780        // Test ILIKE conversion to LOWER+LIKE
5781        let ilike_expr = Expression::ILike(Box::new(LikeOp {
5782            left: Expression::boxed_column(Column {
5783                name: Identifier::new("name".to_string()),
5784                table: None,
5785                join_mark: false,
5786                trailing_comments: vec![],
5787                span: None,
5788                inferred_type: None,
5789            }),
5790            right: Expression::Literal(Box::new(Literal::String("%test%".to_string()))),
5791            escape: None,
5792            quantifier: None,
5793            inferred_type: None,
5794        }));
5795
5796        let result = no_ilike_sql(ilike_expr).unwrap();
5797        if let Expression::Like(like) = result {
5798            // Left should be LOWER(name)
5799            if let Expression::Function(f) = &like.left {
5800                assert_eq!(f.name, "LOWER");
5801            } else {
5802                panic!("Expected LOWER function on left");
5803            }
5804            // Right should be LOWER('%test%')
5805            if let Expression::Function(f) = &like.right {
5806                assert_eq!(f.name, "LOWER");
5807            } else {
5808                panic!("Expected LOWER function on right");
5809            }
5810        } else {
5811            panic!("Expected Like expression");
5812        }
5813    }
5814
5815    #[test]
5816    fn test_no_trycast_sql() {
5817        // Test TryCast conversion to Cast
5818        let trycast_expr = Expression::TryCast(Box::new(Cast {
5819            this: Expression::Literal(Box::new(Literal::String("123".to_string()))),
5820            to: DataType::Int {
5821                length: None,
5822                integer_spelling: false,
5823            },
5824            trailing_comments: vec![],
5825            double_colon_syntax: false,
5826            format: None,
5827            default: None,
5828            inferred_type: None,
5829        }));
5830
5831        let result = no_trycast_sql(trycast_expr).unwrap();
5832        assert!(matches!(result, Expression::Cast(_)));
5833    }
5834
5835    #[test]
5836    fn test_no_safe_cast_sql() {
5837        // Test SafeCast conversion to Cast
5838        let safe_cast_expr = Expression::SafeCast(Box::new(Cast {
5839            this: Expression::Literal(Box::new(Literal::String("123".to_string()))),
5840            to: DataType::Int {
5841                length: None,
5842                integer_spelling: false,
5843            },
5844            trailing_comments: vec![],
5845            double_colon_syntax: false,
5846            format: None,
5847            default: None,
5848            inferred_type: None,
5849        }));
5850
5851        let result = no_safe_cast_sql(safe_cast_expr).unwrap();
5852        assert!(matches!(result, Expression::Cast(_)));
5853    }
5854
5855    #[test]
5856    fn test_explode_to_unnest_presto() {
5857        let spark = Dialect::get(DialectType::Spark);
5858        let result = spark
5859            .transpile_to("SELECT EXPLODE(x) FROM tbl", DialectType::Presto)
5860            .unwrap();
5861        assert_eq!(
5862            result[0],
5863            "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col FROM tbl CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(x)))) AS _u(pos) CROSS JOIN UNNEST(x) WITH ORDINALITY AS _u_2(col, pos_2) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(x) AND _u_2.pos_2 = CARDINALITY(x))"
5864        );
5865    }
5866
5867    #[test]
5868    fn test_explode_to_unnest_bigquery() {
5869        let spark = Dialect::get(DialectType::Spark);
5870        let result = spark
5871            .transpile_to("SELECT EXPLODE(x) FROM tbl", DialectType::BigQuery)
5872            .unwrap();
5873        assert_eq!(
5874            result[0],
5875            "SELECT IF(pos = pos_2, col, NULL) AS col FROM tbl CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(x)) - 1)) AS pos CROSS JOIN UNNEST(x) AS col WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(x) - 1) AND pos_2 = (ARRAY_LENGTH(x) - 1))"
5876        );
5877    }
5878}