Skip to main content

polyglot_sql/
transforms.rs

1//! SQL AST Transforms
2//!
3//! This module provides functions to transform SQL ASTs for dialect compatibility.
4//! These transforms are used during transpilation to convert dialect-specific features
5//! to forms that are supported by the target dialect.
6//!
7//! Based on the Python implementation in `sqlglot/transforms.py`.
8
9use crate::dialects::transform_recursive;
10use crate::dialects::{Dialect, DialectType};
11use crate::error::Result;
12use crate::expressions::{
13    Alias, BinaryOp, BooleanLiteral, Cast, DataType, Exists, Expression, From, Function,
14    Identifier, Join, JoinKind, Lateral, LateralView, Literal, NamedArgSeparator, NamedArgument,
15    Over, Select, StructField, Subquery, UnaryFunc, UnnestFunc, Where,
16};
17use std::cell::RefCell;
18
19/// Apply a chain of transforms to an expression
20///
21/// # Arguments
22/// * `expr` - The expression to transform
23/// * `transforms` - A list of transform functions to apply in order
24///
25/// # Returns
26/// The transformed expression
27pub fn preprocess<F>(expr: Expression, transforms: &[F]) -> Result<Expression>
28where
29    F: Fn(Expression) -> Result<Expression>,
30{
31    let mut result = expr;
32    for transform in transforms {
33        result = transform(result)?;
34    }
35    Ok(result)
36}
37
38/// Convert UNNEST to EXPLODE (for Spark/Hive compatibility)
39///
40/// UNNEST is standard SQL but Spark uses EXPLODE instead.
41pub fn unnest_to_explode(expr: Expression) -> Result<Expression> {
42    match expr {
43        Expression::Unnest(unnest) => {
44            Ok(Expression::Explode(Box::new(UnaryFunc::new(unnest.this))))
45        }
46        _ => Ok(expr),
47    }
48}
49
50/// Convert CROSS JOIN UNNEST to LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks.
51///
52/// This is a SELECT-level structural transformation that:
53/// 1. Converts UNNEST in FROM clause to INLINE/EXPLODE
54/// 2. Converts CROSS JOIN (LATERAL) UNNEST to LATERAL VIEW entries
55/// 3. For single-arg UNNEST: uses EXPLODE
56/// 4. For multi-arg UNNEST: uses INLINE(ARRAYS_ZIP(...))
57///
58/// Based on Python sqlglot's `unnest_to_explode` transform in transforms.py (lines 290-391).
59pub fn unnest_to_explode_select(expr: Expression) -> Result<Expression> {
60    transform_recursive(expr, &unnest_to_explode_select_inner)
61}
62
63/// Helper to determine the UDTF function for an UNNEST expression.
64/// Single-arg UNNEST → EXPLODE, multi-arg → INLINE
65fn make_udtf_expr(unnest: &UnnestFunc) -> Expression {
66    let has_multi_expr = !unnest.expressions.is_empty();
67    if has_multi_expr {
68        // Multi-arg: INLINE(ARRAYS_ZIP(arg1, arg2, ...))
69        let mut all_args = vec![unnest.this.clone()];
70        all_args.extend(unnest.expressions.iter().cloned());
71        let arrays_zip =
72            Expression::Function(Box::new(Function::new("ARRAYS_ZIP".to_string(), all_args)));
73        Expression::Function(Box::new(Function::new(
74            "INLINE".to_string(),
75            vec![arrays_zip],
76        )))
77    } else {
78        // Single-arg: EXPLODE(arg)
79        Expression::Explode(Box::new(UnaryFunc::new(unnest.this.clone())))
80    }
81}
82
83fn unnest_to_explode_select_inner(expr: Expression) -> Result<Expression> {
84    let Expression::Select(mut select) = expr else {
85        return Ok(expr);
86    };
87
88    // Process FROM clause: UNNEST items need conversion
89    if let Some(ref mut from) = select.from {
90        if from.expressions.len() >= 1 {
91            let mut new_from_exprs = Vec::new();
92            let mut new_lateral_views = Vec::new();
93            let first_is_unnest = is_unnest_expr(&from.expressions[0]);
94
95            for (idx, from_item) in from.expressions.drain(..).enumerate() {
96                if idx == 0 && first_is_unnest {
97                    // UNNEST is the first (and possibly only) item in FROM
98                    // Replace it with INLINE/EXPLODE, keeping alias
99                    let replaced = replace_from_unnest(from_item);
100                    new_from_exprs.push(replaced);
101                } else if idx > 0 && is_unnest_expr(&from_item) {
102                    // Additional UNNEST items in FROM (comma-joined) → LATERAL VIEW
103                    let (alias_name, column_aliases, unnest_func) = extract_unnest_info(from_item);
104                    let udtf = make_udtf_expr(&unnest_func);
105                    new_lateral_views.push(LateralView {
106                        this: udtf,
107                        table_alias: alias_name,
108                        column_aliases,
109                        outer: false,
110                    });
111                } else {
112                    new_from_exprs.push(from_item);
113                }
114            }
115
116            from.expressions = new_from_exprs;
117            // Append lateral views for comma-joined UNNESTs
118            select.lateral_views.extend(new_lateral_views);
119        }
120    }
121
122    // Process joins: CROSS JOIN (LATERAL) UNNEST → LATERAL VIEW
123    let mut remaining_joins = Vec::new();
124    for join in select.joins.drain(..) {
125        if matches!(join.kind, JoinKind::Cross | JoinKind::Inner) {
126            let (is_unnest, is_lateral) = check_join_unnest(&join.this);
127            if is_unnest {
128                // Extract UNNEST info from join, handling Lateral wrapper
129                let (lateral_alias, lateral_col_aliases, join_expr) = if is_lateral {
130                    if let Expression::Lateral(lat) = join.this {
131                        // Extract alias from Lateral struct
132                        let alias = lat.alias.map(|s| Identifier::new(&s));
133                        let col_aliases: Vec<Identifier> = lat
134                            .column_aliases
135                            .iter()
136                            .map(|s| Identifier::new(s))
137                            .collect();
138                        (alias, col_aliases, *lat.this)
139                    } else {
140                        (None, Vec::new(), join.this)
141                    }
142                } else {
143                    (None, Vec::new(), join.this)
144                };
145
146                let (alias_name, column_aliases, unnest_func) = extract_unnest_info(join_expr);
147
148                // Prefer Lateral's alias over UNNEST's alias
149                let final_alias = lateral_alias.or(alias_name);
150                let final_col_aliases = if !lateral_col_aliases.is_empty() {
151                    lateral_col_aliases
152                } else {
153                    column_aliases
154                };
155
156                // Use "unnest" as default alias if none provided (for single-arg case)
157                let table_alias = final_alias.or_else(|| Some(Identifier::new("unnest")));
158                let col_aliases = if final_col_aliases.is_empty() {
159                    vec![Identifier::new("unnest")]
160                } else {
161                    final_col_aliases
162                };
163
164                let udtf = make_udtf_expr(&unnest_func);
165                select.lateral_views.push(LateralView {
166                    this: udtf,
167                    table_alias,
168                    column_aliases: col_aliases,
169                    outer: false,
170                });
171            } else {
172                remaining_joins.push(join);
173            }
174        } else {
175            remaining_joins.push(join);
176        }
177    }
178    select.joins = remaining_joins;
179
180    Ok(Expression::Select(select))
181}
182
183/// Check if an expression is or wraps an UNNEST
184fn is_unnest_expr(expr: &Expression) -> bool {
185    match expr {
186        Expression::Unnest(_) => true,
187        Expression::Alias(a) => matches!(a.this, Expression::Unnest(_)),
188        _ => false,
189    }
190}
191
192/// Check if a join's expression is an UNNEST (possibly wrapped in Lateral)
193fn check_join_unnest(expr: &Expression) -> (bool, bool) {
194    match expr {
195        Expression::Unnest(_) => (true, false),
196        Expression::Alias(a) => {
197            if matches!(a.this, Expression::Unnest(_)) {
198                (true, false)
199            } else {
200                (false, false)
201            }
202        }
203        Expression::Lateral(lat) => match &*lat.this {
204            Expression::Unnest(_) => (true, true),
205            Expression::Alias(a) => {
206                if matches!(a.this, Expression::Unnest(_)) {
207                    (true, true)
208                } else {
209                    (false, true)
210                }
211            }
212            _ => (false, true),
213        },
214        _ => (false, false),
215    }
216}
217
218/// Replace an UNNEST in FROM with INLINE/EXPLODE, preserving alias structure
219fn replace_from_unnest(from_item: Expression) -> Expression {
220    match from_item {
221        Expression::Alias(mut a) => {
222            if let Expression::Unnest(unnest) = a.this {
223                a.this = make_udtf_expr(&unnest);
224            }
225            Expression::Alias(a)
226        }
227        Expression::Unnest(unnest) => make_udtf_expr(&unnest),
228        other => other,
229    }
230}
231
232/// Extract alias info and UnnestFunc from an expression (possibly wrapped in Alias)
233fn extract_unnest_info(expr: Expression) -> (Option<Identifier>, Vec<Identifier>, UnnestFunc) {
234    match expr {
235        Expression::Alias(a) => {
236            if let Expression::Unnest(unnest) = a.this {
237                (Some(a.alias), a.column_aliases, *unnest)
238            } else {
239                // Should not happen if we already checked is_unnest_expr
240                (
241                    Some(a.alias),
242                    a.column_aliases,
243                    UnnestFunc {
244                        this: a.this,
245                        expressions: Vec::new(),
246                        with_ordinality: false,
247                        alias: None,
248                        offset_alias: None,
249                    },
250                )
251            }
252        }
253        Expression::Unnest(unnest) => {
254            let alias = unnest.alias.clone();
255            (alias, Vec::new(), *unnest)
256        }
257        _ => (
258            None,
259            Vec::new(),
260            UnnestFunc {
261                this: expr,
262                expressions: Vec::new(),
263                with_ordinality: false,
264                alias: None,
265                offset_alias: None,
266            },
267        ),
268    }
269}
270
271/// Convert EXPLODE to UNNEST (for standard SQL compatibility)
272pub fn explode_to_unnest(expr: Expression) -> Result<Expression> {
273    match expr {
274        Expression::Explode(explode) => Ok(Expression::Unnest(Box::new(UnnestFunc {
275            this: explode.this,
276            expressions: Vec::new(),
277            with_ordinality: false,
278            alias: None,
279            offset_alias: None,
280        }))),
281        _ => Ok(expr),
282    }
283}
284
285/// Replace boolean literals for dialects that don't support them
286///
287/// Converts TRUE/FALSE to 1/0 for dialects like older MySQL versions
288pub fn replace_bool_with_int(expr: Expression) -> Result<Expression> {
289    match expr {
290        Expression::Boolean(b) => {
291            let value = if b.value { "1" } else { "0" };
292            Ok(Expression::Literal(Box::new(Literal::Number(
293                value.to_string(),
294            ))))
295        }
296        _ => Ok(expr),
297    }
298}
299
300/// Replace integer literals for dialects that prefer boolean
301///
302/// Converts 1/0 to TRUE/FALSE
303pub fn replace_int_with_bool(expr: Expression) -> Result<Expression> {
304    match expr {
305        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1" || n == "0") =>
306        {
307            let Literal::Number(n) = lit.as_ref() else {
308                unreachable!()
309            };
310            Ok(Expression::Boolean(BooleanLiteral { value: n == "1" }))
311        }
312        _ => Ok(expr),
313    }
314}
315
316/// Remove precision from parameterized types
317///
318/// Some dialects don't support precision parameters on certain types.
319/// This transform removes them, e.g., VARCHAR(255) → VARCHAR, DECIMAL(10,2) → DECIMAL
320pub fn remove_precision_parameterized_types(expr: Expression) -> Result<Expression> {
321    Ok(strip_type_params_recursive(expr))
322}
323
324/// Recursively strip type parameters from DataType values in an expression
325fn strip_type_params_recursive(expr: Expression) -> Expression {
326    match expr {
327        // Handle Cast expressions - strip precision from target type
328        Expression::Cast(mut cast) => {
329            cast.to = strip_data_type_params(cast.to);
330            // Also recursively process the expression being cast
331            cast.this = strip_type_params_recursive(cast.this);
332            Expression::Cast(cast)
333        }
334        // Handle TryCast expressions (uses same Cast struct)
335        Expression::TryCast(mut try_cast) => {
336            try_cast.to = strip_data_type_params(try_cast.to);
337            try_cast.this = strip_type_params_recursive(try_cast.this);
338            Expression::TryCast(try_cast)
339        }
340        // Handle SafeCast expressions (uses same Cast struct)
341        Expression::SafeCast(mut safe_cast) => {
342            safe_cast.to = strip_data_type_params(safe_cast.to);
343            safe_cast.this = strip_type_params_recursive(safe_cast.this);
344            Expression::SafeCast(safe_cast)
345        }
346        // For now, pass through other expressions
347        // A full implementation would recursively visit all nodes
348        _ => expr,
349    }
350}
351
352/// Strip precision/scale/length parameters from a DataType
353fn strip_data_type_params(dt: DataType) -> DataType {
354    match dt {
355        // Numeric types with precision/scale
356        DataType::Decimal { .. } => DataType::Decimal {
357            precision: None,
358            scale: None,
359        },
360        DataType::TinyInt { .. } => DataType::TinyInt { length: None },
361        DataType::SmallInt { .. } => DataType::SmallInt { length: None },
362        DataType::Int { .. } => DataType::Int {
363            length: None,
364            integer_spelling: false,
365        },
366        DataType::BigInt { .. } => DataType::BigInt { length: None },
367
368        // String types with length
369        DataType::Char { .. } => DataType::Char { length: None },
370        DataType::VarChar { .. } => DataType::VarChar {
371            length: None,
372            parenthesized_length: false,
373        },
374
375        // Binary types with length
376        DataType::Binary { .. } => DataType::Binary { length: None },
377        DataType::VarBinary { .. } => DataType::VarBinary { length: None },
378
379        // Bit types with length
380        DataType::Bit { .. } => DataType::Bit { length: None },
381        DataType::VarBit { .. } => DataType::VarBit { length: None },
382
383        // Time types with precision
384        DataType::Time { .. } => DataType::Time {
385            precision: None,
386            timezone: false,
387        },
388        DataType::Timestamp { timezone, .. } => DataType::Timestamp {
389            precision: None,
390            timezone,
391        },
392
393        // Array - recursively strip element type
394        DataType::Array {
395            element_type,
396            dimension,
397        } => DataType::Array {
398            element_type: Box::new(strip_data_type_params(*element_type)),
399            dimension,
400        },
401
402        // Map - recursively strip key and value types
403        DataType::Map {
404            key_type,
405            value_type,
406        } => DataType::Map {
407            key_type: Box::new(strip_data_type_params(*key_type)),
408            value_type: Box::new(strip_data_type_params(*value_type)),
409        },
410
411        // Struct - recursively strip field types
412        DataType::Struct { fields, nested } => DataType::Struct {
413            fields: fields
414                .into_iter()
415                .map(|f| {
416                    StructField::with_options(
417                        f.name,
418                        strip_data_type_params(f.data_type),
419                        f.options,
420                    )
421                })
422                .collect(),
423            nested,
424        },
425
426        // Vector - strip dimension
427        DataType::Vector { element_type, .. } => DataType::Vector {
428            element_type: element_type.map(|et| Box::new(strip_data_type_params(*et))),
429            dimension: None,
430        },
431
432        // Object - recursively strip field types
433        DataType::Object { fields, modifier } => DataType::Object {
434            fields: fields
435                .into_iter()
436                .map(|(name, ty, not_null)| (name, strip_data_type_params(ty), not_null))
437                .collect(),
438            modifier,
439        },
440
441        // Other types pass through unchanged
442        other => other,
443    }
444}
445
446/// Eliminate QUALIFY clause by converting to a subquery with WHERE filter
447///
448/// QUALIFY is supported by Snowflake, BigQuery, and DuckDB but not by most other dialects.
449///
450/// Converts:
451/// ```sql
452/// SELECT * FROM t QUALIFY ROW_NUMBER() OVER (...) = 1
453/// ```
454/// To:
455/// ```sql
456/// SELECT * FROM (SELECT *, ROW_NUMBER() OVER (...) AS _w FROM t) _t WHERE _w = 1
457/// ```
458///
459/// Reference: `transforms.py:194-255`
460pub fn eliminate_qualify(expr: Expression) -> Result<Expression> {
461    match expr {
462        Expression::Select(mut select) => {
463            if let Some(qualify) = select.qualify.take() {
464                // Python sqlglot approach:
465                // 1. Extract the window function from the qualify condition
466                // 2. Add it as _w alias to the inner select
467                // 3. Replace the window function reference with _w in the outer WHERE
468                // 4. Keep original select expressions in the outer query
469
470                let qualify_filter = qualify.this;
471                let window_alias_name = "_w".to_string();
472                let window_alias_ident = Identifier::new(window_alias_name.clone());
473
474                // Try to extract window function from comparison
475                // Pattern: WINDOW_FUNC = value -> inner adds WINDOW_FUNC AS _w, outer WHERE _w = value
476                let (window_expr, outer_where) =
477                    extract_window_from_condition(qualify_filter.clone(), &window_alias_ident);
478
479                if let Some(win_expr) = window_expr {
480                    // Add window function as _w alias to inner select
481                    let window_alias_expr =
482                        Expression::Alias(Box::new(crate::expressions::Alias {
483                            this: win_expr,
484                            alias: window_alias_ident.clone(),
485                            column_aliases: vec![],
486                            alias_explicit_as: false,
487                            alias_keyword: None,
488                            pre_alias_comments: vec![],
489                            trailing_comments: vec![],
490                            inferred_type: None,
491                        }));
492
493                    // For the outer SELECT, replace aliased expressions with just the alias reference
494                    // e.g., `1 AS other_id` in inner -> `other_id` in outer
495                    // Non-aliased expressions (columns, identifiers) stay as-is
496                    let outer_exprs: Vec<Expression> = select
497                        .expressions
498                        .iter()
499                        .map(|expr| {
500                            if let Expression::Alias(a) = expr {
501                                // Replace with just the alias identifier as a column reference
502                                Expression::Column(Box::new(crate::expressions::Column {
503                                    name: a.alias.clone(),
504                                    table: None,
505                                    join_mark: false,
506                                    trailing_comments: vec![],
507                                    span: None,
508                                    inferred_type: None,
509                                }))
510                            } else {
511                                expr.clone()
512                            }
513                        })
514                        .collect();
515                    select.expressions.push(window_alias_expr);
516
517                    // Create the inner subquery
518                    let inner_select = Expression::Select(select);
519                    let subquery = Subquery {
520                        this: inner_select,
521                        alias: Some(Identifier::new("_t".to_string())),
522                        column_aliases: vec![],
523                        alias_explicit_as: false,
524                        alias_keyword: None,
525                        order_by: None,
526                        limit: None,
527                        offset: None,
528                        distribute_by: None,
529                        sort_by: None,
530                        cluster_by: None,
531                        lateral: false,
532                        modifiers_inside: false,
533                        trailing_comments: vec![],
534                        inferred_type: None,
535                    };
536
537                    // Create the outer SELECT with alias-resolved expressions and WHERE _w <op> value
538                    let outer_select = Select {
539                        expressions: outer_exprs,
540                        from: Some(From {
541                            expressions: vec![Expression::Subquery(Box::new(subquery))],
542                        }),
543                        where_clause: Some(Where { this: outer_where }),
544                        ..Select::new()
545                    };
546
547                    return Ok(Expression::Select(Box::new(outer_select)));
548                } else {
549                    // Fallback: if we can't extract a window function, use old approach
550                    let qualify_alias = Expression::Alias(Box::new(crate::expressions::Alias {
551                        this: qualify_filter.clone(),
552                        alias: window_alias_ident.clone(),
553                        column_aliases: vec![],
554                        alias_explicit_as: false,
555                        alias_keyword: None,
556                        pre_alias_comments: vec![],
557                        trailing_comments: vec![],
558                        inferred_type: None,
559                    }));
560
561                    let original_exprs = select.expressions.clone();
562                    select.expressions.push(qualify_alias);
563
564                    let inner_select = Expression::Select(select);
565                    let subquery = Subquery {
566                        this: inner_select,
567                        alias: Some(Identifier::new("_t".to_string())),
568                        column_aliases: vec![],
569                        alias_explicit_as: false,
570                        alias_keyword: None,
571                        order_by: None,
572                        limit: None,
573                        offset: None,
574                        distribute_by: None,
575                        sort_by: None,
576                        cluster_by: None,
577                        lateral: false,
578                        modifiers_inside: false,
579                        trailing_comments: vec![],
580                        inferred_type: None,
581                    };
582
583                    let outer_select = Select {
584                        expressions: original_exprs,
585                        from: Some(From {
586                            expressions: vec![Expression::Subquery(Box::new(subquery))],
587                        }),
588                        where_clause: Some(Where {
589                            this: Expression::Column(Box::new(crate::expressions::Column {
590                                name: window_alias_ident,
591                                table: None,
592                                join_mark: false,
593                                trailing_comments: vec![],
594                                span: None,
595                                inferred_type: None,
596                            })),
597                        }),
598                        ..Select::new()
599                    };
600
601                    return Ok(Expression::Select(Box::new(outer_select)));
602                }
603            }
604            Ok(Expression::Select(select))
605        }
606        other => Ok(other),
607    }
608}
609
610/// Extract a window function from a qualify condition.
611/// Returns (window_expression, rewritten_condition) if found.
612/// The rewritten condition replaces the window function with a column reference to the alias.
613fn extract_window_from_condition(
614    condition: Expression,
615    alias: &Identifier,
616) -> (Option<Expression>, Expression) {
617    let alias_col = Expression::Column(Box::new(crate::expressions::Column {
618        name: alias.clone(),
619        table: None,
620        join_mark: false,
621        trailing_comments: vec![],
622        span: None,
623        inferred_type: None,
624    }));
625
626    // Check if condition is a simple comparison with a window function on one side
627    match condition {
628        // WINDOW_FUNC = value
629        Expression::Eq(ref op) => {
630            if is_window_expr(&op.left) {
631                (
632                    Some(op.left.clone()),
633                    Expression::Eq(Box::new(BinaryOp {
634                        left: alias_col,
635                        right: op.right.clone(),
636                        ..(**op).clone()
637                    })),
638                )
639            } else if is_window_expr(&op.right) {
640                (
641                    Some(op.right.clone()),
642                    Expression::Eq(Box::new(BinaryOp {
643                        left: op.left.clone(),
644                        right: alias_col,
645                        ..(**op).clone()
646                    })),
647                )
648            } else {
649                (None, condition)
650            }
651        }
652        Expression::Neq(ref op) => {
653            if is_window_expr(&op.left) {
654                (
655                    Some(op.left.clone()),
656                    Expression::Neq(Box::new(BinaryOp {
657                        left: alias_col,
658                        right: op.right.clone(),
659                        ..(**op).clone()
660                    })),
661                )
662            } else if is_window_expr(&op.right) {
663                (
664                    Some(op.right.clone()),
665                    Expression::Neq(Box::new(BinaryOp {
666                        left: op.left.clone(),
667                        right: alias_col,
668                        ..(**op).clone()
669                    })),
670                )
671            } else {
672                (None, condition)
673            }
674        }
675        Expression::Lt(ref op) => {
676            if is_window_expr(&op.left) {
677                (
678                    Some(op.left.clone()),
679                    Expression::Lt(Box::new(BinaryOp {
680                        left: alias_col,
681                        right: op.right.clone(),
682                        ..(**op).clone()
683                    })),
684                )
685            } else if is_window_expr(&op.right) {
686                (
687                    Some(op.right.clone()),
688                    Expression::Lt(Box::new(BinaryOp {
689                        left: op.left.clone(),
690                        right: alias_col,
691                        ..(**op).clone()
692                    })),
693                )
694            } else {
695                (None, condition)
696            }
697        }
698        Expression::Lte(ref op) => {
699            if is_window_expr(&op.left) {
700                (
701                    Some(op.left.clone()),
702                    Expression::Lte(Box::new(BinaryOp {
703                        left: alias_col,
704                        right: op.right.clone(),
705                        ..(**op).clone()
706                    })),
707                )
708            } else if is_window_expr(&op.right) {
709                (
710                    Some(op.right.clone()),
711                    Expression::Lte(Box::new(BinaryOp {
712                        left: op.left.clone(),
713                        right: alias_col,
714                        ..(**op).clone()
715                    })),
716                )
717            } else {
718                (None, condition)
719            }
720        }
721        Expression::Gt(ref op) => {
722            if is_window_expr(&op.left) {
723                (
724                    Some(op.left.clone()),
725                    Expression::Gt(Box::new(BinaryOp {
726                        left: alias_col,
727                        right: op.right.clone(),
728                        ..(**op).clone()
729                    })),
730                )
731            } else if is_window_expr(&op.right) {
732                (
733                    Some(op.right.clone()),
734                    Expression::Gt(Box::new(BinaryOp {
735                        left: op.left.clone(),
736                        right: alias_col,
737                        ..(**op).clone()
738                    })),
739                )
740            } else {
741                (None, condition)
742            }
743        }
744        Expression::Gte(ref op) => {
745            if is_window_expr(&op.left) {
746                (
747                    Some(op.left.clone()),
748                    Expression::Gte(Box::new(BinaryOp {
749                        left: alias_col,
750                        right: op.right.clone(),
751                        ..(**op).clone()
752                    })),
753                )
754            } else if is_window_expr(&op.right) {
755                (
756                    Some(op.right.clone()),
757                    Expression::Gte(Box::new(BinaryOp {
758                        left: op.left.clone(),
759                        right: alias_col,
760                        ..(**op).clone()
761                    })),
762                )
763            } else {
764                (None, condition)
765            }
766        }
767        // If the condition is just a window function (bare QUALIFY expression)
768        _ if is_window_expr(&condition) => (Some(condition), alias_col),
769        // Can't extract window function
770        _ => (None, condition),
771    }
772}
773
774/// Check if an expression is a window function
775fn is_window_expr(expr: &Expression) -> bool {
776    matches!(expr, Expression::Window(_) | Expression::WindowFunction(_))
777}
778
779/// Eliminate DISTINCT ON clause by converting to a subquery with ROW_NUMBER
780///
781/// DISTINCT ON is PostgreSQL-specific. For dialects that don't support it,
782/// this converts it to a subquery with a ROW_NUMBER() window function.
783///
784/// Converts:
785/// ```sql
786/// SELECT DISTINCT ON (a) a, b FROM t ORDER BY a, b
787/// ```
788/// To:
789/// ```sql
790/// SELECT a, b FROM (
791///     SELECT a, b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY a, b) AS _row_number
792///     FROM t
793/// ) _t WHERE _row_number = 1
794/// ```
795///
796/// Reference: `transforms.py:138-191`
797pub fn eliminate_distinct_on(expr: Expression) -> Result<Expression> {
798    eliminate_distinct_on_for_dialect(expr, None, None)
799}
800
801/// Eliminate DISTINCT ON with dialect-specific NULL ordering behavior.
802///
803/// For dialects where NULLs don't sort first by default in DESC ordering,
804/// we need to add explicit NULL ordering to preserve DISTINCT ON semantics.
805pub fn eliminate_distinct_on_for_dialect(
806    expr: Expression,
807    target: Option<DialectType>,
808    source: Option<DialectType>,
809) -> Result<Expression> {
810    use crate::expressions::Case;
811
812    // PostgreSQL and DuckDB support DISTINCT ON natively - skip elimination
813    if matches!(
814        target,
815        Some(DialectType::PostgreSQL) | Some(DialectType::DuckDB)
816    ) {
817        return Ok(expr);
818    }
819
820    // Determine NULL ordering mode based on target dialect
821    // Oracle/Redshift/Snowflake: NULLS FIRST is default for DESC -> no change needed
822    // BigQuery/Spark/Presto/Hive/etc: need explicit NULLS FIRST
823    // MySQL/TSQL: no NULLS FIRST syntax -> use CASE WHEN IS NULL
824    enum NullsMode {
825        None,       // Default NULLS FIRST behavior (Oracle, Redshift, Snowflake)
826        NullsFirst, // Add explicit NULLS FIRST (BigQuery, Spark, Presto, Hive, etc.)
827        CaseExpr,   // Use CASE WHEN IS NULL for NULLS FIRST simulation (MySQL, StarRocks, TSQL)
828    }
829
830    let nulls_mode = match target {
831        Some(DialectType::MySQL)
832        | Some(DialectType::SingleStore)
833        | Some(DialectType::TSQL)
834        | Some(DialectType::Fabric) => NullsMode::CaseExpr,
835        Some(DialectType::Oracle) | Some(DialectType::Redshift) | Some(DialectType::Snowflake) => {
836            NullsMode::None
837        }
838        Some(DialectType::StarRocks) => {
839            if matches!(source, Some(DialectType::Redshift)) {
840                NullsMode::CaseExpr
841            } else {
842                NullsMode::None
843            }
844        }
845        // All other dialects that don't support DISTINCT ON: use NULLS FIRST
846        _ => NullsMode::NullsFirst,
847    };
848
849    match expr {
850        Expression::Select(mut select) => {
851            if let Some(distinct_cols) = select.distinct_on.take() {
852                if !distinct_cols.is_empty() {
853                    // Create ROW_NUMBER() OVER (PARTITION BY distinct_cols ORDER BY ...)
854                    let row_number_alias = Identifier::new("_row_number".to_string());
855
856                    // Get order_by expressions, or use distinct_cols as default order
857                    let order_exprs = if let Some(ref order_by) = select.order_by {
858                        let mut exprs = order_by.expressions.clone();
859                        // Add NULL ordering based on target dialect
860                        match nulls_mode {
861                            NullsMode::NullsFirst => {
862                                for ord in &mut exprs {
863                                    if ord.desc && ord.nulls_first.is_none() {
864                                        ord.nulls_first = Some(true);
865                                    }
866                                }
867                            }
868                            NullsMode::CaseExpr => {
869                                // For each DESC column without explicit nulls ordering,
870                                // prepend: CASE WHEN col IS NULL THEN 1 ELSE 0 END DESC
871                                let mut new_exprs = Vec::new();
872                                for ord in &exprs {
873                                    if ord.desc && ord.nulls_first.is_none() {
874                                        // Add CASE WHEN col IS NULL THEN 1 ELSE 0 END DESC
875                                        let null_check = Expression::Case(Box::new(Case {
876                                            operand: None,
877                                            whens: vec![(
878                                                Expression::IsNull(Box::new(
879                                                    crate::expressions::IsNull {
880                                                        this: ord.this.clone(),
881                                                        not: false,
882                                                        postfix_form: false,
883                                                    },
884                                                )),
885                                                Expression::Literal(Box::new(Literal::Number(
886                                                    "1".to_string(),
887                                                ))),
888                                            )],
889                                            else_: Some(Expression::Literal(Box::new(
890                                                Literal::Number("0".to_string()),
891                                            ))),
892                                            comments: Vec::new(),
893                                            inferred_type: None,
894                                        }));
895                                        new_exprs.push(crate::expressions::Ordered {
896                                            this: null_check,
897                                            desc: true,
898                                            nulls_first: None,
899                                            explicit_asc: false,
900                                            with_fill: None,
901                                        });
902                                    }
903                                    new_exprs.push(ord.clone());
904                                }
905                                exprs = new_exprs;
906                            }
907                            NullsMode::None => {}
908                        }
909                        exprs
910                    } else {
911                        distinct_cols
912                            .iter()
913                            .map(|e| crate::expressions::Ordered {
914                                this: e.clone(),
915                                desc: false,
916                                nulls_first: None,
917                                explicit_asc: false,
918                                with_fill: None,
919                            })
920                            .collect()
921                    };
922
923                    // Create window function: ROW_NUMBER() OVER (PARTITION BY ... ORDER BY ...)
924                    let row_number_func =
925                        Expression::WindowFunction(Box::new(crate::expressions::WindowFunction {
926                            this: Expression::RowNumber(crate::expressions::RowNumber),
927                            over: Over {
928                                partition_by: distinct_cols,
929                                order_by: order_exprs,
930                                frame: None,
931                                window_name: None,
932                                alias: None,
933                            },
934                            keep: None,
935                            inferred_type: None,
936                        }));
937
938                    // Build aliased inner expressions and outer column references
939                    // Inner: SELECT a AS a, b AS b, ROW_NUMBER() OVER (...) AS _row_number
940                    // Outer: SELECT a, b FROM (...)
941                    let mut inner_aliased_exprs = Vec::new();
942                    let mut outer_select_exprs = Vec::new();
943                    for orig_expr in &select.expressions {
944                        match orig_expr {
945                            Expression::Alias(alias) => {
946                                // Already aliased - keep as-is in inner, reference alias in outer
947                                inner_aliased_exprs.push(orig_expr.clone());
948                                outer_select_exprs.push(Expression::Column(Box::new(
949                                    crate::expressions::Column {
950                                        name: alias.alias.clone(),
951                                        table: None,
952                                        join_mark: false,
953                                        trailing_comments: vec![],
954                                        span: None,
955                                        inferred_type: None,
956                                    },
957                                )));
958                            }
959                            Expression::Column(col) => {
960                                // Wrap in alias: a AS a in inner, just a in outer
961                                inner_aliased_exprs.push(Expression::Alias(Box::new(
962                                    crate::expressions::Alias {
963                                        this: orig_expr.clone(),
964                                        alias: col.name.clone(),
965                                        column_aliases: vec![],
966                                        alias_explicit_as: false,
967                                        alias_keyword: None,
968                                        pre_alias_comments: vec![],
969                                        trailing_comments: vec![],
970                                        inferred_type: None,
971                                    },
972                                )));
973                                outer_select_exprs.push(Expression::Column(Box::new(
974                                    crate::expressions::Column {
975                                        name: col.name.clone(),
976                                        table: None,
977                                        join_mark: false,
978                                        trailing_comments: vec![],
979                                        span: None,
980                                        inferred_type: None,
981                                    },
982                                )));
983                            }
984                            _ => {
985                                // Complex expression without alias - include as-is in both
986                                inner_aliased_exprs.push(orig_expr.clone());
987                                outer_select_exprs.push(orig_expr.clone());
988                            }
989                        }
990                    }
991
992                    // Add ROW_NUMBER as aliased expression to inner select list
993                    let row_number_alias_expr =
994                        Expression::Alias(Box::new(crate::expressions::Alias {
995                            this: row_number_func,
996                            alias: row_number_alias.clone(),
997                            column_aliases: vec![],
998                            alias_explicit_as: false,
999                            alias_keyword: None,
1000                            pre_alias_comments: vec![],
1001                            trailing_comments: vec![],
1002                            inferred_type: None,
1003                        }));
1004                    inner_aliased_exprs.push(row_number_alias_expr);
1005
1006                    // Replace inner select's expressions with aliased versions
1007                    select.expressions = inner_aliased_exprs;
1008
1009                    // Remove ORDER BY from inner query (it's now in the window function)
1010                    let _inner_order_by = select.order_by.take();
1011
1012                    // Clear DISTINCT from inner select (DISTINCT ON is replaced by ROW_NUMBER)
1013                    select.distinct = false;
1014
1015                    // Create inner subquery
1016                    let inner_select = Expression::Select(select);
1017                    let subquery = Subquery {
1018                        this: inner_select,
1019                        alias: Some(Identifier::new("_t".to_string())),
1020                        column_aliases: vec![],
1021                        alias_explicit_as: false,
1022                        alias_keyword: None,
1023                        order_by: None,
1024                        limit: None,
1025                        offset: None,
1026                        distribute_by: None,
1027                        sort_by: None,
1028                        cluster_by: None,
1029                        lateral: false,
1030                        modifiers_inside: false,
1031                        trailing_comments: vec![],
1032                        inferred_type: None,
1033                    };
1034
1035                    // Create outer SELECT with WHERE _row_number = 1
1036                    // No ORDER BY on outer query
1037                    let outer_select = Select {
1038                        expressions: outer_select_exprs,
1039                        from: Some(From {
1040                            expressions: vec![Expression::Subquery(Box::new(subquery))],
1041                        }),
1042                        where_clause: Some(Where {
1043                            this: Expression::Eq(Box::new(BinaryOp {
1044                                left: Expression::Column(Box::new(crate::expressions::Column {
1045                                    name: row_number_alias,
1046                                    table: None,
1047                                    join_mark: false,
1048                                    trailing_comments: vec![],
1049                                    span: None,
1050                                    inferred_type: None,
1051                                })),
1052                                right: Expression::Literal(Box::new(Literal::Number(
1053                                    "1".to_string(),
1054                                ))),
1055                                left_comments: vec![],
1056                                operator_comments: vec![],
1057                                trailing_comments: vec![],
1058                                inferred_type: None,
1059                            })),
1060                        }),
1061                        ..Select::new()
1062                    };
1063
1064                    return Ok(Expression::Select(Box::new(outer_select)));
1065                }
1066            }
1067            Ok(Expression::Select(select))
1068        }
1069        other => Ok(other),
1070    }
1071}
1072
1073/// Convert SEMI and ANTI joins into equivalent forms that use EXISTS instead.
1074///
1075/// For dialects that don't support SEMI/ANTI join syntax, this converts:
1076/// - `SELECT * FROM a SEMI JOIN b ON a.x = b.x` → `SELECT * FROM a WHERE EXISTS (SELECT 1 FROM b WHERE a.x = b.x)`
1077/// - `SELECT * FROM a ANTI JOIN b ON a.x = b.x` → `SELECT * FROM a WHERE NOT EXISTS (SELECT 1 FROM b WHERE a.x = b.x)`
1078///
1079/// Reference: `transforms.py:607-621`
1080pub fn eliminate_semi_and_anti_joins(expr: Expression) -> Result<Expression> {
1081    match expr {
1082        Expression::Select(mut select) => {
1083            let mut new_joins = Vec::new();
1084            let mut extra_where_conditions = Vec::new();
1085
1086            for join in select.joins.drain(..) {
1087                match join.kind {
1088                    JoinKind::Semi | JoinKind::LeftSemi => {
1089                        if let Some(on_condition) = join.on {
1090                            // Create: EXISTS (SELECT 1 FROM join_table WHERE on_condition)
1091                            let subquery_select = Select {
1092                                expressions: vec![Expression::Literal(Box::new(Literal::Number(
1093                                    "1".to_string(),
1094                                )))],
1095                                from: Some(From {
1096                                    expressions: vec![join.this],
1097                                }),
1098                                where_clause: Some(Where { this: on_condition }),
1099                                ..Select::new()
1100                            };
1101
1102                            let exists = Expression::Exists(Box::new(Exists {
1103                                this: Expression::Subquery(Box::new(Subquery {
1104                                    this: Expression::Select(Box::new(subquery_select)),
1105                                    alias: None,
1106                                    column_aliases: vec![],
1107                                    alias_explicit_as: false,
1108                                    alias_keyword: None,
1109                                    order_by: None,
1110                                    limit: None,
1111                                    offset: None,
1112                                    distribute_by: None,
1113                                    sort_by: None,
1114                                    cluster_by: None,
1115                                    lateral: false,
1116                                    modifiers_inside: false,
1117                                    trailing_comments: vec![],
1118                                    inferred_type: None,
1119                                })),
1120                                not: false,
1121                            }));
1122
1123                            extra_where_conditions.push(exists);
1124                        }
1125                    }
1126                    JoinKind::Anti | JoinKind::LeftAnti => {
1127                        if let Some(on_condition) = join.on {
1128                            // Create: NOT EXISTS (SELECT 1 FROM join_table WHERE on_condition)
1129                            let subquery_select = Select {
1130                                expressions: vec![Expression::Literal(Box::new(Literal::Number(
1131                                    "1".to_string(),
1132                                )))],
1133                                from: Some(From {
1134                                    expressions: vec![join.this],
1135                                }),
1136                                where_clause: Some(Where { this: on_condition }),
1137                                ..Select::new()
1138                            };
1139
1140                            // Use Exists with not: true for NOT EXISTS
1141                            let not_exists = Expression::Exists(Box::new(Exists {
1142                                this: Expression::Subquery(Box::new(Subquery {
1143                                    this: Expression::Select(Box::new(subquery_select)),
1144                                    alias: None,
1145                                    column_aliases: vec![],
1146                                    alias_explicit_as: false,
1147                                    alias_keyword: None,
1148                                    order_by: None,
1149                                    limit: None,
1150                                    offset: None,
1151                                    distribute_by: None,
1152                                    sort_by: None,
1153                                    cluster_by: None,
1154                                    lateral: false,
1155                                    modifiers_inside: false,
1156                                    trailing_comments: vec![],
1157                                    inferred_type: None,
1158                                })),
1159                                not: true,
1160                            }));
1161
1162                            extra_where_conditions.push(not_exists);
1163                        }
1164                    }
1165                    _ => {
1166                        // Keep other join types as-is
1167                        new_joins.push(join);
1168                    }
1169                }
1170            }
1171
1172            select.joins = new_joins;
1173
1174            // Add EXISTS conditions to WHERE clause
1175            if !extra_where_conditions.is_empty() {
1176                let combined = extra_where_conditions
1177                    .into_iter()
1178                    .reduce(|acc, cond| {
1179                        Expression::And(Box::new(BinaryOp {
1180                            left: acc,
1181                            right: cond,
1182                            left_comments: vec![],
1183                            operator_comments: vec![],
1184                            trailing_comments: vec![],
1185                            inferred_type: None,
1186                        }))
1187                    })
1188                    .unwrap();
1189
1190                select.where_clause = match select.where_clause {
1191                    Some(Where { this: existing }) => Some(Where {
1192                        this: Expression::And(Box::new(BinaryOp {
1193                            left: existing,
1194                            right: combined,
1195                            left_comments: vec![],
1196                            operator_comments: vec![],
1197                            trailing_comments: vec![],
1198                            inferred_type: None,
1199                        })),
1200                    }),
1201                    None => Some(Where { this: combined }),
1202                };
1203            }
1204
1205            Ok(Expression::Select(select))
1206        }
1207        other => Ok(other),
1208    }
1209}
1210
1211/// Convert FULL OUTER JOIN to a UNION of LEFT and RIGHT OUTER joins.
1212///
1213/// For dialects that don't support FULL OUTER JOIN, this converts:
1214/// ```sql
1215/// SELECT * FROM a FULL OUTER JOIN b ON a.x = b.x
1216/// ```
1217/// To:
1218/// ```sql
1219/// SELECT * FROM a LEFT OUTER JOIN b ON a.x = b.x
1220/// UNION ALL
1221/// SELECT * FROM a RIGHT OUTER JOIN b ON a.x = b.x
1222/// WHERE NOT EXISTS (SELECT 1 FROM a WHERE a.x = b.x)
1223/// ```
1224///
1225/// Note: This transformation currently only works for queries with a single FULL OUTER join.
1226///
1227/// Reference: `transforms.py:624-661`
1228pub fn eliminate_full_outer_join(expr: Expression) -> Result<Expression> {
1229    match expr {
1230        Expression::Select(mut select) => {
1231            // Find FULL OUTER joins
1232            let full_outer_join_idx = select.joins.iter().position(|j| j.kind == JoinKind::Full);
1233
1234            if let Some(idx) = full_outer_join_idx {
1235                // We only handle queries with a single FULL OUTER join
1236                let full_join_count = select
1237                    .joins
1238                    .iter()
1239                    .filter(|j| j.kind == JoinKind::Full)
1240                    .count();
1241                if full_join_count != 1 {
1242                    return Ok(Expression::Select(select));
1243                }
1244
1245                // Clone the query for the right side of the UNION
1246                let mut right_select = select.clone();
1247
1248                // Get the join condition from the FULL OUTER join
1249                let full_join = &select.joins[idx];
1250                let join_condition = full_join.on.clone();
1251
1252                // Left side: convert FULL to LEFT
1253                select.joins[idx].kind = JoinKind::Left;
1254
1255                // Right side: convert FULL to RIGHT and add NOT EXISTS condition
1256                right_select.joins[idx].kind = JoinKind::Right;
1257
1258                // Build NOT EXISTS for the right side to exclude rows that matched
1259                if let (Some(ref from), Some(ref join_cond)) = (&select.from, &join_condition) {
1260                    if !from.expressions.is_empty() {
1261                        let anti_subquery = Expression::Select(Box::new(Select {
1262                            expressions: vec![Expression::Literal(Box::new(Literal::Number(
1263                                "1".to_string(),
1264                            )))],
1265                            from: Some(from.clone()),
1266                            where_clause: Some(Where {
1267                                this: join_cond.clone(),
1268                            }),
1269                            ..Select::new()
1270                        }));
1271
1272                        let not_exists = Expression::Not(Box::new(crate::expressions::UnaryOp {
1273                            inferred_type: None,
1274                            this: Expression::Exists(Box::new(Exists {
1275                                this: Expression::Subquery(Box::new(Subquery {
1276                                    this: anti_subquery,
1277                                    alias: None,
1278                                    column_aliases: vec![],
1279                                    alias_explicit_as: false,
1280                                    alias_keyword: None,
1281                                    order_by: None,
1282                                    limit: None,
1283                                    offset: None,
1284                                    distribute_by: None,
1285                                    sort_by: None,
1286                                    cluster_by: None,
1287                                    lateral: false,
1288                                    modifiers_inside: false,
1289                                    trailing_comments: vec![],
1290                                    inferred_type: None,
1291                                })),
1292                                not: false,
1293                            })),
1294                        }));
1295
1296                        // Add NOT EXISTS to the WHERE clause
1297                        right_select.where_clause = Some(Where {
1298                            this: match right_select.where_clause {
1299                                Some(w) => Expression::And(Box::new(BinaryOp {
1300                                    left: w.this,
1301                                    right: not_exists,
1302                                    left_comments: vec![],
1303                                    operator_comments: vec![],
1304                                    trailing_comments: vec![],
1305                                    inferred_type: None,
1306                                })),
1307                                None => not_exists,
1308                            },
1309                        });
1310                    }
1311                }
1312
1313                // Remove WITH clause from right side (CTEs should only be on left)
1314                right_select.with = None;
1315
1316                // Remove ORDER BY from left side (will be applied after UNION)
1317                let order_by = select.order_by.take();
1318
1319                // Create UNION ALL of left and right
1320                let union = crate::expressions::Union {
1321                    left: Expression::Select(select),
1322                    right: Expression::Select(right_select),
1323                    all: true, // UNION ALL
1324                    distinct: false,
1325                    with: None,
1326                    order_by,
1327                    limit: None,
1328                    offset: None,
1329                    distribute_by: None,
1330                    sort_by: None,
1331                    cluster_by: None,
1332                    by_name: false,
1333                    side: None,
1334                    kind: None,
1335                    corresponding: false,
1336                    strict: false,
1337                    on_columns: Vec::new(),
1338                };
1339
1340                return Ok(Expression::Union(Box::new(union)));
1341            }
1342
1343            Ok(Expression::Select(select))
1344        }
1345        other => Ok(other),
1346    }
1347}
1348
1349/// Move CTEs to the top level of the query.
1350///
1351/// Some dialects (e.g., Hive, T-SQL, Spark prior to version 3) only allow CTEs to be
1352/// defined at the top-level, so for example queries like:
1353///
1354/// ```sql
1355/// SELECT * FROM (WITH t(c) AS (SELECT 1) SELECT * FROM t) AS subq
1356/// ```
1357///
1358/// are invalid in those dialects. This transformation moves all CTEs to the top level.
1359///
1360/// Reference: `transforms.py:664-700`
1361pub fn move_ctes_to_top_level(expr: Expression) -> Result<Expression> {
1362    match expr {
1363        Expression::Select(mut select) => {
1364            // Phase 1: Collect CTEs from nested subqueries (not inside CTE definitions)
1365            let mut collected_ctes: Vec<crate::expressions::Cte> = Vec::new();
1366            let mut has_recursive = false;
1367
1368            collect_nested_ctes(
1369                &Expression::Select(select.clone()),
1370                &mut collected_ctes,
1371                &mut has_recursive,
1372                true,
1373            );
1374
1375            // Phase 2: Flatten CTEs nested inside top-level CTE definitions
1376            // This handles: WITH c AS (WITH b AS (...) SELECT ...) -> WITH b AS (...), c AS (SELECT ...)
1377            let mut cte_body_collected: Vec<(String, Vec<crate::expressions::Cte>)> = Vec::new();
1378            if let Some(ref with) = select.with {
1379                for cte in &with.ctes {
1380                    let mut body_ctes: Vec<crate::expressions::Cte> = Vec::new();
1381                    collect_ctes_from_cte_body(&cte.this, &mut body_ctes, &mut has_recursive);
1382                    if !body_ctes.is_empty() {
1383                        cte_body_collected.push((cte.alias.name.clone(), body_ctes));
1384                    }
1385                }
1386            }
1387
1388            let has_subquery_ctes = !collected_ctes.is_empty();
1389            let has_body_ctes = !cte_body_collected.is_empty();
1390
1391            if has_subquery_ctes || has_body_ctes {
1392                // Strip WITH clauses from inner subqueries
1393                strip_nested_with_clauses(&mut select, true);
1394
1395                // Strip WITH clauses from CTE body definitions
1396                if has_body_ctes {
1397                    if let Some(ref mut with) = select.with {
1398                        for cte in with.ctes.iter_mut() {
1399                            strip_with_from_cte_body(&mut cte.this);
1400                        }
1401                    }
1402                }
1403
1404                let top_with = select.with.get_or_insert_with(|| crate::expressions::With {
1405                    ctes: Vec::new(),
1406                    recursive: false,
1407                    leading_comments: vec![],
1408                    search: None,
1409                });
1410
1411                if has_recursive {
1412                    top_with.recursive = true;
1413                }
1414
1415                // Insert body CTEs before their parent CTE (Python sqlglot behavior)
1416                if has_body_ctes {
1417                    let mut new_ctes: Vec<crate::expressions::Cte> = Vec::new();
1418                    for mut cte in top_with.ctes.drain(..) {
1419                        // Check if this CTE has nested CTEs to insert before it
1420                        if let Some(pos) = cte_body_collected
1421                            .iter()
1422                            .position(|(name, _)| *name == cte.alias.name)
1423                        {
1424                            let (_, mut nested) = cte_body_collected.remove(pos);
1425                            // Strip WITH from each nested CTE's body too
1426                            for nested_cte in nested.iter_mut() {
1427                                strip_with_from_cte_body(&mut nested_cte.this);
1428                            }
1429                            new_ctes.extend(nested);
1430                        }
1431                        // Also strip WITH from the parent CTE's body
1432                        strip_with_from_cte_body(&mut cte.this);
1433                        new_ctes.push(cte);
1434                    }
1435                    top_with.ctes = new_ctes;
1436                }
1437
1438                // Append collected subquery CTEs after existing ones
1439                top_with.ctes.extend(collected_ctes);
1440            }
1441
1442            Ok(Expression::Select(select))
1443        }
1444        other => Ok(other),
1445    }
1446}
1447
1448/// Recursively collect CTEs from within CTE body expressions (for deep nesting)
1449fn collect_ctes_from_cte_body(
1450    expr: &Expression,
1451    collected: &mut Vec<crate::expressions::Cte>,
1452    has_recursive: &mut bool,
1453) {
1454    if let Expression::Select(select) = expr {
1455        if let Some(ref with) = select.with {
1456            if with.recursive {
1457                *has_recursive = true;
1458            }
1459            for cte in &with.ctes {
1460                // Recursively collect from this CTE's body first (depth-first)
1461                collect_ctes_from_cte_body(&cte.this, collected, has_recursive);
1462                // Then add this CTE itself
1463                collected.push(cte.clone());
1464            }
1465        }
1466    }
1467}
1468
1469/// Strip WITH clauses from CTE body expressions
1470fn strip_with_from_cte_body(expr: &mut Expression) {
1471    if let Expression::Select(ref mut select) = expr {
1472        select.with = None;
1473    }
1474}
1475
1476/// Strip WITH clauses from nested subqueries (after hoisting to top level)
1477fn strip_nested_with_clauses(select: &mut Select, _is_top_level: bool) {
1478    // Strip WITH from FROM subqueries
1479    if let Some(ref mut from) = select.from {
1480        for expr in from.expressions.iter_mut() {
1481            strip_with_from_expr(expr);
1482        }
1483    }
1484    // Strip from JOINs
1485    for join in select.joins.iter_mut() {
1486        strip_with_from_expr(&mut join.this);
1487    }
1488    // Strip from select expressions
1489    for expr in select.expressions.iter_mut() {
1490        strip_with_from_expr(expr);
1491    }
1492    // Strip from WHERE
1493    if let Some(ref mut w) = select.where_clause {
1494        strip_with_from_expr(&mut w.this);
1495    }
1496}
1497
1498fn strip_with_from_expr(expr: &mut Expression) {
1499    match expr {
1500        Expression::Subquery(ref mut subquery) => {
1501            strip_with_from_inner_query(&mut subquery.this);
1502        }
1503        Expression::Alias(ref mut alias) => {
1504            strip_with_from_expr(&mut alias.this);
1505        }
1506        Expression::Select(ref mut select) => {
1507            // Strip WITH from this SELECT (it's nested)
1508            select.with = None;
1509            // Recurse into its subqueries
1510            strip_nested_with_clauses(select, false);
1511        }
1512        _ => {}
1513    }
1514}
1515
1516fn strip_with_from_inner_query(expr: &mut Expression) {
1517    if let Expression::Select(ref mut select) = expr {
1518        select.with = None;
1519        strip_nested_with_clauses(select, false);
1520    }
1521}
1522
1523/// Helper to recursively collect CTEs from nested subqueries
1524fn collect_nested_ctes(
1525    expr: &Expression,
1526    collected: &mut Vec<crate::expressions::Cte>,
1527    has_recursive: &mut bool,
1528    is_top_level: bool,
1529) {
1530    match expr {
1531        Expression::Select(select) => {
1532            // If this is not the top level and has a WITH clause, collect its CTEs
1533            if !is_top_level {
1534                if let Some(ref with) = select.with {
1535                    if with.recursive {
1536                        *has_recursive = true;
1537                    }
1538                    collected.extend(with.ctes.clone());
1539                }
1540            }
1541
1542            // Recurse into FROM clause
1543            if let Some(ref from) = select.from {
1544                for expr in &from.expressions {
1545                    collect_nested_ctes(expr, collected, has_recursive, false);
1546                }
1547            }
1548
1549            // Recurse into JOINs
1550            for join in &select.joins {
1551                collect_nested_ctes(&join.this, collected, has_recursive, false);
1552            }
1553
1554            // Recurse into select expressions (for subqueries in SELECT)
1555            for sel_expr in &select.expressions {
1556                collect_nested_ctes(sel_expr, collected, has_recursive, false);
1557            }
1558
1559            // Recurse into WHERE
1560            if let Some(ref where_clause) = select.where_clause {
1561                collect_nested_ctes(&where_clause.this, collected, has_recursive, false);
1562            }
1563        }
1564        Expression::Subquery(subquery) => {
1565            // Process the inner query
1566            collect_nested_ctes(&subquery.this, collected, has_recursive, false);
1567        }
1568        Expression::Alias(alias) => {
1569            collect_nested_ctes(&alias.this, collected, has_recursive, false);
1570        }
1571        // Add more expression types as needed
1572        _ => {}
1573    }
1574}
1575
1576/// Inline window definitions from WINDOW clause.
1577///
1578/// Some dialects don't support named windows. This transform inlines them:
1579///
1580/// ```sql
1581/// SELECT SUM(a) OVER w FROM t WINDOW w AS (PARTITION BY b)
1582/// ```
1583///
1584/// To:
1585///
1586/// ```sql
1587/// SELECT SUM(a) OVER (PARTITION BY b) FROM t
1588/// ```
1589///
1590/// Reference: `transforms.py:975-1003`
1591pub fn eliminate_window_clause(expr: Expression) -> Result<Expression> {
1592    match expr {
1593        Expression::Select(mut select) => {
1594            if let Some(named_windows) = select.windows.take() {
1595                // Build a map of window name -> window spec
1596                let window_map: std::collections::HashMap<String, &Over> = named_windows
1597                    .iter()
1598                    .map(|nw| (nw.name.name.to_lowercase(), &nw.spec))
1599                    .collect();
1600
1601                // Inline window references in the select expressions
1602                select.expressions = select
1603                    .expressions
1604                    .into_iter()
1605                    .map(|e| inline_window_refs(e, &window_map))
1606                    .collect();
1607            }
1608            Ok(Expression::Select(select))
1609        }
1610        other => Ok(other),
1611    }
1612}
1613
1614/// Helper function to inline window references in an expression
1615fn inline_window_refs(
1616    expr: Expression,
1617    window_map: &std::collections::HashMap<String, &Over>,
1618) -> Expression {
1619    match expr {
1620        Expression::WindowFunction(mut wf) => {
1621            // Check if this window references a named window
1622            if let Some(ref name) = wf.over.window_name {
1623                let key = name.name.to_lowercase();
1624                if let Some(named_spec) = window_map.get(&key) {
1625                    // Inherit properties from the named window
1626                    if wf.over.partition_by.is_empty() && !named_spec.partition_by.is_empty() {
1627                        wf.over.partition_by = named_spec.partition_by.clone();
1628                    }
1629                    if wf.over.order_by.is_empty() && !named_spec.order_by.is_empty() {
1630                        wf.over.order_by = named_spec.order_by.clone();
1631                    }
1632                    if wf.over.frame.is_none() && named_spec.frame.is_some() {
1633                        wf.over.frame = named_spec.frame.clone();
1634                    }
1635                    // Clear the window name reference
1636                    wf.over.window_name = None;
1637                }
1638            }
1639            Expression::WindowFunction(wf)
1640        }
1641        Expression::Alias(mut alias) => {
1642            // Recurse into aliased expressions
1643            alias.this = inline_window_refs(alias.this, window_map);
1644            Expression::Alias(alias)
1645        }
1646        // For a complete implementation, we would need to recursively visit all expressions
1647        // that can contain window functions (CASE, subqueries, etc.)
1648        other => other,
1649    }
1650}
1651
1652/// Eliminate Oracle-style (+) join marks by converting to standard JOINs.
1653///
1654/// Oracle uses (+) syntax for outer joins:
1655/// ```sql
1656/// SELECT * FROM a, b WHERE a.x = b.x(+)
1657/// ```
1658///
1659/// This is converted to standard LEFT OUTER JOIN:
1660/// ```sql
1661/// SELECT * FROM a LEFT OUTER JOIN b ON a.x = b.x
1662/// ```
1663///
1664/// Reference: `transforms.py:828-945`
1665pub fn eliminate_join_marks(expr: Expression) -> Result<Expression> {
1666    match expr {
1667        Expression::Select(mut select) => {
1668            // Check if there are any join marks in the WHERE clause
1669            let has_join_marks = select
1670                .where_clause
1671                .as_ref()
1672                .map_or(false, |w| contains_join_mark(&w.this));
1673
1674            if !has_join_marks {
1675                return Ok(Expression::Select(select));
1676            }
1677
1678            // Collect tables from FROM clause
1679            let from_tables: Vec<String> = select
1680                .from
1681                .as_ref()
1682                .map(|f| {
1683                    f.expressions
1684                        .iter()
1685                        .filter_map(|e| get_table_name(e))
1686                        .collect()
1687                })
1688                .unwrap_or_default();
1689
1690            // Extract join conditions and their marked tables from WHERE
1691            let mut join_conditions: std::collections::HashMap<String, Vec<Expression>> =
1692                std::collections::HashMap::new();
1693            let mut remaining_conditions: Vec<Expression> = Vec::new();
1694
1695            if let Some(ref where_clause) = select.where_clause {
1696                extract_join_mark_conditions(
1697                    &where_clause.this,
1698                    &mut join_conditions,
1699                    &mut remaining_conditions,
1700                );
1701            }
1702
1703            // Build new JOINs for each marked table
1704            let mut new_joins = select.joins.clone();
1705            for (table_name, conditions) in join_conditions {
1706                // Find if this table is in FROM or existing JOINs
1707                let table_in_from = from_tables.contains(&table_name);
1708
1709                if table_in_from && !conditions.is_empty() {
1710                    // Create LEFT JOIN with combined conditions
1711                    let combined_condition = conditions.into_iter().reduce(|a, b| {
1712                        Expression::And(Box::new(BinaryOp {
1713                            left: a,
1714                            right: b,
1715                            left_comments: vec![],
1716                            operator_comments: vec![],
1717                            trailing_comments: vec![],
1718                            inferred_type: None,
1719                        }))
1720                    });
1721
1722                    // Find the table in FROM and move it to a JOIN
1723                    if let Some(ref mut from) = select.from {
1724                        if let Some(pos) = from
1725                            .expressions
1726                            .iter()
1727                            .position(|e| get_table_name(e).map_or(false, |n| n == table_name))
1728                        {
1729                            if from.expressions.len() > 1 {
1730                                let join_table = from.expressions.remove(pos);
1731                                new_joins.push(crate::expressions::Join {
1732                                    this: join_table,
1733                                    kind: JoinKind::Left,
1734                                    on: combined_condition,
1735                                    using: vec![],
1736                                    use_inner_keyword: false,
1737                                    use_outer_keyword: true,
1738                                    deferred_condition: false,
1739                                    join_hint: None,
1740                                    match_condition: None,
1741                                    pivots: Vec::new(),
1742                                    comments: Vec::new(),
1743                                    nesting_group: 0,
1744                                    directed: false,
1745                                });
1746                            }
1747                        }
1748                    }
1749                }
1750            }
1751
1752            select.joins = new_joins;
1753
1754            // Update WHERE with remaining conditions
1755            if remaining_conditions.is_empty() {
1756                select.where_clause = None;
1757            } else {
1758                let combined = remaining_conditions.into_iter().reduce(|a, b| {
1759                    Expression::And(Box::new(BinaryOp {
1760                        left: a,
1761                        right: b,
1762                        left_comments: vec![],
1763                        operator_comments: vec![],
1764                        trailing_comments: vec![],
1765                        inferred_type: None,
1766                    }))
1767                });
1768                select.where_clause = combined.map(|c| Where { this: c });
1769            }
1770
1771            // Clear join marks from all columns
1772            clear_join_marks(&mut Expression::Select(select.clone()));
1773
1774            Ok(Expression::Select(select))
1775        }
1776        other => Ok(other),
1777    }
1778}
1779
1780/// Check if an expression contains any columns with join marks
1781fn contains_join_mark(expr: &Expression) -> bool {
1782    match expr {
1783        Expression::Column(col) => col.join_mark,
1784        Expression::And(op) | Expression::Or(op) => {
1785            contains_join_mark(&op.left) || contains_join_mark(&op.right)
1786        }
1787        Expression::Eq(op)
1788        | Expression::Neq(op)
1789        | Expression::Lt(op)
1790        | Expression::Lte(op)
1791        | Expression::Gt(op)
1792        | Expression::Gte(op) => contains_join_mark(&op.left) || contains_join_mark(&op.right),
1793        Expression::Not(op) => contains_join_mark(&op.this),
1794        _ => false,
1795    }
1796}
1797
1798/// Get table name from a table expression
1799fn get_table_name(expr: &Expression) -> Option<String> {
1800    match expr {
1801        Expression::Table(t) => Some(t.name.name.clone()),
1802        Expression::Alias(a) => Some(a.alias.name.clone()),
1803        _ => None,
1804    }
1805}
1806
1807/// Extract join mark conditions from WHERE clause
1808fn extract_join_mark_conditions(
1809    expr: &Expression,
1810    join_conditions: &mut std::collections::HashMap<String, Vec<Expression>>,
1811    remaining: &mut Vec<Expression>,
1812) {
1813    match expr {
1814        Expression::And(op) => {
1815            extract_join_mark_conditions(&op.left, join_conditions, remaining);
1816            extract_join_mark_conditions(&op.right, join_conditions, remaining);
1817        }
1818        _ => {
1819            if let Some(table) = get_join_mark_table(expr) {
1820                join_conditions
1821                    .entry(table)
1822                    .or_insert_with(Vec::new)
1823                    .push(expr.clone());
1824            } else {
1825                remaining.push(expr.clone());
1826            }
1827        }
1828    }
1829}
1830
1831/// Get the table name of a column with join mark in an expression
1832fn get_join_mark_table(expr: &Expression) -> Option<String> {
1833    match expr {
1834        Expression::Eq(op)
1835        | Expression::Neq(op)
1836        | Expression::Lt(op)
1837        | Expression::Lte(op)
1838        | Expression::Gt(op)
1839        | Expression::Gte(op) => {
1840            // Check both sides for join mark columns
1841            if let Expression::Column(col) = &op.left {
1842                if col.join_mark {
1843                    return col.table.as_ref().map(|t| t.name.clone());
1844                }
1845            }
1846            if let Expression::Column(col) = &op.right {
1847                if col.join_mark {
1848                    return col.table.as_ref().map(|t| t.name.clone());
1849                }
1850            }
1851            None
1852        }
1853        _ => None,
1854    }
1855}
1856
1857/// Clear join marks from all columns in an expression
1858fn clear_join_marks(expr: &mut Expression) {
1859    match expr {
1860        Expression::Column(col) => col.join_mark = false,
1861        Expression::Select(select) => {
1862            if let Some(ref mut w) = select.where_clause {
1863                clear_join_marks(&mut w.this);
1864            }
1865            for sel_expr in &mut select.expressions {
1866                clear_join_marks(sel_expr);
1867            }
1868        }
1869        Expression::And(op) | Expression::Or(op) => {
1870            clear_join_marks(&mut op.left);
1871            clear_join_marks(&mut op.right);
1872        }
1873        Expression::Eq(op)
1874        | Expression::Neq(op)
1875        | Expression::Lt(op)
1876        | Expression::Lte(op)
1877        | Expression::Gt(op)
1878        | Expression::Gte(op) => {
1879            clear_join_marks(&mut op.left);
1880            clear_join_marks(&mut op.right);
1881        }
1882        _ => {}
1883    }
1884}
1885
1886/// Add column names to recursive CTE definitions.
1887///
1888/// Uses projection output names in recursive CTE definitions to define the CTEs' columns.
1889/// This is required by some dialects that need explicit column names in recursive CTEs.
1890///
1891/// Reference: `transforms.py:576-592`
1892pub fn add_recursive_cte_column_names(expr: Expression) -> Result<Expression> {
1893    match expr {
1894        Expression::Select(mut select) => {
1895            if let Some(ref mut with) = select.with {
1896                if with.recursive {
1897                    let mut counter = 0;
1898                    for cte in &mut with.ctes {
1899                        if cte.columns.is_empty() {
1900                            // Try to get column names from the CTE's SELECT
1901                            if let Expression::Select(ref cte_select) = cte.this {
1902                                let names: Vec<Identifier> = cte_select
1903                                    .expressions
1904                                    .iter()
1905                                    .map(|e| match e {
1906                                        Expression::Alias(a) => a.alias.clone(),
1907                                        Expression::Column(c) => c.name.clone(),
1908                                        _ => {
1909                                            counter += 1;
1910                                            Identifier::new(format!("_c_{}", counter))
1911                                        }
1912                                    })
1913                                    .collect();
1914                                cte.columns = names;
1915                            }
1916                        }
1917                    }
1918                }
1919            }
1920            Ok(Expression::Select(select))
1921        }
1922        other => Ok(other),
1923    }
1924}
1925
1926/// Convert epoch string in CAST to timestamp literal.
1927///
1928/// Replaces `CAST('epoch' AS TIMESTAMP)` with `CAST('1970-01-01 00:00:00' AS TIMESTAMP)`
1929/// for dialects that don't support the 'epoch' keyword.
1930///
1931/// Reference: `transforms.py:595-604`
1932pub fn epoch_cast_to_ts(expr: Expression) -> Result<Expression> {
1933    match expr {
1934        Expression::Cast(mut cast) => {
1935            if let Expression::Literal(ref lit) = cast.this {
1936                if let Literal::String(ref s) = lit.as_ref() {
1937                    if s.to_lowercase() == "epoch" {
1938                        if is_temporal_type(&cast.to) {
1939                            cast.this = Expression::Literal(Box::new(Literal::String(
1940                                "1970-01-01 00:00:00".to_string(),
1941                            )));
1942                        }
1943                    }
1944                }
1945            }
1946            Ok(Expression::Cast(cast))
1947        }
1948        Expression::TryCast(mut try_cast) => {
1949            if let Expression::Literal(ref lit) = try_cast.this {
1950                if let Literal::String(ref s) = lit.as_ref() {
1951                    if s.to_lowercase() == "epoch" {
1952                        if is_temporal_type(&try_cast.to) {
1953                            try_cast.this = Expression::Literal(Box::new(Literal::String(
1954                                "1970-01-01 00:00:00".to_string(),
1955                            )));
1956                        }
1957                    }
1958                }
1959            }
1960            Ok(Expression::TryCast(try_cast))
1961        }
1962        other => Ok(other),
1963    }
1964}
1965
1966/// Check if a DataType is a temporal type (DATE, TIMESTAMP, etc.)
1967fn is_temporal_type(dt: &DataType) -> bool {
1968    matches!(
1969        dt,
1970        DataType::Date | DataType::Timestamp { .. } | DataType::Time { .. }
1971    )
1972}
1973
1974/// Ensure boolean values in conditions.
1975///
1976/// Converts numeric values used in conditions into explicit boolean expressions.
1977/// For dialects that require explicit booleans in WHERE clauses.
1978///
1979/// Converts:
1980/// ```sql
1981/// WHERE column
1982/// ```
1983/// To:
1984/// ```sql
1985/// WHERE column <> 0
1986/// ```
1987///
1988/// And:
1989/// ```sql
1990/// WHERE 1
1991/// ```
1992/// To:
1993/// ```sql
1994/// WHERE 1 <> 0
1995/// ```
1996///
1997/// Reference: `transforms.py:703-721`
1998pub fn ensure_bools(expr: Expression) -> Result<Expression> {
1999    // First, recursively process Case WHEN conditions throughout the expression tree
2000    let expr = ensure_bools_in_case(expr);
2001    match expr {
2002        Expression::Select(mut select) => {
2003            // Transform WHERE clause condition
2004            if let Some(ref mut where_clause) = select.where_clause {
2005                where_clause.this = ensure_bool_condition(where_clause.this.clone());
2006            }
2007            // Transform HAVING clause condition
2008            if let Some(ref mut having) = select.having {
2009                having.this = ensure_bool_condition(having.this.clone());
2010            }
2011            Ok(Expression::Select(select))
2012        }
2013        // Top-level AND/OR/NOT expressions also need ensure_bools processing
2014        Expression::And(_) | Expression::Or(_) | Expression::Not(_) => {
2015            Ok(ensure_bool_condition(expr))
2016        }
2017        other => Ok(other),
2018    }
2019}
2020
2021/// Recursively walk the expression tree to find Case expressions and apply
2022/// ensure_bool_condition to their WHEN conditions. This ensures that
2023/// `CASE WHEN TRUE` becomes `CASE WHEN (1 = 1)` etc.
2024fn ensure_bools_in_case(expr: Expression) -> Expression {
2025    match expr {
2026        Expression::Case(mut case) => {
2027            case.whens = case
2028                .whens
2029                .into_iter()
2030                .map(|(condition, result)| {
2031                    let new_condition = ensure_bool_condition(ensure_bools_in_case(condition));
2032                    let new_result = ensure_bools_in_case(result);
2033                    (new_condition, new_result)
2034                })
2035                .collect();
2036            if let Some(else_expr) = case.else_ {
2037                case.else_ = Some(ensure_bools_in_case(else_expr));
2038            }
2039            Expression::Case(Box::new(*case))
2040        }
2041        Expression::Select(mut select) => {
2042            // Recursively process expressions in the SELECT list
2043            select.expressions = select
2044                .expressions
2045                .into_iter()
2046                .map(ensure_bools_in_case)
2047                .collect();
2048            // Process WHERE/HAVING are handled by ensure_bools main function
2049            Expression::Select(select)
2050        }
2051        Expression::Alias(mut alias) => {
2052            alias.this = ensure_bools_in_case(alias.this);
2053            Expression::Alias(alias)
2054        }
2055        Expression::Paren(mut paren) => {
2056            paren.this = ensure_bools_in_case(paren.this);
2057            Expression::Paren(paren)
2058        }
2059        other => other,
2060    }
2061}
2062
2063/// Helper to check if an expression is inherently boolean (returns a boolean value).
2064/// Inherently boolean expressions include comparisons, predicates, logical operators, etc.
2065fn is_boolean_expression(expr: &Expression) -> bool {
2066    matches!(
2067        expr,
2068        Expression::Eq(_)
2069            | Expression::Neq(_)
2070            | Expression::Lt(_)
2071            | Expression::Lte(_)
2072            | Expression::Gt(_)
2073            | Expression::Gte(_)
2074            | Expression::Is(_)
2075            | Expression::IsNull(_)
2076            | Expression::IsTrue(_)
2077            | Expression::IsFalse(_)
2078            | Expression::Like(_)
2079            | Expression::ILike(_)
2080            | Expression::SimilarTo(_)
2081            | Expression::Glob(_)
2082            | Expression::RegexpLike(_)
2083            | Expression::In(_)
2084            | Expression::Between(_)
2085            | Expression::Exists(_)
2086            | Expression::And(_)
2087            | Expression::Or(_)
2088            | Expression::Not(_)
2089            | Expression::Any(_)
2090            | Expression::All(_)
2091            | Expression::EqualNull(_)
2092    )
2093}
2094
2095/// Helper to wrap a non-boolean expression with `<> 0`
2096fn wrap_neq_zero(expr: Expression) -> Expression {
2097    Expression::Neq(Box::new(BinaryOp {
2098        left: expr,
2099        right: Expression::Literal(Box::new(Literal::Number("0".to_string()))),
2100        left_comments: vec![],
2101        operator_comments: vec![],
2102        trailing_comments: vec![],
2103        inferred_type: None,
2104    }))
2105}
2106
2107/// Helper to convert a condition expression to ensure it's boolean.
2108///
2109/// In TSQL, conditions in WHERE/HAVING must be boolean expressions.
2110/// Non-boolean expressions (columns, literals, casts, function calls, etc.)
2111/// are wrapped with `<> 0`. Boolean literals are converted to `(1 = 1)` or `(1 = 0)`.
2112fn ensure_bool_condition(expr: Expression) -> Expression {
2113    match expr {
2114        // For AND/OR, recursively process children
2115        Expression::And(op) => {
2116            let new_op = BinaryOp {
2117                left: ensure_bool_condition(op.left.clone()),
2118                right: ensure_bool_condition(op.right.clone()),
2119                left_comments: op.left_comments.clone(),
2120                operator_comments: op.operator_comments.clone(),
2121                trailing_comments: op.trailing_comments.clone(),
2122                inferred_type: None,
2123            };
2124            Expression::And(Box::new(new_op))
2125        }
2126        Expression::Or(op) => {
2127            let new_op = BinaryOp {
2128                left: ensure_bool_condition(op.left.clone()),
2129                right: ensure_bool_condition(op.right.clone()),
2130                left_comments: op.left_comments.clone(),
2131                operator_comments: op.operator_comments.clone(),
2132                trailing_comments: op.trailing_comments.clone(),
2133                inferred_type: None,
2134            };
2135            Expression::Or(Box::new(new_op))
2136        }
2137        // For NOT, recursively process the inner expression
2138        Expression::Not(op) => Expression::Not(Box::new(crate::expressions::UnaryOp {
2139            this: ensure_bool_condition(op.this.clone()),
2140            inferred_type: None,
2141        })),
2142        // For Paren, recurse into inner expression
2143        Expression::Paren(paren) => Expression::Paren(Box::new(crate::expressions::Paren {
2144            this: ensure_bool_condition(paren.this.clone()),
2145            trailing_comments: paren.trailing_comments.clone(),
2146        })),
2147        // Boolean literals: true -> (1 = 1), false -> (1 = 0)
2148        Expression::Boolean(BooleanLiteral { value: true }) => {
2149            Expression::Paren(Box::new(crate::expressions::Paren {
2150                this: Expression::Eq(Box::new(BinaryOp {
2151                    left: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
2152                    right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
2153                    left_comments: vec![],
2154                    operator_comments: vec![],
2155                    trailing_comments: vec![],
2156                    inferred_type: None,
2157                })),
2158                trailing_comments: vec![],
2159            }))
2160        }
2161        Expression::Boolean(BooleanLiteral { value: false }) => {
2162            Expression::Paren(Box::new(crate::expressions::Paren {
2163                this: Expression::Eq(Box::new(BinaryOp {
2164                    left: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
2165                    right: Expression::Literal(Box::new(Literal::Number("0".to_string()))),
2166                    left_comments: vec![],
2167                    operator_comments: vec![],
2168                    trailing_comments: vec![],
2169                    inferred_type: None,
2170                })),
2171                trailing_comments: vec![],
2172            }))
2173        }
2174        // Already boolean expressions pass through unchanged
2175        ref e if is_boolean_expression(e) => expr,
2176        // Everything else (Column, Identifier, Cast, Literal::Number, function calls, etc.)
2177        // gets wrapped with <> 0
2178        _ => wrap_neq_zero(expr),
2179    }
2180}
2181
2182/// Remove table qualifiers from column references.
2183///
2184/// Converts `table.column` to just `column` throughout the expression tree.
2185///
2186/// Reference: `transforms.py:724-730`
2187pub fn unqualify_columns(expr: Expression) -> Result<Expression> {
2188    Ok(unqualify_columns_recursive(expr))
2189}
2190
2191/// Recursively remove table qualifiers from column references
2192fn unqualify_columns_recursive(expr: Expression) -> Expression {
2193    match expr {
2194        Expression::Column(mut col) => {
2195            col.table = None;
2196            Expression::Column(col)
2197        }
2198        Expression::Select(mut select) => {
2199            select.expressions = select
2200                .expressions
2201                .into_iter()
2202                .map(unqualify_columns_recursive)
2203                .collect();
2204            if let Some(ref mut where_clause) = select.where_clause {
2205                where_clause.this = unqualify_columns_recursive(where_clause.this.clone());
2206            }
2207            if let Some(ref mut having) = select.having {
2208                having.this = unqualify_columns_recursive(having.this.clone());
2209            }
2210            if let Some(ref mut group_by) = select.group_by {
2211                group_by.expressions = group_by
2212                    .expressions
2213                    .iter()
2214                    .cloned()
2215                    .map(unqualify_columns_recursive)
2216                    .collect();
2217            }
2218            if let Some(ref mut order_by) = select.order_by {
2219                order_by.expressions = order_by
2220                    .expressions
2221                    .iter()
2222                    .map(|o| crate::expressions::Ordered {
2223                        this: unqualify_columns_recursive(o.this.clone()),
2224                        desc: o.desc,
2225                        nulls_first: o.nulls_first,
2226                        explicit_asc: o.explicit_asc,
2227                        with_fill: o.with_fill.clone(),
2228                    })
2229                    .collect();
2230            }
2231            for join in &mut select.joins {
2232                if let Some(ref mut on) = join.on {
2233                    *on = unqualify_columns_recursive(on.clone());
2234                }
2235            }
2236            Expression::Select(select)
2237        }
2238        Expression::Alias(mut alias) => {
2239            alias.this = unqualify_columns_recursive(alias.this);
2240            Expression::Alias(alias)
2241        }
2242        // Binary operations
2243        Expression::And(op) => Expression::And(Box::new(unqualify_binary_op(*op))),
2244        Expression::Or(op) => Expression::Or(Box::new(unqualify_binary_op(*op))),
2245        Expression::Eq(op) => Expression::Eq(Box::new(unqualify_binary_op(*op))),
2246        Expression::Neq(op) => Expression::Neq(Box::new(unqualify_binary_op(*op))),
2247        Expression::Lt(op) => Expression::Lt(Box::new(unqualify_binary_op(*op))),
2248        Expression::Lte(op) => Expression::Lte(Box::new(unqualify_binary_op(*op))),
2249        Expression::Gt(op) => Expression::Gt(Box::new(unqualify_binary_op(*op))),
2250        Expression::Gte(op) => Expression::Gte(Box::new(unqualify_binary_op(*op))),
2251        Expression::Add(op) => Expression::Add(Box::new(unqualify_binary_op(*op))),
2252        Expression::Sub(op) => Expression::Sub(Box::new(unqualify_binary_op(*op))),
2253        Expression::Mul(op) => Expression::Mul(Box::new(unqualify_binary_op(*op))),
2254        Expression::Div(op) => Expression::Div(Box::new(unqualify_binary_op(*op))),
2255        // Functions
2256        Expression::Function(mut func) => {
2257            func.args = func
2258                .args
2259                .into_iter()
2260                .map(unqualify_columns_recursive)
2261                .collect();
2262            Expression::Function(func)
2263        }
2264        Expression::AggregateFunction(mut func) => {
2265            func.args = func
2266                .args
2267                .into_iter()
2268                .map(unqualify_columns_recursive)
2269                .collect();
2270            Expression::AggregateFunction(func)
2271        }
2272        Expression::Case(mut case) => {
2273            case.whens = case
2274                .whens
2275                .into_iter()
2276                .map(|(cond, result)| {
2277                    (
2278                        unqualify_columns_recursive(cond),
2279                        unqualify_columns_recursive(result),
2280                    )
2281                })
2282                .collect();
2283            if let Some(else_expr) = case.else_ {
2284                case.else_ = Some(unqualify_columns_recursive(else_expr));
2285            }
2286            Expression::Case(case)
2287        }
2288        // Other expressions pass through unchanged
2289        other => other,
2290    }
2291}
2292
2293/// Helper to unqualify columns in a binary operation
2294fn unqualify_binary_op(mut op: BinaryOp) -> BinaryOp {
2295    op.left = unqualify_columns_recursive(op.left);
2296    op.right = unqualify_columns_recursive(op.right);
2297    op
2298}
2299
2300/// Convert UNNEST(GENERATE_DATE_ARRAY(...)) to recursive CTE.
2301///
2302/// For dialects that don't support GENERATE_DATE_ARRAY, this converts:
2303/// ```sql
2304/// SELECT * FROM UNNEST(GENERATE_DATE_ARRAY('2024-01-01', '2024-01-31', INTERVAL 1 DAY)) AS d(date_value)
2305/// ```
2306/// To a recursive CTE:
2307/// ```sql
2308/// WITH RECURSIVE _generated_dates(date_value) AS (
2309///     SELECT CAST('2024-01-01' AS DATE) AS date_value
2310///     UNION ALL
2311///     SELECT CAST(DATE_ADD(date_value, 1, DAY) AS DATE)
2312///     FROM _generated_dates
2313///     WHERE CAST(DATE_ADD(date_value, 1, DAY) AS DATE) <= CAST('2024-01-31' AS DATE)
2314/// )
2315/// SELECT date_value FROM _generated_dates
2316/// ```
2317///
2318/// Reference: `transforms.py:68-122`
2319pub fn unnest_generate_date_array_using_recursive_cte(expr: Expression) -> Result<Expression> {
2320    match expr {
2321        Expression::Select(mut select) => {
2322            let mut cte_count = 0;
2323            let mut new_ctes: Vec<crate::expressions::Cte> = Vec::new();
2324
2325            // Process existing CTE bodies first (to handle CTE-wrapped GENERATE_DATE_ARRAY)
2326            if let Some(ref mut with) = select.with {
2327                for cte in &mut with.ctes {
2328                    process_expression_for_gda(&mut cte.this, &mut cte_count, &mut new_ctes);
2329                }
2330            }
2331
2332            // Process FROM clause
2333            if let Some(ref mut from) = select.from {
2334                for table_expr in &mut from.expressions {
2335                    if let Some((cte, replacement)) =
2336                        try_convert_generate_date_array(table_expr, &mut cte_count)
2337                    {
2338                        new_ctes.push(cte);
2339                        *table_expr = replacement;
2340                    }
2341                }
2342            }
2343
2344            // Process JOINs
2345            for join in &mut select.joins {
2346                if let Some((cte, replacement)) =
2347                    try_convert_generate_date_array(&join.this, &mut cte_count)
2348                {
2349                    new_ctes.push(cte);
2350                    join.this = replacement;
2351                }
2352            }
2353
2354            // Add collected CTEs to the WITH clause
2355            if !new_ctes.is_empty() {
2356                let with_clause = select.with.get_or_insert_with(|| crate::expressions::With {
2357                    ctes: Vec::new(),
2358                    recursive: true, // Recursive CTEs
2359                    leading_comments: vec![],
2360                    search: None,
2361                });
2362                with_clause.recursive = true;
2363
2364                // Prepend new CTEs before existing ones
2365                let mut all_ctes = new_ctes;
2366                all_ctes.append(&mut with_clause.ctes);
2367                with_clause.ctes = all_ctes;
2368            }
2369
2370            Ok(Expression::Select(select))
2371        }
2372        other => Ok(other),
2373    }
2374}
2375
2376/// Recursively process an expression tree to find and convert UNNEST(GENERATE_DATE_ARRAY)
2377/// inside CTE bodies, subqueries, etc.
2378fn process_expression_for_gda(
2379    expr: &mut Expression,
2380    cte_count: &mut usize,
2381    new_ctes: &mut Vec<crate::expressions::Cte>,
2382) {
2383    match expr {
2384        Expression::Select(ref mut select) => {
2385            // Process FROM clause
2386            if let Some(ref mut from) = select.from {
2387                for table_expr in &mut from.expressions {
2388                    if let Some((cte, replacement)) =
2389                        try_convert_generate_date_array(table_expr, cte_count)
2390                    {
2391                        new_ctes.push(cte);
2392                        *table_expr = replacement;
2393                    }
2394                }
2395            }
2396            // Process JOINs
2397            for join in &mut select.joins {
2398                if let Some((cte, replacement)) =
2399                    try_convert_generate_date_array(&join.this, cte_count)
2400                {
2401                    new_ctes.push(cte);
2402                    join.this = replacement;
2403                }
2404            }
2405        }
2406        Expression::Union(ref mut u) => {
2407            process_expression_for_gda(&mut u.left, cte_count, new_ctes);
2408            process_expression_for_gda(&mut u.right, cte_count, new_ctes);
2409        }
2410        Expression::Subquery(ref mut sq) => {
2411            process_expression_for_gda(&mut sq.this, cte_count, new_ctes);
2412        }
2413        _ => {}
2414    }
2415}
2416
2417/// Try to convert an UNNEST(GENERATE_DATE_ARRAY(...)) to a recursive CTE reference.
2418/// `column_name_override` allows the caller to specify a custom column name (from alias).
2419fn try_convert_generate_date_array(
2420    expr: &Expression,
2421    cte_count: &mut usize,
2422) -> Option<(crate::expressions::Cte, Expression)> {
2423    try_convert_generate_date_array_with_name(expr, cte_count, None)
2424}
2425
2426fn try_convert_generate_date_array_with_name(
2427    expr: &Expression,
2428    cte_count: &mut usize,
2429    column_name_override: Option<&str>,
2430) -> Option<(crate::expressions::Cte, Expression)> {
2431    // Helper: extract (start, end, step) from GENERATE_DATE_ARRAY/GenerateSeries variants
2432    fn extract_gda_args(
2433        inner: &Expression,
2434    ) -> Option<(&Expression, &Expression, Option<&Expression>)> {
2435        match inner {
2436            Expression::GenerateDateArray(gda) => {
2437                let start = gda.start.as_ref()?;
2438                let end = gda.end.as_ref()?;
2439                let step = gda.step.as_deref();
2440                Some((start, end, step))
2441            }
2442            Expression::GenerateSeries(gs) => {
2443                let start = gs.start.as_deref()?;
2444                let end = gs.end.as_deref()?;
2445                let step = gs.step.as_deref();
2446                Some((start, end, step))
2447            }
2448            Expression::Function(f) if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") => {
2449                if f.args.len() >= 2 {
2450                    let start = &f.args[0];
2451                    let end = &f.args[1];
2452                    let step = f.args.get(2);
2453                    Some((start, end, step))
2454                } else {
2455                    None
2456                }
2457            }
2458            _ => None,
2459        }
2460    }
2461
2462    // Look for UNNEST containing GENERATE_DATE_ARRAY
2463    if let Expression::Unnest(unnest) = expr {
2464        if let Some((start, end, step_opt)) = extract_gda_args(&unnest.this) {
2465            let start = start;
2466            let end = end;
2467            let step: Option<&Expression> = step_opt;
2468
2469            // Generate CTE name
2470            let cte_name = if *cte_count == 0 {
2471                "_generated_dates".to_string()
2472            } else {
2473                format!("_generated_dates_{}", cte_count)
2474            };
2475            *cte_count += 1;
2476
2477            let column_name =
2478                Identifier::new(column_name_override.unwrap_or("date_value").to_string());
2479
2480            // Helper: wrap expression in CAST(... AS DATE) unless already a date literal or CAST to DATE
2481            let cast_to_date = |expr: &Expression| -> Expression {
2482                match expr {
2483                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
2484                        // DATE '...' -> convert to CAST('...' AS DATE) to match expected output
2485                        if let Expression::Literal(lit) = expr {
2486                            if let Literal::Date(d) = lit.as_ref() {
2487                                Expression::Cast(Box::new(Cast {
2488                                    this: Expression::Literal(Box::new(Literal::String(d.clone()))),
2489                                    to: DataType::Date,
2490                                    trailing_comments: vec![],
2491                                    double_colon_syntax: false,
2492                                    format: None,
2493                                    default: None,
2494                                    inferred_type: None,
2495                                }))
2496                            } else {
2497                                expr.clone()
2498                            }
2499                        } else {
2500                            unreachable!()
2501                        }
2502                    }
2503                    Expression::Cast(c) if matches!(c.to, DataType::Date) => expr.clone(),
2504                    _ => Expression::Cast(Box::new(Cast {
2505                        this: expr.clone(),
2506                        to: DataType::Date,
2507                        trailing_comments: vec![],
2508                        double_colon_syntax: false,
2509                        format: None,
2510                        default: None,
2511                        inferred_type: None,
2512                    })),
2513                }
2514            };
2515
2516            // Build base case: SELECT CAST(start AS DATE) AS date_value
2517            let base_select = Select {
2518                expressions: vec![Expression::Alias(Box::new(crate::expressions::Alias {
2519                    this: cast_to_date(start),
2520                    alias: column_name.clone(),
2521                    column_aliases: vec![],
2522                    alias_explicit_as: false,
2523                    alias_keyword: None,
2524                    pre_alias_comments: vec![],
2525                    trailing_comments: vec![],
2526                    inferred_type: None,
2527                }))],
2528                ..Select::new()
2529            };
2530
2531            // Normalize interval: convert String("1") -> Number("1") so it generates without quotes
2532            let normalize_interval = |expr: &Expression| -> Expression {
2533                if let Expression::Interval(ref iv) = expr {
2534                    let mut iv_clone = iv.as_ref().clone();
2535                    if let Some(Expression::Literal(ref lit)) = iv_clone.this {
2536                        if let Literal::String(ref s) = lit.as_ref() {
2537                            // Convert numeric strings to Number literals for unquoted output
2538                            if s.parse::<f64>().is_ok() {
2539                                iv_clone.this =
2540                                    Some(Expression::Literal(Box::new(Literal::Number(s.clone()))));
2541                            }
2542                        }
2543                    }
2544                    Expression::Interval(Box::new(iv_clone))
2545                } else {
2546                    expr.clone()
2547                }
2548            };
2549
2550            // Build recursive case: DateAdd(date_value, count, unit) from CTE where result <= end
2551            // Extract interval unit and count from step expression
2552            let normalized_step = step.map(|s| normalize_interval(s)).unwrap_or_else(|| {
2553                Expression::Interval(Box::new(crate::expressions::Interval {
2554                    this: Some(Expression::Literal(Box::new(Literal::Number(
2555                        "1".to_string(),
2556                    )))),
2557                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
2558                        unit: crate::expressions::IntervalUnit::Day,
2559                        use_plural: false,
2560                    }),
2561                }))
2562            });
2563
2564            // Extract unit and count from interval expression to build DateAddFunc
2565            let (add_unit, add_count) = extract_interval_unit_and_count(&normalized_step);
2566
2567            let date_add_expr = Expression::DateAdd(Box::new(crate::expressions::DateAddFunc {
2568                this: Expression::Column(Box::new(crate::expressions::Column {
2569                    name: column_name.clone(),
2570                    table: None,
2571                    join_mark: false,
2572                    trailing_comments: vec![],
2573                    span: None,
2574                    inferred_type: None,
2575                })),
2576                interval: add_count,
2577                unit: add_unit,
2578            }));
2579
2580            let cast_date_add = Expression::Cast(Box::new(Cast {
2581                this: date_add_expr.clone(),
2582                to: DataType::Date,
2583                trailing_comments: vec![],
2584                double_colon_syntax: false,
2585                format: None,
2586                default: None,
2587                inferred_type: None,
2588            }));
2589
2590            let recursive_select = Select {
2591                expressions: vec![cast_date_add.clone()],
2592                from: Some(From {
2593                    expressions: vec![Expression::Table(Box::new(
2594                        crate::expressions::TableRef::new(&cte_name),
2595                    ))],
2596                }),
2597                where_clause: Some(Where {
2598                    this: Expression::Lte(Box::new(BinaryOp {
2599                        left: cast_date_add,
2600                        right: cast_to_date(end),
2601                        left_comments: vec![],
2602                        operator_comments: vec![],
2603                        trailing_comments: vec![],
2604                        inferred_type: None,
2605                    })),
2606                }),
2607                ..Select::new()
2608            };
2609
2610            // Build UNION ALL of base and recursive
2611            let union = crate::expressions::Union {
2612                left: Expression::Select(Box::new(base_select)),
2613                right: Expression::Select(Box::new(recursive_select)),
2614                all: true, // UNION ALL
2615                distinct: false,
2616                with: None,
2617                order_by: None,
2618                limit: None,
2619                offset: None,
2620                distribute_by: None,
2621                sort_by: None,
2622                cluster_by: None,
2623                by_name: false,
2624                side: None,
2625                kind: None,
2626                corresponding: false,
2627                strict: false,
2628                on_columns: Vec::new(),
2629            };
2630
2631            // Create CTE
2632            let cte = crate::expressions::Cte {
2633                this: Expression::Union(Box::new(union)),
2634                alias: Identifier::new(cte_name.clone()),
2635                columns: vec![column_name.clone()],
2636                materialized: None,
2637                key_expressions: Vec::new(),
2638                alias_first: true,
2639                comments: Vec::new(),
2640            };
2641
2642            // Create replacement: SELECT date_value FROM cte_name
2643            let replacement_select = Select {
2644                expressions: vec![Expression::Column(Box::new(crate::expressions::Column {
2645                    name: column_name,
2646                    table: None,
2647                    join_mark: false,
2648                    trailing_comments: vec![],
2649                    span: None,
2650                    inferred_type: None,
2651                }))],
2652                from: Some(From {
2653                    expressions: vec![Expression::Table(Box::new(
2654                        crate::expressions::TableRef::new(&cte_name),
2655                    ))],
2656                }),
2657                ..Select::new()
2658            };
2659
2660            let replacement = Expression::Subquery(Box::new(Subquery {
2661                this: Expression::Select(Box::new(replacement_select)),
2662                alias: Some(Identifier::new(cte_name)),
2663                column_aliases: vec![],
2664                alias_explicit_as: false,
2665                alias_keyword: None,
2666                order_by: None,
2667                limit: None,
2668                offset: None,
2669                distribute_by: None,
2670                sort_by: None,
2671                cluster_by: None,
2672                lateral: false,
2673                modifiers_inside: false,
2674                trailing_comments: vec![],
2675                inferred_type: None,
2676            }));
2677
2678            return Some((cte, replacement));
2679        }
2680    }
2681
2682    // Also check for aliased UNNEST like UNNEST(...) AS _q(date_week)
2683    if let Expression::Alias(alias) = expr {
2684        // Extract column name from alias column_aliases if present
2685        let col_name = alias.column_aliases.first().map(|id| id.name.as_str());
2686        if let Some((cte, replacement)) =
2687            try_convert_generate_date_array_with_name(&alias.this, cte_count, col_name)
2688        {
2689            // If we extracted a column name from the alias, don't preserve the outer alias
2690            // since the CTE now uses that column name directly
2691            if col_name.is_some() {
2692                return Some((cte, replacement));
2693            }
2694            let new_alias = Expression::Alias(Box::new(crate::expressions::Alias {
2695                this: replacement,
2696                alias: alias.alias.clone(),
2697                column_aliases: alias.column_aliases.clone(),
2698                alias_explicit_as: false,
2699                alias_keyword: None,
2700                pre_alias_comments: alias.pre_alias_comments.clone(),
2701                trailing_comments: alias.trailing_comments.clone(),
2702                inferred_type: None,
2703            }));
2704            return Some((cte, new_alias));
2705        }
2706    }
2707
2708    None
2709}
2710
2711/// Extract interval unit and count from an interval expression.
2712/// Handles both structured intervals (with separate unit field) and
2713/// string-encoded intervals like `INTERVAL '1 WEEK'` where unit is None
2714/// and the value contains both count and unit.
2715fn extract_interval_unit_and_count(
2716    expr: &Expression,
2717) -> (crate::expressions::IntervalUnit, Expression) {
2718    use crate::expressions::{IntervalUnit, IntervalUnitSpec, Literal};
2719
2720    if let Expression::Interval(ref iv) = expr {
2721        // First try: structured unit field
2722        if let Some(ref unit_spec) = iv.unit {
2723            if let IntervalUnitSpec::Simple { unit, .. } = unit_spec {
2724                let count = match &iv.this {
2725                    Some(e) => e.clone(),
2726                    None => Expression::Literal(Box::new(Literal::Number("1".to_string()))),
2727                };
2728                return (unit.clone(), count);
2729            }
2730        }
2731
2732        // Second try: parse from string value like "1 WEEK" or "1"
2733        if let Some(ref val_expr) = iv.this {
2734            match val_expr {
2735                Expression::Literal(lit)
2736                    if matches!(lit.as_ref(), Literal::String(_) | Literal::Number(_)) =>
2737                {
2738                    let s = match lit.as_ref() {
2739                        Literal::String(s) | Literal::Number(s) => s,
2740                        _ => unreachable!(),
2741                    };
2742                    // Try to parse "count unit" format like "1 WEEK", "1 MONTH"
2743                    let parts: Vec<&str> = s.trim().splitn(2, char::is_whitespace).collect();
2744                    if parts.len() == 2 {
2745                        let count_str = parts[0].trim();
2746                        let unit_str = parts[1].trim().to_uppercase();
2747                        let unit = match unit_str.as_str() {
2748                            "YEAR" | "YEARS" => IntervalUnit::Year,
2749                            "QUARTER" | "QUARTERS" => IntervalUnit::Quarter,
2750                            "MONTH" | "MONTHS" => IntervalUnit::Month,
2751                            "WEEK" | "WEEKS" => IntervalUnit::Week,
2752                            "DAY" | "DAYS" => IntervalUnit::Day,
2753                            "HOUR" | "HOURS" => IntervalUnit::Hour,
2754                            "MINUTE" | "MINUTES" => IntervalUnit::Minute,
2755                            "SECOND" | "SECONDS" => IntervalUnit::Second,
2756                            "MILLISECOND" | "MILLISECONDS" => IntervalUnit::Millisecond,
2757                            "MICROSECOND" | "MICROSECONDS" => IntervalUnit::Microsecond,
2758                            _ => IntervalUnit::Day,
2759                        };
2760                        return (
2761                            unit,
2762                            Expression::Literal(Box::new(Literal::Number(count_str.to_string()))),
2763                        );
2764                    }
2765                    // Just a number with no unit - default to Day
2766                    if s.parse::<f64>().is_ok() {
2767                        return (
2768                            IntervalUnit::Day,
2769                            Expression::Literal(Box::new(Literal::Number(s.clone()))),
2770                        );
2771                    }
2772                }
2773                _ => {}
2774            }
2775        }
2776
2777        // Fallback
2778        (
2779            IntervalUnit::Day,
2780            Expression::Literal(Box::new(Literal::Number("1".to_string()))),
2781        )
2782    } else {
2783        (
2784            IntervalUnit::Day,
2785            Expression::Literal(Box::new(Literal::Number("1".to_string()))),
2786        )
2787    }
2788}
2789
2790/// Convert ILIKE to LOWER(x) LIKE LOWER(y).
2791///
2792/// For dialects that don't support ILIKE (case-insensitive LIKE), this converts:
2793/// ```sql
2794/// SELECT * FROM t WHERE x ILIKE '%pattern%'
2795/// ```
2796/// To:
2797/// ```sql
2798/// SELECT * FROM t WHERE LOWER(x) LIKE LOWER('%pattern%')
2799/// ```
2800///
2801/// Reference: `generator.py:no_ilike_sql()`
2802pub fn no_ilike_sql(expr: Expression) -> Result<Expression> {
2803    match expr {
2804        Expression::ILike(ilike) => {
2805            // Create LOWER(left) LIKE LOWER(right)
2806            let lower_left = Expression::Function(Box::new(crate::expressions::Function {
2807                name: "LOWER".to_string(),
2808                args: vec![ilike.left],
2809                distinct: false,
2810                trailing_comments: vec![],
2811                use_bracket_syntax: false,
2812                no_parens: false,
2813                quoted: false,
2814                span: None,
2815                inferred_type: None,
2816            }));
2817
2818            let lower_right = Expression::Function(Box::new(crate::expressions::Function {
2819                name: "LOWER".to_string(),
2820                args: vec![ilike.right],
2821                distinct: false,
2822                trailing_comments: vec![],
2823                use_bracket_syntax: false,
2824                no_parens: false,
2825                quoted: false,
2826                span: None,
2827                inferred_type: None,
2828            }));
2829
2830            Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
2831                left: lower_left,
2832                right: lower_right,
2833                escape: ilike.escape,
2834                quantifier: ilike.quantifier,
2835                inferred_type: None,
2836            })))
2837        }
2838        other => Ok(other),
2839    }
2840}
2841
2842/// Convert TryCast to Cast.
2843///
2844/// For dialects that don't support TRY_CAST (safe cast that returns NULL on error),
2845/// this converts TRY_CAST to regular CAST. Note: This may cause runtime errors
2846/// for invalid casts that TRY_CAST would handle gracefully.
2847///
2848/// Reference: `generator.py:no_trycast_sql()`
2849pub fn no_trycast_sql(expr: Expression) -> Result<Expression> {
2850    match expr {
2851        Expression::TryCast(try_cast) => Ok(Expression::Cast(try_cast)),
2852        other => Ok(other),
2853    }
2854}
2855
2856/// Convert SafeCast to Cast.
2857///
2858/// For dialects that don't support SAFE_CAST (BigQuery's safe cast syntax),
2859/// this converts SAFE_CAST to regular CAST.
2860pub fn no_safe_cast_sql(expr: Expression) -> Result<Expression> {
2861    match expr {
2862        Expression::SafeCast(safe_cast) => Ok(Expression::Cast(safe_cast)),
2863        other => Ok(other),
2864    }
2865}
2866
2867/// Convert COMMENT ON statements to inline comments.
2868///
2869/// For dialects that don't support COMMENT ON syntax, this can transform
2870/// comment statements into inline comments or skip them entirely.
2871///
2872/// Reference: `generator.py:no_comment_column_constraint_sql()`
2873pub fn no_comment_column_constraint(expr: Expression) -> Result<Expression> {
2874    // For now, just pass through - comment handling is done in generator
2875    Ok(expr)
2876}
2877
2878/// Convert TABLE GENERATE_SERIES to UNNEST(GENERATE_SERIES(...)).
2879///
2880/// Some dialects use GENERATE_SERIES as a table-valued function, while others
2881/// prefer the UNNEST syntax. This converts:
2882/// ```sql
2883/// SELECT * FROM GENERATE_SERIES(1, 10) AS t(n)
2884/// ```
2885/// To:
2886/// ```sql
2887/// SELECT * FROM UNNEST(GENERATE_SERIES(1, 10)) AS _u(n)
2888/// ```
2889///
2890/// Reference: `transforms.py:125-135`
2891pub fn unnest_generate_series(expr: Expression) -> Result<Expression> {
2892    // Convert TABLE GENERATE_SERIES to UNNEST(GENERATE_SERIES(...))
2893    // This handles the case where GENERATE_SERIES is used as a table-valued function
2894    match expr {
2895        Expression::Table(ref table) => {
2896            // Check if the table name matches GENERATE_SERIES pattern
2897            // In practice, this would be Expression::GenerateSeries wrapped in a Table context
2898            if table.name.name.to_uppercase() == "GENERATE_SERIES" {
2899                // Create UNNEST wrapper
2900                let unnest = Expression::Unnest(Box::new(UnnestFunc {
2901                    this: expr.clone(),
2902                    expressions: Vec::new(),
2903                    with_ordinality: false,
2904                    alias: None,
2905                    offset_alias: None,
2906                }));
2907
2908                // If there's an alias, wrap in alias
2909                return Ok(Expression::Alias(Box::new(crate::expressions::Alias {
2910                    this: unnest,
2911                    alias: Identifier::new("_u".to_string()),
2912                    column_aliases: vec![],
2913                    alias_explicit_as: false,
2914                    alias_keyword: None,
2915                    pre_alias_comments: vec![],
2916                    trailing_comments: vec![],
2917                    inferred_type: None,
2918                })));
2919            }
2920            Ok(expr)
2921        }
2922        Expression::GenerateSeries(gs) => {
2923            // Wrap GenerateSeries directly in UNNEST
2924            let unnest = Expression::Unnest(Box::new(UnnestFunc {
2925                this: Expression::GenerateSeries(gs),
2926                expressions: Vec::new(),
2927                with_ordinality: false,
2928                alias: None,
2929                offset_alias: None,
2930            }));
2931            Ok(unnest)
2932        }
2933        other => Ok(other),
2934    }
2935}
2936
2937/// Convert UNNEST(GENERATE_SERIES(start, end, step)) to a subquery for PostgreSQL.
2938///
2939/// PostgreSQL's GENERATE_SERIES returns rows directly, so UNNEST wrapping is unnecessary.
2940/// Instead, convert to:
2941/// ```sql
2942/// (SELECT CAST(value AS DATE) FROM GENERATE_SERIES(start, end, step) AS _t(value)) AS _unnested_generate_series
2943/// ```
2944///
2945/// This handles the case where GENERATE_DATE_ARRAY was converted to GENERATE_SERIES
2946/// during cross-dialect normalization, but the original had UNNEST wrapping.
2947pub fn unwrap_unnest_generate_series_for_postgres(expr: Expression) -> Result<Expression> {
2948    use crate::dialects::transform_recursive;
2949    transform_recursive(expr, &unwrap_unnest_generate_series_single)
2950}
2951
2952fn unwrap_unnest_generate_series_single(expr: Expression) -> Result<Expression> {
2953    use crate::expressions::*;
2954    // Match UNNEST(GENERATE_SERIES(...)) patterns in FROM clauses
2955    match expr {
2956        Expression::Select(mut select) => {
2957            // Process FROM clause
2958            if let Some(ref mut from) = select.from {
2959                for table_expr in &mut from.expressions {
2960                    if let Some(replacement) = try_unwrap_unnest_gen_series(table_expr) {
2961                        *table_expr = replacement;
2962                    }
2963                }
2964            }
2965            // Process JOINs
2966            for join in &mut select.joins {
2967                if let Some(replacement) = try_unwrap_unnest_gen_series(&join.this) {
2968                    join.this = replacement;
2969                }
2970            }
2971            Ok(Expression::Select(select))
2972        }
2973        other => Ok(other),
2974    }
2975}
2976
2977/// Try to convert an UNNEST(GENERATE_SERIES(...)) to a PostgreSQL subquery.
2978/// Returns the replacement expression if applicable.
2979fn try_unwrap_unnest_gen_series(expr: &Expression) -> Option<Expression> {
2980    use crate::expressions::*;
2981
2982    // Match Unnest containing GenerateSeries
2983    let gen_series = match expr {
2984        Expression::Unnest(unnest) => {
2985            if let Expression::GenerateSeries(ref gs) = unnest.this {
2986                Some(gs.as_ref().clone())
2987            } else {
2988                None
2989            }
2990        }
2991        Expression::Alias(alias) => {
2992            if let Expression::Unnest(ref unnest) = alias.this {
2993                if let Expression::GenerateSeries(ref gs) = unnest.this {
2994                    Some(gs.as_ref().clone())
2995                } else {
2996                    None
2997                }
2998            } else {
2999                None
3000            }
3001        }
3002        _ => None,
3003    };
3004
3005    let gs = gen_series?;
3006
3007    // Build: (SELECT CAST(value AS DATE) FROM GENERATE_SERIES(start, end, step) AS _t(value)) AS _unnested_generate_series
3008    let value_col = Expression::boxed_column(Column {
3009        name: Identifier::new("value".to_string()),
3010        table: None,
3011        join_mark: false,
3012        trailing_comments: vec![],
3013        span: None,
3014        inferred_type: None,
3015    });
3016
3017    let cast_value = Expression::Cast(Box::new(Cast {
3018        this: value_col,
3019        to: DataType::Date,
3020        trailing_comments: vec![],
3021        double_colon_syntax: false,
3022        format: None,
3023        default: None,
3024        inferred_type: None,
3025    }));
3026
3027    let gen_series_expr = Expression::GenerateSeries(Box::new(gs));
3028
3029    // GENERATE_SERIES(...) AS _t(value)
3030    let gen_series_aliased = Expression::Alias(Box::new(Alias {
3031        this: gen_series_expr,
3032        alias: Identifier::new("_t".to_string()),
3033        column_aliases: vec![Identifier::new("value".to_string())],
3034        alias_explicit_as: false,
3035        alias_keyword: None,
3036        pre_alias_comments: vec![],
3037        trailing_comments: vec![],
3038        inferred_type: None,
3039    }));
3040
3041    let mut inner_select = Select::new();
3042    inner_select.expressions = vec![cast_value];
3043    inner_select.from = Some(From {
3044        expressions: vec![gen_series_aliased],
3045    });
3046
3047    let inner_select_expr = Expression::Select(Box::new(inner_select));
3048
3049    let subquery = Expression::Subquery(Box::new(Subquery {
3050        this: inner_select_expr,
3051        alias: None,
3052        column_aliases: vec![],
3053        alias_explicit_as: false,
3054        alias_keyword: None,
3055        order_by: None,
3056        limit: None,
3057        offset: None,
3058        distribute_by: None,
3059        sort_by: None,
3060        cluster_by: None,
3061        lateral: false,
3062        modifiers_inside: false,
3063        trailing_comments: vec![],
3064        inferred_type: None,
3065    }));
3066
3067    // Wrap in alias AS _unnested_generate_series
3068    Some(Expression::Alias(Box::new(Alias {
3069        this: subquery,
3070        alias: Identifier::new("_unnested_generate_series".to_string()),
3071        column_aliases: vec![],
3072        alias_explicit_as: false,
3073        alias_keyword: None,
3074        pre_alias_comments: vec![],
3075        trailing_comments: vec![],
3076        inferred_type: None,
3077    })))
3078}
3079
3080/// Expand BETWEEN expressions in DELETE statements to >= AND <=
3081///
3082/// Some dialects (like StarRocks) don't support BETWEEN in DELETE statements
3083/// or prefer the expanded form. This transforms:
3084///   `DELETE FROM t WHERE a BETWEEN b AND c`
3085/// to:
3086///   `DELETE FROM t WHERE a >= b AND a <= c`
3087pub fn expand_between_in_delete(expr: Expression) -> Result<Expression> {
3088    match expr {
3089        Expression::Delete(mut delete) => {
3090            // If there's a WHERE clause, expand any BETWEEN expressions in it
3091            if let Some(ref mut where_clause) = delete.where_clause {
3092                where_clause.this = expand_between_recursive(where_clause.this.clone());
3093            }
3094            Ok(Expression::Delete(delete))
3095        }
3096        other => Ok(other),
3097    }
3098}
3099
3100/// Recursively expand BETWEEN expressions to >= AND <=
3101fn expand_between_recursive(expr: Expression) -> Expression {
3102    match expr {
3103        // Expand: a BETWEEN b AND c -> a >= b AND a <= c
3104        // Expand: a NOT BETWEEN b AND c -> a < b OR a > c
3105        Expression::Between(between) => {
3106            let this = expand_between_recursive(between.this.clone());
3107            let low = expand_between_recursive(between.low);
3108            let high = expand_between_recursive(between.high);
3109
3110            if between.not {
3111                // NOT BETWEEN: a < b OR a > c
3112                Expression::Or(Box::new(BinaryOp::new(
3113                    Expression::Lt(Box::new(BinaryOp::new(this.clone(), low))),
3114                    Expression::Gt(Box::new(BinaryOp::new(this, high))),
3115                )))
3116            } else {
3117                // BETWEEN: a >= b AND a <= c
3118                Expression::And(Box::new(BinaryOp::new(
3119                    Expression::Gte(Box::new(BinaryOp::new(this.clone(), low))),
3120                    Expression::Lte(Box::new(BinaryOp::new(this, high))),
3121                )))
3122            }
3123        }
3124
3125        // Recursively process AND/OR expressions
3126        Expression::And(mut op) => {
3127            op.left = expand_between_recursive(op.left);
3128            op.right = expand_between_recursive(op.right);
3129            Expression::And(op)
3130        }
3131        Expression::Or(mut op) => {
3132            op.left = expand_between_recursive(op.left);
3133            op.right = expand_between_recursive(op.right);
3134            Expression::Or(op)
3135        }
3136        Expression::Not(mut op) => {
3137            op.this = expand_between_recursive(op.this);
3138            Expression::Not(op)
3139        }
3140
3141        // Recursively process parenthesized expressions
3142        Expression::Paren(mut paren) => {
3143            paren.this = expand_between_recursive(paren.this);
3144            Expression::Paren(paren)
3145        }
3146
3147        // Pass through everything else unchanged
3148        other => other,
3149    }
3150}
3151
3152/// Push down CTE column names into SELECT expressions.
3153///
3154/// BigQuery doesn't support column names when defining a CTE, e.g.:
3155/// `WITH vartab(v) AS (SELECT ...)` is not valid.
3156/// Instead, it expects: `WITH vartab AS (SELECT ... AS v)`.
3157///
3158/// This transform removes the CTE column aliases and adds them as
3159/// aliases on the SELECT expressions.
3160pub fn pushdown_cte_column_names(expr: Expression) -> Result<Expression> {
3161    match expr {
3162        Expression::Select(mut select) => {
3163            if let Some(ref mut with) = select.with {
3164                for cte in &mut with.ctes {
3165                    if !cte.columns.is_empty() {
3166                        // Check if the CTE body is a star query - if so, just strip column names
3167                        let is_star = matches!(&cte.this, Expression::Select(s) if
3168                            s.expressions.len() == 1 && matches!(&s.expressions[0], Expression::Star(_)));
3169
3170                        if is_star {
3171                            // Can't push down column names for star queries, just remove them
3172                            cte.columns.clear();
3173                            continue;
3174                        }
3175
3176                        // Extract column names
3177                        let column_names: Vec<Identifier> = cte.columns.drain(..).collect();
3178
3179                        // Push column names down into the SELECT expressions
3180                        if let Expression::Select(ref mut inner_select) = cte.this {
3181                            let new_exprs: Vec<Expression> = inner_select
3182                                .expressions
3183                                .drain(..)
3184                                .zip(
3185                                    column_names
3186                                        .into_iter()
3187                                        .chain(std::iter::repeat_with(|| Identifier::new(""))),
3188                                )
3189                                .map(|(expr, col_name)| {
3190                                    if col_name.name.is_empty() {
3191                                        return expr;
3192                                    }
3193                                    // If already aliased, replace the alias
3194                                    match expr {
3195                                        Expression::Alias(mut a) => {
3196                                            a.alias = col_name;
3197                                            Expression::Alias(a)
3198                                        }
3199                                        other => {
3200                                            Expression::Alias(Box::new(crate::expressions::Alias {
3201                                                this: other,
3202                                                alias: col_name,
3203                                                column_aliases: Vec::new(),
3204                                                alias_explicit_as: false,
3205                                                alias_keyword: None,
3206                                                pre_alias_comments: Vec::new(),
3207                                                trailing_comments: Vec::new(),
3208                                                inferred_type: None,
3209                                            }))
3210                                        }
3211                                    }
3212                                })
3213                                .collect();
3214                            inner_select.expressions = new_exprs;
3215                        }
3216                    }
3217                }
3218            }
3219            Ok(Expression::Select(select))
3220        }
3221        other => Ok(other),
3222    }
3223}
3224
3225/// Simplify nested parentheses around VALUES in FROM clause.
3226/// Converts `FROM ((VALUES (1)))` to `FROM (VALUES (1))` by stripping redundant wrapping.
3227/// Handles various nesting patterns: Subquery(Paren(Values)), Paren(Paren(Values)), etc.
3228pub fn simplify_nested_paren_values(expr: Expression) -> Result<Expression> {
3229    match expr {
3230        Expression::Select(mut select) => {
3231            if let Some(ref mut from) = select.from {
3232                for from_item in from.expressions.iter_mut() {
3233                    simplify_paren_values_in_from(from_item);
3234                }
3235            }
3236            Ok(Expression::Select(select))
3237        }
3238        other => Ok(other),
3239    }
3240}
3241
3242fn simplify_paren_values_in_from(expr: &mut Expression) {
3243    // Check various patterns and build replacement if needed
3244    let replacement = match expr {
3245        // Subquery(Paren(Values)) -> Subquery with Values directly
3246        Expression::Subquery(ref subquery) => {
3247            if let Expression::Paren(ref paren) = subquery.this {
3248                if matches!(&paren.this, Expression::Values(_)) {
3249                    let mut new_sub = subquery.as_ref().clone();
3250                    new_sub.this = paren.this.clone();
3251                    Some(Expression::Subquery(Box::new(new_sub)))
3252                } else {
3253                    None
3254                }
3255            } else {
3256                None
3257            }
3258        }
3259        // Paren(Subquery(Values)) -> Subquery(Values) - strip the Paren wrapper
3260        // Paren(Paren(Values)) -> Paren(Values) - strip one layer
3261        Expression::Paren(ref outer_paren) => {
3262            if let Expression::Subquery(ref subquery) = outer_paren.this {
3263                // Paren(Subquery(Values)) -> Subquery(Values) - strip outer Paren
3264                if matches!(&subquery.this, Expression::Values(_)) {
3265                    Some(outer_paren.this.clone())
3266                }
3267                // Paren(Subquery(Paren(Values))) -> Subquery(Values)
3268                else if let Expression::Paren(ref paren) = subquery.this {
3269                    if matches!(&paren.this, Expression::Values(_)) {
3270                        let mut new_sub = subquery.as_ref().clone();
3271                        new_sub.this = paren.this.clone();
3272                        Some(Expression::Subquery(Box::new(new_sub)))
3273                    } else {
3274                        None
3275                    }
3276                } else {
3277                    None
3278                }
3279            } else if let Expression::Paren(ref inner_paren) = outer_paren.this {
3280                if matches!(&inner_paren.this, Expression::Values(_)) {
3281                    Some(outer_paren.this.clone())
3282                } else {
3283                    None
3284                }
3285            } else {
3286                None
3287            }
3288        }
3289        _ => None,
3290    };
3291    if let Some(new_expr) = replacement {
3292        *expr = new_expr;
3293    }
3294}
3295
3296/// Add auto-generated table aliases (like `_t0`) for POSEXPLODE/EXPLODE in FROM clause
3297/// when the alias has column_aliases but no alias name.
3298/// This is needed for Spark target: `FROM POSEXPLODE(x) AS (a, b)` -> `FROM POSEXPLODE(x) AS _t0(a, b)`
3299pub fn add_auto_table_alias(expr: Expression) -> Result<Expression> {
3300    match expr {
3301        Expression::Select(mut select) => {
3302            // Process FROM expressions
3303            if let Some(ref mut from) = select.from {
3304                let mut counter = 0usize;
3305                for from_item in from.expressions.iter_mut() {
3306                    add_auto_alias_to_from_item(from_item, &mut counter);
3307                }
3308            }
3309            Ok(Expression::Select(select))
3310        }
3311        other => Ok(other),
3312    }
3313}
3314
3315fn add_auto_alias_to_from_item(expr: &mut Expression, counter: &mut usize) {
3316    use crate::expressions::Identifier;
3317
3318    match expr {
3319        Expression::Alias(ref mut alias) => {
3320            // If the alias name is empty and there are column_aliases, add auto-generated name
3321            if alias.alias.name.is_empty() && !alias.column_aliases.is_empty() {
3322                alias.alias = Identifier::new(format!("_t{}", counter));
3323                *counter += 1;
3324            }
3325        }
3326        _ => {}
3327    }
3328}
3329
3330/// Convert BigQuery-style UNNEST aliases to column-alias format for DuckDB/Presto/Spark.
3331///
3332/// BigQuery uses: `UNNEST(arr) AS x` where x is a column alias.
3333/// DuckDB/Presto/Spark need: `UNNEST(arr) AS _t0(x)` where _t0 is a table alias and x is the column alias.
3334///
3335/// Propagate struct field names from the first named struct in an array to subsequent unnamed structs.
3336///
3337/// In BigQuery, `[STRUCT('Alice' AS name, 85 AS score), STRUCT('Bob', 92)]` means the second struct
3338/// should inherit field names from the first: `[STRUCT('Alice' AS name, 85 AS score), STRUCT('Bob' AS name, 92 AS score)]`.
3339pub fn propagate_struct_field_names(expr: Expression) -> Result<Expression> {
3340    use crate::dialects::transform_recursive;
3341    transform_recursive(expr, &propagate_struct_names_in_expr)
3342}
3343
3344fn propagate_struct_names_in_expr(expr: Expression) -> Result<Expression> {
3345    use crate::expressions::{Alias, ArrayConstructor, Function, Identifier};
3346
3347    /// Helper to propagate struct field names within an array of expressions
3348    fn propagate_in_elements(elements: &[Expression]) -> Option<Vec<Expression>> {
3349        if elements.len() <= 1 {
3350            return None;
3351        }
3352        // Check if first element is a named STRUCT function
3353        if let Some(Expression::Function(ref first_struct)) = elements.first() {
3354            if first_struct.name.eq_ignore_ascii_case("STRUCT") {
3355                // Extract field names from first struct
3356                let field_names: Vec<Option<String>> = first_struct
3357                    .args
3358                    .iter()
3359                    .map(|arg| {
3360                        if let Expression::Alias(a) = arg {
3361                            Some(a.alias.name.clone())
3362                        } else {
3363                            None
3364                        }
3365                    })
3366                    .collect();
3367
3368                // Only propagate if first struct has at least one named field
3369                if field_names.iter().any(|n| n.is_some()) {
3370                    let mut new_elements = Vec::with_capacity(elements.len());
3371                    new_elements.push(elements[0].clone());
3372
3373                    for elem in &elements[1..] {
3374                        if let Expression::Function(ref s) = elem {
3375                            if s.name.eq_ignore_ascii_case("STRUCT")
3376                                && s.args.len() == field_names.len()
3377                            {
3378                                // Check if this struct has NO names (all unnamed)
3379                                let all_unnamed =
3380                                    s.args.iter().all(|a| !matches!(a, Expression::Alias(_)));
3381                                if all_unnamed {
3382                                    // Apply names from first struct
3383                                    let new_args: Vec<Expression> = s
3384                                        .args
3385                                        .iter()
3386                                        .zip(field_names.iter())
3387                                        .map(|(val, name)| {
3388                                            if let Some(n) = name {
3389                                                Expression::Alias(Box::new(Alias::new(
3390                                                    val.clone(),
3391                                                    Identifier::new(n.clone()),
3392                                                )))
3393                                            } else {
3394                                                val.clone()
3395                                            }
3396                                        })
3397                                        .collect();
3398                                    new_elements.push(Expression::Function(Box::new(
3399                                        Function::new("STRUCT".to_string(), new_args),
3400                                    )));
3401                                    continue;
3402                                }
3403                            }
3404                        }
3405                        new_elements.push(elem.clone());
3406                    }
3407
3408                    return Some(new_elements);
3409                }
3410            }
3411        }
3412        None
3413    }
3414
3415    // Look for Array expressions containing STRUCT function calls
3416    if let Expression::Array(ref arr) = expr {
3417        if let Some(new_elements) = propagate_in_elements(&arr.expressions) {
3418            return Ok(Expression::Array(Box::new(crate::expressions::Array {
3419                expressions: new_elements,
3420            })));
3421        }
3422    }
3423
3424    // Also handle ArrayFunc (ArrayConstructor) - bracket notation [STRUCT(...), ...]
3425    if let Expression::ArrayFunc(ref arr) = expr {
3426        if let Some(new_elements) = propagate_in_elements(&arr.expressions) {
3427            return Ok(Expression::ArrayFunc(Box::new(ArrayConstructor {
3428                expressions: new_elements,
3429                bracket_notation: arr.bracket_notation,
3430                use_list_keyword: arr.use_list_keyword,
3431            })));
3432        }
3433    }
3434
3435    Ok(expr)
3436}
3437
3438/// This walks the entire expression tree to find SELECT statements and converts UNNEST aliases
3439/// in their FROM clauses and JOINs.
3440pub fn unnest_alias_to_column_alias(expr: Expression) -> Result<Expression> {
3441    use crate::dialects::transform_recursive;
3442    transform_recursive(expr, &unnest_alias_transform_single_select)
3443}
3444
3445/// Move UNNEST items from FROM clause to CROSS JOINs without changing alias format.
3446/// Used for BigQuery -> BigQuery/Redshift where we want CROSS JOIN but not _t0(col) aliases.
3447pub fn unnest_from_to_cross_join(expr: Expression) -> Result<Expression> {
3448    use crate::dialects::transform_recursive;
3449    transform_recursive(expr, &unnest_from_to_cross_join_single_select)
3450}
3451
3452fn unnest_from_to_cross_join_single_select(expr: Expression) -> Result<Expression> {
3453    if let Expression::Select(mut select) = expr {
3454        if let Some(ref mut from) = select.from {
3455            if from.expressions.len() > 1 {
3456                let mut new_from_exprs = Vec::new();
3457                let mut new_cross_joins = Vec::new();
3458
3459                for (idx, from_item) in from.expressions.drain(..).enumerate() {
3460                    if idx == 0 {
3461                        new_from_exprs.push(from_item);
3462                    } else {
3463                        let is_unnest = match &from_item {
3464                            Expression::Unnest(_) => true,
3465                            Expression::Alias(a) => matches!(a.this, Expression::Unnest(_)),
3466                            _ => false,
3467                        };
3468
3469                        if is_unnest {
3470                            new_cross_joins.push(crate::expressions::Join {
3471                                this: from_item,
3472                                on: None,
3473                                using: Vec::new(),
3474                                kind: JoinKind::Cross,
3475                                use_inner_keyword: false,
3476                                use_outer_keyword: false,
3477                                deferred_condition: false,
3478                                join_hint: None,
3479                                match_condition: None,
3480                                pivots: Vec::new(),
3481                                comments: Vec::new(),
3482                                nesting_group: 0,
3483                                directed: false,
3484                            });
3485                        } else {
3486                            new_from_exprs.push(from_item);
3487                        }
3488                    }
3489                }
3490
3491                from.expressions = new_from_exprs;
3492                new_cross_joins.append(&mut select.joins);
3493                select.joins = new_cross_joins;
3494            }
3495        }
3496
3497        Ok(Expression::Select(select))
3498    } else {
3499        Ok(expr)
3500    }
3501}
3502
3503/// Wrap UNNEST function aliases in JOIN items from `AS name` to `AS _u(name)`
3504/// Used for PostgreSQL → Presto/Trino transpilation where GENERATE_SERIES is
3505/// converted to UNNEST(SEQUENCE) and the alias needs the column-alias format.
3506pub fn wrap_unnest_join_aliases(expr: Expression) -> Result<Expression> {
3507    use crate::dialects::transform_recursive;
3508    transform_recursive(expr, &wrap_unnest_join_aliases_single)
3509}
3510
3511fn wrap_unnest_join_aliases_single(expr: Expression) -> Result<Expression> {
3512    if let Expression::Select(mut select) = expr {
3513        // Process JOIN items
3514        for join in &mut select.joins {
3515            wrap_unnest_alias_in_join_item(&mut join.this);
3516        }
3517        Ok(Expression::Select(select))
3518    } else {
3519        Ok(expr)
3520    }
3521}
3522
3523/// If a join item is an Alias wrapping an UNNEST function, convert alias to _u(alias_name) format
3524fn wrap_unnest_alias_in_join_item(expr: &mut Expression) {
3525    use crate::expressions::Identifier;
3526    if let Expression::Alias(alias) = expr {
3527        // Check if the inner expression is a function call to UNNEST
3528        let is_unnest = match &alias.this {
3529            Expression::Function(f) => f.name.eq_ignore_ascii_case("UNNEST"),
3530            _ => false,
3531        };
3532
3533        if is_unnest && alias.column_aliases.is_empty() {
3534            // Simple alias like `AS s` -> wrap to `AS _u(s)`
3535            let original_alias_name = alias.alias.name.clone();
3536            alias.alias = Identifier {
3537                name: "_u".to_string(),
3538                quoted: false,
3539                trailing_comments: Vec::new(),
3540                span: None,
3541            };
3542            alias.column_aliases = vec![Identifier {
3543                name: original_alias_name,
3544                quoted: false,
3545                trailing_comments: Vec::new(),
3546                span: None,
3547            }];
3548        }
3549    }
3550}
3551
3552fn unnest_alias_transform_single_select(expr: Expression) -> Result<Expression> {
3553    if let Expression::Select(mut select) = expr {
3554        let mut counter = 0usize;
3555
3556        // Process FROM expressions: convert aliases AND move UNNEST items to CROSS JOIN
3557        if let Some(ref mut from) = select.from {
3558            // First pass: convert aliases in-place
3559            for from_item in from.expressions.iter_mut() {
3560                convert_unnest_alias_in_from(from_item, &mut counter);
3561            }
3562
3563            // Second pass: move UNNEST items from FROM to CROSS JOINs
3564            if from.expressions.len() > 1 {
3565                let mut new_from_exprs = Vec::new();
3566                let mut new_cross_joins = Vec::new();
3567
3568                for (idx, from_item) in from.expressions.drain(..).enumerate() {
3569                    if idx == 0 {
3570                        // First expression always stays in FROM
3571                        new_from_exprs.push(from_item);
3572                    } else {
3573                        // Check if this is UNNEST or Alias(UNNEST)
3574                        let is_unnest = match &from_item {
3575                            Expression::Unnest(_) => true,
3576                            Expression::Alias(a) => matches!(a.this, Expression::Unnest(_)),
3577                            _ => false,
3578                        };
3579
3580                        if is_unnest {
3581                            // Convert to CROSS JOIN
3582                            new_cross_joins.push(crate::expressions::Join {
3583                                this: from_item,
3584                                on: None,
3585                                using: Vec::new(),
3586                                kind: JoinKind::Cross,
3587                                use_inner_keyword: false,
3588                                use_outer_keyword: false,
3589                                deferred_condition: false,
3590                                join_hint: None,
3591                                match_condition: None,
3592                                pivots: Vec::new(),
3593                                comments: Vec::new(),
3594                                nesting_group: 0,
3595                                directed: false,
3596                            });
3597                        } else {
3598                            // Keep non-UNNEST items in FROM
3599                            new_from_exprs.push(from_item);
3600                        }
3601                    }
3602                }
3603
3604                from.expressions = new_from_exprs;
3605                // Prepend cross joins before existing joins
3606                new_cross_joins.append(&mut select.joins);
3607                select.joins = new_cross_joins;
3608            }
3609        }
3610
3611        // Process JOINs (existing joins that may have UNNEST aliases)
3612        for join in select.joins.iter_mut() {
3613            convert_unnest_alias_in_from(&mut join.this, &mut counter);
3614        }
3615
3616        Ok(Expression::Select(select))
3617    } else {
3618        Ok(expr)
3619    }
3620}
3621
3622fn convert_unnest_alias_in_from(expr: &mut Expression, counter: &mut usize) {
3623    use crate::expressions::Identifier;
3624
3625    if let Expression::Alias(ref mut alias) = expr {
3626        // Check if the inner expression is UNNEST (or EXPLODE)
3627        let is_unnest = matches!(&alias.this, Expression::Unnest(_))
3628            || matches!(&alias.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"));
3629
3630        if is_unnest && alias.column_aliases.is_empty() {
3631            // Convert: UNNEST(arr) AS x -> UNNEST(arr) AS _tN(x)
3632            let col_alias = alias.alias.clone();
3633            alias.column_aliases = vec![col_alias];
3634            alias.alias = Identifier::new(format!("_t{}", counter));
3635            *counter += 1;
3636        }
3637    }
3638}
3639
3640/// Expand POSEXPLODE in SELECT expressions for DuckDB.
3641///
3642/// Converts `SELECT POSEXPLODE(x)` to `SELECT GENERATE_SUBSCRIPTS(x, 1) - 1 AS pos, UNNEST(x) AS col`
3643/// Handles both aliased and unaliased forms:
3644/// - `SELECT POSEXPLODE(x) AS (a, b)` -> `SELECT GENERATE_SUBSCRIPTS(x, 1) - 1 AS a, UNNEST(x) AS b`
3645/// - `SELECT * FROM POSEXPLODE(x) AS (a, b)` -> `SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(x, 1) - 1 AS a, UNNEST(x) AS b)`
3646pub fn expand_posexplode_duckdb(expr: Expression) -> Result<Expression> {
3647    use crate::expressions::{Alias, Function};
3648
3649    match expr {
3650        Expression::Select(mut select) => {
3651            // Check if any SELECT expression is a POSEXPLODE function
3652            let mut new_expressions = Vec::new();
3653            let mut changed = false;
3654
3655            for sel_expr in select.expressions.drain(..) {
3656                // Check for POSEXPLODE(x) AS (a, b) - aliased form
3657                if let Expression::Alias(ref alias_box) = sel_expr {
3658                    if let Expression::Function(ref func) = alias_box.this {
3659                        if func.name.eq_ignore_ascii_case("POSEXPLODE") && func.args.len() == 1 {
3660                            let arg = func.args[0].clone();
3661                            // Get alias names: default pos, col
3662                            let (pos_name, col_name) = if alias_box.column_aliases.len() == 2 {
3663                                (
3664                                    alias_box.column_aliases[0].name.clone(),
3665                                    alias_box.column_aliases[1].name.clone(),
3666                                )
3667                            } else if !alias_box.alias.is_empty() {
3668                                // Single alias like AS x - use as col name, "pos" for position
3669                                ("pos".to_string(), alias_box.alias.name.clone())
3670                            } else {
3671                                ("pos".to_string(), "col".to_string())
3672                            };
3673
3674                            // GENERATE_SUBSCRIPTS(x, 1) - 1 AS pos_name
3675                            let gen_subscripts = Expression::Function(Box::new(Function::new(
3676                                "GENERATE_SUBSCRIPTS".to_string(),
3677                                vec![
3678                                    arg.clone(),
3679                                    Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3680                                ],
3681                            )));
3682                            let sub_one = Expression::Sub(Box::new(BinaryOp::new(
3683                                gen_subscripts,
3684                                Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3685                            )));
3686                            let pos_alias = Expression::Alias(Box::new(Alias {
3687                                this: sub_one,
3688                                alias: Identifier::new(pos_name),
3689                                column_aliases: Vec::new(),
3690                                alias_explicit_as: false,
3691                                alias_keyword: None,
3692                                pre_alias_comments: Vec::new(),
3693                                trailing_comments: Vec::new(),
3694                                inferred_type: None,
3695                            }));
3696
3697                            // UNNEST(x) AS col_name
3698                            let unnest = Expression::Unnest(Box::new(UnnestFunc {
3699                                this: arg,
3700                                expressions: Vec::new(),
3701                                with_ordinality: false,
3702                                alias: None,
3703                                offset_alias: None,
3704                            }));
3705                            let col_alias = Expression::Alias(Box::new(Alias {
3706                                this: unnest,
3707                                alias: Identifier::new(col_name),
3708                                column_aliases: Vec::new(),
3709                                alias_explicit_as: false,
3710                                alias_keyword: None,
3711                                pre_alias_comments: Vec::new(),
3712                                trailing_comments: Vec::new(),
3713                                inferred_type: None,
3714                            }));
3715
3716                            new_expressions.push(pos_alias);
3717                            new_expressions.push(col_alias);
3718                            changed = true;
3719                            continue;
3720                        }
3721                    }
3722                }
3723
3724                // Check for bare POSEXPLODE(x) - unaliased form
3725                if let Expression::Function(ref func) = sel_expr {
3726                    if func.name.eq_ignore_ascii_case("POSEXPLODE") && func.args.len() == 1 {
3727                        let arg = func.args[0].clone();
3728                        let pos_name = "pos";
3729                        let col_name = "col";
3730
3731                        // GENERATE_SUBSCRIPTS(x, 1) - 1 AS pos
3732                        let gen_subscripts = Expression::Function(Box::new(Function::new(
3733                            "GENERATE_SUBSCRIPTS".to_string(),
3734                            vec![
3735                                arg.clone(),
3736                                Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3737                            ],
3738                        )));
3739                        let sub_one = Expression::Sub(Box::new(BinaryOp::new(
3740                            gen_subscripts,
3741                            Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3742                        )));
3743                        let pos_alias = Expression::Alias(Box::new(Alias {
3744                            this: sub_one,
3745                            alias: Identifier::new(pos_name),
3746                            column_aliases: Vec::new(),
3747                            alias_explicit_as: false,
3748                            alias_keyword: None,
3749                            pre_alias_comments: Vec::new(),
3750                            trailing_comments: Vec::new(),
3751                            inferred_type: None,
3752                        }));
3753
3754                        // UNNEST(x) AS col
3755                        let unnest = Expression::Unnest(Box::new(UnnestFunc {
3756                            this: arg,
3757                            expressions: Vec::new(),
3758                            with_ordinality: false,
3759                            alias: None,
3760                            offset_alias: None,
3761                        }));
3762                        let col_alias = Expression::Alias(Box::new(Alias {
3763                            this: unnest,
3764                            alias: Identifier::new(col_name),
3765                            column_aliases: Vec::new(),
3766                            alias_explicit_as: false,
3767                            alias_keyword: None,
3768                            pre_alias_comments: Vec::new(),
3769                            trailing_comments: Vec::new(),
3770                            inferred_type: None,
3771                        }));
3772
3773                        new_expressions.push(pos_alias);
3774                        new_expressions.push(col_alias);
3775                        changed = true;
3776                        continue;
3777                    }
3778                }
3779
3780                // Not a POSEXPLODE, keep as-is
3781                new_expressions.push(sel_expr);
3782            }
3783
3784            if changed {
3785                select.expressions = new_expressions;
3786            } else {
3787                select.expressions = new_expressions;
3788            }
3789
3790            // Also handle POSEXPLODE in FROM clause:
3791            // SELECT * FROM POSEXPLODE(x) AS (a, b) -> SELECT * FROM (SELECT ...)
3792            if let Some(ref mut from) = select.from {
3793                expand_posexplode_in_from_duckdb(from)?;
3794            }
3795
3796            Ok(Expression::Select(select))
3797        }
3798        other => Ok(other),
3799    }
3800}
3801
3802/// Helper to expand POSEXPLODE in FROM clause for DuckDB
3803fn expand_posexplode_in_from_duckdb(from: &mut From) -> Result<()> {
3804    use crate::expressions::{Alias, Function};
3805
3806    let mut new_expressions = Vec::new();
3807    let mut _changed = false;
3808
3809    for table_expr in from.expressions.drain(..) {
3810        // Check for POSEXPLODE(x) AS (a, b) in FROM
3811        if let Expression::Alias(ref alias_box) = table_expr {
3812            if let Expression::Function(ref func) = alias_box.this {
3813                if func.name.eq_ignore_ascii_case("POSEXPLODE") && func.args.len() == 1 {
3814                    let arg = func.args[0].clone();
3815                    let (pos_name, col_name) = if alias_box.column_aliases.len() == 2 {
3816                        (
3817                            alias_box.column_aliases[0].name.clone(),
3818                            alias_box.column_aliases[1].name.clone(),
3819                        )
3820                    } else {
3821                        ("pos".to_string(), "col".to_string())
3822                    };
3823
3824                    // Create subquery: (SELECT GENERATE_SUBSCRIPTS(x, 1) - 1 AS a, UNNEST(x) AS b)
3825                    let gen_subscripts = Expression::Function(Box::new(Function::new(
3826                        "GENERATE_SUBSCRIPTS".to_string(),
3827                        vec![
3828                            arg.clone(),
3829                            Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3830                        ],
3831                    )));
3832                    let sub_one = Expression::Sub(Box::new(BinaryOp::new(
3833                        gen_subscripts,
3834                        Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3835                    )));
3836                    let pos_alias = Expression::Alias(Box::new(Alias {
3837                        this: sub_one,
3838                        alias: Identifier::new(&pos_name),
3839                        column_aliases: Vec::new(),
3840                        alias_explicit_as: false,
3841                        alias_keyword: None,
3842                        pre_alias_comments: Vec::new(),
3843                        trailing_comments: Vec::new(),
3844                        inferred_type: None,
3845                    }));
3846                    let unnest = Expression::Unnest(Box::new(UnnestFunc {
3847                        this: arg,
3848                        expressions: Vec::new(),
3849                        with_ordinality: false,
3850                        alias: None,
3851                        offset_alias: None,
3852                    }));
3853                    let col_alias = Expression::Alias(Box::new(Alias {
3854                        this: unnest,
3855                        alias: Identifier::new(&col_name),
3856                        column_aliases: Vec::new(),
3857                        alias_explicit_as: false,
3858                        alias_keyword: None,
3859                        pre_alias_comments: Vec::new(),
3860                        trailing_comments: Vec::new(),
3861                        inferred_type: None,
3862                    }));
3863
3864                    let mut inner_select = Select::new();
3865                    inner_select.expressions = vec![pos_alias, col_alias];
3866
3867                    let subquery = Expression::Subquery(Box::new(Subquery {
3868                        this: Expression::Select(Box::new(inner_select)),
3869                        alias: None,
3870                        column_aliases: Vec::new(),
3871                        alias_explicit_as: false,
3872                        alias_keyword: None,
3873                        order_by: None,
3874                        limit: None,
3875                        offset: None,
3876                        distribute_by: None,
3877                        sort_by: None,
3878                        cluster_by: None,
3879                        lateral: false,
3880                        modifiers_inside: false,
3881                        trailing_comments: Vec::new(),
3882                        inferred_type: None,
3883                    }));
3884                    new_expressions.push(subquery);
3885                    _changed = true;
3886                    continue;
3887                }
3888            }
3889        }
3890
3891        // Also check for bare POSEXPLODE(x) in FROM (no alias)
3892        if let Expression::Function(ref func) = table_expr {
3893            if func.name.eq_ignore_ascii_case("POSEXPLODE") && func.args.len() == 1 {
3894                let arg = func.args[0].clone();
3895
3896                // Create subquery: (SELECT GENERATE_SUBSCRIPTS(x, 1) - 1 AS pos, UNNEST(x) AS col)
3897                let gen_subscripts = Expression::Function(Box::new(Function::new(
3898                    "GENERATE_SUBSCRIPTS".to_string(),
3899                    vec![
3900                        arg.clone(),
3901                        Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3902                    ],
3903                )));
3904                let sub_one = Expression::Sub(Box::new(BinaryOp::new(
3905                    gen_subscripts,
3906                    Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3907                )));
3908                let pos_alias = Expression::Alias(Box::new(Alias {
3909                    this: sub_one,
3910                    alias: Identifier::new("pos"),
3911                    column_aliases: Vec::new(),
3912                    alias_explicit_as: false,
3913                    alias_keyword: None,
3914                    pre_alias_comments: Vec::new(),
3915                    trailing_comments: Vec::new(),
3916                    inferred_type: None,
3917                }));
3918                let unnest = Expression::Unnest(Box::new(UnnestFunc {
3919                    this: arg,
3920                    expressions: Vec::new(),
3921                    with_ordinality: false,
3922                    alias: None,
3923                    offset_alias: None,
3924                }));
3925                let col_alias = Expression::Alias(Box::new(Alias {
3926                    this: unnest,
3927                    alias: Identifier::new("col"),
3928                    column_aliases: Vec::new(),
3929                    alias_explicit_as: false,
3930                    alias_keyword: None,
3931                    pre_alias_comments: Vec::new(),
3932                    trailing_comments: Vec::new(),
3933                    inferred_type: None,
3934                }));
3935
3936                let mut inner_select = Select::new();
3937                inner_select.expressions = vec![pos_alias, col_alias];
3938
3939                let subquery = Expression::Subquery(Box::new(Subquery {
3940                    this: Expression::Select(Box::new(inner_select)),
3941                    alias: None,
3942                    column_aliases: Vec::new(),
3943                    alias_explicit_as: false,
3944                    alias_keyword: None,
3945                    order_by: None,
3946                    limit: None,
3947                    offset: None,
3948                    distribute_by: None,
3949                    sort_by: None,
3950                    cluster_by: None,
3951                    lateral: false,
3952                    modifiers_inside: false,
3953                    trailing_comments: Vec::new(),
3954                    inferred_type: None,
3955                }));
3956                new_expressions.push(subquery);
3957                _changed = true;
3958                continue;
3959            }
3960        }
3961
3962        new_expressions.push(table_expr);
3963    }
3964
3965    from.expressions = new_expressions;
3966    Ok(())
3967}
3968
3969/// Convert EXPLODE/POSEXPLODE in SELECT projections into CROSS JOIN UNNEST patterns.
3970///
3971/// This implements the `explode_projection_to_unnest` transform from Python sqlglot.
3972/// It restructures queries like:
3973///   `SELECT EXPLODE(x) FROM tbl`
3974/// into:
3975///   `SELECT IF(pos = pos_2, col, NULL) AS col FROM tbl CROSS JOIN UNNEST(...) AS pos CROSS JOIN UNNEST(x) AS col WITH OFFSET AS pos_2 WHERE ...`
3976///
3977/// The transform handles:
3978/// - EXPLODE(x) and POSEXPLODE(x) functions
3979/// - Name collision avoidance (_u, _u_2, ... and col, col_2, ...)
3980/// - Multiple EXPLODE/POSEXPLODE in one SELECT
3981/// - Queries with or without FROM clause
3982/// - Presto (index_offset=1) and BigQuery (index_offset=0) variants
3983pub fn explode_projection_to_unnest(expr: Expression, target: DialectType) -> Result<Expression> {
3984    match expr {
3985        Expression::Select(select) => explode_projection_to_unnest_impl(*select, target),
3986        other => Ok(other),
3987    }
3988}
3989
3990/// Snowflake-specific rewrite to mirror Python sqlglot's explode_projection_to_unnest behavior
3991/// when FLATTEN appears in a nested LATERAL within a SELECT projection.
3992///
3993/// This intentionally rewrites:
3994/// - `LATERAL FLATTEN(INPUT => x) alias`
3995/// into:
3996/// - `LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS alias(SEQ, KEY, PATH, INDEX, VALUE, THIS)`
3997/// and appends CROSS JOIN TABLE(FLATTEN(...)) range/entity joins plus alignment predicates
3998/// to the containing SELECT.
3999pub fn snowflake_flatten_projection_to_unnest(expr: Expression) -> Result<Expression> {
4000    match expr {
4001        Expression::Select(select) => snowflake_flatten_projection_to_unnest_impl(*select),
4002        other => Ok(other),
4003    }
4004}
4005
4006fn snowflake_flatten_projection_to_unnest_impl(mut select: Select) -> Result<Expression> {
4007    let mut flattened_inputs: Vec<Expression> = Vec::new();
4008    let mut new_selects: Vec<Expression> = Vec::with_capacity(select.expressions.len());
4009
4010    for sel_expr in select.expressions.into_iter() {
4011        let found_input: RefCell<Option<Expression>> = RefCell::new(None);
4012
4013        let rewritten = transform_recursive(sel_expr, &|e| {
4014            if let Expression::Lateral(lat) = e {
4015                if let Some(input_expr) = extract_flatten_input(&lat) {
4016                    if found_input.borrow().is_none() {
4017                        *found_input.borrow_mut() = Some(input_expr);
4018                    }
4019                    return Ok(Expression::Lateral(Box::new(rewrite_flatten_lateral(*lat))));
4020                }
4021                return Ok(Expression::Lateral(lat));
4022            }
4023            Ok(e)
4024        })?;
4025
4026        if let Some(input) = found_input.into_inner() {
4027            flattened_inputs.push(input);
4028        }
4029        new_selects.push(rewritten);
4030    }
4031
4032    if flattened_inputs.is_empty() {
4033        select.expressions = new_selects;
4034        return Ok(Expression::Select(Box::new(select)));
4035    }
4036
4037    select.expressions = new_selects;
4038
4039    for (idx, input_expr) in flattened_inputs.into_iter().enumerate() {
4040        // Match sqlglot naming: first pair is _u/_u_2 with pos/pos_2 and entity.
4041        let is_first = idx == 0;
4042        let series_alias = if is_first {
4043            "pos".to_string()
4044        } else {
4045            format!("pos_{}", idx + 1)
4046        };
4047        let series_source_alias = if is_first {
4048            "_u".to_string()
4049        } else {
4050            format!("_u_{}", idx * 2 + 1)
4051        };
4052        let unnest_source_alias = if is_first {
4053            "_u_2".to_string()
4054        } else {
4055            format!("_u_{}", idx * 2 + 2)
4056        };
4057        let pos2_alias = if is_first {
4058            "pos_2".to_string()
4059        } else {
4060            format!("{}_2", series_alias)
4061        };
4062        let entity_alias = if is_first {
4063            "entity".to_string()
4064        } else {
4065            format!("entity_{}", idx + 1)
4066        };
4067
4068        let array_size_call = Expression::Function(Box::new(Function::new(
4069            "ARRAY_SIZE".to_string(),
4070            vec![Expression::NamedArgument(Box::new(NamedArgument {
4071                name: Identifier::new("INPUT"),
4072                value: input_expr.clone(),
4073                separator: NamedArgSeparator::DArrow,
4074            }))],
4075        )));
4076
4077        let greatest = Expression::Function(Box::new(Function::new(
4078            "GREATEST".to_string(),
4079            vec![array_size_call.clone()],
4080        )));
4081
4082        let series_end = Expression::Add(Box::new(BinaryOp::new(
4083            Expression::Paren(Box::new(crate::expressions::Paren {
4084                this: Expression::Sub(Box::new(BinaryOp::new(
4085                    greatest,
4086                    Expression::Literal(Box::new(Literal::Number("1".to_string()))),
4087                ))),
4088                trailing_comments: Vec::new(),
4089            })),
4090            Expression::Literal(Box::new(Literal::Number("1".to_string()))),
4091        )));
4092
4093        let series_range = Expression::Function(Box::new(Function::new(
4094            "ARRAY_GENERATE_RANGE".to_string(),
4095            vec![
4096                Expression::Literal(Box::new(Literal::Number("0".to_string()))),
4097                series_end,
4098            ],
4099        )));
4100
4101        let series_flatten = Expression::Function(Box::new(Function::new(
4102            "FLATTEN".to_string(),
4103            vec![Expression::NamedArgument(Box::new(NamedArgument {
4104                name: Identifier::new("INPUT"),
4105                value: series_range,
4106                separator: NamedArgSeparator::DArrow,
4107            }))],
4108        )));
4109
4110        let series_table = Expression::Function(Box::new(Function::new(
4111            "TABLE".to_string(),
4112            vec![series_flatten],
4113        )));
4114
4115        let series_alias_expr = Expression::Alias(Box::new(Alias {
4116            this: series_table,
4117            alias: Identifier::new(series_source_alias.clone()),
4118            column_aliases: vec![
4119                Identifier::new("seq"),
4120                Identifier::new("key"),
4121                Identifier::new("path"),
4122                Identifier::new("index"),
4123                Identifier::new(series_alias.clone()),
4124                Identifier::new("this"),
4125            ],
4126            alias_explicit_as: false,
4127            alias_keyword: None,
4128            pre_alias_comments: Vec::new(),
4129            trailing_comments: Vec::new(),
4130            inferred_type: None,
4131        }));
4132
4133        select.joins.push(Join {
4134            this: series_alias_expr,
4135            on: None,
4136            using: Vec::new(),
4137            kind: JoinKind::Cross,
4138            use_inner_keyword: false,
4139            use_outer_keyword: false,
4140            deferred_condition: false,
4141            join_hint: None,
4142            match_condition: None,
4143            pivots: Vec::new(),
4144            comments: Vec::new(),
4145            nesting_group: 0,
4146            directed: false,
4147        });
4148
4149        let entity_flatten = Expression::Function(Box::new(Function::new(
4150            "FLATTEN".to_string(),
4151            vec![Expression::NamedArgument(Box::new(NamedArgument {
4152                name: Identifier::new("INPUT"),
4153                value: input_expr.clone(),
4154                separator: NamedArgSeparator::DArrow,
4155            }))],
4156        )));
4157
4158        let entity_table = Expression::Function(Box::new(Function::new(
4159            "TABLE".to_string(),
4160            vec![entity_flatten],
4161        )));
4162
4163        let entity_alias_expr = Expression::Alias(Box::new(Alias {
4164            this: entity_table,
4165            alias: Identifier::new(unnest_source_alias.clone()),
4166            column_aliases: vec![
4167                Identifier::new("seq"),
4168                Identifier::new("key"),
4169                Identifier::new("path"),
4170                Identifier::new(pos2_alias.clone()),
4171                Identifier::new(entity_alias.clone()),
4172                Identifier::new("this"),
4173            ],
4174            alias_explicit_as: false,
4175            alias_keyword: None,
4176            pre_alias_comments: Vec::new(),
4177            trailing_comments: Vec::new(),
4178            inferred_type: None,
4179        }));
4180
4181        select.joins.push(Join {
4182            this: entity_alias_expr,
4183            on: None,
4184            using: Vec::new(),
4185            kind: JoinKind::Cross,
4186            use_inner_keyword: false,
4187            use_outer_keyword: false,
4188            deferred_condition: false,
4189            join_hint: None,
4190            match_condition: None,
4191            pivots: Vec::new(),
4192            comments: Vec::new(),
4193            nesting_group: 0,
4194            directed: false,
4195        });
4196
4197        let pos_col =
4198            Expression::qualified_column(series_source_alias.clone(), series_alias.clone());
4199        let pos2_col =
4200            Expression::qualified_column(unnest_source_alias.clone(), pos2_alias.clone());
4201
4202        let eq = Expression::Eq(Box::new(BinaryOp::new(pos_col.clone(), pos2_col.clone())));
4203        let size_minus_1 = Expression::Paren(Box::new(crate::expressions::Paren {
4204            this: Expression::Sub(Box::new(BinaryOp::new(
4205                array_size_call,
4206                Expression::Literal(Box::new(Literal::Number("1".to_string()))),
4207            ))),
4208            trailing_comments: Vec::new(),
4209        }));
4210        let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_minus_1.clone())));
4211        let pos2_eq_size = Expression::Eq(Box::new(BinaryOp::new(pos2_col, size_minus_1)));
4212        let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos2_eq_size)));
4213        let or_cond = Expression::Or(Box::new(BinaryOp::new(
4214            eq,
4215            Expression::Paren(Box::new(crate::expressions::Paren {
4216                this: and_cond,
4217                trailing_comments: Vec::new(),
4218            })),
4219        )));
4220
4221        select.where_clause = Some(match select.where_clause.take() {
4222            Some(existing) => Where {
4223                this: Expression::And(Box::new(BinaryOp::new(existing.this, or_cond))),
4224            },
4225            None => Where { this: or_cond },
4226        });
4227    }
4228
4229    Ok(Expression::Select(Box::new(select)))
4230}
4231
4232fn extract_flatten_input(lat: &Lateral) -> Option<Expression> {
4233    let Expression::Function(f) = lat.this.as_ref() else {
4234        return None;
4235    };
4236    if !f.name.eq_ignore_ascii_case("FLATTEN") {
4237        return None;
4238    }
4239
4240    for arg in &f.args {
4241        if let Expression::NamedArgument(na) = arg {
4242            if na.name.name.eq_ignore_ascii_case("INPUT") {
4243                return Some(na.value.clone());
4244            }
4245        }
4246    }
4247    f.args.first().cloned()
4248}
4249
4250fn rewrite_flatten_lateral(mut lat: Lateral) -> Lateral {
4251    let cond = Expression::Eq(Box::new(BinaryOp::new(
4252        Expression::qualified_column("_u", "pos"),
4253        Expression::qualified_column("_u_2", "pos_2"),
4254    )));
4255    let true_expr = Expression::qualified_column("_u_2", "entity");
4256    let iff_expr = Expression::Function(Box::new(Function::new(
4257        "IFF".to_string(),
4258        vec![cond, true_expr, Expression::Null(crate::expressions::Null)],
4259    )));
4260
4261    lat.this = Box::new(iff_expr);
4262    if lat.column_aliases.is_empty() {
4263        lat.column_aliases = vec![
4264            "SEQ".to_string(),
4265            "KEY".to_string(),
4266            "PATH".to_string(),
4267            "INDEX".to_string(),
4268            "VALUE".to_string(),
4269            "THIS".to_string(),
4270        ];
4271    }
4272    lat
4273}
4274
4275/// Info about an EXPLODE/POSEXPLODE found in a SELECT projection
4276struct ExplodeInfo {
4277    /// The argument to EXPLODE/POSEXPLODE (the array expression)
4278    arg_sql: String,
4279    /// The alias for the exploded column
4280    explode_alias: String,
4281    /// The alias for the position column
4282    pos_alias: String,
4283    /// Source alias for this unnest (e.g., _u_2)
4284    unnest_source_alias: String,
4285}
4286
4287fn explode_projection_to_unnest_impl(select: Select, target: DialectType) -> Result<Expression> {
4288    let is_presto = matches!(
4289        target,
4290        DialectType::Presto | DialectType::Trino | DialectType::Athena
4291    );
4292    let is_bigquery = matches!(target, DialectType::BigQuery);
4293
4294    if !is_presto && !is_bigquery {
4295        return Ok(Expression::Select(Box::new(select)));
4296    }
4297
4298    // Check if any SELECT projection contains EXPLODE or POSEXPLODE
4299    let has_explode = select.expressions.iter().any(|e| expr_contains_explode(e));
4300    if !has_explode {
4301        return Ok(Expression::Select(Box::new(select)));
4302    }
4303
4304    // Collect taken names from existing SELECT expressions and FROM sources
4305    let mut taken_select_names = std::collections::HashSet::new();
4306    let mut taken_source_names = std::collections::HashSet::new();
4307
4308    // Collect names from existing SELECT expressions (output names)
4309    for sel in &select.expressions {
4310        if let Some(name) = get_output_name(sel) {
4311            taken_select_names.insert(name);
4312        }
4313    }
4314
4315    // Also add the explode arg name if it's a column reference
4316    for sel in &select.expressions {
4317        let explode_expr = find_explode_in_expr(sel);
4318        if let Some(arg) = explode_expr {
4319            if let Some(name) = get_output_name(&arg) {
4320                taken_select_names.insert(name);
4321            }
4322        }
4323    }
4324
4325    // Collect source names from FROM clause
4326    if let Some(ref from) = select.from {
4327        for from_expr in &from.expressions {
4328            collect_source_names(from_expr, &mut taken_source_names);
4329        }
4330    }
4331    // Also collect from JOINs
4332    for join in &select.joins {
4333        collect_source_names(&join.this, &mut taken_source_names);
4334    }
4335
4336    // Generate series alias
4337    let series_alias = new_name(&mut taken_select_names, "pos");
4338
4339    // Generate series source alias
4340    let series_source_alias = new_name(&mut taken_source_names, "_u");
4341
4342    // Get the target dialect for generating expression SQL
4343    let target_dialect = Dialect::get(target);
4344
4345    // Process each SELECT expression, collecting explode info
4346    let mut explode_infos: Vec<ExplodeInfo> = Vec::new();
4347    let mut new_projections: Vec<String> = Vec::new();
4348
4349    for (_idx, sel_expr) in select.expressions.iter().enumerate() {
4350        let explode_data = extract_explode_data(sel_expr);
4351
4352        if let Some((is_posexplode, arg_expr, explicit_alias, explicit_pos_alias)) = explode_data {
4353            // Generate the argument SQL in target dialect
4354            let arg_sql = target_dialect
4355                .generate(&arg_expr)
4356                .unwrap_or_else(|_| "NULL".to_string());
4357
4358            let unnest_source_alias = new_name(&mut taken_source_names, "_u");
4359
4360            let explode_alias = if let Some(ref ea) = explicit_alias {
4361                // Use the explicit alias directly (it was explicitly specified by the user)
4362                // Remove from taken_select_names first to avoid false collision with itself
4363                taken_select_names.remove(ea.as_str());
4364                // Now check for collision with other names
4365                let name = new_name(&mut taken_select_names, ea);
4366                name
4367            } else {
4368                new_name(&mut taken_select_names, "col")
4369            };
4370
4371            let pos_alias = if let Some(ref pa) = explicit_pos_alias {
4372                // Use the explicit pos alias directly
4373                taken_select_names.remove(pa.as_str());
4374                let name = new_name(&mut taken_select_names, pa);
4375                name
4376            } else {
4377                new_name(&mut taken_select_names, "pos")
4378            };
4379
4380            // Build the IF projection
4381            if is_presto {
4382                // Presto: IF(_u.pos = _u_2.pos_2, _u_2.col) AS col
4383                let if_col = format!(
4384                    "IF({}.{} = {}.{}, {}.{}) AS {}",
4385                    series_source_alias,
4386                    series_alias,
4387                    unnest_source_alias,
4388                    pos_alias,
4389                    unnest_source_alias,
4390                    explode_alias,
4391                    explode_alias
4392                );
4393                new_projections.push(if_col);
4394
4395                // For POSEXPLODE, also add the position projection
4396                if is_posexplode {
4397                    let if_pos = format!(
4398                        "IF({}.{} = {}.{}, {}.{}) AS {}",
4399                        series_source_alias,
4400                        series_alias,
4401                        unnest_source_alias,
4402                        pos_alias,
4403                        unnest_source_alias,
4404                        pos_alias,
4405                        pos_alias
4406                    );
4407                    new_projections.push(if_pos);
4408                }
4409            } else {
4410                // BigQuery: IF(pos = pos_2, col, NULL) AS col
4411                let if_col = format!(
4412                    "IF({} = {}, {}, NULL) AS {}",
4413                    series_alias, pos_alias, explode_alias, explode_alias
4414                );
4415                new_projections.push(if_col);
4416
4417                // For POSEXPLODE, also add the position projection
4418                if is_posexplode {
4419                    let if_pos = format!(
4420                        "IF({} = {}, {}, NULL) AS {}",
4421                        series_alias, pos_alias, pos_alias, pos_alias
4422                    );
4423                    new_projections.push(if_pos);
4424                }
4425            }
4426
4427            explode_infos.push(ExplodeInfo {
4428                arg_sql,
4429                explode_alias,
4430                pos_alias,
4431                unnest_source_alias,
4432            });
4433        } else {
4434            // Not an EXPLODE expression, generate as-is
4435            let sel_sql = target_dialect
4436                .generate(sel_expr)
4437                .unwrap_or_else(|_| "*".to_string());
4438            new_projections.push(sel_sql);
4439        }
4440    }
4441
4442    if explode_infos.is_empty() {
4443        return Ok(Expression::Select(Box::new(select)));
4444    }
4445
4446    // Build the FROM clause
4447    let mut from_parts: Vec<String> = Vec::new();
4448
4449    // Existing FROM sources
4450    if let Some(ref from) = select.from {
4451        for from_expr in &from.expressions {
4452            let from_sql = target_dialect.generate(from_expr).unwrap_or_default();
4453            from_parts.push(from_sql);
4454        }
4455    }
4456
4457    // Build the size expressions for the series generator
4458    let size_exprs: Vec<String> = explode_infos
4459        .iter()
4460        .map(|info| {
4461            if is_presto {
4462                format!("CARDINALITY({})", info.arg_sql)
4463            } else {
4464                format!("ARRAY_LENGTH({})", info.arg_sql)
4465            }
4466        })
4467        .collect();
4468
4469    let greatest_arg = if size_exprs.len() == 1 {
4470        size_exprs[0].clone()
4471    } else {
4472        format!("GREATEST({})", size_exprs.join(", "))
4473    };
4474
4475    // Build the series source
4476    // greatest_arg is already "GREATEST(...)" when multiple, or "CARDINALITY(x)" / "ARRAY_LENGTH(x)" when single
4477    let series_sql = if is_presto {
4478        // SEQUENCE(1, GREATEST(CARDINALITY(x))) for single, SEQUENCE(1, GREATEST(C(a), C(b))) for multiple
4479        if size_exprs.len() == 1 {
4480            format!(
4481                "UNNEST(SEQUENCE(1, GREATEST({}))) AS {}({})",
4482                greatest_arg, series_source_alias, series_alias
4483            )
4484        } else {
4485            // greatest_arg already has GREATEST(...) wrapper
4486            format!(
4487                "UNNEST(SEQUENCE(1, {})) AS {}({})",
4488                greatest_arg, series_source_alias, series_alias
4489            )
4490        }
4491    } else {
4492        // GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(x)) - 1) for single
4493        if size_exprs.len() == 1 {
4494            format!(
4495                "UNNEST(GENERATE_ARRAY(0, GREATEST({}) - 1)) AS {}",
4496                greatest_arg, series_alias
4497            )
4498        } else {
4499            // greatest_arg already has GREATEST(...) wrapper
4500            format!(
4501                "UNNEST(GENERATE_ARRAY(0, {} - 1)) AS {}",
4502                greatest_arg, series_alias
4503            )
4504        }
4505    };
4506
4507    // Build CROSS JOIN UNNEST clauses
4508    // Always use Presto-style (WITH ORDINALITY) for the SQL string to parse,
4509    // then convert to BigQuery-style AST after parsing if needed
4510    let mut cross_joins: Vec<String> = Vec::new();
4511
4512    for info in &explode_infos {
4513        // Always use WITH ORDINALITY syntax (which our parser handles)
4514        cross_joins.push(format!(
4515            "CROSS JOIN UNNEST({}) WITH ORDINALITY AS {}({}, {})",
4516            info.arg_sql, info.unnest_source_alias, info.explode_alias, info.pos_alias
4517        ));
4518    }
4519
4520    // Build WHERE clause
4521    let mut where_conditions: Vec<String> = Vec::new();
4522
4523    for info in &explode_infos {
4524        let size_expr = if is_presto {
4525            format!("CARDINALITY({})", info.arg_sql)
4526        } else {
4527            format!("ARRAY_LENGTH({})", info.arg_sql)
4528        };
4529
4530        let cond = if is_presto {
4531            format!(
4532                "{series_src}.{series_al} = {unnest_src}.{pos_al} OR ({series_src}.{series_al} > {size} AND {unnest_src}.{pos_al} = {size})",
4533                series_src = series_source_alias,
4534                series_al = series_alias,
4535                unnest_src = info.unnest_source_alias,
4536                pos_al = info.pos_alias,
4537                size = size_expr
4538            )
4539        } else {
4540            format!(
4541                "{series_al} = {pos_al} OR ({series_al} > ({size} - 1) AND {pos_al} = ({size} - 1))",
4542                series_al = series_alias,
4543                pos_al = info.pos_alias,
4544                size = size_expr
4545            )
4546        };
4547
4548        where_conditions.push(cond);
4549    }
4550
4551    // Combine WHERE conditions with AND (wrapped in parens if multiple)
4552    let where_sql = if where_conditions.len() == 1 {
4553        where_conditions[0].clone()
4554    } else {
4555        where_conditions
4556            .iter()
4557            .map(|c| format!("({})", c))
4558            .collect::<Vec<_>>()
4559            .join(" AND ")
4560    };
4561
4562    // Build the complete SQL
4563    let select_part = new_projections.join(", ");
4564
4565    // FROM part: if there was no original FROM, the series becomes the FROM source
4566    let from_and_joins = if from_parts.is_empty() {
4567        // No original FROM: series is the FROM source, everything else is CROSS JOIN
4568        format!("FROM {} {}", series_sql, cross_joins.join(" "))
4569    } else {
4570        format!(
4571            "FROM {} {} {}",
4572            from_parts.join(", "),
4573            format!("CROSS JOIN {}", series_sql),
4574            cross_joins.join(" ")
4575        )
4576    };
4577
4578    let full_sql = format!(
4579        "SELECT {} {} WHERE {}",
4580        select_part, from_and_joins, where_sql
4581    );
4582
4583    // Parse the constructed SQL using the Generic dialect (which handles all SQL syntax)
4584    // We use Generic instead of the target dialect to avoid parser limitations
4585    let generic_dialect = Dialect::get(DialectType::Generic);
4586    let parsed = generic_dialect.parse(&full_sql);
4587    match parsed {
4588        Ok(mut stmts) if !stmts.is_empty() => {
4589            let mut result = stmts.remove(0);
4590
4591            // For BigQuery, convert Presto-style UNNEST AST to BigQuery-style
4592            // Presto: Alias(Unnest(with_ordinality=true), alias=_u_N, column_aliases=[col, pos])
4593            // BigQuery: Unnest(with_ordinality=true, alias=col, offset_alias=pos) [no outer Alias]
4594            if is_bigquery {
4595                convert_unnest_presto_to_bigquery(&mut result);
4596            }
4597
4598            Ok(result)
4599        }
4600        _ => {
4601            // If parsing fails, return the original expression unchanged
4602            Ok(Expression::Select(Box::new(select)))
4603        }
4604    }
4605}
4606
4607/// Convert Presto-style UNNEST WITH ORDINALITY to BigQuery-style UNNEST WITH OFFSET in the AST.
4608/// Presto: Alias(Unnest(with_ordinality=true), alias=_u_N, column_aliases=[col, pos_N])
4609/// BigQuery: Unnest(with_ordinality=true, alias=col, offset_alias=pos_N)
4610fn convert_unnest_presto_to_bigquery(expr: &mut Expression) {
4611    match expr {
4612        Expression::Select(ref mut select) => {
4613            // Convert in FROM clause
4614            if let Some(ref mut from) = select.from {
4615                for from_item in from.expressions.iter_mut() {
4616                    convert_unnest_presto_to_bigquery(from_item);
4617                }
4618            }
4619            // Convert in JOINs
4620            for join in select.joins.iter_mut() {
4621                convert_unnest_presto_to_bigquery(&mut join.this);
4622            }
4623        }
4624        Expression::Alias(ref alias) => {
4625            // Check if this is Alias(Unnest(with_ordinality=true), ..., column_aliases=[col, pos])
4626            if let Expression::Unnest(ref unnest) = alias.this {
4627                if unnest.with_ordinality && alias.column_aliases.len() >= 2 {
4628                    let col_alias = alias.column_aliases[0].clone();
4629                    let pos_alias = alias.column_aliases[1].clone();
4630                    let mut new_unnest = unnest.as_ref().clone();
4631                    new_unnest.alias = Some(col_alias);
4632                    new_unnest.offset_alias = Some(pos_alias);
4633                    // Replace the Alias(Unnest) with just Unnest
4634                    *expr = Expression::Unnest(Box::new(new_unnest));
4635                }
4636            }
4637        }
4638        _ => {}
4639    }
4640}
4641
4642/// Find a new name that doesn't conflict with existing names.
4643/// Tries `base`, then `base_2`, `base_3`, etc.
4644fn new_name(names: &mut std::collections::HashSet<String>, base: &str) -> String {
4645    if !names.contains(base) {
4646        names.insert(base.to_string());
4647        return base.to_string();
4648    }
4649    let mut i = 2;
4650    loop {
4651        let candidate = format!("{}_{}", base, i);
4652        if !names.contains(&candidate) {
4653            names.insert(candidate.clone());
4654            return candidate;
4655        }
4656        i += 1;
4657    }
4658}
4659
4660/// Check if an expression contains EXPLODE or POSEXPLODE
4661fn expr_contains_explode(expr: &Expression) -> bool {
4662    match expr {
4663        Expression::Explode(_) => true,
4664        Expression::ExplodeOuter(_) => true,
4665        Expression::Function(f) => {
4666            let name = f.name.to_uppercase();
4667            name == "POSEXPLODE" || name == "POSEXPLODE_OUTER"
4668        }
4669        Expression::Alias(a) => expr_contains_explode(&a.this),
4670        _ => false,
4671    }
4672}
4673
4674/// Find the EXPLODE/POSEXPLODE expression within a select item, return the arg
4675fn find_explode_in_expr(expr: &Expression) -> Option<Expression> {
4676    match expr {
4677        Expression::Explode(uf) => Some(uf.this.clone()),
4678        Expression::ExplodeOuter(uf) => Some(uf.this.clone()),
4679        Expression::Function(f) => {
4680            let name = f.name.to_uppercase();
4681            if (name == "POSEXPLODE" || name == "POSEXPLODE_OUTER") && !f.args.is_empty() {
4682                Some(f.args[0].clone())
4683            } else {
4684                None
4685            }
4686        }
4687        Expression::Alias(a) => find_explode_in_expr(&a.this),
4688        _ => None,
4689    }
4690}
4691
4692/// Extract explode data from a SELECT expression.
4693/// Returns (is_posexplode, arg_expression, explicit_col_alias, explicit_pos_alias)
4694fn extract_explode_data(
4695    expr: &Expression,
4696) -> Option<(bool, Expression, Option<String>, Option<String>)> {
4697    match expr {
4698        // Bare EXPLODE(x) without alias
4699        Expression::Explode(uf) => Some((false, uf.this.clone(), None, None)),
4700        Expression::ExplodeOuter(uf) => Some((false, uf.this.clone(), None, None)),
4701        // Bare POSEXPLODE(x) without alias
4702        Expression::Function(f) => {
4703            let name = f.name.to_uppercase();
4704            if (name == "POSEXPLODE" || name == "POSEXPLODE_OUTER") && !f.args.is_empty() {
4705                Some((true, f.args[0].clone(), None, None))
4706            } else {
4707                None
4708            }
4709        }
4710        // Aliased: EXPLODE(x) AS col, or POSEXPLODE(x) AS (a, b)
4711        Expression::Alias(a) => {
4712            match &a.this {
4713                Expression::Explode(uf) => {
4714                    let alias = if !a.alias.is_empty() {
4715                        Some(a.alias.name.clone())
4716                    } else {
4717                        None
4718                    };
4719                    Some((false, uf.this.clone(), alias, None))
4720                }
4721                Expression::ExplodeOuter(uf) => {
4722                    let alias = if !a.alias.is_empty() {
4723                        Some(a.alias.name.clone())
4724                    } else {
4725                        None
4726                    };
4727                    Some((false, uf.this.clone(), alias, None))
4728                }
4729                Expression::Function(f) => {
4730                    let name = f.name.to_uppercase();
4731                    if (name == "POSEXPLODE" || name == "POSEXPLODE_OUTER") && !f.args.is_empty() {
4732                        // Check for column aliases: AS (a, b)
4733                        if a.column_aliases.len() == 2 {
4734                            let pos_alias = a.column_aliases[0].name.clone();
4735                            let col_alias = a.column_aliases[1].name.clone();
4736                            Some((true, f.args[0].clone(), Some(col_alias), Some(pos_alias)))
4737                        } else if !a.alias.is_empty() {
4738                            // Single alias: AS x
4739                            Some((true, f.args[0].clone(), Some(a.alias.name.clone()), None))
4740                        } else {
4741                            Some((true, f.args[0].clone(), None, None))
4742                        }
4743                    } else {
4744                        None
4745                    }
4746                }
4747                _ => None,
4748            }
4749        }
4750        _ => None,
4751    }
4752}
4753
4754/// Get the output name of a SELECT expression
4755fn get_output_name(expr: &Expression) -> Option<String> {
4756    match expr {
4757        Expression::Alias(a) => {
4758            if !a.alias.is_empty() {
4759                Some(a.alias.name.clone())
4760            } else {
4761                None
4762            }
4763        }
4764        Expression::Column(c) => Some(c.name.name.clone()),
4765        Expression::Identifier(id) => Some(id.name.clone()),
4766        _ => None,
4767    }
4768}
4769
4770/// Collect source names from a FROM/JOIN expression
4771fn collect_source_names(expr: &Expression, names: &mut std::collections::HashSet<String>) {
4772    match expr {
4773        Expression::Alias(a) => {
4774            if !a.alias.is_empty() {
4775                names.insert(a.alias.name.clone());
4776            }
4777        }
4778        Expression::Subquery(s) => {
4779            if let Some(ref alias) = s.alias {
4780                names.insert(alias.name.clone());
4781            }
4782        }
4783        Expression::Table(t) => {
4784            if let Some(ref alias) = t.alias {
4785                names.insert(alias.name.clone());
4786            } else {
4787                names.insert(t.name.name.clone());
4788            }
4789        }
4790        Expression::Column(c) => {
4791            names.insert(c.name.name.clone());
4792        }
4793        Expression::Identifier(id) => {
4794            names.insert(id.name.clone());
4795        }
4796        _ => {}
4797    }
4798}
4799
4800/// Strip UNNEST wrapping from column reference arguments for Redshift target.
4801/// BigQuery UNNEST(column_ref) -> Redshift: just column_ref
4802pub fn strip_unnest_column_refs(expr: Expression) -> Result<Expression> {
4803    use crate::dialects::transform_recursive;
4804    transform_recursive(expr, &strip_unnest_column_refs_single)
4805}
4806
4807fn strip_unnest_column_refs_single(expr: Expression) -> Result<Expression> {
4808    if let Expression::Select(mut select) = expr {
4809        // Process JOINs (UNNEST items have been moved to joins by unnest_from_to_cross_join)
4810        for join in select.joins.iter_mut() {
4811            strip_unnest_from_expr(&mut join.this);
4812        }
4813        // Process FROM items too
4814        if let Some(ref mut from) = select.from {
4815            for from_item in from.expressions.iter_mut() {
4816                strip_unnest_from_expr(from_item);
4817            }
4818        }
4819        Ok(Expression::Select(select))
4820    } else {
4821        Ok(expr)
4822    }
4823}
4824
4825/// If expr is Alias(UNNEST(column_ref), alias) where UNNEST arg is a column/dot path,
4826/// replace with Alias(column_ref, alias) to strip the UNNEST.
4827fn strip_unnest_from_expr(expr: &mut Expression) {
4828    if let Expression::Alias(ref mut alias) = expr {
4829        if let Expression::Unnest(ref unnest) = alias.this {
4830            let is_column_ref = matches!(&unnest.this, Expression::Column(_) | Expression::Dot(_));
4831            if is_column_ref {
4832                // Replace UNNEST(col_ref) with just col_ref
4833                let inner = unnest.this.clone();
4834                alias.this = inner;
4835            }
4836        }
4837    }
4838}
4839
4840/// Wrap DuckDB UNNEST of struct arrays in (SELECT UNNEST(..., max_depth => 2)) subquery.
4841/// BigQuery UNNEST of struct arrays needs this wrapping for DuckDB to properly expand struct fields.
4842pub fn wrap_duckdb_unnest_struct(expr: Expression) -> Result<Expression> {
4843    use crate::dialects::transform_recursive;
4844    transform_recursive(expr, &wrap_duckdb_unnest_struct_single)
4845}
4846
4847fn wrap_duckdb_unnest_struct_single(expr: Expression) -> Result<Expression> {
4848    if let Expression::Select(mut select) = expr {
4849        // Process FROM items
4850        if let Some(ref mut from) = select.from {
4851            for from_item in from.expressions.iter_mut() {
4852                try_wrap_unnest_in_subquery(from_item);
4853            }
4854        }
4855
4856        // Process JOINs
4857        for join in select.joins.iter_mut() {
4858            try_wrap_unnest_in_subquery(&mut join.this);
4859        }
4860
4861        Ok(Expression::Select(select))
4862    } else {
4863        Ok(expr)
4864    }
4865}
4866
4867/// Check if an expression contains struct array elements that need DuckDB UNNEST wrapping.
4868fn is_struct_array_unnest_arg(expr: &Expression) -> bool {
4869    match expr {
4870        // Array literal containing struct elements
4871        Expression::Array(arr) => arr
4872            .expressions
4873            .iter()
4874            .any(|e| matches!(e, Expression::Struct(_))),
4875        Expression::ArrayFunc(arr) => arr
4876            .expressions
4877            .iter()
4878            .any(|e| matches!(e, Expression::Struct(_))),
4879        // CAST to struct array type, e.g. CAST([] AS STRUCT(x BIGINT)[])
4880        Expression::Cast(c) => {
4881            matches!(&c.to, DataType::Array { element_type, .. } if matches!(**element_type, DataType::Struct { .. }))
4882        }
4883        _ => false,
4884    }
4885}
4886
4887/// Try to wrap an UNNEST expression in a (SELECT UNNEST(..., max_depth => 2)) subquery.
4888/// Handles both bare UNNEST and Alias(UNNEST).
4889fn try_wrap_unnest_in_subquery(expr: &mut Expression) {
4890    // Check for Alias wrapping UNNEST
4891    if let Expression::Alias(ref alias) = expr {
4892        if let Expression::Unnest(ref unnest) = alias.this {
4893            if is_struct_array_unnest_arg(&unnest.this) {
4894                let unnest_clone = (**unnest).clone();
4895                let alias_name = alias.alias.clone();
4896                let new_expr = make_unnest_subquery(unnest_clone, Some(alias_name));
4897                *expr = new_expr;
4898                return;
4899            }
4900        }
4901    }
4902
4903    // Check for bare UNNEST
4904    if let Expression::Unnest(ref unnest) = expr {
4905        if is_struct_array_unnest_arg(&unnest.this) {
4906            let unnest_clone = (**unnest).clone();
4907            let new_expr = make_unnest_subquery(unnest_clone, None);
4908            *expr = new_expr;
4909        }
4910    }
4911}
4912
4913/// Create (SELECT UNNEST(arg, max_depth => 2)) [AS alias] subquery.
4914fn make_unnest_subquery(unnest: UnnestFunc, alias: Option<Identifier>) -> Expression {
4915    // Build UNNEST function call with max_depth => 2 named argument
4916    let max_depth_arg = Expression::NamedArgument(Box::new(NamedArgument {
4917        name: Identifier::new("max_depth".to_string()),
4918        value: Expression::Literal(Box::new(Literal::Number("2".to_string()))),
4919        separator: NamedArgSeparator::DArrow,
4920    }));
4921
4922    let mut unnest_args = vec![unnest.this];
4923    unnest_args.extend(unnest.expressions);
4924    unnest_args.push(max_depth_arg);
4925
4926    let unnest_func =
4927        Expression::Function(Box::new(Function::new("UNNEST".to_string(), unnest_args)));
4928
4929    // Build SELECT UNNEST(...)
4930    let mut inner_select = Select::new();
4931    inner_select.expressions = vec![unnest_func];
4932    let inner_select = Expression::Select(Box::new(inner_select));
4933
4934    // Wrap in subquery
4935    let subquery = Subquery {
4936        this: inner_select,
4937        alias,
4938        column_aliases: Vec::new(),
4939        alias_explicit_as: false,
4940        alias_keyword: None,
4941        order_by: None,
4942        limit: None,
4943        offset: None,
4944        distribute_by: None,
4945        sort_by: None,
4946        cluster_by: None,
4947        lateral: false,
4948        modifiers_inside: false,
4949        trailing_comments: Vec::new(),
4950        inferred_type: None,
4951    };
4952
4953    Expression::Subquery(Box::new(subquery))
4954}
4955
4956/// Wrap UNION with ORDER BY/LIMIT in a subquery.
4957///
4958/// Some dialects (ClickHouse, TSQL) don't support ORDER BY/LIMIT directly on UNION.
4959/// This transform converts:
4960///   SELECT ... UNION SELECT ... ORDER BY x LIMIT n
4961/// to:
4962///   SELECT * FROM (SELECT ... UNION SELECT ...) AS _l_0 ORDER BY x LIMIT n
4963///
4964/// NOTE: Our parser may place ORDER BY/LIMIT on the right-hand SELECT rather than
4965/// the Union (unlike Python sqlglot). This function handles both cases by checking
4966/// the right-hand SELECT for trailing ORDER BY/LIMIT and moving them to the Union.
4967pub fn no_limit_order_by_union(expr: Expression) -> Result<Expression> {
4968    use crate::expressions::{Limit as LimitClause, Offset as OffsetClause, OrderBy, Star};
4969
4970    match expr {
4971        Expression::Union(mut u) => {
4972            // Check if ORDER BY/LIMIT are on the rightmost Select instead of the Union
4973            // (our parser may attach them to the right SELECT)
4974            if u.order_by.is_none() && u.limit.is_none() && u.offset.is_none() {
4975                // Find the rightmost Select and check for ORDER BY/LIMIT
4976                if let Expression::Select(ref mut right_select) = u.right {
4977                    if right_select.order_by.is_some()
4978                        || right_select.limit.is_some()
4979                        || right_select.offset.is_some()
4980                    {
4981                        // Move ORDER BY/LIMIT from right Select to Union
4982                        u.order_by = right_select.order_by.take();
4983                        u.limit = right_select.limit.take().map(|l| Box::new(l.this));
4984                        u.offset = right_select.offset.take().map(|o| Box::new(o.this));
4985                    }
4986                }
4987            }
4988
4989            let has_order_or_limit =
4990                u.order_by.is_some() || u.limit.is_some() || u.offset.is_some();
4991            if has_order_or_limit {
4992                // Extract ORDER BY, LIMIT, OFFSET from the Union
4993                let order_by: Option<OrderBy> = u.order_by.take();
4994                let union_limit: Option<Box<Expression>> = u.limit.take();
4995                let union_offset: Option<Box<Expression>> = u.offset.take();
4996
4997                // Convert Union's limit (Box<Expression>) to Select's limit (Limit struct)
4998                let select_limit: Option<LimitClause> = union_limit.map(|l| LimitClause {
4999                    this: *l,
5000                    percent: false,
5001                    comments: Vec::new(),
5002                });
5003
5004                // Convert Union's offset (Box<Expression>) to Select's offset (Offset struct)
5005                let select_offset: Option<OffsetClause> = union_offset.map(|o| OffsetClause {
5006                    this: *o,
5007                    rows: None,
5008                });
5009
5010                // Create a subquery from the Union
5011                let subquery = Subquery {
5012                    this: Expression::Union(u),
5013                    alias: Some(Identifier::new("_l_0")),
5014                    column_aliases: Vec::new(),
5015                    alias_explicit_as: true,
5016                    alias_keyword: None,
5017                    lateral: false,
5018                    modifiers_inside: false,
5019                    order_by: None,
5020                    limit: None,
5021                    offset: None,
5022                    distribute_by: None,
5023                    sort_by: None,
5024                    cluster_by: None,
5025                    trailing_comments: Vec::new(),
5026                    inferred_type: None,
5027                };
5028
5029                // Build SELECT * FROM (UNION) AS _l_0 ORDER BY ... LIMIT ...
5030                let mut select = Select::default();
5031                select.expressions = vec![Expression::Star(Star {
5032                    table: None,
5033                    except: None,
5034                    replace: None,
5035                    rename: None,
5036                    trailing_comments: Vec::new(),
5037                    span: None,
5038                })];
5039                select.from = Some(From {
5040                    expressions: vec![Expression::Subquery(Box::new(subquery))],
5041                });
5042                select.order_by = order_by;
5043                select.limit = select_limit;
5044                select.offset = select_offset;
5045
5046                Ok(Expression::Select(Box::new(select)))
5047            } else {
5048                Ok(Expression::Union(u))
5049            }
5050        }
5051        _ => Ok(expr),
5052    }
5053}
5054
5055/// Expand LIKE ANY / ILIKE ANY to OR chains.
5056///
5057/// For dialects that don't support quantifiers on LIKE/ILIKE (e.g. DuckDB),
5058/// expand `x LIKE ANY (('a', 'b'))` to `x LIKE 'a' OR x LIKE 'b'`.
5059///
5060/// Handles precedence: when LIKE ANY (→OR) is inside AND, wraps in parens.
5061/// When LIKE ALL (→AND) is inside OR, wraps in parens for readability.
5062pub fn expand_like_any(expr: Expression) -> Result<Expression> {
5063    use crate::expressions::{BinaryOp, LikeOp, Paren};
5064
5065    /// Sentinel comment used to mark Paren nodes created by LIKE ALL expansion.
5066    /// These markers are stripped in a cleanup pass unless they end up inside an OR parent.
5067    const LIKE_ALL_MARKER: &str = "__LIKE_ALL_EXPANSION__";
5068
5069    fn unwrap_parens(e: &Expression) -> &Expression {
5070        match e {
5071            Expression::Paren(p) => unwrap_parens(&p.this),
5072            _ => e,
5073        }
5074    }
5075
5076    fn extract_tuple_values(e: &Expression) -> Option<Vec<Expression>> {
5077        let inner = unwrap_parens(e);
5078        match inner {
5079            Expression::Tuple(t) => Some(t.expressions.clone()),
5080            // Single value in parens: treat as single-element list
5081            _ if !matches!(e, Expression::Tuple(_)) => Some(vec![inner.clone()]),
5082            _ => None,
5083        }
5084    }
5085
5086    /// Build a chain of LIKE/ILIKE conditions joined by a combiner (OR for ANY, AND for ALL).
5087    fn expand_like_quantifier(
5088        op: &LikeOp,
5089        values: Vec<Expression>,
5090        is_ilike: bool,
5091        combiner: fn(Expression, Expression) -> Expression,
5092        wrap_marker: bool,
5093    ) -> Expression {
5094        let num_values = values.len();
5095        let mut result: Option<Expression> = None;
5096        for val in values {
5097            let like = if is_ilike {
5098                Expression::ILike(Box::new(LikeOp {
5099                    left: op.left.clone(),
5100                    right: val,
5101                    escape: op.escape.clone(),
5102                    quantifier: None,
5103                    inferred_type: None,
5104                }))
5105            } else {
5106                Expression::Like(Box::new(LikeOp {
5107                    left: op.left.clone(),
5108                    right: val,
5109                    escape: op.escape.clone(),
5110                    quantifier: None,
5111                    inferred_type: None,
5112                }))
5113            };
5114            result = Some(match result {
5115                None => like,
5116                Some(prev) => combiner(prev, like),
5117            });
5118        }
5119        let expanded = result.unwrap_or_else(|| unreachable!("values is non-empty"));
5120        // For LIKE ALL (AND chain) with multiple values, wrap in a marker Paren.
5121        // The marker lets us distinguish expansion-created AND from parser-created AND
5122        // when deciding whether to keep parens inside OR.
5123        if wrap_marker && num_values > 1 {
5124            Expression::Paren(Box::new(Paren {
5125                this: expanded,
5126                trailing_comments: vec![LIKE_ALL_MARKER.to_string()],
5127            }))
5128        } else {
5129            expanded
5130        }
5131    }
5132
5133    fn or_combiner(a: Expression, b: Expression) -> Expression {
5134        Expression::Or(Box::new(BinaryOp::new(a, b)))
5135    }
5136
5137    fn and_combiner(a: Expression, b: Expression) -> Expression {
5138        Expression::And(Box::new(BinaryOp::new(a, b)))
5139    }
5140
5141    fn is_like_all_marker(p: &Paren) -> bool {
5142        p.trailing_comments.len() == 1 && p.trailing_comments[0] == LIKE_ALL_MARKER
5143    }
5144
5145    // Phase 1: Expand LIKE ANY/ALL and fix precedence in a single bottom-up pass.
5146    //
5147    // - LIKE ANY → bare Or chain
5148    // - LIKE ALL → Paren(And chain) with marker comment
5149    // - And handler: wraps bare Or children in Paren (from LIKE ANY expansion;
5150    //   parser never creates bare Or inside And)
5151    // - Or handler: converts marker Paren to clean Paren (keeps the wrapping)
5152    let result = transform_recursive(expr, &|e| {
5153        match e {
5154            // LIKE ANY -> OR chain (bare)
5155            Expression::Like(ref op) if op.quantifier.as_deref() == Some("ANY") => {
5156                if let Some(values) = extract_tuple_values(&op.right) {
5157                    if values.is_empty() {
5158                        return Ok(e);
5159                    }
5160                    Ok(expand_like_quantifier(
5161                        op,
5162                        values,
5163                        false,
5164                        or_combiner,
5165                        false,
5166                    ))
5167                } else {
5168                    Ok(e)
5169                }
5170            }
5171            // LIKE ALL -> AND chain (with marker Paren)
5172            Expression::Like(ref op) if op.quantifier.as_deref() == Some("ALL") => {
5173                if let Some(values) = extract_tuple_values(&op.right) {
5174                    if values.is_empty() {
5175                        return Ok(e);
5176                    }
5177                    Ok(expand_like_quantifier(
5178                        op,
5179                        values,
5180                        false,
5181                        and_combiner,
5182                        true,
5183                    ))
5184                } else {
5185                    Ok(e)
5186                }
5187            }
5188            // ILIKE ANY -> OR chain (bare)
5189            Expression::ILike(ref op) if op.quantifier.as_deref() == Some("ANY") => {
5190                if let Some(values) = extract_tuple_values(&op.right) {
5191                    if values.is_empty() {
5192                        return Ok(e);
5193                    }
5194                    Ok(expand_like_quantifier(op, values, true, or_combiner, false))
5195                } else {
5196                    Ok(e)
5197                }
5198            }
5199            // ILIKE ALL -> AND chain (with marker Paren)
5200            Expression::ILike(ref op) if op.quantifier.as_deref() == Some("ALL") => {
5201                if let Some(values) = extract_tuple_values(&op.right) {
5202                    if values.is_empty() {
5203                        return Ok(e);
5204                    }
5205                    Ok(expand_like_quantifier(op, values, true, and_combiner, true))
5206                } else {
5207                    Ok(e)
5208                }
5209            }
5210            // After children are expanded (bottom-up), fix And nodes:
5211            // Wrap bare Or children in Paren (from LIKE ANY expansion).
5212            // The parser never produces bare Or inside And (AND binds tighter than OR,
5213            // so explicit parens in SQL like "(a OR b) AND c" create Paren(Or(...)) in the AST).
5214            Expression::And(mut op) => {
5215                if matches!(&op.left, Expression::Or(_)) {
5216                    op.left = Expression::Paren(Box::new(Paren {
5217                        this: op.left,
5218                        trailing_comments: vec![],
5219                    }));
5220                }
5221                if matches!(&op.right, Expression::Or(_)) {
5222                    op.right = Expression::Paren(Box::new(Paren {
5223                        this: op.right,
5224                        trailing_comments: vec![],
5225                    }));
5226                }
5227                Ok(Expression::And(op))
5228            }
5229            // After children are expanded (bottom-up), fix Or nodes:
5230            // Convert marker Paren(And) to clean Paren(And) so the cleanup pass won't strip it.
5231            Expression::Or(mut op) => {
5232                if let Expression::Paren(ref mut p) = op.left {
5233                    if is_like_all_marker(p) {
5234                        p.trailing_comments.clear();
5235                    }
5236                }
5237                if let Expression::Paren(ref mut p) = op.right {
5238                    if is_like_all_marker(p) {
5239                        p.trailing_comments.clear();
5240                    }
5241                }
5242                Ok(Expression::Or(op))
5243            }
5244            _ => Ok(e),
5245        }
5246    })?;
5247
5248    // Phase 2: Strip remaining marker Paren(And) nodes that weren't inside an Or parent.
5249    // These are standalone LIKE ALL expansions (e.g., in SELECT expressions) that don't
5250    // need parentheses.
5251    transform_recursive(result, &|e| {
5252        if let Expression::Paren(p) = &e {
5253            if is_like_all_marker(p) {
5254                let Expression::Paren(p) = e else {
5255                    unreachable!()
5256                };
5257                return Ok(p.this);
5258            }
5259        }
5260        Ok(e)
5261    })
5262}
5263
5264/// Ensures all unaliased column outputs in subqueries and CTEs get self-aliases.
5265///
5266/// This is needed for TSQL which requires derived table outputs to be aliased.
5267/// For example: `SELECT c FROM t` inside a subquery becomes `SELECT c AS c FROM t`.
5268///
5269/// Mirrors Python sqlglot's `qualify_derived_table_outputs` function which is applied
5270/// as a TRANSFORMS preprocessor for Subquery and CTE expressions in the TSQL dialect.
5271pub fn qualify_derived_table_outputs(expr: Expression) -> Result<Expression> {
5272    use crate::expressions::Alias;
5273
5274    fn add_self_aliases_to_select(select: &mut Select) {
5275        let new_expressions: Vec<Expression> = select
5276            .expressions
5277            .iter()
5278            .map(|e| {
5279                match e {
5280                    // Column reference without alias -> add self-alias
5281                    Expression::Column(col) => {
5282                        let alias_name = col.name.clone();
5283                        Expression::Alias(Box::new(Alias {
5284                            this: e.clone(),
5285                            alias: alias_name,
5286                            column_aliases: Vec::new(),
5287                            alias_explicit_as: false,
5288                            alias_keyword: None,
5289                            pre_alias_comments: Vec::new(),
5290                            trailing_comments: Vec::new(),
5291                            inferred_type: None,
5292                        }))
5293                    }
5294                    // Already aliased or star or other -> keep as is
5295                    _ => e.clone(),
5296                }
5297            })
5298            .collect();
5299        select.expressions = new_expressions;
5300    }
5301
5302    fn walk_and_qualify(expr: &mut Expression) {
5303        match expr {
5304            Expression::Select(ref mut select) => {
5305                // Qualify subqueries in FROM
5306                if let Some(ref mut from) = select.from {
5307                    for e in from.expressions.iter_mut() {
5308                        qualify_subquery_expr(e);
5309                        walk_and_qualify(e);
5310                    }
5311                }
5312                // Qualify subqueries in JOINs
5313                for join in select.joins.iter_mut() {
5314                    qualify_subquery_expr(&mut join.this);
5315                    walk_and_qualify(&mut join.this);
5316                }
5317                // Recurse into expressions (for correlated subqueries etc.)
5318                for e in select.expressions.iter_mut() {
5319                    walk_and_qualify(e);
5320                }
5321                // Recurse into WHERE
5322                if let Some(ref mut w) = select.where_clause {
5323                    walk_and_qualify(&mut w.this);
5324                }
5325            }
5326            Expression::Subquery(ref mut subquery) => {
5327                walk_and_qualify(&mut subquery.this);
5328            }
5329            Expression::Union(ref mut u) => {
5330                walk_and_qualify(&mut u.left);
5331                walk_and_qualify(&mut u.right);
5332            }
5333            Expression::Intersect(ref mut i) => {
5334                walk_and_qualify(&mut i.left);
5335                walk_and_qualify(&mut i.right);
5336            }
5337            Expression::Except(ref mut e) => {
5338                walk_and_qualify(&mut e.left);
5339                walk_and_qualify(&mut e.right);
5340            }
5341            Expression::Cte(ref mut cte) => {
5342                walk_and_qualify(&mut cte.this);
5343            }
5344            _ => {}
5345        }
5346    }
5347
5348    fn qualify_subquery_expr(expr: &mut Expression) {
5349        match expr {
5350            Expression::Subquery(ref mut subquery) => {
5351                // Only qualify if the subquery has a table alias but no column aliases
5352                if subquery.alias.is_some() && subquery.column_aliases.is_empty() {
5353                    if let Expression::Select(ref mut inner_select) = subquery.this {
5354                        // Check the inner select doesn't use *
5355                        let has_star = inner_select
5356                            .expressions
5357                            .iter()
5358                            .any(|e| matches!(e, Expression::Star(_)));
5359                        if !has_star {
5360                            add_self_aliases_to_select(inner_select);
5361                        }
5362                    }
5363                }
5364                // Recurse into the subquery's inner query
5365                walk_and_qualify(&mut subquery.this);
5366            }
5367            Expression::Alias(ref mut alias) => {
5368                qualify_subquery_expr(&mut alias.this);
5369            }
5370            _ => {}
5371        }
5372    }
5373
5374    let mut result = expr;
5375    walk_and_qualify(&mut result);
5376
5377    // Also qualify CTE inner queries at the top level
5378    if let Expression::Select(ref mut select) = result {
5379        if let Some(ref mut with) = select.with {
5380            for cte in with.ctes.iter_mut() {
5381                // CTE with column names -> no need to qualify
5382                if cte.columns.is_empty() {
5383                    // Walk into the CTE's inner query for nested subqueries
5384                    walk_and_qualify(&mut cte.this);
5385                }
5386            }
5387        }
5388    }
5389
5390    Ok(result)
5391}
5392
5393#[cfg(test)]
5394mod tests {
5395    use super::*;
5396    use crate::dialects::{Dialect, DialectType};
5397    use crate::expressions::Column;
5398
5399    fn gen(expr: &Expression) -> String {
5400        let dialect = Dialect::get(DialectType::Generic);
5401        dialect.generate(expr).unwrap()
5402    }
5403
5404    #[test]
5405    fn test_preprocess() {
5406        let expr = Expression::Boolean(BooleanLiteral { value: true });
5407        let result = preprocess(expr, &[replace_bool_with_int]).unwrap();
5408        assert!(
5409            matches!(result, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)))
5410        );
5411    }
5412
5413    #[test]
5414    fn test_preprocess_chain() {
5415        // Test chaining multiple transforms using function pointers
5416        let expr = Expression::Boolean(BooleanLiteral { value: true });
5417        // Create array of function pointers (all same type)
5418        let transforms: Vec<fn(Expression) -> Result<Expression>> =
5419            vec![replace_bool_with_int, replace_int_with_bool];
5420        let result = preprocess(expr, &transforms).unwrap();
5421        // After replace_bool_with_int: 1
5422        // After replace_int_with_bool: true
5423        if let Expression::Boolean(b) = result {
5424            assert!(b.value);
5425        } else {
5426            panic!("Expected boolean literal");
5427        }
5428    }
5429
5430    #[test]
5431    fn test_unnest_to_explode() {
5432        let unnest = Expression::Unnest(Box::new(UnnestFunc {
5433            this: Expression::boxed_column(Column {
5434                name: Identifier::new("arr".to_string()),
5435                table: None,
5436                join_mark: false,
5437                trailing_comments: vec![],
5438                span: None,
5439                inferred_type: None,
5440            }),
5441            expressions: Vec::new(),
5442            with_ordinality: false,
5443            alias: None,
5444            offset_alias: None,
5445        }));
5446
5447        let result = unnest_to_explode(unnest).unwrap();
5448        assert!(matches!(result, Expression::Explode(_)));
5449    }
5450
5451    #[test]
5452    fn test_explode_to_unnest() {
5453        let explode = Expression::Explode(Box::new(UnaryFunc {
5454            this: Expression::boxed_column(Column {
5455                name: Identifier::new("arr".to_string()),
5456                table: None,
5457                join_mark: false,
5458                trailing_comments: vec![],
5459                span: None,
5460                inferred_type: None,
5461            }),
5462            original_name: None,
5463            inferred_type: None,
5464        }));
5465
5466        let result = explode_to_unnest(explode).unwrap();
5467        assert!(matches!(result, Expression::Unnest(_)));
5468    }
5469
5470    #[test]
5471    fn test_replace_bool_with_int() {
5472        let true_expr = Expression::Boolean(BooleanLiteral { value: true });
5473        let result = replace_bool_with_int(true_expr).unwrap();
5474        if let Expression::Literal(lit) = result {
5475            if let Literal::Number(n) = lit.as_ref() {
5476                assert_eq!(n, "1");
5477            }
5478        } else {
5479            panic!("Expected number literal");
5480        }
5481
5482        let false_expr = Expression::Boolean(BooleanLiteral { value: false });
5483        let result = replace_bool_with_int(false_expr).unwrap();
5484        if let Expression::Literal(lit) = result {
5485            if let Literal::Number(n) = lit.as_ref() {
5486                assert_eq!(n, "0");
5487            }
5488        } else {
5489            panic!("Expected number literal");
5490        }
5491    }
5492
5493    #[test]
5494    fn test_replace_int_with_bool() {
5495        let one_expr = Expression::Literal(Box::new(Literal::Number("1".to_string())));
5496        let result = replace_int_with_bool(one_expr).unwrap();
5497        if let Expression::Boolean(b) = result {
5498            assert!(b.value);
5499        } else {
5500            panic!("Expected boolean true");
5501        }
5502
5503        let zero_expr = Expression::Literal(Box::new(Literal::Number("0".to_string())));
5504        let result = replace_int_with_bool(zero_expr).unwrap();
5505        if let Expression::Boolean(b) = result {
5506            assert!(!b.value);
5507        } else {
5508            panic!("Expected boolean false");
5509        }
5510
5511        // Test that other numbers are not converted
5512        let two_expr = Expression::Literal(Box::new(Literal::Number("2".to_string())));
5513        let result = replace_int_with_bool(two_expr).unwrap();
5514        assert!(
5515            matches!(result, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)))
5516        );
5517    }
5518
5519    #[test]
5520    fn test_strip_data_type_params() {
5521        // Test Decimal
5522        let decimal = DataType::Decimal {
5523            precision: Some(10),
5524            scale: Some(2),
5525        };
5526        let stripped = strip_data_type_params(decimal);
5527        assert_eq!(
5528            stripped,
5529            DataType::Decimal {
5530                precision: None,
5531                scale: None
5532            }
5533        );
5534
5535        // Test VarChar
5536        let varchar = DataType::VarChar {
5537            length: Some(255),
5538            parenthesized_length: false,
5539        };
5540        let stripped = strip_data_type_params(varchar);
5541        assert_eq!(
5542            stripped,
5543            DataType::VarChar {
5544                length: None,
5545                parenthesized_length: false
5546            }
5547        );
5548
5549        // Test Char
5550        let char_type = DataType::Char { length: Some(10) };
5551        let stripped = strip_data_type_params(char_type);
5552        assert_eq!(stripped, DataType::Char { length: None });
5553
5554        // Test Timestamp (preserve timezone)
5555        let timestamp = DataType::Timestamp {
5556            precision: Some(6),
5557            timezone: true,
5558        };
5559        let stripped = strip_data_type_params(timestamp);
5560        assert_eq!(
5561            stripped,
5562            DataType::Timestamp {
5563                precision: None,
5564                timezone: true
5565            }
5566        );
5567
5568        // Test Array (recursive)
5569        let array = DataType::Array {
5570            element_type: Box::new(DataType::VarChar {
5571                length: Some(100),
5572                parenthesized_length: false,
5573            }),
5574            dimension: None,
5575        };
5576        let stripped = strip_data_type_params(array);
5577        assert_eq!(
5578            stripped,
5579            DataType::Array {
5580                element_type: Box::new(DataType::VarChar {
5581                    length: None,
5582                    parenthesized_length: false
5583                }),
5584                dimension: None,
5585            }
5586        );
5587
5588        // Test types without params are unchanged
5589        let text = DataType::Text;
5590        let stripped = strip_data_type_params(text);
5591        assert_eq!(stripped, DataType::Text);
5592    }
5593
5594    #[test]
5595    fn test_remove_precision_parameterized_types_cast() {
5596        // Create a CAST(1 AS DECIMAL(10, 2)) expression
5597        let cast_expr = Expression::Cast(Box::new(Cast {
5598            this: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
5599            to: DataType::Decimal {
5600                precision: Some(10),
5601                scale: Some(2),
5602            },
5603            trailing_comments: vec![],
5604            double_colon_syntax: false,
5605            format: None,
5606            default: None,
5607            inferred_type: None,
5608        }));
5609
5610        let result = remove_precision_parameterized_types(cast_expr).unwrap();
5611        if let Expression::Cast(cast) = result {
5612            assert_eq!(
5613                cast.to,
5614                DataType::Decimal {
5615                    precision: None,
5616                    scale: None
5617                }
5618            );
5619        } else {
5620            panic!("Expected Cast expression");
5621        }
5622    }
5623
5624    #[test]
5625    fn test_remove_precision_parameterized_types_varchar() {
5626        // Create a CAST('hello' AS VARCHAR(10)) expression
5627        let cast_expr = Expression::Cast(Box::new(Cast {
5628            this: Expression::Literal(Box::new(Literal::String("hello".to_string()))),
5629            to: DataType::VarChar {
5630                length: Some(10),
5631                parenthesized_length: false,
5632            },
5633            trailing_comments: vec![],
5634            double_colon_syntax: false,
5635            format: None,
5636            default: None,
5637            inferred_type: None,
5638        }));
5639
5640        let result = remove_precision_parameterized_types(cast_expr).unwrap();
5641        if let Expression::Cast(cast) = result {
5642            assert_eq!(
5643                cast.to,
5644                DataType::VarChar {
5645                    length: None,
5646                    parenthesized_length: false
5647                }
5648            );
5649        } else {
5650            panic!("Expected Cast expression");
5651        }
5652    }
5653
5654    #[test]
5655    fn test_remove_precision_direct_cast() {
5656        // Test transform on a direct Cast expression (not nested in Select)
5657        // The current implementation handles top-level Cast expressions;
5658        // a full implementation would need recursive AST traversal
5659        let cast = Expression::Cast(Box::new(Cast {
5660            this: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
5661            to: DataType::Decimal {
5662                precision: Some(10),
5663                scale: Some(2),
5664            },
5665            trailing_comments: vec![],
5666            double_colon_syntax: false,
5667            format: None,
5668            default: None,
5669            inferred_type: None,
5670        }));
5671
5672        let transformed = remove_precision_parameterized_types(cast).unwrap();
5673        let generated = gen(&transformed);
5674
5675        // Should now be DECIMAL without precision
5676        assert!(generated.contains("DECIMAL"));
5677        assert!(!generated.contains("(10"));
5678    }
5679
5680    #[test]
5681    fn test_epoch_cast_to_ts() {
5682        // Test CAST('epoch' AS TIMESTAMP) → CAST('1970-01-01 00:00:00' AS TIMESTAMP)
5683        let cast_expr = Expression::Cast(Box::new(Cast {
5684            this: Expression::Literal(Box::new(Literal::String("epoch".to_string()))),
5685            to: DataType::Timestamp {
5686                precision: None,
5687                timezone: false,
5688            },
5689            trailing_comments: vec![],
5690            double_colon_syntax: false,
5691            format: None,
5692            default: None,
5693            inferred_type: None,
5694        }));
5695
5696        let result = epoch_cast_to_ts(cast_expr).unwrap();
5697        if let Expression::Cast(cast) = result {
5698            if let Expression::Literal(lit) = cast.this {
5699                if let Literal::String(s) = lit.as_ref() {
5700                    assert_eq!(s, "1970-01-01 00:00:00");
5701                }
5702            } else {
5703                panic!("Expected string literal");
5704            }
5705        } else {
5706            panic!("Expected Cast expression");
5707        }
5708    }
5709
5710    #[test]
5711    fn test_epoch_cast_to_ts_preserves_non_epoch() {
5712        // Test that non-epoch strings are preserved
5713        let cast_expr = Expression::Cast(Box::new(Cast {
5714            this: Expression::Literal(Box::new(Literal::String("2024-01-15".to_string()))),
5715            to: DataType::Timestamp {
5716                precision: None,
5717                timezone: false,
5718            },
5719            trailing_comments: vec![],
5720            double_colon_syntax: false,
5721            format: None,
5722            default: None,
5723            inferred_type: None,
5724        }));
5725
5726        let result = epoch_cast_to_ts(cast_expr).unwrap();
5727        if let Expression::Cast(cast) = result {
5728            if let Expression::Literal(lit) = cast.this {
5729                if let Literal::String(s) = lit.as_ref() {
5730                    assert_eq!(s, "2024-01-15");
5731                }
5732            } else {
5733                panic!("Expected string literal");
5734            }
5735        } else {
5736            panic!("Expected Cast expression");
5737        }
5738    }
5739
5740    #[test]
5741    fn test_unqualify_columns() {
5742        // Test that table qualifiers are removed
5743        let col = Expression::boxed_column(Column {
5744            name: Identifier::new("id".to_string()),
5745            table: Some(Identifier::new("users".to_string())),
5746            join_mark: false,
5747            trailing_comments: vec![],
5748            span: None,
5749            inferred_type: None,
5750        });
5751
5752        let result = unqualify_columns(col).unwrap();
5753        if let Expression::Column(c) = result {
5754            assert!(c.table.is_none());
5755            assert_eq!(c.name.name, "id");
5756        } else {
5757            panic!("Expected Column expression");
5758        }
5759    }
5760
5761    #[test]
5762    fn test_is_temporal_type() {
5763        assert!(is_temporal_type(&DataType::Date));
5764        assert!(is_temporal_type(&DataType::Timestamp {
5765            precision: None,
5766            timezone: false
5767        }));
5768        assert!(is_temporal_type(&DataType::Time {
5769            precision: None,
5770            timezone: false
5771        }));
5772        assert!(!is_temporal_type(&DataType::Int {
5773            length: None,
5774            integer_spelling: false
5775        }));
5776        assert!(!is_temporal_type(&DataType::VarChar {
5777            length: None,
5778            parenthesized_length: false
5779        }));
5780    }
5781
5782    #[test]
5783    fn test_eliminate_semi_join_basic() {
5784        use crate::expressions::{Join, TableRef};
5785
5786        // Test that semi joins are converted to EXISTS
5787        let select = Expression::Select(Box::new(Select {
5788            expressions: vec![Expression::boxed_column(Column {
5789                name: Identifier::new("a".to_string()),
5790                table: None,
5791                join_mark: false,
5792                trailing_comments: vec![],
5793                span: None,
5794                inferred_type: None,
5795            })],
5796            from: Some(From {
5797                expressions: vec![Expression::Table(Box::new(TableRef::new("t1")))],
5798            }),
5799            joins: vec![Join {
5800                this: Expression::Table(Box::new(TableRef::new("t2"))),
5801                kind: JoinKind::Semi,
5802                on: Some(Expression::Eq(Box::new(BinaryOp {
5803                    left: Expression::boxed_column(Column {
5804                        name: Identifier::new("x".to_string()),
5805                        table: None,
5806                        join_mark: false,
5807                        trailing_comments: vec![],
5808                        span: None,
5809                        inferred_type: None,
5810                    }),
5811                    right: Expression::boxed_column(Column {
5812                        name: Identifier::new("y".to_string()),
5813                        table: None,
5814                        join_mark: false,
5815                        trailing_comments: vec![],
5816                        span: None,
5817                        inferred_type: None,
5818                    }),
5819                    left_comments: vec![],
5820                    operator_comments: vec![],
5821                    trailing_comments: vec![],
5822                    inferred_type: None,
5823                }))),
5824                using: vec![],
5825                use_inner_keyword: false,
5826                use_outer_keyword: false,
5827                deferred_condition: false,
5828                join_hint: None,
5829                match_condition: None,
5830                pivots: Vec::new(),
5831                comments: Vec::new(),
5832                nesting_group: 0,
5833                directed: false,
5834            }],
5835            ..Select::new()
5836        }));
5837
5838        let result = eliminate_semi_and_anti_joins(select).unwrap();
5839        if let Expression::Select(s) = result {
5840            // Semi join should be removed
5841            assert!(s.joins.is_empty());
5842            // WHERE clause should have EXISTS
5843            assert!(s.where_clause.is_some());
5844        } else {
5845            panic!("Expected Select expression");
5846        }
5847    }
5848
5849    #[test]
5850    fn test_no_ilike_sql() {
5851        use crate::expressions::LikeOp;
5852
5853        // Test ILIKE conversion to LOWER+LIKE
5854        let ilike_expr = Expression::ILike(Box::new(LikeOp {
5855            left: Expression::boxed_column(Column {
5856                name: Identifier::new("name".to_string()),
5857                table: None,
5858                join_mark: false,
5859                trailing_comments: vec![],
5860                span: None,
5861                inferred_type: None,
5862            }),
5863            right: Expression::Literal(Box::new(Literal::String("%test%".to_string()))),
5864            escape: None,
5865            quantifier: None,
5866            inferred_type: None,
5867        }));
5868
5869        let result = no_ilike_sql(ilike_expr).unwrap();
5870        if let Expression::Like(like) = result {
5871            // Left should be LOWER(name)
5872            if let Expression::Function(f) = &like.left {
5873                assert_eq!(f.name, "LOWER");
5874            } else {
5875                panic!("Expected LOWER function on left");
5876            }
5877            // Right should be LOWER('%test%')
5878            if let Expression::Function(f) = &like.right {
5879                assert_eq!(f.name, "LOWER");
5880            } else {
5881                panic!("Expected LOWER function on right");
5882            }
5883        } else {
5884            panic!("Expected Like expression");
5885        }
5886    }
5887
5888    #[test]
5889    fn test_no_trycast_sql() {
5890        // Test TryCast conversion to Cast
5891        let trycast_expr = Expression::TryCast(Box::new(Cast {
5892            this: Expression::Literal(Box::new(Literal::String("123".to_string()))),
5893            to: DataType::Int {
5894                length: None,
5895                integer_spelling: false,
5896            },
5897            trailing_comments: vec![],
5898            double_colon_syntax: false,
5899            format: None,
5900            default: None,
5901            inferred_type: None,
5902        }));
5903
5904        let result = no_trycast_sql(trycast_expr).unwrap();
5905        assert!(matches!(result, Expression::Cast(_)));
5906    }
5907
5908    #[test]
5909    fn test_no_safe_cast_sql() {
5910        // Test SafeCast conversion to Cast
5911        let safe_cast_expr = Expression::SafeCast(Box::new(Cast {
5912            this: Expression::Literal(Box::new(Literal::String("123".to_string()))),
5913            to: DataType::Int {
5914                length: None,
5915                integer_spelling: false,
5916            },
5917            trailing_comments: vec![],
5918            double_colon_syntax: false,
5919            format: None,
5920            default: None,
5921            inferred_type: None,
5922        }));
5923
5924        let result = no_safe_cast_sql(safe_cast_expr).unwrap();
5925        assert!(matches!(result, Expression::Cast(_)));
5926    }
5927
5928    #[test]
5929    fn test_explode_to_unnest_presto() {
5930        let spark = Dialect::get(DialectType::Spark);
5931        let result = spark
5932            .transpile("SELECT EXPLODE(x) FROM tbl", DialectType::Presto)
5933            .unwrap();
5934        assert_eq!(
5935            result[0],
5936            "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col FROM tbl CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(x)))) AS _u(pos) CROSS JOIN UNNEST(x) WITH ORDINALITY AS _u_2(col, pos_2) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(x) AND _u_2.pos_2 = CARDINALITY(x))"
5937        );
5938    }
5939
5940    #[test]
5941    fn test_explode_to_unnest_bigquery() {
5942        let spark = Dialect::get(DialectType::Spark);
5943        let result = spark
5944            .transpile("SELECT EXPLODE(x) FROM tbl", DialectType::BigQuery)
5945            .unwrap();
5946        assert_eq!(
5947            result[0],
5948            "SELECT IF(pos = pos_2, col, NULL) AS col FROM tbl CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(x)) - 1)) AS pos CROSS JOIN UNNEST(x) AS col WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(x) - 1) AND pos_2 = (ARRAY_LENGTH(x) - 1))"
5949        );
5950    }
5951}