Skip to main content

polyglot_sql/
transforms.rs

1//! SQL AST Transforms
2//!
3//! This module provides functions to transform SQL ASTs for dialect compatibility.
4//! These transforms are used during transpilation to convert dialect-specific features
5//! to forms that are supported by the target dialect.
6//!
7//! Based on the Python implementation in `sqlglot/transforms.py`.
8
9use crate::dialects::transform_recursive;
10use crate::dialects::{Dialect, DialectType};
11use crate::error::Result;
12use crate::expressions::{
13    Alias, BinaryOp, BooleanLiteral, Cast, DataType, Exists, Expression, From, Function,
14    Identifier, Join, JoinKind, Lateral, LateralView, Literal, NamedArgSeparator, NamedArgument,
15    Over, Select, StructField, Subquery, UnaryFunc, UnnestFunc, Where,
16};
17use std::cell::RefCell;
18
19/// Apply a chain of transforms to an expression
20///
21/// # Arguments
22/// * `expr` - The expression to transform
23/// * `transforms` - A list of transform functions to apply in order
24///
25/// # Returns
26/// The transformed expression
27pub fn preprocess<F>(expr: Expression, transforms: &[F]) -> Result<Expression>
28where
29    F: Fn(Expression) -> Result<Expression>,
30{
31    let mut result = expr;
32    for transform in transforms {
33        result = transform(result)?;
34    }
35    Ok(result)
36}
37
38/// Convert UNNEST to EXPLODE (for Spark/Hive compatibility)
39///
40/// UNNEST is standard SQL but Spark uses EXPLODE instead.
41pub fn unnest_to_explode(expr: Expression) -> Result<Expression> {
42    match expr {
43        Expression::Unnest(unnest) => {
44            Ok(Expression::Explode(Box::new(UnaryFunc::new(unnest.this))))
45        }
46        _ => Ok(expr),
47    }
48}
49
50/// Convert CROSS JOIN UNNEST to LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks.
51///
52/// This is a SELECT-level structural transformation that:
53/// 1. Converts UNNEST in FROM clause to INLINE/EXPLODE
54/// 2. Converts CROSS JOIN (LATERAL) UNNEST to LATERAL VIEW entries
55/// 3. For single-arg UNNEST: uses EXPLODE
56/// 4. For multi-arg UNNEST: uses INLINE(ARRAYS_ZIP(...))
57///
58/// Based on Python sqlglot's `unnest_to_explode` transform in transforms.py (lines 290-391).
59pub fn unnest_to_explode_select(expr: Expression) -> Result<Expression> {
60    transform_recursive(expr, &unnest_to_explode_select_inner)
61}
62
63/// Helper to determine the UDTF function for an UNNEST expression.
64/// Single-arg UNNEST → EXPLODE, multi-arg → INLINE
65fn make_udtf_expr(unnest: &UnnestFunc) -> Expression {
66    let has_multi_expr = !unnest.expressions.is_empty();
67    if has_multi_expr {
68        // Multi-arg: INLINE(ARRAYS_ZIP(arg1, arg2, ...))
69        let mut all_args = vec![unnest.this.clone()];
70        all_args.extend(unnest.expressions.iter().cloned());
71        let arrays_zip =
72            Expression::Function(Box::new(Function::new("ARRAYS_ZIP".to_string(), all_args)));
73        Expression::Function(Box::new(Function::new(
74            "INLINE".to_string(),
75            vec![arrays_zip],
76        )))
77    } else {
78        // Single-arg: EXPLODE(arg)
79        Expression::Explode(Box::new(UnaryFunc::new(unnest.this.clone())))
80    }
81}
82
83fn unnest_to_explode_select_inner(expr: Expression) -> Result<Expression> {
84    let Expression::Select(mut select) = expr else {
85        return Ok(expr);
86    };
87
88    // Process FROM clause: UNNEST items need conversion
89    if let Some(ref mut from) = select.from {
90        if from.expressions.len() >= 1 {
91            let mut new_from_exprs = Vec::new();
92            let mut new_lateral_views = Vec::new();
93            let first_is_unnest = is_unnest_expr(&from.expressions[0]);
94
95            for (idx, from_item) in from.expressions.drain(..).enumerate() {
96                if idx == 0 && first_is_unnest {
97                    // UNNEST is the first (and possibly only) item in FROM
98                    // Replace it with INLINE/EXPLODE, keeping alias
99                    let replaced = replace_from_unnest(from_item);
100                    new_from_exprs.push(replaced);
101                } else if idx > 0 && is_unnest_expr(&from_item) {
102                    // Additional UNNEST items in FROM (comma-joined) → LATERAL VIEW
103                    let (alias_name, column_aliases, unnest_func) = extract_unnest_info(from_item);
104                    let udtf = make_udtf_expr(&unnest_func);
105                    new_lateral_views.push(LateralView {
106                        this: udtf,
107                        table_alias: alias_name,
108                        column_aliases,
109                        outer: false,
110                    });
111                } else {
112                    new_from_exprs.push(from_item);
113                }
114            }
115
116            from.expressions = new_from_exprs;
117            // Append lateral views for comma-joined UNNESTs
118            select.lateral_views.extend(new_lateral_views);
119        }
120    }
121
122    // Process joins: CROSS JOIN (LATERAL) UNNEST → LATERAL VIEW
123    let mut remaining_joins = Vec::new();
124    for join in select.joins.drain(..) {
125        if matches!(join.kind, JoinKind::Cross | JoinKind::Inner) {
126            let (is_unnest, is_lateral) = check_join_unnest(&join.this);
127            if is_unnest {
128                // Extract UNNEST info from join, handling Lateral wrapper
129                let (lateral_alias, lateral_col_aliases, join_expr) = if is_lateral {
130                    if let Expression::Lateral(lat) = join.this {
131                        // Extract alias from Lateral struct
132                        let alias = lat.alias.map(|s| Identifier::new(&s));
133                        let col_aliases: Vec<Identifier> = lat
134                            .column_aliases
135                            .iter()
136                            .map(|s| Identifier::new(s))
137                            .collect();
138                        (alias, col_aliases, *lat.this)
139                    } else {
140                        (None, Vec::new(), join.this)
141                    }
142                } else {
143                    (None, Vec::new(), join.this)
144                };
145
146                let (alias_name, column_aliases, unnest_func) = extract_unnest_info(join_expr);
147
148                // Prefer Lateral's alias over UNNEST's alias
149                let final_alias = lateral_alias.or(alias_name);
150                let final_col_aliases = if !lateral_col_aliases.is_empty() {
151                    lateral_col_aliases
152                } else {
153                    column_aliases
154                };
155
156                // Use "unnest" as default alias if none provided (for single-arg case)
157                let table_alias = final_alias.or_else(|| Some(Identifier::new("unnest")));
158                let col_aliases = if final_col_aliases.is_empty() {
159                    vec![Identifier::new("unnest")]
160                } else {
161                    final_col_aliases
162                };
163
164                let udtf = make_udtf_expr(&unnest_func);
165                select.lateral_views.push(LateralView {
166                    this: udtf,
167                    table_alias,
168                    column_aliases: col_aliases,
169                    outer: false,
170                });
171            } else {
172                remaining_joins.push(join);
173            }
174        } else {
175            remaining_joins.push(join);
176        }
177    }
178    select.joins = remaining_joins;
179
180    Ok(Expression::Select(select))
181}
182
183/// Check if an expression is or wraps an UNNEST
184fn is_unnest_expr(expr: &Expression) -> bool {
185    match expr {
186        Expression::Unnest(_) => true,
187        Expression::Alias(a) => matches!(a.this, Expression::Unnest(_)),
188        _ => false,
189    }
190}
191
192/// Check if a join's expression is an UNNEST (possibly wrapped in Lateral)
193fn check_join_unnest(expr: &Expression) -> (bool, bool) {
194    match expr {
195        Expression::Unnest(_) => (true, false),
196        Expression::Alias(a) => {
197            if matches!(a.this, Expression::Unnest(_)) {
198                (true, false)
199            } else {
200                (false, false)
201            }
202        }
203        Expression::Lateral(lat) => match &*lat.this {
204            Expression::Unnest(_) => (true, true),
205            Expression::Alias(a) => {
206                if matches!(a.this, Expression::Unnest(_)) {
207                    (true, true)
208                } else {
209                    (false, true)
210                }
211            }
212            _ => (false, true),
213        },
214        _ => (false, false),
215    }
216}
217
218/// Replace an UNNEST in FROM with INLINE/EXPLODE, preserving alias structure
219fn replace_from_unnest(from_item: Expression) -> Expression {
220    match from_item {
221        Expression::Alias(mut a) => {
222            if let Expression::Unnest(unnest) = a.this {
223                a.this = make_udtf_expr(&unnest);
224            }
225            Expression::Alias(a)
226        }
227        Expression::Unnest(unnest) => make_udtf_expr(&unnest),
228        other => other,
229    }
230}
231
232/// Extract alias info and UnnestFunc from an expression (possibly wrapped in Alias)
233fn extract_unnest_info(expr: Expression) -> (Option<Identifier>, Vec<Identifier>, UnnestFunc) {
234    match expr {
235        Expression::Alias(a) => {
236            if let Expression::Unnest(unnest) = a.this {
237                (Some(a.alias), a.column_aliases, *unnest)
238            } else {
239                // Should not happen if we already checked is_unnest_expr
240                (
241                    Some(a.alias),
242                    a.column_aliases,
243                    UnnestFunc {
244                        this: a.this,
245                        expressions: Vec::new(),
246                        with_ordinality: false,
247                        alias: None,
248                        offset_alias: None,
249                    },
250                )
251            }
252        }
253        Expression::Unnest(unnest) => {
254            let alias = unnest.alias.clone();
255            (alias, Vec::new(), *unnest)
256        }
257        _ => (
258            None,
259            Vec::new(),
260            UnnestFunc {
261                this: expr,
262                expressions: Vec::new(),
263                with_ordinality: false,
264                alias: None,
265                offset_alias: None,
266            },
267        ),
268    }
269}
270
271/// Convert EXPLODE to UNNEST (for standard SQL compatibility)
272pub fn explode_to_unnest(expr: Expression) -> Result<Expression> {
273    match expr {
274        Expression::Explode(explode) => Ok(Expression::Unnest(Box::new(UnnestFunc {
275            this: explode.this,
276            expressions: Vec::new(),
277            with_ordinality: false,
278            alias: None,
279            offset_alias: None,
280        }))),
281        _ => Ok(expr),
282    }
283}
284
285/// Replace boolean literals for dialects that don't support them
286///
287/// Converts TRUE/FALSE to 1/0 for dialects like older MySQL versions
288pub fn replace_bool_with_int(expr: Expression) -> Result<Expression> {
289    match expr {
290        Expression::Boolean(b) => {
291            let value = if b.value { "1" } else { "0" };
292            Ok(Expression::Literal(Box::new(Literal::Number(
293                value.to_string(),
294            ))))
295        }
296        _ => Ok(expr),
297    }
298}
299
300/// Replace integer literals for dialects that prefer boolean
301///
302/// Converts 1/0 to TRUE/FALSE
303pub fn replace_int_with_bool(expr: Expression) -> Result<Expression> {
304    match expr {
305        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1" || n == "0") =>
306        {
307            let Literal::Number(n) = lit.as_ref() else {
308                unreachable!()
309            };
310            Ok(Expression::Boolean(BooleanLiteral { value: n == "1" }))
311        }
312        _ => Ok(expr),
313    }
314}
315
316/// Remove precision from parameterized types
317///
318/// Some dialects don't support precision parameters on certain types.
319/// This transform removes them, e.g., VARCHAR(255) → VARCHAR, DECIMAL(10,2) → DECIMAL
320pub fn remove_precision_parameterized_types(expr: Expression) -> Result<Expression> {
321    Ok(strip_type_params_recursive(expr))
322}
323
324/// Recursively strip type parameters from DataType values in an expression
325fn strip_type_params_recursive(expr: Expression) -> Expression {
326    match expr {
327        // Handle Cast expressions - strip precision from target type
328        Expression::Cast(mut cast) => {
329            cast.to = strip_data_type_params(cast.to);
330            // Also recursively process the expression being cast
331            cast.this = strip_type_params_recursive(cast.this);
332            Expression::Cast(cast)
333        }
334        // Handle TryCast expressions (uses same Cast struct)
335        Expression::TryCast(mut try_cast) => {
336            try_cast.to = strip_data_type_params(try_cast.to);
337            try_cast.this = strip_type_params_recursive(try_cast.this);
338            Expression::TryCast(try_cast)
339        }
340        // Handle SafeCast expressions (uses same Cast struct)
341        Expression::SafeCast(mut safe_cast) => {
342            safe_cast.to = strip_data_type_params(safe_cast.to);
343            safe_cast.this = strip_type_params_recursive(safe_cast.this);
344            Expression::SafeCast(safe_cast)
345        }
346        // For now, pass through other expressions
347        // A full implementation would recursively visit all nodes
348        _ => expr,
349    }
350}
351
352/// Strip precision/scale/length parameters from a DataType
353fn strip_data_type_params(dt: DataType) -> DataType {
354    match dt {
355        // Numeric types with precision/scale
356        DataType::Decimal { .. } => DataType::Decimal {
357            precision: None,
358            scale: None,
359        },
360        DataType::TinyInt { .. } => DataType::TinyInt { length: None },
361        DataType::SmallInt { .. } => DataType::SmallInt { length: None },
362        DataType::Int { .. } => DataType::Int {
363            length: None,
364            integer_spelling: false,
365        },
366        DataType::BigInt { .. } => DataType::BigInt { length: None },
367
368        // String types with length
369        DataType::Char { .. } => DataType::Char { length: None },
370        DataType::VarChar { .. } => DataType::VarChar {
371            length: None,
372            parenthesized_length: false,
373        },
374
375        // Binary types with length
376        DataType::Binary { .. } => DataType::Binary { length: None },
377        DataType::VarBinary { .. } => DataType::VarBinary { length: None },
378
379        // Bit types with length
380        DataType::Bit { .. } => DataType::Bit { length: None },
381        DataType::VarBit { .. } => DataType::VarBit { length: None },
382
383        // Time types with precision
384        DataType::Time { .. } => DataType::Time {
385            precision: None,
386            timezone: false,
387        },
388        DataType::Timestamp { timezone, .. } => DataType::Timestamp {
389            precision: None,
390            timezone,
391        },
392
393        // Array - recursively strip element type
394        DataType::Array {
395            element_type,
396            dimension,
397        } => DataType::Array {
398            element_type: Box::new(strip_data_type_params(*element_type)),
399            dimension,
400        },
401
402        // Map - recursively strip key and value types
403        DataType::Map {
404            key_type,
405            value_type,
406        } => DataType::Map {
407            key_type: Box::new(strip_data_type_params(*key_type)),
408            value_type: Box::new(strip_data_type_params(*value_type)),
409        },
410
411        // Struct - recursively strip field types
412        DataType::Struct { fields, nested } => DataType::Struct {
413            fields: fields
414                .into_iter()
415                .map(|f| {
416                    StructField::with_options(
417                        f.name,
418                        strip_data_type_params(f.data_type),
419                        f.options,
420                    )
421                })
422                .collect(),
423            nested,
424        },
425
426        // Vector - strip dimension
427        DataType::Vector { element_type, .. } => DataType::Vector {
428            element_type: element_type.map(|et| Box::new(strip_data_type_params(*et))),
429            dimension: None,
430        },
431
432        // Object - recursively strip field types
433        DataType::Object { fields, modifier } => DataType::Object {
434            fields: fields
435                .into_iter()
436                .map(|(name, ty, not_null)| (name, strip_data_type_params(ty), not_null))
437                .collect(),
438            modifier,
439        },
440
441        // Other types pass through unchanged
442        other => other,
443    }
444}
445
446/// Eliminate QUALIFY clause by converting to a subquery with WHERE filter
447///
448/// QUALIFY is supported by Snowflake, BigQuery, and DuckDB but not by most other dialects.
449///
450/// Converts:
451/// ```sql
452/// SELECT * FROM t QUALIFY ROW_NUMBER() OVER (...) = 1
453/// ```
454/// To:
455/// ```sql
456/// SELECT * FROM (SELECT *, ROW_NUMBER() OVER (...) AS _w FROM t) _t WHERE _w = 1
457/// ```
458///
459/// Reference: `transforms.py:194-255`
460pub fn eliminate_qualify(expr: Expression) -> Result<Expression> {
461    match expr {
462        Expression::Select(mut select) => {
463            if let Some(qualify) = select.qualify.take() {
464                // Python sqlglot approach:
465                // 1. Extract the window function from the qualify condition
466                // 2. Add it as _w alias to the inner select
467                // 3. Replace the window function reference with _w in the outer WHERE
468                // 4. Keep original select expressions in the outer query
469
470                let qualify_filter = qualify.this;
471                let window_alias_name = "_w".to_string();
472                let window_alias_ident = Identifier::new(window_alias_name.clone());
473
474                // Try to extract window function from comparison
475                // Pattern: WINDOW_FUNC = value -> inner adds WINDOW_FUNC AS _w, outer WHERE _w = value
476                let (window_expr, outer_where) =
477                    extract_window_from_condition(qualify_filter.clone(), &window_alias_ident);
478
479                if let Some(win_expr) = window_expr {
480                    // Add window function as _w alias to inner select
481                    let window_alias_expr =
482                        Expression::Alias(Box::new(crate::expressions::Alias {
483                            this: win_expr,
484                            alias: window_alias_ident.clone(),
485                            column_aliases: vec![],
486                            pre_alias_comments: vec![],
487                            trailing_comments: vec![],
488                            inferred_type: None,
489                        }));
490
491                    // For the outer SELECT, replace aliased expressions with just the alias reference
492                    // e.g., `1 AS other_id` in inner -> `other_id` in outer
493                    // Non-aliased expressions (columns, identifiers) stay as-is
494                    let outer_exprs: Vec<Expression> = select
495                        .expressions
496                        .iter()
497                        .map(|expr| {
498                            if let Expression::Alias(a) = expr {
499                                // Replace with just the alias identifier as a column reference
500                                Expression::Column(Box::new(crate::expressions::Column {
501                                    name: a.alias.clone(),
502                                    table: None,
503                                    join_mark: false,
504                                    trailing_comments: vec![],
505                                    span: None,
506                                    inferred_type: None,
507                                }))
508                            } else {
509                                expr.clone()
510                            }
511                        })
512                        .collect();
513                    select.expressions.push(window_alias_expr);
514
515                    // Create the inner subquery
516                    let inner_select = Expression::Select(select);
517                    let subquery = Subquery {
518                        this: inner_select,
519                        alias: Some(Identifier::new("_t".to_string())),
520                        column_aliases: vec![],
521                        order_by: None,
522                        limit: None,
523                        offset: None,
524                        distribute_by: None,
525                        sort_by: None,
526                        cluster_by: None,
527                        lateral: false,
528                        modifiers_inside: false,
529                        trailing_comments: vec![],
530                        inferred_type: None,
531                    };
532
533                    // Create the outer SELECT with alias-resolved expressions and WHERE _w <op> value
534                    let outer_select = Select {
535                        expressions: outer_exprs,
536                        from: Some(From {
537                            expressions: vec![Expression::Subquery(Box::new(subquery))],
538                        }),
539                        where_clause: Some(Where { this: outer_where }),
540                        ..Select::new()
541                    };
542
543                    return Ok(Expression::Select(Box::new(outer_select)));
544                } else {
545                    // Fallback: if we can't extract a window function, use old approach
546                    let qualify_alias = Expression::Alias(Box::new(crate::expressions::Alias {
547                        this: qualify_filter.clone(),
548                        alias: window_alias_ident.clone(),
549                        column_aliases: vec![],
550                        pre_alias_comments: vec![],
551                        trailing_comments: vec![],
552                        inferred_type: None,
553                    }));
554
555                    let original_exprs = select.expressions.clone();
556                    select.expressions.push(qualify_alias);
557
558                    let inner_select = Expression::Select(select);
559                    let subquery = Subquery {
560                        this: inner_select,
561                        alias: Some(Identifier::new("_t".to_string())),
562                        column_aliases: vec![],
563                        order_by: None,
564                        limit: None,
565                        offset: None,
566                        distribute_by: None,
567                        sort_by: None,
568                        cluster_by: None,
569                        lateral: false,
570                        modifiers_inside: false,
571                        trailing_comments: vec![],
572                        inferred_type: None,
573                    };
574
575                    let outer_select = Select {
576                        expressions: original_exprs,
577                        from: Some(From {
578                            expressions: vec![Expression::Subquery(Box::new(subquery))],
579                        }),
580                        where_clause: Some(Where {
581                            this: Expression::Column(Box::new(crate::expressions::Column {
582                                name: window_alias_ident,
583                                table: None,
584                                join_mark: false,
585                                trailing_comments: vec![],
586                                span: None,
587                                inferred_type: None,
588                            })),
589                        }),
590                        ..Select::new()
591                    };
592
593                    return Ok(Expression::Select(Box::new(outer_select)));
594                }
595            }
596            Ok(Expression::Select(select))
597        }
598        other => Ok(other),
599    }
600}
601
602/// Extract a window function from a qualify condition.
603/// Returns (window_expression, rewritten_condition) if found.
604/// The rewritten condition replaces the window function with a column reference to the alias.
605fn extract_window_from_condition(
606    condition: Expression,
607    alias: &Identifier,
608) -> (Option<Expression>, Expression) {
609    let alias_col = Expression::Column(Box::new(crate::expressions::Column {
610        name: alias.clone(),
611        table: None,
612        join_mark: false,
613        trailing_comments: vec![],
614        span: None,
615        inferred_type: None,
616    }));
617
618    // Check if condition is a simple comparison with a window function on one side
619    match condition {
620        // WINDOW_FUNC = value
621        Expression::Eq(ref op) => {
622            if is_window_expr(&op.left) {
623                (
624                    Some(op.left.clone()),
625                    Expression::Eq(Box::new(BinaryOp {
626                        left: alias_col,
627                        right: op.right.clone(),
628                        ..(**op).clone()
629                    })),
630                )
631            } else if is_window_expr(&op.right) {
632                (
633                    Some(op.right.clone()),
634                    Expression::Eq(Box::new(BinaryOp {
635                        left: op.left.clone(),
636                        right: alias_col,
637                        ..(**op).clone()
638                    })),
639                )
640            } else {
641                (None, condition)
642            }
643        }
644        Expression::Neq(ref op) => {
645            if is_window_expr(&op.left) {
646                (
647                    Some(op.left.clone()),
648                    Expression::Neq(Box::new(BinaryOp {
649                        left: alias_col,
650                        right: op.right.clone(),
651                        ..(**op).clone()
652                    })),
653                )
654            } else if is_window_expr(&op.right) {
655                (
656                    Some(op.right.clone()),
657                    Expression::Neq(Box::new(BinaryOp {
658                        left: op.left.clone(),
659                        right: alias_col,
660                        ..(**op).clone()
661                    })),
662                )
663            } else {
664                (None, condition)
665            }
666        }
667        Expression::Lt(ref op) => {
668            if is_window_expr(&op.left) {
669                (
670                    Some(op.left.clone()),
671                    Expression::Lt(Box::new(BinaryOp {
672                        left: alias_col,
673                        right: op.right.clone(),
674                        ..(**op).clone()
675                    })),
676                )
677            } else if is_window_expr(&op.right) {
678                (
679                    Some(op.right.clone()),
680                    Expression::Lt(Box::new(BinaryOp {
681                        left: op.left.clone(),
682                        right: alias_col,
683                        ..(**op).clone()
684                    })),
685                )
686            } else {
687                (None, condition)
688            }
689        }
690        Expression::Lte(ref op) => {
691            if is_window_expr(&op.left) {
692                (
693                    Some(op.left.clone()),
694                    Expression::Lte(Box::new(BinaryOp {
695                        left: alias_col,
696                        right: op.right.clone(),
697                        ..(**op).clone()
698                    })),
699                )
700            } else if is_window_expr(&op.right) {
701                (
702                    Some(op.right.clone()),
703                    Expression::Lte(Box::new(BinaryOp {
704                        left: op.left.clone(),
705                        right: alias_col,
706                        ..(**op).clone()
707                    })),
708                )
709            } else {
710                (None, condition)
711            }
712        }
713        Expression::Gt(ref op) => {
714            if is_window_expr(&op.left) {
715                (
716                    Some(op.left.clone()),
717                    Expression::Gt(Box::new(BinaryOp {
718                        left: alias_col,
719                        right: op.right.clone(),
720                        ..(**op).clone()
721                    })),
722                )
723            } else if is_window_expr(&op.right) {
724                (
725                    Some(op.right.clone()),
726                    Expression::Gt(Box::new(BinaryOp {
727                        left: op.left.clone(),
728                        right: alias_col,
729                        ..(**op).clone()
730                    })),
731                )
732            } else {
733                (None, condition)
734            }
735        }
736        Expression::Gte(ref op) => {
737            if is_window_expr(&op.left) {
738                (
739                    Some(op.left.clone()),
740                    Expression::Gte(Box::new(BinaryOp {
741                        left: alias_col,
742                        right: op.right.clone(),
743                        ..(**op).clone()
744                    })),
745                )
746            } else if is_window_expr(&op.right) {
747                (
748                    Some(op.right.clone()),
749                    Expression::Gte(Box::new(BinaryOp {
750                        left: op.left.clone(),
751                        right: alias_col,
752                        ..(**op).clone()
753                    })),
754                )
755            } else {
756                (None, condition)
757            }
758        }
759        // If the condition is just a window function (bare QUALIFY expression)
760        _ if is_window_expr(&condition) => (Some(condition), alias_col),
761        // Can't extract window function
762        _ => (None, condition),
763    }
764}
765
766/// Check if an expression is a window function
767fn is_window_expr(expr: &Expression) -> bool {
768    matches!(expr, Expression::Window(_) | Expression::WindowFunction(_))
769}
770
771/// Eliminate DISTINCT ON clause by converting to a subquery with ROW_NUMBER
772///
773/// DISTINCT ON is PostgreSQL-specific. For dialects that don't support it,
774/// this converts it to a subquery with a ROW_NUMBER() window function.
775///
776/// Converts:
777/// ```sql
778/// SELECT DISTINCT ON (a) a, b FROM t ORDER BY a, b
779/// ```
780/// To:
781/// ```sql
782/// SELECT a, b FROM (
783///     SELECT a, b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY a, b) AS _row_number
784///     FROM t
785/// ) _t WHERE _row_number = 1
786/// ```
787///
788/// Reference: `transforms.py:138-191`
789pub fn eliminate_distinct_on(expr: Expression) -> Result<Expression> {
790    eliminate_distinct_on_for_dialect(expr, None, None)
791}
792
793/// Eliminate DISTINCT ON with dialect-specific NULL ordering behavior.
794///
795/// For dialects where NULLs don't sort first by default in DESC ordering,
796/// we need to add explicit NULL ordering to preserve DISTINCT ON semantics.
797pub fn eliminate_distinct_on_for_dialect(
798    expr: Expression,
799    target: Option<DialectType>,
800    source: Option<DialectType>,
801) -> Result<Expression> {
802    use crate::expressions::Case;
803
804    // PostgreSQL and DuckDB support DISTINCT ON natively - skip elimination
805    if matches!(
806        target,
807        Some(DialectType::PostgreSQL) | Some(DialectType::DuckDB)
808    ) {
809        return Ok(expr);
810    }
811
812    // Determine NULL ordering mode based on target dialect
813    // Oracle/Redshift/Snowflake: NULLS FIRST is default for DESC -> no change needed
814    // BigQuery/Spark/Presto/Hive/etc: need explicit NULLS FIRST
815    // MySQL/TSQL: no NULLS FIRST syntax -> use CASE WHEN IS NULL
816    enum NullsMode {
817        None,       // Default NULLS FIRST behavior (Oracle, Redshift, Snowflake)
818        NullsFirst, // Add explicit NULLS FIRST (BigQuery, Spark, Presto, Hive, etc.)
819        CaseExpr,   // Use CASE WHEN IS NULL for NULLS FIRST simulation (MySQL, StarRocks, TSQL)
820    }
821
822    let nulls_mode = match target {
823        Some(DialectType::MySQL)
824        | Some(DialectType::SingleStore)
825        | Some(DialectType::TSQL)
826        | Some(DialectType::Fabric) => NullsMode::CaseExpr,
827        Some(DialectType::Oracle) | Some(DialectType::Redshift) | Some(DialectType::Snowflake) => {
828            NullsMode::None
829        }
830        Some(DialectType::StarRocks) => {
831            if matches!(source, Some(DialectType::Redshift)) {
832                NullsMode::CaseExpr
833            } else {
834                NullsMode::None
835            }
836        }
837        // All other dialects that don't support DISTINCT ON: use NULLS FIRST
838        _ => NullsMode::NullsFirst,
839    };
840
841    match expr {
842        Expression::Select(mut select) => {
843            if let Some(distinct_cols) = select.distinct_on.take() {
844                if !distinct_cols.is_empty() {
845                    // Create ROW_NUMBER() OVER (PARTITION BY distinct_cols ORDER BY ...)
846                    let row_number_alias = Identifier::new("_row_number".to_string());
847
848                    // Get order_by expressions, or use distinct_cols as default order
849                    let order_exprs = if let Some(ref order_by) = select.order_by {
850                        let mut exprs = order_by.expressions.clone();
851                        // Add NULL ordering based on target dialect
852                        match nulls_mode {
853                            NullsMode::NullsFirst => {
854                                for ord in &mut exprs {
855                                    if ord.desc && ord.nulls_first.is_none() {
856                                        ord.nulls_first = Some(true);
857                                    }
858                                }
859                            }
860                            NullsMode::CaseExpr => {
861                                // For each DESC column without explicit nulls ordering,
862                                // prepend: CASE WHEN col IS NULL THEN 1 ELSE 0 END DESC
863                                let mut new_exprs = Vec::new();
864                                for ord in &exprs {
865                                    if ord.desc && ord.nulls_first.is_none() {
866                                        // Add CASE WHEN col IS NULL THEN 1 ELSE 0 END DESC
867                                        let null_check = Expression::Case(Box::new(Case {
868                                            operand: None,
869                                            whens: vec![(
870                                                Expression::IsNull(Box::new(
871                                                    crate::expressions::IsNull {
872                                                        this: ord.this.clone(),
873                                                        not: false,
874                                                        postfix_form: false,
875                                                    },
876                                                )),
877                                                Expression::Literal(Box::new(Literal::Number(
878                                                    "1".to_string(),
879                                                ))),
880                                            )],
881                                            else_: Some(Expression::Literal(Box::new(
882                                                Literal::Number("0".to_string()),
883                                            ))),
884                                            comments: Vec::new(),
885                                            inferred_type: None,
886                                        }));
887                                        new_exprs.push(crate::expressions::Ordered {
888                                            this: null_check,
889                                            desc: true,
890                                            nulls_first: None,
891                                            explicit_asc: false,
892                                            with_fill: None,
893                                        });
894                                    }
895                                    new_exprs.push(ord.clone());
896                                }
897                                exprs = new_exprs;
898                            }
899                            NullsMode::None => {}
900                        }
901                        exprs
902                    } else {
903                        distinct_cols
904                            .iter()
905                            .map(|e| crate::expressions::Ordered {
906                                this: e.clone(),
907                                desc: false,
908                                nulls_first: None,
909                                explicit_asc: false,
910                                with_fill: None,
911                            })
912                            .collect()
913                    };
914
915                    // Create window function: ROW_NUMBER() OVER (PARTITION BY ... ORDER BY ...)
916                    let row_number_func =
917                        Expression::WindowFunction(Box::new(crate::expressions::WindowFunction {
918                            this: Expression::RowNumber(crate::expressions::RowNumber),
919                            over: Over {
920                                partition_by: distinct_cols,
921                                order_by: order_exprs,
922                                frame: None,
923                                window_name: None,
924                                alias: None,
925                            },
926                            keep: None,
927                            inferred_type: None,
928                        }));
929
930                    // Build aliased inner expressions and outer column references
931                    // Inner: SELECT a AS a, b AS b, ROW_NUMBER() OVER (...) AS _row_number
932                    // Outer: SELECT a, b FROM (...)
933                    let mut inner_aliased_exprs = Vec::new();
934                    let mut outer_select_exprs = Vec::new();
935                    for orig_expr in &select.expressions {
936                        match orig_expr {
937                            Expression::Alias(alias) => {
938                                // Already aliased - keep as-is in inner, reference alias in outer
939                                inner_aliased_exprs.push(orig_expr.clone());
940                                outer_select_exprs.push(Expression::Column(Box::new(
941                                    crate::expressions::Column {
942                                        name: alias.alias.clone(),
943                                        table: None,
944                                        join_mark: false,
945                                        trailing_comments: vec![],
946                                        span: None,
947                                        inferred_type: None,
948                                    },
949                                )));
950                            }
951                            Expression::Column(col) => {
952                                // Wrap in alias: a AS a in inner, just a in outer
953                                inner_aliased_exprs.push(Expression::Alias(Box::new(
954                                    crate::expressions::Alias {
955                                        this: orig_expr.clone(),
956                                        alias: col.name.clone(),
957                                        column_aliases: vec![],
958                                        pre_alias_comments: vec![],
959                                        trailing_comments: vec![],
960                                        inferred_type: None,
961                                    },
962                                )));
963                                outer_select_exprs.push(Expression::Column(Box::new(
964                                    crate::expressions::Column {
965                                        name: col.name.clone(),
966                                        table: None,
967                                        join_mark: false,
968                                        trailing_comments: vec![],
969                                        span: None,
970                                        inferred_type: None,
971                                    },
972                                )));
973                            }
974                            _ => {
975                                // Complex expression without alias - include as-is in both
976                                inner_aliased_exprs.push(orig_expr.clone());
977                                outer_select_exprs.push(orig_expr.clone());
978                            }
979                        }
980                    }
981
982                    // Add ROW_NUMBER as aliased expression to inner select list
983                    let row_number_alias_expr =
984                        Expression::Alias(Box::new(crate::expressions::Alias {
985                            this: row_number_func,
986                            alias: row_number_alias.clone(),
987                            column_aliases: vec![],
988                            pre_alias_comments: vec![],
989                            trailing_comments: vec![],
990                            inferred_type: None,
991                        }));
992                    inner_aliased_exprs.push(row_number_alias_expr);
993
994                    // Replace inner select's expressions with aliased versions
995                    select.expressions = inner_aliased_exprs;
996
997                    // Remove ORDER BY from inner query (it's now in the window function)
998                    let _inner_order_by = select.order_by.take();
999
1000                    // Clear DISTINCT from inner select (DISTINCT ON is replaced by ROW_NUMBER)
1001                    select.distinct = false;
1002
1003                    // Create inner subquery
1004                    let inner_select = Expression::Select(select);
1005                    let subquery = Subquery {
1006                        this: inner_select,
1007                        alias: Some(Identifier::new("_t".to_string())),
1008                        column_aliases: vec![],
1009                        order_by: None,
1010                        limit: None,
1011                        offset: None,
1012                        distribute_by: None,
1013                        sort_by: None,
1014                        cluster_by: None,
1015                        lateral: false,
1016                        modifiers_inside: false,
1017                        trailing_comments: vec![],
1018                        inferred_type: None,
1019                    };
1020
1021                    // Create outer SELECT with WHERE _row_number = 1
1022                    // No ORDER BY on outer query
1023                    let outer_select = Select {
1024                        expressions: outer_select_exprs,
1025                        from: Some(From {
1026                            expressions: vec![Expression::Subquery(Box::new(subquery))],
1027                        }),
1028                        where_clause: Some(Where {
1029                            this: Expression::Eq(Box::new(BinaryOp {
1030                                left: Expression::Column(Box::new(crate::expressions::Column {
1031                                    name: row_number_alias,
1032                                    table: None,
1033                                    join_mark: false,
1034                                    trailing_comments: vec![],
1035                                    span: None,
1036                                    inferred_type: None,
1037                                })),
1038                                right: Expression::Literal(Box::new(Literal::Number(
1039                                    "1".to_string(),
1040                                ))),
1041                                left_comments: vec![],
1042                                operator_comments: vec![],
1043                                trailing_comments: vec![],
1044                                inferred_type: None,
1045                            })),
1046                        }),
1047                        ..Select::new()
1048                    };
1049
1050                    return Ok(Expression::Select(Box::new(outer_select)));
1051                }
1052            }
1053            Ok(Expression::Select(select))
1054        }
1055        other => Ok(other),
1056    }
1057}
1058
1059/// Convert SEMI and ANTI joins into equivalent forms that use EXISTS instead.
1060///
1061/// For dialects that don't support SEMI/ANTI join syntax, this converts:
1062/// - `SELECT * FROM a SEMI JOIN b ON a.x = b.x` → `SELECT * FROM a WHERE EXISTS (SELECT 1 FROM b WHERE a.x = b.x)`
1063/// - `SELECT * FROM a ANTI JOIN b ON a.x = b.x` → `SELECT * FROM a WHERE NOT EXISTS (SELECT 1 FROM b WHERE a.x = b.x)`
1064///
1065/// Reference: `transforms.py:607-621`
1066pub fn eliminate_semi_and_anti_joins(expr: Expression) -> Result<Expression> {
1067    match expr {
1068        Expression::Select(mut select) => {
1069            let mut new_joins = Vec::new();
1070            let mut extra_where_conditions = Vec::new();
1071
1072            for join in select.joins.drain(..) {
1073                match join.kind {
1074                    JoinKind::Semi | JoinKind::LeftSemi => {
1075                        if let Some(on_condition) = join.on {
1076                            // Create: EXISTS (SELECT 1 FROM join_table WHERE on_condition)
1077                            let subquery_select = Select {
1078                                expressions: vec![Expression::Literal(Box::new(Literal::Number(
1079                                    "1".to_string(),
1080                                )))],
1081                                from: Some(From {
1082                                    expressions: vec![join.this],
1083                                }),
1084                                where_clause: Some(Where { this: on_condition }),
1085                                ..Select::new()
1086                            };
1087
1088                            let exists = Expression::Exists(Box::new(Exists {
1089                                this: Expression::Subquery(Box::new(Subquery {
1090                                    this: Expression::Select(Box::new(subquery_select)),
1091                                    alias: None,
1092                                    column_aliases: vec![],
1093                                    order_by: None,
1094                                    limit: None,
1095                                    offset: None,
1096                                    distribute_by: None,
1097                                    sort_by: None,
1098                                    cluster_by: None,
1099                                    lateral: false,
1100                                    modifiers_inside: false,
1101                                    trailing_comments: vec![],
1102                                    inferred_type: None,
1103                                })),
1104                                not: false,
1105                            }));
1106
1107                            extra_where_conditions.push(exists);
1108                        }
1109                    }
1110                    JoinKind::Anti | JoinKind::LeftAnti => {
1111                        if let Some(on_condition) = join.on {
1112                            // Create: NOT EXISTS (SELECT 1 FROM join_table WHERE on_condition)
1113                            let subquery_select = Select {
1114                                expressions: vec![Expression::Literal(Box::new(Literal::Number(
1115                                    "1".to_string(),
1116                                )))],
1117                                from: Some(From {
1118                                    expressions: vec![join.this],
1119                                }),
1120                                where_clause: Some(Where { this: on_condition }),
1121                                ..Select::new()
1122                            };
1123
1124                            // Use Exists with not: true for NOT EXISTS
1125                            let not_exists = Expression::Exists(Box::new(Exists {
1126                                this: Expression::Subquery(Box::new(Subquery {
1127                                    this: Expression::Select(Box::new(subquery_select)),
1128                                    alias: None,
1129                                    column_aliases: vec![],
1130                                    order_by: None,
1131                                    limit: None,
1132                                    offset: None,
1133                                    distribute_by: None,
1134                                    sort_by: None,
1135                                    cluster_by: None,
1136                                    lateral: false,
1137                                    modifiers_inside: false,
1138                                    trailing_comments: vec![],
1139                                    inferred_type: None,
1140                                })),
1141                                not: true,
1142                            }));
1143
1144                            extra_where_conditions.push(not_exists);
1145                        }
1146                    }
1147                    _ => {
1148                        // Keep other join types as-is
1149                        new_joins.push(join);
1150                    }
1151                }
1152            }
1153
1154            select.joins = new_joins;
1155
1156            // Add EXISTS conditions to WHERE clause
1157            if !extra_where_conditions.is_empty() {
1158                let combined = extra_where_conditions
1159                    .into_iter()
1160                    .reduce(|acc, cond| {
1161                        Expression::And(Box::new(BinaryOp {
1162                            left: acc,
1163                            right: cond,
1164                            left_comments: vec![],
1165                            operator_comments: vec![],
1166                            trailing_comments: vec![],
1167                            inferred_type: None,
1168                        }))
1169                    })
1170                    .unwrap();
1171
1172                select.where_clause = match select.where_clause {
1173                    Some(Where { this: existing }) => Some(Where {
1174                        this: Expression::And(Box::new(BinaryOp {
1175                            left: existing,
1176                            right: combined,
1177                            left_comments: vec![],
1178                            operator_comments: vec![],
1179                            trailing_comments: vec![],
1180                            inferred_type: None,
1181                        })),
1182                    }),
1183                    None => Some(Where { this: combined }),
1184                };
1185            }
1186
1187            Ok(Expression::Select(select))
1188        }
1189        other => Ok(other),
1190    }
1191}
1192
1193/// Convert FULL OUTER JOIN to a UNION of LEFT and RIGHT OUTER joins.
1194///
1195/// For dialects that don't support FULL OUTER JOIN, this converts:
1196/// ```sql
1197/// SELECT * FROM a FULL OUTER JOIN b ON a.x = b.x
1198/// ```
1199/// To:
1200/// ```sql
1201/// SELECT * FROM a LEFT OUTER JOIN b ON a.x = b.x
1202/// UNION ALL
1203/// SELECT * FROM a RIGHT OUTER JOIN b ON a.x = b.x
1204/// WHERE NOT EXISTS (SELECT 1 FROM a WHERE a.x = b.x)
1205/// ```
1206///
1207/// Note: This transformation currently only works for queries with a single FULL OUTER join.
1208///
1209/// Reference: `transforms.py:624-661`
1210pub fn eliminate_full_outer_join(expr: Expression) -> Result<Expression> {
1211    match expr {
1212        Expression::Select(mut select) => {
1213            // Find FULL OUTER joins
1214            let full_outer_join_idx = select.joins.iter().position(|j| j.kind == JoinKind::Full);
1215
1216            if let Some(idx) = full_outer_join_idx {
1217                // We only handle queries with a single FULL OUTER join
1218                let full_join_count = select
1219                    .joins
1220                    .iter()
1221                    .filter(|j| j.kind == JoinKind::Full)
1222                    .count();
1223                if full_join_count != 1 {
1224                    return Ok(Expression::Select(select));
1225                }
1226
1227                // Clone the query for the right side of the UNION
1228                let mut right_select = select.clone();
1229
1230                // Get the join condition from the FULL OUTER join
1231                let full_join = &select.joins[idx];
1232                let join_condition = full_join.on.clone();
1233
1234                // Left side: convert FULL to LEFT
1235                select.joins[idx].kind = JoinKind::Left;
1236
1237                // Right side: convert FULL to RIGHT and add NOT EXISTS condition
1238                right_select.joins[idx].kind = JoinKind::Right;
1239
1240                // Build NOT EXISTS for the right side to exclude rows that matched
1241                if let (Some(ref from), Some(ref join_cond)) = (&select.from, &join_condition) {
1242                    if !from.expressions.is_empty() {
1243                        let anti_subquery = Expression::Select(Box::new(Select {
1244                            expressions: vec![Expression::Literal(Box::new(Literal::Number(
1245                                "1".to_string(),
1246                            )))],
1247                            from: Some(from.clone()),
1248                            where_clause: Some(Where {
1249                                this: join_cond.clone(),
1250                            }),
1251                            ..Select::new()
1252                        }));
1253
1254                        let not_exists = Expression::Not(Box::new(crate::expressions::UnaryOp {
1255                            inferred_type: None,
1256                            this: Expression::Exists(Box::new(Exists {
1257                                this: Expression::Subquery(Box::new(Subquery {
1258                                    this: anti_subquery,
1259                                    alias: None,
1260                                    column_aliases: vec![],
1261                                    order_by: None,
1262                                    limit: None,
1263                                    offset: None,
1264                                    distribute_by: None,
1265                                    sort_by: None,
1266                                    cluster_by: None,
1267                                    lateral: false,
1268                                    modifiers_inside: false,
1269                                    trailing_comments: vec![],
1270                                    inferred_type: None,
1271                                })),
1272                                not: false,
1273                            })),
1274                        }));
1275
1276                        // Add NOT EXISTS to the WHERE clause
1277                        right_select.where_clause = Some(Where {
1278                            this: match right_select.where_clause {
1279                                Some(w) => Expression::And(Box::new(BinaryOp {
1280                                    left: w.this,
1281                                    right: not_exists,
1282                                    left_comments: vec![],
1283                                    operator_comments: vec![],
1284                                    trailing_comments: vec![],
1285                                    inferred_type: None,
1286                                })),
1287                                None => not_exists,
1288                            },
1289                        });
1290                    }
1291                }
1292
1293                // Remove WITH clause from right side (CTEs should only be on left)
1294                right_select.with = None;
1295
1296                // Remove ORDER BY from left side (will be applied after UNION)
1297                let order_by = select.order_by.take();
1298
1299                // Create UNION ALL of left and right
1300                let union = crate::expressions::Union {
1301                    left: Expression::Select(select),
1302                    right: Expression::Select(right_select),
1303                    all: true, // UNION ALL
1304                    distinct: false,
1305                    with: None,
1306                    order_by,
1307                    limit: None,
1308                    offset: None,
1309                    distribute_by: None,
1310                    sort_by: None,
1311                    cluster_by: None,
1312                    by_name: false,
1313                    side: None,
1314                    kind: None,
1315                    corresponding: false,
1316                    strict: false,
1317                    on_columns: Vec::new(),
1318                };
1319
1320                return Ok(Expression::Union(Box::new(union)));
1321            }
1322
1323            Ok(Expression::Select(select))
1324        }
1325        other => Ok(other),
1326    }
1327}
1328
1329/// Move CTEs to the top level of the query.
1330///
1331/// Some dialects (e.g., Hive, T-SQL, Spark prior to version 3) only allow CTEs to be
1332/// defined at the top-level, so for example queries like:
1333///
1334/// ```sql
1335/// SELECT * FROM (WITH t(c) AS (SELECT 1) SELECT * FROM t) AS subq
1336/// ```
1337///
1338/// are invalid in those dialects. This transformation moves all CTEs to the top level.
1339///
1340/// Reference: `transforms.py:664-700`
1341pub fn move_ctes_to_top_level(expr: Expression) -> Result<Expression> {
1342    match expr {
1343        Expression::Select(mut select) => {
1344            // Phase 1: Collect CTEs from nested subqueries (not inside CTE definitions)
1345            let mut collected_ctes: Vec<crate::expressions::Cte> = Vec::new();
1346            let mut has_recursive = false;
1347
1348            collect_nested_ctes(
1349                &Expression::Select(select.clone()),
1350                &mut collected_ctes,
1351                &mut has_recursive,
1352                true,
1353            );
1354
1355            // Phase 2: Flatten CTEs nested inside top-level CTE definitions
1356            // This handles: WITH c AS (WITH b AS (...) SELECT ...) -> WITH b AS (...), c AS (SELECT ...)
1357            let mut cte_body_collected: Vec<(String, Vec<crate::expressions::Cte>)> = Vec::new();
1358            if let Some(ref with) = select.with {
1359                for cte in &with.ctes {
1360                    let mut body_ctes: Vec<crate::expressions::Cte> = Vec::new();
1361                    collect_ctes_from_cte_body(&cte.this, &mut body_ctes, &mut has_recursive);
1362                    if !body_ctes.is_empty() {
1363                        cte_body_collected.push((cte.alias.name.clone(), body_ctes));
1364                    }
1365                }
1366            }
1367
1368            let has_subquery_ctes = !collected_ctes.is_empty();
1369            let has_body_ctes = !cte_body_collected.is_empty();
1370
1371            if has_subquery_ctes || has_body_ctes {
1372                // Strip WITH clauses from inner subqueries
1373                strip_nested_with_clauses(&mut select, true);
1374
1375                // Strip WITH clauses from CTE body definitions
1376                if has_body_ctes {
1377                    if let Some(ref mut with) = select.with {
1378                        for cte in with.ctes.iter_mut() {
1379                            strip_with_from_cte_body(&mut cte.this);
1380                        }
1381                    }
1382                }
1383
1384                let top_with = select.with.get_or_insert_with(|| crate::expressions::With {
1385                    ctes: Vec::new(),
1386                    recursive: false,
1387                    leading_comments: vec![],
1388                    search: None,
1389                });
1390
1391                if has_recursive {
1392                    top_with.recursive = true;
1393                }
1394
1395                // Insert body CTEs before their parent CTE (Python sqlglot behavior)
1396                if has_body_ctes {
1397                    let mut new_ctes: Vec<crate::expressions::Cte> = Vec::new();
1398                    for mut cte in top_with.ctes.drain(..) {
1399                        // Check if this CTE has nested CTEs to insert before it
1400                        if let Some(pos) = cte_body_collected
1401                            .iter()
1402                            .position(|(name, _)| *name == cte.alias.name)
1403                        {
1404                            let (_, mut nested) = cte_body_collected.remove(pos);
1405                            // Strip WITH from each nested CTE's body too
1406                            for nested_cte in nested.iter_mut() {
1407                                strip_with_from_cte_body(&mut nested_cte.this);
1408                            }
1409                            new_ctes.extend(nested);
1410                        }
1411                        // Also strip WITH from the parent CTE's body
1412                        strip_with_from_cte_body(&mut cte.this);
1413                        new_ctes.push(cte);
1414                    }
1415                    top_with.ctes = new_ctes;
1416                }
1417
1418                // Append collected subquery CTEs after existing ones
1419                top_with.ctes.extend(collected_ctes);
1420            }
1421
1422            Ok(Expression::Select(select))
1423        }
1424        other => Ok(other),
1425    }
1426}
1427
1428/// Recursively collect CTEs from within CTE body expressions (for deep nesting)
1429fn collect_ctes_from_cte_body(
1430    expr: &Expression,
1431    collected: &mut Vec<crate::expressions::Cte>,
1432    has_recursive: &mut bool,
1433) {
1434    if let Expression::Select(select) = expr {
1435        if let Some(ref with) = select.with {
1436            if with.recursive {
1437                *has_recursive = true;
1438            }
1439            for cte in &with.ctes {
1440                // Recursively collect from this CTE's body first (depth-first)
1441                collect_ctes_from_cte_body(&cte.this, collected, has_recursive);
1442                // Then add this CTE itself
1443                collected.push(cte.clone());
1444            }
1445        }
1446    }
1447}
1448
1449/// Strip WITH clauses from CTE body expressions
1450fn strip_with_from_cte_body(expr: &mut Expression) {
1451    if let Expression::Select(ref mut select) = expr {
1452        select.with = None;
1453    }
1454}
1455
1456/// Strip WITH clauses from nested subqueries (after hoisting to top level)
1457fn strip_nested_with_clauses(select: &mut Select, _is_top_level: bool) {
1458    // Strip WITH from FROM subqueries
1459    if let Some(ref mut from) = select.from {
1460        for expr in from.expressions.iter_mut() {
1461            strip_with_from_expr(expr);
1462        }
1463    }
1464    // Strip from JOINs
1465    for join in select.joins.iter_mut() {
1466        strip_with_from_expr(&mut join.this);
1467    }
1468    // Strip from select expressions
1469    for expr in select.expressions.iter_mut() {
1470        strip_with_from_expr(expr);
1471    }
1472    // Strip from WHERE
1473    if let Some(ref mut w) = select.where_clause {
1474        strip_with_from_expr(&mut w.this);
1475    }
1476}
1477
1478fn strip_with_from_expr(expr: &mut Expression) {
1479    match expr {
1480        Expression::Subquery(ref mut subquery) => {
1481            strip_with_from_inner_query(&mut subquery.this);
1482        }
1483        Expression::Alias(ref mut alias) => {
1484            strip_with_from_expr(&mut alias.this);
1485        }
1486        Expression::Select(ref mut select) => {
1487            // Strip WITH from this SELECT (it's nested)
1488            select.with = None;
1489            // Recurse into its subqueries
1490            strip_nested_with_clauses(select, false);
1491        }
1492        _ => {}
1493    }
1494}
1495
1496fn strip_with_from_inner_query(expr: &mut Expression) {
1497    if let Expression::Select(ref mut select) = expr {
1498        select.with = None;
1499        strip_nested_with_clauses(select, false);
1500    }
1501}
1502
1503/// Helper to recursively collect CTEs from nested subqueries
1504fn collect_nested_ctes(
1505    expr: &Expression,
1506    collected: &mut Vec<crate::expressions::Cte>,
1507    has_recursive: &mut bool,
1508    is_top_level: bool,
1509) {
1510    match expr {
1511        Expression::Select(select) => {
1512            // If this is not the top level and has a WITH clause, collect its CTEs
1513            if !is_top_level {
1514                if let Some(ref with) = select.with {
1515                    if with.recursive {
1516                        *has_recursive = true;
1517                    }
1518                    collected.extend(with.ctes.clone());
1519                }
1520            }
1521
1522            // Recurse into FROM clause
1523            if let Some(ref from) = select.from {
1524                for expr in &from.expressions {
1525                    collect_nested_ctes(expr, collected, has_recursive, false);
1526                }
1527            }
1528
1529            // Recurse into JOINs
1530            for join in &select.joins {
1531                collect_nested_ctes(&join.this, collected, has_recursive, false);
1532            }
1533
1534            // Recurse into select expressions (for subqueries in SELECT)
1535            for sel_expr in &select.expressions {
1536                collect_nested_ctes(sel_expr, collected, has_recursive, false);
1537            }
1538
1539            // Recurse into WHERE
1540            if let Some(ref where_clause) = select.where_clause {
1541                collect_nested_ctes(&where_clause.this, collected, has_recursive, false);
1542            }
1543        }
1544        Expression::Subquery(subquery) => {
1545            // Process the inner query
1546            collect_nested_ctes(&subquery.this, collected, has_recursive, false);
1547        }
1548        Expression::Alias(alias) => {
1549            collect_nested_ctes(&alias.this, collected, has_recursive, false);
1550        }
1551        // Add more expression types as needed
1552        _ => {}
1553    }
1554}
1555
1556/// Inline window definitions from WINDOW clause.
1557///
1558/// Some dialects don't support named windows. This transform inlines them:
1559///
1560/// ```sql
1561/// SELECT SUM(a) OVER w FROM t WINDOW w AS (PARTITION BY b)
1562/// ```
1563///
1564/// To:
1565///
1566/// ```sql
1567/// SELECT SUM(a) OVER (PARTITION BY b) FROM t
1568/// ```
1569///
1570/// Reference: `transforms.py:975-1003`
1571pub fn eliminate_window_clause(expr: Expression) -> Result<Expression> {
1572    match expr {
1573        Expression::Select(mut select) => {
1574            if let Some(named_windows) = select.windows.take() {
1575                // Build a map of window name -> window spec
1576                let window_map: std::collections::HashMap<String, &Over> = named_windows
1577                    .iter()
1578                    .map(|nw| (nw.name.name.to_lowercase(), &nw.spec))
1579                    .collect();
1580
1581                // Inline window references in the select expressions
1582                select.expressions = select
1583                    .expressions
1584                    .into_iter()
1585                    .map(|e| inline_window_refs(e, &window_map))
1586                    .collect();
1587            }
1588            Ok(Expression::Select(select))
1589        }
1590        other => Ok(other),
1591    }
1592}
1593
1594/// Helper function to inline window references in an expression
1595fn inline_window_refs(
1596    expr: Expression,
1597    window_map: &std::collections::HashMap<String, &Over>,
1598) -> Expression {
1599    match expr {
1600        Expression::WindowFunction(mut wf) => {
1601            // Check if this window references a named window
1602            if let Some(ref name) = wf.over.window_name {
1603                let key = name.name.to_lowercase();
1604                if let Some(named_spec) = window_map.get(&key) {
1605                    // Inherit properties from the named window
1606                    if wf.over.partition_by.is_empty() && !named_spec.partition_by.is_empty() {
1607                        wf.over.partition_by = named_spec.partition_by.clone();
1608                    }
1609                    if wf.over.order_by.is_empty() && !named_spec.order_by.is_empty() {
1610                        wf.over.order_by = named_spec.order_by.clone();
1611                    }
1612                    if wf.over.frame.is_none() && named_spec.frame.is_some() {
1613                        wf.over.frame = named_spec.frame.clone();
1614                    }
1615                    // Clear the window name reference
1616                    wf.over.window_name = None;
1617                }
1618            }
1619            Expression::WindowFunction(wf)
1620        }
1621        Expression::Alias(mut alias) => {
1622            // Recurse into aliased expressions
1623            alias.this = inline_window_refs(alias.this, window_map);
1624            Expression::Alias(alias)
1625        }
1626        // For a complete implementation, we would need to recursively visit all expressions
1627        // that can contain window functions (CASE, subqueries, etc.)
1628        other => other,
1629    }
1630}
1631
1632/// Eliminate Oracle-style (+) join marks by converting to standard JOINs.
1633///
1634/// Oracle uses (+) syntax for outer joins:
1635/// ```sql
1636/// SELECT * FROM a, b WHERE a.x = b.x(+)
1637/// ```
1638///
1639/// This is converted to standard LEFT OUTER JOIN:
1640/// ```sql
1641/// SELECT * FROM a LEFT OUTER JOIN b ON a.x = b.x
1642/// ```
1643///
1644/// Reference: `transforms.py:828-945`
1645pub fn eliminate_join_marks(expr: Expression) -> Result<Expression> {
1646    match expr {
1647        Expression::Select(mut select) => {
1648            // Check if there are any join marks in the WHERE clause
1649            let has_join_marks = select
1650                .where_clause
1651                .as_ref()
1652                .map_or(false, |w| contains_join_mark(&w.this));
1653
1654            if !has_join_marks {
1655                return Ok(Expression::Select(select));
1656            }
1657
1658            // Collect tables from FROM clause
1659            let from_tables: Vec<String> = select
1660                .from
1661                .as_ref()
1662                .map(|f| {
1663                    f.expressions
1664                        .iter()
1665                        .filter_map(|e| get_table_name(e))
1666                        .collect()
1667                })
1668                .unwrap_or_default();
1669
1670            // Extract join conditions and their marked tables from WHERE
1671            let mut join_conditions: std::collections::HashMap<String, Vec<Expression>> =
1672                std::collections::HashMap::new();
1673            let mut remaining_conditions: Vec<Expression> = Vec::new();
1674
1675            if let Some(ref where_clause) = select.where_clause {
1676                extract_join_mark_conditions(
1677                    &where_clause.this,
1678                    &mut join_conditions,
1679                    &mut remaining_conditions,
1680                );
1681            }
1682
1683            // Build new JOINs for each marked table
1684            let mut new_joins = select.joins.clone();
1685            for (table_name, conditions) in join_conditions {
1686                // Find if this table is in FROM or existing JOINs
1687                let table_in_from = from_tables.contains(&table_name);
1688
1689                if table_in_from && !conditions.is_empty() {
1690                    // Create LEFT JOIN with combined conditions
1691                    let combined_condition = conditions.into_iter().reduce(|a, b| {
1692                        Expression::And(Box::new(BinaryOp {
1693                            left: a,
1694                            right: b,
1695                            left_comments: vec![],
1696                            operator_comments: vec![],
1697                            trailing_comments: vec![],
1698                            inferred_type: None,
1699                        }))
1700                    });
1701
1702                    // Find the table in FROM and move it to a JOIN
1703                    if let Some(ref mut from) = select.from {
1704                        if let Some(pos) = from
1705                            .expressions
1706                            .iter()
1707                            .position(|e| get_table_name(e).map_or(false, |n| n == table_name))
1708                        {
1709                            if from.expressions.len() > 1 {
1710                                let join_table = from.expressions.remove(pos);
1711                                new_joins.push(crate::expressions::Join {
1712                                    this: join_table,
1713                                    kind: JoinKind::Left,
1714                                    on: combined_condition,
1715                                    using: vec![],
1716                                    use_inner_keyword: false,
1717                                    use_outer_keyword: true,
1718                                    deferred_condition: false,
1719                                    join_hint: None,
1720                                    match_condition: None,
1721                                    pivots: Vec::new(),
1722                                    comments: Vec::new(),
1723                                    nesting_group: 0,
1724                                    directed: false,
1725                                });
1726                            }
1727                        }
1728                    }
1729                }
1730            }
1731
1732            select.joins = new_joins;
1733
1734            // Update WHERE with remaining conditions
1735            if remaining_conditions.is_empty() {
1736                select.where_clause = None;
1737            } else {
1738                let combined = remaining_conditions.into_iter().reduce(|a, b| {
1739                    Expression::And(Box::new(BinaryOp {
1740                        left: a,
1741                        right: b,
1742                        left_comments: vec![],
1743                        operator_comments: vec![],
1744                        trailing_comments: vec![],
1745                        inferred_type: None,
1746                    }))
1747                });
1748                select.where_clause = combined.map(|c| Where { this: c });
1749            }
1750
1751            // Clear join marks from all columns
1752            clear_join_marks(&mut Expression::Select(select.clone()));
1753
1754            Ok(Expression::Select(select))
1755        }
1756        other => Ok(other),
1757    }
1758}
1759
1760/// Check if an expression contains any columns with join marks
1761fn contains_join_mark(expr: &Expression) -> bool {
1762    match expr {
1763        Expression::Column(col) => col.join_mark,
1764        Expression::And(op) | Expression::Or(op) => {
1765            contains_join_mark(&op.left) || contains_join_mark(&op.right)
1766        }
1767        Expression::Eq(op)
1768        | Expression::Neq(op)
1769        | Expression::Lt(op)
1770        | Expression::Lte(op)
1771        | Expression::Gt(op)
1772        | Expression::Gte(op) => contains_join_mark(&op.left) || contains_join_mark(&op.right),
1773        Expression::Not(op) => contains_join_mark(&op.this),
1774        _ => false,
1775    }
1776}
1777
1778/// Get table name from a table expression
1779fn get_table_name(expr: &Expression) -> Option<String> {
1780    match expr {
1781        Expression::Table(t) => Some(t.name.name.clone()),
1782        Expression::Alias(a) => Some(a.alias.name.clone()),
1783        _ => None,
1784    }
1785}
1786
1787/// Extract join mark conditions from WHERE clause
1788fn extract_join_mark_conditions(
1789    expr: &Expression,
1790    join_conditions: &mut std::collections::HashMap<String, Vec<Expression>>,
1791    remaining: &mut Vec<Expression>,
1792) {
1793    match expr {
1794        Expression::And(op) => {
1795            extract_join_mark_conditions(&op.left, join_conditions, remaining);
1796            extract_join_mark_conditions(&op.right, join_conditions, remaining);
1797        }
1798        _ => {
1799            if let Some(table) = get_join_mark_table(expr) {
1800                join_conditions
1801                    .entry(table)
1802                    .or_insert_with(Vec::new)
1803                    .push(expr.clone());
1804            } else {
1805                remaining.push(expr.clone());
1806            }
1807        }
1808    }
1809}
1810
1811/// Get the table name of a column with join mark in an expression
1812fn get_join_mark_table(expr: &Expression) -> Option<String> {
1813    match expr {
1814        Expression::Eq(op)
1815        | Expression::Neq(op)
1816        | Expression::Lt(op)
1817        | Expression::Lte(op)
1818        | Expression::Gt(op)
1819        | Expression::Gte(op) => {
1820            // Check both sides for join mark columns
1821            if let Expression::Column(col) = &op.left {
1822                if col.join_mark {
1823                    return col.table.as_ref().map(|t| t.name.clone());
1824                }
1825            }
1826            if let Expression::Column(col) = &op.right {
1827                if col.join_mark {
1828                    return col.table.as_ref().map(|t| t.name.clone());
1829                }
1830            }
1831            None
1832        }
1833        _ => None,
1834    }
1835}
1836
1837/// Clear join marks from all columns in an expression
1838fn clear_join_marks(expr: &mut Expression) {
1839    match expr {
1840        Expression::Column(col) => col.join_mark = false,
1841        Expression::Select(select) => {
1842            if let Some(ref mut w) = select.where_clause {
1843                clear_join_marks(&mut w.this);
1844            }
1845            for sel_expr in &mut select.expressions {
1846                clear_join_marks(sel_expr);
1847            }
1848        }
1849        Expression::And(op) | Expression::Or(op) => {
1850            clear_join_marks(&mut op.left);
1851            clear_join_marks(&mut op.right);
1852        }
1853        Expression::Eq(op)
1854        | Expression::Neq(op)
1855        | Expression::Lt(op)
1856        | Expression::Lte(op)
1857        | Expression::Gt(op)
1858        | Expression::Gte(op) => {
1859            clear_join_marks(&mut op.left);
1860            clear_join_marks(&mut op.right);
1861        }
1862        _ => {}
1863    }
1864}
1865
1866/// Add column names to recursive CTE definitions.
1867///
1868/// Uses projection output names in recursive CTE definitions to define the CTEs' columns.
1869/// This is required by some dialects that need explicit column names in recursive CTEs.
1870///
1871/// Reference: `transforms.py:576-592`
1872pub fn add_recursive_cte_column_names(expr: Expression) -> Result<Expression> {
1873    match expr {
1874        Expression::Select(mut select) => {
1875            if let Some(ref mut with) = select.with {
1876                if with.recursive {
1877                    let mut counter = 0;
1878                    for cte in &mut with.ctes {
1879                        if cte.columns.is_empty() {
1880                            // Try to get column names from the CTE's SELECT
1881                            if let Expression::Select(ref cte_select) = cte.this {
1882                                let names: Vec<Identifier> = cte_select
1883                                    .expressions
1884                                    .iter()
1885                                    .map(|e| match e {
1886                                        Expression::Alias(a) => a.alias.clone(),
1887                                        Expression::Column(c) => c.name.clone(),
1888                                        _ => {
1889                                            counter += 1;
1890                                            Identifier::new(format!("_c_{}", counter))
1891                                        }
1892                                    })
1893                                    .collect();
1894                                cte.columns = names;
1895                            }
1896                        }
1897                    }
1898                }
1899            }
1900            Ok(Expression::Select(select))
1901        }
1902        other => Ok(other),
1903    }
1904}
1905
1906/// Convert epoch string in CAST to timestamp literal.
1907///
1908/// Replaces `CAST('epoch' AS TIMESTAMP)` with `CAST('1970-01-01 00:00:00' AS TIMESTAMP)`
1909/// for dialects that don't support the 'epoch' keyword.
1910///
1911/// Reference: `transforms.py:595-604`
1912pub fn epoch_cast_to_ts(expr: Expression) -> Result<Expression> {
1913    match expr {
1914        Expression::Cast(mut cast) => {
1915            if let Expression::Literal(ref lit) = cast.this {
1916                if let Literal::String(ref s) = lit.as_ref() {
1917                    if s.to_lowercase() == "epoch" {
1918                        if is_temporal_type(&cast.to) {
1919                            cast.this = Expression::Literal(Box::new(Literal::String(
1920                                "1970-01-01 00:00:00".to_string(),
1921                            )));
1922                        }
1923                    }
1924                }
1925            }
1926            Ok(Expression::Cast(cast))
1927        }
1928        Expression::TryCast(mut try_cast) => {
1929            if let Expression::Literal(ref lit) = try_cast.this {
1930                if let Literal::String(ref s) = lit.as_ref() {
1931                    if s.to_lowercase() == "epoch" {
1932                        if is_temporal_type(&try_cast.to) {
1933                            try_cast.this = Expression::Literal(Box::new(Literal::String(
1934                                "1970-01-01 00:00:00".to_string(),
1935                            )));
1936                        }
1937                    }
1938                }
1939            }
1940            Ok(Expression::TryCast(try_cast))
1941        }
1942        other => Ok(other),
1943    }
1944}
1945
1946/// Check if a DataType is a temporal type (DATE, TIMESTAMP, etc.)
1947fn is_temporal_type(dt: &DataType) -> bool {
1948    matches!(
1949        dt,
1950        DataType::Date | DataType::Timestamp { .. } | DataType::Time { .. }
1951    )
1952}
1953
1954/// Ensure boolean values in conditions.
1955///
1956/// Converts numeric values used in conditions into explicit boolean expressions.
1957/// For dialects that require explicit booleans in WHERE clauses.
1958///
1959/// Converts:
1960/// ```sql
1961/// WHERE column
1962/// ```
1963/// To:
1964/// ```sql
1965/// WHERE column <> 0
1966/// ```
1967///
1968/// And:
1969/// ```sql
1970/// WHERE 1
1971/// ```
1972/// To:
1973/// ```sql
1974/// WHERE 1 <> 0
1975/// ```
1976///
1977/// Reference: `transforms.py:703-721`
1978pub fn ensure_bools(expr: Expression) -> Result<Expression> {
1979    // First, recursively process Case WHEN conditions throughout the expression tree
1980    let expr = ensure_bools_in_case(expr);
1981    match expr {
1982        Expression::Select(mut select) => {
1983            // Transform WHERE clause condition
1984            if let Some(ref mut where_clause) = select.where_clause {
1985                where_clause.this = ensure_bool_condition(where_clause.this.clone());
1986            }
1987            // Transform HAVING clause condition
1988            if let Some(ref mut having) = select.having {
1989                having.this = ensure_bool_condition(having.this.clone());
1990            }
1991            Ok(Expression::Select(select))
1992        }
1993        // Top-level AND/OR/NOT expressions also need ensure_bools processing
1994        Expression::And(_) | Expression::Or(_) | Expression::Not(_) => {
1995            Ok(ensure_bool_condition(expr))
1996        }
1997        other => Ok(other),
1998    }
1999}
2000
2001/// Recursively walk the expression tree to find Case expressions and apply
2002/// ensure_bool_condition to their WHEN conditions. This ensures that
2003/// `CASE WHEN TRUE` becomes `CASE WHEN (1 = 1)` etc.
2004fn ensure_bools_in_case(expr: Expression) -> Expression {
2005    match expr {
2006        Expression::Case(mut case) => {
2007            case.whens = case
2008                .whens
2009                .into_iter()
2010                .map(|(condition, result)| {
2011                    let new_condition = ensure_bool_condition(ensure_bools_in_case(condition));
2012                    let new_result = ensure_bools_in_case(result);
2013                    (new_condition, new_result)
2014                })
2015                .collect();
2016            if let Some(else_expr) = case.else_ {
2017                case.else_ = Some(ensure_bools_in_case(else_expr));
2018            }
2019            Expression::Case(Box::new(*case))
2020        }
2021        Expression::Select(mut select) => {
2022            // Recursively process expressions in the SELECT list
2023            select.expressions = select
2024                .expressions
2025                .into_iter()
2026                .map(ensure_bools_in_case)
2027                .collect();
2028            // Process WHERE/HAVING are handled by ensure_bools main function
2029            Expression::Select(select)
2030        }
2031        Expression::Alias(mut alias) => {
2032            alias.this = ensure_bools_in_case(alias.this);
2033            Expression::Alias(alias)
2034        }
2035        Expression::Paren(mut paren) => {
2036            paren.this = ensure_bools_in_case(paren.this);
2037            Expression::Paren(paren)
2038        }
2039        other => other,
2040    }
2041}
2042
2043/// Helper to check if an expression is inherently boolean (returns a boolean value).
2044/// Inherently boolean expressions include comparisons, predicates, logical operators, etc.
2045fn is_boolean_expression(expr: &Expression) -> bool {
2046    matches!(
2047        expr,
2048        Expression::Eq(_)
2049            | Expression::Neq(_)
2050            | Expression::Lt(_)
2051            | Expression::Lte(_)
2052            | Expression::Gt(_)
2053            | Expression::Gte(_)
2054            | Expression::Is(_)
2055            | Expression::IsNull(_)
2056            | Expression::IsTrue(_)
2057            | Expression::IsFalse(_)
2058            | Expression::Like(_)
2059            | Expression::ILike(_)
2060            | Expression::SimilarTo(_)
2061            | Expression::Glob(_)
2062            | Expression::RegexpLike(_)
2063            | Expression::In(_)
2064            | Expression::Between(_)
2065            | Expression::Exists(_)
2066            | Expression::And(_)
2067            | Expression::Or(_)
2068            | Expression::Not(_)
2069            | Expression::Any(_)
2070            | Expression::All(_)
2071            | Expression::EqualNull(_)
2072    )
2073}
2074
2075/// Helper to wrap a non-boolean expression with `<> 0`
2076fn wrap_neq_zero(expr: Expression) -> Expression {
2077    Expression::Neq(Box::new(BinaryOp {
2078        left: expr,
2079        right: Expression::Literal(Box::new(Literal::Number("0".to_string()))),
2080        left_comments: vec![],
2081        operator_comments: vec![],
2082        trailing_comments: vec![],
2083        inferred_type: None,
2084    }))
2085}
2086
2087/// Helper to convert a condition expression to ensure it's boolean.
2088///
2089/// In TSQL, conditions in WHERE/HAVING must be boolean expressions.
2090/// Non-boolean expressions (columns, literals, casts, function calls, etc.)
2091/// are wrapped with `<> 0`. Boolean literals are converted to `(1 = 1)` or `(1 = 0)`.
2092fn ensure_bool_condition(expr: Expression) -> Expression {
2093    match expr {
2094        // For AND/OR, recursively process children
2095        Expression::And(op) => {
2096            let new_op = BinaryOp {
2097                left: ensure_bool_condition(op.left.clone()),
2098                right: ensure_bool_condition(op.right.clone()),
2099                left_comments: op.left_comments.clone(),
2100                operator_comments: op.operator_comments.clone(),
2101                trailing_comments: op.trailing_comments.clone(),
2102                inferred_type: None,
2103            };
2104            Expression::And(Box::new(new_op))
2105        }
2106        Expression::Or(op) => {
2107            let new_op = BinaryOp {
2108                left: ensure_bool_condition(op.left.clone()),
2109                right: ensure_bool_condition(op.right.clone()),
2110                left_comments: op.left_comments.clone(),
2111                operator_comments: op.operator_comments.clone(),
2112                trailing_comments: op.trailing_comments.clone(),
2113                inferred_type: None,
2114            };
2115            Expression::Or(Box::new(new_op))
2116        }
2117        // For NOT, recursively process the inner expression
2118        Expression::Not(op) => Expression::Not(Box::new(crate::expressions::UnaryOp {
2119            this: ensure_bool_condition(op.this.clone()),
2120            inferred_type: None,
2121        })),
2122        // For Paren, recurse into inner expression
2123        Expression::Paren(paren) => Expression::Paren(Box::new(crate::expressions::Paren {
2124            this: ensure_bool_condition(paren.this.clone()),
2125            trailing_comments: paren.trailing_comments.clone(),
2126        })),
2127        // Boolean literals: true -> (1 = 1), false -> (1 = 0)
2128        Expression::Boolean(BooleanLiteral { value: true }) => {
2129            Expression::Paren(Box::new(crate::expressions::Paren {
2130                this: Expression::Eq(Box::new(BinaryOp {
2131                    left: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
2132                    right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
2133                    left_comments: vec![],
2134                    operator_comments: vec![],
2135                    trailing_comments: vec![],
2136                    inferred_type: None,
2137                })),
2138                trailing_comments: vec![],
2139            }))
2140        }
2141        Expression::Boolean(BooleanLiteral { value: false }) => {
2142            Expression::Paren(Box::new(crate::expressions::Paren {
2143                this: Expression::Eq(Box::new(BinaryOp {
2144                    left: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
2145                    right: Expression::Literal(Box::new(Literal::Number("0".to_string()))),
2146                    left_comments: vec![],
2147                    operator_comments: vec![],
2148                    trailing_comments: vec![],
2149                    inferred_type: None,
2150                })),
2151                trailing_comments: vec![],
2152            }))
2153        }
2154        // Already boolean expressions pass through unchanged
2155        ref e if is_boolean_expression(e) => expr,
2156        // Everything else (Column, Identifier, Cast, Literal::Number, function calls, etc.)
2157        // gets wrapped with <> 0
2158        _ => wrap_neq_zero(expr),
2159    }
2160}
2161
2162/// Remove table qualifiers from column references.
2163///
2164/// Converts `table.column` to just `column` throughout the expression tree.
2165///
2166/// Reference: `transforms.py:724-730`
2167pub fn unqualify_columns(expr: Expression) -> Result<Expression> {
2168    Ok(unqualify_columns_recursive(expr))
2169}
2170
2171/// Recursively remove table qualifiers from column references
2172fn unqualify_columns_recursive(expr: Expression) -> Expression {
2173    match expr {
2174        Expression::Column(mut col) => {
2175            col.table = None;
2176            Expression::Column(col)
2177        }
2178        Expression::Select(mut select) => {
2179            select.expressions = select
2180                .expressions
2181                .into_iter()
2182                .map(unqualify_columns_recursive)
2183                .collect();
2184            if let Some(ref mut where_clause) = select.where_clause {
2185                where_clause.this = unqualify_columns_recursive(where_clause.this.clone());
2186            }
2187            if let Some(ref mut having) = select.having {
2188                having.this = unqualify_columns_recursive(having.this.clone());
2189            }
2190            if let Some(ref mut group_by) = select.group_by {
2191                group_by.expressions = group_by
2192                    .expressions
2193                    .iter()
2194                    .cloned()
2195                    .map(unqualify_columns_recursive)
2196                    .collect();
2197            }
2198            if let Some(ref mut order_by) = select.order_by {
2199                order_by.expressions = order_by
2200                    .expressions
2201                    .iter()
2202                    .map(|o| crate::expressions::Ordered {
2203                        this: unqualify_columns_recursive(o.this.clone()),
2204                        desc: o.desc,
2205                        nulls_first: o.nulls_first,
2206                        explicit_asc: o.explicit_asc,
2207                        with_fill: o.with_fill.clone(),
2208                    })
2209                    .collect();
2210            }
2211            for join in &mut select.joins {
2212                if let Some(ref mut on) = join.on {
2213                    *on = unqualify_columns_recursive(on.clone());
2214                }
2215            }
2216            Expression::Select(select)
2217        }
2218        Expression::Alias(mut alias) => {
2219            alias.this = unqualify_columns_recursive(alias.this);
2220            Expression::Alias(alias)
2221        }
2222        // Binary operations
2223        Expression::And(op) => Expression::And(Box::new(unqualify_binary_op(*op))),
2224        Expression::Or(op) => Expression::Or(Box::new(unqualify_binary_op(*op))),
2225        Expression::Eq(op) => Expression::Eq(Box::new(unqualify_binary_op(*op))),
2226        Expression::Neq(op) => Expression::Neq(Box::new(unqualify_binary_op(*op))),
2227        Expression::Lt(op) => Expression::Lt(Box::new(unqualify_binary_op(*op))),
2228        Expression::Lte(op) => Expression::Lte(Box::new(unqualify_binary_op(*op))),
2229        Expression::Gt(op) => Expression::Gt(Box::new(unqualify_binary_op(*op))),
2230        Expression::Gte(op) => Expression::Gte(Box::new(unqualify_binary_op(*op))),
2231        Expression::Add(op) => Expression::Add(Box::new(unqualify_binary_op(*op))),
2232        Expression::Sub(op) => Expression::Sub(Box::new(unqualify_binary_op(*op))),
2233        Expression::Mul(op) => Expression::Mul(Box::new(unqualify_binary_op(*op))),
2234        Expression::Div(op) => Expression::Div(Box::new(unqualify_binary_op(*op))),
2235        // Functions
2236        Expression::Function(mut func) => {
2237            func.args = func
2238                .args
2239                .into_iter()
2240                .map(unqualify_columns_recursive)
2241                .collect();
2242            Expression::Function(func)
2243        }
2244        Expression::AggregateFunction(mut func) => {
2245            func.args = func
2246                .args
2247                .into_iter()
2248                .map(unqualify_columns_recursive)
2249                .collect();
2250            Expression::AggregateFunction(func)
2251        }
2252        Expression::Case(mut case) => {
2253            case.whens = case
2254                .whens
2255                .into_iter()
2256                .map(|(cond, result)| {
2257                    (
2258                        unqualify_columns_recursive(cond),
2259                        unqualify_columns_recursive(result),
2260                    )
2261                })
2262                .collect();
2263            if let Some(else_expr) = case.else_ {
2264                case.else_ = Some(unqualify_columns_recursive(else_expr));
2265            }
2266            Expression::Case(case)
2267        }
2268        // Other expressions pass through unchanged
2269        other => other,
2270    }
2271}
2272
2273/// Helper to unqualify columns in a binary operation
2274fn unqualify_binary_op(mut op: BinaryOp) -> BinaryOp {
2275    op.left = unqualify_columns_recursive(op.left);
2276    op.right = unqualify_columns_recursive(op.right);
2277    op
2278}
2279
2280/// Convert UNNEST(GENERATE_DATE_ARRAY(...)) to recursive CTE.
2281///
2282/// For dialects that don't support GENERATE_DATE_ARRAY, this converts:
2283/// ```sql
2284/// SELECT * FROM UNNEST(GENERATE_DATE_ARRAY('2024-01-01', '2024-01-31', INTERVAL 1 DAY)) AS d(date_value)
2285/// ```
2286/// To a recursive CTE:
2287/// ```sql
2288/// WITH RECURSIVE _generated_dates(date_value) AS (
2289///     SELECT CAST('2024-01-01' AS DATE) AS date_value
2290///     UNION ALL
2291///     SELECT CAST(DATE_ADD(date_value, 1, DAY) AS DATE)
2292///     FROM _generated_dates
2293///     WHERE CAST(DATE_ADD(date_value, 1, DAY) AS DATE) <= CAST('2024-01-31' AS DATE)
2294/// )
2295/// SELECT date_value FROM _generated_dates
2296/// ```
2297///
2298/// Reference: `transforms.py:68-122`
2299pub fn unnest_generate_date_array_using_recursive_cte(expr: Expression) -> Result<Expression> {
2300    match expr {
2301        Expression::Select(mut select) => {
2302            let mut cte_count = 0;
2303            let mut new_ctes: Vec<crate::expressions::Cte> = Vec::new();
2304
2305            // Process existing CTE bodies first (to handle CTE-wrapped GENERATE_DATE_ARRAY)
2306            if let Some(ref mut with) = select.with {
2307                for cte in &mut with.ctes {
2308                    process_expression_for_gda(&mut cte.this, &mut cte_count, &mut new_ctes);
2309                }
2310            }
2311
2312            // Process FROM clause
2313            if let Some(ref mut from) = select.from {
2314                for table_expr in &mut from.expressions {
2315                    if let Some((cte, replacement)) =
2316                        try_convert_generate_date_array(table_expr, &mut cte_count)
2317                    {
2318                        new_ctes.push(cte);
2319                        *table_expr = replacement;
2320                    }
2321                }
2322            }
2323
2324            // Process JOINs
2325            for join in &mut select.joins {
2326                if let Some((cte, replacement)) =
2327                    try_convert_generate_date_array(&join.this, &mut cte_count)
2328                {
2329                    new_ctes.push(cte);
2330                    join.this = replacement;
2331                }
2332            }
2333
2334            // Add collected CTEs to the WITH clause
2335            if !new_ctes.is_empty() {
2336                let with_clause = select.with.get_or_insert_with(|| crate::expressions::With {
2337                    ctes: Vec::new(),
2338                    recursive: true, // Recursive CTEs
2339                    leading_comments: vec![],
2340                    search: None,
2341                });
2342                with_clause.recursive = true;
2343
2344                // Prepend new CTEs before existing ones
2345                let mut all_ctes = new_ctes;
2346                all_ctes.append(&mut with_clause.ctes);
2347                with_clause.ctes = all_ctes;
2348            }
2349
2350            Ok(Expression::Select(select))
2351        }
2352        other => Ok(other),
2353    }
2354}
2355
2356/// Recursively process an expression tree to find and convert UNNEST(GENERATE_DATE_ARRAY)
2357/// inside CTE bodies, subqueries, etc.
2358fn process_expression_for_gda(
2359    expr: &mut Expression,
2360    cte_count: &mut usize,
2361    new_ctes: &mut Vec<crate::expressions::Cte>,
2362) {
2363    match expr {
2364        Expression::Select(ref mut select) => {
2365            // Process FROM clause
2366            if let Some(ref mut from) = select.from {
2367                for table_expr in &mut from.expressions {
2368                    if let Some((cte, replacement)) =
2369                        try_convert_generate_date_array(table_expr, cte_count)
2370                    {
2371                        new_ctes.push(cte);
2372                        *table_expr = replacement;
2373                    }
2374                }
2375            }
2376            // Process JOINs
2377            for join in &mut select.joins {
2378                if let Some((cte, replacement)) =
2379                    try_convert_generate_date_array(&join.this, cte_count)
2380                {
2381                    new_ctes.push(cte);
2382                    join.this = replacement;
2383                }
2384            }
2385        }
2386        Expression::Union(ref mut u) => {
2387            process_expression_for_gda(&mut u.left, cte_count, new_ctes);
2388            process_expression_for_gda(&mut u.right, cte_count, new_ctes);
2389        }
2390        Expression::Subquery(ref mut sq) => {
2391            process_expression_for_gda(&mut sq.this, cte_count, new_ctes);
2392        }
2393        _ => {}
2394    }
2395}
2396
2397/// Try to convert an UNNEST(GENERATE_DATE_ARRAY(...)) to a recursive CTE reference.
2398/// `column_name_override` allows the caller to specify a custom column name (from alias).
2399fn try_convert_generate_date_array(
2400    expr: &Expression,
2401    cte_count: &mut usize,
2402) -> Option<(crate::expressions::Cte, Expression)> {
2403    try_convert_generate_date_array_with_name(expr, cte_count, None)
2404}
2405
2406fn try_convert_generate_date_array_with_name(
2407    expr: &Expression,
2408    cte_count: &mut usize,
2409    column_name_override: Option<&str>,
2410) -> Option<(crate::expressions::Cte, Expression)> {
2411    // Helper: extract (start, end, step) from GENERATE_DATE_ARRAY/GenerateSeries variants
2412    fn extract_gda_args(
2413        inner: &Expression,
2414    ) -> Option<(&Expression, &Expression, Option<&Expression>)> {
2415        match inner {
2416            Expression::GenerateDateArray(gda) => {
2417                let start = gda.start.as_ref()?;
2418                let end = gda.end.as_ref()?;
2419                let step = gda.step.as_deref();
2420                Some((start, end, step))
2421            }
2422            Expression::GenerateSeries(gs) => {
2423                let start = gs.start.as_deref()?;
2424                let end = gs.end.as_deref()?;
2425                let step = gs.step.as_deref();
2426                Some((start, end, step))
2427            }
2428            Expression::Function(f) if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") => {
2429                if f.args.len() >= 2 {
2430                    let start = &f.args[0];
2431                    let end = &f.args[1];
2432                    let step = f.args.get(2);
2433                    Some((start, end, step))
2434                } else {
2435                    None
2436                }
2437            }
2438            _ => None,
2439        }
2440    }
2441
2442    // Look for UNNEST containing GENERATE_DATE_ARRAY
2443    if let Expression::Unnest(unnest) = expr {
2444        if let Some((start, end, step_opt)) = extract_gda_args(&unnest.this) {
2445            let start = start;
2446            let end = end;
2447            let step: Option<&Expression> = step_opt;
2448
2449            // Generate CTE name
2450            let cte_name = if *cte_count == 0 {
2451                "_generated_dates".to_string()
2452            } else {
2453                format!("_generated_dates_{}", cte_count)
2454            };
2455            *cte_count += 1;
2456
2457            let column_name =
2458                Identifier::new(column_name_override.unwrap_or("date_value").to_string());
2459
2460            // Helper: wrap expression in CAST(... AS DATE) unless already a date literal or CAST to DATE
2461            let cast_to_date = |expr: &Expression| -> Expression {
2462                match expr {
2463                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
2464                        // DATE '...' -> convert to CAST('...' AS DATE) to match expected output
2465                        if let Expression::Literal(lit) = expr {
2466                            if let Literal::Date(d) = lit.as_ref() {
2467                                Expression::Cast(Box::new(Cast {
2468                                    this: Expression::Literal(Box::new(Literal::String(d.clone()))),
2469                                    to: DataType::Date,
2470                                    trailing_comments: vec![],
2471                                    double_colon_syntax: false,
2472                                    format: None,
2473                                    default: None,
2474                                    inferred_type: None,
2475                                }))
2476                            } else {
2477                                expr.clone()
2478                            }
2479                        } else {
2480                            unreachable!()
2481                        }
2482                    }
2483                    Expression::Cast(c) if matches!(c.to, DataType::Date) => expr.clone(),
2484                    _ => Expression::Cast(Box::new(Cast {
2485                        this: expr.clone(),
2486                        to: DataType::Date,
2487                        trailing_comments: vec![],
2488                        double_colon_syntax: false,
2489                        format: None,
2490                        default: None,
2491                        inferred_type: None,
2492                    })),
2493                }
2494            };
2495
2496            // Build base case: SELECT CAST(start AS DATE) AS date_value
2497            let base_select = Select {
2498                expressions: vec![Expression::Alias(Box::new(crate::expressions::Alias {
2499                    this: cast_to_date(start),
2500                    alias: column_name.clone(),
2501                    column_aliases: vec![],
2502                    pre_alias_comments: vec![],
2503                    trailing_comments: vec![],
2504                    inferred_type: None,
2505                }))],
2506                ..Select::new()
2507            };
2508
2509            // Normalize interval: convert String("1") -> Number("1") so it generates without quotes
2510            let normalize_interval = |expr: &Expression| -> Expression {
2511                if let Expression::Interval(ref iv) = expr {
2512                    let mut iv_clone = iv.as_ref().clone();
2513                    if let Some(Expression::Literal(ref lit)) = iv_clone.this {
2514                        if let Literal::String(ref s) = lit.as_ref() {
2515                            // Convert numeric strings to Number literals for unquoted output
2516                            if s.parse::<f64>().is_ok() {
2517                                iv_clone.this =
2518                                    Some(Expression::Literal(Box::new(Literal::Number(s.clone()))));
2519                            }
2520                        }
2521                    }
2522                    Expression::Interval(Box::new(iv_clone))
2523                } else {
2524                    expr.clone()
2525                }
2526            };
2527
2528            // Build recursive case: DateAdd(date_value, count, unit) from CTE where result <= end
2529            // Extract interval unit and count from step expression
2530            let normalized_step = step.map(|s| normalize_interval(s)).unwrap_or_else(|| {
2531                Expression::Interval(Box::new(crate::expressions::Interval {
2532                    this: Some(Expression::Literal(Box::new(Literal::Number(
2533                        "1".to_string(),
2534                    )))),
2535                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
2536                        unit: crate::expressions::IntervalUnit::Day,
2537                        use_plural: false,
2538                    }),
2539                }))
2540            });
2541
2542            // Extract unit and count from interval expression to build DateAddFunc
2543            let (add_unit, add_count) = extract_interval_unit_and_count(&normalized_step);
2544
2545            let date_add_expr = Expression::DateAdd(Box::new(crate::expressions::DateAddFunc {
2546                this: Expression::Column(Box::new(crate::expressions::Column {
2547                    name: column_name.clone(),
2548                    table: None,
2549                    join_mark: false,
2550                    trailing_comments: vec![],
2551                    span: None,
2552                    inferred_type: None,
2553                })),
2554                interval: add_count,
2555                unit: add_unit,
2556            }));
2557
2558            let cast_date_add = Expression::Cast(Box::new(Cast {
2559                this: date_add_expr.clone(),
2560                to: DataType::Date,
2561                trailing_comments: vec![],
2562                double_colon_syntax: false,
2563                format: None,
2564                default: None,
2565                inferred_type: None,
2566            }));
2567
2568            let recursive_select = Select {
2569                expressions: vec![cast_date_add.clone()],
2570                from: Some(From {
2571                    expressions: vec![Expression::Table(Box::new(
2572                        crate::expressions::TableRef::new(&cte_name),
2573                    ))],
2574                }),
2575                where_clause: Some(Where {
2576                    this: Expression::Lte(Box::new(BinaryOp {
2577                        left: cast_date_add,
2578                        right: cast_to_date(end),
2579                        left_comments: vec![],
2580                        operator_comments: vec![],
2581                        trailing_comments: vec![],
2582                        inferred_type: None,
2583                    })),
2584                }),
2585                ..Select::new()
2586            };
2587
2588            // Build UNION ALL of base and recursive
2589            let union = crate::expressions::Union {
2590                left: Expression::Select(Box::new(base_select)),
2591                right: Expression::Select(Box::new(recursive_select)),
2592                all: true, // UNION ALL
2593                distinct: false,
2594                with: None,
2595                order_by: None,
2596                limit: None,
2597                offset: None,
2598                distribute_by: None,
2599                sort_by: None,
2600                cluster_by: None,
2601                by_name: false,
2602                side: None,
2603                kind: None,
2604                corresponding: false,
2605                strict: false,
2606                on_columns: Vec::new(),
2607            };
2608
2609            // Create CTE
2610            let cte = crate::expressions::Cte {
2611                this: Expression::Union(Box::new(union)),
2612                alias: Identifier::new(cte_name.clone()),
2613                columns: vec![column_name.clone()],
2614                materialized: None,
2615                key_expressions: Vec::new(),
2616                alias_first: true,
2617                comments: Vec::new(),
2618            };
2619
2620            // Create replacement: SELECT date_value FROM cte_name
2621            let replacement_select = Select {
2622                expressions: vec![Expression::Column(Box::new(crate::expressions::Column {
2623                    name: column_name,
2624                    table: None,
2625                    join_mark: false,
2626                    trailing_comments: vec![],
2627                    span: None,
2628                    inferred_type: None,
2629                }))],
2630                from: Some(From {
2631                    expressions: vec![Expression::Table(Box::new(
2632                        crate::expressions::TableRef::new(&cte_name),
2633                    ))],
2634                }),
2635                ..Select::new()
2636            };
2637
2638            let replacement = Expression::Subquery(Box::new(Subquery {
2639                this: Expression::Select(Box::new(replacement_select)),
2640                alias: Some(Identifier::new(cte_name)),
2641                column_aliases: vec![],
2642                order_by: None,
2643                limit: None,
2644                offset: None,
2645                distribute_by: None,
2646                sort_by: None,
2647                cluster_by: None,
2648                lateral: false,
2649                modifiers_inside: false,
2650                trailing_comments: vec![],
2651                inferred_type: None,
2652            }));
2653
2654            return Some((cte, replacement));
2655        }
2656    }
2657
2658    // Also check for aliased UNNEST like UNNEST(...) AS _q(date_week)
2659    if let Expression::Alias(alias) = expr {
2660        // Extract column name from alias column_aliases if present
2661        let col_name = alias.column_aliases.first().map(|id| id.name.as_str());
2662        if let Some((cte, replacement)) =
2663            try_convert_generate_date_array_with_name(&alias.this, cte_count, col_name)
2664        {
2665            // If we extracted a column name from the alias, don't preserve the outer alias
2666            // since the CTE now uses that column name directly
2667            if col_name.is_some() {
2668                return Some((cte, replacement));
2669            }
2670            let new_alias = Expression::Alias(Box::new(crate::expressions::Alias {
2671                this: replacement,
2672                alias: alias.alias.clone(),
2673                column_aliases: alias.column_aliases.clone(),
2674                pre_alias_comments: alias.pre_alias_comments.clone(),
2675                trailing_comments: alias.trailing_comments.clone(),
2676                inferred_type: None,
2677            }));
2678            return Some((cte, new_alias));
2679        }
2680    }
2681
2682    None
2683}
2684
2685/// Extract interval unit and count from an interval expression.
2686/// Handles both structured intervals (with separate unit field) and
2687/// string-encoded intervals like `INTERVAL '1 WEEK'` where unit is None
2688/// and the value contains both count and unit.
2689fn extract_interval_unit_and_count(
2690    expr: &Expression,
2691) -> (crate::expressions::IntervalUnit, Expression) {
2692    use crate::expressions::{IntervalUnit, IntervalUnitSpec, Literal};
2693
2694    if let Expression::Interval(ref iv) = expr {
2695        // First try: structured unit field
2696        if let Some(ref unit_spec) = iv.unit {
2697            if let IntervalUnitSpec::Simple { unit, .. } = unit_spec {
2698                let count = match &iv.this {
2699                    Some(e) => e.clone(),
2700                    None => Expression::Literal(Box::new(Literal::Number("1".to_string()))),
2701                };
2702                return (unit.clone(), count);
2703            }
2704        }
2705
2706        // Second try: parse from string value like "1 WEEK" or "1"
2707        if let Some(ref val_expr) = iv.this {
2708            match val_expr {
2709                Expression::Literal(lit)
2710                    if matches!(lit.as_ref(), Literal::String(_) | Literal::Number(_)) =>
2711                {
2712                    let s = match lit.as_ref() {
2713                        Literal::String(s) | Literal::Number(s) => s,
2714                        _ => unreachable!(),
2715                    };
2716                    // Try to parse "count unit" format like "1 WEEK", "1 MONTH"
2717                    let parts: Vec<&str> = s.trim().splitn(2, char::is_whitespace).collect();
2718                    if parts.len() == 2 {
2719                        let count_str = parts[0].trim();
2720                        let unit_str = parts[1].trim().to_uppercase();
2721                        let unit = match unit_str.as_str() {
2722                            "YEAR" | "YEARS" => IntervalUnit::Year,
2723                            "QUARTER" | "QUARTERS" => IntervalUnit::Quarter,
2724                            "MONTH" | "MONTHS" => IntervalUnit::Month,
2725                            "WEEK" | "WEEKS" => IntervalUnit::Week,
2726                            "DAY" | "DAYS" => IntervalUnit::Day,
2727                            "HOUR" | "HOURS" => IntervalUnit::Hour,
2728                            "MINUTE" | "MINUTES" => IntervalUnit::Minute,
2729                            "SECOND" | "SECONDS" => IntervalUnit::Second,
2730                            "MILLISECOND" | "MILLISECONDS" => IntervalUnit::Millisecond,
2731                            "MICROSECOND" | "MICROSECONDS" => IntervalUnit::Microsecond,
2732                            _ => IntervalUnit::Day,
2733                        };
2734                        return (
2735                            unit,
2736                            Expression::Literal(Box::new(Literal::Number(count_str.to_string()))),
2737                        );
2738                    }
2739                    // Just a number with no unit - default to Day
2740                    if s.parse::<f64>().is_ok() {
2741                        return (
2742                            IntervalUnit::Day,
2743                            Expression::Literal(Box::new(Literal::Number(s.clone()))),
2744                        );
2745                    }
2746                }
2747                _ => {}
2748            }
2749        }
2750
2751        // Fallback
2752        (
2753            IntervalUnit::Day,
2754            Expression::Literal(Box::new(Literal::Number("1".to_string()))),
2755        )
2756    } else {
2757        (
2758            IntervalUnit::Day,
2759            Expression::Literal(Box::new(Literal::Number("1".to_string()))),
2760        )
2761    }
2762}
2763
2764/// Convert ILIKE to LOWER(x) LIKE LOWER(y).
2765///
2766/// For dialects that don't support ILIKE (case-insensitive LIKE), this converts:
2767/// ```sql
2768/// SELECT * FROM t WHERE x ILIKE '%pattern%'
2769/// ```
2770/// To:
2771/// ```sql
2772/// SELECT * FROM t WHERE LOWER(x) LIKE LOWER('%pattern%')
2773/// ```
2774///
2775/// Reference: `generator.py:no_ilike_sql()`
2776pub fn no_ilike_sql(expr: Expression) -> Result<Expression> {
2777    match expr {
2778        Expression::ILike(ilike) => {
2779            // Create LOWER(left) LIKE LOWER(right)
2780            let lower_left = Expression::Function(Box::new(crate::expressions::Function {
2781                name: "LOWER".to_string(),
2782                args: vec![ilike.left],
2783                distinct: false,
2784                trailing_comments: vec![],
2785                use_bracket_syntax: false,
2786                no_parens: false,
2787                quoted: false,
2788                span: None,
2789                inferred_type: None,
2790            }));
2791
2792            let lower_right = Expression::Function(Box::new(crate::expressions::Function {
2793                name: "LOWER".to_string(),
2794                args: vec![ilike.right],
2795                distinct: false,
2796                trailing_comments: vec![],
2797                use_bracket_syntax: false,
2798                no_parens: false,
2799                quoted: false,
2800                span: None,
2801                inferred_type: None,
2802            }));
2803
2804            Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
2805                left: lower_left,
2806                right: lower_right,
2807                escape: ilike.escape,
2808                quantifier: ilike.quantifier,
2809                inferred_type: None,
2810            })))
2811        }
2812        other => Ok(other),
2813    }
2814}
2815
2816/// Convert TryCast to Cast.
2817///
2818/// For dialects that don't support TRY_CAST (safe cast that returns NULL on error),
2819/// this converts TRY_CAST to regular CAST. Note: This may cause runtime errors
2820/// for invalid casts that TRY_CAST would handle gracefully.
2821///
2822/// Reference: `generator.py:no_trycast_sql()`
2823pub fn no_trycast_sql(expr: Expression) -> Result<Expression> {
2824    match expr {
2825        Expression::TryCast(try_cast) => Ok(Expression::Cast(try_cast)),
2826        other => Ok(other),
2827    }
2828}
2829
2830/// Convert SafeCast to Cast.
2831///
2832/// For dialects that don't support SAFE_CAST (BigQuery's safe cast syntax),
2833/// this converts SAFE_CAST to regular CAST.
2834pub fn no_safe_cast_sql(expr: Expression) -> Result<Expression> {
2835    match expr {
2836        Expression::SafeCast(safe_cast) => Ok(Expression::Cast(safe_cast)),
2837        other => Ok(other),
2838    }
2839}
2840
2841/// Convert COMMENT ON statements to inline comments.
2842///
2843/// For dialects that don't support COMMENT ON syntax, this can transform
2844/// comment statements into inline comments or skip them entirely.
2845///
2846/// Reference: `generator.py:no_comment_column_constraint_sql()`
2847pub fn no_comment_column_constraint(expr: Expression) -> Result<Expression> {
2848    // For now, just pass through - comment handling is done in generator
2849    Ok(expr)
2850}
2851
2852/// Convert TABLE GENERATE_SERIES to UNNEST(GENERATE_SERIES(...)).
2853///
2854/// Some dialects use GENERATE_SERIES as a table-valued function, while others
2855/// prefer the UNNEST syntax. This converts:
2856/// ```sql
2857/// SELECT * FROM GENERATE_SERIES(1, 10) AS t(n)
2858/// ```
2859/// To:
2860/// ```sql
2861/// SELECT * FROM UNNEST(GENERATE_SERIES(1, 10)) AS _u(n)
2862/// ```
2863///
2864/// Reference: `transforms.py:125-135`
2865pub fn unnest_generate_series(expr: Expression) -> Result<Expression> {
2866    // Convert TABLE GENERATE_SERIES to UNNEST(GENERATE_SERIES(...))
2867    // This handles the case where GENERATE_SERIES is used as a table-valued function
2868    match expr {
2869        Expression::Table(ref table) => {
2870            // Check if the table name matches GENERATE_SERIES pattern
2871            // In practice, this would be Expression::GenerateSeries wrapped in a Table context
2872            if table.name.name.to_uppercase() == "GENERATE_SERIES" {
2873                // Create UNNEST wrapper
2874                let unnest = Expression::Unnest(Box::new(UnnestFunc {
2875                    this: expr.clone(),
2876                    expressions: Vec::new(),
2877                    with_ordinality: false,
2878                    alias: None,
2879                    offset_alias: None,
2880                }));
2881
2882                // If there's an alias, wrap in alias
2883                return Ok(Expression::Alias(Box::new(crate::expressions::Alias {
2884                    this: unnest,
2885                    alias: Identifier::new("_u".to_string()),
2886                    column_aliases: vec![],
2887                    pre_alias_comments: vec![],
2888                    trailing_comments: vec![],
2889                    inferred_type: None,
2890                })));
2891            }
2892            Ok(expr)
2893        }
2894        Expression::GenerateSeries(gs) => {
2895            // Wrap GenerateSeries directly in UNNEST
2896            let unnest = Expression::Unnest(Box::new(UnnestFunc {
2897                this: Expression::GenerateSeries(gs),
2898                expressions: Vec::new(),
2899                with_ordinality: false,
2900                alias: None,
2901                offset_alias: None,
2902            }));
2903            Ok(unnest)
2904        }
2905        other => Ok(other),
2906    }
2907}
2908
2909/// Convert UNNEST(GENERATE_SERIES(start, end, step)) to a subquery for PostgreSQL.
2910///
2911/// PostgreSQL's GENERATE_SERIES returns rows directly, so UNNEST wrapping is unnecessary.
2912/// Instead, convert to:
2913/// ```sql
2914/// (SELECT CAST(value AS DATE) FROM GENERATE_SERIES(start, end, step) AS _t(value)) AS _unnested_generate_series
2915/// ```
2916///
2917/// This handles the case where GENERATE_DATE_ARRAY was converted to GENERATE_SERIES
2918/// during cross-dialect normalization, but the original had UNNEST wrapping.
2919pub fn unwrap_unnest_generate_series_for_postgres(expr: Expression) -> Result<Expression> {
2920    use crate::dialects::transform_recursive;
2921    transform_recursive(expr, &unwrap_unnest_generate_series_single)
2922}
2923
2924fn unwrap_unnest_generate_series_single(expr: Expression) -> Result<Expression> {
2925    use crate::expressions::*;
2926    // Match UNNEST(GENERATE_SERIES(...)) patterns in FROM clauses
2927    match expr {
2928        Expression::Select(mut select) => {
2929            // Process FROM clause
2930            if let Some(ref mut from) = select.from {
2931                for table_expr in &mut from.expressions {
2932                    if let Some(replacement) = try_unwrap_unnest_gen_series(table_expr) {
2933                        *table_expr = replacement;
2934                    }
2935                }
2936            }
2937            // Process JOINs
2938            for join in &mut select.joins {
2939                if let Some(replacement) = try_unwrap_unnest_gen_series(&join.this) {
2940                    join.this = replacement;
2941                }
2942            }
2943            Ok(Expression::Select(select))
2944        }
2945        other => Ok(other),
2946    }
2947}
2948
2949/// Try to convert an UNNEST(GENERATE_SERIES(...)) to a PostgreSQL subquery.
2950/// Returns the replacement expression if applicable.
2951fn try_unwrap_unnest_gen_series(expr: &Expression) -> Option<Expression> {
2952    use crate::expressions::*;
2953
2954    // Match Unnest containing GenerateSeries
2955    let gen_series = match expr {
2956        Expression::Unnest(unnest) => {
2957            if let Expression::GenerateSeries(ref gs) = unnest.this {
2958                Some(gs.as_ref().clone())
2959            } else {
2960                None
2961            }
2962        }
2963        Expression::Alias(alias) => {
2964            if let Expression::Unnest(ref unnest) = alias.this {
2965                if let Expression::GenerateSeries(ref gs) = unnest.this {
2966                    Some(gs.as_ref().clone())
2967                } else {
2968                    None
2969                }
2970            } else {
2971                None
2972            }
2973        }
2974        _ => None,
2975    };
2976
2977    let gs = gen_series?;
2978
2979    // Build: (SELECT CAST(value AS DATE) FROM GENERATE_SERIES(start, end, step) AS _t(value)) AS _unnested_generate_series
2980    let value_col = Expression::boxed_column(Column {
2981        name: Identifier::new("value".to_string()),
2982        table: None,
2983        join_mark: false,
2984        trailing_comments: vec![],
2985        span: None,
2986        inferred_type: None,
2987    });
2988
2989    let cast_value = Expression::Cast(Box::new(Cast {
2990        this: value_col,
2991        to: DataType::Date,
2992        trailing_comments: vec![],
2993        double_colon_syntax: false,
2994        format: None,
2995        default: None,
2996        inferred_type: None,
2997    }));
2998
2999    let gen_series_expr = Expression::GenerateSeries(Box::new(gs));
3000
3001    // GENERATE_SERIES(...) AS _t(value)
3002    let gen_series_aliased = Expression::Alias(Box::new(Alias {
3003        this: gen_series_expr,
3004        alias: Identifier::new("_t".to_string()),
3005        column_aliases: vec![Identifier::new("value".to_string())],
3006        pre_alias_comments: vec![],
3007        trailing_comments: vec![],
3008        inferred_type: None,
3009    }));
3010
3011    let mut inner_select = Select::new();
3012    inner_select.expressions = vec![cast_value];
3013    inner_select.from = Some(From {
3014        expressions: vec![gen_series_aliased],
3015    });
3016
3017    let inner_select_expr = Expression::Select(Box::new(inner_select));
3018
3019    let subquery = Expression::Subquery(Box::new(Subquery {
3020        this: inner_select_expr,
3021        alias: None,
3022        column_aliases: vec![],
3023        order_by: None,
3024        limit: None,
3025        offset: None,
3026        distribute_by: None,
3027        sort_by: None,
3028        cluster_by: None,
3029        lateral: false,
3030        modifiers_inside: false,
3031        trailing_comments: vec![],
3032        inferred_type: None,
3033    }));
3034
3035    // Wrap in alias AS _unnested_generate_series
3036    Some(Expression::Alias(Box::new(Alias {
3037        this: subquery,
3038        alias: Identifier::new("_unnested_generate_series".to_string()),
3039        column_aliases: vec![],
3040        pre_alias_comments: vec![],
3041        trailing_comments: vec![],
3042        inferred_type: None,
3043    })))
3044}
3045
3046/// Expand BETWEEN expressions in DELETE statements to >= AND <=
3047///
3048/// Some dialects (like StarRocks) don't support BETWEEN in DELETE statements
3049/// or prefer the expanded form. This transforms:
3050///   `DELETE FROM t WHERE a BETWEEN b AND c`
3051/// to:
3052///   `DELETE FROM t WHERE a >= b AND a <= c`
3053pub fn expand_between_in_delete(expr: Expression) -> Result<Expression> {
3054    match expr {
3055        Expression::Delete(mut delete) => {
3056            // If there's a WHERE clause, expand any BETWEEN expressions in it
3057            if let Some(ref mut where_clause) = delete.where_clause {
3058                where_clause.this = expand_between_recursive(where_clause.this.clone());
3059            }
3060            Ok(Expression::Delete(delete))
3061        }
3062        other => Ok(other),
3063    }
3064}
3065
3066/// Recursively expand BETWEEN expressions to >= AND <=
3067fn expand_between_recursive(expr: Expression) -> Expression {
3068    match expr {
3069        // Expand: a BETWEEN b AND c -> a >= b AND a <= c
3070        // Expand: a NOT BETWEEN b AND c -> a < b OR a > c
3071        Expression::Between(between) => {
3072            let this = expand_between_recursive(between.this.clone());
3073            let low = expand_between_recursive(between.low);
3074            let high = expand_between_recursive(between.high);
3075
3076            if between.not {
3077                // NOT BETWEEN: a < b OR a > c
3078                Expression::Or(Box::new(BinaryOp::new(
3079                    Expression::Lt(Box::new(BinaryOp::new(this.clone(), low))),
3080                    Expression::Gt(Box::new(BinaryOp::new(this, high))),
3081                )))
3082            } else {
3083                // BETWEEN: a >= b AND a <= c
3084                Expression::And(Box::new(BinaryOp::new(
3085                    Expression::Gte(Box::new(BinaryOp::new(this.clone(), low))),
3086                    Expression::Lte(Box::new(BinaryOp::new(this, high))),
3087                )))
3088            }
3089        }
3090
3091        // Recursively process AND/OR expressions
3092        Expression::And(mut op) => {
3093            op.left = expand_between_recursive(op.left);
3094            op.right = expand_between_recursive(op.right);
3095            Expression::And(op)
3096        }
3097        Expression::Or(mut op) => {
3098            op.left = expand_between_recursive(op.left);
3099            op.right = expand_between_recursive(op.right);
3100            Expression::Or(op)
3101        }
3102        Expression::Not(mut op) => {
3103            op.this = expand_between_recursive(op.this);
3104            Expression::Not(op)
3105        }
3106
3107        // Recursively process parenthesized expressions
3108        Expression::Paren(mut paren) => {
3109            paren.this = expand_between_recursive(paren.this);
3110            Expression::Paren(paren)
3111        }
3112
3113        // Pass through everything else unchanged
3114        other => other,
3115    }
3116}
3117
3118/// Push down CTE column names into SELECT expressions.
3119///
3120/// BigQuery doesn't support column names when defining a CTE, e.g.:
3121/// `WITH vartab(v) AS (SELECT ...)` is not valid.
3122/// Instead, it expects: `WITH vartab AS (SELECT ... AS v)`.
3123///
3124/// This transform removes the CTE column aliases and adds them as
3125/// aliases on the SELECT expressions.
3126pub fn pushdown_cte_column_names(expr: Expression) -> Result<Expression> {
3127    match expr {
3128        Expression::Select(mut select) => {
3129            if let Some(ref mut with) = select.with {
3130                for cte in &mut with.ctes {
3131                    if !cte.columns.is_empty() {
3132                        // Check if the CTE body is a star query - if so, just strip column names
3133                        let is_star = matches!(&cte.this, Expression::Select(s) if
3134                            s.expressions.len() == 1 && matches!(&s.expressions[0], Expression::Star(_)));
3135
3136                        if is_star {
3137                            // Can't push down column names for star queries, just remove them
3138                            cte.columns.clear();
3139                            continue;
3140                        }
3141
3142                        // Extract column names
3143                        let column_names: Vec<Identifier> = cte.columns.drain(..).collect();
3144
3145                        // Push column names down into the SELECT expressions
3146                        if let Expression::Select(ref mut inner_select) = cte.this {
3147                            let new_exprs: Vec<Expression> = inner_select
3148                                .expressions
3149                                .drain(..)
3150                                .zip(
3151                                    column_names
3152                                        .into_iter()
3153                                        .chain(std::iter::repeat_with(|| Identifier::new(""))),
3154                                )
3155                                .map(|(expr, col_name)| {
3156                                    if col_name.name.is_empty() {
3157                                        return expr;
3158                                    }
3159                                    // If already aliased, replace the alias
3160                                    match expr {
3161                                        Expression::Alias(mut a) => {
3162                                            a.alias = col_name;
3163                                            Expression::Alias(a)
3164                                        }
3165                                        other => {
3166                                            Expression::Alias(Box::new(crate::expressions::Alias {
3167                                                this: other,
3168                                                alias: col_name,
3169                                                column_aliases: Vec::new(),
3170                                                pre_alias_comments: Vec::new(),
3171                                                trailing_comments: Vec::new(),
3172                                                inferred_type: None,
3173                                            }))
3174                                        }
3175                                    }
3176                                })
3177                                .collect();
3178                            inner_select.expressions = new_exprs;
3179                        }
3180                    }
3181                }
3182            }
3183            Ok(Expression::Select(select))
3184        }
3185        other => Ok(other),
3186    }
3187}
3188
3189/// Simplify nested parentheses around VALUES in FROM clause.
3190/// Converts `FROM ((VALUES (1)))` to `FROM (VALUES (1))` by stripping redundant wrapping.
3191/// Handles various nesting patterns: Subquery(Paren(Values)), Paren(Paren(Values)), etc.
3192pub fn simplify_nested_paren_values(expr: Expression) -> Result<Expression> {
3193    match expr {
3194        Expression::Select(mut select) => {
3195            if let Some(ref mut from) = select.from {
3196                for from_item in from.expressions.iter_mut() {
3197                    simplify_paren_values_in_from(from_item);
3198                }
3199            }
3200            Ok(Expression::Select(select))
3201        }
3202        other => Ok(other),
3203    }
3204}
3205
3206fn simplify_paren_values_in_from(expr: &mut Expression) {
3207    // Check various patterns and build replacement if needed
3208    let replacement = match expr {
3209        // Subquery(Paren(Values)) -> Subquery with Values directly
3210        Expression::Subquery(ref subquery) => {
3211            if let Expression::Paren(ref paren) = subquery.this {
3212                if matches!(&paren.this, Expression::Values(_)) {
3213                    let mut new_sub = subquery.as_ref().clone();
3214                    new_sub.this = paren.this.clone();
3215                    Some(Expression::Subquery(Box::new(new_sub)))
3216                } else {
3217                    None
3218                }
3219            } else {
3220                None
3221            }
3222        }
3223        // Paren(Subquery(Values)) -> Subquery(Values) - strip the Paren wrapper
3224        // Paren(Paren(Values)) -> Paren(Values) - strip one layer
3225        Expression::Paren(ref outer_paren) => {
3226            if let Expression::Subquery(ref subquery) = outer_paren.this {
3227                // Paren(Subquery(Values)) -> Subquery(Values) - strip outer Paren
3228                if matches!(&subquery.this, Expression::Values(_)) {
3229                    Some(outer_paren.this.clone())
3230                }
3231                // Paren(Subquery(Paren(Values))) -> Subquery(Values)
3232                else if let Expression::Paren(ref paren) = subquery.this {
3233                    if matches!(&paren.this, Expression::Values(_)) {
3234                        let mut new_sub = subquery.as_ref().clone();
3235                        new_sub.this = paren.this.clone();
3236                        Some(Expression::Subquery(Box::new(new_sub)))
3237                    } else {
3238                        None
3239                    }
3240                } else {
3241                    None
3242                }
3243            } else if let Expression::Paren(ref inner_paren) = outer_paren.this {
3244                if matches!(&inner_paren.this, Expression::Values(_)) {
3245                    Some(outer_paren.this.clone())
3246                } else {
3247                    None
3248                }
3249            } else {
3250                None
3251            }
3252        }
3253        _ => None,
3254    };
3255    if let Some(new_expr) = replacement {
3256        *expr = new_expr;
3257    }
3258}
3259
3260/// Add auto-generated table aliases (like `_t0`) for POSEXPLODE/EXPLODE in FROM clause
3261/// when the alias has column_aliases but no alias name.
3262/// This is needed for Spark target: `FROM POSEXPLODE(x) AS (a, b)` -> `FROM POSEXPLODE(x) AS _t0(a, b)`
3263pub fn add_auto_table_alias(expr: Expression) -> Result<Expression> {
3264    match expr {
3265        Expression::Select(mut select) => {
3266            // Process FROM expressions
3267            if let Some(ref mut from) = select.from {
3268                let mut counter = 0usize;
3269                for from_item in from.expressions.iter_mut() {
3270                    add_auto_alias_to_from_item(from_item, &mut counter);
3271                }
3272            }
3273            Ok(Expression::Select(select))
3274        }
3275        other => Ok(other),
3276    }
3277}
3278
3279fn add_auto_alias_to_from_item(expr: &mut Expression, counter: &mut usize) {
3280    use crate::expressions::Identifier;
3281
3282    match expr {
3283        Expression::Alias(ref mut alias) => {
3284            // If the alias name is empty and there are column_aliases, add auto-generated name
3285            if alias.alias.name.is_empty() && !alias.column_aliases.is_empty() {
3286                alias.alias = Identifier::new(format!("_t{}", counter));
3287                *counter += 1;
3288            }
3289        }
3290        _ => {}
3291    }
3292}
3293
3294/// Convert BigQuery-style UNNEST aliases to column-alias format for DuckDB/Presto/Spark.
3295///
3296/// BigQuery uses: `UNNEST(arr) AS x` where x is a column alias.
3297/// DuckDB/Presto/Spark need: `UNNEST(arr) AS _t0(x)` where _t0 is a table alias and x is the column alias.
3298///
3299/// Propagate struct field names from the first named struct in an array to subsequent unnamed structs.
3300///
3301/// In BigQuery, `[STRUCT('Alice' AS name, 85 AS score), STRUCT('Bob', 92)]` means the second struct
3302/// should inherit field names from the first: `[STRUCT('Alice' AS name, 85 AS score), STRUCT('Bob' AS name, 92 AS score)]`.
3303pub fn propagate_struct_field_names(expr: Expression) -> Result<Expression> {
3304    use crate::dialects::transform_recursive;
3305    transform_recursive(expr, &propagate_struct_names_in_expr)
3306}
3307
3308fn propagate_struct_names_in_expr(expr: Expression) -> Result<Expression> {
3309    use crate::expressions::{Alias, ArrayConstructor, Function, Identifier};
3310
3311    /// Helper to propagate struct field names within an array of expressions
3312    fn propagate_in_elements(elements: &[Expression]) -> Option<Vec<Expression>> {
3313        if elements.len() <= 1 {
3314            return None;
3315        }
3316        // Check if first element is a named STRUCT function
3317        if let Some(Expression::Function(ref first_struct)) = elements.first() {
3318            if first_struct.name.eq_ignore_ascii_case("STRUCT") {
3319                // Extract field names from first struct
3320                let field_names: Vec<Option<String>> = first_struct
3321                    .args
3322                    .iter()
3323                    .map(|arg| {
3324                        if let Expression::Alias(a) = arg {
3325                            Some(a.alias.name.clone())
3326                        } else {
3327                            None
3328                        }
3329                    })
3330                    .collect();
3331
3332                // Only propagate if first struct has at least one named field
3333                if field_names.iter().any(|n| n.is_some()) {
3334                    let mut new_elements = Vec::with_capacity(elements.len());
3335                    new_elements.push(elements[0].clone());
3336
3337                    for elem in &elements[1..] {
3338                        if let Expression::Function(ref s) = elem {
3339                            if s.name.eq_ignore_ascii_case("STRUCT")
3340                                && s.args.len() == field_names.len()
3341                            {
3342                                // Check if this struct has NO names (all unnamed)
3343                                let all_unnamed =
3344                                    s.args.iter().all(|a| !matches!(a, Expression::Alias(_)));
3345                                if all_unnamed {
3346                                    // Apply names from first struct
3347                                    let new_args: Vec<Expression> = s
3348                                        .args
3349                                        .iter()
3350                                        .zip(field_names.iter())
3351                                        .map(|(val, name)| {
3352                                            if let Some(n) = name {
3353                                                Expression::Alias(Box::new(Alias::new(
3354                                                    val.clone(),
3355                                                    Identifier::new(n.clone()),
3356                                                )))
3357                                            } else {
3358                                                val.clone()
3359                                            }
3360                                        })
3361                                        .collect();
3362                                    new_elements.push(Expression::Function(Box::new(
3363                                        Function::new("STRUCT".to_string(), new_args),
3364                                    )));
3365                                    continue;
3366                                }
3367                            }
3368                        }
3369                        new_elements.push(elem.clone());
3370                    }
3371
3372                    return Some(new_elements);
3373                }
3374            }
3375        }
3376        None
3377    }
3378
3379    // Look for Array expressions containing STRUCT function calls
3380    if let Expression::Array(ref arr) = expr {
3381        if let Some(new_elements) = propagate_in_elements(&arr.expressions) {
3382            return Ok(Expression::Array(Box::new(crate::expressions::Array {
3383                expressions: new_elements,
3384            })));
3385        }
3386    }
3387
3388    // Also handle ArrayFunc (ArrayConstructor) - bracket notation [STRUCT(...), ...]
3389    if let Expression::ArrayFunc(ref arr) = expr {
3390        if let Some(new_elements) = propagate_in_elements(&arr.expressions) {
3391            return Ok(Expression::ArrayFunc(Box::new(ArrayConstructor {
3392                expressions: new_elements,
3393                bracket_notation: arr.bracket_notation,
3394                use_list_keyword: arr.use_list_keyword,
3395            })));
3396        }
3397    }
3398
3399    Ok(expr)
3400}
3401
3402/// This walks the entire expression tree to find SELECT statements and converts UNNEST aliases
3403/// in their FROM clauses and JOINs.
3404pub fn unnest_alias_to_column_alias(expr: Expression) -> Result<Expression> {
3405    use crate::dialects::transform_recursive;
3406    transform_recursive(expr, &unnest_alias_transform_single_select)
3407}
3408
3409/// Move UNNEST items from FROM clause to CROSS JOINs without changing alias format.
3410/// Used for BigQuery -> BigQuery/Redshift where we want CROSS JOIN but not _t0(col) aliases.
3411pub fn unnest_from_to_cross_join(expr: Expression) -> Result<Expression> {
3412    use crate::dialects::transform_recursive;
3413    transform_recursive(expr, &unnest_from_to_cross_join_single_select)
3414}
3415
3416fn unnest_from_to_cross_join_single_select(expr: Expression) -> Result<Expression> {
3417    if let Expression::Select(mut select) = expr {
3418        if let Some(ref mut from) = select.from {
3419            if from.expressions.len() > 1 {
3420                let mut new_from_exprs = Vec::new();
3421                let mut new_cross_joins = Vec::new();
3422
3423                for (idx, from_item) in from.expressions.drain(..).enumerate() {
3424                    if idx == 0 {
3425                        new_from_exprs.push(from_item);
3426                    } else {
3427                        let is_unnest = match &from_item {
3428                            Expression::Unnest(_) => true,
3429                            Expression::Alias(a) => matches!(a.this, Expression::Unnest(_)),
3430                            _ => false,
3431                        };
3432
3433                        if is_unnest {
3434                            new_cross_joins.push(crate::expressions::Join {
3435                                this: from_item,
3436                                on: None,
3437                                using: Vec::new(),
3438                                kind: JoinKind::Cross,
3439                                use_inner_keyword: false,
3440                                use_outer_keyword: false,
3441                                deferred_condition: false,
3442                                join_hint: None,
3443                                match_condition: None,
3444                                pivots: Vec::new(),
3445                                comments: Vec::new(),
3446                                nesting_group: 0,
3447                                directed: false,
3448                            });
3449                        } else {
3450                            new_from_exprs.push(from_item);
3451                        }
3452                    }
3453                }
3454
3455                from.expressions = new_from_exprs;
3456                new_cross_joins.append(&mut select.joins);
3457                select.joins = new_cross_joins;
3458            }
3459        }
3460
3461        Ok(Expression::Select(select))
3462    } else {
3463        Ok(expr)
3464    }
3465}
3466
3467/// Wrap UNNEST function aliases in JOIN items from `AS name` to `AS _u(name)`
3468/// Used for PostgreSQL → Presto/Trino transpilation where GENERATE_SERIES is
3469/// converted to UNNEST(SEQUENCE) and the alias needs the column-alias format.
3470pub fn wrap_unnest_join_aliases(expr: Expression) -> Result<Expression> {
3471    use crate::dialects::transform_recursive;
3472    transform_recursive(expr, &wrap_unnest_join_aliases_single)
3473}
3474
3475fn wrap_unnest_join_aliases_single(expr: Expression) -> Result<Expression> {
3476    if let Expression::Select(mut select) = expr {
3477        // Process JOIN items
3478        for join in &mut select.joins {
3479            wrap_unnest_alias_in_join_item(&mut join.this);
3480        }
3481        Ok(Expression::Select(select))
3482    } else {
3483        Ok(expr)
3484    }
3485}
3486
3487/// If a join item is an Alias wrapping an UNNEST function, convert alias to _u(alias_name) format
3488fn wrap_unnest_alias_in_join_item(expr: &mut Expression) {
3489    use crate::expressions::Identifier;
3490    if let Expression::Alias(alias) = expr {
3491        // Check if the inner expression is a function call to UNNEST
3492        let is_unnest = match &alias.this {
3493            Expression::Function(f) => f.name.eq_ignore_ascii_case("UNNEST"),
3494            _ => false,
3495        };
3496
3497        if is_unnest && alias.column_aliases.is_empty() {
3498            // Simple alias like `AS s` -> wrap to `AS _u(s)`
3499            let original_alias_name = alias.alias.name.clone();
3500            alias.alias = Identifier {
3501                name: "_u".to_string(),
3502                quoted: false,
3503                trailing_comments: Vec::new(),
3504                span: None,
3505            };
3506            alias.column_aliases = vec![Identifier {
3507                name: original_alias_name,
3508                quoted: false,
3509                trailing_comments: Vec::new(),
3510                span: None,
3511            }];
3512        }
3513    }
3514}
3515
3516fn unnest_alias_transform_single_select(expr: Expression) -> Result<Expression> {
3517    if let Expression::Select(mut select) = expr {
3518        let mut counter = 0usize;
3519
3520        // Process FROM expressions: convert aliases AND move UNNEST items to CROSS JOIN
3521        if let Some(ref mut from) = select.from {
3522            // First pass: convert aliases in-place
3523            for from_item in from.expressions.iter_mut() {
3524                convert_unnest_alias_in_from(from_item, &mut counter);
3525            }
3526
3527            // Second pass: move UNNEST items from FROM to CROSS JOINs
3528            if from.expressions.len() > 1 {
3529                let mut new_from_exprs = Vec::new();
3530                let mut new_cross_joins = Vec::new();
3531
3532                for (idx, from_item) in from.expressions.drain(..).enumerate() {
3533                    if idx == 0 {
3534                        // First expression always stays in FROM
3535                        new_from_exprs.push(from_item);
3536                    } else {
3537                        // Check if this is UNNEST or Alias(UNNEST)
3538                        let is_unnest = match &from_item {
3539                            Expression::Unnest(_) => true,
3540                            Expression::Alias(a) => matches!(a.this, Expression::Unnest(_)),
3541                            _ => false,
3542                        };
3543
3544                        if is_unnest {
3545                            // Convert to CROSS JOIN
3546                            new_cross_joins.push(crate::expressions::Join {
3547                                this: from_item,
3548                                on: None,
3549                                using: Vec::new(),
3550                                kind: JoinKind::Cross,
3551                                use_inner_keyword: false,
3552                                use_outer_keyword: false,
3553                                deferred_condition: false,
3554                                join_hint: None,
3555                                match_condition: None,
3556                                pivots: Vec::new(),
3557                                comments: Vec::new(),
3558                                nesting_group: 0,
3559                                directed: false,
3560                            });
3561                        } else {
3562                            // Keep non-UNNEST items in FROM
3563                            new_from_exprs.push(from_item);
3564                        }
3565                    }
3566                }
3567
3568                from.expressions = new_from_exprs;
3569                // Prepend cross joins before existing joins
3570                new_cross_joins.append(&mut select.joins);
3571                select.joins = new_cross_joins;
3572            }
3573        }
3574
3575        // Process JOINs (existing joins that may have UNNEST aliases)
3576        for join in select.joins.iter_mut() {
3577            convert_unnest_alias_in_from(&mut join.this, &mut counter);
3578        }
3579
3580        Ok(Expression::Select(select))
3581    } else {
3582        Ok(expr)
3583    }
3584}
3585
3586fn convert_unnest_alias_in_from(expr: &mut Expression, counter: &mut usize) {
3587    use crate::expressions::Identifier;
3588
3589    if let Expression::Alias(ref mut alias) = expr {
3590        // Check if the inner expression is UNNEST (or EXPLODE)
3591        let is_unnest = matches!(&alias.this, Expression::Unnest(_))
3592            || matches!(&alias.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"));
3593
3594        if is_unnest && alias.column_aliases.is_empty() {
3595            // Convert: UNNEST(arr) AS x -> UNNEST(arr) AS _tN(x)
3596            let col_alias = alias.alias.clone();
3597            alias.column_aliases = vec![col_alias];
3598            alias.alias = Identifier::new(format!("_t{}", counter));
3599            *counter += 1;
3600        }
3601    }
3602}
3603
3604/// Expand POSEXPLODE in SELECT expressions for DuckDB.
3605///
3606/// Converts `SELECT POSEXPLODE(x)` to `SELECT GENERATE_SUBSCRIPTS(x, 1) - 1 AS pos, UNNEST(x) AS col`
3607/// Handles both aliased and unaliased forms:
3608/// - `SELECT POSEXPLODE(x) AS (a, b)` -> `SELECT GENERATE_SUBSCRIPTS(x, 1) - 1 AS a, UNNEST(x) AS b`
3609/// - `SELECT * FROM POSEXPLODE(x) AS (a, b)` -> `SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(x, 1) - 1 AS a, UNNEST(x) AS b)`
3610pub fn expand_posexplode_duckdb(expr: Expression) -> Result<Expression> {
3611    use crate::expressions::{Alias, Function};
3612
3613    match expr {
3614        Expression::Select(mut select) => {
3615            // Check if any SELECT expression is a POSEXPLODE function
3616            let mut new_expressions = Vec::new();
3617            let mut changed = false;
3618
3619            for sel_expr in select.expressions.drain(..) {
3620                // Check for POSEXPLODE(x) AS (a, b) - aliased form
3621                if let Expression::Alias(ref alias_box) = sel_expr {
3622                    if let Expression::Function(ref func) = alias_box.this {
3623                        if func.name.eq_ignore_ascii_case("POSEXPLODE") && func.args.len() == 1 {
3624                            let arg = func.args[0].clone();
3625                            // Get alias names: default pos, col
3626                            let (pos_name, col_name) = if alias_box.column_aliases.len() == 2 {
3627                                (
3628                                    alias_box.column_aliases[0].name.clone(),
3629                                    alias_box.column_aliases[1].name.clone(),
3630                                )
3631                            } else if !alias_box.alias.is_empty() {
3632                                // Single alias like AS x - use as col name, "pos" for position
3633                                ("pos".to_string(), alias_box.alias.name.clone())
3634                            } else {
3635                                ("pos".to_string(), "col".to_string())
3636                            };
3637
3638                            // GENERATE_SUBSCRIPTS(x, 1) - 1 AS pos_name
3639                            let gen_subscripts = Expression::Function(Box::new(Function::new(
3640                                "GENERATE_SUBSCRIPTS".to_string(),
3641                                vec![
3642                                    arg.clone(),
3643                                    Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3644                                ],
3645                            )));
3646                            let sub_one = Expression::Sub(Box::new(BinaryOp::new(
3647                                gen_subscripts,
3648                                Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3649                            )));
3650                            let pos_alias = Expression::Alias(Box::new(Alias {
3651                                this: sub_one,
3652                                alias: Identifier::new(pos_name),
3653                                column_aliases: Vec::new(),
3654                                pre_alias_comments: Vec::new(),
3655                                trailing_comments: Vec::new(),
3656                                inferred_type: None,
3657                            }));
3658
3659                            // UNNEST(x) AS col_name
3660                            let unnest = Expression::Unnest(Box::new(UnnestFunc {
3661                                this: arg,
3662                                expressions: Vec::new(),
3663                                with_ordinality: false,
3664                                alias: None,
3665                                offset_alias: None,
3666                            }));
3667                            let col_alias = Expression::Alias(Box::new(Alias {
3668                                this: unnest,
3669                                alias: Identifier::new(col_name),
3670                                column_aliases: Vec::new(),
3671                                pre_alias_comments: Vec::new(),
3672                                trailing_comments: Vec::new(),
3673                                inferred_type: None,
3674                            }));
3675
3676                            new_expressions.push(pos_alias);
3677                            new_expressions.push(col_alias);
3678                            changed = true;
3679                            continue;
3680                        }
3681                    }
3682                }
3683
3684                // Check for bare POSEXPLODE(x) - unaliased form
3685                if let Expression::Function(ref func) = sel_expr {
3686                    if func.name.eq_ignore_ascii_case("POSEXPLODE") && func.args.len() == 1 {
3687                        let arg = func.args[0].clone();
3688                        let pos_name = "pos";
3689                        let col_name = "col";
3690
3691                        // GENERATE_SUBSCRIPTS(x, 1) - 1 AS pos
3692                        let gen_subscripts = Expression::Function(Box::new(Function::new(
3693                            "GENERATE_SUBSCRIPTS".to_string(),
3694                            vec![
3695                                arg.clone(),
3696                                Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3697                            ],
3698                        )));
3699                        let sub_one = Expression::Sub(Box::new(BinaryOp::new(
3700                            gen_subscripts,
3701                            Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3702                        )));
3703                        let pos_alias = Expression::Alias(Box::new(Alias {
3704                            this: sub_one,
3705                            alias: Identifier::new(pos_name),
3706                            column_aliases: Vec::new(),
3707                            pre_alias_comments: Vec::new(),
3708                            trailing_comments: Vec::new(),
3709                            inferred_type: None,
3710                        }));
3711
3712                        // UNNEST(x) AS col
3713                        let unnest = Expression::Unnest(Box::new(UnnestFunc {
3714                            this: arg,
3715                            expressions: Vec::new(),
3716                            with_ordinality: false,
3717                            alias: None,
3718                            offset_alias: None,
3719                        }));
3720                        let col_alias = Expression::Alias(Box::new(Alias {
3721                            this: unnest,
3722                            alias: Identifier::new(col_name),
3723                            column_aliases: Vec::new(),
3724                            pre_alias_comments: Vec::new(),
3725                            trailing_comments: Vec::new(),
3726                            inferred_type: None,
3727                        }));
3728
3729                        new_expressions.push(pos_alias);
3730                        new_expressions.push(col_alias);
3731                        changed = true;
3732                        continue;
3733                    }
3734                }
3735
3736                // Not a POSEXPLODE, keep as-is
3737                new_expressions.push(sel_expr);
3738            }
3739
3740            if changed {
3741                select.expressions = new_expressions;
3742            } else {
3743                select.expressions = new_expressions;
3744            }
3745
3746            // Also handle POSEXPLODE in FROM clause:
3747            // SELECT * FROM POSEXPLODE(x) AS (a, b) -> SELECT * FROM (SELECT ...)
3748            if let Some(ref mut from) = select.from {
3749                expand_posexplode_in_from_duckdb(from)?;
3750            }
3751
3752            Ok(Expression::Select(select))
3753        }
3754        other => Ok(other),
3755    }
3756}
3757
3758/// Helper to expand POSEXPLODE in FROM clause for DuckDB
3759fn expand_posexplode_in_from_duckdb(from: &mut From) -> Result<()> {
3760    use crate::expressions::{Alias, Function};
3761
3762    let mut new_expressions = Vec::new();
3763    let mut _changed = false;
3764
3765    for table_expr in from.expressions.drain(..) {
3766        // Check for POSEXPLODE(x) AS (a, b) in FROM
3767        if let Expression::Alias(ref alias_box) = table_expr {
3768            if let Expression::Function(ref func) = alias_box.this {
3769                if func.name.eq_ignore_ascii_case("POSEXPLODE") && func.args.len() == 1 {
3770                    let arg = func.args[0].clone();
3771                    let (pos_name, col_name) = if alias_box.column_aliases.len() == 2 {
3772                        (
3773                            alias_box.column_aliases[0].name.clone(),
3774                            alias_box.column_aliases[1].name.clone(),
3775                        )
3776                    } else {
3777                        ("pos".to_string(), "col".to_string())
3778                    };
3779
3780                    // Create subquery: (SELECT GENERATE_SUBSCRIPTS(x, 1) - 1 AS a, UNNEST(x) AS b)
3781                    let gen_subscripts = Expression::Function(Box::new(Function::new(
3782                        "GENERATE_SUBSCRIPTS".to_string(),
3783                        vec![
3784                            arg.clone(),
3785                            Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3786                        ],
3787                    )));
3788                    let sub_one = Expression::Sub(Box::new(BinaryOp::new(
3789                        gen_subscripts,
3790                        Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3791                    )));
3792                    let pos_alias = Expression::Alias(Box::new(Alias {
3793                        this: sub_one,
3794                        alias: Identifier::new(&pos_name),
3795                        column_aliases: Vec::new(),
3796                        pre_alias_comments: Vec::new(),
3797                        trailing_comments: Vec::new(),
3798                        inferred_type: None,
3799                    }));
3800                    let unnest = Expression::Unnest(Box::new(UnnestFunc {
3801                        this: arg,
3802                        expressions: Vec::new(),
3803                        with_ordinality: false,
3804                        alias: None,
3805                        offset_alias: None,
3806                    }));
3807                    let col_alias = Expression::Alias(Box::new(Alias {
3808                        this: unnest,
3809                        alias: Identifier::new(&col_name),
3810                        column_aliases: Vec::new(),
3811                        pre_alias_comments: Vec::new(),
3812                        trailing_comments: Vec::new(),
3813                        inferred_type: None,
3814                    }));
3815
3816                    let mut inner_select = Select::new();
3817                    inner_select.expressions = vec![pos_alias, col_alias];
3818
3819                    let subquery = Expression::Subquery(Box::new(Subquery {
3820                        this: Expression::Select(Box::new(inner_select)),
3821                        alias: None,
3822                        column_aliases: Vec::new(),
3823                        order_by: None,
3824                        limit: None,
3825                        offset: None,
3826                        distribute_by: None,
3827                        sort_by: None,
3828                        cluster_by: None,
3829                        lateral: false,
3830                        modifiers_inside: false,
3831                        trailing_comments: Vec::new(),
3832                        inferred_type: None,
3833                    }));
3834                    new_expressions.push(subquery);
3835                    _changed = true;
3836                    continue;
3837                }
3838            }
3839        }
3840
3841        // Also check for bare POSEXPLODE(x) in FROM (no alias)
3842        if let Expression::Function(ref func) = table_expr {
3843            if func.name.eq_ignore_ascii_case("POSEXPLODE") && func.args.len() == 1 {
3844                let arg = func.args[0].clone();
3845
3846                // Create subquery: (SELECT GENERATE_SUBSCRIPTS(x, 1) - 1 AS pos, UNNEST(x) AS col)
3847                let gen_subscripts = Expression::Function(Box::new(Function::new(
3848                    "GENERATE_SUBSCRIPTS".to_string(),
3849                    vec![
3850                        arg.clone(),
3851                        Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3852                    ],
3853                )));
3854                let sub_one = Expression::Sub(Box::new(BinaryOp::new(
3855                    gen_subscripts,
3856                    Expression::Literal(Box::new(Literal::Number("1".to_string()))),
3857                )));
3858                let pos_alias = Expression::Alias(Box::new(Alias {
3859                    this: sub_one,
3860                    alias: Identifier::new("pos"),
3861                    column_aliases: Vec::new(),
3862                    pre_alias_comments: Vec::new(),
3863                    trailing_comments: Vec::new(),
3864                    inferred_type: None,
3865                }));
3866                let unnest = Expression::Unnest(Box::new(UnnestFunc {
3867                    this: arg,
3868                    expressions: Vec::new(),
3869                    with_ordinality: false,
3870                    alias: None,
3871                    offset_alias: None,
3872                }));
3873                let col_alias = Expression::Alias(Box::new(Alias {
3874                    this: unnest,
3875                    alias: Identifier::new("col"),
3876                    column_aliases: Vec::new(),
3877                    pre_alias_comments: Vec::new(),
3878                    trailing_comments: Vec::new(),
3879                    inferred_type: None,
3880                }));
3881
3882                let mut inner_select = Select::new();
3883                inner_select.expressions = vec![pos_alias, col_alias];
3884
3885                let subquery = Expression::Subquery(Box::new(Subquery {
3886                    this: Expression::Select(Box::new(inner_select)),
3887                    alias: None,
3888                    column_aliases: Vec::new(),
3889                    order_by: None,
3890                    limit: None,
3891                    offset: None,
3892                    distribute_by: None,
3893                    sort_by: None,
3894                    cluster_by: None,
3895                    lateral: false,
3896                    modifiers_inside: false,
3897                    trailing_comments: Vec::new(),
3898                    inferred_type: None,
3899                }));
3900                new_expressions.push(subquery);
3901                _changed = true;
3902                continue;
3903            }
3904        }
3905
3906        new_expressions.push(table_expr);
3907    }
3908
3909    from.expressions = new_expressions;
3910    Ok(())
3911}
3912
3913/// Convert EXPLODE/POSEXPLODE in SELECT projections into CROSS JOIN UNNEST patterns.
3914///
3915/// This implements the `explode_projection_to_unnest` transform from Python sqlglot.
3916/// It restructures queries like:
3917///   `SELECT EXPLODE(x) FROM tbl`
3918/// into:
3919///   `SELECT IF(pos = pos_2, col, NULL) AS col FROM tbl CROSS JOIN UNNEST(...) AS pos CROSS JOIN UNNEST(x) AS col WITH OFFSET AS pos_2 WHERE ...`
3920///
3921/// The transform handles:
3922/// - EXPLODE(x) and POSEXPLODE(x) functions
3923/// - Name collision avoidance (_u, _u_2, ... and col, col_2, ...)
3924/// - Multiple EXPLODE/POSEXPLODE in one SELECT
3925/// - Queries with or without FROM clause
3926/// - Presto (index_offset=1) and BigQuery (index_offset=0) variants
3927pub fn explode_projection_to_unnest(expr: Expression, target: DialectType) -> Result<Expression> {
3928    match expr {
3929        Expression::Select(select) => explode_projection_to_unnest_impl(*select, target),
3930        other => Ok(other),
3931    }
3932}
3933
3934/// Snowflake-specific rewrite to mirror Python sqlglot's explode_projection_to_unnest behavior
3935/// when FLATTEN appears in a nested LATERAL within a SELECT projection.
3936///
3937/// This intentionally rewrites:
3938/// - `LATERAL FLATTEN(INPUT => x) alias`
3939/// into:
3940/// - `LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS alias(SEQ, KEY, PATH, INDEX, VALUE, THIS)`
3941/// and appends CROSS JOIN TABLE(FLATTEN(...)) range/entity joins plus alignment predicates
3942/// to the containing SELECT.
3943pub fn snowflake_flatten_projection_to_unnest(expr: Expression) -> Result<Expression> {
3944    match expr {
3945        Expression::Select(select) => snowflake_flatten_projection_to_unnest_impl(*select),
3946        other => Ok(other),
3947    }
3948}
3949
3950fn snowflake_flatten_projection_to_unnest_impl(mut select: Select) -> Result<Expression> {
3951    let mut flattened_inputs: Vec<Expression> = Vec::new();
3952    let mut new_selects: Vec<Expression> = Vec::with_capacity(select.expressions.len());
3953
3954    for sel_expr in select.expressions.into_iter() {
3955        let found_input: RefCell<Option<Expression>> = RefCell::new(None);
3956
3957        let rewritten = transform_recursive(sel_expr, &|e| {
3958            if let Expression::Lateral(lat) = e {
3959                if let Some(input_expr) = extract_flatten_input(&lat) {
3960                    if found_input.borrow().is_none() {
3961                        *found_input.borrow_mut() = Some(input_expr);
3962                    }
3963                    return Ok(Expression::Lateral(Box::new(rewrite_flatten_lateral(*lat))));
3964                }
3965                return Ok(Expression::Lateral(lat));
3966            }
3967            Ok(e)
3968        })?;
3969
3970        if let Some(input) = found_input.into_inner() {
3971            flattened_inputs.push(input);
3972        }
3973        new_selects.push(rewritten);
3974    }
3975
3976    if flattened_inputs.is_empty() {
3977        select.expressions = new_selects;
3978        return Ok(Expression::Select(Box::new(select)));
3979    }
3980
3981    select.expressions = new_selects;
3982
3983    for (idx, input_expr) in flattened_inputs.into_iter().enumerate() {
3984        // Match sqlglot naming: first pair is _u/_u_2 with pos/pos_2 and entity.
3985        let is_first = idx == 0;
3986        let series_alias = if is_first {
3987            "pos".to_string()
3988        } else {
3989            format!("pos_{}", idx + 1)
3990        };
3991        let series_source_alias = if is_first {
3992            "_u".to_string()
3993        } else {
3994            format!("_u_{}", idx * 2 + 1)
3995        };
3996        let unnest_source_alias = if is_first {
3997            "_u_2".to_string()
3998        } else {
3999            format!("_u_{}", idx * 2 + 2)
4000        };
4001        let pos2_alias = if is_first {
4002            "pos_2".to_string()
4003        } else {
4004            format!("{}_2", series_alias)
4005        };
4006        let entity_alias = if is_first {
4007            "entity".to_string()
4008        } else {
4009            format!("entity_{}", idx + 1)
4010        };
4011
4012        let array_size_call = Expression::Function(Box::new(Function::new(
4013            "ARRAY_SIZE".to_string(),
4014            vec![Expression::NamedArgument(Box::new(NamedArgument {
4015                name: Identifier::new("INPUT"),
4016                value: input_expr.clone(),
4017                separator: NamedArgSeparator::DArrow,
4018            }))],
4019        )));
4020
4021        let greatest = Expression::Function(Box::new(Function::new(
4022            "GREATEST".to_string(),
4023            vec![array_size_call.clone()],
4024        )));
4025
4026        let series_end = Expression::Add(Box::new(BinaryOp::new(
4027            Expression::Paren(Box::new(crate::expressions::Paren {
4028                this: Expression::Sub(Box::new(BinaryOp::new(
4029                    greatest,
4030                    Expression::Literal(Box::new(Literal::Number("1".to_string()))),
4031                ))),
4032                trailing_comments: Vec::new(),
4033            })),
4034            Expression::Literal(Box::new(Literal::Number("1".to_string()))),
4035        )));
4036
4037        let series_range = Expression::Function(Box::new(Function::new(
4038            "ARRAY_GENERATE_RANGE".to_string(),
4039            vec![
4040                Expression::Literal(Box::new(Literal::Number("0".to_string()))),
4041                series_end,
4042            ],
4043        )));
4044
4045        let series_flatten = Expression::Function(Box::new(Function::new(
4046            "FLATTEN".to_string(),
4047            vec![Expression::NamedArgument(Box::new(NamedArgument {
4048                name: Identifier::new("INPUT"),
4049                value: series_range,
4050                separator: NamedArgSeparator::DArrow,
4051            }))],
4052        )));
4053
4054        let series_table = Expression::Function(Box::new(Function::new(
4055            "TABLE".to_string(),
4056            vec![series_flatten],
4057        )));
4058
4059        let series_alias_expr = Expression::Alias(Box::new(Alias {
4060            this: series_table,
4061            alias: Identifier::new(series_source_alias.clone()),
4062            column_aliases: vec![
4063                Identifier::new("seq"),
4064                Identifier::new("key"),
4065                Identifier::new("path"),
4066                Identifier::new("index"),
4067                Identifier::new(series_alias.clone()),
4068                Identifier::new("this"),
4069            ],
4070            pre_alias_comments: Vec::new(),
4071            trailing_comments: Vec::new(),
4072            inferred_type: None,
4073        }));
4074
4075        select.joins.push(Join {
4076            this: series_alias_expr,
4077            on: None,
4078            using: Vec::new(),
4079            kind: JoinKind::Cross,
4080            use_inner_keyword: false,
4081            use_outer_keyword: false,
4082            deferred_condition: false,
4083            join_hint: None,
4084            match_condition: None,
4085            pivots: Vec::new(),
4086            comments: Vec::new(),
4087            nesting_group: 0,
4088            directed: false,
4089        });
4090
4091        let entity_flatten = Expression::Function(Box::new(Function::new(
4092            "FLATTEN".to_string(),
4093            vec![Expression::NamedArgument(Box::new(NamedArgument {
4094                name: Identifier::new("INPUT"),
4095                value: input_expr.clone(),
4096                separator: NamedArgSeparator::DArrow,
4097            }))],
4098        )));
4099
4100        let entity_table = Expression::Function(Box::new(Function::new(
4101            "TABLE".to_string(),
4102            vec![entity_flatten],
4103        )));
4104
4105        let entity_alias_expr = Expression::Alias(Box::new(Alias {
4106            this: entity_table,
4107            alias: Identifier::new(unnest_source_alias.clone()),
4108            column_aliases: vec![
4109                Identifier::new("seq"),
4110                Identifier::new("key"),
4111                Identifier::new("path"),
4112                Identifier::new(pos2_alias.clone()),
4113                Identifier::new(entity_alias.clone()),
4114                Identifier::new("this"),
4115            ],
4116            pre_alias_comments: Vec::new(),
4117            trailing_comments: Vec::new(),
4118            inferred_type: None,
4119        }));
4120
4121        select.joins.push(Join {
4122            this: entity_alias_expr,
4123            on: None,
4124            using: Vec::new(),
4125            kind: JoinKind::Cross,
4126            use_inner_keyword: false,
4127            use_outer_keyword: false,
4128            deferred_condition: false,
4129            join_hint: None,
4130            match_condition: None,
4131            pivots: Vec::new(),
4132            comments: Vec::new(),
4133            nesting_group: 0,
4134            directed: false,
4135        });
4136
4137        let pos_col =
4138            Expression::qualified_column(series_source_alias.clone(), series_alias.clone());
4139        let pos2_col =
4140            Expression::qualified_column(unnest_source_alias.clone(), pos2_alias.clone());
4141
4142        let eq = Expression::Eq(Box::new(BinaryOp::new(pos_col.clone(), pos2_col.clone())));
4143        let size_minus_1 = Expression::Paren(Box::new(crate::expressions::Paren {
4144            this: Expression::Sub(Box::new(BinaryOp::new(
4145                array_size_call,
4146                Expression::Literal(Box::new(Literal::Number("1".to_string()))),
4147            ))),
4148            trailing_comments: Vec::new(),
4149        }));
4150        let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_minus_1.clone())));
4151        let pos2_eq_size = Expression::Eq(Box::new(BinaryOp::new(pos2_col, size_minus_1)));
4152        let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos2_eq_size)));
4153        let or_cond = Expression::Or(Box::new(BinaryOp::new(
4154            eq,
4155            Expression::Paren(Box::new(crate::expressions::Paren {
4156                this: and_cond,
4157                trailing_comments: Vec::new(),
4158            })),
4159        )));
4160
4161        select.where_clause = Some(match select.where_clause.take() {
4162            Some(existing) => Where {
4163                this: Expression::And(Box::new(BinaryOp::new(existing.this, or_cond))),
4164            },
4165            None => Where { this: or_cond },
4166        });
4167    }
4168
4169    Ok(Expression::Select(Box::new(select)))
4170}
4171
4172fn extract_flatten_input(lat: &Lateral) -> Option<Expression> {
4173    let Expression::Function(f) = lat.this.as_ref() else {
4174        return None;
4175    };
4176    if !f.name.eq_ignore_ascii_case("FLATTEN") {
4177        return None;
4178    }
4179
4180    for arg in &f.args {
4181        if let Expression::NamedArgument(na) = arg {
4182            if na.name.name.eq_ignore_ascii_case("INPUT") {
4183                return Some(na.value.clone());
4184            }
4185        }
4186    }
4187    f.args.first().cloned()
4188}
4189
4190fn rewrite_flatten_lateral(mut lat: Lateral) -> Lateral {
4191    let cond = Expression::Eq(Box::new(BinaryOp::new(
4192        Expression::qualified_column("_u", "pos"),
4193        Expression::qualified_column("_u_2", "pos_2"),
4194    )));
4195    let true_expr = Expression::qualified_column("_u_2", "entity");
4196    let iff_expr = Expression::Function(Box::new(Function::new(
4197        "IFF".to_string(),
4198        vec![cond, true_expr, Expression::Null(crate::expressions::Null)],
4199    )));
4200
4201    lat.this = Box::new(iff_expr);
4202    if lat.column_aliases.is_empty() {
4203        lat.column_aliases = vec![
4204            "SEQ".to_string(),
4205            "KEY".to_string(),
4206            "PATH".to_string(),
4207            "INDEX".to_string(),
4208            "VALUE".to_string(),
4209            "THIS".to_string(),
4210        ];
4211    }
4212    lat
4213}
4214
4215/// Info about an EXPLODE/POSEXPLODE found in a SELECT projection
4216struct ExplodeInfo {
4217    /// The argument to EXPLODE/POSEXPLODE (the array expression)
4218    arg_sql: String,
4219    /// The alias for the exploded column
4220    explode_alias: String,
4221    /// The alias for the position column
4222    pos_alias: String,
4223    /// Source alias for this unnest (e.g., _u_2)
4224    unnest_source_alias: String,
4225}
4226
4227fn explode_projection_to_unnest_impl(select: Select, target: DialectType) -> Result<Expression> {
4228    let is_presto = matches!(
4229        target,
4230        DialectType::Presto | DialectType::Trino | DialectType::Athena
4231    );
4232    let is_bigquery = matches!(target, DialectType::BigQuery);
4233
4234    if !is_presto && !is_bigquery {
4235        return Ok(Expression::Select(Box::new(select)));
4236    }
4237
4238    // Check if any SELECT projection contains EXPLODE or POSEXPLODE
4239    let has_explode = select.expressions.iter().any(|e| expr_contains_explode(e));
4240    if !has_explode {
4241        return Ok(Expression::Select(Box::new(select)));
4242    }
4243
4244    // Collect taken names from existing SELECT expressions and FROM sources
4245    let mut taken_select_names = std::collections::HashSet::new();
4246    let mut taken_source_names = std::collections::HashSet::new();
4247
4248    // Collect names from existing SELECT expressions (output names)
4249    for sel in &select.expressions {
4250        if let Some(name) = get_output_name(sel) {
4251            taken_select_names.insert(name);
4252        }
4253    }
4254
4255    // Also add the explode arg name if it's a column reference
4256    for sel in &select.expressions {
4257        let explode_expr = find_explode_in_expr(sel);
4258        if let Some(arg) = explode_expr {
4259            if let Some(name) = get_output_name(&arg) {
4260                taken_select_names.insert(name);
4261            }
4262        }
4263    }
4264
4265    // Collect source names from FROM clause
4266    if let Some(ref from) = select.from {
4267        for from_expr in &from.expressions {
4268            collect_source_names(from_expr, &mut taken_source_names);
4269        }
4270    }
4271    // Also collect from JOINs
4272    for join in &select.joins {
4273        collect_source_names(&join.this, &mut taken_source_names);
4274    }
4275
4276    // Generate series alias
4277    let series_alias = new_name(&mut taken_select_names, "pos");
4278
4279    // Generate series source alias
4280    let series_source_alias = new_name(&mut taken_source_names, "_u");
4281
4282    // Get the target dialect for generating expression SQL
4283    let target_dialect = Dialect::get(target);
4284
4285    // Process each SELECT expression, collecting explode info
4286    let mut explode_infos: Vec<ExplodeInfo> = Vec::new();
4287    let mut new_projections: Vec<String> = Vec::new();
4288
4289    for (_idx, sel_expr) in select.expressions.iter().enumerate() {
4290        let explode_data = extract_explode_data(sel_expr);
4291
4292        if let Some((is_posexplode, arg_expr, explicit_alias, explicit_pos_alias)) = explode_data {
4293            // Generate the argument SQL in target dialect
4294            let arg_sql = target_dialect
4295                .generate(&arg_expr)
4296                .unwrap_or_else(|_| "NULL".to_string());
4297
4298            let unnest_source_alias = new_name(&mut taken_source_names, "_u");
4299
4300            let explode_alias = if let Some(ref ea) = explicit_alias {
4301                // Use the explicit alias directly (it was explicitly specified by the user)
4302                // Remove from taken_select_names first to avoid false collision with itself
4303                taken_select_names.remove(ea.as_str());
4304                // Now check for collision with other names
4305                let name = new_name(&mut taken_select_names, ea);
4306                name
4307            } else {
4308                new_name(&mut taken_select_names, "col")
4309            };
4310
4311            let pos_alias = if let Some(ref pa) = explicit_pos_alias {
4312                // Use the explicit pos alias directly
4313                taken_select_names.remove(pa.as_str());
4314                let name = new_name(&mut taken_select_names, pa);
4315                name
4316            } else {
4317                new_name(&mut taken_select_names, "pos")
4318            };
4319
4320            // Build the IF projection
4321            if is_presto {
4322                // Presto: IF(_u.pos = _u_2.pos_2, _u_2.col) AS col
4323                let if_col = format!(
4324                    "IF({}.{} = {}.{}, {}.{}) AS {}",
4325                    series_source_alias,
4326                    series_alias,
4327                    unnest_source_alias,
4328                    pos_alias,
4329                    unnest_source_alias,
4330                    explode_alias,
4331                    explode_alias
4332                );
4333                new_projections.push(if_col);
4334
4335                // For POSEXPLODE, also add the position projection
4336                if is_posexplode {
4337                    let if_pos = format!(
4338                        "IF({}.{} = {}.{}, {}.{}) AS {}",
4339                        series_source_alias,
4340                        series_alias,
4341                        unnest_source_alias,
4342                        pos_alias,
4343                        unnest_source_alias,
4344                        pos_alias,
4345                        pos_alias
4346                    );
4347                    new_projections.push(if_pos);
4348                }
4349            } else {
4350                // BigQuery: IF(pos = pos_2, col, NULL) AS col
4351                let if_col = format!(
4352                    "IF({} = {}, {}, NULL) AS {}",
4353                    series_alias, pos_alias, explode_alias, explode_alias
4354                );
4355                new_projections.push(if_col);
4356
4357                // For POSEXPLODE, also add the position projection
4358                if is_posexplode {
4359                    let if_pos = format!(
4360                        "IF({} = {}, {}, NULL) AS {}",
4361                        series_alias, pos_alias, pos_alias, pos_alias
4362                    );
4363                    new_projections.push(if_pos);
4364                }
4365            }
4366
4367            explode_infos.push(ExplodeInfo {
4368                arg_sql,
4369                explode_alias,
4370                pos_alias,
4371                unnest_source_alias,
4372            });
4373        } else {
4374            // Not an EXPLODE expression, generate as-is
4375            let sel_sql = target_dialect
4376                .generate(sel_expr)
4377                .unwrap_or_else(|_| "*".to_string());
4378            new_projections.push(sel_sql);
4379        }
4380    }
4381
4382    if explode_infos.is_empty() {
4383        return Ok(Expression::Select(Box::new(select)));
4384    }
4385
4386    // Build the FROM clause
4387    let mut from_parts: Vec<String> = Vec::new();
4388
4389    // Existing FROM sources
4390    if let Some(ref from) = select.from {
4391        for from_expr in &from.expressions {
4392            let from_sql = target_dialect.generate(from_expr).unwrap_or_default();
4393            from_parts.push(from_sql);
4394        }
4395    }
4396
4397    // Build the size expressions for the series generator
4398    let size_exprs: Vec<String> = explode_infos
4399        .iter()
4400        .map(|info| {
4401            if is_presto {
4402                format!("CARDINALITY({})", info.arg_sql)
4403            } else {
4404                format!("ARRAY_LENGTH({})", info.arg_sql)
4405            }
4406        })
4407        .collect();
4408
4409    let greatest_arg = if size_exprs.len() == 1 {
4410        size_exprs[0].clone()
4411    } else {
4412        format!("GREATEST({})", size_exprs.join(", "))
4413    };
4414
4415    // Build the series source
4416    // greatest_arg is already "GREATEST(...)" when multiple, or "CARDINALITY(x)" / "ARRAY_LENGTH(x)" when single
4417    let series_sql = if is_presto {
4418        // SEQUENCE(1, GREATEST(CARDINALITY(x))) for single, SEQUENCE(1, GREATEST(C(a), C(b))) for multiple
4419        if size_exprs.len() == 1 {
4420            format!(
4421                "UNNEST(SEQUENCE(1, GREATEST({}))) AS {}({})",
4422                greatest_arg, series_source_alias, series_alias
4423            )
4424        } else {
4425            // greatest_arg already has GREATEST(...) wrapper
4426            format!(
4427                "UNNEST(SEQUENCE(1, {})) AS {}({})",
4428                greatest_arg, series_source_alias, series_alias
4429            )
4430        }
4431    } else {
4432        // GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(x)) - 1) for single
4433        if size_exprs.len() == 1 {
4434            format!(
4435                "UNNEST(GENERATE_ARRAY(0, GREATEST({}) - 1)) AS {}",
4436                greatest_arg, series_alias
4437            )
4438        } else {
4439            // greatest_arg already has GREATEST(...) wrapper
4440            format!(
4441                "UNNEST(GENERATE_ARRAY(0, {} - 1)) AS {}",
4442                greatest_arg, series_alias
4443            )
4444        }
4445    };
4446
4447    // Build CROSS JOIN UNNEST clauses
4448    // Always use Presto-style (WITH ORDINALITY) for the SQL string to parse,
4449    // then convert to BigQuery-style AST after parsing if needed
4450    let mut cross_joins: Vec<String> = Vec::new();
4451
4452    for info in &explode_infos {
4453        // Always use WITH ORDINALITY syntax (which our parser handles)
4454        cross_joins.push(format!(
4455            "CROSS JOIN UNNEST({}) WITH ORDINALITY AS {}({}, {})",
4456            info.arg_sql, info.unnest_source_alias, info.explode_alias, info.pos_alias
4457        ));
4458    }
4459
4460    // Build WHERE clause
4461    let mut where_conditions: Vec<String> = Vec::new();
4462
4463    for info in &explode_infos {
4464        let size_expr = if is_presto {
4465            format!("CARDINALITY({})", info.arg_sql)
4466        } else {
4467            format!("ARRAY_LENGTH({})", info.arg_sql)
4468        };
4469
4470        let cond = if is_presto {
4471            format!(
4472                "{series_src}.{series_al} = {unnest_src}.{pos_al} OR ({series_src}.{series_al} > {size} AND {unnest_src}.{pos_al} = {size})",
4473                series_src = series_source_alias,
4474                series_al = series_alias,
4475                unnest_src = info.unnest_source_alias,
4476                pos_al = info.pos_alias,
4477                size = size_expr
4478            )
4479        } else {
4480            format!(
4481                "{series_al} = {pos_al} OR ({series_al} > ({size} - 1) AND {pos_al} = ({size} - 1))",
4482                series_al = series_alias,
4483                pos_al = info.pos_alias,
4484                size = size_expr
4485            )
4486        };
4487
4488        where_conditions.push(cond);
4489    }
4490
4491    // Combine WHERE conditions with AND (wrapped in parens if multiple)
4492    let where_sql = if where_conditions.len() == 1 {
4493        where_conditions[0].clone()
4494    } else {
4495        where_conditions
4496            .iter()
4497            .map(|c| format!("({})", c))
4498            .collect::<Vec<_>>()
4499            .join(" AND ")
4500    };
4501
4502    // Build the complete SQL
4503    let select_part = new_projections.join(", ");
4504
4505    // FROM part: if there was no original FROM, the series becomes the FROM source
4506    let from_and_joins = if from_parts.is_empty() {
4507        // No original FROM: series is the FROM source, everything else is CROSS JOIN
4508        format!("FROM {} {}", series_sql, cross_joins.join(" "))
4509    } else {
4510        format!(
4511            "FROM {} {} {}",
4512            from_parts.join(", "),
4513            format!("CROSS JOIN {}", series_sql),
4514            cross_joins.join(" ")
4515        )
4516    };
4517
4518    let full_sql = format!(
4519        "SELECT {} {} WHERE {}",
4520        select_part, from_and_joins, where_sql
4521    );
4522
4523    // Parse the constructed SQL using the Generic dialect (which handles all SQL syntax)
4524    // We use Generic instead of the target dialect to avoid parser limitations
4525    let generic_dialect = Dialect::get(DialectType::Generic);
4526    let parsed = generic_dialect.parse(&full_sql);
4527    match parsed {
4528        Ok(mut stmts) if !stmts.is_empty() => {
4529            let mut result = stmts.remove(0);
4530
4531            // For BigQuery, convert Presto-style UNNEST AST to BigQuery-style
4532            // Presto: Alias(Unnest(with_ordinality=true), alias=_u_N, column_aliases=[col, pos])
4533            // BigQuery: Unnest(with_ordinality=true, alias=col, offset_alias=pos) [no outer Alias]
4534            if is_bigquery {
4535                convert_unnest_presto_to_bigquery(&mut result);
4536            }
4537
4538            Ok(result)
4539        }
4540        _ => {
4541            // If parsing fails, return the original expression unchanged
4542            Ok(Expression::Select(Box::new(select)))
4543        }
4544    }
4545}
4546
4547/// Convert Presto-style UNNEST WITH ORDINALITY to BigQuery-style UNNEST WITH OFFSET in the AST.
4548/// Presto: Alias(Unnest(with_ordinality=true), alias=_u_N, column_aliases=[col, pos_N])
4549/// BigQuery: Unnest(with_ordinality=true, alias=col, offset_alias=pos_N)
4550fn convert_unnest_presto_to_bigquery(expr: &mut Expression) {
4551    match expr {
4552        Expression::Select(ref mut select) => {
4553            // Convert in FROM clause
4554            if let Some(ref mut from) = select.from {
4555                for from_item in from.expressions.iter_mut() {
4556                    convert_unnest_presto_to_bigquery(from_item);
4557                }
4558            }
4559            // Convert in JOINs
4560            for join in select.joins.iter_mut() {
4561                convert_unnest_presto_to_bigquery(&mut join.this);
4562            }
4563        }
4564        Expression::Alias(ref alias) => {
4565            // Check if this is Alias(Unnest(with_ordinality=true), ..., column_aliases=[col, pos])
4566            if let Expression::Unnest(ref unnest) = alias.this {
4567                if unnest.with_ordinality && alias.column_aliases.len() >= 2 {
4568                    let col_alias = alias.column_aliases[0].clone();
4569                    let pos_alias = alias.column_aliases[1].clone();
4570                    let mut new_unnest = unnest.as_ref().clone();
4571                    new_unnest.alias = Some(col_alias);
4572                    new_unnest.offset_alias = Some(pos_alias);
4573                    // Replace the Alias(Unnest) with just Unnest
4574                    *expr = Expression::Unnest(Box::new(new_unnest));
4575                }
4576            }
4577        }
4578        _ => {}
4579    }
4580}
4581
4582/// Find a new name that doesn't conflict with existing names.
4583/// Tries `base`, then `base_2`, `base_3`, etc.
4584fn new_name(names: &mut std::collections::HashSet<String>, base: &str) -> String {
4585    if !names.contains(base) {
4586        names.insert(base.to_string());
4587        return base.to_string();
4588    }
4589    let mut i = 2;
4590    loop {
4591        let candidate = format!("{}_{}", base, i);
4592        if !names.contains(&candidate) {
4593            names.insert(candidate.clone());
4594            return candidate;
4595        }
4596        i += 1;
4597    }
4598}
4599
4600/// Check if an expression contains EXPLODE or POSEXPLODE
4601fn expr_contains_explode(expr: &Expression) -> bool {
4602    match expr {
4603        Expression::Explode(_) => true,
4604        Expression::ExplodeOuter(_) => true,
4605        Expression::Function(f) => {
4606            let name = f.name.to_uppercase();
4607            name == "POSEXPLODE" || name == "POSEXPLODE_OUTER"
4608        }
4609        Expression::Alias(a) => expr_contains_explode(&a.this),
4610        _ => false,
4611    }
4612}
4613
4614/// Find the EXPLODE/POSEXPLODE expression within a select item, return the arg
4615fn find_explode_in_expr(expr: &Expression) -> Option<Expression> {
4616    match expr {
4617        Expression::Explode(uf) => Some(uf.this.clone()),
4618        Expression::ExplodeOuter(uf) => Some(uf.this.clone()),
4619        Expression::Function(f) => {
4620            let name = f.name.to_uppercase();
4621            if (name == "POSEXPLODE" || name == "POSEXPLODE_OUTER") && !f.args.is_empty() {
4622                Some(f.args[0].clone())
4623            } else {
4624                None
4625            }
4626        }
4627        Expression::Alias(a) => find_explode_in_expr(&a.this),
4628        _ => None,
4629    }
4630}
4631
4632/// Extract explode data from a SELECT expression.
4633/// Returns (is_posexplode, arg_expression, explicit_col_alias, explicit_pos_alias)
4634fn extract_explode_data(
4635    expr: &Expression,
4636) -> Option<(bool, Expression, Option<String>, Option<String>)> {
4637    match expr {
4638        // Bare EXPLODE(x) without alias
4639        Expression::Explode(uf) => Some((false, uf.this.clone(), None, None)),
4640        Expression::ExplodeOuter(uf) => Some((false, uf.this.clone(), None, None)),
4641        // Bare POSEXPLODE(x) without alias
4642        Expression::Function(f) => {
4643            let name = f.name.to_uppercase();
4644            if (name == "POSEXPLODE" || name == "POSEXPLODE_OUTER") && !f.args.is_empty() {
4645                Some((true, f.args[0].clone(), None, None))
4646            } else {
4647                None
4648            }
4649        }
4650        // Aliased: EXPLODE(x) AS col, or POSEXPLODE(x) AS (a, b)
4651        Expression::Alias(a) => {
4652            match &a.this {
4653                Expression::Explode(uf) => {
4654                    let alias = if !a.alias.is_empty() {
4655                        Some(a.alias.name.clone())
4656                    } else {
4657                        None
4658                    };
4659                    Some((false, uf.this.clone(), alias, None))
4660                }
4661                Expression::ExplodeOuter(uf) => {
4662                    let alias = if !a.alias.is_empty() {
4663                        Some(a.alias.name.clone())
4664                    } else {
4665                        None
4666                    };
4667                    Some((false, uf.this.clone(), alias, None))
4668                }
4669                Expression::Function(f) => {
4670                    let name = f.name.to_uppercase();
4671                    if (name == "POSEXPLODE" || name == "POSEXPLODE_OUTER") && !f.args.is_empty() {
4672                        // Check for column aliases: AS (a, b)
4673                        if a.column_aliases.len() == 2 {
4674                            let pos_alias = a.column_aliases[0].name.clone();
4675                            let col_alias = a.column_aliases[1].name.clone();
4676                            Some((true, f.args[0].clone(), Some(col_alias), Some(pos_alias)))
4677                        } else if !a.alias.is_empty() {
4678                            // Single alias: AS x
4679                            Some((true, f.args[0].clone(), Some(a.alias.name.clone()), None))
4680                        } else {
4681                            Some((true, f.args[0].clone(), None, None))
4682                        }
4683                    } else {
4684                        None
4685                    }
4686                }
4687                _ => None,
4688            }
4689        }
4690        _ => None,
4691    }
4692}
4693
4694/// Get the output name of a SELECT expression
4695fn get_output_name(expr: &Expression) -> Option<String> {
4696    match expr {
4697        Expression::Alias(a) => {
4698            if !a.alias.is_empty() {
4699                Some(a.alias.name.clone())
4700            } else {
4701                None
4702            }
4703        }
4704        Expression::Column(c) => Some(c.name.name.clone()),
4705        Expression::Identifier(id) => Some(id.name.clone()),
4706        _ => None,
4707    }
4708}
4709
4710/// Collect source names from a FROM/JOIN expression
4711fn collect_source_names(expr: &Expression, names: &mut std::collections::HashSet<String>) {
4712    match expr {
4713        Expression::Alias(a) => {
4714            if !a.alias.is_empty() {
4715                names.insert(a.alias.name.clone());
4716            }
4717        }
4718        Expression::Subquery(s) => {
4719            if let Some(ref alias) = s.alias {
4720                names.insert(alias.name.clone());
4721            }
4722        }
4723        Expression::Table(t) => {
4724            if let Some(ref alias) = t.alias {
4725                names.insert(alias.name.clone());
4726            } else {
4727                names.insert(t.name.name.clone());
4728            }
4729        }
4730        Expression::Column(c) => {
4731            names.insert(c.name.name.clone());
4732        }
4733        Expression::Identifier(id) => {
4734            names.insert(id.name.clone());
4735        }
4736        _ => {}
4737    }
4738}
4739
4740/// Strip UNNEST wrapping from column reference arguments for Redshift target.
4741/// BigQuery UNNEST(column_ref) -> Redshift: just column_ref
4742pub fn strip_unnest_column_refs(expr: Expression) -> Result<Expression> {
4743    use crate::dialects::transform_recursive;
4744    transform_recursive(expr, &strip_unnest_column_refs_single)
4745}
4746
4747fn strip_unnest_column_refs_single(expr: Expression) -> Result<Expression> {
4748    if let Expression::Select(mut select) = expr {
4749        // Process JOINs (UNNEST items have been moved to joins by unnest_from_to_cross_join)
4750        for join in select.joins.iter_mut() {
4751            strip_unnest_from_expr(&mut join.this);
4752        }
4753        // Process FROM items too
4754        if let Some(ref mut from) = select.from {
4755            for from_item in from.expressions.iter_mut() {
4756                strip_unnest_from_expr(from_item);
4757            }
4758        }
4759        Ok(Expression::Select(select))
4760    } else {
4761        Ok(expr)
4762    }
4763}
4764
4765/// If expr is Alias(UNNEST(column_ref), alias) where UNNEST arg is a column/dot path,
4766/// replace with Alias(column_ref, alias) to strip the UNNEST.
4767fn strip_unnest_from_expr(expr: &mut Expression) {
4768    if let Expression::Alias(ref mut alias) = expr {
4769        if let Expression::Unnest(ref unnest) = alias.this {
4770            let is_column_ref = matches!(&unnest.this, Expression::Column(_) | Expression::Dot(_));
4771            if is_column_ref {
4772                // Replace UNNEST(col_ref) with just col_ref
4773                let inner = unnest.this.clone();
4774                alias.this = inner;
4775            }
4776        }
4777    }
4778}
4779
4780/// Wrap DuckDB UNNEST of struct arrays in (SELECT UNNEST(..., max_depth => 2)) subquery.
4781/// BigQuery UNNEST of struct arrays needs this wrapping for DuckDB to properly expand struct fields.
4782pub fn wrap_duckdb_unnest_struct(expr: Expression) -> Result<Expression> {
4783    use crate::dialects::transform_recursive;
4784    transform_recursive(expr, &wrap_duckdb_unnest_struct_single)
4785}
4786
4787fn wrap_duckdb_unnest_struct_single(expr: Expression) -> Result<Expression> {
4788    if let Expression::Select(mut select) = expr {
4789        // Process FROM items
4790        if let Some(ref mut from) = select.from {
4791            for from_item in from.expressions.iter_mut() {
4792                try_wrap_unnest_in_subquery(from_item);
4793            }
4794        }
4795
4796        // Process JOINs
4797        for join in select.joins.iter_mut() {
4798            try_wrap_unnest_in_subquery(&mut join.this);
4799        }
4800
4801        Ok(Expression::Select(select))
4802    } else {
4803        Ok(expr)
4804    }
4805}
4806
4807/// Check if an expression contains struct array elements that need DuckDB UNNEST wrapping.
4808fn is_struct_array_unnest_arg(expr: &Expression) -> bool {
4809    match expr {
4810        // Array literal containing struct elements
4811        Expression::Array(arr) => arr
4812            .expressions
4813            .iter()
4814            .any(|e| matches!(e, Expression::Struct(_))),
4815        Expression::ArrayFunc(arr) => arr
4816            .expressions
4817            .iter()
4818            .any(|e| matches!(e, Expression::Struct(_))),
4819        // CAST to struct array type, e.g. CAST([] AS STRUCT(x BIGINT)[])
4820        Expression::Cast(c) => {
4821            matches!(&c.to, DataType::Array { element_type, .. } if matches!(**element_type, DataType::Struct { .. }))
4822        }
4823        _ => false,
4824    }
4825}
4826
4827/// Try to wrap an UNNEST expression in a (SELECT UNNEST(..., max_depth => 2)) subquery.
4828/// Handles both bare UNNEST and Alias(UNNEST).
4829fn try_wrap_unnest_in_subquery(expr: &mut Expression) {
4830    // Check for Alias wrapping UNNEST
4831    if let Expression::Alias(ref alias) = expr {
4832        if let Expression::Unnest(ref unnest) = alias.this {
4833            if is_struct_array_unnest_arg(&unnest.this) {
4834                let unnest_clone = (**unnest).clone();
4835                let alias_name = alias.alias.clone();
4836                let new_expr = make_unnest_subquery(unnest_clone, Some(alias_name));
4837                *expr = new_expr;
4838                return;
4839            }
4840        }
4841    }
4842
4843    // Check for bare UNNEST
4844    if let Expression::Unnest(ref unnest) = expr {
4845        if is_struct_array_unnest_arg(&unnest.this) {
4846            let unnest_clone = (**unnest).clone();
4847            let new_expr = make_unnest_subquery(unnest_clone, None);
4848            *expr = new_expr;
4849        }
4850    }
4851}
4852
4853/// Create (SELECT UNNEST(arg, max_depth => 2)) [AS alias] subquery.
4854fn make_unnest_subquery(unnest: UnnestFunc, alias: Option<Identifier>) -> Expression {
4855    // Build UNNEST function call with max_depth => 2 named argument
4856    let max_depth_arg = Expression::NamedArgument(Box::new(NamedArgument {
4857        name: Identifier::new("max_depth".to_string()),
4858        value: Expression::Literal(Box::new(Literal::Number("2".to_string()))),
4859        separator: NamedArgSeparator::DArrow,
4860    }));
4861
4862    let mut unnest_args = vec![unnest.this];
4863    unnest_args.extend(unnest.expressions);
4864    unnest_args.push(max_depth_arg);
4865
4866    let unnest_func =
4867        Expression::Function(Box::new(Function::new("UNNEST".to_string(), unnest_args)));
4868
4869    // Build SELECT UNNEST(...)
4870    let mut inner_select = Select::new();
4871    inner_select.expressions = vec![unnest_func];
4872    let inner_select = Expression::Select(Box::new(inner_select));
4873
4874    // Wrap in subquery
4875    let subquery = Subquery {
4876        this: inner_select,
4877        alias,
4878        column_aliases: Vec::new(),
4879        order_by: None,
4880        limit: None,
4881        offset: None,
4882        distribute_by: None,
4883        sort_by: None,
4884        cluster_by: None,
4885        lateral: false,
4886        modifiers_inside: false,
4887        trailing_comments: Vec::new(),
4888        inferred_type: None,
4889    };
4890
4891    Expression::Subquery(Box::new(subquery))
4892}
4893
4894/// Wrap UNION with ORDER BY/LIMIT in a subquery.
4895///
4896/// Some dialects (ClickHouse, TSQL) don't support ORDER BY/LIMIT directly on UNION.
4897/// This transform converts:
4898///   SELECT ... UNION SELECT ... ORDER BY x LIMIT n
4899/// to:
4900///   SELECT * FROM (SELECT ... UNION SELECT ...) AS _l_0 ORDER BY x LIMIT n
4901///
4902/// NOTE: Our parser may place ORDER BY/LIMIT on the right-hand SELECT rather than
4903/// the Union (unlike Python sqlglot). This function handles both cases by checking
4904/// the right-hand SELECT for trailing ORDER BY/LIMIT and moving them to the Union.
4905pub fn no_limit_order_by_union(expr: Expression) -> Result<Expression> {
4906    use crate::expressions::{Limit as LimitClause, Offset as OffsetClause, OrderBy, Star};
4907
4908    match expr {
4909        Expression::Union(mut u) => {
4910            // Check if ORDER BY/LIMIT are on the rightmost Select instead of the Union
4911            // (our parser may attach them to the right SELECT)
4912            if u.order_by.is_none() && u.limit.is_none() && u.offset.is_none() {
4913                // Find the rightmost Select and check for ORDER BY/LIMIT
4914                if let Expression::Select(ref mut right_select) = u.right {
4915                    if right_select.order_by.is_some()
4916                        || right_select.limit.is_some()
4917                        || right_select.offset.is_some()
4918                    {
4919                        // Move ORDER BY/LIMIT from right Select to Union
4920                        u.order_by = right_select.order_by.take();
4921                        u.limit = right_select.limit.take().map(|l| Box::new(l.this));
4922                        u.offset = right_select.offset.take().map(|o| Box::new(o.this));
4923                    }
4924                }
4925            }
4926
4927            let has_order_or_limit =
4928                u.order_by.is_some() || u.limit.is_some() || u.offset.is_some();
4929            if has_order_or_limit {
4930                // Extract ORDER BY, LIMIT, OFFSET from the Union
4931                let order_by: Option<OrderBy> = u.order_by.take();
4932                let union_limit: Option<Box<Expression>> = u.limit.take();
4933                let union_offset: Option<Box<Expression>> = u.offset.take();
4934
4935                // Convert Union's limit (Box<Expression>) to Select's limit (Limit struct)
4936                let select_limit: Option<LimitClause> = union_limit.map(|l| LimitClause {
4937                    this: *l,
4938                    percent: false,
4939                    comments: Vec::new(),
4940                });
4941
4942                // Convert Union's offset (Box<Expression>) to Select's offset (Offset struct)
4943                let select_offset: Option<OffsetClause> = union_offset.map(|o| OffsetClause {
4944                    this: *o,
4945                    rows: None,
4946                });
4947
4948                // Create a subquery from the Union
4949                let subquery = Subquery {
4950                    this: Expression::Union(u),
4951                    alias: Some(Identifier::new("_l_0")),
4952                    column_aliases: Vec::new(),
4953                    lateral: false,
4954                    modifiers_inside: false,
4955                    order_by: None,
4956                    limit: None,
4957                    offset: None,
4958                    distribute_by: None,
4959                    sort_by: None,
4960                    cluster_by: None,
4961                    trailing_comments: Vec::new(),
4962                    inferred_type: None,
4963                };
4964
4965                // Build SELECT * FROM (UNION) AS _l_0 ORDER BY ... LIMIT ...
4966                let mut select = Select::default();
4967                select.expressions = vec![Expression::Star(Star {
4968                    table: None,
4969                    except: None,
4970                    replace: None,
4971                    rename: None,
4972                    trailing_comments: Vec::new(),
4973                    span: None,
4974                })];
4975                select.from = Some(From {
4976                    expressions: vec![Expression::Subquery(Box::new(subquery))],
4977                });
4978                select.order_by = order_by;
4979                select.limit = select_limit;
4980                select.offset = select_offset;
4981
4982                Ok(Expression::Select(Box::new(select)))
4983            } else {
4984                Ok(Expression::Union(u))
4985            }
4986        }
4987        _ => Ok(expr),
4988    }
4989}
4990
4991/// Expand LIKE ANY / ILIKE ANY to OR chains.
4992///
4993/// For dialects that don't support quantifiers on LIKE/ILIKE (e.g. DuckDB),
4994/// expand `x LIKE ANY (('a', 'b'))` to `x LIKE 'a' OR x LIKE 'b'`.
4995///
4996/// Handles precedence: when LIKE ANY (→OR) is inside AND, wraps in parens.
4997/// When LIKE ALL (→AND) is inside OR, wraps in parens for readability.
4998pub fn expand_like_any(expr: Expression) -> Result<Expression> {
4999    use crate::expressions::{BinaryOp, LikeOp, Paren};
5000
5001    /// Sentinel comment used to mark Paren nodes created by LIKE ALL expansion.
5002    /// These markers are stripped in a cleanup pass unless they end up inside an OR parent.
5003    const LIKE_ALL_MARKER: &str = "__LIKE_ALL_EXPANSION__";
5004
5005    fn unwrap_parens(e: &Expression) -> &Expression {
5006        match e {
5007            Expression::Paren(p) => unwrap_parens(&p.this),
5008            _ => e,
5009        }
5010    }
5011
5012    fn extract_tuple_values(e: &Expression) -> Option<Vec<Expression>> {
5013        let inner = unwrap_parens(e);
5014        match inner {
5015            Expression::Tuple(t) => Some(t.expressions.clone()),
5016            // Single value in parens: treat as single-element list
5017            _ if !matches!(e, Expression::Tuple(_)) => Some(vec![inner.clone()]),
5018            _ => None,
5019        }
5020    }
5021
5022    /// Build a chain of LIKE/ILIKE conditions joined by a combiner (OR for ANY, AND for ALL).
5023    fn expand_like_quantifier(
5024        op: &LikeOp,
5025        values: Vec<Expression>,
5026        is_ilike: bool,
5027        combiner: fn(Expression, Expression) -> Expression,
5028        wrap_marker: bool,
5029    ) -> Expression {
5030        let num_values = values.len();
5031        let mut result: Option<Expression> = None;
5032        for val in values {
5033            let like = if is_ilike {
5034                Expression::ILike(Box::new(LikeOp {
5035                    left: op.left.clone(),
5036                    right: val,
5037                    escape: op.escape.clone(),
5038                    quantifier: None,
5039                    inferred_type: None,
5040                }))
5041            } else {
5042                Expression::Like(Box::new(LikeOp {
5043                    left: op.left.clone(),
5044                    right: val,
5045                    escape: op.escape.clone(),
5046                    quantifier: None,
5047                    inferred_type: None,
5048                }))
5049            };
5050            result = Some(match result {
5051                None => like,
5052                Some(prev) => combiner(prev, like),
5053            });
5054        }
5055        let expanded = result.unwrap_or_else(|| unreachable!("values is non-empty"));
5056        // For LIKE ALL (AND chain) with multiple values, wrap in a marker Paren.
5057        // The marker lets us distinguish expansion-created AND from parser-created AND
5058        // when deciding whether to keep parens inside OR.
5059        if wrap_marker && num_values > 1 {
5060            Expression::Paren(Box::new(Paren {
5061                this: expanded,
5062                trailing_comments: vec![LIKE_ALL_MARKER.to_string()],
5063            }))
5064        } else {
5065            expanded
5066        }
5067    }
5068
5069    fn or_combiner(a: Expression, b: Expression) -> Expression {
5070        Expression::Or(Box::new(BinaryOp::new(a, b)))
5071    }
5072
5073    fn and_combiner(a: Expression, b: Expression) -> Expression {
5074        Expression::And(Box::new(BinaryOp::new(a, b)))
5075    }
5076
5077    fn is_like_all_marker(p: &Paren) -> bool {
5078        p.trailing_comments.len() == 1 && p.trailing_comments[0] == LIKE_ALL_MARKER
5079    }
5080
5081    // Phase 1: Expand LIKE ANY/ALL and fix precedence in a single bottom-up pass.
5082    //
5083    // - LIKE ANY → bare Or chain
5084    // - LIKE ALL → Paren(And chain) with marker comment
5085    // - And handler: wraps bare Or children in Paren (from LIKE ANY expansion;
5086    //   parser never creates bare Or inside And)
5087    // - Or handler: converts marker Paren to clean Paren (keeps the wrapping)
5088    let result = transform_recursive(expr, &|e| {
5089        match e {
5090            // LIKE ANY -> OR chain (bare)
5091            Expression::Like(ref op) if op.quantifier.as_deref() == Some("ANY") => {
5092                if let Some(values) = extract_tuple_values(&op.right) {
5093                    if values.is_empty() {
5094                        return Ok(e);
5095                    }
5096                    Ok(expand_like_quantifier(
5097                        op,
5098                        values,
5099                        false,
5100                        or_combiner,
5101                        false,
5102                    ))
5103                } else {
5104                    Ok(e)
5105                }
5106            }
5107            // LIKE ALL -> AND chain (with marker Paren)
5108            Expression::Like(ref op) if op.quantifier.as_deref() == Some("ALL") => {
5109                if let Some(values) = extract_tuple_values(&op.right) {
5110                    if values.is_empty() {
5111                        return Ok(e);
5112                    }
5113                    Ok(expand_like_quantifier(
5114                        op,
5115                        values,
5116                        false,
5117                        and_combiner,
5118                        true,
5119                    ))
5120                } else {
5121                    Ok(e)
5122                }
5123            }
5124            // ILIKE ANY -> OR chain (bare)
5125            Expression::ILike(ref op) if op.quantifier.as_deref() == Some("ANY") => {
5126                if let Some(values) = extract_tuple_values(&op.right) {
5127                    if values.is_empty() {
5128                        return Ok(e);
5129                    }
5130                    Ok(expand_like_quantifier(op, values, true, or_combiner, false))
5131                } else {
5132                    Ok(e)
5133                }
5134            }
5135            // ILIKE ALL -> AND chain (with marker Paren)
5136            Expression::ILike(ref op) if op.quantifier.as_deref() == Some("ALL") => {
5137                if let Some(values) = extract_tuple_values(&op.right) {
5138                    if values.is_empty() {
5139                        return Ok(e);
5140                    }
5141                    Ok(expand_like_quantifier(op, values, true, and_combiner, true))
5142                } else {
5143                    Ok(e)
5144                }
5145            }
5146            // After children are expanded (bottom-up), fix And nodes:
5147            // Wrap bare Or children in Paren (from LIKE ANY expansion).
5148            // The parser never produces bare Or inside And (AND binds tighter than OR,
5149            // so explicit parens in SQL like "(a OR b) AND c" create Paren(Or(...)) in the AST).
5150            Expression::And(mut op) => {
5151                if matches!(&op.left, Expression::Or(_)) {
5152                    op.left = Expression::Paren(Box::new(Paren {
5153                        this: op.left,
5154                        trailing_comments: vec![],
5155                    }));
5156                }
5157                if matches!(&op.right, Expression::Or(_)) {
5158                    op.right = Expression::Paren(Box::new(Paren {
5159                        this: op.right,
5160                        trailing_comments: vec![],
5161                    }));
5162                }
5163                Ok(Expression::And(op))
5164            }
5165            // After children are expanded (bottom-up), fix Or nodes:
5166            // Convert marker Paren(And) to clean Paren(And) so the cleanup pass won't strip it.
5167            Expression::Or(mut op) => {
5168                if let Expression::Paren(ref mut p) = op.left {
5169                    if is_like_all_marker(p) {
5170                        p.trailing_comments.clear();
5171                    }
5172                }
5173                if let Expression::Paren(ref mut p) = op.right {
5174                    if is_like_all_marker(p) {
5175                        p.trailing_comments.clear();
5176                    }
5177                }
5178                Ok(Expression::Or(op))
5179            }
5180            _ => Ok(e),
5181        }
5182    })?;
5183
5184    // Phase 2: Strip remaining marker Paren(And) nodes that weren't inside an Or parent.
5185    // These are standalone LIKE ALL expansions (e.g., in SELECT expressions) that don't
5186    // need parentheses.
5187    transform_recursive(result, &|e| {
5188        if let Expression::Paren(p) = &e {
5189            if is_like_all_marker(p) {
5190                let Expression::Paren(p) = e else {
5191                    unreachable!()
5192                };
5193                return Ok(p.this);
5194            }
5195        }
5196        Ok(e)
5197    })
5198}
5199
5200/// Ensures all unaliased column outputs in subqueries and CTEs get self-aliases.
5201///
5202/// This is needed for TSQL which requires derived table outputs to be aliased.
5203/// For example: `SELECT c FROM t` inside a subquery becomes `SELECT c AS c FROM t`.
5204///
5205/// Mirrors Python sqlglot's `qualify_derived_table_outputs` function which is applied
5206/// as a TRANSFORMS preprocessor for Subquery and CTE expressions in the TSQL dialect.
5207pub fn qualify_derived_table_outputs(expr: Expression) -> Result<Expression> {
5208    use crate::expressions::Alias;
5209
5210    fn add_self_aliases_to_select(select: &mut Select) {
5211        let new_expressions: Vec<Expression> = select
5212            .expressions
5213            .iter()
5214            .map(|e| {
5215                match e {
5216                    // Column reference without alias -> add self-alias
5217                    Expression::Column(col) => {
5218                        let alias_name = col.name.clone();
5219                        Expression::Alias(Box::new(Alias {
5220                            this: e.clone(),
5221                            alias: alias_name,
5222                            column_aliases: Vec::new(),
5223                            pre_alias_comments: Vec::new(),
5224                            trailing_comments: Vec::new(),
5225                            inferred_type: None,
5226                        }))
5227                    }
5228                    // Already aliased or star or other -> keep as is
5229                    _ => e.clone(),
5230                }
5231            })
5232            .collect();
5233        select.expressions = new_expressions;
5234    }
5235
5236    fn walk_and_qualify(expr: &mut Expression) {
5237        match expr {
5238            Expression::Select(ref mut select) => {
5239                // Qualify subqueries in FROM
5240                if let Some(ref mut from) = select.from {
5241                    for e in from.expressions.iter_mut() {
5242                        qualify_subquery_expr(e);
5243                        walk_and_qualify(e);
5244                    }
5245                }
5246                // Qualify subqueries in JOINs
5247                for join in select.joins.iter_mut() {
5248                    qualify_subquery_expr(&mut join.this);
5249                    walk_and_qualify(&mut join.this);
5250                }
5251                // Recurse into expressions (for correlated subqueries etc.)
5252                for e in select.expressions.iter_mut() {
5253                    walk_and_qualify(e);
5254                }
5255                // Recurse into WHERE
5256                if let Some(ref mut w) = select.where_clause {
5257                    walk_and_qualify(&mut w.this);
5258                }
5259            }
5260            Expression::Subquery(ref mut subquery) => {
5261                walk_and_qualify(&mut subquery.this);
5262            }
5263            Expression::Union(ref mut u) => {
5264                walk_and_qualify(&mut u.left);
5265                walk_and_qualify(&mut u.right);
5266            }
5267            Expression::Intersect(ref mut i) => {
5268                walk_and_qualify(&mut i.left);
5269                walk_and_qualify(&mut i.right);
5270            }
5271            Expression::Except(ref mut e) => {
5272                walk_and_qualify(&mut e.left);
5273                walk_and_qualify(&mut e.right);
5274            }
5275            Expression::Cte(ref mut cte) => {
5276                walk_and_qualify(&mut cte.this);
5277            }
5278            _ => {}
5279        }
5280    }
5281
5282    fn qualify_subquery_expr(expr: &mut Expression) {
5283        match expr {
5284            Expression::Subquery(ref mut subquery) => {
5285                // Only qualify if the subquery has a table alias but no column aliases
5286                if subquery.alias.is_some() && subquery.column_aliases.is_empty() {
5287                    if let Expression::Select(ref mut inner_select) = subquery.this {
5288                        // Check the inner select doesn't use *
5289                        let has_star = inner_select
5290                            .expressions
5291                            .iter()
5292                            .any(|e| matches!(e, Expression::Star(_)));
5293                        if !has_star {
5294                            add_self_aliases_to_select(inner_select);
5295                        }
5296                    }
5297                }
5298                // Recurse into the subquery's inner query
5299                walk_and_qualify(&mut subquery.this);
5300            }
5301            Expression::Alias(ref mut alias) => {
5302                qualify_subquery_expr(&mut alias.this);
5303            }
5304            _ => {}
5305        }
5306    }
5307
5308    let mut result = expr;
5309    walk_and_qualify(&mut result);
5310
5311    // Also qualify CTE inner queries at the top level
5312    if let Expression::Select(ref mut select) = result {
5313        if let Some(ref mut with) = select.with {
5314            for cte in with.ctes.iter_mut() {
5315                // CTE with column names -> no need to qualify
5316                if cte.columns.is_empty() {
5317                    // Walk into the CTE's inner query for nested subqueries
5318                    walk_and_qualify(&mut cte.this);
5319                }
5320            }
5321        }
5322    }
5323
5324    Ok(result)
5325}
5326
5327#[cfg(test)]
5328mod tests {
5329    use super::*;
5330    use crate::dialects::{Dialect, DialectType};
5331    use crate::expressions::Column;
5332
5333    fn gen(expr: &Expression) -> String {
5334        let dialect = Dialect::get(DialectType::Generic);
5335        dialect.generate(expr).unwrap()
5336    }
5337
5338    #[test]
5339    fn test_preprocess() {
5340        let expr = Expression::Boolean(BooleanLiteral { value: true });
5341        let result = preprocess(expr, &[replace_bool_with_int]).unwrap();
5342        assert!(
5343            matches!(result, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)))
5344        );
5345    }
5346
5347    #[test]
5348    fn test_preprocess_chain() {
5349        // Test chaining multiple transforms using function pointers
5350        let expr = Expression::Boolean(BooleanLiteral { value: true });
5351        // Create array of function pointers (all same type)
5352        let transforms: Vec<fn(Expression) -> Result<Expression>> =
5353            vec![replace_bool_with_int, replace_int_with_bool];
5354        let result = preprocess(expr, &transforms).unwrap();
5355        // After replace_bool_with_int: 1
5356        // After replace_int_with_bool: true
5357        if let Expression::Boolean(b) = result {
5358            assert!(b.value);
5359        } else {
5360            panic!("Expected boolean literal");
5361        }
5362    }
5363
5364    #[test]
5365    fn test_unnest_to_explode() {
5366        let unnest = Expression::Unnest(Box::new(UnnestFunc {
5367            this: Expression::boxed_column(Column {
5368                name: Identifier::new("arr".to_string()),
5369                table: None,
5370                join_mark: false,
5371                trailing_comments: vec![],
5372                span: None,
5373                inferred_type: None,
5374            }),
5375            expressions: Vec::new(),
5376            with_ordinality: false,
5377            alias: None,
5378            offset_alias: None,
5379        }));
5380
5381        let result = unnest_to_explode(unnest).unwrap();
5382        assert!(matches!(result, Expression::Explode(_)));
5383    }
5384
5385    #[test]
5386    fn test_explode_to_unnest() {
5387        let explode = Expression::Explode(Box::new(UnaryFunc {
5388            this: Expression::boxed_column(Column {
5389                name: Identifier::new("arr".to_string()),
5390                table: None,
5391                join_mark: false,
5392                trailing_comments: vec![],
5393                span: None,
5394                inferred_type: None,
5395            }),
5396            original_name: None,
5397            inferred_type: None,
5398        }));
5399
5400        let result = explode_to_unnest(explode).unwrap();
5401        assert!(matches!(result, Expression::Unnest(_)));
5402    }
5403
5404    #[test]
5405    fn test_replace_bool_with_int() {
5406        let true_expr = Expression::Boolean(BooleanLiteral { value: true });
5407        let result = replace_bool_with_int(true_expr).unwrap();
5408        if let Expression::Literal(lit) = result {
5409            if let Literal::Number(n) = lit.as_ref() {
5410                assert_eq!(n, "1");
5411            }
5412        } else {
5413            panic!("Expected number literal");
5414        }
5415
5416        let false_expr = Expression::Boolean(BooleanLiteral { value: false });
5417        let result = replace_bool_with_int(false_expr).unwrap();
5418        if let Expression::Literal(lit) = result {
5419            if let Literal::Number(n) = lit.as_ref() {
5420                assert_eq!(n, "0");
5421            }
5422        } else {
5423            panic!("Expected number literal");
5424        }
5425    }
5426
5427    #[test]
5428    fn test_replace_int_with_bool() {
5429        let one_expr = Expression::Literal(Box::new(Literal::Number("1".to_string())));
5430        let result = replace_int_with_bool(one_expr).unwrap();
5431        if let Expression::Boolean(b) = result {
5432            assert!(b.value);
5433        } else {
5434            panic!("Expected boolean true");
5435        }
5436
5437        let zero_expr = Expression::Literal(Box::new(Literal::Number("0".to_string())));
5438        let result = replace_int_with_bool(zero_expr).unwrap();
5439        if let Expression::Boolean(b) = result {
5440            assert!(!b.value);
5441        } else {
5442            panic!("Expected boolean false");
5443        }
5444
5445        // Test that other numbers are not converted
5446        let two_expr = Expression::Literal(Box::new(Literal::Number("2".to_string())));
5447        let result = replace_int_with_bool(two_expr).unwrap();
5448        assert!(
5449            matches!(result, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)))
5450        );
5451    }
5452
5453    #[test]
5454    fn test_strip_data_type_params() {
5455        // Test Decimal
5456        let decimal = DataType::Decimal {
5457            precision: Some(10),
5458            scale: Some(2),
5459        };
5460        let stripped = strip_data_type_params(decimal);
5461        assert_eq!(
5462            stripped,
5463            DataType::Decimal {
5464                precision: None,
5465                scale: None
5466            }
5467        );
5468
5469        // Test VarChar
5470        let varchar = DataType::VarChar {
5471            length: Some(255),
5472            parenthesized_length: false,
5473        };
5474        let stripped = strip_data_type_params(varchar);
5475        assert_eq!(
5476            stripped,
5477            DataType::VarChar {
5478                length: None,
5479                parenthesized_length: false
5480            }
5481        );
5482
5483        // Test Char
5484        let char_type = DataType::Char { length: Some(10) };
5485        let stripped = strip_data_type_params(char_type);
5486        assert_eq!(stripped, DataType::Char { length: None });
5487
5488        // Test Timestamp (preserve timezone)
5489        let timestamp = DataType::Timestamp {
5490            precision: Some(6),
5491            timezone: true,
5492        };
5493        let stripped = strip_data_type_params(timestamp);
5494        assert_eq!(
5495            stripped,
5496            DataType::Timestamp {
5497                precision: None,
5498                timezone: true
5499            }
5500        );
5501
5502        // Test Array (recursive)
5503        let array = DataType::Array {
5504            element_type: Box::new(DataType::VarChar {
5505                length: Some(100),
5506                parenthesized_length: false,
5507            }),
5508            dimension: None,
5509        };
5510        let stripped = strip_data_type_params(array);
5511        assert_eq!(
5512            stripped,
5513            DataType::Array {
5514                element_type: Box::new(DataType::VarChar {
5515                    length: None,
5516                    parenthesized_length: false
5517                }),
5518                dimension: None,
5519            }
5520        );
5521
5522        // Test types without params are unchanged
5523        let text = DataType::Text;
5524        let stripped = strip_data_type_params(text);
5525        assert_eq!(stripped, DataType::Text);
5526    }
5527
5528    #[test]
5529    fn test_remove_precision_parameterized_types_cast() {
5530        // Create a CAST(1 AS DECIMAL(10, 2)) expression
5531        let cast_expr = Expression::Cast(Box::new(Cast {
5532            this: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
5533            to: DataType::Decimal {
5534                precision: Some(10),
5535                scale: Some(2),
5536            },
5537            trailing_comments: vec![],
5538            double_colon_syntax: false,
5539            format: None,
5540            default: None,
5541            inferred_type: None,
5542        }));
5543
5544        let result = remove_precision_parameterized_types(cast_expr).unwrap();
5545        if let Expression::Cast(cast) = result {
5546            assert_eq!(
5547                cast.to,
5548                DataType::Decimal {
5549                    precision: None,
5550                    scale: None
5551                }
5552            );
5553        } else {
5554            panic!("Expected Cast expression");
5555        }
5556    }
5557
5558    #[test]
5559    fn test_remove_precision_parameterized_types_varchar() {
5560        // Create a CAST('hello' AS VARCHAR(10)) expression
5561        let cast_expr = Expression::Cast(Box::new(Cast {
5562            this: Expression::Literal(Box::new(Literal::String("hello".to_string()))),
5563            to: DataType::VarChar {
5564                length: Some(10),
5565                parenthesized_length: false,
5566            },
5567            trailing_comments: vec![],
5568            double_colon_syntax: false,
5569            format: None,
5570            default: None,
5571            inferred_type: None,
5572        }));
5573
5574        let result = remove_precision_parameterized_types(cast_expr).unwrap();
5575        if let Expression::Cast(cast) = result {
5576            assert_eq!(
5577                cast.to,
5578                DataType::VarChar {
5579                    length: None,
5580                    parenthesized_length: false
5581                }
5582            );
5583        } else {
5584            panic!("Expected Cast expression");
5585        }
5586    }
5587
5588    #[test]
5589    fn test_remove_precision_direct_cast() {
5590        // Test transform on a direct Cast expression (not nested in Select)
5591        // The current implementation handles top-level Cast expressions;
5592        // a full implementation would need recursive AST traversal
5593        let cast = Expression::Cast(Box::new(Cast {
5594            this: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
5595            to: DataType::Decimal {
5596                precision: Some(10),
5597                scale: Some(2),
5598            },
5599            trailing_comments: vec![],
5600            double_colon_syntax: false,
5601            format: None,
5602            default: None,
5603            inferred_type: None,
5604        }));
5605
5606        let transformed = remove_precision_parameterized_types(cast).unwrap();
5607        let generated = gen(&transformed);
5608
5609        // Should now be DECIMAL without precision
5610        assert!(generated.contains("DECIMAL"));
5611        assert!(!generated.contains("(10"));
5612    }
5613
5614    #[test]
5615    fn test_epoch_cast_to_ts() {
5616        // Test CAST('epoch' AS TIMESTAMP) → CAST('1970-01-01 00:00:00' AS TIMESTAMP)
5617        let cast_expr = Expression::Cast(Box::new(Cast {
5618            this: Expression::Literal(Box::new(Literal::String("epoch".to_string()))),
5619            to: DataType::Timestamp {
5620                precision: None,
5621                timezone: false,
5622            },
5623            trailing_comments: vec![],
5624            double_colon_syntax: false,
5625            format: None,
5626            default: None,
5627            inferred_type: None,
5628        }));
5629
5630        let result = epoch_cast_to_ts(cast_expr).unwrap();
5631        if let Expression::Cast(cast) = result {
5632            if let Expression::Literal(lit) = cast.this {
5633                if let Literal::String(s) = lit.as_ref() {
5634                    assert_eq!(s, "1970-01-01 00:00:00");
5635                }
5636            } else {
5637                panic!("Expected string literal");
5638            }
5639        } else {
5640            panic!("Expected Cast expression");
5641        }
5642    }
5643
5644    #[test]
5645    fn test_epoch_cast_to_ts_preserves_non_epoch() {
5646        // Test that non-epoch strings are preserved
5647        let cast_expr = Expression::Cast(Box::new(Cast {
5648            this: Expression::Literal(Box::new(Literal::String("2024-01-15".to_string()))),
5649            to: DataType::Timestamp {
5650                precision: None,
5651                timezone: false,
5652            },
5653            trailing_comments: vec![],
5654            double_colon_syntax: false,
5655            format: None,
5656            default: None,
5657            inferred_type: None,
5658        }));
5659
5660        let result = epoch_cast_to_ts(cast_expr).unwrap();
5661        if let Expression::Cast(cast) = result {
5662            if let Expression::Literal(lit) = cast.this {
5663                if let Literal::String(s) = lit.as_ref() {
5664                    assert_eq!(s, "2024-01-15");
5665                }
5666            } else {
5667                panic!("Expected string literal");
5668            }
5669        } else {
5670            panic!("Expected Cast expression");
5671        }
5672    }
5673
5674    #[test]
5675    fn test_unqualify_columns() {
5676        // Test that table qualifiers are removed
5677        let col = Expression::boxed_column(Column {
5678            name: Identifier::new("id".to_string()),
5679            table: Some(Identifier::new("users".to_string())),
5680            join_mark: false,
5681            trailing_comments: vec![],
5682            span: None,
5683            inferred_type: None,
5684        });
5685
5686        let result = unqualify_columns(col).unwrap();
5687        if let Expression::Column(c) = result {
5688            assert!(c.table.is_none());
5689            assert_eq!(c.name.name, "id");
5690        } else {
5691            panic!("Expected Column expression");
5692        }
5693    }
5694
5695    #[test]
5696    fn test_is_temporal_type() {
5697        assert!(is_temporal_type(&DataType::Date));
5698        assert!(is_temporal_type(&DataType::Timestamp {
5699            precision: None,
5700            timezone: false
5701        }));
5702        assert!(is_temporal_type(&DataType::Time {
5703            precision: None,
5704            timezone: false
5705        }));
5706        assert!(!is_temporal_type(&DataType::Int {
5707            length: None,
5708            integer_spelling: false
5709        }));
5710        assert!(!is_temporal_type(&DataType::VarChar {
5711            length: None,
5712            parenthesized_length: false
5713        }));
5714    }
5715
5716    #[test]
5717    fn test_eliminate_semi_join_basic() {
5718        use crate::expressions::{Join, TableRef};
5719
5720        // Test that semi joins are converted to EXISTS
5721        let select = Expression::Select(Box::new(Select {
5722            expressions: vec![Expression::boxed_column(Column {
5723                name: Identifier::new("a".to_string()),
5724                table: None,
5725                join_mark: false,
5726                trailing_comments: vec![],
5727                span: None,
5728                inferred_type: None,
5729            })],
5730            from: Some(From {
5731                expressions: vec![Expression::Table(Box::new(TableRef::new("t1")))],
5732            }),
5733            joins: vec![Join {
5734                this: Expression::Table(Box::new(TableRef::new("t2"))),
5735                kind: JoinKind::Semi,
5736                on: Some(Expression::Eq(Box::new(BinaryOp {
5737                    left: Expression::boxed_column(Column {
5738                        name: Identifier::new("x".to_string()),
5739                        table: None,
5740                        join_mark: false,
5741                        trailing_comments: vec![],
5742                        span: None,
5743                        inferred_type: None,
5744                    }),
5745                    right: Expression::boxed_column(Column {
5746                        name: Identifier::new("y".to_string()),
5747                        table: None,
5748                        join_mark: false,
5749                        trailing_comments: vec![],
5750                        span: None,
5751                        inferred_type: None,
5752                    }),
5753                    left_comments: vec![],
5754                    operator_comments: vec![],
5755                    trailing_comments: vec![],
5756                    inferred_type: None,
5757                }))),
5758                using: vec![],
5759                use_inner_keyword: false,
5760                use_outer_keyword: false,
5761                deferred_condition: false,
5762                join_hint: None,
5763                match_condition: None,
5764                pivots: Vec::new(),
5765                comments: Vec::new(),
5766                nesting_group: 0,
5767                directed: false,
5768            }],
5769            ..Select::new()
5770        }));
5771
5772        let result = eliminate_semi_and_anti_joins(select).unwrap();
5773        if let Expression::Select(s) = result {
5774            // Semi join should be removed
5775            assert!(s.joins.is_empty());
5776            // WHERE clause should have EXISTS
5777            assert!(s.where_clause.is_some());
5778        } else {
5779            panic!("Expected Select expression");
5780        }
5781    }
5782
5783    #[test]
5784    fn test_no_ilike_sql() {
5785        use crate::expressions::LikeOp;
5786
5787        // Test ILIKE conversion to LOWER+LIKE
5788        let ilike_expr = Expression::ILike(Box::new(LikeOp {
5789            left: Expression::boxed_column(Column {
5790                name: Identifier::new("name".to_string()),
5791                table: None,
5792                join_mark: false,
5793                trailing_comments: vec![],
5794                span: None,
5795                inferred_type: None,
5796            }),
5797            right: Expression::Literal(Box::new(Literal::String("%test%".to_string()))),
5798            escape: None,
5799            quantifier: None,
5800            inferred_type: None,
5801        }));
5802
5803        let result = no_ilike_sql(ilike_expr).unwrap();
5804        if let Expression::Like(like) = result {
5805            // Left should be LOWER(name)
5806            if let Expression::Function(f) = &like.left {
5807                assert_eq!(f.name, "LOWER");
5808            } else {
5809                panic!("Expected LOWER function on left");
5810            }
5811            // Right should be LOWER('%test%')
5812            if let Expression::Function(f) = &like.right {
5813                assert_eq!(f.name, "LOWER");
5814            } else {
5815                panic!("Expected LOWER function on right");
5816            }
5817        } else {
5818            panic!("Expected Like expression");
5819        }
5820    }
5821
5822    #[test]
5823    fn test_no_trycast_sql() {
5824        // Test TryCast conversion to Cast
5825        let trycast_expr = Expression::TryCast(Box::new(Cast {
5826            this: Expression::Literal(Box::new(Literal::String("123".to_string()))),
5827            to: DataType::Int {
5828                length: None,
5829                integer_spelling: false,
5830            },
5831            trailing_comments: vec![],
5832            double_colon_syntax: false,
5833            format: None,
5834            default: None,
5835            inferred_type: None,
5836        }));
5837
5838        let result = no_trycast_sql(trycast_expr).unwrap();
5839        assert!(matches!(result, Expression::Cast(_)));
5840    }
5841
5842    #[test]
5843    fn test_no_safe_cast_sql() {
5844        // Test SafeCast conversion to Cast
5845        let safe_cast_expr = Expression::SafeCast(Box::new(Cast {
5846            this: Expression::Literal(Box::new(Literal::String("123".to_string()))),
5847            to: DataType::Int {
5848                length: None,
5849                integer_spelling: false,
5850            },
5851            trailing_comments: vec![],
5852            double_colon_syntax: false,
5853            format: None,
5854            default: None,
5855            inferred_type: None,
5856        }));
5857
5858        let result = no_safe_cast_sql(safe_cast_expr).unwrap();
5859        assert!(matches!(result, Expression::Cast(_)));
5860    }
5861
5862    #[test]
5863    fn test_explode_to_unnest_presto() {
5864        let spark = Dialect::get(DialectType::Spark);
5865        let result = spark
5866            .transpile("SELECT EXPLODE(x) FROM tbl", DialectType::Presto)
5867            .unwrap();
5868        assert_eq!(
5869            result[0],
5870            "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col FROM tbl CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(x)))) AS _u(pos) CROSS JOIN UNNEST(x) WITH ORDINALITY AS _u_2(col, pos_2) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(x) AND _u_2.pos_2 = CARDINALITY(x))"
5871        );
5872    }
5873
5874    #[test]
5875    fn test_explode_to_unnest_bigquery() {
5876        let spark = Dialect::get(DialectType::Spark);
5877        let result = spark
5878            .transpile("SELECT EXPLODE(x) FROM tbl", DialectType::BigQuery)
5879            .unwrap();
5880        assert_eq!(
5881            result[0],
5882            "SELECT IF(pos = pos_2, col, NULL) AS col FROM tbl CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(x)) - 1)) AS pos CROSS JOIN UNNEST(x) AS col WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(x) - 1) AND pos_2 = (ARRAY_LENGTH(x) - 1))"
5883        );
5884    }
5885}