Skip to main content

polyglot_sql/
transforms.rs

1//! SQL AST Transforms
2//!
3//! This module provides functions to transform SQL ASTs for dialect compatibility.
4//! These transforms are used during transpilation to convert dialect-specific features
5//! to forms that are supported by the target dialect.
6//!
7//! Based on the Python implementation in `sqlglot/transforms.py`.
8
9use crate::dialects::transform_recursive;
10use crate::dialects::{Dialect, DialectType};
11use crate::error::Result;
12use crate::expressions::{
13    Alias, BinaryOp, BooleanLiteral, Cast, DataType, Exists, Expression, From, Function,
14    Identifier, Join, JoinKind, Lateral, LateralView, Literal, NamedArgSeparator, NamedArgument,
15    Over, Select, StructField, Subquery, UnaryFunc, UnnestFunc, Where,
16};
17use std::cell::RefCell;
18
19/// Apply a chain of transforms to an expression
20///
21/// # Arguments
22/// * `expr` - The expression to transform
23/// * `transforms` - A list of transform functions to apply in order
24///
25/// # Returns
26/// The transformed expression
27pub fn preprocess<F>(expr: Expression, transforms: &[F]) -> Result<Expression>
28where
29    F: Fn(Expression) -> Result<Expression>,
30{
31    let mut result = expr;
32    for transform in transforms {
33        result = transform(result)?;
34    }
35    Ok(result)
36}
37
38/// Convert UNNEST to EXPLODE (for Spark/Hive compatibility)
39///
40/// UNNEST is standard SQL but Spark uses EXPLODE instead.
41pub fn unnest_to_explode(expr: Expression) -> Result<Expression> {
42    match expr {
43        Expression::Unnest(unnest) => {
44            Ok(Expression::Explode(Box::new(UnaryFunc::new(unnest.this))))
45        }
46        _ => Ok(expr),
47    }
48}
49
50/// Convert CROSS JOIN UNNEST to LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks.
51///
52/// This is a SELECT-level structural transformation that:
53/// 1. Converts UNNEST in FROM clause to INLINE/EXPLODE
54/// 2. Converts CROSS JOIN (LATERAL) UNNEST to LATERAL VIEW entries
55/// 3. For single-arg UNNEST: uses EXPLODE
56/// 4. For multi-arg UNNEST: uses INLINE(ARRAYS_ZIP(...))
57///
58/// Based on Python sqlglot's `unnest_to_explode` transform in transforms.py (lines 290-391).
59pub fn unnest_to_explode_select(expr: Expression) -> Result<Expression> {
60    transform_recursive(expr, &unnest_to_explode_select_inner)
61}
62
63/// Helper to determine the UDTF function for an UNNEST expression.
64/// Single-arg UNNEST → EXPLODE, multi-arg → INLINE
65fn make_udtf_expr(unnest: &UnnestFunc) -> Expression {
66    let has_multi_expr = !unnest.expressions.is_empty();
67    if has_multi_expr {
68        // Multi-arg: INLINE(ARRAYS_ZIP(arg1, arg2, ...))
69        let mut all_args = vec![unnest.this.clone()];
70        all_args.extend(unnest.expressions.iter().cloned());
71        let arrays_zip =
72            Expression::Function(Box::new(Function::new("ARRAYS_ZIP".to_string(), all_args)));
73        Expression::Function(Box::new(Function::new(
74            "INLINE".to_string(),
75            vec![arrays_zip],
76        )))
77    } else {
78        // Single-arg: EXPLODE(arg)
79        Expression::Explode(Box::new(UnaryFunc::new(unnest.this.clone())))
80    }
81}
82
83fn unnest_to_explode_select_inner(expr: Expression) -> Result<Expression> {
84    let Expression::Select(mut select) = expr else {
85        return Ok(expr);
86    };
87
88    // Process FROM clause: UNNEST items need conversion
89    if let Some(ref mut from) = select.from {
90        if from.expressions.len() >= 1 {
91            let mut new_from_exprs = Vec::new();
92            let mut new_lateral_views = Vec::new();
93            let first_is_unnest = is_unnest_expr(&from.expressions[0]);
94
95            for (idx, from_item) in from.expressions.drain(..).enumerate() {
96                if idx == 0 && first_is_unnest {
97                    // UNNEST is the first (and possibly only) item in FROM
98                    // Replace it with INLINE/EXPLODE, keeping alias
99                    let replaced = replace_from_unnest(from_item);
100                    new_from_exprs.push(replaced);
101                } else if idx > 0 && is_unnest_expr(&from_item) {
102                    // Additional UNNEST items in FROM (comma-joined) → LATERAL VIEW
103                    let (alias_name, column_aliases, unnest_func) = extract_unnest_info(from_item);
104                    let udtf = make_udtf_expr(&unnest_func);
105                    new_lateral_views.push(LateralView {
106                        this: udtf,
107                        table_alias: alias_name,
108                        column_aliases,
109                        outer: false,
110                    });
111                } else {
112                    new_from_exprs.push(from_item);
113                }
114            }
115
116            from.expressions = new_from_exprs;
117            // Append lateral views for comma-joined UNNESTs
118            select.lateral_views.extend(new_lateral_views);
119        }
120    }
121
122    // Process joins: CROSS JOIN (LATERAL) UNNEST → LATERAL VIEW
123    let mut remaining_joins = Vec::new();
124    for join in select.joins.drain(..) {
125        if matches!(join.kind, JoinKind::Cross | JoinKind::Inner) {
126            let (is_unnest, is_lateral) = check_join_unnest(&join.this);
127            if is_unnest {
128                // Extract UNNEST info from join, handling Lateral wrapper
129                let (lateral_alias, lateral_col_aliases, join_expr) = if is_lateral {
130                    if let Expression::Lateral(lat) = join.this {
131                        // Extract alias from Lateral struct
132                        let alias = lat.alias.map(|s| Identifier::new(&s));
133                        let col_aliases: Vec<Identifier> = lat
134                            .column_aliases
135                            .iter()
136                            .map(|s| Identifier::new(s))
137                            .collect();
138                        (alias, col_aliases, *lat.this)
139                    } else {
140                        (None, Vec::new(), join.this)
141                    }
142                } else {
143                    (None, Vec::new(), join.this)
144                };
145
146                let (alias_name, column_aliases, unnest_func) = extract_unnest_info(join_expr);
147
148                // Prefer Lateral's alias over UNNEST's alias
149                let final_alias = lateral_alias.or(alias_name);
150                let final_col_aliases = if !lateral_col_aliases.is_empty() {
151                    lateral_col_aliases
152                } else {
153                    column_aliases
154                };
155
156                // Use "unnest" as default alias if none provided (for single-arg case)
157                let table_alias = final_alias.or_else(|| Some(Identifier::new("unnest")));
158                let col_aliases = if final_col_aliases.is_empty() {
159                    vec![Identifier::new("unnest")]
160                } else {
161                    final_col_aliases
162                };
163
164                let udtf = make_udtf_expr(&unnest_func);
165                select.lateral_views.push(LateralView {
166                    this: udtf,
167                    table_alias,
168                    column_aliases: col_aliases,
169                    outer: false,
170                });
171            } else {
172                remaining_joins.push(join);
173            }
174        } else {
175            remaining_joins.push(join);
176        }
177    }
178    select.joins = remaining_joins;
179
180    Ok(Expression::Select(select))
181}
182
183/// Check if an expression is or wraps an UNNEST
184fn is_unnest_expr(expr: &Expression) -> bool {
185    match expr {
186        Expression::Unnest(_) => true,
187        Expression::Alias(a) => matches!(a.this, Expression::Unnest(_)),
188        _ => false,
189    }
190}
191
192/// Check if a join's expression is an UNNEST (possibly wrapped in Lateral)
193fn check_join_unnest(expr: &Expression) -> (bool, bool) {
194    match expr {
195        Expression::Unnest(_) => (true, false),
196        Expression::Alias(a) => {
197            if matches!(a.this, Expression::Unnest(_)) {
198                (true, false)
199            } else {
200                (false, false)
201            }
202        }
203        Expression::Lateral(lat) => match &*lat.this {
204            Expression::Unnest(_) => (true, true),
205            Expression::Alias(a) => {
206                if matches!(a.this, Expression::Unnest(_)) {
207                    (true, true)
208                } else {
209                    (false, true)
210                }
211            }
212            _ => (false, true),
213        },
214        _ => (false, false),
215    }
216}
217
218/// Replace an UNNEST in FROM with INLINE/EXPLODE, preserving alias structure
219fn replace_from_unnest(from_item: Expression) -> Expression {
220    match from_item {
221        Expression::Alias(mut a) => {
222            if let Expression::Unnest(unnest) = a.this {
223                a.this = make_udtf_expr(&unnest);
224            }
225            Expression::Alias(a)
226        }
227        Expression::Unnest(unnest) => make_udtf_expr(&unnest),
228        other => other,
229    }
230}
231
232/// Extract alias info and UnnestFunc from an expression (possibly wrapped in Alias)
233fn extract_unnest_info(expr: Expression) -> (Option<Identifier>, Vec<Identifier>, UnnestFunc) {
234    match expr {
235        Expression::Alias(a) => {
236            if let Expression::Unnest(unnest) = a.this {
237                (Some(a.alias), a.column_aliases, *unnest)
238            } else {
239                // Should not happen if we already checked is_unnest_expr
240                (
241                    Some(a.alias),
242                    a.column_aliases,
243                    UnnestFunc {
244                        this: a.this,
245                        expressions: Vec::new(),
246                        with_ordinality: false,
247                        alias: None,
248                        offset_alias: None,
249                    },
250                )
251            }
252        }
253        Expression::Unnest(unnest) => {
254            let alias = unnest.alias.clone();
255            (alias, Vec::new(), *unnest)
256        }
257        _ => (
258            None,
259            Vec::new(),
260            UnnestFunc {
261                this: expr,
262                expressions: Vec::new(),
263                with_ordinality: false,
264                alias: None,
265                offset_alias: None,
266            },
267        ),
268    }
269}
270
271/// Convert EXPLODE to UNNEST (for standard SQL compatibility)
272pub fn explode_to_unnest(expr: Expression) -> Result<Expression> {
273    match expr {
274        Expression::Explode(explode) => Ok(Expression::Unnest(Box::new(UnnestFunc {
275            this: explode.this,
276            expressions: Vec::new(),
277            with_ordinality: false,
278            alias: None,
279            offset_alias: None,
280        }))),
281        _ => Ok(expr),
282    }
283}
284
285/// Replace boolean literals for dialects that don't support them
286///
287/// Converts TRUE/FALSE to 1/0 for dialects like older MySQL versions
288pub fn replace_bool_with_int(expr: Expression) -> Result<Expression> {
289    match expr {
290        Expression::Boolean(b) => {
291            let value = if b.value { "1" } else { "0" };
292            Ok(Expression::Literal(Literal::Number(value.to_string())))
293        }
294        _ => Ok(expr),
295    }
296}
297
298/// Replace integer literals for dialects that prefer boolean
299///
300/// Converts 1/0 to TRUE/FALSE
301pub fn replace_int_with_bool(expr: Expression) -> Result<Expression> {
302    match expr {
303        Expression::Literal(Literal::Number(n)) if n == "1" || n == "0" => {
304            Ok(Expression::Boolean(BooleanLiteral { value: n == "1" }))
305        }
306        _ => Ok(expr),
307    }
308}
309
310/// Remove precision from parameterized types
311///
312/// Some dialects don't support precision parameters on certain types.
313/// This transform removes them, e.g., VARCHAR(255) → VARCHAR, DECIMAL(10,2) → DECIMAL
314pub fn remove_precision_parameterized_types(expr: Expression) -> Result<Expression> {
315    Ok(strip_type_params_recursive(expr))
316}
317
318/// Recursively strip type parameters from DataType values in an expression
319fn strip_type_params_recursive(expr: Expression) -> Expression {
320    match expr {
321        // Handle Cast expressions - strip precision from target type
322        Expression::Cast(mut cast) => {
323            cast.to = strip_data_type_params(cast.to);
324            // Also recursively process the expression being cast
325            cast.this = strip_type_params_recursive(cast.this);
326            Expression::Cast(cast)
327        }
328        // Handle TryCast expressions (uses same Cast struct)
329        Expression::TryCast(mut try_cast) => {
330            try_cast.to = strip_data_type_params(try_cast.to);
331            try_cast.this = strip_type_params_recursive(try_cast.this);
332            Expression::TryCast(try_cast)
333        }
334        // Handle SafeCast expressions (uses same Cast struct)
335        Expression::SafeCast(mut safe_cast) => {
336            safe_cast.to = strip_data_type_params(safe_cast.to);
337            safe_cast.this = strip_type_params_recursive(safe_cast.this);
338            Expression::SafeCast(safe_cast)
339        }
340        // For now, pass through other expressions
341        // A full implementation would recursively visit all nodes
342        _ => expr,
343    }
344}
345
346/// Strip precision/scale/length parameters from a DataType
347fn strip_data_type_params(dt: DataType) -> DataType {
348    match dt {
349        // Numeric types with precision/scale
350        DataType::Decimal { .. } => DataType::Decimal {
351            precision: None,
352            scale: None,
353        },
354        DataType::TinyInt { .. } => DataType::TinyInt { length: None },
355        DataType::SmallInt { .. } => DataType::SmallInt { length: None },
356        DataType::Int { .. } => DataType::Int {
357            length: None,
358            integer_spelling: false,
359        },
360        DataType::BigInt { .. } => DataType::BigInt { length: None },
361
362        // String types with length
363        DataType::Char { .. } => DataType::Char { length: None },
364        DataType::VarChar { .. } => DataType::VarChar {
365            length: None,
366            parenthesized_length: false,
367        },
368
369        // Binary types with length
370        DataType::Binary { .. } => DataType::Binary { length: None },
371        DataType::VarBinary { .. } => DataType::VarBinary { length: None },
372
373        // Bit types with length
374        DataType::Bit { .. } => DataType::Bit { length: None },
375        DataType::VarBit { .. } => DataType::VarBit { length: None },
376
377        // Time types with precision
378        DataType::Time { .. } => DataType::Time {
379            precision: None,
380            timezone: false,
381        },
382        DataType::Timestamp { timezone, .. } => DataType::Timestamp {
383            precision: None,
384            timezone,
385        },
386
387        // Array - recursively strip element type
388        DataType::Array {
389            element_type,
390            dimension,
391        } => DataType::Array {
392            element_type: Box::new(strip_data_type_params(*element_type)),
393            dimension,
394        },
395
396        // Map - recursively strip key and value types
397        DataType::Map {
398            key_type,
399            value_type,
400        } => DataType::Map {
401            key_type: Box::new(strip_data_type_params(*key_type)),
402            value_type: Box::new(strip_data_type_params(*value_type)),
403        },
404
405        // Struct - recursively strip field types
406        DataType::Struct { fields, nested } => DataType::Struct {
407            fields: fields
408                .into_iter()
409                .map(|f| {
410                    StructField::with_options(
411                        f.name,
412                        strip_data_type_params(f.data_type),
413                        f.options,
414                    )
415                })
416                .collect(),
417            nested,
418        },
419
420        // Vector - strip dimension
421        DataType::Vector { element_type, .. } => DataType::Vector {
422            element_type: element_type.map(|et| Box::new(strip_data_type_params(*et))),
423            dimension: None,
424        },
425
426        // Object - recursively strip field types
427        DataType::Object { fields, modifier } => DataType::Object {
428            fields: fields
429                .into_iter()
430                .map(|(name, ty, not_null)| (name, strip_data_type_params(ty), not_null))
431                .collect(),
432            modifier,
433        },
434
435        // Other types pass through unchanged
436        other => other,
437    }
438}
439
440/// Eliminate QUALIFY clause by converting to a subquery with WHERE filter
441///
442/// QUALIFY is supported by Snowflake, BigQuery, and DuckDB but not by most other dialects.
443///
444/// Converts:
445/// ```sql
446/// SELECT * FROM t QUALIFY ROW_NUMBER() OVER (...) = 1
447/// ```
448/// To:
449/// ```sql
450/// SELECT * FROM (SELECT *, ROW_NUMBER() OVER (...) AS _w FROM t) _t WHERE _w = 1
451/// ```
452///
453/// Reference: `transforms.py:194-255`
454pub fn eliminate_qualify(expr: Expression) -> Result<Expression> {
455    match expr {
456        Expression::Select(mut select) => {
457            if let Some(qualify) = select.qualify.take() {
458                // Python sqlglot approach:
459                // 1. Extract the window function from the qualify condition
460                // 2. Add it as _w alias to the inner select
461                // 3. Replace the window function reference with _w in the outer WHERE
462                // 4. Keep original select expressions in the outer query
463
464                let qualify_filter = qualify.this;
465                let window_alias_name = "_w".to_string();
466                let window_alias_ident = Identifier::new(window_alias_name.clone());
467
468                // Try to extract window function from comparison
469                // Pattern: WINDOW_FUNC = value -> inner adds WINDOW_FUNC AS _w, outer WHERE _w = value
470                let (window_expr, outer_where) =
471                    extract_window_from_condition(qualify_filter.clone(), &window_alias_ident);
472
473                if let Some(win_expr) = window_expr {
474                    // Add window function as _w alias to inner select
475                    let window_alias_expr =
476                        Expression::Alias(Box::new(crate::expressions::Alias {
477                            this: win_expr,
478                            alias: window_alias_ident.clone(),
479                            column_aliases: vec![],
480                            pre_alias_comments: vec![],
481                            trailing_comments: vec![],
482                        }));
483
484                    // For the outer SELECT, replace aliased expressions with just the alias reference
485                    // e.g., `1 AS other_id` in inner -> `other_id` in outer
486                    // Non-aliased expressions (columns, identifiers) stay as-is
487                    let outer_exprs: Vec<Expression> = select
488                        .expressions
489                        .iter()
490                        .map(|expr| {
491                            if let Expression::Alias(a) = expr {
492                                // Replace with just the alias identifier as a column reference
493                                Expression::Column(crate::expressions::Column {
494                                    name: a.alias.clone(),
495                                    table: None,
496                                    join_mark: false,
497                                    trailing_comments: vec![],
498                                    span: None,
499                                })
500                            } else {
501                                expr.clone()
502                            }
503                        })
504                        .collect();
505                    select.expressions.push(window_alias_expr);
506
507                    // Create the inner subquery
508                    let inner_select = Expression::Select(select);
509                    let subquery = Subquery {
510                        this: inner_select,
511                        alias: Some(Identifier::new("_t".to_string())),
512                        column_aliases: vec![],
513                        order_by: None,
514                        limit: None,
515                        offset: None,
516                        distribute_by: None,
517                        sort_by: None,
518                        cluster_by: None,
519                        lateral: false,
520                        modifiers_inside: false,
521                        trailing_comments: vec![],
522                    };
523
524                    // Create the outer SELECT with alias-resolved expressions and WHERE _w <op> value
525                    let outer_select = Select {
526                        expressions: outer_exprs,
527                        from: Some(From {
528                            expressions: vec![Expression::Subquery(Box::new(subquery))],
529                        }),
530                        where_clause: Some(Where { this: outer_where }),
531                        ..Select::new()
532                    };
533
534                    return Ok(Expression::Select(Box::new(outer_select)));
535                } else {
536                    // Fallback: if we can't extract a window function, use old approach
537                    let qualify_alias = Expression::Alias(Box::new(crate::expressions::Alias {
538                        this: qualify_filter.clone(),
539                        alias: window_alias_ident.clone(),
540                        column_aliases: vec![],
541                        pre_alias_comments: vec![],
542                        trailing_comments: vec![],
543                    }));
544
545                    let original_exprs = select.expressions.clone();
546                    select.expressions.push(qualify_alias);
547
548                    let inner_select = Expression::Select(select);
549                    let subquery = Subquery {
550                        this: inner_select,
551                        alias: Some(Identifier::new("_t".to_string())),
552                        column_aliases: vec![],
553                        order_by: None,
554                        limit: None,
555                        offset: None,
556                        distribute_by: None,
557                        sort_by: None,
558                        cluster_by: None,
559                        lateral: false,
560                        modifiers_inside: false,
561                        trailing_comments: vec![],
562                    };
563
564                    let outer_select = Select {
565                        expressions: original_exprs,
566                        from: Some(From {
567                            expressions: vec![Expression::Subquery(Box::new(subquery))],
568                        }),
569                        where_clause: Some(Where {
570                            this: Expression::Column(crate::expressions::Column {
571                                name: window_alias_ident,
572                                table: None,
573                                join_mark: false,
574                                trailing_comments: vec![],
575                                span: None,
576                            }),
577                        }),
578                        ..Select::new()
579                    };
580
581                    return Ok(Expression::Select(Box::new(outer_select)));
582                }
583            }
584            Ok(Expression::Select(select))
585        }
586        other => Ok(other),
587    }
588}
589
590/// Extract a window function from a qualify condition.
591/// Returns (window_expression, rewritten_condition) if found.
592/// The rewritten condition replaces the window function with a column reference to the alias.
593fn extract_window_from_condition(
594    condition: Expression,
595    alias: &Identifier,
596) -> (Option<Expression>, Expression) {
597    let alias_col = Expression::Column(crate::expressions::Column {
598        name: alias.clone(),
599        table: None,
600        join_mark: false,
601        trailing_comments: vec![],
602        span: None,
603    });
604
605    // Check if condition is a simple comparison with a window function on one side
606    match condition {
607        // WINDOW_FUNC = value
608        Expression::Eq(ref op) => {
609            if is_window_expr(&op.left) {
610                (
611                    Some(op.left.clone()),
612                    Expression::Eq(Box::new(BinaryOp {
613                        left: alias_col,
614                        right: op.right.clone(),
615                        ..(**op).clone()
616                    })),
617                )
618            } else if is_window_expr(&op.right) {
619                (
620                    Some(op.right.clone()),
621                    Expression::Eq(Box::new(BinaryOp {
622                        left: op.left.clone(),
623                        right: alias_col,
624                        ..(**op).clone()
625                    })),
626                )
627            } else {
628                (None, condition)
629            }
630        }
631        Expression::Neq(ref op) => {
632            if is_window_expr(&op.left) {
633                (
634                    Some(op.left.clone()),
635                    Expression::Neq(Box::new(BinaryOp {
636                        left: alias_col,
637                        right: op.right.clone(),
638                        ..(**op).clone()
639                    })),
640                )
641            } else if is_window_expr(&op.right) {
642                (
643                    Some(op.right.clone()),
644                    Expression::Neq(Box::new(BinaryOp {
645                        left: op.left.clone(),
646                        right: alias_col,
647                        ..(**op).clone()
648                    })),
649                )
650            } else {
651                (None, condition)
652            }
653        }
654        Expression::Lt(ref op) => {
655            if is_window_expr(&op.left) {
656                (
657                    Some(op.left.clone()),
658                    Expression::Lt(Box::new(BinaryOp {
659                        left: alias_col,
660                        right: op.right.clone(),
661                        ..(**op).clone()
662                    })),
663                )
664            } else if is_window_expr(&op.right) {
665                (
666                    Some(op.right.clone()),
667                    Expression::Lt(Box::new(BinaryOp {
668                        left: op.left.clone(),
669                        right: alias_col,
670                        ..(**op).clone()
671                    })),
672                )
673            } else {
674                (None, condition)
675            }
676        }
677        Expression::Lte(ref op) => {
678            if is_window_expr(&op.left) {
679                (
680                    Some(op.left.clone()),
681                    Expression::Lte(Box::new(BinaryOp {
682                        left: alias_col,
683                        right: op.right.clone(),
684                        ..(**op).clone()
685                    })),
686                )
687            } else if is_window_expr(&op.right) {
688                (
689                    Some(op.right.clone()),
690                    Expression::Lte(Box::new(BinaryOp {
691                        left: op.left.clone(),
692                        right: alias_col,
693                        ..(**op).clone()
694                    })),
695                )
696            } else {
697                (None, condition)
698            }
699        }
700        Expression::Gt(ref op) => {
701            if is_window_expr(&op.left) {
702                (
703                    Some(op.left.clone()),
704                    Expression::Gt(Box::new(BinaryOp {
705                        left: alias_col,
706                        right: op.right.clone(),
707                        ..(**op).clone()
708                    })),
709                )
710            } else if is_window_expr(&op.right) {
711                (
712                    Some(op.right.clone()),
713                    Expression::Gt(Box::new(BinaryOp {
714                        left: op.left.clone(),
715                        right: alias_col,
716                        ..(**op).clone()
717                    })),
718                )
719            } else {
720                (None, condition)
721            }
722        }
723        Expression::Gte(ref op) => {
724            if is_window_expr(&op.left) {
725                (
726                    Some(op.left.clone()),
727                    Expression::Gte(Box::new(BinaryOp {
728                        left: alias_col,
729                        right: op.right.clone(),
730                        ..(**op).clone()
731                    })),
732                )
733            } else if is_window_expr(&op.right) {
734                (
735                    Some(op.right.clone()),
736                    Expression::Gte(Box::new(BinaryOp {
737                        left: op.left.clone(),
738                        right: alias_col,
739                        ..(**op).clone()
740                    })),
741                )
742            } else {
743                (None, condition)
744            }
745        }
746        // If the condition is just a window function (bare QUALIFY expression)
747        _ if is_window_expr(&condition) => (Some(condition), alias_col),
748        // Can't extract window function
749        _ => (None, condition),
750    }
751}
752
753/// Check if an expression is a window function
754fn is_window_expr(expr: &Expression) -> bool {
755    matches!(expr, Expression::Window(_) | Expression::WindowFunction(_))
756}
757
758/// Eliminate DISTINCT ON clause by converting to a subquery with ROW_NUMBER
759///
760/// DISTINCT ON is PostgreSQL-specific. For dialects that don't support it,
761/// this converts it to a subquery with a ROW_NUMBER() window function.
762///
763/// Converts:
764/// ```sql
765/// SELECT DISTINCT ON (a) a, b FROM t ORDER BY a, b
766/// ```
767/// To:
768/// ```sql
769/// SELECT a, b FROM (
770///     SELECT a, b, ROW_NUMBER() OVER (PARTITION BY a ORDER BY a, b) AS _row_number
771///     FROM t
772/// ) _t WHERE _row_number = 1
773/// ```
774///
775/// Reference: `transforms.py:138-191`
776pub fn eliminate_distinct_on(expr: Expression) -> Result<Expression> {
777    eliminate_distinct_on_for_dialect(expr, None)
778}
779
780/// Eliminate DISTINCT ON with dialect-specific NULL ordering behavior.
781///
782/// For dialects where NULLs don't sort first by default in DESC ordering,
783/// we need to add explicit NULL ordering to preserve DISTINCT ON semantics.
784pub fn eliminate_distinct_on_for_dialect(
785    expr: Expression,
786    target: Option<DialectType>,
787) -> Result<Expression> {
788    use crate::expressions::Case;
789
790    // PostgreSQL and DuckDB support DISTINCT ON natively - skip elimination
791    if matches!(
792        target,
793        Some(DialectType::PostgreSQL) | Some(DialectType::DuckDB)
794    ) {
795        return Ok(expr);
796    }
797
798    // Determine NULL ordering mode based on target dialect
799    // Oracle/Redshift/Snowflake: NULLS FIRST is default for DESC -> no change needed
800    // BigQuery/Spark/Presto/Hive/etc: need explicit NULLS FIRST
801    // MySQL/StarRocks/TSQL: no NULLS FIRST syntax -> use CASE WHEN IS NULL
802    enum NullsMode {
803        None,       // Default NULLS FIRST behavior (Oracle, Redshift, Snowflake)
804        NullsFirst, // Add explicit NULLS FIRST (BigQuery, Spark, Presto, Hive, etc.)
805        CaseExpr,   // Use CASE WHEN IS NULL for NULLS FIRST simulation (MySQL, StarRocks, TSQL)
806    }
807
808    let nulls_mode = match target {
809        Some(DialectType::MySQL)
810        | Some(DialectType::StarRocks)
811        | Some(DialectType::SingleStore)
812        | Some(DialectType::TSQL)
813        | Some(DialectType::Fabric) => NullsMode::CaseExpr,
814        Some(DialectType::Oracle) | Some(DialectType::Redshift) | Some(DialectType::Snowflake) => {
815            NullsMode::None
816        }
817        // All other dialects that don't support DISTINCT ON: use NULLS FIRST
818        _ => NullsMode::NullsFirst,
819    };
820
821    match expr {
822        Expression::Select(mut select) => {
823            if let Some(distinct_cols) = select.distinct_on.take() {
824                if !distinct_cols.is_empty() {
825                    // Create ROW_NUMBER() OVER (PARTITION BY distinct_cols ORDER BY ...)
826                    let row_number_alias = Identifier::new("_row_number".to_string());
827
828                    // Get order_by expressions, or use distinct_cols as default order
829                    let order_exprs = if let Some(ref order_by) = select.order_by {
830                        let mut exprs = order_by.expressions.clone();
831                        // Add NULL ordering based on target dialect
832                        match nulls_mode {
833                            NullsMode::NullsFirst => {
834                                for ord in &mut exprs {
835                                    if ord.desc && ord.nulls_first.is_none() {
836                                        ord.nulls_first = Some(true);
837                                    }
838                                }
839                            }
840                            NullsMode::CaseExpr => {
841                                // For each DESC column without explicit nulls ordering,
842                                // prepend: CASE WHEN col IS NULL THEN 1 ELSE 0 END DESC
843                                let mut new_exprs = Vec::new();
844                                for ord in &exprs {
845                                    if ord.desc && ord.nulls_first.is_none() {
846                                        // Add CASE WHEN col IS NULL THEN 1 ELSE 0 END DESC
847                                        let null_check = Expression::Case(Box::new(Case {
848                                            operand: None,
849                                            whens: vec![(
850                                                Expression::IsNull(Box::new(
851                                                    crate::expressions::IsNull {
852                                                        this: ord.this.clone(),
853                                                        not: false,
854                                                        postfix_form: false,
855                                                    },
856                                                )),
857                                                Expression::Literal(Literal::Number(
858                                                    "1".to_string(),
859                                                )),
860                                            )],
861                                            else_: Some(Expression::Literal(Literal::Number(
862                                                "0".to_string(),
863                                            ))),
864                                            comments: Vec::new(),
865                                        }));
866                                        new_exprs.push(crate::expressions::Ordered {
867                                            this: null_check,
868                                            desc: true,
869                                            nulls_first: None,
870                                            explicit_asc: false,
871                                            with_fill: None,
872                                        });
873                                    }
874                                    new_exprs.push(ord.clone());
875                                }
876                                exprs = new_exprs;
877                            }
878                            NullsMode::None => {}
879                        }
880                        exprs
881                    } else {
882                        distinct_cols
883                            .iter()
884                            .map(|e| crate::expressions::Ordered {
885                                this: e.clone(),
886                                desc: false,
887                                nulls_first: None,
888                                explicit_asc: false,
889                                with_fill: None,
890                            })
891                            .collect()
892                    };
893
894                    // Create window function: ROW_NUMBER() OVER (PARTITION BY ... ORDER BY ...)
895                    let row_number_func =
896                        Expression::WindowFunction(Box::new(crate::expressions::WindowFunction {
897                            this: Expression::RowNumber(crate::expressions::RowNumber),
898                            over: Over {
899                                partition_by: distinct_cols,
900                                order_by: order_exprs,
901                                frame: None,
902                                window_name: None,
903                                alias: None,
904                            },
905                            keep: None,
906                        }));
907
908                    // Build aliased inner expressions and outer column references
909                    // Inner: SELECT a AS a, b AS b, ROW_NUMBER() OVER (...) AS _row_number
910                    // Outer: SELECT a, b FROM (...)
911                    let mut inner_aliased_exprs = Vec::new();
912                    let mut outer_select_exprs = Vec::new();
913                    for orig_expr in &select.expressions {
914                        match orig_expr {
915                            Expression::Alias(alias) => {
916                                // Already aliased - keep as-is in inner, reference alias in outer
917                                inner_aliased_exprs.push(orig_expr.clone());
918                                outer_select_exprs.push(Expression::Column(
919                                    crate::expressions::Column {
920                                        name: alias.alias.clone(),
921                                        table: None,
922                                        join_mark: false,
923                                        trailing_comments: vec![],
924                                        span: None,
925                                    },
926                                ));
927                            }
928                            Expression::Column(col) => {
929                                // Wrap in alias: a AS a in inner, just a in outer
930                                inner_aliased_exprs.push(Expression::Alias(Box::new(
931                                    crate::expressions::Alias {
932                                        this: orig_expr.clone(),
933                                        alias: col.name.clone(),
934                                        column_aliases: vec![],
935                                        pre_alias_comments: vec![],
936                                        trailing_comments: vec![],
937                                    },
938                                )));
939                                outer_select_exprs.push(Expression::Column(
940                                    crate::expressions::Column {
941                                        name: col.name.clone(),
942                                        table: None,
943                                        join_mark: false,
944                                        trailing_comments: vec![],
945                                        span: None,
946                                    },
947                                ));
948                            }
949                            _ => {
950                                // Complex expression without alias - include as-is in both
951                                inner_aliased_exprs.push(orig_expr.clone());
952                                outer_select_exprs.push(orig_expr.clone());
953                            }
954                        }
955                    }
956
957                    // Add ROW_NUMBER as aliased expression to inner select list
958                    let row_number_alias_expr =
959                        Expression::Alias(Box::new(crate::expressions::Alias {
960                            this: row_number_func,
961                            alias: row_number_alias.clone(),
962                            column_aliases: vec![],
963                            pre_alias_comments: vec![],
964                            trailing_comments: vec![],
965                        }));
966                    inner_aliased_exprs.push(row_number_alias_expr);
967
968                    // Replace inner select's expressions with aliased versions
969                    select.expressions = inner_aliased_exprs;
970
971                    // Remove ORDER BY from inner query (it's now in the window function)
972                    let _inner_order_by = select.order_by.take();
973
974                    // Clear DISTINCT from inner select (DISTINCT ON is replaced by ROW_NUMBER)
975                    select.distinct = false;
976
977                    // Create inner subquery
978                    let inner_select = Expression::Select(select);
979                    let subquery = Subquery {
980                        this: inner_select,
981                        alias: Some(Identifier::new("_t".to_string())),
982                        column_aliases: vec![],
983                        order_by: None,
984                        limit: None,
985                        offset: None,
986                        distribute_by: None,
987                        sort_by: None,
988                        cluster_by: None,
989                        lateral: false,
990                        modifiers_inside: false,
991                        trailing_comments: vec![],
992                    };
993
994                    // Create outer SELECT with WHERE _row_number = 1
995                    // No ORDER BY on outer query
996                    let outer_select = Select {
997                        expressions: outer_select_exprs,
998                        from: Some(From {
999                            expressions: vec![Expression::Subquery(Box::new(subquery))],
1000                        }),
1001                        where_clause: Some(Where {
1002                            this: Expression::Eq(Box::new(BinaryOp {
1003                                left: Expression::Column(crate::expressions::Column {
1004                                    name: row_number_alias,
1005                                    table: None,
1006                                    join_mark: false,
1007                                    trailing_comments: vec![],
1008                                    span: None,
1009                                }),
1010                                right: Expression::Literal(Literal::Number("1".to_string())),
1011                                left_comments: vec![],
1012                                operator_comments: vec![],
1013                                trailing_comments: vec![],
1014                            })),
1015                        }),
1016                        ..Select::new()
1017                    };
1018
1019                    return Ok(Expression::Select(Box::new(outer_select)));
1020                }
1021            }
1022            Ok(Expression::Select(select))
1023        }
1024        other => Ok(other),
1025    }
1026}
1027
1028/// Convert SEMI and ANTI joins into equivalent forms that use EXISTS instead.
1029///
1030/// For dialects that don't support SEMI/ANTI join syntax, this converts:
1031/// - `SELECT * FROM a SEMI JOIN b ON a.x = b.x` → `SELECT * FROM a WHERE EXISTS (SELECT 1 FROM b WHERE a.x = b.x)`
1032/// - `SELECT * FROM a ANTI JOIN b ON a.x = b.x` → `SELECT * FROM a WHERE NOT EXISTS (SELECT 1 FROM b WHERE a.x = b.x)`
1033///
1034/// Reference: `transforms.py:607-621`
1035pub fn eliminate_semi_and_anti_joins(expr: Expression) -> Result<Expression> {
1036    match expr {
1037        Expression::Select(mut select) => {
1038            let mut new_joins = Vec::new();
1039            let mut extra_where_conditions = Vec::new();
1040
1041            for join in select.joins.drain(..) {
1042                match join.kind {
1043                    JoinKind::Semi | JoinKind::LeftSemi => {
1044                        if let Some(on_condition) = join.on {
1045                            // Create: EXISTS (SELECT 1 FROM join_table WHERE on_condition)
1046                            let subquery_select = Select {
1047                                expressions: vec![Expression::Literal(Literal::Number(
1048                                    "1".to_string(),
1049                                ))],
1050                                from: Some(From {
1051                                    expressions: vec![join.this],
1052                                }),
1053                                where_clause: Some(Where { this: on_condition }),
1054                                ..Select::new()
1055                            };
1056
1057                            let exists = Expression::Exists(Box::new(Exists {
1058                                this: Expression::Subquery(Box::new(Subquery {
1059                                    this: Expression::Select(Box::new(subquery_select)),
1060                                    alias: None,
1061                                    column_aliases: vec![],
1062                                    order_by: None,
1063                                    limit: None,
1064                                    offset: None,
1065                                    distribute_by: None,
1066                                    sort_by: None,
1067                                    cluster_by: None,
1068                                    lateral: false,
1069                                    modifiers_inside: false,
1070                                    trailing_comments: vec![],
1071                                })),
1072                                not: false,
1073                            }));
1074
1075                            extra_where_conditions.push(exists);
1076                        }
1077                    }
1078                    JoinKind::Anti | JoinKind::LeftAnti => {
1079                        if let Some(on_condition) = join.on {
1080                            // Create: NOT EXISTS (SELECT 1 FROM join_table WHERE on_condition)
1081                            let subquery_select = Select {
1082                                expressions: vec![Expression::Literal(Literal::Number(
1083                                    "1".to_string(),
1084                                ))],
1085                                from: Some(From {
1086                                    expressions: vec![join.this],
1087                                }),
1088                                where_clause: Some(Where { this: on_condition }),
1089                                ..Select::new()
1090                            };
1091
1092                            // Use Exists with not: true for NOT EXISTS
1093                            let not_exists = Expression::Exists(Box::new(Exists {
1094                                this: Expression::Subquery(Box::new(Subquery {
1095                                    this: Expression::Select(Box::new(subquery_select)),
1096                                    alias: None,
1097                                    column_aliases: vec![],
1098                                    order_by: None,
1099                                    limit: None,
1100                                    offset: None,
1101                                    distribute_by: None,
1102                                    sort_by: None,
1103                                    cluster_by: None,
1104                                    lateral: false,
1105                                    modifiers_inside: false,
1106                                    trailing_comments: vec![],
1107                                })),
1108                                not: true,
1109                            }));
1110
1111                            extra_where_conditions.push(not_exists);
1112                        }
1113                    }
1114                    _ => {
1115                        // Keep other join types as-is
1116                        new_joins.push(join);
1117                    }
1118                }
1119            }
1120
1121            select.joins = new_joins;
1122
1123            // Add EXISTS conditions to WHERE clause
1124            if !extra_where_conditions.is_empty() {
1125                let combined = extra_where_conditions
1126                    .into_iter()
1127                    .reduce(|acc, cond| {
1128                        Expression::And(Box::new(BinaryOp {
1129                            left: acc,
1130                            right: cond,
1131                            left_comments: vec![],
1132                            operator_comments: vec![],
1133                            trailing_comments: vec![],
1134                        }))
1135                    })
1136                    .unwrap();
1137
1138                select.where_clause = match select.where_clause {
1139                    Some(Where { this: existing }) => Some(Where {
1140                        this: Expression::And(Box::new(BinaryOp {
1141                            left: existing,
1142                            right: combined,
1143                            left_comments: vec![],
1144                            operator_comments: vec![],
1145                            trailing_comments: vec![],
1146                        })),
1147                    }),
1148                    None => Some(Where { this: combined }),
1149                };
1150            }
1151
1152            Ok(Expression::Select(select))
1153        }
1154        other => Ok(other),
1155    }
1156}
1157
1158/// Convert FULL OUTER JOIN to a UNION of LEFT and RIGHT OUTER joins.
1159///
1160/// For dialects that don't support FULL OUTER JOIN, this converts:
1161/// ```sql
1162/// SELECT * FROM a FULL OUTER JOIN b ON a.x = b.x
1163/// ```
1164/// To:
1165/// ```sql
1166/// SELECT * FROM a LEFT OUTER JOIN b ON a.x = b.x
1167/// UNION ALL
1168/// SELECT * FROM a RIGHT OUTER JOIN b ON a.x = b.x
1169/// WHERE NOT EXISTS (SELECT 1 FROM a WHERE a.x = b.x)
1170/// ```
1171///
1172/// Note: This transformation currently only works for queries with a single FULL OUTER join.
1173///
1174/// Reference: `transforms.py:624-661`
1175pub fn eliminate_full_outer_join(expr: Expression) -> Result<Expression> {
1176    match expr {
1177        Expression::Select(mut select) => {
1178            // Find FULL OUTER joins
1179            let full_outer_join_idx = select.joins.iter().position(|j| j.kind == JoinKind::Full);
1180
1181            if let Some(idx) = full_outer_join_idx {
1182                // We only handle queries with a single FULL OUTER join
1183                let full_join_count = select
1184                    .joins
1185                    .iter()
1186                    .filter(|j| j.kind == JoinKind::Full)
1187                    .count();
1188                if full_join_count != 1 {
1189                    return Ok(Expression::Select(select));
1190                }
1191
1192                // Clone the query for the right side of the UNION
1193                let mut right_select = select.clone();
1194
1195                // Get the join condition from the FULL OUTER join
1196                let full_join = &select.joins[idx];
1197                let join_condition = full_join.on.clone();
1198
1199                // Left side: convert FULL to LEFT
1200                select.joins[idx].kind = JoinKind::Left;
1201
1202                // Right side: convert FULL to RIGHT and add NOT EXISTS condition
1203                right_select.joins[idx].kind = JoinKind::Right;
1204
1205                // Build NOT EXISTS for the right side to exclude rows that matched
1206                if let (Some(ref from), Some(ref join_cond)) = (&select.from, &join_condition) {
1207                    if !from.expressions.is_empty() {
1208                        let anti_subquery = Expression::Select(Box::new(Select {
1209                            expressions: vec![Expression::Literal(Literal::Number(
1210                                "1".to_string(),
1211                            ))],
1212                            from: Some(from.clone()),
1213                            where_clause: Some(Where {
1214                                this: join_cond.clone(),
1215                            }),
1216                            ..Select::new()
1217                        }));
1218
1219                        let not_exists = Expression::Not(Box::new(crate::expressions::UnaryOp {
1220                            this: Expression::Exists(Box::new(Exists {
1221                                this: Expression::Subquery(Box::new(Subquery {
1222                                    this: anti_subquery,
1223                                    alias: None,
1224                                    column_aliases: vec![],
1225                                    order_by: None,
1226                                    limit: None,
1227                                    offset: None,
1228                                    distribute_by: None,
1229                                    sort_by: None,
1230                                    cluster_by: None,
1231                                    lateral: false,
1232                                    modifiers_inside: false,
1233                                    trailing_comments: vec![],
1234                                })),
1235                                not: false,
1236                            })),
1237                        }));
1238
1239                        // Add NOT EXISTS to the WHERE clause
1240                        right_select.where_clause = Some(Where {
1241                            this: match right_select.where_clause {
1242                                Some(w) => Expression::And(Box::new(BinaryOp {
1243                                    left: w.this,
1244                                    right: not_exists,
1245                                    left_comments: vec![],
1246                                    operator_comments: vec![],
1247                                    trailing_comments: vec![],
1248                                })),
1249                                None => not_exists,
1250                            },
1251                        });
1252                    }
1253                }
1254
1255                // Remove WITH clause from right side (CTEs should only be on left)
1256                right_select.with = None;
1257
1258                // Remove ORDER BY from left side (will be applied after UNION)
1259                let order_by = select.order_by.take();
1260
1261                // Create UNION ALL of left and right
1262                let union = crate::expressions::Union {
1263                    left: Expression::Select(select),
1264                    right: Expression::Select(right_select),
1265                    all: true, // UNION ALL
1266                    distinct: false,
1267                    with: None,
1268                    order_by,
1269                    limit: None,
1270                    offset: None,
1271                    distribute_by: None,
1272                    sort_by: None,
1273                    cluster_by: None,
1274                    by_name: false,
1275                    side: None,
1276                    kind: None,
1277                    corresponding: false,
1278                    strict: false,
1279                    on_columns: Vec::new(),
1280                };
1281
1282                return Ok(Expression::Union(Box::new(union)));
1283            }
1284
1285            Ok(Expression::Select(select))
1286        }
1287        other => Ok(other),
1288    }
1289}
1290
1291/// Move CTEs to the top level of the query.
1292///
1293/// Some dialects (e.g., Hive, T-SQL, Spark prior to version 3) only allow CTEs to be
1294/// defined at the top-level, so for example queries like:
1295///
1296/// ```sql
1297/// SELECT * FROM (WITH t(c) AS (SELECT 1) SELECT * FROM t) AS subq
1298/// ```
1299///
1300/// are invalid in those dialects. This transformation moves all CTEs to the top level.
1301///
1302/// Reference: `transforms.py:664-700`
1303pub fn move_ctes_to_top_level(expr: Expression) -> Result<Expression> {
1304    match expr {
1305        Expression::Select(mut select) => {
1306            // Phase 1: Collect CTEs from nested subqueries (not inside CTE definitions)
1307            let mut collected_ctes: Vec<crate::expressions::Cte> = Vec::new();
1308            let mut has_recursive = false;
1309
1310            collect_nested_ctes(
1311                &Expression::Select(select.clone()),
1312                &mut collected_ctes,
1313                &mut has_recursive,
1314                true,
1315            );
1316
1317            // Phase 2: Flatten CTEs nested inside top-level CTE definitions
1318            // This handles: WITH c AS (WITH b AS (...) SELECT ...) -> WITH b AS (...), c AS (SELECT ...)
1319            let mut cte_body_collected: Vec<(String, Vec<crate::expressions::Cte>)> = Vec::new();
1320            if let Some(ref with) = select.with {
1321                for cte in &with.ctes {
1322                    let mut body_ctes: Vec<crate::expressions::Cte> = Vec::new();
1323                    collect_ctes_from_cte_body(&cte.this, &mut body_ctes, &mut has_recursive);
1324                    if !body_ctes.is_empty() {
1325                        cte_body_collected.push((cte.alias.name.clone(), body_ctes));
1326                    }
1327                }
1328            }
1329
1330            let has_subquery_ctes = !collected_ctes.is_empty();
1331            let has_body_ctes = !cte_body_collected.is_empty();
1332
1333            if has_subquery_ctes || has_body_ctes {
1334                // Strip WITH clauses from inner subqueries
1335                strip_nested_with_clauses(&mut select, true);
1336
1337                // Strip WITH clauses from CTE body definitions
1338                if has_body_ctes {
1339                    if let Some(ref mut with) = select.with {
1340                        for cte in with.ctes.iter_mut() {
1341                            strip_with_from_cte_body(&mut cte.this);
1342                        }
1343                    }
1344                }
1345
1346                let top_with = select.with.get_or_insert_with(|| crate::expressions::With {
1347                    ctes: Vec::new(),
1348                    recursive: false,
1349                    leading_comments: vec![],
1350                    search: None,
1351                });
1352
1353                if has_recursive {
1354                    top_with.recursive = true;
1355                }
1356
1357                // Insert body CTEs before their parent CTE (Python sqlglot behavior)
1358                if has_body_ctes {
1359                    let mut new_ctes: Vec<crate::expressions::Cte> = Vec::new();
1360                    for mut cte in top_with.ctes.drain(..) {
1361                        // Check if this CTE has nested CTEs to insert before it
1362                        if let Some(pos) = cte_body_collected
1363                            .iter()
1364                            .position(|(name, _)| *name == cte.alias.name)
1365                        {
1366                            let (_, mut nested) = cte_body_collected.remove(pos);
1367                            // Strip WITH from each nested CTE's body too
1368                            for nested_cte in nested.iter_mut() {
1369                                strip_with_from_cte_body(&mut nested_cte.this);
1370                            }
1371                            new_ctes.extend(nested);
1372                        }
1373                        // Also strip WITH from the parent CTE's body
1374                        strip_with_from_cte_body(&mut cte.this);
1375                        new_ctes.push(cte);
1376                    }
1377                    top_with.ctes = new_ctes;
1378                }
1379
1380                // Append collected subquery CTEs after existing ones
1381                top_with.ctes.extend(collected_ctes);
1382            }
1383
1384            Ok(Expression::Select(select))
1385        }
1386        other => Ok(other),
1387    }
1388}
1389
1390/// Recursively collect CTEs from within CTE body expressions (for deep nesting)
1391fn collect_ctes_from_cte_body(
1392    expr: &Expression,
1393    collected: &mut Vec<crate::expressions::Cte>,
1394    has_recursive: &mut bool,
1395) {
1396    if let Expression::Select(select) = expr {
1397        if let Some(ref with) = select.with {
1398            if with.recursive {
1399                *has_recursive = true;
1400            }
1401            for cte in &with.ctes {
1402                // Recursively collect from this CTE's body first (depth-first)
1403                collect_ctes_from_cte_body(&cte.this, collected, has_recursive);
1404                // Then add this CTE itself
1405                collected.push(cte.clone());
1406            }
1407        }
1408    }
1409}
1410
1411/// Strip WITH clauses from CTE body expressions
1412fn strip_with_from_cte_body(expr: &mut Expression) {
1413    if let Expression::Select(ref mut select) = expr {
1414        select.with = None;
1415    }
1416}
1417
1418/// Strip WITH clauses from nested subqueries (after hoisting to top level)
1419fn strip_nested_with_clauses(select: &mut Select, _is_top_level: bool) {
1420    // Strip WITH from FROM subqueries
1421    if let Some(ref mut from) = select.from {
1422        for expr in from.expressions.iter_mut() {
1423            strip_with_from_expr(expr);
1424        }
1425    }
1426    // Strip from JOINs
1427    for join in select.joins.iter_mut() {
1428        strip_with_from_expr(&mut join.this);
1429    }
1430    // Strip from select expressions
1431    for expr in select.expressions.iter_mut() {
1432        strip_with_from_expr(expr);
1433    }
1434    // Strip from WHERE
1435    if let Some(ref mut w) = select.where_clause {
1436        strip_with_from_expr(&mut w.this);
1437    }
1438}
1439
1440fn strip_with_from_expr(expr: &mut Expression) {
1441    match expr {
1442        Expression::Subquery(ref mut subquery) => {
1443            strip_with_from_inner_query(&mut subquery.this);
1444        }
1445        Expression::Alias(ref mut alias) => {
1446            strip_with_from_expr(&mut alias.this);
1447        }
1448        Expression::Select(ref mut select) => {
1449            // Strip WITH from this SELECT (it's nested)
1450            select.with = None;
1451            // Recurse into its subqueries
1452            strip_nested_with_clauses(select, false);
1453        }
1454        _ => {}
1455    }
1456}
1457
1458fn strip_with_from_inner_query(expr: &mut Expression) {
1459    if let Expression::Select(ref mut select) = expr {
1460        select.with = None;
1461        strip_nested_with_clauses(select, false);
1462    }
1463}
1464
1465/// Helper to recursively collect CTEs from nested subqueries
1466fn collect_nested_ctes(
1467    expr: &Expression,
1468    collected: &mut Vec<crate::expressions::Cte>,
1469    has_recursive: &mut bool,
1470    is_top_level: bool,
1471) {
1472    match expr {
1473        Expression::Select(select) => {
1474            // If this is not the top level and has a WITH clause, collect its CTEs
1475            if !is_top_level {
1476                if let Some(ref with) = select.with {
1477                    if with.recursive {
1478                        *has_recursive = true;
1479                    }
1480                    collected.extend(with.ctes.clone());
1481                }
1482            }
1483
1484            // Recurse into FROM clause
1485            if let Some(ref from) = select.from {
1486                for expr in &from.expressions {
1487                    collect_nested_ctes(expr, collected, has_recursive, false);
1488                }
1489            }
1490
1491            // Recurse into JOINs
1492            for join in &select.joins {
1493                collect_nested_ctes(&join.this, collected, has_recursive, false);
1494            }
1495
1496            // Recurse into select expressions (for subqueries in SELECT)
1497            for sel_expr in &select.expressions {
1498                collect_nested_ctes(sel_expr, collected, has_recursive, false);
1499            }
1500
1501            // Recurse into WHERE
1502            if let Some(ref where_clause) = select.where_clause {
1503                collect_nested_ctes(&where_clause.this, collected, has_recursive, false);
1504            }
1505        }
1506        Expression::Subquery(subquery) => {
1507            // Process the inner query
1508            collect_nested_ctes(&subquery.this, collected, has_recursive, false);
1509        }
1510        Expression::Alias(alias) => {
1511            collect_nested_ctes(&alias.this, collected, has_recursive, false);
1512        }
1513        // Add more expression types as needed
1514        _ => {}
1515    }
1516}
1517
1518/// Inline window definitions from WINDOW clause.
1519///
1520/// Some dialects don't support named windows. This transform inlines them:
1521///
1522/// ```sql
1523/// SELECT SUM(a) OVER w FROM t WINDOW w AS (PARTITION BY b)
1524/// ```
1525///
1526/// To:
1527///
1528/// ```sql
1529/// SELECT SUM(a) OVER (PARTITION BY b) FROM t
1530/// ```
1531///
1532/// Reference: `transforms.py:975-1003`
1533pub fn eliminate_window_clause(expr: Expression) -> Result<Expression> {
1534    match expr {
1535        Expression::Select(mut select) => {
1536            if let Some(named_windows) = select.windows.take() {
1537                // Build a map of window name -> window spec
1538                let window_map: std::collections::HashMap<String, &Over> = named_windows
1539                    .iter()
1540                    .map(|nw| (nw.name.name.to_lowercase(), &nw.spec))
1541                    .collect();
1542
1543                // Inline window references in the select expressions
1544                select.expressions = select
1545                    .expressions
1546                    .into_iter()
1547                    .map(|e| inline_window_refs(e, &window_map))
1548                    .collect();
1549            }
1550            Ok(Expression::Select(select))
1551        }
1552        other => Ok(other),
1553    }
1554}
1555
1556/// Helper function to inline window references in an expression
1557fn inline_window_refs(
1558    expr: Expression,
1559    window_map: &std::collections::HashMap<String, &Over>,
1560) -> Expression {
1561    match expr {
1562        Expression::WindowFunction(mut wf) => {
1563            // Check if this window references a named window
1564            if let Some(ref name) = wf.over.window_name {
1565                let key = name.name.to_lowercase();
1566                if let Some(named_spec) = window_map.get(&key) {
1567                    // Inherit properties from the named window
1568                    if wf.over.partition_by.is_empty() && !named_spec.partition_by.is_empty() {
1569                        wf.over.partition_by = named_spec.partition_by.clone();
1570                    }
1571                    if wf.over.order_by.is_empty() && !named_spec.order_by.is_empty() {
1572                        wf.over.order_by = named_spec.order_by.clone();
1573                    }
1574                    if wf.over.frame.is_none() && named_spec.frame.is_some() {
1575                        wf.over.frame = named_spec.frame.clone();
1576                    }
1577                    // Clear the window name reference
1578                    wf.over.window_name = None;
1579                }
1580            }
1581            Expression::WindowFunction(wf)
1582        }
1583        Expression::Alias(mut alias) => {
1584            // Recurse into aliased expressions
1585            alias.this = inline_window_refs(alias.this, window_map);
1586            Expression::Alias(alias)
1587        }
1588        // For a complete implementation, we would need to recursively visit all expressions
1589        // that can contain window functions (CASE, subqueries, etc.)
1590        other => other,
1591    }
1592}
1593
1594/// Eliminate Oracle-style (+) join marks by converting to standard JOINs.
1595///
1596/// Oracle uses (+) syntax for outer joins:
1597/// ```sql
1598/// SELECT * FROM a, b WHERE a.x = b.x(+)
1599/// ```
1600///
1601/// This is converted to standard LEFT OUTER JOIN:
1602/// ```sql
1603/// SELECT * FROM a LEFT OUTER JOIN b ON a.x = b.x
1604/// ```
1605///
1606/// Reference: `transforms.py:828-945`
1607pub fn eliminate_join_marks(expr: Expression) -> Result<Expression> {
1608    match expr {
1609        Expression::Select(mut select) => {
1610            // Check if there are any join marks in the WHERE clause
1611            let has_join_marks = select
1612                .where_clause
1613                .as_ref()
1614                .map_or(false, |w| contains_join_mark(&w.this));
1615
1616            if !has_join_marks {
1617                return Ok(Expression::Select(select));
1618            }
1619
1620            // Collect tables from FROM clause
1621            let from_tables: Vec<String> = select
1622                .from
1623                .as_ref()
1624                .map(|f| {
1625                    f.expressions
1626                        .iter()
1627                        .filter_map(|e| get_table_name(e))
1628                        .collect()
1629                })
1630                .unwrap_or_default();
1631
1632            // Extract join conditions and their marked tables from WHERE
1633            let mut join_conditions: std::collections::HashMap<String, Vec<Expression>> =
1634                std::collections::HashMap::new();
1635            let mut remaining_conditions: Vec<Expression> = Vec::new();
1636
1637            if let Some(ref where_clause) = select.where_clause {
1638                extract_join_mark_conditions(
1639                    &where_clause.this,
1640                    &mut join_conditions,
1641                    &mut remaining_conditions,
1642                );
1643            }
1644
1645            // Build new JOINs for each marked table
1646            let mut new_joins = select.joins.clone();
1647            for (table_name, conditions) in join_conditions {
1648                // Find if this table is in FROM or existing JOINs
1649                let table_in_from = from_tables.contains(&table_name);
1650
1651                if table_in_from && !conditions.is_empty() {
1652                    // Create LEFT JOIN with combined conditions
1653                    let combined_condition = conditions.into_iter().reduce(|a, b| {
1654                        Expression::And(Box::new(BinaryOp {
1655                            left: a,
1656                            right: b,
1657                            left_comments: vec![],
1658                            operator_comments: vec![],
1659                            trailing_comments: vec![],
1660                        }))
1661                    });
1662
1663                    // Find the table in FROM and move it to a JOIN
1664                    if let Some(ref mut from) = select.from {
1665                        if let Some(pos) = from
1666                            .expressions
1667                            .iter()
1668                            .position(|e| get_table_name(e).map_or(false, |n| n == table_name))
1669                        {
1670                            if from.expressions.len() > 1 {
1671                                let join_table = from.expressions.remove(pos);
1672                                new_joins.push(crate::expressions::Join {
1673                                    this: join_table,
1674                                    kind: JoinKind::Left,
1675                                    on: combined_condition,
1676                                    using: vec![],
1677                                    use_inner_keyword: false,
1678                                    use_outer_keyword: true,
1679                                    deferred_condition: false,
1680                                    join_hint: None,
1681                                    match_condition: None,
1682                                    pivots: Vec::new(),
1683                                    comments: Vec::new(),
1684                                    nesting_group: 0,
1685                                    directed: false,
1686                                });
1687                            }
1688                        }
1689                    }
1690                }
1691            }
1692
1693            select.joins = new_joins;
1694
1695            // Update WHERE with remaining conditions
1696            if remaining_conditions.is_empty() {
1697                select.where_clause = None;
1698            } else {
1699                let combined = remaining_conditions.into_iter().reduce(|a, b| {
1700                    Expression::And(Box::new(BinaryOp {
1701                        left: a,
1702                        right: b,
1703                        left_comments: vec![],
1704                        operator_comments: vec![],
1705                        trailing_comments: vec![],
1706                    }))
1707                });
1708                select.where_clause = combined.map(|c| Where { this: c });
1709            }
1710
1711            // Clear join marks from all columns
1712            clear_join_marks(&mut Expression::Select(select.clone()));
1713
1714            Ok(Expression::Select(select))
1715        }
1716        other => Ok(other),
1717    }
1718}
1719
1720/// Check if an expression contains any columns with join marks
1721fn contains_join_mark(expr: &Expression) -> bool {
1722    match expr {
1723        Expression::Column(col) => col.join_mark,
1724        Expression::And(op) | Expression::Or(op) => {
1725            contains_join_mark(&op.left) || contains_join_mark(&op.right)
1726        }
1727        Expression::Eq(op)
1728        | Expression::Neq(op)
1729        | Expression::Lt(op)
1730        | Expression::Lte(op)
1731        | Expression::Gt(op)
1732        | Expression::Gte(op) => contains_join_mark(&op.left) || contains_join_mark(&op.right),
1733        Expression::Not(op) => contains_join_mark(&op.this),
1734        _ => false,
1735    }
1736}
1737
1738/// Get table name from a table expression
1739fn get_table_name(expr: &Expression) -> Option<String> {
1740    match expr {
1741        Expression::Table(t) => Some(t.name.name.clone()),
1742        Expression::Alias(a) => Some(a.alias.name.clone()),
1743        _ => None,
1744    }
1745}
1746
1747/// Extract join mark conditions from WHERE clause
1748fn extract_join_mark_conditions(
1749    expr: &Expression,
1750    join_conditions: &mut std::collections::HashMap<String, Vec<Expression>>,
1751    remaining: &mut Vec<Expression>,
1752) {
1753    match expr {
1754        Expression::And(op) => {
1755            extract_join_mark_conditions(&op.left, join_conditions, remaining);
1756            extract_join_mark_conditions(&op.right, join_conditions, remaining);
1757        }
1758        _ => {
1759            if let Some(table) = get_join_mark_table(expr) {
1760                join_conditions
1761                    .entry(table)
1762                    .or_insert_with(Vec::new)
1763                    .push(expr.clone());
1764            } else {
1765                remaining.push(expr.clone());
1766            }
1767        }
1768    }
1769}
1770
1771/// Get the table name of a column with join mark in an expression
1772fn get_join_mark_table(expr: &Expression) -> Option<String> {
1773    match expr {
1774        Expression::Eq(op)
1775        | Expression::Neq(op)
1776        | Expression::Lt(op)
1777        | Expression::Lte(op)
1778        | Expression::Gt(op)
1779        | Expression::Gte(op) => {
1780            // Check both sides for join mark columns
1781            if let Expression::Column(col) = &op.left {
1782                if col.join_mark {
1783                    return col.table.as_ref().map(|t| t.name.clone());
1784                }
1785            }
1786            if let Expression::Column(col) = &op.right {
1787                if col.join_mark {
1788                    return col.table.as_ref().map(|t| t.name.clone());
1789                }
1790            }
1791            None
1792        }
1793        _ => None,
1794    }
1795}
1796
1797/// Clear join marks from all columns in an expression
1798fn clear_join_marks(expr: &mut Expression) {
1799    match expr {
1800        Expression::Column(col) => col.join_mark = false,
1801        Expression::Select(select) => {
1802            if let Some(ref mut w) = select.where_clause {
1803                clear_join_marks(&mut w.this);
1804            }
1805            for sel_expr in &mut select.expressions {
1806                clear_join_marks(sel_expr);
1807            }
1808        }
1809        Expression::And(op) | Expression::Or(op) => {
1810            clear_join_marks(&mut op.left);
1811            clear_join_marks(&mut op.right);
1812        }
1813        Expression::Eq(op)
1814        | Expression::Neq(op)
1815        | Expression::Lt(op)
1816        | Expression::Lte(op)
1817        | Expression::Gt(op)
1818        | Expression::Gte(op) => {
1819            clear_join_marks(&mut op.left);
1820            clear_join_marks(&mut op.right);
1821        }
1822        _ => {}
1823    }
1824}
1825
1826/// Add column names to recursive CTE definitions.
1827///
1828/// Uses projection output names in recursive CTE definitions to define the CTEs' columns.
1829/// This is required by some dialects that need explicit column names in recursive CTEs.
1830///
1831/// Reference: `transforms.py:576-592`
1832pub fn add_recursive_cte_column_names(expr: Expression) -> Result<Expression> {
1833    match expr {
1834        Expression::Select(mut select) => {
1835            if let Some(ref mut with) = select.with {
1836                if with.recursive {
1837                    let mut counter = 0;
1838                    for cte in &mut with.ctes {
1839                        if cte.columns.is_empty() {
1840                            // Try to get column names from the CTE's SELECT
1841                            if let Expression::Select(ref cte_select) = cte.this {
1842                                let names: Vec<Identifier> = cte_select
1843                                    .expressions
1844                                    .iter()
1845                                    .map(|e| match e {
1846                                        Expression::Alias(a) => a.alias.clone(),
1847                                        Expression::Column(c) => c.name.clone(),
1848                                        _ => {
1849                                            counter += 1;
1850                                            Identifier::new(format!("_c_{}", counter))
1851                                        }
1852                                    })
1853                                    .collect();
1854                                cte.columns = names;
1855                            }
1856                        }
1857                    }
1858                }
1859            }
1860            Ok(Expression::Select(select))
1861        }
1862        other => Ok(other),
1863    }
1864}
1865
1866/// Convert epoch string in CAST to timestamp literal.
1867///
1868/// Replaces `CAST('epoch' AS TIMESTAMP)` with `CAST('1970-01-01 00:00:00' AS TIMESTAMP)`
1869/// for dialects that don't support the 'epoch' keyword.
1870///
1871/// Reference: `transforms.py:595-604`
1872pub fn epoch_cast_to_ts(expr: Expression) -> Result<Expression> {
1873    match expr {
1874        Expression::Cast(mut cast) => {
1875            if let Expression::Literal(Literal::String(ref s)) = cast.this {
1876                if s.to_lowercase() == "epoch" {
1877                    if is_temporal_type(&cast.to) {
1878                        cast.this =
1879                            Expression::Literal(Literal::String("1970-01-01 00:00:00".to_string()));
1880                    }
1881                }
1882            }
1883            Ok(Expression::Cast(cast))
1884        }
1885        Expression::TryCast(mut try_cast) => {
1886            if let Expression::Literal(Literal::String(ref s)) = try_cast.this {
1887                if s.to_lowercase() == "epoch" {
1888                    if is_temporal_type(&try_cast.to) {
1889                        try_cast.this =
1890                            Expression::Literal(Literal::String("1970-01-01 00:00:00".to_string()));
1891                    }
1892                }
1893            }
1894            Ok(Expression::TryCast(try_cast))
1895        }
1896        other => Ok(other),
1897    }
1898}
1899
1900/// Check if a DataType is a temporal type (DATE, TIMESTAMP, etc.)
1901fn is_temporal_type(dt: &DataType) -> bool {
1902    matches!(
1903        dt,
1904        DataType::Date | DataType::Timestamp { .. } | DataType::Time { .. }
1905    )
1906}
1907
1908/// Ensure boolean values in conditions.
1909///
1910/// Converts numeric values used in conditions into explicit boolean expressions.
1911/// For dialects that require explicit booleans in WHERE clauses.
1912///
1913/// Converts:
1914/// ```sql
1915/// WHERE column
1916/// ```
1917/// To:
1918/// ```sql
1919/// WHERE column <> 0
1920/// ```
1921///
1922/// And:
1923/// ```sql
1924/// WHERE 1
1925/// ```
1926/// To:
1927/// ```sql
1928/// WHERE 1 <> 0
1929/// ```
1930///
1931/// Reference: `transforms.py:703-721`
1932pub fn ensure_bools(expr: Expression) -> Result<Expression> {
1933    // First, recursively process Case WHEN conditions throughout the expression tree
1934    let expr = ensure_bools_in_case(expr);
1935    match expr {
1936        Expression::Select(mut select) => {
1937            // Transform WHERE clause condition
1938            if let Some(ref mut where_clause) = select.where_clause {
1939                where_clause.this = ensure_bool_condition(where_clause.this.clone());
1940            }
1941            // Transform HAVING clause condition
1942            if let Some(ref mut having) = select.having {
1943                having.this = ensure_bool_condition(having.this.clone());
1944            }
1945            Ok(Expression::Select(select))
1946        }
1947        // Top-level AND/OR/NOT expressions also need ensure_bools processing
1948        Expression::And(_) | Expression::Or(_) | Expression::Not(_) => {
1949            Ok(ensure_bool_condition(expr))
1950        }
1951        other => Ok(other),
1952    }
1953}
1954
1955/// Recursively walk the expression tree to find Case expressions and apply
1956/// ensure_bool_condition to their WHEN conditions. This ensures that
1957/// `CASE WHEN TRUE` becomes `CASE WHEN (1 = 1)` etc.
1958fn ensure_bools_in_case(expr: Expression) -> Expression {
1959    match expr {
1960        Expression::Case(mut case) => {
1961            case.whens = case
1962                .whens
1963                .into_iter()
1964                .map(|(condition, result)| {
1965                    let new_condition = ensure_bool_condition(ensure_bools_in_case(condition));
1966                    let new_result = ensure_bools_in_case(result);
1967                    (new_condition, new_result)
1968                })
1969                .collect();
1970            if let Some(else_expr) = case.else_ {
1971                case.else_ = Some(ensure_bools_in_case(else_expr));
1972            }
1973            Expression::Case(Box::new(*case))
1974        }
1975        Expression::Select(mut select) => {
1976            // Recursively process expressions in the SELECT list
1977            select.expressions = select
1978                .expressions
1979                .into_iter()
1980                .map(ensure_bools_in_case)
1981                .collect();
1982            // Process WHERE/HAVING are handled by ensure_bools main function
1983            Expression::Select(select)
1984        }
1985        Expression::Alias(mut alias) => {
1986            alias.this = ensure_bools_in_case(alias.this);
1987            Expression::Alias(alias)
1988        }
1989        Expression::Paren(mut paren) => {
1990            paren.this = ensure_bools_in_case(paren.this);
1991            Expression::Paren(paren)
1992        }
1993        other => other,
1994    }
1995}
1996
1997/// Helper to check if an expression is inherently boolean (returns a boolean value).
1998/// Inherently boolean expressions include comparisons, predicates, logical operators, etc.
1999fn is_boolean_expression(expr: &Expression) -> bool {
2000    matches!(
2001        expr,
2002        Expression::Eq(_)
2003            | Expression::Neq(_)
2004            | Expression::Lt(_)
2005            | Expression::Lte(_)
2006            | Expression::Gt(_)
2007            | Expression::Gte(_)
2008            | Expression::Is(_)
2009            | Expression::IsNull(_)
2010            | Expression::IsTrue(_)
2011            | Expression::IsFalse(_)
2012            | Expression::Like(_)
2013            | Expression::ILike(_)
2014            | Expression::SimilarTo(_)
2015            | Expression::Glob(_)
2016            | Expression::RegexpLike(_)
2017            | Expression::In(_)
2018            | Expression::Between(_)
2019            | Expression::Exists(_)
2020            | Expression::And(_)
2021            | Expression::Or(_)
2022            | Expression::Not(_)
2023            | Expression::Any(_)
2024            | Expression::All(_)
2025            | Expression::EqualNull(_)
2026    )
2027}
2028
2029/// Helper to wrap a non-boolean expression with `<> 0`
2030fn wrap_neq_zero(expr: Expression) -> Expression {
2031    Expression::Neq(Box::new(BinaryOp {
2032        left: expr,
2033        right: Expression::Literal(Literal::Number("0".to_string())),
2034        left_comments: vec![],
2035        operator_comments: vec![],
2036        trailing_comments: vec![],
2037    }))
2038}
2039
2040/// Helper to convert a condition expression to ensure it's boolean.
2041///
2042/// In TSQL, conditions in WHERE/HAVING must be boolean expressions.
2043/// Non-boolean expressions (columns, literals, casts, function calls, etc.)
2044/// are wrapped with `<> 0`. Boolean literals are converted to `(1 = 1)` or `(1 = 0)`.
2045fn ensure_bool_condition(expr: Expression) -> Expression {
2046    match expr {
2047        // For AND/OR, recursively process children
2048        Expression::And(op) => {
2049            let new_op = BinaryOp {
2050                left: ensure_bool_condition(op.left.clone()),
2051                right: ensure_bool_condition(op.right.clone()),
2052                left_comments: op.left_comments.clone(),
2053                operator_comments: op.operator_comments.clone(),
2054                trailing_comments: op.trailing_comments.clone(),
2055            };
2056            Expression::And(Box::new(new_op))
2057        }
2058        Expression::Or(op) => {
2059            let new_op = BinaryOp {
2060                left: ensure_bool_condition(op.left.clone()),
2061                right: ensure_bool_condition(op.right.clone()),
2062                left_comments: op.left_comments.clone(),
2063                operator_comments: op.operator_comments.clone(),
2064                trailing_comments: op.trailing_comments.clone(),
2065            };
2066            Expression::Or(Box::new(new_op))
2067        }
2068        // For NOT, recursively process the inner expression
2069        Expression::Not(op) => Expression::Not(Box::new(crate::expressions::UnaryOp {
2070            this: ensure_bool_condition(op.this.clone()),
2071        })),
2072        // For Paren, recurse into inner expression
2073        Expression::Paren(paren) => Expression::Paren(Box::new(crate::expressions::Paren {
2074            this: ensure_bool_condition(paren.this.clone()),
2075            trailing_comments: paren.trailing_comments.clone(),
2076        })),
2077        // Boolean literals: true -> (1 = 1), false -> (1 = 0)
2078        Expression::Boolean(BooleanLiteral { value: true }) => {
2079            Expression::Paren(Box::new(crate::expressions::Paren {
2080                this: Expression::Eq(Box::new(BinaryOp {
2081                    left: Expression::Literal(Literal::Number("1".to_string())),
2082                    right: Expression::Literal(Literal::Number("1".to_string())),
2083                    left_comments: vec![],
2084                    operator_comments: vec![],
2085                    trailing_comments: vec![],
2086                })),
2087                trailing_comments: vec![],
2088            }))
2089        }
2090        Expression::Boolean(BooleanLiteral { value: false }) => {
2091            Expression::Paren(Box::new(crate::expressions::Paren {
2092                this: Expression::Eq(Box::new(BinaryOp {
2093                    left: Expression::Literal(Literal::Number("1".to_string())),
2094                    right: Expression::Literal(Literal::Number("0".to_string())),
2095                    left_comments: vec![],
2096                    operator_comments: vec![],
2097                    trailing_comments: vec![],
2098                })),
2099                trailing_comments: vec![],
2100            }))
2101        }
2102        // Already boolean expressions pass through unchanged
2103        ref e if is_boolean_expression(e) => expr,
2104        // Everything else (Column, Identifier, Cast, Literal::Number, function calls, etc.)
2105        // gets wrapped with <> 0
2106        _ => wrap_neq_zero(expr),
2107    }
2108}
2109
2110/// Remove table qualifiers from column references.
2111///
2112/// Converts `table.column` to just `column` throughout the expression tree.
2113///
2114/// Reference: `transforms.py:724-730`
2115pub fn unqualify_columns(expr: Expression) -> Result<Expression> {
2116    Ok(unqualify_columns_recursive(expr))
2117}
2118
2119/// Recursively remove table qualifiers from column references
2120fn unqualify_columns_recursive(expr: Expression) -> Expression {
2121    match expr {
2122        Expression::Column(mut col) => {
2123            col.table = None;
2124            Expression::Column(col)
2125        }
2126        Expression::Select(mut select) => {
2127            select.expressions = select
2128                .expressions
2129                .into_iter()
2130                .map(unqualify_columns_recursive)
2131                .collect();
2132            if let Some(ref mut where_clause) = select.where_clause {
2133                where_clause.this = unqualify_columns_recursive(where_clause.this.clone());
2134            }
2135            if let Some(ref mut having) = select.having {
2136                having.this = unqualify_columns_recursive(having.this.clone());
2137            }
2138            if let Some(ref mut group_by) = select.group_by {
2139                group_by.expressions = group_by
2140                    .expressions
2141                    .iter()
2142                    .cloned()
2143                    .map(unqualify_columns_recursive)
2144                    .collect();
2145            }
2146            if let Some(ref mut order_by) = select.order_by {
2147                order_by.expressions = order_by
2148                    .expressions
2149                    .iter()
2150                    .map(|o| crate::expressions::Ordered {
2151                        this: unqualify_columns_recursive(o.this.clone()),
2152                        desc: o.desc,
2153                        nulls_first: o.nulls_first,
2154                        explicit_asc: o.explicit_asc,
2155                        with_fill: o.with_fill.clone(),
2156                    })
2157                    .collect();
2158            }
2159            for join in &mut select.joins {
2160                if let Some(ref mut on) = join.on {
2161                    *on = unqualify_columns_recursive(on.clone());
2162                }
2163            }
2164            Expression::Select(select)
2165        }
2166        Expression::Alias(mut alias) => {
2167            alias.this = unqualify_columns_recursive(alias.this);
2168            Expression::Alias(alias)
2169        }
2170        // Binary operations
2171        Expression::And(op) => Expression::And(Box::new(unqualify_binary_op(*op))),
2172        Expression::Or(op) => Expression::Or(Box::new(unqualify_binary_op(*op))),
2173        Expression::Eq(op) => Expression::Eq(Box::new(unqualify_binary_op(*op))),
2174        Expression::Neq(op) => Expression::Neq(Box::new(unqualify_binary_op(*op))),
2175        Expression::Lt(op) => Expression::Lt(Box::new(unqualify_binary_op(*op))),
2176        Expression::Lte(op) => Expression::Lte(Box::new(unqualify_binary_op(*op))),
2177        Expression::Gt(op) => Expression::Gt(Box::new(unqualify_binary_op(*op))),
2178        Expression::Gte(op) => Expression::Gte(Box::new(unqualify_binary_op(*op))),
2179        Expression::Add(op) => Expression::Add(Box::new(unqualify_binary_op(*op))),
2180        Expression::Sub(op) => Expression::Sub(Box::new(unqualify_binary_op(*op))),
2181        Expression::Mul(op) => Expression::Mul(Box::new(unqualify_binary_op(*op))),
2182        Expression::Div(op) => Expression::Div(Box::new(unqualify_binary_op(*op))),
2183        // Functions
2184        Expression::Function(mut func) => {
2185            func.args = func
2186                .args
2187                .into_iter()
2188                .map(unqualify_columns_recursive)
2189                .collect();
2190            Expression::Function(func)
2191        }
2192        Expression::AggregateFunction(mut func) => {
2193            func.args = func
2194                .args
2195                .into_iter()
2196                .map(unqualify_columns_recursive)
2197                .collect();
2198            Expression::AggregateFunction(func)
2199        }
2200        Expression::Case(mut case) => {
2201            case.whens = case
2202                .whens
2203                .into_iter()
2204                .map(|(cond, result)| {
2205                    (
2206                        unqualify_columns_recursive(cond),
2207                        unqualify_columns_recursive(result),
2208                    )
2209                })
2210                .collect();
2211            if let Some(else_expr) = case.else_ {
2212                case.else_ = Some(unqualify_columns_recursive(else_expr));
2213            }
2214            Expression::Case(case)
2215        }
2216        // Other expressions pass through unchanged
2217        other => other,
2218    }
2219}
2220
2221/// Helper to unqualify columns in a binary operation
2222fn unqualify_binary_op(mut op: BinaryOp) -> BinaryOp {
2223    op.left = unqualify_columns_recursive(op.left);
2224    op.right = unqualify_columns_recursive(op.right);
2225    op
2226}
2227
2228/// Convert UNNEST(GENERATE_DATE_ARRAY(...)) to recursive CTE.
2229///
2230/// For dialects that don't support GENERATE_DATE_ARRAY, this converts:
2231/// ```sql
2232/// SELECT * FROM UNNEST(GENERATE_DATE_ARRAY('2024-01-01', '2024-01-31', INTERVAL 1 DAY)) AS d(date_value)
2233/// ```
2234/// To a recursive CTE:
2235/// ```sql
2236/// WITH RECURSIVE _generated_dates(date_value) AS (
2237///     SELECT CAST('2024-01-01' AS DATE) AS date_value
2238///     UNION ALL
2239///     SELECT CAST(DATE_ADD(date_value, 1, DAY) AS DATE)
2240///     FROM _generated_dates
2241///     WHERE CAST(DATE_ADD(date_value, 1, DAY) AS DATE) <= CAST('2024-01-31' AS DATE)
2242/// )
2243/// SELECT date_value FROM _generated_dates
2244/// ```
2245///
2246/// Reference: `transforms.py:68-122`
2247pub fn unnest_generate_date_array_using_recursive_cte(expr: Expression) -> Result<Expression> {
2248    match expr {
2249        Expression::Select(mut select) => {
2250            let mut cte_count = 0;
2251            let mut new_ctes: Vec<crate::expressions::Cte> = Vec::new();
2252
2253            // Process existing CTE bodies first (to handle CTE-wrapped GENERATE_DATE_ARRAY)
2254            if let Some(ref mut with) = select.with {
2255                for cte in &mut with.ctes {
2256                    process_expression_for_gda(&mut cte.this, &mut cte_count, &mut new_ctes);
2257                }
2258            }
2259
2260            // Process FROM clause
2261            if let Some(ref mut from) = select.from {
2262                for table_expr in &mut from.expressions {
2263                    if let Some((cte, replacement)) =
2264                        try_convert_generate_date_array(table_expr, &mut cte_count)
2265                    {
2266                        new_ctes.push(cte);
2267                        *table_expr = replacement;
2268                    }
2269                }
2270            }
2271
2272            // Process JOINs
2273            for join in &mut select.joins {
2274                if let Some((cte, replacement)) =
2275                    try_convert_generate_date_array(&join.this, &mut cte_count)
2276                {
2277                    new_ctes.push(cte);
2278                    join.this = replacement;
2279                }
2280            }
2281
2282            // Add collected CTEs to the WITH clause
2283            if !new_ctes.is_empty() {
2284                let with_clause = select.with.get_or_insert_with(|| crate::expressions::With {
2285                    ctes: Vec::new(),
2286                    recursive: true, // Recursive CTEs
2287                    leading_comments: vec![],
2288                    search: None,
2289                });
2290                with_clause.recursive = true;
2291
2292                // Prepend new CTEs before existing ones
2293                let mut all_ctes = new_ctes;
2294                all_ctes.append(&mut with_clause.ctes);
2295                with_clause.ctes = all_ctes;
2296            }
2297
2298            Ok(Expression::Select(select))
2299        }
2300        other => Ok(other),
2301    }
2302}
2303
2304/// Recursively process an expression tree to find and convert UNNEST(GENERATE_DATE_ARRAY)
2305/// inside CTE bodies, subqueries, etc.
2306fn process_expression_for_gda(
2307    expr: &mut Expression,
2308    cte_count: &mut usize,
2309    new_ctes: &mut Vec<crate::expressions::Cte>,
2310) {
2311    match expr {
2312        Expression::Select(ref mut select) => {
2313            // Process FROM clause
2314            if let Some(ref mut from) = select.from {
2315                for table_expr in &mut from.expressions {
2316                    if let Some((cte, replacement)) =
2317                        try_convert_generate_date_array(table_expr, cte_count)
2318                    {
2319                        new_ctes.push(cte);
2320                        *table_expr = replacement;
2321                    }
2322                }
2323            }
2324            // Process JOINs
2325            for join in &mut select.joins {
2326                if let Some((cte, replacement)) =
2327                    try_convert_generate_date_array(&join.this, cte_count)
2328                {
2329                    new_ctes.push(cte);
2330                    join.this = replacement;
2331                }
2332            }
2333        }
2334        Expression::Union(ref mut u) => {
2335            process_expression_for_gda(&mut u.left, cte_count, new_ctes);
2336            process_expression_for_gda(&mut u.right, cte_count, new_ctes);
2337        }
2338        Expression::Subquery(ref mut sq) => {
2339            process_expression_for_gda(&mut sq.this, cte_count, new_ctes);
2340        }
2341        _ => {}
2342    }
2343}
2344
2345/// Try to convert an UNNEST(GENERATE_DATE_ARRAY(...)) to a recursive CTE reference.
2346/// `column_name_override` allows the caller to specify a custom column name (from alias).
2347fn try_convert_generate_date_array(
2348    expr: &Expression,
2349    cte_count: &mut usize,
2350) -> Option<(crate::expressions::Cte, Expression)> {
2351    try_convert_generate_date_array_with_name(expr, cte_count, None)
2352}
2353
2354fn try_convert_generate_date_array_with_name(
2355    expr: &Expression,
2356    cte_count: &mut usize,
2357    column_name_override: Option<&str>,
2358) -> Option<(crate::expressions::Cte, Expression)> {
2359    // Helper: extract (start, end, step) from GENERATE_DATE_ARRAY/GenerateSeries variants
2360    fn extract_gda_args(
2361        inner: &Expression,
2362    ) -> Option<(&Expression, &Expression, Option<&Expression>)> {
2363        match inner {
2364            Expression::GenerateDateArray(gda) => {
2365                let start = gda.start.as_ref()?;
2366                let end = gda.end.as_ref()?;
2367                let step = gda.step.as_deref();
2368                Some((start, end, step))
2369            }
2370            Expression::GenerateSeries(gs) => {
2371                let start = gs.start.as_deref()?;
2372                let end = gs.end.as_deref()?;
2373                let step = gs.step.as_deref();
2374                Some((start, end, step))
2375            }
2376            Expression::Function(f) if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") => {
2377                if f.args.len() >= 2 {
2378                    let start = &f.args[0];
2379                    let end = &f.args[1];
2380                    let step = f.args.get(2);
2381                    Some((start, end, step))
2382                } else {
2383                    None
2384                }
2385            }
2386            _ => None,
2387        }
2388    }
2389
2390    // Look for UNNEST containing GENERATE_DATE_ARRAY
2391    if let Expression::Unnest(unnest) = expr {
2392        if let Some((start, end, step_opt)) = extract_gda_args(&unnest.this) {
2393            let start = start;
2394            let end = end;
2395            let step: Option<&Expression> = step_opt;
2396
2397            // Generate CTE name
2398            let cte_name = if *cte_count == 0 {
2399                "_generated_dates".to_string()
2400            } else {
2401                format!("_generated_dates_{}", cte_count)
2402            };
2403            *cte_count += 1;
2404
2405            let column_name =
2406                Identifier::new(column_name_override.unwrap_or("date_value").to_string());
2407
2408            // Helper: wrap expression in CAST(... AS DATE) unless already a date literal or CAST to DATE
2409            let cast_to_date = |expr: &Expression| -> Expression {
2410                match expr {
2411                    Expression::Literal(Literal::Date(_)) => {
2412                        // DATE '...' -> convert to CAST('...' AS DATE) to match expected output
2413                        if let Expression::Literal(Literal::Date(d)) = expr {
2414                            Expression::Cast(Box::new(Cast {
2415                                this: Expression::Literal(Literal::String(d.clone())),
2416                                to: DataType::Date,
2417                                trailing_comments: vec![],
2418                                double_colon_syntax: false,
2419                                format: None,
2420                                default: None,
2421                            }))
2422                        } else {
2423                            unreachable!()
2424                        }
2425                    }
2426                    Expression::Cast(c) if matches!(c.to, DataType::Date) => expr.clone(),
2427                    _ => Expression::Cast(Box::new(Cast {
2428                        this: expr.clone(),
2429                        to: DataType::Date,
2430                        trailing_comments: vec![],
2431                        double_colon_syntax: false,
2432                        format: None,
2433                        default: None,
2434                    })),
2435                }
2436            };
2437
2438            // Build base case: SELECT CAST(start AS DATE) AS date_value
2439            let base_select = Select {
2440                expressions: vec![Expression::Alias(Box::new(crate::expressions::Alias {
2441                    this: cast_to_date(start),
2442                    alias: column_name.clone(),
2443                    column_aliases: vec![],
2444                    pre_alias_comments: vec![],
2445                    trailing_comments: vec![],
2446                }))],
2447                ..Select::new()
2448            };
2449
2450            // Normalize interval: convert String("1") -> Number("1") so it generates without quotes
2451            let normalize_interval = |expr: &Expression| -> Expression {
2452                if let Expression::Interval(ref iv) = expr {
2453                    let mut iv_clone = iv.as_ref().clone();
2454                    if let Some(Expression::Literal(Literal::String(ref s))) = iv_clone.this {
2455                        // Convert numeric strings to Number literals for unquoted output
2456                        if s.parse::<f64>().is_ok() {
2457                            iv_clone.this = Some(Expression::Literal(Literal::Number(s.clone())));
2458                        }
2459                    }
2460                    Expression::Interval(Box::new(iv_clone))
2461                } else {
2462                    expr.clone()
2463                }
2464            };
2465
2466            // Build recursive case: DateAdd(date_value, count, unit) from CTE where result <= end
2467            // Extract interval unit and count from step expression
2468            let normalized_step = step.map(|s| normalize_interval(s)).unwrap_or_else(|| {
2469                Expression::Interval(Box::new(crate::expressions::Interval {
2470                    this: Some(Expression::Literal(Literal::Number("1".to_string()))),
2471                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
2472                        unit: crate::expressions::IntervalUnit::Day,
2473                        use_plural: false,
2474                    }),
2475                }))
2476            });
2477
2478            // Extract unit and count from interval expression to build DateAddFunc
2479            let (add_unit, add_count) = extract_interval_unit_and_count(&normalized_step);
2480
2481            let date_add_expr = Expression::DateAdd(Box::new(crate::expressions::DateAddFunc {
2482                this: Expression::Column(crate::expressions::Column {
2483                    name: column_name.clone(),
2484                    table: None,
2485                    join_mark: false,
2486                    trailing_comments: vec![],
2487                    span: None,
2488                }),
2489                interval: add_count,
2490                unit: add_unit,
2491            }));
2492
2493            let cast_date_add = Expression::Cast(Box::new(Cast {
2494                this: date_add_expr.clone(),
2495                to: DataType::Date,
2496                trailing_comments: vec![],
2497                double_colon_syntax: false,
2498                format: None,
2499                default: None,
2500            }));
2501
2502            let recursive_select = Select {
2503                expressions: vec![cast_date_add.clone()],
2504                from: Some(From {
2505                    expressions: vec![Expression::Table(crate::expressions::TableRef::new(
2506                        &cte_name,
2507                    ))],
2508                }),
2509                where_clause: Some(Where {
2510                    this: Expression::Lte(Box::new(BinaryOp {
2511                        left: cast_date_add,
2512                        right: cast_to_date(end),
2513                        left_comments: vec![],
2514                        operator_comments: vec![],
2515                        trailing_comments: vec![],
2516                    })),
2517                }),
2518                ..Select::new()
2519            };
2520
2521            // Build UNION ALL of base and recursive
2522            let union = crate::expressions::Union {
2523                left: Expression::Select(Box::new(base_select)),
2524                right: Expression::Select(Box::new(recursive_select)),
2525                all: true, // UNION ALL
2526                distinct: false,
2527                with: None,
2528                order_by: None,
2529                limit: None,
2530                offset: None,
2531                distribute_by: None,
2532                sort_by: None,
2533                cluster_by: None,
2534                by_name: false,
2535                side: None,
2536                kind: None,
2537                corresponding: false,
2538                strict: false,
2539                on_columns: Vec::new(),
2540            };
2541
2542            // Create CTE
2543            let cte = crate::expressions::Cte {
2544                this: Expression::Union(Box::new(union)),
2545                alias: Identifier::new(cte_name.clone()),
2546                columns: vec![column_name.clone()],
2547                materialized: None,
2548                key_expressions: Vec::new(),
2549                alias_first: true,
2550                comments: Vec::new(),
2551            };
2552
2553            // Create replacement: SELECT date_value FROM cte_name
2554            let replacement_select = Select {
2555                expressions: vec![Expression::Column(crate::expressions::Column {
2556                    name: column_name,
2557                    table: None,
2558                    join_mark: false,
2559                    trailing_comments: vec![],
2560                    span: None,
2561                })],
2562                from: Some(From {
2563                    expressions: vec![Expression::Table(crate::expressions::TableRef::new(
2564                        &cte_name,
2565                    ))],
2566                }),
2567                ..Select::new()
2568            };
2569
2570            let replacement = Expression::Subquery(Box::new(Subquery {
2571                this: Expression::Select(Box::new(replacement_select)),
2572                alias: Some(Identifier::new(cte_name)),
2573                column_aliases: vec![],
2574                order_by: None,
2575                limit: None,
2576                offset: None,
2577                distribute_by: None,
2578                sort_by: None,
2579                cluster_by: None,
2580                lateral: false,
2581                modifiers_inside: false,
2582                trailing_comments: vec![],
2583            }));
2584
2585            return Some((cte, replacement));
2586        }
2587    }
2588
2589    // Also check for aliased UNNEST like UNNEST(...) AS _q(date_week)
2590    if let Expression::Alias(alias) = expr {
2591        // Extract column name from alias column_aliases if present
2592        let col_name = alias.column_aliases.first().map(|id| id.name.as_str());
2593        if let Some((cte, replacement)) =
2594            try_convert_generate_date_array_with_name(&alias.this, cte_count, col_name)
2595        {
2596            // If we extracted a column name from the alias, don't preserve the outer alias
2597            // since the CTE now uses that column name directly
2598            if col_name.is_some() {
2599                return Some((cte, replacement));
2600            }
2601            let new_alias = Expression::Alias(Box::new(crate::expressions::Alias {
2602                this: replacement,
2603                alias: alias.alias.clone(),
2604                column_aliases: alias.column_aliases.clone(),
2605                pre_alias_comments: alias.pre_alias_comments.clone(),
2606                trailing_comments: alias.trailing_comments.clone(),
2607            }));
2608            return Some((cte, new_alias));
2609        }
2610    }
2611
2612    None
2613}
2614
2615/// Extract interval unit and count from an interval expression.
2616/// Handles both structured intervals (with separate unit field) and
2617/// string-encoded intervals like `INTERVAL '1 WEEK'` where unit is None
2618/// and the value contains both count and unit.
2619fn extract_interval_unit_and_count(
2620    expr: &Expression,
2621) -> (crate::expressions::IntervalUnit, Expression) {
2622    use crate::expressions::{IntervalUnit, IntervalUnitSpec, Literal};
2623
2624    if let Expression::Interval(ref iv) = expr {
2625        // First try: structured unit field
2626        if let Some(ref unit_spec) = iv.unit {
2627            if let IntervalUnitSpec::Simple { unit, .. } = unit_spec {
2628                let count = match &iv.this {
2629                    Some(e) => e.clone(),
2630                    None => Expression::Literal(Literal::Number("1".to_string())),
2631                };
2632                return (unit.clone(), count);
2633            }
2634        }
2635
2636        // Second try: parse from string value like "1 WEEK" or "1"
2637        if let Some(ref val_expr) = iv.this {
2638            match val_expr {
2639                Expression::Literal(Literal::String(s))
2640                | Expression::Literal(Literal::Number(s)) => {
2641                    // Try to parse "count unit" format like "1 WEEK", "1 MONTH"
2642                    let parts: Vec<&str> = s.trim().splitn(2, char::is_whitespace).collect();
2643                    if parts.len() == 2 {
2644                        let count_str = parts[0].trim();
2645                        let unit_str = parts[1].trim().to_uppercase();
2646                        let unit = match unit_str.as_str() {
2647                            "YEAR" | "YEARS" => IntervalUnit::Year,
2648                            "QUARTER" | "QUARTERS" => IntervalUnit::Quarter,
2649                            "MONTH" | "MONTHS" => IntervalUnit::Month,
2650                            "WEEK" | "WEEKS" => IntervalUnit::Week,
2651                            "DAY" | "DAYS" => IntervalUnit::Day,
2652                            "HOUR" | "HOURS" => IntervalUnit::Hour,
2653                            "MINUTE" | "MINUTES" => IntervalUnit::Minute,
2654                            "SECOND" | "SECONDS" => IntervalUnit::Second,
2655                            "MILLISECOND" | "MILLISECONDS" => IntervalUnit::Millisecond,
2656                            "MICROSECOND" | "MICROSECONDS" => IntervalUnit::Microsecond,
2657                            _ => IntervalUnit::Day,
2658                        };
2659                        return (
2660                            unit,
2661                            Expression::Literal(Literal::Number(count_str.to_string())),
2662                        );
2663                    }
2664                    // Just a number with no unit - default to Day
2665                    if s.parse::<f64>().is_ok() {
2666                        return (
2667                            IntervalUnit::Day,
2668                            Expression::Literal(Literal::Number(s.clone())),
2669                        );
2670                    }
2671                }
2672                _ => {}
2673            }
2674        }
2675
2676        // Fallback
2677        (
2678            IntervalUnit::Day,
2679            Expression::Literal(Literal::Number("1".to_string())),
2680        )
2681    } else {
2682        (
2683            IntervalUnit::Day,
2684            Expression::Literal(Literal::Number("1".to_string())),
2685        )
2686    }
2687}
2688
2689/// Convert ILIKE to LOWER(x) LIKE LOWER(y).
2690///
2691/// For dialects that don't support ILIKE (case-insensitive LIKE), this converts:
2692/// ```sql
2693/// SELECT * FROM t WHERE x ILIKE '%pattern%'
2694/// ```
2695/// To:
2696/// ```sql
2697/// SELECT * FROM t WHERE LOWER(x) LIKE LOWER('%pattern%')
2698/// ```
2699///
2700/// Reference: `generator.py:no_ilike_sql()`
2701pub fn no_ilike_sql(expr: Expression) -> Result<Expression> {
2702    match expr {
2703        Expression::ILike(ilike) => {
2704            // Create LOWER(left) LIKE LOWER(right)
2705            let lower_left = Expression::Function(Box::new(crate::expressions::Function {
2706                name: "LOWER".to_string(),
2707                args: vec![ilike.left],
2708                distinct: false,
2709                trailing_comments: vec![],
2710                use_bracket_syntax: false,
2711                no_parens: false,
2712                quoted: false,
2713                span: None,
2714            }));
2715
2716            let lower_right = Expression::Function(Box::new(crate::expressions::Function {
2717                name: "LOWER".to_string(),
2718                args: vec![ilike.right],
2719                distinct: false,
2720                trailing_comments: vec![],
2721                use_bracket_syntax: false,
2722                no_parens: false,
2723                quoted: false,
2724                span: None,
2725            }));
2726
2727            Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
2728                left: lower_left,
2729                right: lower_right,
2730                escape: ilike.escape,
2731                quantifier: ilike.quantifier,
2732            })))
2733        }
2734        other => Ok(other),
2735    }
2736}
2737
2738/// Convert TryCast to Cast.
2739///
2740/// For dialects that don't support TRY_CAST (safe cast that returns NULL on error),
2741/// this converts TRY_CAST to regular CAST. Note: This may cause runtime errors
2742/// for invalid casts that TRY_CAST would handle gracefully.
2743///
2744/// Reference: `generator.py:no_trycast_sql()`
2745pub fn no_trycast_sql(expr: Expression) -> Result<Expression> {
2746    match expr {
2747        Expression::TryCast(try_cast) => Ok(Expression::Cast(try_cast)),
2748        other => Ok(other),
2749    }
2750}
2751
2752/// Convert SafeCast to Cast.
2753///
2754/// For dialects that don't support SAFE_CAST (BigQuery's safe cast syntax),
2755/// this converts SAFE_CAST to regular CAST.
2756pub fn no_safe_cast_sql(expr: Expression) -> Result<Expression> {
2757    match expr {
2758        Expression::SafeCast(safe_cast) => Ok(Expression::Cast(safe_cast)),
2759        other => Ok(other),
2760    }
2761}
2762
2763/// Convert COMMENT ON statements to inline comments.
2764///
2765/// For dialects that don't support COMMENT ON syntax, this can transform
2766/// comment statements into inline comments or skip them entirely.
2767///
2768/// Reference: `generator.py:no_comment_column_constraint_sql()`
2769pub fn no_comment_column_constraint(expr: Expression) -> Result<Expression> {
2770    // For now, just pass through - comment handling is done in generator
2771    Ok(expr)
2772}
2773
2774/// Convert TABLE GENERATE_SERIES to UNNEST(GENERATE_SERIES(...)).
2775///
2776/// Some dialects use GENERATE_SERIES as a table-valued function, while others
2777/// prefer the UNNEST syntax. This converts:
2778/// ```sql
2779/// SELECT * FROM GENERATE_SERIES(1, 10) AS t(n)
2780/// ```
2781/// To:
2782/// ```sql
2783/// SELECT * FROM UNNEST(GENERATE_SERIES(1, 10)) AS _u(n)
2784/// ```
2785///
2786/// Reference: `transforms.py:125-135`
2787pub fn unnest_generate_series(expr: Expression) -> Result<Expression> {
2788    // Convert TABLE GENERATE_SERIES to UNNEST(GENERATE_SERIES(...))
2789    // This handles the case where GENERATE_SERIES is used as a table-valued function
2790    match expr {
2791        Expression::Table(ref table) => {
2792            // Check if the table name matches GENERATE_SERIES pattern
2793            // In practice, this would be Expression::GenerateSeries wrapped in a Table context
2794            if table.name.name.to_uppercase() == "GENERATE_SERIES" {
2795                // Create UNNEST wrapper
2796                let unnest = Expression::Unnest(Box::new(UnnestFunc {
2797                    this: expr.clone(),
2798                    expressions: Vec::new(),
2799                    with_ordinality: false,
2800                    alias: None,
2801                    offset_alias: None,
2802                }));
2803
2804                // If there's an alias, wrap in alias
2805                return Ok(Expression::Alias(Box::new(crate::expressions::Alias {
2806                    this: unnest,
2807                    alias: Identifier::new("_u".to_string()),
2808                    column_aliases: vec![],
2809                    pre_alias_comments: vec![],
2810                    trailing_comments: vec![],
2811                })));
2812            }
2813            Ok(expr)
2814        }
2815        Expression::GenerateSeries(gs) => {
2816            // Wrap GenerateSeries directly in UNNEST
2817            let unnest = Expression::Unnest(Box::new(UnnestFunc {
2818                this: Expression::GenerateSeries(gs),
2819                expressions: Vec::new(),
2820                with_ordinality: false,
2821                alias: None,
2822                offset_alias: None,
2823            }));
2824            Ok(unnest)
2825        }
2826        other => Ok(other),
2827    }
2828}
2829
2830/// Convert UNNEST(GENERATE_SERIES(start, end, step)) to a subquery for PostgreSQL.
2831///
2832/// PostgreSQL's GENERATE_SERIES returns rows directly, so UNNEST wrapping is unnecessary.
2833/// Instead, convert to:
2834/// ```sql
2835/// (SELECT CAST(value AS DATE) FROM GENERATE_SERIES(start, end, step) AS _t(value)) AS _unnested_generate_series
2836/// ```
2837///
2838/// This handles the case where GENERATE_DATE_ARRAY was converted to GENERATE_SERIES
2839/// during cross-dialect normalization, but the original had UNNEST wrapping.
2840pub fn unwrap_unnest_generate_series_for_postgres(expr: Expression) -> Result<Expression> {
2841    use crate::dialects::transform_recursive;
2842    transform_recursive(expr, &unwrap_unnest_generate_series_single)
2843}
2844
2845fn unwrap_unnest_generate_series_single(expr: Expression) -> Result<Expression> {
2846    use crate::expressions::*;
2847    // Match UNNEST(GENERATE_SERIES(...)) patterns in FROM clauses
2848    match expr {
2849        Expression::Select(mut select) => {
2850            // Process FROM clause
2851            if let Some(ref mut from) = select.from {
2852                for table_expr in &mut from.expressions {
2853                    if let Some(replacement) = try_unwrap_unnest_gen_series(table_expr) {
2854                        *table_expr = replacement;
2855                    }
2856                }
2857            }
2858            // Process JOINs
2859            for join in &mut select.joins {
2860                if let Some(replacement) = try_unwrap_unnest_gen_series(&join.this) {
2861                    join.this = replacement;
2862                }
2863            }
2864            Ok(Expression::Select(select))
2865        }
2866        other => Ok(other),
2867    }
2868}
2869
2870/// Try to convert an UNNEST(GENERATE_SERIES(...)) to a PostgreSQL subquery.
2871/// Returns the replacement expression if applicable.
2872fn try_unwrap_unnest_gen_series(expr: &Expression) -> Option<Expression> {
2873    use crate::expressions::*;
2874
2875    // Match Unnest containing GenerateSeries
2876    let gen_series = match expr {
2877        Expression::Unnest(unnest) => {
2878            if let Expression::GenerateSeries(ref gs) = unnest.this {
2879                Some(gs.as_ref().clone())
2880            } else {
2881                None
2882            }
2883        }
2884        Expression::Alias(alias) => {
2885            if let Expression::Unnest(ref unnest) = alias.this {
2886                if let Expression::GenerateSeries(ref gs) = unnest.this {
2887                    Some(gs.as_ref().clone())
2888                } else {
2889                    None
2890                }
2891            } else {
2892                None
2893            }
2894        }
2895        _ => None,
2896    };
2897
2898    let gs = gen_series?;
2899
2900    // Build: (SELECT CAST(value AS DATE) FROM GENERATE_SERIES(start, end, step) AS _t(value)) AS _unnested_generate_series
2901    let value_col = Expression::Column(Column {
2902        name: Identifier::new("value".to_string()),
2903        table: None,
2904        join_mark: false,
2905        trailing_comments: vec![],
2906        span: None,
2907    });
2908
2909    let cast_value = Expression::Cast(Box::new(Cast {
2910        this: value_col,
2911        to: DataType::Date,
2912        trailing_comments: vec![],
2913        double_colon_syntax: false,
2914        format: None,
2915        default: None,
2916    }));
2917
2918    let gen_series_expr = Expression::GenerateSeries(Box::new(gs));
2919
2920    // GENERATE_SERIES(...) AS _t(value)
2921    let gen_series_aliased = Expression::Alias(Box::new(Alias {
2922        this: gen_series_expr,
2923        alias: Identifier::new("_t".to_string()),
2924        column_aliases: vec![Identifier::new("value".to_string())],
2925        pre_alias_comments: vec![],
2926        trailing_comments: vec![],
2927    }));
2928
2929    let mut inner_select = Select::new();
2930    inner_select.expressions = vec![cast_value];
2931    inner_select.from = Some(From {
2932        expressions: vec![gen_series_aliased],
2933    });
2934
2935    let inner_select_expr = Expression::Select(Box::new(inner_select));
2936
2937    let subquery = Expression::Subquery(Box::new(Subquery {
2938        this: inner_select_expr,
2939        alias: None,
2940        column_aliases: vec![],
2941        order_by: None,
2942        limit: None,
2943        offset: None,
2944        distribute_by: None,
2945        sort_by: None,
2946        cluster_by: None,
2947        lateral: false,
2948        modifiers_inside: false,
2949        trailing_comments: vec![],
2950    }));
2951
2952    // Wrap in alias AS _unnested_generate_series
2953    Some(Expression::Alias(Box::new(Alias {
2954        this: subquery,
2955        alias: Identifier::new("_unnested_generate_series".to_string()),
2956        column_aliases: vec![],
2957        pre_alias_comments: vec![],
2958        trailing_comments: vec![],
2959    })))
2960}
2961
2962/// Expand BETWEEN expressions in DELETE statements to >= AND <=
2963///
2964/// Some dialects (like StarRocks) don't support BETWEEN in DELETE statements
2965/// or prefer the expanded form. This transforms:
2966///   `DELETE FROM t WHERE a BETWEEN b AND c`
2967/// to:
2968///   `DELETE FROM t WHERE a >= b AND a <= c`
2969pub fn expand_between_in_delete(expr: Expression) -> Result<Expression> {
2970    match expr {
2971        Expression::Delete(mut delete) => {
2972            // If there's a WHERE clause, expand any BETWEEN expressions in it
2973            if let Some(ref mut where_clause) = delete.where_clause {
2974                where_clause.this = expand_between_recursive(where_clause.this.clone());
2975            }
2976            Ok(Expression::Delete(delete))
2977        }
2978        other => Ok(other),
2979    }
2980}
2981
2982/// Recursively expand BETWEEN expressions to >= AND <=
2983fn expand_between_recursive(expr: Expression) -> Expression {
2984    match expr {
2985        // Expand: a BETWEEN b AND c -> a >= b AND a <= c
2986        // Expand: a NOT BETWEEN b AND c -> a < b OR a > c
2987        Expression::Between(between) => {
2988            let this = expand_between_recursive(between.this.clone());
2989            let low = expand_between_recursive(between.low);
2990            let high = expand_between_recursive(between.high);
2991
2992            if between.not {
2993                // NOT BETWEEN: a < b OR a > c
2994                Expression::Or(Box::new(BinaryOp::new(
2995                    Expression::Lt(Box::new(BinaryOp::new(this.clone(), low))),
2996                    Expression::Gt(Box::new(BinaryOp::new(this, high))),
2997                )))
2998            } else {
2999                // BETWEEN: a >= b AND a <= c
3000                Expression::And(Box::new(BinaryOp::new(
3001                    Expression::Gte(Box::new(BinaryOp::new(this.clone(), low))),
3002                    Expression::Lte(Box::new(BinaryOp::new(this, high))),
3003                )))
3004            }
3005        }
3006
3007        // Recursively process AND/OR expressions
3008        Expression::And(mut op) => {
3009            op.left = expand_between_recursive(op.left);
3010            op.right = expand_between_recursive(op.right);
3011            Expression::And(op)
3012        }
3013        Expression::Or(mut op) => {
3014            op.left = expand_between_recursive(op.left);
3015            op.right = expand_between_recursive(op.right);
3016            Expression::Or(op)
3017        }
3018        Expression::Not(mut op) => {
3019            op.this = expand_between_recursive(op.this);
3020            Expression::Not(op)
3021        }
3022
3023        // Recursively process parenthesized expressions
3024        Expression::Paren(mut paren) => {
3025            paren.this = expand_between_recursive(paren.this);
3026            Expression::Paren(paren)
3027        }
3028
3029        // Pass through everything else unchanged
3030        other => other,
3031    }
3032}
3033
3034/// Push down CTE column names into SELECT expressions.
3035///
3036/// BigQuery doesn't support column names when defining a CTE, e.g.:
3037/// `WITH vartab(v) AS (SELECT ...)` is not valid.
3038/// Instead, it expects: `WITH vartab AS (SELECT ... AS v)`.
3039///
3040/// This transform removes the CTE column aliases and adds them as
3041/// aliases on the SELECT expressions.
3042pub fn pushdown_cte_column_names(expr: Expression) -> Result<Expression> {
3043    match expr {
3044        Expression::Select(mut select) => {
3045            if let Some(ref mut with) = select.with {
3046                for cte in &mut with.ctes {
3047                    if !cte.columns.is_empty() {
3048                        // Check if the CTE body is a star query - if so, just strip column names
3049                        let is_star = matches!(&cte.this, Expression::Select(s) if
3050                            s.expressions.len() == 1 && matches!(&s.expressions[0], Expression::Star(_)));
3051
3052                        if is_star {
3053                            // Can't push down column names for star queries, just remove them
3054                            cte.columns.clear();
3055                            continue;
3056                        }
3057
3058                        // Extract column names
3059                        let column_names: Vec<Identifier> = cte.columns.drain(..).collect();
3060
3061                        // Push column names down into the SELECT expressions
3062                        if let Expression::Select(ref mut inner_select) = cte.this {
3063                            let new_exprs: Vec<Expression> = inner_select
3064                                .expressions
3065                                .drain(..)
3066                                .zip(
3067                                    column_names
3068                                        .into_iter()
3069                                        .chain(std::iter::repeat_with(|| Identifier::new(""))),
3070                                )
3071                                .map(|(expr, col_name)| {
3072                                    if col_name.name.is_empty() {
3073                                        return expr;
3074                                    }
3075                                    // If already aliased, replace the alias
3076                                    match expr {
3077                                        Expression::Alias(mut a) => {
3078                                            a.alias = col_name;
3079                                            Expression::Alias(a)
3080                                        }
3081                                        other => {
3082                                            Expression::Alias(Box::new(crate::expressions::Alias {
3083                                                this: other,
3084                                                alias: col_name,
3085                                                column_aliases: Vec::new(),
3086                                                pre_alias_comments: Vec::new(),
3087                                                trailing_comments: Vec::new(),
3088                                            }))
3089                                        }
3090                                    }
3091                                })
3092                                .collect();
3093                            inner_select.expressions = new_exprs;
3094                        }
3095                    }
3096                }
3097            }
3098            Ok(Expression::Select(select))
3099        }
3100        other => Ok(other),
3101    }
3102}
3103
3104/// Simplify nested parentheses around VALUES in FROM clause.
3105/// Converts `FROM ((VALUES (1)))` to `FROM (VALUES (1))` by stripping redundant wrapping.
3106/// Handles various nesting patterns: Subquery(Paren(Values)), Paren(Paren(Values)), etc.
3107pub fn simplify_nested_paren_values(expr: Expression) -> Result<Expression> {
3108    match expr {
3109        Expression::Select(mut select) => {
3110            if let Some(ref mut from) = select.from {
3111                for from_item in from.expressions.iter_mut() {
3112                    simplify_paren_values_in_from(from_item);
3113                }
3114            }
3115            Ok(Expression::Select(select))
3116        }
3117        other => Ok(other),
3118    }
3119}
3120
3121fn simplify_paren_values_in_from(expr: &mut Expression) {
3122    // Check various patterns and build replacement if needed
3123    let replacement = match expr {
3124        // Subquery(Paren(Values)) -> Subquery with Values directly
3125        Expression::Subquery(ref subquery) => {
3126            if let Expression::Paren(ref paren) = subquery.this {
3127                if matches!(&paren.this, Expression::Values(_)) {
3128                    let mut new_sub = subquery.as_ref().clone();
3129                    new_sub.this = paren.this.clone();
3130                    Some(Expression::Subquery(Box::new(new_sub)))
3131                } else {
3132                    None
3133                }
3134            } else {
3135                None
3136            }
3137        }
3138        // Paren(Subquery(Values)) -> Subquery(Values) - strip the Paren wrapper
3139        // Paren(Paren(Values)) -> Paren(Values) - strip one layer
3140        Expression::Paren(ref outer_paren) => {
3141            if let Expression::Subquery(ref subquery) = outer_paren.this {
3142                // Paren(Subquery(Values)) -> Subquery(Values) - strip outer Paren
3143                if matches!(&subquery.this, Expression::Values(_)) {
3144                    Some(outer_paren.this.clone())
3145                }
3146                // Paren(Subquery(Paren(Values))) -> Subquery(Values)
3147                else if let Expression::Paren(ref paren) = subquery.this {
3148                    if matches!(&paren.this, Expression::Values(_)) {
3149                        let mut new_sub = subquery.as_ref().clone();
3150                        new_sub.this = paren.this.clone();
3151                        Some(Expression::Subquery(Box::new(new_sub)))
3152                    } else {
3153                        None
3154                    }
3155                } else {
3156                    None
3157                }
3158            } else if let Expression::Paren(ref inner_paren) = outer_paren.this {
3159                if matches!(&inner_paren.this, Expression::Values(_)) {
3160                    Some(outer_paren.this.clone())
3161                } else {
3162                    None
3163                }
3164            } else {
3165                None
3166            }
3167        }
3168        _ => None,
3169    };
3170    if let Some(new_expr) = replacement {
3171        *expr = new_expr;
3172    }
3173}
3174
3175/// Add auto-generated table aliases (like `_t0`) for POSEXPLODE/EXPLODE in FROM clause
3176/// when the alias has column_aliases but no alias name.
3177/// This is needed for Spark target: `FROM POSEXPLODE(x) AS (a, b)` -> `FROM POSEXPLODE(x) AS _t0(a, b)`
3178pub fn add_auto_table_alias(expr: Expression) -> Result<Expression> {
3179    match expr {
3180        Expression::Select(mut select) => {
3181            // Process FROM expressions
3182            if let Some(ref mut from) = select.from {
3183                let mut counter = 0usize;
3184                for from_item in from.expressions.iter_mut() {
3185                    add_auto_alias_to_from_item(from_item, &mut counter);
3186                }
3187            }
3188            Ok(Expression::Select(select))
3189        }
3190        other => Ok(other),
3191    }
3192}
3193
3194fn add_auto_alias_to_from_item(expr: &mut Expression, counter: &mut usize) {
3195    use crate::expressions::Identifier;
3196
3197    match expr {
3198        Expression::Alias(ref mut alias) => {
3199            // If the alias name is empty and there are column_aliases, add auto-generated name
3200            if alias.alias.name.is_empty() && !alias.column_aliases.is_empty() {
3201                alias.alias = Identifier::new(format!("_t{}", counter));
3202                *counter += 1;
3203            }
3204        }
3205        _ => {}
3206    }
3207}
3208
3209/// Convert BigQuery-style UNNEST aliases to column-alias format for DuckDB/Presto/Spark.
3210///
3211/// BigQuery uses: `UNNEST(arr) AS x` where x is a column alias.
3212/// DuckDB/Presto/Spark need: `UNNEST(arr) AS _t0(x)` where _t0 is a table alias and x is the column alias.
3213///
3214/// Propagate struct field names from the first named struct in an array to subsequent unnamed structs.
3215///
3216/// In BigQuery, `[STRUCT('Alice' AS name, 85 AS score), STRUCT('Bob', 92)]` means the second struct
3217/// should inherit field names from the first: `[STRUCT('Alice' AS name, 85 AS score), STRUCT('Bob' AS name, 92 AS score)]`.
3218pub fn propagate_struct_field_names(expr: Expression) -> Result<Expression> {
3219    use crate::dialects::transform_recursive;
3220    transform_recursive(expr, &propagate_struct_names_in_expr)
3221}
3222
3223fn propagate_struct_names_in_expr(expr: Expression) -> Result<Expression> {
3224    use crate::expressions::{Alias, ArrayConstructor, Function, Identifier};
3225
3226    /// Helper to propagate struct field names within an array of expressions
3227    fn propagate_in_elements(elements: &[Expression]) -> Option<Vec<Expression>> {
3228        if elements.len() <= 1 {
3229            return None;
3230        }
3231        // Check if first element is a named STRUCT function
3232        if let Some(Expression::Function(ref first_struct)) = elements.first() {
3233            if first_struct.name.eq_ignore_ascii_case("STRUCT") {
3234                // Extract field names from first struct
3235                let field_names: Vec<Option<String>> = first_struct
3236                    .args
3237                    .iter()
3238                    .map(|arg| {
3239                        if let Expression::Alias(a) = arg {
3240                            Some(a.alias.name.clone())
3241                        } else {
3242                            None
3243                        }
3244                    })
3245                    .collect();
3246
3247                // Only propagate if first struct has at least one named field
3248                if field_names.iter().any(|n| n.is_some()) {
3249                    let mut new_elements = Vec::with_capacity(elements.len());
3250                    new_elements.push(elements[0].clone());
3251
3252                    for elem in &elements[1..] {
3253                        if let Expression::Function(ref s) = elem {
3254                            if s.name.eq_ignore_ascii_case("STRUCT")
3255                                && s.args.len() == field_names.len()
3256                            {
3257                                // Check if this struct has NO names (all unnamed)
3258                                let all_unnamed =
3259                                    s.args.iter().all(|a| !matches!(a, Expression::Alias(_)));
3260                                if all_unnamed {
3261                                    // Apply names from first struct
3262                                    let new_args: Vec<Expression> = s
3263                                        .args
3264                                        .iter()
3265                                        .zip(field_names.iter())
3266                                        .map(|(val, name)| {
3267                                            if let Some(n) = name {
3268                                                Expression::Alias(Box::new(Alias::new(
3269                                                    val.clone(),
3270                                                    Identifier::new(n.clone()),
3271                                                )))
3272                                            } else {
3273                                                val.clone()
3274                                            }
3275                                        })
3276                                        .collect();
3277                                    new_elements.push(Expression::Function(Box::new(
3278                                        Function::new("STRUCT".to_string(), new_args),
3279                                    )));
3280                                    continue;
3281                                }
3282                            }
3283                        }
3284                        new_elements.push(elem.clone());
3285                    }
3286
3287                    return Some(new_elements);
3288                }
3289            }
3290        }
3291        None
3292    }
3293
3294    // Look for Array expressions containing STRUCT function calls
3295    if let Expression::Array(ref arr) = expr {
3296        if let Some(new_elements) = propagate_in_elements(&arr.expressions) {
3297            return Ok(Expression::Array(Box::new(crate::expressions::Array {
3298                expressions: new_elements,
3299            })));
3300        }
3301    }
3302
3303    // Also handle ArrayFunc (ArrayConstructor) - bracket notation [STRUCT(...), ...]
3304    if let Expression::ArrayFunc(ref arr) = expr {
3305        if let Some(new_elements) = propagate_in_elements(&arr.expressions) {
3306            return Ok(Expression::ArrayFunc(Box::new(ArrayConstructor {
3307                expressions: new_elements,
3308                bracket_notation: arr.bracket_notation,
3309                use_list_keyword: arr.use_list_keyword,
3310            })));
3311        }
3312    }
3313
3314    Ok(expr)
3315}
3316
3317/// This walks the entire expression tree to find SELECT statements and converts UNNEST aliases
3318/// in their FROM clauses and JOINs.
3319pub fn unnest_alias_to_column_alias(expr: Expression) -> Result<Expression> {
3320    use crate::dialects::transform_recursive;
3321    transform_recursive(expr, &unnest_alias_transform_single_select)
3322}
3323
3324/// Move UNNEST items from FROM clause to CROSS JOINs without changing alias format.
3325/// Used for BigQuery -> BigQuery/Redshift where we want CROSS JOIN but not _t0(col) aliases.
3326pub fn unnest_from_to_cross_join(expr: Expression) -> Result<Expression> {
3327    use crate::dialects::transform_recursive;
3328    transform_recursive(expr, &unnest_from_to_cross_join_single_select)
3329}
3330
3331fn unnest_from_to_cross_join_single_select(expr: Expression) -> Result<Expression> {
3332    if let Expression::Select(mut select) = expr {
3333        if let Some(ref mut from) = select.from {
3334            if from.expressions.len() > 1 {
3335                let mut new_from_exprs = Vec::new();
3336                let mut new_cross_joins = Vec::new();
3337
3338                for (idx, from_item) in from.expressions.drain(..).enumerate() {
3339                    if idx == 0 {
3340                        new_from_exprs.push(from_item);
3341                    } else {
3342                        let is_unnest = match &from_item {
3343                            Expression::Unnest(_) => true,
3344                            Expression::Alias(a) => matches!(a.this, Expression::Unnest(_)),
3345                            _ => false,
3346                        };
3347
3348                        if is_unnest {
3349                            new_cross_joins.push(crate::expressions::Join {
3350                                this: from_item,
3351                                on: None,
3352                                using: Vec::new(),
3353                                kind: JoinKind::Cross,
3354                                use_inner_keyword: false,
3355                                use_outer_keyword: false,
3356                                deferred_condition: false,
3357                                join_hint: None,
3358                                match_condition: None,
3359                                pivots: Vec::new(),
3360                                comments: Vec::new(),
3361                                nesting_group: 0,
3362                                directed: false,
3363                            });
3364                        } else {
3365                            new_from_exprs.push(from_item);
3366                        }
3367                    }
3368                }
3369
3370                from.expressions = new_from_exprs;
3371                new_cross_joins.append(&mut select.joins);
3372                select.joins = new_cross_joins;
3373            }
3374        }
3375
3376        Ok(Expression::Select(select))
3377    } else {
3378        Ok(expr)
3379    }
3380}
3381
3382/// Wrap UNNEST function aliases in JOIN items from `AS name` to `AS _u(name)`
3383/// Used for PostgreSQL → Presto/Trino transpilation where GENERATE_SERIES is
3384/// converted to UNNEST(SEQUENCE) and the alias needs the column-alias format.
3385pub fn wrap_unnest_join_aliases(expr: Expression) -> Result<Expression> {
3386    use crate::dialects::transform_recursive;
3387    transform_recursive(expr, &wrap_unnest_join_aliases_single)
3388}
3389
3390fn wrap_unnest_join_aliases_single(expr: Expression) -> Result<Expression> {
3391    if let Expression::Select(mut select) = expr {
3392        // Process JOIN items
3393        for join in &mut select.joins {
3394            wrap_unnest_alias_in_join_item(&mut join.this);
3395        }
3396        Ok(Expression::Select(select))
3397    } else {
3398        Ok(expr)
3399    }
3400}
3401
3402/// If a join item is an Alias wrapping an UNNEST function, convert alias to _u(alias_name) format
3403fn wrap_unnest_alias_in_join_item(expr: &mut Expression) {
3404    use crate::expressions::Identifier;
3405    if let Expression::Alias(alias) = expr {
3406        // Check if the inner expression is a function call to UNNEST
3407        let is_unnest = match &alias.this {
3408            Expression::Function(f) => f.name.eq_ignore_ascii_case("UNNEST"),
3409            _ => false,
3410        };
3411
3412        if is_unnest && alias.column_aliases.is_empty() {
3413            // Simple alias like `AS s` -> wrap to `AS _u(s)`
3414            let original_alias_name = alias.alias.name.clone();
3415            alias.alias = Identifier {
3416                name: "_u".to_string(),
3417                quoted: false,
3418                trailing_comments: Vec::new(),
3419                span: None,
3420            };
3421            alias.column_aliases = vec![Identifier {
3422                name: original_alias_name,
3423                quoted: false,
3424                trailing_comments: Vec::new(),
3425                span: None,
3426            }];
3427        }
3428    }
3429}
3430
3431fn unnest_alias_transform_single_select(expr: Expression) -> Result<Expression> {
3432    if let Expression::Select(mut select) = expr {
3433        let mut counter = 0usize;
3434
3435        // Process FROM expressions: convert aliases AND move UNNEST items to CROSS JOIN
3436        if let Some(ref mut from) = select.from {
3437            // First pass: convert aliases in-place
3438            for from_item in from.expressions.iter_mut() {
3439                convert_unnest_alias_in_from(from_item, &mut counter);
3440            }
3441
3442            // Second pass: move UNNEST items from FROM to CROSS JOINs
3443            if from.expressions.len() > 1 {
3444                let mut new_from_exprs = Vec::new();
3445                let mut new_cross_joins = Vec::new();
3446
3447                for (idx, from_item) in from.expressions.drain(..).enumerate() {
3448                    if idx == 0 {
3449                        // First expression always stays in FROM
3450                        new_from_exprs.push(from_item);
3451                    } else {
3452                        // Check if this is UNNEST or Alias(UNNEST)
3453                        let is_unnest = match &from_item {
3454                            Expression::Unnest(_) => true,
3455                            Expression::Alias(a) => matches!(a.this, Expression::Unnest(_)),
3456                            _ => false,
3457                        };
3458
3459                        if is_unnest {
3460                            // Convert to CROSS JOIN
3461                            new_cross_joins.push(crate::expressions::Join {
3462                                this: from_item,
3463                                on: None,
3464                                using: Vec::new(),
3465                                kind: JoinKind::Cross,
3466                                use_inner_keyword: false,
3467                                use_outer_keyword: false,
3468                                deferred_condition: false,
3469                                join_hint: None,
3470                                match_condition: None,
3471                                pivots: Vec::new(),
3472                                comments: Vec::new(),
3473                                nesting_group: 0,
3474                                directed: false,
3475                            });
3476                        } else {
3477                            // Keep non-UNNEST items in FROM
3478                            new_from_exprs.push(from_item);
3479                        }
3480                    }
3481                }
3482
3483                from.expressions = new_from_exprs;
3484                // Prepend cross joins before existing joins
3485                new_cross_joins.append(&mut select.joins);
3486                select.joins = new_cross_joins;
3487            }
3488        }
3489
3490        // Process JOINs (existing joins that may have UNNEST aliases)
3491        for join in select.joins.iter_mut() {
3492            convert_unnest_alias_in_from(&mut join.this, &mut counter);
3493        }
3494
3495        Ok(Expression::Select(select))
3496    } else {
3497        Ok(expr)
3498    }
3499}
3500
3501fn convert_unnest_alias_in_from(expr: &mut Expression, counter: &mut usize) {
3502    use crate::expressions::Identifier;
3503
3504    if let Expression::Alias(ref mut alias) = expr {
3505        // Check if the inner expression is UNNEST (or EXPLODE)
3506        let is_unnest = matches!(&alias.this, Expression::Unnest(_))
3507            || matches!(&alias.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"));
3508
3509        if is_unnest && alias.column_aliases.is_empty() {
3510            // Convert: UNNEST(arr) AS x -> UNNEST(arr) AS _tN(x)
3511            let col_alias = alias.alias.clone();
3512            alias.column_aliases = vec![col_alias];
3513            alias.alias = Identifier::new(format!("_t{}", counter));
3514            *counter += 1;
3515        }
3516    }
3517}
3518
3519/// Expand POSEXPLODE in SELECT expressions for DuckDB.
3520///
3521/// Converts `SELECT POSEXPLODE(x)` to `SELECT GENERATE_SUBSCRIPTS(x, 1) - 1 AS pos, UNNEST(x) AS col`
3522/// Handles both aliased and unaliased forms:
3523/// - `SELECT POSEXPLODE(x) AS (a, b)` -> `SELECT GENERATE_SUBSCRIPTS(x, 1) - 1 AS a, UNNEST(x) AS b`
3524/// - `SELECT * FROM POSEXPLODE(x) AS (a, b)` -> `SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(x, 1) - 1 AS a, UNNEST(x) AS b)`
3525pub fn expand_posexplode_duckdb(expr: Expression) -> Result<Expression> {
3526    use crate::expressions::{Alias, Function};
3527
3528    match expr {
3529        Expression::Select(mut select) => {
3530            // Check if any SELECT expression is a POSEXPLODE function
3531            let mut new_expressions = Vec::new();
3532            let mut changed = false;
3533
3534            for sel_expr in select.expressions.drain(..) {
3535                // Check for POSEXPLODE(x) AS (a, b) - aliased form
3536                if let Expression::Alias(ref alias_box) = sel_expr {
3537                    if let Expression::Function(ref func) = alias_box.this {
3538                        if func.name.eq_ignore_ascii_case("POSEXPLODE") && func.args.len() == 1 {
3539                            let arg = func.args[0].clone();
3540                            // Get alias names: default pos, col
3541                            let (pos_name, col_name) = if alias_box.column_aliases.len() == 2 {
3542                                (
3543                                    alias_box.column_aliases[0].name.clone(),
3544                                    alias_box.column_aliases[1].name.clone(),
3545                                )
3546                            } else if !alias_box.alias.is_empty() {
3547                                // Single alias like AS x - use as col name, "pos" for position
3548                                ("pos".to_string(), alias_box.alias.name.clone())
3549                            } else {
3550                                ("pos".to_string(), "col".to_string())
3551                            };
3552
3553                            // GENERATE_SUBSCRIPTS(x, 1) - 1 AS pos_name
3554                            let gen_subscripts = Expression::Function(Box::new(Function::new(
3555                                "GENERATE_SUBSCRIPTS".to_string(),
3556                                vec![
3557                                    arg.clone(),
3558                                    Expression::Literal(Literal::Number("1".to_string())),
3559                                ],
3560                            )));
3561                            let sub_one = Expression::Sub(Box::new(BinaryOp::new(
3562                                gen_subscripts,
3563                                Expression::Literal(Literal::Number("1".to_string())),
3564                            )));
3565                            let pos_alias = Expression::Alias(Box::new(Alias {
3566                                this: sub_one,
3567                                alias: Identifier::new(pos_name),
3568                                column_aliases: Vec::new(),
3569                                pre_alias_comments: Vec::new(),
3570                                trailing_comments: Vec::new(),
3571                            }));
3572
3573                            // UNNEST(x) AS col_name
3574                            let unnest = Expression::Unnest(Box::new(UnnestFunc {
3575                                this: arg,
3576                                expressions: Vec::new(),
3577                                with_ordinality: false,
3578                                alias: None,
3579                                offset_alias: None,
3580                            }));
3581                            let col_alias = Expression::Alias(Box::new(Alias {
3582                                this: unnest,
3583                                alias: Identifier::new(col_name),
3584                                column_aliases: Vec::new(),
3585                                pre_alias_comments: Vec::new(),
3586                                trailing_comments: Vec::new(),
3587                            }));
3588
3589                            new_expressions.push(pos_alias);
3590                            new_expressions.push(col_alias);
3591                            changed = true;
3592                            continue;
3593                        }
3594                    }
3595                }
3596
3597                // Check for bare POSEXPLODE(x) - unaliased form
3598                if let Expression::Function(ref func) = sel_expr {
3599                    if func.name.eq_ignore_ascii_case("POSEXPLODE") && func.args.len() == 1 {
3600                        let arg = func.args[0].clone();
3601                        let pos_name = "pos";
3602                        let col_name = "col";
3603
3604                        // GENERATE_SUBSCRIPTS(x, 1) - 1 AS pos
3605                        let gen_subscripts = Expression::Function(Box::new(Function::new(
3606                            "GENERATE_SUBSCRIPTS".to_string(),
3607                            vec![
3608                                arg.clone(),
3609                                Expression::Literal(Literal::Number("1".to_string())),
3610                            ],
3611                        )));
3612                        let sub_one = Expression::Sub(Box::new(BinaryOp::new(
3613                            gen_subscripts,
3614                            Expression::Literal(Literal::Number("1".to_string())),
3615                        )));
3616                        let pos_alias = Expression::Alias(Box::new(Alias {
3617                            this: sub_one,
3618                            alias: Identifier::new(pos_name),
3619                            column_aliases: Vec::new(),
3620                            pre_alias_comments: Vec::new(),
3621                            trailing_comments: Vec::new(),
3622                        }));
3623
3624                        // UNNEST(x) AS col
3625                        let unnest = Expression::Unnest(Box::new(UnnestFunc {
3626                            this: arg,
3627                            expressions: Vec::new(),
3628                            with_ordinality: false,
3629                            alias: None,
3630                            offset_alias: None,
3631                        }));
3632                        let col_alias = Expression::Alias(Box::new(Alias {
3633                            this: unnest,
3634                            alias: Identifier::new(col_name),
3635                            column_aliases: Vec::new(),
3636                            pre_alias_comments: Vec::new(),
3637                            trailing_comments: Vec::new(),
3638                        }));
3639
3640                        new_expressions.push(pos_alias);
3641                        new_expressions.push(col_alias);
3642                        changed = true;
3643                        continue;
3644                    }
3645                }
3646
3647                // Not a POSEXPLODE, keep as-is
3648                new_expressions.push(sel_expr);
3649            }
3650
3651            if changed {
3652                select.expressions = new_expressions;
3653            } else {
3654                select.expressions = new_expressions;
3655            }
3656
3657            // Also handle POSEXPLODE in FROM clause:
3658            // SELECT * FROM POSEXPLODE(x) AS (a, b) -> SELECT * FROM (SELECT ...)
3659            if let Some(ref mut from) = select.from {
3660                expand_posexplode_in_from_duckdb(from)?;
3661            }
3662
3663            Ok(Expression::Select(select))
3664        }
3665        other => Ok(other),
3666    }
3667}
3668
3669/// Helper to expand POSEXPLODE in FROM clause for DuckDB
3670fn expand_posexplode_in_from_duckdb(from: &mut From) -> Result<()> {
3671    use crate::expressions::{Alias, Function};
3672
3673    let mut new_expressions = Vec::new();
3674    let mut _changed = false;
3675
3676    for table_expr in from.expressions.drain(..) {
3677        // Check for POSEXPLODE(x) AS (a, b) in FROM
3678        if let Expression::Alias(ref alias_box) = table_expr {
3679            if let Expression::Function(ref func) = alias_box.this {
3680                if func.name.eq_ignore_ascii_case("POSEXPLODE") && func.args.len() == 1 {
3681                    let arg = func.args[0].clone();
3682                    let (pos_name, col_name) = if alias_box.column_aliases.len() == 2 {
3683                        (
3684                            alias_box.column_aliases[0].name.clone(),
3685                            alias_box.column_aliases[1].name.clone(),
3686                        )
3687                    } else {
3688                        ("pos".to_string(), "col".to_string())
3689                    };
3690
3691                    // Create subquery: (SELECT GENERATE_SUBSCRIPTS(x, 1) - 1 AS a, UNNEST(x) AS b)
3692                    let gen_subscripts = Expression::Function(Box::new(Function::new(
3693                        "GENERATE_SUBSCRIPTS".to_string(),
3694                        vec![
3695                            arg.clone(),
3696                            Expression::Literal(Literal::Number("1".to_string())),
3697                        ],
3698                    )));
3699                    let sub_one = Expression::Sub(Box::new(BinaryOp::new(
3700                        gen_subscripts,
3701                        Expression::Literal(Literal::Number("1".to_string())),
3702                    )));
3703                    let pos_alias = Expression::Alias(Box::new(Alias {
3704                        this: sub_one,
3705                        alias: Identifier::new(&pos_name),
3706                        column_aliases: Vec::new(),
3707                        pre_alias_comments: Vec::new(),
3708                        trailing_comments: Vec::new(),
3709                    }));
3710                    let unnest = Expression::Unnest(Box::new(UnnestFunc {
3711                        this: arg,
3712                        expressions: Vec::new(),
3713                        with_ordinality: false,
3714                        alias: None,
3715                        offset_alias: None,
3716                    }));
3717                    let col_alias = Expression::Alias(Box::new(Alias {
3718                        this: unnest,
3719                        alias: Identifier::new(&col_name),
3720                        column_aliases: Vec::new(),
3721                        pre_alias_comments: Vec::new(),
3722                        trailing_comments: Vec::new(),
3723                    }));
3724
3725                    let mut inner_select = Select::new();
3726                    inner_select.expressions = vec![pos_alias, col_alias];
3727
3728                    let subquery = Expression::Subquery(Box::new(Subquery {
3729                        this: Expression::Select(Box::new(inner_select)),
3730                        alias: None,
3731                        column_aliases: Vec::new(),
3732                        order_by: None,
3733                        limit: None,
3734                        offset: None,
3735                        distribute_by: None,
3736                        sort_by: None,
3737                        cluster_by: None,
3738                        lateral: false,
3739                        modifiers_inside: false,
3740                        trailing_comments: Vec::new(),
3741                    }));
3742                    new_expressions.push(subquery);
3743                    _changed = true;
3744                    continue;
3745                }
3746            }
3747        }
3748
3749        // Also check for bare POSEXPLODE(x) in FROM (no alias)
3750        if let Expression::Function(ref func) = table_expr {
3751            if func.name.eq_ignore_ascii_case("POSEXPLODE") && func.args.len() == 1 {
3752                let arg = func.args[0].clone();
3753
3754                // Create subquery: (SELECT GENERATE_SUBSCRIPTS(x, 1) - 1 AS pos, UNNEST(x) AS col)
3755                let gen_subscripts = Expression::Function(Box::new(Function::new(
3756                    "GENERATE_SUBSCRIPTS".to_string(),
3757                    vec![
3758                        arg.clone(),
3759                        Expression::Literal(Literal::Number("1".to_string())),
3760                    ],
3761                )));
3762                let sub_one = Expression::Sub(Box::new(BinaryOp::new(
3763                    gen_subscripts,
3764                    Expression::Literal(Literal::Number("1".to_string())),
3765                )));
3766                let pos_alias = Expression::Alias(Box::new(Alias {
3767                    this: sub_one,
3768                    alias: Identifier::new("pos"),
3769                    column_aliases: Vec::new(),
3770                    pre_alias_comments: Vec::new(),
3771                    trailing_comments: Vec::new(),
3772                }));
3773                let unnest = Expression::Unnest(Box::new(UnnestFunc {
3774                    this: arg,
3775                    expressions: Vec::new(),
3776                    with_ordinality: false,
3777                    alias: None,
3778                    offset_alias: None,
3779                }));
3780                let col_alias = Expression::Alias(Box::new(Alias {
3781                    this: unnest,
3782                    alias: Identifier::new("col"),
3783                    column_aliases: Vec::new(),
3784                    pre_alias_comments: Vec::new(),
3785                    trailing_comments: Vec::new(),
3786                }));
3787
3788                let mut inner_select = Select::new();
3789                inner_select.expressions = vec![pos_alias, col_alias];
3790
3791                let subquery = Expression::Subquery(Box::new(Subquery {
3792                    this: Expression::Select(Box::new(inner_select)),
3793                    alias: None,
3794                    column_aliases: Vec::new(),
3795                    order_by: None,
3796                    limit: None,
3797                    offset: None,
3798                    distribute_by: None,
3799                    sort_by: None,
3800                    cluster_by: None,
3801                    lateral: false,
3802                    modifiers_inside: false,
3803                    trailing_comments: Vec::new(),
3804                }));
3805                new_expressions.push(subquery);
3806                _changed = true;
3807                continue;
3808            }
3809        }
3810
3811        new_expressions.push(table_expr);
3812    }
3813
3814    from.expressions = new_expressions;
3815    Ok(())
3816}
3817
3818/// Convert EXPLODE/POSEXPLODE in SELECT projections into CROSS JOIN UNNEST patterns.
3819///
3820/// This implements the `explode_projection_to_unnest` transform from Python sqlglot.
3821/// It restructures queries like:
3822///   `SELECT EXPLODE(x) FROM tbl`
3823/// into:
3824///   `SELECT IF(pos = pos_2, col, NULL) AS col FROM tbl CROSS JOIN UNNEST(...) AS pos CROSS JOIN UNNEST(x) AS col WITH OFFSET AS pos_2 WHERE ...`
3825///
3826/// The transform handles:
3827/// - EXPLODE(x) and POSEXPLODE(x) functions
3828/// - Name collision avoidance (_u, _u_2, ... and col, col_2, ...)
3829/// - Multiple EXPLODE/POSEXPLODE in one SELECT
3830/// - Queries with or without FROM clause
3831/// - Presto (index_offset=1) and BigQuery (index_offset=0) variants
3832pub fn explode_projection_to_unnest(expr: Expression, target: DialectType) -> Result<Expression> {
3833    match expr {
3834        Expression::Select(select) => explode_projection_to_unnest_impl(*select, target),
3835        other => Ok(other),
3836    }
3837}
3838
3839/// Snowflake-specific rewrite to mirror Python sqlglot's explode_projection_to_unnest behavior
3840/// when FLATTEN appears in a nested LATERAL within a SELECT projection.
3841///
3842/// This intentionally rewrites:
3843/// - `LATERAL FLATTEN(INPUT => x) alias`
3844/// into:
3845/// - `LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS alias(SEQ, KEY, PATH, INDEX, VALUE, THIS)`
3846/// and appends CROSS JOIN TABLE(FLATTEN(...)) range/entity joins plus alignment predicates
3847/// to the containing SELECT.
3848pub fn snowflake_flatten_projection_to_unnest(expr: Expression) -> Result<Expression> {
3849    match expr {
3850        Expression::Select(select) => snowflake_flatten_projection_to_unnest_impl(*select),
3851        other => Ok(other),
3852    }
3853}
3854
3855fn snowflake_flatten_projection_to_unnest_impl(mut select: Select) -> Result<Expression> {
3856    let mut flattened_inputs: Vec<Expression> = Vec::new();
3857    let mut new_selects: Vec<Expression> = Vec::with_capacity(select.expressions.len());
3858
3859    for sel_expr in select.expressions.into_iter() {
3860        let found_input: RefCell<Option<Expression>> = RefCell::new(None);
3861
3862        let rewritten = transform_recursive(sel_expr, &|e| {
3863            if let Expression::Lateral(lat) = e {
3864                if let Some(input_expr) = extract_flatten_input(&lat) {
3865                    if found_input.borrow().is_none() {
3866                        *found_input.borrow_mut() = Some(input_expr);
3867                    }
3868                    return Ok(Expression::Lateral(Box::new(rewrite_flatten_lateral(*lat))));
3869                }
3870                return Ok(Expression::Lateral(lat));
3871            }
3872            Ok(e)
3873        })?;
3874
3875        if let Some(input) = found_input.into_inner() {
3876            flattened_inputs.push(input);
3877        }
3878        new_selects.push(rewritten);
3879    }
3880
3881    if flattened_inputs.is_empty() {
3882        select.expressions = new_selects;
3883        return Ok(Expression::Select(Box::new(select)));
3884    }
3885
3886    select.expressions = new_selects;
3887
3888    for (idx, input_expr) in flattened_inputs.into_iter().enumerate() {
3889        // Match sqlglot naming: first pair is _u/_u_2 with pos/pos_2 and entity.
3890        let is_first = idx == 0;
3891        let series_alias = if is_first {
3892            "pos".to_string()
3893        } else {
3894            format!("pos_{}", idx + 1)
3895        };
3896        let series_source_alias = if is_first {
3897            "_u".to_string()
3898        } else {
3899            format!("_u_{}", idx * 2 + 1)
3900        };
3901        let unnest_source_alias = if is_first {
3902            "_u_2".to_string()
3903        } else {
3904            format!("_u_{}", idx * 2 + 2)
3905        };
3906        let pos2_alias = if is_first {
3907            "pos_2".to_string()
3908        } else {
3909            format!("{}_2", series_alias)
3910        };
3911        let entity_alias = if is_first {
3912            "entity".to_string()
3913        } else {
3914            format!("entity_{}", idx + 1)
3915        };
3916
3917        let array_size_call = Expression::Function(Box::new(Function::new(
3918            "ARRAY_SIZE".to_string(),
3919            vec![Expression::NamedArgument(Box::new(NamedArgument {
3920                name: Identifier::new("INPUT"),
3921                value: input_expr.clone(),
3922                separator: NamedArgSeparator::DArrow,
3923            }))],
3924        )));
3925
3926        let greatest = Expression::Function(Box::new(Function::new(
3927            "GREATEST".to_string(),
3928            vec![array_size_call.clone()],
3929        )));
3930
3931        let series_end = Expression::Add(Box::new(BinaryOp::new(
3932            Expression::Paren(Box::new(crate::expressions::Paren {
3933                this: Expression::Sub(Box::new(BinaryOp::new(
3934                    greatest,
3935                    Expression::Literal(Literal::Number("1".to_string())),
3936                ))),
3937                trailing_comments: Vec::new(),
3938            })),
3939            Expression::Literal(Literal::Number("1".to_string())),
3940        )));
3941
3942        let series_range = Expression::Function(Box::new(Function::new(
3943            "ARRAY_GENERATE_RANGE".to_string(),
3944            vec![
3945                Expression::Literal(Literal::Number("0".to_string())),
3946                series_end,
3947            ],
3948        )));
3949
3950        let series_flatten = Expression::Function(Box::new(Function::new(
3951            "FLATTEN".to_string(),
3952            vec![Expression::NamedArgument(Box::new(NamedArgument {
3953                name: Identifier::new("INPUT"),
3954                value: series_range,
3955                separator: NamedArgSeparator::DArrow,
3956            }))],
3957        )));
3958
3959        let series_table = Expression::Function(Box::new(Function::new(
3960            "TABLE".to_string(),
3961            vec![series_flatten],
3962        )));
3963
3964        let series_alias_expr = Expression::Alias(Box::new(Alias {
3965            this: series_table,
3966            alias: Identifier::new(series_source_alias.clone()),
3967            column_aliases: vec![
3968                Identifier::new("seq"),
3969                Identifier::new("key"),
3970                Identifier::new("path"),
3971                Identifier::new("index"),
3972                Identifier::new(series_alias.clone()),
3973                Identifier::new("this"),
3974            ],
3975            pre_alias_comments: Vec::new(),
3976            trailing_comments: Vec::new(),
3977        }));
3978
3979        select.joins.push(Join {
3980            this: series_alias_expr,
3981            on: None,
3982            using: Vec::new(),
3983            kind: JoinKind::Cross,
3984            use_inner_keyword: false,
3985            use_outer_keyword: false,
3986            deferred_condition: false,
3987            join_hint: None,
3988            match_condition: None,
3989            pivots: Vec::new(),
3990            comments: Vec::new(),
3991            nesting_group: 0,
3992            directed: false,
3993        });
3994
3995        let entity_flatten = Expression::Function(Box::new(Function::new(
3996            "FLATTEN".to_string(),
3997            vec![Expression::NamedArgument(Box::new(NamedArgument {
3998                name: Identifier::new("INPUT"),
3999                value: input_expr.clone(),
4000                separator: NamedArgSeparator::DArrow,
4001            }))],
4002        )));
4003
4004        let entity_table = Expression::Function(Box::new(Function::new(
4005            "TABLE".to_string(),
4006            vec![entity_flatten],
4007        )));
4008
4009        let entity_alias_expr = Expression::Alias(Box::new(Alias {
4010            this: entity_table,
4011            alias: Identifier::new(unnest_source_alias.clone()),
4012            column_aliases: vec![
4013                Identifier::new("seq"),
4014                Identifier::new("key"),
4015                Identifier::new("path"),
4016                Identifier::new(pos2_alias.clone()),
4017                Identifier::new(entity_alias.clone()),
4018                Identifier::new("this"),
4019            ],
4020            pre_alias_comments: Vec::new(),
4021            trailing_comments: Vec::new(),
4022        }));
4023
4024        select.joins.push(Join {
4025            this: entity_alias_expr,
4026            on: None,
4027            using: Vec::new(),
4028            kind: JoinKind::Cross,
4029            use_inner_keyword: false,
4030            use_outer_keyword: false,
4031            deferred_condition: false,
4032            join_hint: None,
4033            match_condition: None,
4034            pivots: Vec::new(),
4035            comments: Vec::new(),
4036            nesting_group: 0,
4037            directed: false,
4038        });
4039
4040        let pos_col =
4041            Expression::qualified_column(series_source_alias.clone(), series_alias.clone());
4042        let pos2_col =
4043            Expression::qualified_column(unnest_source_alias.clone(), pos2_alias.clone());
4044
4045        let eq = Expression::Eq(Box::new(BinaryOp::new(pos_col.clone(), pos2_col.clone())));
4046        let size_minus_1 = Expression::Paren(Box::new(crate::expressions::Paren {
4047            this: Expression::Sub(Box::new(BinaryOp::new(
4048                array_size_call,
4049                Expression::Literal(Literal::Number("1".to_string())),
4050            ))),
4051            trailing_comments: Vec::new(),
4052        }));
4053        let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_minus_1.clone())));
4054        let pos2_eq_size = Expression::Eq(Box::new(BinaryOp::new(pos2_col, size_minus_1)));
4055        let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos2_eq_size)));
4056        let or_cond = Expression::Or(Box::new(BinaryOp::new(
4057            eq,
4058            Expression::Paren(Box::new(crate::expressions::Paren {
4059                this: and_cond,
4060                trailing_comments: Vec::new(),
4061            })),
4062        )));
4063
4064        select.where_clause = Some(match select.where_clause.take() {
4065            Some(existing) => Where {
4066                this: Expression::And(Box::new(BinaryOp::new(existing.this, or_cond))),
4067            },
4068            None => Where { this: or_cond },
4069        });
4070    }
4071
4072    Ok(Expression::Select(Box::new(select)))
4073}
4074
4075fn extract_flatten_input(lat: &Lateral) -> Option<Expression> {
4076    let Expression::Function(f) = lat.this.as_ref() else {
4077        return None;
4078    };
4079    if !f.name.eq_ignore_ascii_case("FLATTEN") {
4080        return None;
4081    }
4082
4083    for arg in &f.args {
4084        if let Expression::NamedArgument(na) = arg {
4085            if na.name.name.eq_ignore_ascii_case("INPUT") {
4086                return Some(na.value.clone());
4087            }
4088        }
4089    }
4090    f.args.first().cloned()
4091}
4092
4093fn rewrite_flatten_lateral(mut lat: Lateral) -> Lateral {
4094    let cond = Expression::Eq(Box::new(BinaryOp::new(
4095        Expression::qualified_column("_u", "pos"),
4096        Expression::qualified_column("_u_2", "pos_2"),
4097    )));
4098    let true_expr = Expression::qualified_column("_u_2", "entity");
4099    let iff_expr = Expression::Function(Box::new(Function::new(
4100        "IFF".to_string(),
4101        vec![cond, true_expr, Expression::Null(crate::expressions::Null)],
4102    )));
4103
4104    lat.this = Box::new(iff_expr);
4105    if lat.column_aliases.is_empty() {
4106        lat.column_aliases = vec![
4107            "SEQ".to_string(),
4108            "KEY".to_string(),
4109            "PATH".to_string(),
4110            "INDEX".to_string(),
4111            "VALUE".to_string(),
4112            "THIS".to_string(),
4113        ];
4114    }
4115    lat
4116}
4117
4118/// Info about an EXPLODE/POSEXPLODE found in a SELECT projection
4119struct ExplodeInfo {
4120    /// The argument to EXPLODE/POSEXPLODE (the array expression)
4121    arg_sql: String,
4122    /// The alias for the exploded column
4123    explode_alias: String,
4124    /// The alias for the position column
4125    pos_alias: String,
4126    /// Source alias for this unnest (e.g., _u_2)
4127    unnest_source_alias: String,
4128}
4129
4130fn explode_projection_to_unnest_impl(select: Select, target: DialectType) -> Result<Expression> {
4131    let is_presto = matches!(
4132        target,
4133        DialectType::Presto | DialectType::Trino | DialectType::Athena
4134    );
4135    let is_bigquery = matches!(target, DialectType::BigQuery);
4136
4137    if !is_presto && !is_bigquery {
4138        return Ok(Expression::Select(Box::new(select)));
4139    }
4140
4141    // Check if any SELECT projection contains EXPLODE or POSEXPLODE
4142    let has_explode = select.expressions.iter().any(|e| expr_contains_explode(e));
4143    if !has_explode {
4144        return Ok(Expression::Select(Box::new(select)));
4145    }
4146
4147    // Collect taken names from existing SELECT expressions and FROM sources
4148    let mut taken_select_names = std::collections::HashSet::new();
4149    let mut taken_source_names = std::collections::HashSet::new();
4150
4151    // Collect names from existing SELECT expressions (output names)
4152    for sel in &select.expressions {
4153        if let Some(name) = get_output_name(sel) {
4154            taken_select_names.insert(name);
4155        }
4156    }
4157
4158    // Also add the explode arg name if it's a column reference
4159    for sel in &select.expressions {
4160        let explode_expr = find_explode_in_expr(sel);
4161        if let Some(arg) = explode_expr {
4162            if let Some(name) = get_output_name(&arg) {
4163                taken_select_names.insert(name);
4164            }
4165        }
4166    }
4167
4168    // Collect source names from FROM clause
4169    if let Some(ref from) = select.from {
4170        for from_expr in &from.expressions {
4171            collect_source_names(from_expr, &mut taken_source_names);
4172        }
4173    }
4174    // Also collect from JOINs
4175    for join in &select.joins {
4176        collect_source_names(&join.this, &mut taken_source_names);
4177    }
4178
4179    // Generate series alias
4180    let series_alias = new_name(&mut taken_select_names, "pos");
4181
4182    // Generate series source alias
4183    let series_source_alias = new_name(&mut taken_source_names, "_u");
4184
4185    // Get the target dialect for generating expression SQL
4186    let target_dialect = Dialect::get(target);
4187
4188    // Process each SELECT expression, collecting explode info
4189    let mut explode_infos: Vec<ExplodeInfo> = Vec::new();
4190    let mut new_projections: Vec<String> = Vec::new();
4191
4192    for (_idx, sel_expr) in select.expressions.iter().enumerate() {
4193        let explode_data = extract_explode_data(sel_expr);
4194
4195        if let Some((is_posexplode, arg_expr, explicit_alias, explicit_pos_alias)) = explode_data {
4196            // Generate the argument SQL in target dialect
4197            let arg_sql = target_dialect
4198                .generate(&arg_expr)
4199                .unwrap_or_else(|_| "NULL".to_string());
4200
4201            let unnest_source_alias = new_name(&mut taken_source_names, "_u");
4202
4203            let explode_alias = if let Some(ref ea) = explicit_alias {
4204                // Use the explicit alias directly (it was explicitly specified by the user)
4205                // Remove from taken_select_names first to avoid false collision with itself
4206                taken_select_names.remove(ea.as_str());
4207                // Now check for collision with other names
4208                let name = new_name(&mut taken_select_names, ea);
4209                name
4210            } else {
4211                new_name(&mut taken_select_names, "col")
4212            };
4213
4214            let pos_alias = if let Some(ref pa) = explicit_pos_alias {
4215                // Use the explicit pos alias directly
4216                taken_select_names.remove(pa.as_str());
4217                let name = new_name(&mut taken_select_names, pa);
4218                name
4219            } else {
4220                new_name(&mut taken_select_names, "pos")
4221            };
4222
4223            // Build the IF projection
4224            if is_presto {
4225                // Presto: IF(_u.pos = _u_2.pos_2, _u_2.col) AS col
4226                let if_col = format!(
4227                    "IF({}.{} = {}.{}, {}.{}) AS {}",
4228                    series_source_alias,
4229                    series_alias,
4230                    unnest_source_alias,
4231                    pos_alias,
4232                    unnest_source_alias,
4233                    explode_alias,
4234                    explode_alias
4235                );
4236                new_projections.push(if_col);
4237
4238                // For POSEXPLODE, also add the position projection
4239                if is_posexplode {
4240                    let if_pos = format!(
4241                        "IF({}.{} = {}.{}, {}.{}) AS {}",
4242                        series_source_alias,
4243                        series_alias,
4244                        unnest_source_alias,
4245                        pos_alias,
4246                        unnest_source_alias,
4247                        pos_alias,
4248                        pos_alias
4249                    );
4250                    new_projections.push(if_pos);
4251                }
4252            } else {
4253                // BigQuery: IF(pos = pos_2, col, NULL) AS col
4254                let if_col = format!(
4255                    "IF({} = {}, {}, NULL) AS {}",
4256                    series_alias, pos_alias, explode_alias, explode_alias
4257                );
4258                new_projections.push(if_col);
4259
4260                // For POSEXPLODE, also add the position projection
4261                if is_posexplode {
4262                    let if_pos = format!(
4263                        "IF({} = {}, {}, NULL) AS {}",
4264                        series_alias, pos_alias, pos_alias, pos_alias
4265                    );
4266                    new_projections.push(if_pos);
4267                }
4268            }
4269
4270            explode_infos.push(ExplodeInfo {
4271                arg_sql,
4272                explode_alias,
4273                pos_alias,
4274                unnest_source_alias,
4275            });
4276        } else {
4277            // Not an EXPLODE expression, generate as-is
4278            let sel_sql = target_dialect
4279                .generate(sel_expr)
4280                .unwrap_or_else(|_| "*".to_string());
4281            new_projections.push(sel_sql);
4282        }
4283    }
4284
4285    if explode_infos.is_empty() {
4286        return Ok(Expression::Select(Box::new(select)));
4287    }
4288
4289    // Build the FROM clause
4290    let mut from_parts: Vec<String> = Vec::new();
4291
4292    // Existing FROM sources
4293    if let Some(ref from) = select.from {
4294        for from_expr in &from.expressions {
4295            let from_sql = target_dialect.generate(from_expr).unwrap_or_default();
4296            from_parts.push(from_sql);
4297        }
4298    }
4299
4300    // Build the size expressions for the series generator
4301    let size_exprs: Vec<String> = explode_infos
4302        .iter()
4303        .map(|info| {
4304            if is_presto {
4305                format!("CARDINALITY({})", info.arg_sql)
4306            } else {
4307                format!("ARRAY_LENGTH({})", info.arg_sql)
4308            }
4309        })
4310        .collect();
4311
4312    let greatest_arg = if size_exprs.len() == 1 {
4313        size_exprs[0].clone()
4314    } else {
4315        format!("GREATEST({})", size_exprs.join(", "))
4316    };
4317
4318    // Build the series source
4319    // greatest_arg is already "GREATEST(...)" when multiple, or "CARDINALITY(x)" / "ARRAY_LENGTH(x)" when single
4320    let series_sql = if is_presto {
4321        // SEQUENCE(1, GREATEST(CARDINALITY(x))) for single, SEQUENCE(1, GREATEST(C(a), C(b))) for multiple
4322        if size_exprs.len() == 1 {
4323            format!(
4324                "UNNEST(SEQUENCE(1, GREATEST({}))) AS {}({})",
4325                greatest_arg, series_source_alias, series_alias
4326            )
4327        } else {
4328            // greatest_arg already has GREATEST(...) wrapper
4329            format!(
4330                "UNNEST(SEQUENCE(1, {})) AS {}({})",
4331                greatest_arg, series_source_alias, series_alias
4332            )
4333        }
4334    } else {
4335        // GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(x)) - 1) for single
4336        if size_exprs.len() == 1 {
4337            format!(
4338                "UNNEST(GENERATE_ARRAY(0, GREATEST({}) - 1)) AS {}",
4339                greatest_arg, series_alias
4340            )
4341        } else {
4342            // greatest_arg already has GREATEST(...) wrapper
4343            format!(
4344                "UNNEST(GENERATE_ARRAY(0, {} - 1)) AS {}",
4345                greatest_arg, series_alias
4346            )
4347        }
4348    };
4349
4350    // Build CROSS JOIN UNNEST clauses
4351    // Always use Presto-style (WITH ORDINALITY) for the SQL string to parse,
4352    // then convert to BigQuery-style AST after parsing if needed
4353    let mut cross_joins: Vec<String> = Vec::new();
4354
4355    for info in &explode_infos {
4356        // Always use WITH ORDINALITY syntax (which our parser handles)
4357        cross_joins.push(format!(
4358            "CROSS JOIN UNNEST({}) WITH ORDINALITY AS {}({}, {})",
4359            info.arg_sql, info.unnest_source_alias, info.explode_alias, info.pos_alias
4360        ));
4361    }
4362
4363    // Build WHERE clause
4364    let mut where_conditions: Vec<String> = Vec::new();
4365
4366    for info in &explode_infos {
4367        let size_expr = if is_presto {
4368            format!("CARDINALITY({})", info.arg_sql)
4369        } else {
4370            format!("ARRAY_LENGTH({})", info.arg_sql)
4371        };
4372
4373        let cond = if is_presto {
4374            format!(
4375                "{series_src}.{series_al} = {unnest_src}.{pos_al} OR ({series_src}.{series_al} > {size} AND {unnest_src}.{pos_al} = {size})",
4376                series_src = series_source_alias,
4377                series_al = series_alias,
4378                unnest_src = info.unnest_source_alias,
4379                pos_al = info.pos_alias,
4380                size = size_expr
4381            )
4382        } else {
4383            format!(
4384                "{series_al} = {pos_al} OR ({series_al} > ({size} - 1) AND {pos_al} = ({size} - 1))",
4385                series_al = series_alias,
4386                pos_al = info.pos_alias,
4387                size = size_expr
4388            )
4389        };
4390
4391        where_conditions.push(cond);
4392    }
4393
4394    // Combine WHERE conditions with AND (wrapped in parens if multiple)
4395    let where_sql = if where_conditions.len() == 1 {
4396        where_conditions[0].clone()
4397    } else {
4398        where_conditions
4399            .iter()
4400            .map(|c| format!("({})", c))
4401            .collect::<Vec<_>>()
4402            .join(" AND ")
4403    };
4404
4405    // Build the complete SQL
4406    let select_part = new_projections.join(", ");
4407
4408    // FROM part: if there was no original FROM, the series becomes the FROM source
4409    let from_and_joins = if from_parts.is_empty() {
4410        // No original FROM: series is the FROM source, everything else is CROSS JOIN
4411        format!("FROM {} {}", series_sql, cross_joins.join(" "))
4412    } else {
4413        format!(
4414            "FROM {} {} {}",
4415            from_parts.join(", "),
4416            format!("CROSS JOIN {}", series_sql),
4417            cross_joins.join(" ")
4418        )
4419    };
4420
4421    let full_sql = format!(
4422        "SELECT {} {} WHERE {}",
4423        select_part, from_and_joins, where_sql
4424    );
4425
4426    // Parse the constructed SQL using the Generic dialect (which handles all SQL syntax)
4427    // We use Generic instead of the target dialect to avoid parser limitations
4428    let generic_dialect = Dialect::get(DialectType::Generic);
4429    let parsed = generic_dialect.parse(&full_sql);
4430    match parsed {
4431        Ok(mut stmts) if !stmts.is_empty() => {
4432            let mut result = stmts.remove(0);
4433
4434            // For BigQuery, convert Presto-style UNNEST AST to BigQuery-style
4435            // Presto: Alias(Unnest(with_ordinality=true), alias=_u_N, column_aliases=[col, pos])
4436            // BigQuery: Unnest(with_ordinality=true, alias=col, offset_alias=pos) [no outer Alias]
4437            if is_bigquery {
4438                convert_unnest_presto_to_bigquery(&mut result);
4439            }
4440
4441            Ok(result)
4442        }
4443        _ => {
4444            // If parsing fails, return the original expression unchanged
4445            Ok(Expression::Select(Box::new(select)))
4446        }
4447    }
4448}
4449
4450/// Convert Presto-style UNNEST WITH ORDINALITY to BigQuery-style UNNEST WITH OFFSET in the AST.
4451/// Presto: Alias(Unnest(with_ordinality=true), alias=_u_N, column_aliases=[col, pos_N])
4452/// BigQuery: Unnest(with_ordinality=true, alias=col, offset_alias=pos_N)
4453fn convert_unnest_presto_to_bigquery(expr: &mut Expression) {
4454    match expr {
4455        Expression::Select(ref mut select) => {
4456            // Convert in FROM clause
4457            if let Some(ref mut from) = select.from {
4458                for from_item in from.expressions.iter_mut() {
4459                    convert_unnest_presto_to_bigquery(from_item);
4460                }
4461            }
4462            // Convert in JOINs
4463            for join in select.joins.iter_mut() {
4464                convert_unnest_presto_to_bigquery(&mut join.this);
4465            }
4466        }
4467        Expression::Alias(ref alias) => {
4468            // Check if this is Alias(Unnest(with_ordinality=true), ..., column_aliases=[col, pos])
4469            if let Expression::Unnest(ref unnest) = alias.this {
4470                if unnest.with_ordinality && alias.column_aliases.len() >= 2 {
4471                    let col_alias = alias.column_aliases[0].clone();
4472                    let pos_alias = alias.column_aliases[1].clone();
4473                    let mut new_unnest = unnest.as_ref().clone();
4474                    new_unnest.alias = Some(col_alias);
4475                    new_unnest.offset_alias = Some(pos_alias);
4476                    // Replace the Alias(Unnest) with just Unnest
4477                    *expr = Expression::Unnest(Box::new(new_unnest));
4478                }
4479            }
4480        }
4481        _ => {}
4482    }
4483}
4484
4485/// Find a new name that doesn't conflict with existing names.
4486/// Tries `base`, then `base_2`, `base_3`, etc.
4487fn new_name(names: &mut std::collections::HashSet<String>, base: &str) -> String {
4488    if !names.contains(base) {
4489        names.insert(base.to_string());
4490        return base.to_string();
4491    }
4492    let mut i = 2;
4493    loop {
4494        let candidate = format!("{}_{}", base, i);
4495        if !names.contains(&candidate) {
4496            names.insert(candidate.clone());
4497            return candidate;
4498        }
4499        i += 1;
4500    }
4501}
4502
4503/// Check if an expression contains EXPLODE or POSEXPLODE
4504fn expr_contains_explode(expr: &Expression) -> bool {
4505    match expr {
4506        Expression::Explode(_) => true,
4507        Expression::ExplodeOuter(_) => true,
4508        Expression::Function(f) => {
4509            let name = f.name.to_uppercase();
4510            name == "POSEXPLODE" || name == "POSEXPLODE_OUTER"
4511        }
4512        Expression::Alias(a) => expr_contains_explode(&a.this),
4513        _ => false,
4514    }
4515}
4516
4517/// Find the EXPLODE/POSEXPLODE expression within a select item, return the arg
4518fn find_explode_in_expr(expr: &Expression) -> Option<Expression> {
4519    match expr {
4520        Expression::Explode(uf) => Some(uf.this.clone()),
4521        Expression::ExplodeOuter(uf) => Some(uf.this.clone()),
4522        Expression::Function(f) => {
4523            let name = f.name.to_uppercase();
4524            if (name == "POSEXPLODE" || name == "POSEXPLODE_OUTER") && !f.args.is_empty() {
4525                Some(f.args[0].clone())
4526            } else {
4527                None
4528            }
4529        }
4530        Expression::Alias(a) => find_explode_in_expr(&a.this),
4531        _ => None,
4532    }
4533}
4534
4535/// Extract explode data from a SELECT expression.
4536/// Returns (is_posexplode, arg_expression, explicit_col_alias, explicit_pos_alias)
4537fn extract_explode_data(
4538    expr: &Expression,
4539) -> Option<(bool, Expression, Option<String>, Option<String>)> {
4540    match expr {
4541        // Bare EXPLODE(x) without alias
4542        Expression::Explode(uf) => Some((false, uf.this.clone(), None, None)),
4543        Expression::ExplodeOuter(uf) => Some((false, uf.this.clone(), None, None)),
4544        // Bare POSEXPLODE(x) without alias
4545        Expression::Function(f) => {
4546            let name = f.name.to_uppercase();
4547            if (name == "POSEXPLODE" || name == "POSEXPLODE_OUTER") && !f.args.is_empty() {
4548                Some((true, f.args[0].clone(), None, None))
4549            } else {
4550                None
4551            }
4552        }
4553        // Aliased: EXPLODE(x) AS col, or POSEXPLODE(x) AS (a, b)
4554        Expression::Alias(a) => {
4555            match &a.this {
4556                Expression::Explode(uf) => {
4557                    let alias = if !a.alias.is_empty() {
4558                        Some(a.alias.name.clone())
4559                    } else {
4560                        None
4561                    };
4562                    Some((false, uf.this.clone(), alias, None))
4563                }
4564                Expression::ExplodeOuter(uf) => {
4565                    let alias = if !a.alias.is_empty() {
4566                        Some(a.alias.name.clone())
4567                    } else {
4568                        None
4569                    };
4570                    Some((false, uf.this.clone(), alias, None))
4571                }
4572                Expression::Function(f) => {
4573                    let name = f.name.to_uppercase();
4574                    if (name == "POSEXPLODE" || name == "POSEXPLODE_OUTER") && !f.args.is_empty() {
4575                        // Check for column aliases: AS (a, b)
4576                        if a.column_aliases.len() == 2 {
4577                            let pos_alias = a.column_aliases[0].name.clone();
4578                            let col_alias = a.column_aliases[1].name.clone();
4579                            Some((true, f.args[0].clone(), Some(col_alias), Some(pos_alias)))
4580                        } else if !a.alias.is_empty() {
4581                            // Single alias: AS x
4582                            Some((true, f.args[0].clone(), Some(a.alias.name.clone()), None))
4583                        } else {
4584                            Some((true, f.args[0].clone(), None, None))
4585                        }
4586                    } else {
4587                        None
4588                    }
4589                }
4590                _ => None,
4591            }
4592        }
4593        _ => None,
4594    }
4595}
4596
4597/// Get the output name of a SELECT expression
4598fn get_output_name(expr: &Expression) -> Option<String> {
4599    match expr {
4600        Expression::Alias(a) => {
4601            if !a.alias.is_empty() {
4602                Some(a.alias.name.clone())
4603            } else {
4604                None
4605            }
4606        }
4607        Expression::Column(c) => Some(c.name.name.clone()),
4608        Expression::Identifier(id) => Some(id.name.clone()),
4609        _ => None,
4610    }
4611}
4612
4613/// Collect source names from a FROM/JOIN expression
4614fn collect_source_names(expr: &Expression, names: &mut std::collections::HashSet<String>) {
4615    match expr {
4616        Expression::Alias(a) => {
4617            if !a.alias.is_empty() {
4618                names.insert(a.alias.name.clone());
4619            }
4620        }
4621        Expression::Subquery(s) => {
4622            if let Some(ref alias) = s.alias {
4623                names.insert(alias.name.clone());
4624            }
4625        }
4626        Expression::Table(t) => {
4627            if let Some(ref alias) = t.alias {
4628                names.insert(alias.name.clone());
4629            } else {
4630                names.insert(t.name.name.clone());
4631            }
4632        }
4633        Expression::Column(c) => {
4634            names.insert(c.name.name.clone());
4635        }
4636        Expression::Identifier(id) => {
4637            names.insert(id.name.clone());
4638        }
4639        _ => {}
4640    }
4641}
4642
4643/// Strip UNNEST wrapping from column reference arguments for Redshift target.
4644/// BigQuery UNNEST(column_ref) -> Redshift: just column_ref
4645pub fn strip_unnest_column_refs(expr: Expression) -> Result<Expression> {
4646    use crate::dialects::transform_recursive;
4647    transform_recursive(expr, &strip_unnest_column_refs_single)
4648}
4649
4650fn strip_unnest_column_refs_single(expr: Expression) -> Result<Expression> {
4651    if let Expression::Select(mut select) = expr {
4652        // Process JOINs (UNNEST items have been moved to joins by unnest_from_to_cross_join)
4653        for join in select.joins.iter_mut() {
4654            strip_unnest_from_expr(&mut join.this);
4655        }
4656        // Process FROM items too
4657        if let Some(ref mut from) = select.from {
4658            for from_item in from.expressions.iter_mut() {
4659                strip_unnest_from_expr(from_item);
4660            }
4661        }
4662        Ok(Expression::Select(select))
4663    } else {
4664        Ok(expr)
4665    }
4666}
4667
4668/// If expr is Alias(UNNEST(column_ref), alias) where UNNEST arg is a column/dot path,
4669/// replace with Alias(column_ref, alias) to strip the UNNEST.
4670fn strip_unnest_from_expr(expr: &mut Expression) {
4671    if let Expression::Alias(ref mut alias) = expr {
4672        if let Expression::Unnest(ref unnest) = alias.this {
4673            let is_column_ref = matches!(&unnest.this, Expression::Column(_) | Expression::Dot(_));
4674            if is_column_ref {
4675                // Replace UNNEST(col_ref) with just col_ref
4676                let inner = unnest.this.clone();
4677                alias.this = inner;
4678            }
4679        }
4680    }
4681}
4682
4683/// Wrap DuckDB UNNEST of struct arrays in (SELECT UNNEST(..., max_depth => 2)) subquery.
4684/// BigQuery UNNEST of struct arrays needs this wrapping for DuckDB to properly expand struct fields.
4685pub fn wrap_duckdb_unnest_struct(expr: Expression) -> Result<Expression> {
4686    use crate::dialects::transform_recursive;
4687    transform_recursive(expr, &wrap_duckdb_unnest_struct_single)
4688}
4689
4690fn wrap_duckdb_unnest_struct_single(expr: Expression) -> Result<Expression> {
4691    if let Expression::Select(mut select) = expr {
4692        // Process FROM items
4693        if let Some(ref mut from) = select.from {
4694            for from_item in from.expressions.iter_mut() {
4695                try_wrap_unnest_in_subquery(from_item);
4696            }
4697        }
4698
4699        // Process JOINs
4700        for join in select.joins.iter_mut() {
4701            try_wrap_unnest_in_subquery(&mut join.this);
4702        }
4703
4704        Ok(Expression::Select(select))
4705    } else {
4706        Ok(expr)
4707    }
4708}
4709
4710/// Check if an expression contains struct array elements that need DuckDB UNNEST wrapping.
4711fn is_struct_array_unnest_arg(expr: &Expression) -> bool {
4712    match expr {
4713        // Array literal containing struct elements
4714        Expression::Array(arr) => arr
4715            .expressions
4716            .iter()
4717            .any(|e| matches!(e, Expression::Struct(_))),
4718        Expression::ArrayFunc(arr) => arr
4719            .expressions
4720            .iter()
4721            .any(|e| matches!(e, Expression::Struct(_))),
4722        // CAST to struct array type, e.g. CAST([] AS STRUCT(x BIGINT)[])
4723        Expression::Cast(c) => {
4724            matches!(&c.to, DataType::Array { element_type, .. } if matches!(**element_type, DataType::Struct { .. }))
4725        }
4726        _ => false,
4727    }
4728}
4729
4730/// Try to wrap an UNNEST expression in a (SELECT UNNEST(..., max_depth => 2)) subquery.
4731/// Handles both bare UNNEST and Alias(UNNEST).
4732fn try_wrap_unnest_in_subquery(expr: &mut Expression) {
4733    // Check for Alias wrapping UNNEST
4734    if let Expression::Alias(ref alias) = expr {
4735        if let Expression::Unnest(ref unnest) = alias.this {
4736            if is_struct_array_unnest_arg(&unnest.this) {
4737                let unnest_clone = (**unnest).clone();
4738                let alias_name = alias.alias.clone();
4739                let new_expr = make_unnest_subquery(unnest_clone, Some(alias_name));
4740                *expr = new_expr;
4741                return;
4742            }
4743        }
4744    }
4745
4746    // Check for bare UNNEST
4747    if let Expression::Unnest(ref unnest) = expr {
4748        if is_struct_array_unnest_arg(&unnest.this) {
4749            let unnest_clone = (**unnest).clone();
4750            let new_expr = make_unnest_subquery(unnest_clone, None);
4751            *expr = new_expr;
4752        }
4753    }
4754}
4755
4756/// Create (SELECT UNNEST(arg, max_depth => 2)) [AS alias] subquery.
4757fn make_unnest_subquery(unnest: UnnestFunc, alias: Option<Identifier>) -> Expression {
4758    // Build UNNEST function call with max_depth => 2 named argument
4759    let max_depth_arg = Expression::NamedArgument(Box::new(NamedArgument {
4760        name: Identifier::new("max_depth".to_string()),
4761        value: Expression::Literal(Literal::Number("2".to_string())),
4762        separator: NamedArgSeparator::DArrow,
4763    }));
4764
4765    let mut unnest_args = vec![unnest.this];
4766    unnest_args.extend(unnest.expressions);
4767    unnest_args.push(max_depth_arg);
4768
4769    let unnest_func =
4770        Expression::Function(Box::new(Function::new("UNNEST".to_string(), unnest_args)));
4771
4772    // Build SELECT UNNEST(...)
4773    let mut inner_select = Select::new();
4774    inner_select.expressions = vec![unnest_func];
4775    let inner_select = Expression::Select(Box::new(inner_select));
4776
4777    // Wrap in subquery
4778    let subquery = Subquery {
4779        this: inner_select,
4780        alias,
4781        column_aliases: Vec::new(),
4782        order_by: None,
4783        limit: None,
4784        offset: None,
4785        distribute_by: None,
4786        sort_by: None,
4787        cluster_by: None,
4788        lateral: false,
4789        modifiers_inside: false,
4790        trailing_comments: Vec::new(),
4791    };
4792
4793    Expression::Subquery(Box::new(subquery))
4794}
4795
4796/// Wrap UNION with ORDER BY/LIMIT in a subquery.
4797///
4798/// Some dialects (ClickHouse, TSQL) don't support ORDER BY/LIMIT directly on UNION.
4799/// This transform converts:
4800///   SELECT ... UNION SELECT ... ORDER BY x LIMIT n
4801/// to:
4802///   SELECT * FROM (SELECT ... UNION SELECT ...) AS _l_0 ORDER BY x LIMIT n
4803///
4804/// NOTE: Our parser may place ORDER BY/LIMIT on the right-hand SELECT rather than
4805/// the Union (unlike Python sqlglot). This function handles both cases by checking
4806/// the right-hand SELECT for trailing ORDER BY/LIMIT and moving them to the Union.
4807pub fn no_limit_order_by_union(expr: Expression) -> Result<Expression> {
4808    use crate::expressions::{Limit as LimitClause, Offset as OffsetClause, OrderBy, Star};
4809
4810    match expr {
4811        Expression::Union(mut u) => {
4812            // Check if ORDER BY/LIMIT are on the rightmost Select instead of the Union
4813            // (our parser may attach them to the right SELECT)
4814            if u.order_by.is_none() && u.limit.is_none() && u.offset.is_none() {
4815                // Find the rightmost Select and check for ORDER BY/LIMIT
4816                if let Expression::Select(ref mut right_select) = u.right {
4817                    if right_select.order_by.is_some()
4818                        || right_select.limit.is_some()
4819                        || right_select.offset.is_some()
4820                    {
4821                        // Move ORDER BY/LIMIT from right Select to Union
4822                        u.order_by = right_select.order_by.take();
4823                        u.limit = right_select.limit.take().map(|l| Box::new(l.this));
4824                        u.offset = right_select.offset.take().map(|o| Box::new(o.this));
4825                    }
4826                }
4827            }
4828
4829            let has_order_or_limit =
4830                u.order_by.is_some() || u.limit.is_some() || u.offset.is_some();
4831            if has_order_or_limit {
4832                // Extract ORDER BY, LIMIT, OFFSET from the Union
4833                let order_by: Option<OrderBy> = u.order_by.take();
4834                let union_limit: Option<Box<Expression>> = u.limit.take();
4835                let union_offset: Option<Box<Expression>> = u.offset.take();
4836
4837                // Convert Union's limit (Box<Expression>) to Select's limit (Limit struct)
4838                let select_limit: Option<LimitClause> = union_limit.map(|l| LimitClause {
4839                    this: *l,
4840                    percent: false,
4841                    comments: Vec::new(),
4842                });
4843
4844                // Convert Union's offset (Box<Expression>) to Select's offset (Offset struct)
4845                let select_offset: Option<OffsetClause> = union_offset.map(|o| OffsetClause {
4846                    this: *o,
4847                    rows: None,
4848                });
4849
4850                // Create a subquery from the Union
4851                let subquery = Subquery {
4852                    this: Expression::Union(u),
4853                    alias: Some(Identifier::new("_l_0")),
4854                    column_aliases: Vec::new(),
4855                    lateral: false,
4856                    modifiers_inside: false,
4857                    order_by: None,
4858                    limit: None,
4859                    offset: None,
4860                    distribute_by: None,
4861                    sort_by: None,
4862                    cluster_by: None,
4863                    trailing_comments: Vec::new(),
4864                };
4865
4866                // Build SELECT * FROM (UNION) AS _l_0 ORDER BY ... LIMIT ...
4867                let mut select = Select::default();
4868                select.expressions = vec![Expression::Star(Star {
4869                    table: None,
4870                    except: None,
4871                    replace: None,
4872                    rename: None,
4873                    trailing_comments: Vec::new(),
4874                    span: None,
4875                })];
4876                select.from = Some(From {
4877                    expressions: vec![Expression::Subquery(Box::new(subquery))],
4878                });
4879                select.order_by = order_by;
4880                select.limit = select_limit;
4881                select.offset = select_offset;
4882
4883                Ok(Expression::Select(Box::new(select)))
4884            } else {
4885                Ok(Expression::Union(u))
4886            }
4887        }
4888        _ => Ok(expr),
4889    }
4890}
4891
4892/// Expand LIKE ANY / ILIKE ANY to OR chains.
4893///
4894/// For dialects that don't support quantifiers on LIKE/ILIKE (e.g. DuckDB),
4895/// expand `x LIKE ANY (('a', 'b'))` to `x LIKE 'a' OR x LIKE 'b'`.
4896pub fn expand_like_any(expr: Expression) -> Result<Expression> {
4897    use crate::expressions::{BinaryOp, LikeOp};
4898
4899    fn unwrap_parens(e: &Expression) -> &Expression {
4900        match e {
4901            Expression::Paren(p) => unwrap_parens(&p.this),
4902            _ => e,
4903        }
4904    }
4905
4906    fn extract_tuple_values(e: &Expression) -> Option<Vec<Expression>> {
4907        let inner = unwrap_parens(e);
4908        match inner {
4909            Expression::Tuple(t) => Some(t.expressions.clone()),
4910            _ => None,
4911        }
4912    }
4913
4914    transform_recursive(expr, &|e| {
4915        match e {
4916            Expression::Like(ref op) if op.quantifier.as_deref() == Some("ANY") => {
4917                if let Some(values) = extract_tuple_values(&op.right) {
4918                    if values.is_empty() {
4919                        return Ok(e);
4920                    }
4921                    // Build: left LIKE val1 OR left LIKE val2 OR ...
4922                    let mut result: Option<Expression> = None;
4923                    for val in values {
4924                        let like = Expression::Like(Box::new(LikeOp {
4925                            left: op.left.clone(),
4926                            right: val,
4927                            escape: op.escape.clone(),
4928                            quantifier: None,
4929                        }));
4930                        result = Some(match result {
4931                            None => like,
4932                            Some(prev) => Expression::Or(Box::new(BinaryOp::new(prev, like))),
4933                        });
4934                    }
4935                    Ok(result.unwrap_or(e))
4936                } else {
4937                    Ok(e)
4938                }
4939            }
4940            Expression::ILike(ref op) if op.quantifier.as_deref() == Some("ANY") => {
4941                if let Some(values) = extract_tuple_values(&op.right) {
4942                    if values.is_empty() {
4943                        return Ok(e);
4944                    }
4945                    let mut result: Option<Expression> = None;
4946                    for val in values {
4947                        let ilike = Expression::ILike(Box::new(LikeOp {
4948                            left: op.left.clone(),
4949                            right: val,
4950                            escape: op.escape.clone(),
4951                            quantifier: None,
4952                        }));
4953                        result = Some(match result {
4954                            None => ilike,
4955                            Some(prev) => Expression::Or(Box::new(BinaryOp::new(prev, ilike))),
4956                        });
4957                    }
4958                    Ok(result.unwrap_or(e))
4959                } else {
4960                    Ok(e)
4961                }
4962            }
4963            _ => Ok(e),
4964        }
4965    })
4966}
4967
4968/// Ensures all unaliased column outputs in subqueries and CTEs get self-aliases.
4969///
4970/// This is needed for TSQL which requires derived table outputs to be aliased.
4971/// For example: `SELECT c FROM t` inside a subquery becomes `SELECT c AS c FROM t`.
4972///
4973/// Mirrors Python sqlglot's `qualify_derived_table_outputs` function which is applied
4974/// as a TRANSFORMS preprocessor for Subquery and CTE expressions in the TSQL dialect.
4975pub fn qualify_derived_table_outputs(expr: Expression) -> Result<Expression> {
4976    use crate::expressions::Alias;
4977
4978    fn add_self_aliases_to_select(select: &mut Select) {
4979        let new_expressions: Vec<Expression> = select
4980            .expressions
4981            .iter()
4982            .map(|e| {
4983                match e {
4984                    // Column reference without alias -> add self-alias
4985                    Expression::Column(col) => {
4986                        let alias_name = col.name.clone();
4987                        Expression::Alias(Box::new(Alias {
4988                            this: e.clone(),
4989                            alias: alias_name,
4990                            column_aliases: Vec::new(),
4991                            pre_alias_comments: Vec::new(),
4992                            trailing_comments: Vec::new(),
4993                        }))
4994                    }
4995                    // Already aliased or star or other -> keep as is
4996                    _ => e.clone(),
4997                }
4998            })
4999            .collect();
5000        select.expressions = new_expressions;
5001    }
5002
5003    fn walk_and_qualify(expr: &mut Expression) {
5004        match expr {
5005            Expression::Select(ref mut select) => {
5006                // Qualify subqueries in FROM
5007                if let Some(ref mut from) = select.from {
5008                    for e in from.expressions.iter_mut() {
5009                        qualify_subquery_expr(e);
5010                        walk_and_qualify(e);
5011                    }
5012                }
5013                // Qualify subqueries in JOINs
5014                for join in select.joins.iter_mut() {
5015                    qualify_subquery_expr(&mut join.this);
5016                    walk_and_qualify(&mut join.this);
5017                }
5018                // Recurse into expressions (for correlated subqueries etc.)
5019                for e in select.expressions.iter_mut() {
5020                    walk_and_qualify(e);
5021                }
5022                // Recurse into WHERE
5023                if let Some(ref mut w) = select.where_clause {
5024                    walk_and_qualify(&mut w.this);
5025                }
5026            }
5027            Expression::Subquery(ref mut subquery) => {
5028                walk_and_qualify(&mut subquery.this);
5029            }
5030            Expression::Union(ref mut u) => {
5031                walk_and_qualify(&mut u.left);
5032                walk_and_qualify(&mut u.right);
5033            }
5034            Expression::Intersect(ref mut i) => {
5035                walk_and_qualify(&mut i.left);
5036                walk_and_qualify(&mut i.right);
5037            }
5038            Expression::Except(ref mut e) => {
5039                walk_and_qualify(&mut e.left);
5040                walk_and_qualify(&mut e.right);
5041            }
5042            Expression::Cte(ref mut cte) => {
5043                walk_and_qualify(&mut cte.this);
5044            }
5045            _ => {}
5046        }
5047    }
5048
5049    fn qualify_subquery_expr(expr: &mut Expression) {
5050        match expr {
5051            Expression::Subquery(ref mut subquery) => {
5052                // Only qualify if the subquery has a table alias but no column aliases
5053                if subquery.alias.is_some() && subquery.column_aliases.is_empty() {
5054                    if let Expression::Select(ref mut inner_select) = subquery.this {
5055                        // Check the inner select doesn't use *
5056                        let has_star = inner_select
5057                            .expressions
5058                            .iter()
5059                            .any(|e| matches!(e, Expression::Star(_)));
5060                        if !has_star {
5061                            add_self_aliases_to_select(inner_select);
5062                        }
5063                    }
5064                }
5065                // Recurse into the subquery's inner query
5066                walk_and_qualify(&mut subquery.this);
5067            }
5068            Expression::Alias(ref mut alias) => {
5069                qualify_subquery_expr(&mut alias.this);
5070            }
5071            _ => {}
5072        }
5073    }
5074
5075    let mut result = expr;
5076    walk_and_qualify(&mut result);
5077
5078    // Also qualify CTE inner queries at the top level
5079    if let Expression::Select(ref mut select) = result {
5080        if let Some(ref mut with) = select.with {
5081            for cte in with.ctes.iter_mut() {
5082                // CTE with column names -> no need to qualify
5083                if cte.columns.is_empty() {
5084                    // Walk into the CTE's inner query for nested subqueries
5085                    walk_and_qualify(&mut cte.this);
5086                }
5087            }
5088        }
5089    }
5090
5091    Ok(result)
5092}
5093
5094#[cfg(test)]
5095mod tests {
5096    use super::*;
5097    use crate::dialects::{Dialect, DialectType};
5098    use crate::expressions::Column;
5099
5100    fn gen(expr: &Expression) -> String {
5101        let dialect = Dialect::get(DialectType::Generic);
5102        dialect.generate(expr).unwrap()
5103    }
5104
5105    #[test]
5106    fn test_preprocess() {
5107        let expr = Expression::Boolean(BooleanLiteral { value: true });
5108        let result = preprocess(expr, &[replace_bool_with_int]).unwrap();
5109        assert!(matches!(result, Expression::Literal(Literal::Number(_))));
5110    }
5111
5112    #[test]
5113    fn test_preprocess_chain() {
5114        // Test chaining multiple transforms using function pointers
5115        let expr = Expression::Boolean(BooleanLiteral { value: true });
5116        // Create array of function pointers (all same type)
5117        let transforms: Vec<fn(Expression) -> Result<Expression>> =
5118            vec![replace_bool_with_int, replace_int_with_bool];
5119        let result = preprocess(expr, &transforms).unwrap();
5120        // After replace_bool_with_int: 1
5121        // After replace_int_with_bool: true
5122        if let Expression::Boolean(b) = result {
5123            assert!(b.value);
5124        } else {
5125            panic!("Expected boolean literal");
5126        }
5127    }
5128
5129    #[test]
5130    fn test_unnest_to_explode() {
5131        let unnest = Expression::Unnest(Box::new(UnnestFunc {
5132            this: Expression::Column(Column {
5133                name: Identifier::new("arr".to_string()),
5134                table: None,
5135                join_mark: false,
5136                trailing_comments: vec![],
5137                span: None,
5138            }),
5139            expressions: Vec::new(),
5140            with_ordinality: false,
5141            alias: None,
5142            offset_alias: None,
5143        }));
5144
5145        let result = unnest_to_explode(unnest).unwrap();
5146        assert!(matches!(result, Expression::Explode(_)));
5147    }
5148
5149    #[test]
5150    fn test_explode_to_unnest() {
5151        let explode = Expression::Explode(Box::new(UnaryFunc {
5152            this: Expression::Column(Column {
5153                name: Identifier::new("arr".to_string()),
5154                table: None,
5155                join_mark: false,
5156                trailing_comments: vec![],
5157                span: None,
5158            }),
5159            original_name: None,
5160        }));
5161
5162        let result = explode_to_unnest(explode).unwrap();
5163        assert!(matches!(result, Expression::Unnest(_)));
5164    }
5165
5166    #[test]
5167    fn test_replace_bool_with_int() {
5168        let true_expr = Expression::Boolean(BooleanLiteral { value: true });
5169        let result = replace_bool_with_int(true_expr).unwrap();
5170        if let Expression::Literal(Literal::Number(n)) = result {
5171            assert_eq!(n, "1");
5172        } else {
5173            panic!("Expected number literal");
5174        }
5175
5176        let false_expr = Expression::Boolean(BooleanLiteral { value: false });
5177        let result = replace_bool_with_int(false_expr).unwrap();
5178        if let Expression::Literal(Literal::Number(n)) = result {
5179            assert_eq!(n, "0");
5180        } else {
5181            panic!("Expected number literal");
5182        }
5183    }
5184
5185    #[test]
5186    fn test_replace_int_with_bool() {
5187        let one_expr = Expression::Literal(Literal::Number("1".to_string()));
5188        let result = replace_int_with_bool(one_expr).unwrap();
5189        if let Expression::Boolean(b) = result {
5190            assert!(b.value);
5191        } else {
5192            panic!("Expected boolean true");
5193        }
5194
5195        let zero_expr = Expression::Literal(Literal::Number("0".to_string()));
5196        let result = replace_int_with_bool(zero_expr).unwrap();
5197        if let Expression::Boolean(b) = result {
5198            assert!(!b.value);
5199        } else {
5200            panic!("Expected boolean false");
5201        }
5202
5203        // Test that other numbers are not converted
5204        let two_expr = Expression::Literal(Literal::Number("2".to_string()));
5205        let result = replace_int_with_bool(two_expr).unwrap();
5206        assert!(matches!(result, Expression::Literal(Literal::Number(_))));
5207    }
5208
5209    #[test]
5210    fn test_strip_data_type_params() {
5211        // Test Decimal
5212        let decimal = DataType::Decimal {
5213            precision: Some(10),
5214            scale: Some(2),
5215        };
5216        let stripped = strip_data_type_params(decimal);
5217        assert_eq!(
5218            stripped,
5219            DataType::Decimal {
5220                precision: None,
5221                scale: None
5222            }
5223        );
5224
5225        // Test VarChar
5226        let varchar = DataType::VarChar {
5227            length: Some(255),
5228            parenthesized_length: false,
5229        };
5230        let stripped = strip_data_type_params(varchar);
5231        assert_eq!(
5232            stripped,
5233            DataType::VarChar {
5234                length: None,
5235                parenthesized_length: false
5236            }
5237        );
5238
5239        // Test Char
5240        let char_type = DataType::Char { length: Some(10) };
5241        let stripped = strip_data_type_params(char_type);
5242        assert_eq!(stripped, DataType::Char { length: None });
5243
5244        // Test Timestamp (preserve timezone)
5245        let timestamp = DataType::Timestamp {
5246            precision: Some(6),
5247            timezone: true,
5248        };
5249        let stripped = strip_data_type_params(timestamp);
5250        assert_eq!(
5251            stripped,
5252            DataType::Timestamp {
5253                precision: None,
5254                timezone: true
5255            }
5256        );
5257
5258        // Test Array (recursive)
5259        let array = DataType::Array {
5260            element_type: Box::new(DataType::VarChar {
5261                length: Some(100),
5262                parenthesized_length: false,
5263            }),
5264            dimension: None,
5265        };
5266        let stripped = strip_data_type_params(array);
5267        assert_eq!(
5268            stripped,
5269            DataType::Array {
5270                element_type: Box::new(DataType::VarChar {
5271                    length: None,
5272                    parenthesized_length: false
5273                }),
5274                dimension: None,
5275            }
5276        );
5277
5278        // Test types without params are unchanged
5279        let text = DataType::Text;
5280        let stripped = strip_data_type_params(text);
5281        assert_eq!(stripped, DataType::Text);
5282    }
5283
5284    #[test]
5285    fn test_remove_precision_parameterized_types_cast() {
5286        // Create a CAST(1 AS DECIMAL(10, 2)) expression
5287        let cast_expr = Expression::Cast(Box::new(Cast {
5288            this: Expression::Literal(Literal::Number("1".to_string())),
5289            to: DataType::Decimal {
5290                precision: Some(10),
5291                scale: Some(2),
5292            },
5293            trailing_comments: vec![],
5294            double_colon_syntax: false,
5295            format: None,
5296            default: None,
5297        }));
5298
5299        let result = remove_precision_parameterized_types(cast_expr).unwrap();
5300        if let Expression::Cast(cast) = result {
5301            assert_eq!(
5302                cast.to,
5303                DataType::Decimal {
5304                    precision: None,
5305                    scale: None
5306                }
5307            );
5308        } else {
5309            panic!("Expected Cast expression");
5310        }
5311    }
5312
5313    #[test]
5314    fn test_remove_precision_parameterized_types_varchar() {
5315        // Create a CAST('hello' AS VARCHAR(10)) expression
5316        let cast_expr = Expression::Cast(Box::new(Cast {
5317            this: Expression::Literal(Literal::String("hello".to_string())),
5318            to: DataType::VarChar {
5319                length: Some(10),
5320                parenthesized_length: false,
5321            },
5322            trailing_comments: vec![],
5323            double_colon_syntax: false,
5324            format: None,
5325            default: None,
5326        }));
5327
5328        let result = remove_precision_parameterized_types(cast_expr).unwrap();
5329        if let Expression::Cast(cast) = result {
5330            assert_eq!(
5331                cast.to,
5332                DataType::VarChar {
5333                    length: None,
5334                    parenthesized_length: false
5335                }
5336            );
5337        } else {
5338            panic!("Expected Cast expression");
5339        }
5340    }
5341
5342    #[test]
5343    fn test_remove_precision_direct_cast() {
5344        // Test transform on a direct Cast expression (not nested in Select)
5345        // The current implementation handles top-level Cast expressions;
5346        // a full implementation would need recursive AST traversal
5347        let cast = Expression::Cast(Box::new(Cast {
5348            this: Expression::Literal(Literal::Number("1".to_string())),
5349            to: DataType::Decimal {
5350                precision: Some(10),
5351                scale: Some(2),
5352            },
5353            trailing_comments: vec![],
5354            double_colon_syntax: false,
5355            format: None,
5356            default: None,
5357        }));
5358
5359        let transformed = remove_precision_parameterized_types(cast).unwrap();
5360        let generated = gen(&transformed);
5361
5362        // Should now be DECIMAL without precision
5363        assert!(generated.contains("DECIMAL"));
5364        assert!(!generated.contains("(10"));
5365    }
5366
5367    #[test]
5368    fn test_epoch_cast_to_ts() {
5369        // Test CAST('epoch' AS TIMESTAMP) → CAST('1970-01-01 00:00:00' AS TIMESTAMP)
5370        let cast_expr = Expression::Cast(Box::new(Cast {
5371            this: Expression::Literal(Literal::String("epoch".to_string())),
5372            to: DataType::Timestamp {
5373                precision: None,
5374                timezone: false,
5375            },
5376            trailing_comments: vec![],
5377            double_colon_syntax: false,
5378            format: None,
5379            default: None,
5380        }));
5381
5382        let result = epoch_cast_to_ts(cast_expr).unwrap();
5383        if let Expression::Cast(cast) = result {
5384            if let Expression::Literal(Literal::String(s)) = cast.this {
5385                assert_eq!(s, "1970-01-01 00:00:00");
5386            } else {
5387                panic!("Expected string literal");
5388            }
5389        } else {
5390            panic!("Expected Cast expression");
5391        }
5392    }
5393
5394    #[test]
5395    fn test_epoch_cast_to_ts_preserves_non_epoch() {
5396        // Test that non-epoch strings are preserved
5397        let cast_expr = Expression::Cast(Box::new(Cast {
5398            this: Expression::Literal(Literal::String("2024-01-15".to_string())),
5399            to: DataType::Timestamp {
5400                precision: None,
5401                timezone: false,
5402            },
5403            trailing_comments: vec![],
5404            double_colon_syntax: false,
5405            format: None,
5406            default: None,
5407        }));
5408
5409        let result = epoch_cast_to_ts(cast_expr).unwrap();
5410        if let Expression::Cast(cast) = result {
5411            if let Expression::Literal(Literal::String(s)) = cast.this {
5412                assert_eq!(s, "2024-01-15");
5413            } else {
5414                panic!("Expected string literal");
5415            }
5416        } else {
5417            panic!("Expected Cast expression");
5418        }
5419    }
5420
5421    #[test]
5422    fn test_unqualify_columns() {
5423        // Test that table qualifiers are removed
5424        let col = Expression::Column(Column {
5425            name: Identifier::new("id".to_string()),
5426            table: Some(Identifier::new("users".to_string())),
5427            join_mark: false,
5428            trailing_comments: vec![],
5429            span: None,
5430        });
5431
5432        let result = unqualify_columns(col).unwrap();
5433        if let Expression::Column(c) = result {
5434            assert!(c.table.is_none());
5435            assert_eq!(c.name.name, "id");
5436        } else {
5437            panic!("Expected Column expression");
5438        }
5439    }
5440
5441    #[test]
5442    fn test_is_temporal_type() {
5443        assert!(is_temporal_type(&DataType::Date));
5444        assert!(is_temporal_type(&DataType::Timestamp {
5445            precision: None,
5446            timezone: false
5447        }));
5448        assert!(is_temporal_type(&DataType::Time {
5449            precision: None,
5450            timezone: false
5451        }));
5452        assert!(!is_temporal_type(&DataType::Int {
5453            length: None,
5454            integer_spelling: false
5455        }));
5456        assert!(!is_temporal_type(&DataType::VarChar {
5457            length: None,
5458            parenthesized_length: false
5459        }));
5460    }
5461
5462    #[test]
5463    fn test_eliminate_semi_join_basic() {
5464        use crate::expressions::{Join, TableRef};
5465
5466        // Test that semi joins are converted to EXISTS
5467        let select = Expression::Select(Box::new(Select {
5468            expressions: vec![Expression::Column(Column {
5469                name: Identifier::new("a".to_string()),
5470                table: None,
5471                join_mark: false,
5472                trailing_comments: vec![],
5473                span: None,
5474            })],
5475            from: Some(From {
5476                expressions: vec![Expression::Table(TableRef::new("t1"))],
5477            }),
5478            joins: vec![Join {
5479                this: Expression::Table(TableRef::new("t2")),
5480                kind: JoinKind::Semi,
5481                on: Some(Expression::Eq(Box::new(BinaryOp {
5482                    left: Expression::Column(Column {
5483                        name: Identifier::new("x".to_string()),
5484                        table: None,
5485                        join_mark: false,
5486                        trailing_comments: vec![],
5487                        span: None,
5488                    }),
5489                    right: Expression::Column(Column {
5490                        name: Identifier::new("y".to_string()),
5491                        table: None,
5492                        join_mark: false,
5493                        trailing_comments: vec![],
5494                        span: None,
5495                    }),
5496                    left_comments: vec![],
5497                    operator_comments: vec![],
5498                    trailing_comments: vec![],
5499                }))),
5500                using: vec![],
5501                use_inner_keyword: false,
5502                use_outer_keyword: false,
5503                deferred_condition: false,
5504                join_hint: None,
5505                match_condition: None,
5506                pivots: Vec::new(),
5507                comments: Vec::new(),
5508                nesting_group: 0,
5509                directed: false,
5510            }],
5511            ..Select::new()
5512        }));
5513
5514        let result = eliminate_semi_and_anti_joins(select).unwrap();
5515        if let Expression::Select(s) = result {
5516            // Semi join should be removed
5517            assert!(s.joins.is_empty());
5518            // WHERE clause should have EXISTS
5519            assert!(s.where_clause.is_some());
5520        } else {
5521            panic!("Expected Select expression");
5522        }
5523    }
5524
5525    #[test]
5526    fn test_no_ilike_sql() {
5527        use crate::expressions::LikeOp;
5528
5529        // Test ILIKE conversion to LOWER+LIKE
5530        let ilike_expr = Expression::ILike(Box::new(LikeOp {
5531            left: Expression::Column(Column {
5532                name: Identifier::new("name".to_string()),
5533                table: None,
5534                join_mark: false,
5535                trailing_comments: vec![],
5536                span: None,
5537            }),
5538            right: Expression::Literal(Literal::String("%test%".to_string())),
5539            escape: None,
5540            quantifier: None,
5541        }));
5542
5543        let result = no_ilike_sql(ilike_expr).unwrap();
5544        if let Expression::Like(like) = result {
5545            // Left should be LOWER(name)
5546            if let Expression::Function(f) = &like.left {
5547                assert_eq!(f.name, "LOWER");
5548            } else {
5549                panic!("Expected LOWER function on left");
5550            }
5551            // Right should be LOWER('%test%')
5552            if let Expression::Function(f) = &like.right {
5553                assert_eq!(f.name, "LOWER");
5554            } else {
5555                panic!("Expected LOWER function on right");
5556            }
5557        } else {
5558            panic!("Expected Like expression");
5559        }
5560    }
5561
5562    #[test]
5563    fn test_no_trycast_sql() {
5564        // Test TryCast conversion to Cast
5565        let trycast_expr = Expression::TryCast(Box::new(Cast {
5566            this: Expression::Literal(Literal::String("123".to_string())),
5567            to: DataType::Int {
5568                length: None,
5569                integer_spelling: false,
5570            },
5571            trailing_comments: vec![],
5572            double_colon_syntax: false,
5573            format: None,
5574            default: None,
5575        }));
5576
5577        let result = no_trycast_sql(trycast_expr).unwrap();
5578        assert!(matches!(result, Expression::Cast(_)));
5579    }
5580
5581    #[test]
5582    fn test_no_safe_cast_sql() {
5583        // Test SafeCast conversion to Cast
5584        let safe_cast_expr = Expression::SafeCast(Box::new(Cast {
5585            this: Expression::Literal(Literal::String("123".to_string())),
5586            to: DataType::Int {
5587                length: None,
5588                integer_spelling: false,
5589            },
5590            trailing_comments: vec![],
5591            double_colon_syntax: false,
5592            format: None,
5593            default: None,
5594        }));
5595
5596        let result = no_safe_cast_sql(safe_cast_expr).unwrap();
5597        assert!(matches!(result, Expression::Cast(_)));
5598    }
5599
5600    #[test]
5601    fn test_explode_to_unnest_presto() {
5602        let spark = Dialect::get(DialectType::Spark);
5603        let result = spark
5604            .transpile_to("SELECT EXPLODE(x) FROM tbl", DialectType::Presto)
5605            .unwrap();
5606        assert_eq!(
5607            result[0],
5608            "SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col FROM tbl CROSS JOIN UNNEST(SEQUENCE(1, GREATEST(CARDINALITY(x)))) AS _u(pos) CROSS JOIN UNNEST(x) WITH ORDINALITY AS _u_2(col, pos_2) WHERE _u.pos = _u_2.pos_2 OR (_u.pos > CARDINALITY(x) AND _u_2.pos_2 = CARDINALITY(x))"
5609        );
5610    }
5611
5612    #[test]
5613    fn test_explode_to_unnest_bigquery() {
5614        let spark = Dialect::get(DialectType::Spark);
5615        let result = spark
5616            .transpile_to("SELECT EXPLODE(x) FROM tbl", DialectType::BigQuery)
5617            .unwrap();
5618        assert_eq!(
5619            result[0],
5620            "SELECT IF(pos = pos_2, col, NULL) AS col FROM tbl CROSS JOIN UNNEST(GENERATE_ARRAY(0, GREATEST(ARRAY_LENGTH(x)) - 1)) AS pos CROSS JOIN UNNEST(x) AS col WITH OFFSET AS pos_2 WHERE pos = pos_2 OR (pos > (ARRAY_LENGTH(x) - 1) AND pos_2 = (ARRAY_LENGTH(x) - 1))"
5621        );
5622    }
5623}