Skip to main content

sqlglot_rust/optimizer/
unnest_subqueries.rs

1/// Unnest and decorrelate subqueries.
2///
3/// Transforms correlated subqueries in WHERE clauses into equivalent JOINs
4/// when safe to do so. This is a common optimization because most query engines
5/// execute joins more efficiently than correlated subqueries.
6///
7/// ## Supported rewrites
8///
9/// | Pattern                               | Rewrite                             |
10/// |---------------------------------------|-------------------------------------|
11/// | `WHERE EXISTS (SELECT … WHERE a = b)` | `INNER JOIN (SELECT …) ON a = b`   |
12/// | `WHERE NOT EXISTS (…)`                | `LEFT JOIN … WHERE _u.col IS NULL`  |
13/// | `WHERE x IN (SELECT col FROM …)`      | `INNER JOIN (SELECT DISTINCT …)`    |
14/// | `WHERE x NOT IN (SELECT col FROM …)`  | `LEFT JOIN … WHERE _u.col IS NULL`  |
15///
16/// ## Safety
17///
18/// The pass bails out (leaves the subquery unchanged) when:
19/// - The subquery has no correlation predicates (equality on outer columns).
20/// - The subquery is inside a function/expression in the SELECT list rather
21///   than in a WHERE predicate — matching the fix for Python sqlglot#7295.
22/// - The correlated predicate involves non-equality operators that would
23///   require LATERAL / APPLY for correctness.
24use crate::ast::*;
25
26/// Apply subquery unnesting to a statement.
27///
28/// Returns the statement unchanged if no subqueries can be safely unnested.
29pub fn unnest_subqueries(statement: Statement) -> Statement {
30    match statement {
31        Statement::Select(sel) => Statement::Select(unnest_select(sel)),
32        other => other,
33    }
34}
35
36/// Counter for generating unique aliases for unnested subquery tables.
37struct AliasGen {
38    counter: usize,
39}
40
41impl AliasGen {
42    fn new() -> Self {
43        Self { counter: 0 }
44    }
45
46    fn next(&mut self) -> String {
47        let alias = format!("_u{}", self.counter);
48        self.counter += 1;
49        alias
50    }
51}
52
53fn unnest_select(mut sel: SelectStatement) -> SelectStatement {
54    let mut alias_gen = AliasGen::new();
55
56    // Only unnest subqueries found in the WHERE clause (safe position).
57    // Subqueries in SELECT columns that are inside functions (COALESCE, etc.)
58    // are NOT unnested to avoid the Python sqlglot#7295 crash scenario.
59    if let Some(where_clause) = sel.where_clause.take() {
60        let (new_where, new_joins) = unnest_where(where_clause, &mut alias_gen);
61        sel.where_clause = new_where;
62        sel.joins.extend(new_joins);
63    }
64
65    sel
66}
67
68/// Process a WHERE clause expression, extracting subqueries that can be
69/// rewritten as joins. Returns the residual WHERE clause (if any) and
70/// a list of new JoinClauses.
71fn unnest_where(expr: Expr, alias_gen: &mut AliasGen) -> (Option<Expr>, Vec<JoinClause>) {
72    let mut joins = Vec::new();
73    let residual = unnest_expr(expr, &mut joins, alias_gen);
74    (residual, joins)
75}
76
77/// Recursively process an expression in the WHERE clause.
78/// Returns `None` if the expression was fully consumed (replaced by a join).
79fn unnest_expr(
80    expr: Expr,
81    joins: &mut Vec<JoinClause>,
82    alias_gen: &mut AliasGen,
83) -> Option<Expr> {
84    match expr {
85        // AND: try to unnest each side independently
86        Expr::BinaryOp {
87            left,
88            op: BinaryOperator::And,
89            right,
90        } => {
91            let left_result = unnest_expr(*left, joins, alias_gen);
92            let right_result = unnest_expr(*right, joins, alias_gen);
93            match (left_result, right_result) {
94                (Some(l), Some(r)) => Some(Expr::BinaryOp {
95                    left: Box::new(l),
96                    op: BinaryOperator::And,
97                    right: Box::new(r),
98                }),
99                (Some(l), None) => Some(l),
100                (None, Some(r)) => Some(r),
101                (None, None) => None,
102            }
103        }
104
105        // EXISTS (SELECT ... WHERE outer.col = inner.col)
106        Expr::Exists {
107            subquery,
108            negated,
109        } => {
110            let subquery_inner = *subquery;
111            if let Some((join, residual)) =
112                try_unnest_exists(subquery_inner.clone(), negated, alias_gen)
113            {
114                joins.push(join);
115                residual
116            } else {
117                Some(Expr::Exists {
118                    subquery: Box::new(subquery_inner),
119                    negated,
120                })
121            }
122        }
123
124        // NOT EXISTS parsed as UnaryOp(Not, Exists { negated: false })
125        Expr::UnaryOp {
126            op: UnaryOperator::Not,
127            expr,
128        } if matches!(expr.as_ref(), Expr::Exists { negated: false, .. }) => {
129            if let Expr::Exists { subquery, .. } = *expr {
130                let subquery_inner = *subquery;
131                if let Some((join, residual)) =
132                    try_unnest_exists(subquery_inner.clone(), true, alias_gen)
133                {
134                    joins.push(join);
135                    residual
136                } else {
137                    Some(Expr::UnaryOp {
138                        op: UnaryOperator::Not,
139                        expr: Box::new(Expr::Exists {
140                            subquery: Box::new(subquery_inner),
141                            negated: false,
142                        }),
143                    })
144                }
145            } else {
146                unreachable!()
147            }
148        }
149
150        // col IN (SELECT ...)
151        Expr::InSubquery {
152            expr: lhs,
153            subquery,
154            negated,
155        } => {
156            let lhs_inner = *lhs;
157            let subquery_inner = *subquery;
158            if let Some((join, residual)) =
159                try_unnest_in_subquery(lhs_inner.clone(), subquery_inner.clone(), negated, alias_gen)
160            {
161                joins.push(join);
162                residual
163            } else {
164                Some(Expr::InSubquery {
165                    expr: Box::new(lhs_inner),
166                    subquery: Box::new(subquery_inner),
167                    negated,
168                })
169            }
170        }
171
172        // Nested parenthesized expression — unwrap, try to unnest, re-wrap if needed
173        Expr::Nested(inner) => {
174            let result = unnest_expr(*inner, joins, alias_gen);
175            result.map(|e| {
176                if e.is_literal() || matches!(e, Expr::Column { .. }) {
177                    e
178                } else {
179                    Expr::Nested(Box::new(e))
180                }
181            })
182        }
183
184        // Everything else: leave as-is
185        other => Some(other),
186    }
187}
188
189// ─────────────────────────────────────────────────────────────────────
190// EXISTS / NOT EXISTS → JOIN
191// ─────────────────────────────────────────────────────────────────────
192
193/// Try to rewrite `[NOT] EXISTS (SELECT ... FROM t WHERE correlations)`
194/// into a join. Returns `None` if the subquery cannot be safely unnested.
195fn try_unnest_exists(
196    subquery: Statement,
197    negated: bool,
198    alias_gen: &mut AliasGen,
199) -> Option<(JoinClause, Option<Expr>)> {
200    let inner_select = match &subquery {
201        Statement::Select(sel) => sel,
202        _ => return None,
203    };
204
205    // Extract correlation predicates from the inner WHERE clause
206    let inner_where = inner_select.where_clause.as_ref()?;
207    let (eq_preds, non_eq_preds) = extract_correlation_predicates(inner_where);
208
209    // Must have at least one equality correlation
210    if eq_preds.is_empty() {
211        return None;
212    }
213
214    // Bail out if there are non-equality correlations (would need LATERAL)
215    if !non_eq_preds.is_empty() {
216        return None;
217    }
218
219    let alias = alias_gen.next();
220
221    // Build the ON condition from equality predicates
222    let on_condition = build_join_on(&eq_preds, &alias);
223
224    // Build the derived table from the inner SELECT, stripping correlation predicates
225    let derived = build_derived_table_from_exists(subquery, &eq_preds, &alias);
226
227    if negated {
228        // NOT EXISTS → LEFT JOIN ... WHERE _u.col IS NULL
229        // Pick any column from the inner select as the NULL-check sentinel
230        let null_check_col = sentinel_column(&alias);
231
232        let join = JoinClause {
233            join_type: JoinType::Left,
234            table: derived,
235            on: Some(on_condition),
236            using: vec![],
237        };
238        let residual = Some(Expr::IsNull {
239            expr: Box::new(null_check_col),
240            negated: false,
241        });
242        Some((join, residual))
243    } else {
244        // EXISTS → INNER JOIN (deduplicated via GROUP BY or DISTINCT in the derived table)
245        let join = JoinClause {
246            join_type: JoinType::Inner,
247            table: derived,
248            on: Some(on_condition),
249            using: vec![],
250        };
251        Some((join, None))
252    }
253}
254
255// ─────────────────────────────────────────────────────────────────────
256// IN / NOT IN subquery → JOIN
257// ─────────────────────────────────────────────────────────────────────
258
259/// Try to rewrite `col [NOT] IN (SELECT x FROM t WHERE ...)` into a join.
260fn try_unnest_in_subquery(
261    lhs: Expr,
262    subquery: Statement,
263    negated: bool,
264    alias_gen: &mut AliasGen,
265) -> Option<(JoinClause, Option<Expr>)> {
266    let inner_select = match &subquery {
267        Statement::Select(sel) => sel,
268        _ => return None,
269    };
270
271    // The inner SELECT must project exactly one column
272    if inner_select.columns.len() != 1 {
273        return None;
274    }
275
276    let alias = alias_gen.next();
277    let inner_col_alias = "_col0".to_string();
278
279    // Build ON condition: outer_col = _u._col0
280    let on_condition = Expr::BinaryOp {
281        left: Box::new(lhs),
282        op: BinaryOperator::Eq,
283        right: Box::new(Expr::Column {
284            table: Some(alias.clone()),
285            name: inner_col_alias.clone(),
286            quote_style: QuoteStyle::None,
287            table_quote_style: QuoteStyle::None,
288        }),
289    };
290
291    // Build a derived table: (SELECT DISTINCT <col> AS _col0 FROM ... WHERE ...) AS _uN
292    let derived = build_derived_table_from_in(subquery, &inner_col_alias, &alias);
293
294    if negated {
295        // NOT IN → LEFT JOIN ... WHERE _uN._col0 IS NULL
296        let null_check = Expr::IsNull {
297            expr: Box::new(Expr::Column {
298                table: Some(alias.clone()),
299                name: inner_col_alias,
300                quote_style: QuoteStyle::None,
301                table_quote_style: QuoteStyle::None,
302            }),
303            negated: false,
304        };
305
306        let join = JoinClause {
307            join_type: JoinType::Left,
308            table: derived,
309            on: Some(on_condition),
310            using: vec![],
311        };
312        Some((join, Some(null_check)))
313    } else {
314        // IN → INNER JOIN
315        let join = JoinClause {
316            join_type: JoinType::Inner,
317            table: derived,
318            on: Some(on_condition),
319            using: vec![],
320        };
321        Some((join, None))
322    }
323}
324
325// ─────────────────────────────────────────────────────────────────────
326// Helpers: correlation detection
327// ─────────────────────────────────────────────────────────────────────
328
329/// A correlation predicate extracted from a subquery's WHERE clause.
330/// Represents `outer_col = inner_col`.
331#[derive(Debug, Clone)]
332struct CorrelationPredicate {
333    /// The column reference from the outer query
334    outer_col: Expr,
335    /// The column reference from the inner query
336    inner_col: Expr,
337}
338
339/// Examine a WHERE expression and extract equality correlation predicates
340/// (where one side references an outer table and the other an inner table).
341///
342/// Returns (equality_correlations, non_equality_predicates_referencing_outer_tables).
343///
344/// A predicate is classified as "correlated" if it contains column references
345/// with table qualifiers that likely come from the outer scope. Since we don't
346/// have full scope analysis, we use a heuristic: columns with a table qualifier
347/// that appears as a correlation candidate.
348fn extract_correlation_predicates(expr: &Expr) -> (Vec<CorrelationPredicate>, Vec<Expr>) {
349    let mut eq_preds = Vec::new();
350    let mut non_eq_preds = Vec::new();
351
352    collect_correlation_predicates(expr, &mut eq_preds, &mut non_eq_preds);
353
354    (eq_preds, non_eq_preds)
355}
356
357fn collect_correlation_predicates(
358    expr: &Expr,
359    eq_preds: &mut Vec<CorrelationPredicate>,
360    non_eq_preds: &mut Vec<Expr>,
361) {
362    match expr {
363        Expr::BinaryOp {
364            left,
365            op: BinaryOperator::And,
366            right,
367        } => {
368            collect_correlation_predicates(left, eq_preds, non_eq_preds);
369            collect_correlation_predicates(right, eq_preds, non_eq_preds);
370        }
371
372        Expr::BinaryOp {
373            left,
374            op: BinaryOperator::Eq,
375            right,
376        } => {
377            // Check if this is a correlation: one side has a table qualifier,
378            // and they reference different tables
379            if let (Some((l_table, _l_name)), Some((r_table, _r_name))) =
380                (extract_column_ref(left), extract_column_ref(right))
381            {
382                if l_table == r_table {
383                    // Same table — not a correlation
384                } else {
385                    eq_preds.push(CorrelationPredicate {
386                        outer_col: *left.clone(),
387                        inner_col: *right.clone(),
388                    });
389                    return;
390                }
391            }
392            // Not a correlation — check if it references outer tables
393            if has_potential_outer_reference(expr) {
394                non_eq_preds.push(expr.clone());
395            }
396        }
397
398        // Non-equality comparisons that reference outer columns
399        Expr::BinaryOp {
400            op: BinaryOperator::Lt
401                | BinaryOperator::Gt
402                | BinaryOperator::LtEq
403                | BinaryOperator::GtEq
404                | BinaryOperator::Neq,
405            ..
406        } => {
407            if is_cross_table_predicate(expr) {
408                non_eq_preds.push(expr.clone());
409            }
410        }
411
412        _ => {}
413    }
414}
415
416/// Extract (table, column_name) from a column reference if it has a table qualifier.
417fn extract_column_ref(expr: &Expr) -> Option<(String, String)> {
418    match expr {
419        Expr::Column {
420            table: Some(t),
421            name,
422            ..
423        } => Some((t.clone(), name.clone())),
424        _ => None,
425    }
426}
427
428/// Check whether an expression contains column references from different tables
429/// (heuristic for cross-scope references without full scope analysis).
430fn is_cross_table_predicate(expr: &Expr) -> bool {
431    let mut tables = Vec::new();
432    expr.walk(&mut |e| {
433        if let Expr::Column { table: Some(t), .. } = e
434            && !tables.iter().any(|existing: &String| existing == t)
435        {
436            tables.push(t.clone());
437        }
438        true
439    });
440    tables.len() > 1
441}
442
443/// Heuristic: does this expression reference columns from more than one table?
444fn has_potential_outer_reference(expr: &Expr) -> bool {
445    is_cross_table_predicate(expr)
446}
447
448// ─────────────────────────────────────────────────────────────────────
449// Helpers: building derived tables and join conditions
450// ─────────────────────────────────────────────────────────────────────
451
452/// Build a JOIN ON clause from equality correlation predicates,
453/// rewriting inner column references to use the new alias.
454fn build_join_on(preds: &[CorrelationPredicate], alias: &str) -> Expr {
455    let conditions: Vec<Expr> = preds
456        .iter()
457        .map(|p| {
458            let rewritten_inner = rewrite_column_table(&p.inner_col, alias);
459            Expr::BinaryOp {
460                left: Box::new(p.outer_col.clone()),
461                op: BinaryOperator::Eq,
462                right: Box::new(rewritten_inner),
463            }
464        })
465        .collect();
466
467    and_all(conditions)
468}
469
470/// Combine multiple expressions with AND.
471fn and_all(mut exprs: Vec<Expr>) -> Expr {
472    assert!(!exprs.is_empty(), "and_all requires at least one expression");
473    if exprs.len() == 1 {
474        return exprs.remove(0);
475    }
476    let first = exprs.remove(0);
477    exprs.into_iter().fold(first, |acc, e| Expr::BinaryOp {
478        left: Box::new(acc),
479        op: BinaryOperator::And,
480        right: Box::new(e),
481    })
482}
483
484/// Rewrite a column's table qualifier to the given alias.
485fn rewrite_column_table(expr: &Expr, new_table: &str) -> Expr {
486    match expr {
487        Expr::Column { name, quote_style, .. } => Expr::Column {
488            table: Some(new_table.to_string()),
489            name: name.clone(),
490            quote_style: *quote_style,
491            table_quote_style: QuoteStyle::None,
492        },
493        other => other.clone(),
494    }
495}
496
497/// Build a derived table source from an EXISTS subquery.
498///
499/// Strips the correlation predicates from the inner WHERE clause and wraps
500/// the result as `(SELECT DISTINCT 1 AS _sentinel, <join_keys> FROM ... WHERE <residual>) AS _uN`.
501fn build_derived_table_from_exists(
502    subquery: Statement,
503    eq_preds: &[CorrelationPredicate],
504    alias: &str,
505) -> TableSource {
506    let mut inner_select = match subquery {
507        Statement::Select(sel) => sel,
508        _ => unreachable!("Caller ensures this is a SELECT"),
509    };
510
511    // Remove correlation predicates from the inner WHERE
512    if let Some(where_clause) = inner_select.where_clause.take() {
513        inner_select.where_clause = strip_correlation_predicates(where_clause, eq_preds);
514    }
515
516    // Replace the SELECT list with DISTINCT 1 AS _sentinel
517    // to deduplicate rows for EXISTS semantics
518    inner_select.distinct = true;
519    inner_select.columns = vec![SelectItem::Expr {
520        expr: Expr::Number("1".to_string()),
521        alias: Some("_sentinel".to_string()),
522    }];
523
524    TableSource::Subquery {
525        query: Box::new(Statement::Select(inner_select)),
526        alias: Some(alias.to_string()),
527    }
528}
529
530/// Build a derived table from an IN-subquery.
531///
532/// Wraps the inner query as `(SELECT DISTINCT <col> AS _col0 FROM ...) AS _uN`.
533fn build_derived_table_from_in(
534    subquery: Statement,
535    col_alias: &str,
536    table_alias: &str,
537) -> TableSource {
538    let mut inner_select = match subquery {
539        Statement::Select(sel) => sel,
540        _ => unreachable!("Caller ensures this is a SELECT"),
541    };
542
543    // Ensure DISTINCT to avoid row multiplication
544    inner_select.distinct = true;
545
546    // Alias the single projected column
547    if let Some(SelectItem::Expr { alias, .. }) = inner_select.columns.first_mut() {
548        *alias = Some(col_alias.to_string());
549    }
550
551    TableSource::Subquery {
552        query: Box::new(Statement::Select(inner_select)),
553        alias: Some(table_alias.to_string()),
554    }
555}
556
557/// Create a sentinel column reference for NULL-checks in anti-joins.
558fn sentinel_column(alias: &str) -> Expr {
559    Expr::Column {
560        table: Some(alias.to_string()),
561        name: "_sentinel".to_string(),
562        quote_style: QuoteStyle::None,
563        table_quote_style: QuoteStyle::None,
564    }
565}
566
567/// Strip the given correlation predicates from a WHERE expression,
568/// returning the residual expression (or None if the entire WHERE was correlations).
569fn strip_correlation_predicates(
570    expr: Expr,
571    eq_preds: &[CorrelationPredicate],
572) -> Option<Expr> {
573    match expr {
574        Expr::BinaryOp {
575            left,
576            op: BinaryOperator::And,
577            right,
578        } => {
579            let left_result = strip_correlation_predicates(*left, eq_preds);
580            let right_result = strip_correlation_predicates(*right, eq_preds);
581            match (left_result, right_result) {
582                (Some(l), Some(r)) => Some(Expr::BinaryOp {
583                    left: Box::new(l),
584                    op: BinaryOperator::And,
585                    right: Box::new(r),
586                }),
587                (Some(l), None) => Some(l),
588                (None, Some(r)) => Some(r),
589                (None, None) => None,
590            }
591        }
592
593        Expr::BinaryOp {
594            ref left,
595            op: BinaryOperator::Eq,
596            ref right,
597        } => {
598            // Check if this matches any of the correlation predicates
599            for pred in eq_preds {
600                if (*left.as_ref() == pred.outer_col && *right.as_ref() == pred.inner_col)
601                    || (*left.as_ref() == pred.inner_col && *right.as_ref() == pred.outer_col)
602                {
603                    return None; // Strip this predicate
604                }
605            }
606            Some(expr)
607        }
608
609        other => Some(other),
610    }
611}
612
613#[cfg(test)]
614mod tests {
615    use super::*;
616    use crate::generator::generate;
617    use crate::parser::Parser;
618    use crate::dialects::Dialect;
619
620    fn parse_and_unnest(sql: &str) -> String {
621        let stmt = Parser::new(sql).unwrap().parse_statement().unwrap();
622        let unnested = unnest_subqueries(stmt);
623        generate(&unnested, Dialect::Ansi)
624    }
625
626    #[test]
627    fn test_exists_to_inner_join() {
628        let sql = "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b WHERE b.id = a.id)";
629        let result = parse_and_unnest(sql);
630        // Should be rewritten to an INNER JOIN
631        assert!(result.contains("INNER JOIN"), "Expected INNER JOIN in: {result}");
632        assert!(result.contains("_u0"), "Expected derived alias _u0 in: {result}");
633        assert!(!result.contains("EXISTS"), "Should not contain EXISTS: {result}");
634    }
635
636    #[test]
637    fn test_not_exists_to_left_join() {
638        let sql = "SELECT a.id FROM a WHERE NOT EXISTS (SELECT 1 FROM b WHERE b.id = a.id)";
639        let result = parse_and_unnest(sql);
640        assert!(result.contains("LEFT JOIN"), "Expected LEFT JOIN in: {result}");
641        assert!(result.contains("IS NULL"), "Expected IS NULL check in: {result}");
642        assert!(!result.contains("NOT EXISTS"), "Should not contain NOT EXISTS: {result}");
643    }
644
645    #[test]
646    fn test_in_subquery_to_inner_join() {
647        let sql = "SELECT a.id FROM a WHERE a.id IN (SELECT b.id FROM b)";
648        let result = parse_and_unnest(sql);
649        assert!(result.contains("INNER JOIN"), "Expected INNER JOIN in: {result}");
650        assert!(!result.contains(" IN "), "Should not contain IN: {result}");
651    }
652
653    #[test]
654    fn test_not_in_subquery_to_left_join() {
655        let sql = "SELECT a.id FROM a WHERE a.id NOT IN (SELECT b.id FROM b)";
656        let result = parse_and_unnest(sql);
657        assert!(result.contains("LEFT JOIN"), "Expected LEFT JOIN in: {result}");
658        assert!(result.contains("IS NULL"), "Expected IS NULL check in: {result}");
659    }
660
661    #[test]
662    fn test_no_correlation_not_unnested() {
663        // Subquery without correlation should not be unnested
664        let sql = "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b WHERE b.x > 10)";
665        let result = parse_and_unnest(sql);
666        assert!(result.contains("EXISTS"), "Uncorrelated EXISTS should remain: {result}");
667    }
668
669    #[test]
670    fn test_non_equality_correlation_not_unnested() {
671        // Subquery with non-equality correlation (< instead of =) should NOT be unnested
672        // This is the scenario described in Python sqlglot#7295
673        let sql = "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b WHERE b.val < a.val AND b.id = a.id)";
674        let result = parse_and_unnest(sql);
675        assert!(
676            result.contains("EXISTS"),
677            "Subquery with non-eq correlation should not be unnested: {result}"
678        );
679    }
680
681    #[test]
682    fn test_subquery_in_select_not_unnested() {
683        // Correlated scalar subquery inside COALESCE in SELECT list
684        // This is the exact scenario from Python sqlglot#7295 — must NOT be touched
685        let sql = "SELECT COALESCE((SELECT MAX(b.val) FROM b WHERE b.id = a.id), a.val) AS result FROM a";
686        let result = parse_and_unnest(sql);
687        // The subquery should remain in the SELECT, NOT be moved to a JOIN
688        assert!(!result.contains("JOIN"), "Subquery in SELECT should not become a JOIN: {result}");
689    }
690
691    #[test]
692    fn test_exists_with_additional_where() {
693        // EXISTS with both correlation and a local predicate
694        let sql = "SELECT a.id FROM a WHERE a.x > 5 AND EXISTS (SELECT 1 FROM b WHERE b.id = a.id)";
695        let result = parse_and_unnest(sql);
696        assert!(result.contains("INNER JOIN"), "Expected INNER JOIN in: {result}");
697        assert!(result.contains("a.x > 5") || result.contains("a.x >"), "Should keep non-subquery predicate: {result}");
698    }
699
700    #[test]
701    fn test_non_select_statement_unchanged() {
702        let sql = "INSERT INTO t (a) VALUES (1)";
703        let stmt = Parser::new(sql).unwrap().parse_statement().unwrap();
704        let result = unnest_subqueries(stmt.clone());
705        assert_eq!(
706            format!("{result:?}"),
707            format!("{stmt:?}"),
708            "Non-SELECT statements should pass through unchanged"
709        );
710    }
711
712    // ─────────────────────────────────────────────────────────────────
713    // Multiple correlation predicates
714    // ─────────────────────────────────────────────────────────────────
715
716    #[test]
717    fn test_exists_multiple_correlations() {
718        // Two equality correlations → JOIN ON with AND
719        let sql = "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b WHERE b.id = a.id AND b.org = a.org)";
720        let result = parse_and_unnest(sql);
721        assert!(result.contains("INNER JOIN"), "Expected INNER JOIN in: {result}");
722        assert!(!result.contains("EXISTS"), "Should not contain EXISTS: {result}");
723        // The ON clause must have both correlation columns joined with AND
724        assert!(result.contains(" AND "), "ON clause should have AND for multiple correlations: {result}");
725        assert!(result.contains(".id"), "ON should reference id: {result}");
726        assert!(result.contains(".org"), "ON should reference org: {result}");
727    }
728
729    // ─────────────────────────────────────────────────────────────────
730    // Multiple subqueries in WHERE
731    // ─────────────────────────────────────────────────────────────────
732
733    #[test]
734    fn test_multiple_subqueries_in_where() {
735        // Two different subqueries in the same WHERE, both should be unnested
736        let sql = "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b WHERE b.id = a.id) AND a.id IN (SELECT c.id FROM c)";
737        let result = parse_and_unnest(sql);
738        // Both should become joins
739        assert!(!result.contains("EXISTS"), "EXISTS should be unnested: {result}");
740        assert!(!result.contains(" IN "), "IN should be unnested: {result}");
741        // Should have two joins with different aliases
742        assert!(result.contains("_u0"), "Expected first alias _u0: {result}");
743        assert!(result.contains("_u1"), "Expected second alias _u1: {result}");
744    }
745
746    // ─────────────────────────────────────────────────────────────────
747    // Residual inner WHERE after stripping correlations
748    // ─────────────────────────────────────────────────────────────────
749
750    #[test]
751    fn test_exists_with_inner_residual_where() {
752        // Inner subquery has a correlation AND a local (non-correlated) predicate
753        // The local predicate should remain in the derived table's WHERE
754        let sql = "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b WHERE b.id = a.id AND b.active = 1)";
755        let result = parse_and_unnest(sql);
756        assert!(result.contains("INNER JOIN"), "Expected INNER JOIN in: {result}");
757        assert!(!result.contains("EXISTS"), "Should not contain EXISTS: {result}");
758        // The non-correlated predicate b.active = 1 should be preserved in the subquery
759        assert!(
760            result.contains("active") && result.contains("1"),
761            "Inner residual WHERE should be preserved: {result}"
762        );
763    }
764
765    // ─────────────────────────────────────────────────────────────────
766    // Nested parenthesized subqueries
767    // ─────────────────────────────────────────────────────────────────
768
769    #[test]
770    fn test_parenthesized_exists() {
771        // EXISTS wrapped in extra parentheses
772        let sql = "SELECT a.id FROM a WHERE (EXISTS (SELECT 1 FROM b WHERE b.id = a.id))";
773        let result = parse_and_unnest(sql);
774        assert!(result.contains("INNER JOIN"), "Expected INNER JOIN in: {result}");
775        assert!(!result.contains("EXISTS"), "Should not contain EXISTS: {result}");
776    }
777
778    // ─────────────────────────────────────────────────────────────────
779    // IN subquery with multiple projected columns (bail out)
780    // ─────────────────────────────────────────────────────────────────
781
782    #[test]
783    fn test_in_subquery_multi_column_not_unnested() {
784        // IN (SELECT col1, col2 ...) — multi-column, should NOT be unnested
785        let sql = "SELECT a.id FROM a WHERE a.id IN (SELECT b.id, b.name FROM b)";
786        let result = parse_and_unnest(sql);
787        assert!(result.contains(" IN "), "Multi-column IN should remain: {result}");
788    }
789
790    // ─────────────────────────────────────────────────────────────────
791    // OR with subqueries (should NOT unnest — not safe)
792    // ─────────────────────────────────────────────────────────────────
793
794    #[test]
795    fn test_or_with_exists_not_unnested() {
796        // EXISTS in an OR branch — cannot safely unnest
797        let sql = "SELECT a.id FROM a WHERE a.x > 1 OR EXISTS (SELECT 1 FROM b WHERE b.id = a.id)";
798        let result = parse_and_unnest(sql);
799        assert!(result.contains("EXISTS"), "EXISTS in OR should remain: {result}");
800    }
801
802    // ─────────────────────────────────────────────────────────────────
803    // Scalar subquery in WHERE (not EXISTS / IN — should stay)
804    // ─────────────────────────────────────────────────────────────────
805
806    #[test]
807    fn test_scalar_subquery_in_where_not_unnested() {
808        // WHERE col = (SELECT ...) — scalar subquery comparison, not handled
809        let sql = "SELECT a.id FROM a WHERE a.val = (SELECT MAX(b.val) FROM b WHERE b.id = a.id)";
810        let result = parse_and_unnest(sql);
811        assert!(!result.contains("JOIN"), "Scalar subquery in WHERE should not become JOIN: {result}");
812    }
813
814    // ─────────────────────────────────────────────────────────────────
815    // Exact reproducer from Python sqlglot#7295
816    // ─────────────────────────────────────────────────────────────────
817
818    #[test]
819    fn test_sqlglot_issue_7295_exact_reproducer() {
820        // This is the exact SQL from Python sqlglot issue #7295.
821        // The correlated subquery is inside COALESCE in the SELECT list
822        // with a non-equality correlation (b.val < a.val).
823        // Must NOT crash and must NOT modify the query.
824        let sql = "SELECT COALESCE((SELECT MAX(b.val) FROM t AS b WHERE b.val < a.val AND b.id = a.id), a.val) AS result FROM t AS a";
825        let result = parse_and_unnest(sql);
826        assert!(!result.contains("JOIN"), "Issue #7295 query must NOT be rewritten to JOIN: {result}");
827        assert!(result.contains("COALESCE"), "COALESCE should remain: {result}");
828    }
829
830    // ─────────────────────────────────────────────────────────────────
831    // No WHERE clause at all
832    // ─────────────────────────────────────────────────────────────────
833
834    #[test]
835    fn test_no_where_clause_unchanged() {
836        let sql = "SELECT a.id FROM a";
837        let result = parse_and_unnest(sql);
838        assert_eq!(result, "SELECT a.id FROM a", "No WHERE should be unchanged");
839    }
840
841    // ─────────────────────────────────────────────────────────────────
842    // WHERE with no subqueries
843    // ─────────────────────────────────────────────────────────────────
844
845    #[test]
846    fn test_where_without_subqueries_unchanged() {
847        let sql = "SELECT a.id FROM a WHERE a.x > 1 AND a.y = 2";
848        let result = parse_and_unnest(sql);
849        assert!(!result.contains("JOIN"), "No subqueries, no joins should be added: {result}");
850        assert!(result.contains("a.x > 1"), "Original predicates should remain: {result}");
851    }
852
853    // ─────────────────────────────────────────────────────────────────
854    // EXISTS with no inner WHERE (uncorrelated — bail out)
855    // ─────────────────────────────────────────────────────────────────
856
857    #[test]
858    fn test_exists_no_where_not_unnested() {
859        let sql = "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b)";
860        let result = parse_and_unnest(sql);
861        assert!(result.contains("EXISTS"), "EXISTS without inner WHERE should remain: {result}");
862    }
863
864    // ─────────────────────────────────────────────────────────────────
865    // Inner subquery has only same-table predicates (no cross-table correlation)
866    // ─────────────────────────────────────────────────────────────────
867
868    #[test]
869    fn test_exists_same_table_predicate_not_unnested() {
870        // Inner WHERE only references columns from b — no correlation to outer a
871        let sql = "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b WHERE b.x = b.y)";
872        let result = parse_and_unnest(sql);
873        assert!(result.contains("EXISTS"), "Same-table predicate is not correlation: {result}");
874    }
875
876    // ─────────────────────────────────────────────────────────────────
877    // DISTINCT behaviour in derived tables
878    // ─────────────────────────────────────────────────────────────────
879
880    #[test]
881    fn test_exists_produces_distinct_derived_table() {
882        let sql = "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b WHERE b.id = a.id)";
883        let result = parse_and_unnest(sql);
884        assert!(result.contains("DISTINCT"), "Derived table should use DISTINCT: {result}");
885    }
886
887    #[test]
888    fn test_in_produces_distinct_derived_table() {
889        let sql = "SELECT a.id FROM a WHERE a.id IN (SELECT b.id FROM b)";
890        let result = parse_and_unnest(sql);
891        assert!(result.contains("DISTINCT"), "IN-derived table should use DISTINCT: {result}");
892    }
893
894    // ─────────────────────────────────────────────────────────────────
895    // NOT IN with inner WHERE (should propagate inner WHERE into derived table)
896    // ─────────────────────────────────────────────────────────────────
897
898    #[test]
899    fn test_not_in_preserves_inner_where() {
900        let sql = "SELECT a.id FROM a WHERE a.id NOT IN (SELECT b.id FROM b WHERE b.active = 1)";
901        let result = parse_and_unnest(sql);
902        assert!(result.contains("LEFT JOIN"), "Expected LEFT JOIN: {result}");
903        assert!(result.contains("IS NULL"), "Expected IS NULL: {result}");
904        assert!(result.contains("active"), "Inner WHERE should be preserved: {result}");
905    }
906
907    // ─────────────────────────────────────────────────────────────────
908    // AliasGen produces sequential aliases
909    // ─────────────────────────────────────────────────────────────────
910
911    #[test]
912    fn test_alias_gen_sequential() {
913        let mut alias_gen = AliasGen::new();
914        assert_eq!(alias_gen.next(), "_u0");
915        assert_eq!(alias_gen.next(), "_u1");
916        assert_eq!(alias_gen.next(), "_u2");
917    }
918}