Skip to main content

sqlglot_rust/optimizer/
unnest_subqueries.rs

1/// Unnest and decorrelate subqueries.
2///
3/// Transforms correlated subqueries in WHERE clauses into equivalent JOINs
4/// when safe to do so. This is a common optimization because most query engines
5/// execute joins more efficiently than correlated subqueries.
6///
7/// ## Supported rewrites
8///
9/// | Pattern                               | Rewrite                             |
10/// |---------------------------------------|-------------------------------------|
11/// | `WHERE EXISTS (SELECT … WHERE a = b)` | `INNER JOIN (SELECT …) ON a = b`   |
12/// | `WHERE NOT EXISTS (…)`                | `LEFT JOIN … WHERE _u.col IS NULL`  |
13/// | `WHERE x IN (SELECT col FROM …)`      | `INNER JOIN (SELECT DISTINCT …)`    |
14/// | `WHERE x NOT IN (SELECT col FROM …)`  | `LEFT JOIN … WHERE _u.col IS NULL`  |
15///
16/// ## Safety
17///
18/// The pass bails out (leaves the subquery unchanged) when:
19/// - The subquery has no correlation predicates (equality on outer columns).
20/// - The subquery is inside a function/expression in the SELECT list rather
21///   than in a WHERE predicate — matching the fix for Python sqlglot#7295.
22/// - The correlated predicate involves non-equality operators that would
23///   require LATERAL / APPLY for correctness.
24use crate::ast::*;
25
26/// Apply subquery unnesting to a statement.
27///
28/// Returns the statement unchanged if no subqueries can be safely unnested.
29pub fn unnest_subqueries(statement: Statement) -> Statement {
30    match statement {
31        Statement::Select(sel) => Statement::Select(unnest_select(sel)),
32        other => other,
33    }
34}
35
36/// Counter for generating unique aliases for unnested subquery tables.
37struct AliasGen {
38    counter: usize,
39}
40
41impl AliasGen {
42    fn new() -> Self {
43        Self { counter: 0 }
44    }
45
46    fn next(&mut self) -> String {
47        let alias = format!("_u{}", self.counter);
48        self.counter += 1;
49        alias
50    }
51}
52
53fn unnest_select(mut sel: SelectStatement) -> SelectStatement {
54    let mut alias_gen = AliasGen::new();
55
56    // Only unnest subqueries found in the WHERE clause (safe position).
57    // Subqueries in SELECT columns that are inside functions (COALESCE, etc.)
58    // are NOT unnested to avoid the Python sqlglot#7295 crash scenario.
59    if let Some(where_clause) = sel.where_clause.take() {
60        let (new_where, new_joins) = unnest_where(where_clause, &mut alias_gen);
61        sel.where_clause = new_where;
62        sel.joins.extend(new_joins);
63    }
64
65    sel
66}
67
68/// Process a WHERE clause expression, extracting subqueries that can be
69/// rewritten as joins. Returns the residual WHERE clause (if any) and
70/// a list of new JoinClauses.
71fn unnest_where(expr: Expr, alias_gen: &mut AliasGen) -> (Option<Expr>, Vec<JoinClause>) {
72    let mut joins = Vec::new();
73    let residual = unnest_expr(expr, &mut joins, alias_gen);
74    (residual, joins)
75}
76
77/// Recursively process an expression in the WHERE clause.
78/// Returns `None` if the expression was fully consumed (replaced by a join).
79fn unnest_expr(expr: Expr, joins: &mut Vec<JoinClause>, alias_gen: &mut AliasGen) -> Option<Expr> {
80    match expr {
81        // AND: try to unnest each side independently
82        Expr::BinaryOp {
83            left,
84            op: BinaryOperator::And,
85            right,
86        } => {
87            let left_result = unnest_expr(*left, joins, alias_gen);
88            let right_result = unnest_expr(*right, joins, alias_gen);
89            match (left_result, right_result) {
90                (Some(l), Some(r)) => Some(Expr::BinaryOp {
91                    left: Box::new(l),
92                    op: BinaryOperator::And,
93                    right: Box::new(r),
94                }),
95                (Some(l), None) => Some(l),
96                (None, Some(r)) => Some(r),
97                (None, None) => None,
98            }
99        }
100
101        // EXISTS (SELECT ... WHERE outer.col = inner.col)
102        Expr::Exists { subquery, negated } => {
103            let subquery_inner = *subquery;
104            if let Some((join, residual)) =
105                try_unnest_exists(subquery_inner.clone(), negated, alias_gen)
106            {
107                joins.push(join);
108                residual
109            } else {
110                Some(Expr::Exists {
111                    subquery: Box::new(subquery_inner),
112                    negated,
113                })
114            }
115        }
116
117        // NOT EXISTS parsed as UnaryOp(Not, Exists { negated: false })
118        Expr::UnaryOp {
119            op: UnaryOperator::Not,
120            expr,
121        } if matches!(expr.as_ref(), Expr::Exists { negated: false, .. }) => {
122            if let Expr::Exists { subquery, .. } = *expr {
123                let subquery_inner = *subquery;
124                if let Some((join, residual)) =
125                    try_unnest_exists(subquery_inner.clone(), true, alias_gen)
126                {
127                    joins.push(join);
128                    residual
129                } else {
130                    Some(Expr::UnaryOp {
131                        op: UnaryOperator::Not,
132                        expr: Box::new(Expr::Exists {
133                            subquery: Box::new(subquery_inner),
134                            negated: false,
135                        }),
136                    })
137                }
138            } else {
139                unreachable!()
140            }
141        }
142
143        // col IN (SELECT ...)
144        Expr::InSubquery {
145            expr: lhs,
146            subquery,
147            negated,
148        } => {
149            let lhs_inner = *lhs;
150            let subquery_inner = *subquery;
151            if let Some((join, residual)) = try_unnest_in_subquery(
152                lhs_inner.clone(),
153                subquery_inner.clone(),
154                negated,
155                alias_gen,
156            ) {
157                joins.push(join);
158                residual
159            } else {
160                Some(Expr::InSubquery {
161                    expr: Box::new(lhs_inner),
162                    subquery: Box::new(subquery_inner),
163                    negated,
164                })
165            }
166        }
167
168        // Nested parenthesized expression — unwrap, try to unnest, re-wrap if needed
169        Expr::Nested(inner) => {
170            let result = unnest_expr(*inner, joins, alias_gen);
171            result.map(|e| {
172                if e.is_literal() || matches!(e, Expr::Column { .. }) {
173                    e
174                } else {
175                    Expr::Nested(Box::new(e))
176                }
177            })
178        }
179
180        // Everything else: leave as-is
181        other => Some(other),
182    }
183}
184
185// ─────────────────────────────────────────────────────────────────────
186// EXISTS / NOT EXISTS → JOIN
187// ─────────────────────────────────────────────────────────────────────
188
189/// Try to rewrite `[NOT] EXISTS (SELECT ... FROM t WHERE correlations)`
190/// into a join. Returns `None` if the subquery cannot be safely unnested.
191fn try_unnest_exists(
192    subquery: Statement,
193    negated: bool,
194    alias_gen: &mut AliasGen,
195) -> Option<(JoinClause, Option<Expr>)> {
196    let inner_select = match &subquery {
197        Statement::Select(sel) => sel,
198        _ => return None,
199    };
200
201    // Extract correlation predicates from the inner WHERE clause
202    let inner_where = inner_select.where_clause.as_ref()?;
203    let (eq_preds, non_eq_preds) = extract_correlation_predicates(inner_where);
204
205    // Must have at least one equality correlation
206    if eq_preds.is_empty() {
207        return None;
208    }
209
210    // Bail out if there are non-equality correlations (would need LATERAL)
211    if !non_eq_preds.is_empty() {
212        return None;
213    }
214
215    let alias = alias_gen.next();
216
217    // Build the ON condition from equality predicates
218    let on_condition = build_join_on(&eq_preds, &alias);
219
220    // Build the derived table from the inner SELECT, stripping correlation predicates
221    let derived = build_derived_table_from_exists(subquery, &eq_preds, &alias);
222
223    if negated {
224        // NOT EXISTS → LEFT JOIN ... WHERE _u.col IS NULL
225        // Pick any column from the inner select as the NULL-check sentinel
226        let null_check_col = sentinel_column(&alias);
227
228        let join = JoinClause {
229            join_type: JoinType::Left,
230            table: derived,
231            on: Some(on_condition),
232            using: vec![],
233        };
234        let residual = Some(Expr::IsNull {
235            expr: Box::new(null_check_col),
236            negated: false,
237        });
238        Some((join, residual))
239    } else {
240        // EXISTS → INNER JOIN (deduplicated via GROUP BY or DISTINCT in the derived table)
241        let join = JoinClause {
242            join_type: JoinType::Inner,
243            table: derived,
244            on: Some(on_condition),
245            using: vec![],
246        };
247        Some((join, None))
248    }
249}
250
251// ─────────────────────────────────────────────────────────────────────
252// IN / NOT IN subquery → JOIN
253// ─────────────────────────────────────────────────────────────────────
254
255/// Try to rewrite `col [NOT] IN (SELECT x FROM t WHERE ...)` into a join.
256fn try_unnest_in_subquery(
257    lhs: Expr,
258    subquery: Statement,
259    negated: bool,
260    alias_gen: &mut AliasGen,
261) -> Option<(JoinClause, Option<Expr>)> {
262    let inner_select = match &subquery {
263        Statement::Select(sel) => sel,
264        _ => return None,
265    };
266
267    // The inner SELECT must project exactly one column
268    if inner_select.columns.len() != 1 {
269        return None;
270    }
271
272    let alias = alias_gen.next();
273    let inner_col_alias = "_col0".to_string();
274
275    // Build ON condition: outer_col = _u._col0
276    let on_condition = Expr::BinaryOp {
277        left: Box::new(lhs),
278        op: BinaryOperator::Eq,
279        right: Box::new(Expr::Column {
280            table: Some(alias.clone()),
281            name: inner_col_alias.clone(),
282            quote_style: QuoteStyle::None,
283            table_quote_style: QuoteStyle::None,
284        }),
285    };
286
287    // Build a derived table: (SELECT DISTINCT <col> AS _col0 FROM ... WHERE ...) AS _uN
288    let derived = build_derived_table_from_in(subquery, &inner_col_alias, &alias);
289
290    if negated {
291        // NOT IN → LEFT JOIN ... WHERE _uN._col0 IS NULL
292        let null_check = Expr::IsNull {
293            expr: Box::new(Expr::Column {
294                table: Some(alias.clone()),
295                name: inner_col_alias,
296                quote_style: QuoteStyle::None,
297                table_quote_style: QuoteStyle::None,
298            }),
299            negated: false,
300        };
301
302        let join = JoinClause {
303            join_type: JoinType::Left,
304            table: derived,
305            on: Some(on_condition),
306            using: vec![],
307        };
308        Some((join, Some(null_check)))
309    } else {
310        // IN → INNER JOIN
311        let join = JoinClause {
312            join_type: JoinType::Inner,
313            table: derived,
314            on: Some(on_condition),
315            using: vec![],
316        };
317        Some((join, None))
318    }
319}
320
321// ─────────────────────────────────────────────────────────────────────
322// Helpers: correlation detection
323// ─────────────────────────────────────────────────────────────────────
324
325/// A correlation predicate extracted from a subquery's WHERE clause.
326/// Represents `outer_col = inner_col`.
327#[derive(Debug, Clone)]
328struct CorrelationPredicate {
329    /// The column reference from the outer query
330    outer_col: Expr,
331    /// The column reference from the inner query
332    inner_col: Expr,
333}
334
335/// Examine a WHERE expression and extract equality correlation predicates
336/// (where one side references an outer table and the other an inner table).
337///
338/// Returns (equality_correlations, non_equality_predicates_referencing_outer_tables).
339///
340/// A predicate is classified as "correlated" if it contains column references
341/// with table qualifiers that likely come from the outer scope. Since we don't
342/// have full scope analysis, we use a heuristic: columns with a table qualifier
343/// that appears as a correlation candidate.
344fn extract_correlation_predicates(expr: &Expr) -> (Vec<CorrelationPredicate>, Vec<Expr>) {
345    let mut eq_preds = Vec::new();
346    let mut non_eq_preds = Vec::new();
347
348    collect_correlation_predicates(expr, &mut eq_preds, &mut non_eq_preds);
349
350    (eq_preds, non_eq_preds)
351}
352
353fn collect_correlation_predicates(
354    expr: &Expr,
355    eq_preds: &mut Vec<CorrelationPredicate>,
356    non_eq_preds: &mut Vec<Expr>,
357) {
358    match expr {
359        Expr::BinaryOp {
360            left,
361            op: BinaryOperator::And,
362            right,
363        } => {
364            collect_correlation_predicates(left, eq_preds, non_eq_preds);
365            collect_correlation_predicates(right, eq_preds, non_eq_preds);
366        }
367
368        Expr::BinaryOp {
369            left,
370            op: BinaryOperator::Eq,
371            right,
372        } => {
373            // Check if this is a correlation: one side has a table qualifier,
374            // and they reference different tables
375            if let (Some((l_table, _l_name)), Some((r_table, _r_name))) =
376                (extract_column_ref(left), extract_column_ref(right))
377            {
378                if l_table == r_table {
379                    // Same table — not a correlation
380                } else {
381                    eq_preds.push(CorrelationPredicate {
382                        outer_col: *left.clone(),
383                        inner_col: *right.clone(),
384                    });
385                    return;
386                }
387            }
388            // Not a correlation — check if it references outer tables
389            if has_potential_outer_reference(expr) {
390                non_eq_preds.push(expr.clone());
391            }
392        }
393
394        // Non-equality comparisons that reference outer columns
395        Expr::BinaryOp {
396            op:
397                BinaryOperator::Lt
398                | BinaryOperator::Gt
399                | BinaryOperator::LtEq
400                | BinaryOperator::GtEq
401                | BinaryOperator::Neq,
402            ..
403        } => {
404            if is_cross_table_predicate(expr) {
405                non_eq_preds.push(expr.clone());
406            }
407        }
408
409        _ => {}
410    }
411}
412
413/// Extract (table, column_name) from a column reference if it has a table qualifier.
414fn extract_column_ref(expr: &Expr) -> Option<(String, String)> {
415    match expr {
416        Expr::Column {
417            table: Some(t),
418            name,
419            ..
420        } => Some((t.clone(), name.clone())),
421        _ => None,
422    }
423}
424
425/// Check whether an expression contains column references from different tables
426/// (heuristic for cross-scope references without full scope analysis).
427fn is_cross_table_predicate(expr: &Expr) -> bool {
428    let mut tables = Vec::new();
429    expr.walk(&mut |e| {
430        if let Expr::Column { table: Some(t), .. } = e
431            && !tables.iter().any(|existing: &String| existing == t)
432        {
433            tables.push(t.clone());
434        }
435        true
436    });
437    tables.len() > 1
438}
439
440/// Heuristic: does this expression reference columns from more than one table?
441fn has_potential_outer_reference(expr: &Expr) -> bool {
442    is_cross_table_predicate(expr)
443}
444
445// ─────────────────────────────────────────────────────────────────────
446// Helpers: building derived tables and join conditions
447// ─────────────────────────────────────────────────────────────────────
448
449/// Build a JOIN ON clause from equality correlation predicates,
450/// rewriting inner column references to use the new alias.
451fn build_join_on(preds: &[CorrelationPredicate], alias: &str) -> Expr {
452    let conditions: Vec<Expr> = preds
453        .iter()
454        .map(|p| {
455            let rewritten_inner = rewrite_column_table(&p.inner_col, alias);
456            Expr::BinaryOp {
457                left: Box::new(p.outer_col.clone()),
458                op: BinaryOperator::Eq,
459                right: Box::new(rewritten_inner),
460            }
461        })
462        .collect();
463
464    and_all(conditions)
465}
466
467/// Combine multiple expressions with AND.
468fn and_all(mut exprs: Vec<Expr>) -> Expr {
469    assert!(
470        !exprs.is_empty(),
471        "and_all requires at least one expression"
472    );
473    if exprs.len() == 1 {
474        return exprs.remove(0);
475    }
476    let first = exprs.remove(0);
477    exprs.into_iter().fold(first, |acc, e| Expr::BinaryOp {
478        left: Box::new(acc),
479        op: BinaryOperator::And,
480        right: Box::new(e),
481    })
482}
483
484/// Rewrite a column's table qualifier to the given alias.
485fn rewrite_column_table(expr: &Expr, new_table: &str) -> Expr {
486    match expr {
487        Expr::Column {
488            name, quote_style, ..
489        } => Expr::Column {
490            table: Some(new_table.to_string()),
491            name: name.clone(),
492            quote_style: *quote_style,
493            table_quote_style: QuoteStyle::None,
494        },
495        other => other.clone(),
496    }
497}
498
499/// Build a derived table source from an EXISTS subquery.
500///
501/// Strips the correlation predicates from the inner WHERE clause and wraps
502/// the result as `(SELECT DISTINCT 1 AS _sentinel, <join_keys> FROM ... WHERE <residual>) AS _uN`.
503fn build_derived_table_from_exists(
504    subquery: Statement,
505    eq_preds: &[CorrelationPredicate],
506    alias: &str,
507) -> TableSource {
508    let mut inner_select = match subquery {
509        Statement::Select(sel) => sel,
510        _ => unreachable!("Caller ensures this is a SELECT"),
511    };
512
513    // Remove correlation predicates from the inner WHERE
514    if let Some(where_clause) = inner_select.where_clause.take() {
515        inner_select.where_clause = strip_correlation_predicates(where_clause, eq_preds);
516    }
517
518    // Replace the SELECT list with DISTINCT 1 AS _sentinel
519    // to deduplicate rows for EXISTS semantics
520    inner_select.distinct = true;
521    inner_select.columns = vec![SelectItem::Expr {
522        expr: Expr::Number("1".to_string()),
523        alias: Some("_sentinel".to_string()),
524    }];
525
526    TableSource::Subquery {
527        query: Box::new(Statement::Select(inner_select)),
528        alias: Some(alias.to_string()),
529    }
530}
531
532/// Build a derived table from an IN-subquery.
533///
534/// Wraps the inner query as `(SELECT DISTINCT <col> AS _col0 FROM ...) AS _uN`.
535fn build_derived_table_from_in(
536    subquery: Statement,
537    col_alias: &str,
538    table_alias: &str,
539) -> TableSource {
540    let mut inner_select = match subquery {
541        Statement::Select(sel) => sel,
542        _ => unreachable!("Caller ensures this is a SELECT"),
543    };
544
545    // Ensure DISTINCT to avoid row multiplication
546    inner_select.distinct = true;
547
548    // Alias the single projected column
549    if let Some(SelectItem::Expr { alias, .. }) = inner_select.columns.first_mut() {
550        *alias = Some(col_alias.to_string());
551    }
552
553    TableSource::Subquery {
554        query: Box::new(Statement::Select(inner_select)),
555        alias: Some(table_alias.to_string()),
556    }
557}
558
559/// Create a sentinel column reference for NULL-checks in anti-joins.
560fn sentinel_column(alias: &str) -> Expr {
561    Expr::Column {
562        table: Some(alias.to_string()),
563        name: "_sentinel".to_string(),
564        quote_style: QuoteStyle::None,
565        table_quote_style: QuoteStyle::None,
566    }
567}
568
569/// Strip the given correlation predicates from a WHERE expression,
570/// returning the residual expression (or None if the entire WHERE was correlations).
571fn strip_correlation_predicates(expr: Expr, eq_preds: &[CorrelationPredicate]) -> Option<Expr> {
572    match expr {
573        Expr::BinaryOp {
574            left,
575            op: BinaryOperator::And,
576            right,
577        } => {
578            let left_result = strip_correlation_predicates(*left, eq_preds);
579            let right_result = strip_correlation_predicates(*right, eq_preds);
580            match (left_result, right_result) {
581                (Some(l), Some(r)) => Some(Expr::BinaryOp {
582                    left: Box::new(l),
583                    op: BinaryOperator::And,
584                    right: Box::new(r),
585                }),
586                (Some(l), None) => Some(l),
587                (None, Some(r)) => Some(r),
588                (None, None) => None,
589            }
590        }
591
592        Expr::BinaryOp {
593            ref left,
594            op: BinaryOperator::Eq,
595            ref right,
596        } => {
597            // Check if this matches any of the correlation predicates
598            for pred in eq_preds {
599                if (*left.as_ref() == pred.outer_col && *right.as_ref() == pred.inner_col)
600                    || (*left.as_ref() == pred.inner_col && *right.as_ref() == pred.outer_col)
601                {
602                    return None; // Strip this predicate
603                }
604            }
605            Some(expr)
606        }
607
608        other => Some(other),
609    }
610}
611
612#[cfg(test)]
613mod tests {
614    use super::*;
615    use crate::dialects::Dialect;
616    use crate::generator::generate;
617    use crate::parser::Parser;
618
619    fn parse_and_unnest(sql: &str) -> String {
620        let stmt = Parser::new(sql).unwrap().parse_statement().unwrap();
621        let unnested = unnest_subqueries(stmt);
622        generate(&unnested, Dialect::Ansi)
623    }
624
625    #[test]
626    fn test_exists_to_inner_join() {
627        let sql = "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b WHERE b.id = a.id)";
628        let result = parse_and_unnest(sql);
629        // Should be rewritten to an INNER JOIN
630        assert!(
631            result.contains("INNER JOIN"),
632            "Expected INNER JOIN in: {result}"
633        );
634        assert!(
635            result.contains("_u0"),
636            "Expected derived alias _u0 in: {result}"
637        );
638        assert!(
639            !result.contains("EXISTS"),
640            "Should not contain EXISTS: {result}"
641        );
642    }
643
644    #[test]
645    fn test_not_exists_to_left_join() {
646        let sql = "SELECT a.id FROM a WHERE NOT EXISTS (SELECT 1 FROM b WHERE b.id = a.id)";
647        let result = parse_and_unnest(sql);
648        assert!(
649            result.contains("LEFT JOIN"),
650            "Expected LEFT JOIN in: {result}"
651        );
652        assert!(
653            result.contains("IS NULL"),
654            "Expected IS NULL check in: {result}"
655        );
656        assert!(
657            !result.contains("NOT EXISTS"),
658            "Should not contain NOT EXISTS: {result}"
659        );
660    }
661
662    #[test]
663    fn test_in_subquery_to_inner_join() {
664        let sql = "SELECT a.id FROM a WHERE a.id IN (SELECT b.id FROM b)";
665        let result = parse_and_unnest(sql);
666        assert!(
667            result.contains("INNER JOIN"),
668            "Expected INNER JOIN in: {result}"
669        );
670        assert!(!result.contains(" IN "), "Should not contain IN: {result}");
671    }
672
673    #[test]
674    fn test_not_in_subquery_to_left_join() {
675        let sql = "SELECT a.id FROM a WHERE a.id NOT IN (SELECT b.id FROM b)";
676        let result = parse_and_unnest(sql);
677        assert!(
678            result.contains("LEFT JOIN"),
679            "Expected LEFT JOIN in: {result}"
680        );
681        assert!(
682            result.contains("IS NULL"),
683            "Expected IS NULL check in: {result}"
684        );
685    }
686
687    #[test]
688    fn test_no_correlation_not_unnested() {
689        // Subquery without correlation should not be unnested
690        let sql = "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b WHERE b.x > 10)";
691        let result = parse_and_unnest(sql);
692        assert!(
693            result.contains("EXISTS"),
694            "Uncorrelated EXISTS should remain: {result}"
695        );
696    }
697
698    #[test]
699    fn test_non_equality_correlation_not_unnested() {
700        // Subquery with non-equality correlation (< instead of =) should NOT be unnested
701        // This is the scenario described in Python sqlglot#7295
702        let sql =
703            "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b WHERE b.val < a.val AND b.id = a.id)";
704        let result = parse_and_unnest(sql);
705        assert!(
706            result.contains("EXISTS"),
707            "Subquery with non-eq correlation should not be unnested: {result}"
708        );
709    }
710
711    #[test]
712    fn test_subquery_in_select_not_unnested() {
713        // Correlated scalar subquery inside COALESCE in SELECT list
714        // This is the exact scenario from Python sqlglot#7295 — must NOT be touched
715        let sql =
716            "SELECT COALESCE((SELECT MAX(b.val) FROM b WHERE b.id = a.id), a.val) AS result FROM a";
717        let result = parse_and_unnest(sql);
718        // The subquery should remain in the SELECT, NOT be moved to a JOIN
719        assert!(
720            !result.contains("JOIN"),
721            "Subquery in SELECT should not become a JOIN: {result}"
722        );
723    }
724
725    #[test]
726    fn test_exists_with_additional_where() {
727        // EXISTS with both correlation and a local predicate
728        let sql = "SELECT a.id FROM a WHERE a.x > 5 AND EXISTS (SELECT 1 FROM b WHERE b.id = a.id)";
729        let result = parse_and_unnest(sql);
730        assert!(
731            result.contains("INNER JOIN"),
732            "Expected INNER JOIN in: {result}"
733        );
734        assert!(
735            result.contains("a.x > 5") || result.contains("a.x >"),
736            "Should keep non-subquery predicate: {result}"
737        );
738    }
739
740    #[test]
741    fn test_non_select_statement_unchanged() {
742        let sql = "INSERT INTO t (a) VALUES (1)";
743        let stmt = Parser::new(sql).unwrap().parse_statement().unwrap();
744        let result = unnest_subqueries(stmt.clone());
745        assert_eq!(
746            format!("{result:?}"),
747            format!("{stmt:?}"),
748            "Non-SELECT statements should pass through unchanged"
749        );
750    }
751
752    // ─────────────────────────────────────────────────────────────────
753    // Multiple correlation predicates
754    // ─────────────────────────────────────────────────────────────────
755
756    #[test]
757    fn test_exists_multiple_correlations() {
758        // Two equality correlations → JOIN ON with AND
759        let sql =
760            "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b WHERE b.id = a.id AND b.org = a.org)";
761        let result = parse_and_unnest(sql);
762        assert!(
763            result.contains("INNER JOIN"),
764            "Expected INNER JOIN in: {result}"
765        );
766        assert!(
767            !result.contains("EXISTS"),
768            "Should not contain EXISTS: {result}"
769        );
770        // The ON clause must have both correlation columns joined with AND
771        assert!(
772            result.contains(" AND "),
773            "ON clause should have AND for multiple correlations: {result}"
774        );
775        assert!(result.contains(".id"), "ON should reference id: {result}");
776        assert!(result.contains(".org"), "ON should reference org: {result}");
777    }
778
779    // ─────────────────────────────────────────────────────────────────
780    // Multiple subqueries in WHERE
781    // ─────────────────────────────────────────────────────────────────
782
783    #[test]
784    fn test_multiple_subqueries_in_where() {
785        // Two different subqueries in the same WHERE, both should be unnested
786        let sql = "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b WHERE b.id = a.id) AND a.id IN (SELECT c.id FROM c)";
787        let result = parse_and_unnest(sql);
788        // Both should become joins
789        assert!(
790            !result.contains("EXISTS"),
791            "EXISTS should be unnested: {result}"
792        );
793        assert!(!result.contains(" IN "), "IN should be unnested: {result}");
794        // Should have two joins with different aliases
795        assert!(result.contains("_u0"), "Expected first alias _u0: {result}");
796        assert!(
797            result.contains("_u1"),
798            "Expected second alias _u1: {result}"
799        );
800    }
801
802    // ─────────────────────────────────────────────────────────────────
803    // Residual inner WHERE after stripping correlations
804    // ─────────────────────────────────────────────────────────────────
805
806    #[test]
807    fn test_exists_with_inner_residual_where() {
808        // Inner subquery has a correlation AND a local (non-correlated) predicate
809        // The local predicate should remain in the derived table's WHERE
810        let sql =
811            "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b WHERE b.id = a.id AND b.active = 1)";
812        let result = parse_and_unnest(sql);
813        assert!(
814            result.contains("INNER JOIN"),
815            "Expected INNER JOIN in: {result}"
816        );
817        assert!(
818            !result.contains("EXISTS"),
819            "Should not contain EXISTS: {result}"
820        );
821        // The non-correlated predicate b.active = 1 should be preserved in the subquery
822        assert!(
823            result.contains("active") && result.contains("1"),
824            "Inner residual WHERE should be preserved: {result}"
825        );
826    }
827
828    // ─────────────────────────────────────────────────────────────────
829    // Nested parenthesized subqueries
830    // ─────────────────────────────────────────────────────────────────
831
832    #[test]
833    fn test_parenthesized_exists() {
834        // EXISTS wrapped in extra parentheses
835        let sql = "SELECT a.id FROM a WHERE (EXISTS (SELECT 1 FROM b WHERE b.id = a.id))";
836        let result = parse_and_unnest(sql);
837        assert!(
838            result.contains("INNER JOIN"),
839            "Expected INNER JOIN in: {result}"
840        );
841        assert!(
842            !result.contains("EXISTS"),
843            "Should not contain EXISTS: {result}"
844        );
845    }
846
847    // ─────────────────────────────────────────────────────────────────
848    // IN subquery with multiple projected columns (bail out)
849    // ─────────────────────────────────────────────────────────────────
850
851    #[test]
852    fn test_in_subquery_multi_column_not_unnested() {
853        // IN (SELECT col1, col2 ...) — multi-column, should NOT be unnested
854        let sql = "SELECT a.id FROM a WHERE a.id IN (SELECT b.id, b.name FROM b)";
855        let result = parse_and_unnest(sql);
856        assert!(
857            result.contains(" IN "),
858            "Multi-column IN should remain: {result}"
859        );
860    }
861
862    // ─────────────────────────────────────────────────────────────────
863    // OR with subqueries (should NOT unnest — not safe)
864    // ─────────────────────────────────────────────────────────────────
865
866    #[test]
867    fn test_or_with_exists_not_unnested() {
868        // EXISTS in an OR branch — cannot safely unnest
869        let sql = "SELECT a.id FROM a WHERE a.x > 1 OR EXISTS (SELECT 1 FROM b WHERE b.id = a.id)";
870        let result = parse_and_unnest(sql);
871        assert!(
872            result.contains("EXISTS"),
873            "EXISTS in OR should remain: {result}"
874        );
875    }
876
877    // ─────────────────────────────────────────────────────────────────
878    // Scalar subquery in WHERE (not EXISTS / IN — should stay)
879    // ─────────────────────────────────────────────────────────────────
880
881    #[test]
882    fn test_scalar_subquery_in_where_not_unnested() {
883        // WHERE col = (SELECT ...) — scalar subquery comparison, not handled
884        let sql = "SELECT a.id FROM a WHERE a.val = (SELECT MAX(b.val) FROM b WHERE b.id = a.id)";
885        let result = parse_and_unnest(sql);
886        assert!(
887            !result.contains("JOIN"),
888            "Scalar subquery in WHERE should not become JOIN: {result}"
889        );
890    }
891
892    // ─────────────────────────────────────────────────────────────────
893    // Exact reproducer from Python sqlglot#7295
894    // ─────────────────────────────────────────────────────────────────
895
896    #[test]
897    fn test_sqlglot_issue_7295_exact_reproducer() {
898        // This is the exact SQL from Python sqlglot issue #7295.
899        // The correlated subquery is inside COALESCE in the SELECT list
900        // with a non-equality correlation (b.val < a.val).
901        // Must NOT crash and must NOT modify the query.
902        let sql = "SELECT COALESCE((SELECT MAX(b.val) FROM t AS b WHERE b.val < a.val AND b.id = a.id), a.val) AS result FROM t AS a";
903        let result = parse_and_unnest(sql);
904        assert!(
905            !result.contains("JOIN"),
906            "Issue #7295 query must NOT be rewritten to JOIN: {result}"
907        );
908        assert!(
909            result.contains("COALESCE"),
910            "COALESCE should remain: {result}"
911        );
912    }
913
914    // ─────────────────────────────────────────────────────────────────
915    // No WHERE clause at all
916    // ─────────────────────────────────────────────────────────────────
917
918    #[test]
919    fn test_no_where_clause_unchanged() {
920        let sql = "SELECT a.id FROM a";
921        let result = parse_and_unnest(sql);
922        assert_eq!(result, "SELECT a.id FROM a", "No WHERE should be unchanged");
923    }
924
925    // ─────────────────────────────────────────────────────────────────
926    // WHERE with no subqueries
927    // ─────────────────────────────────────────────────────────────────
928
929    #[test]
930    fn test_where_without_subqueries_unchanged() {
931        let sql = "SELECT a.id FROM a WHERE a.x > 1 AND a.y = 2";
932        let result = parse_and_unnest(sql);
933        assert!(
934            !result.contains("JOIN"),
935            "No subqueries, no joins should be added: {result}"
936        );
937        assert!(
938            result.contains("a.x > 1"),
939            "Original predicates should remain: {result}"
940        );
941    }
942
943    // ─────────────────────────────────────────────────────────────────
944    // EXISTS with no inner WHERE (uncorrelated — bail out)
945    // ─────────────────────────────────────────────────────────────────
946
947    #[test]
948    fn test_exists_no_where_not_unnested() {
949        let sql = "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b)";
950        let result = parse_and_unnest(sql);
951        assert!(
952            result.contains("EXISTS"),
953            "EXISTS without inner WHERE should remain: {result}"
954        );
955    }
956
957    // ─────────────────────────────────────────────────────────────────
958    // Inner subquery has only same-table predicates (no cross-table correlation)
959    // ─────────────────────────────────────────────────────────────────
960
961    #[test]
962    fn test_exists_same_table_predicate_not_unnested() {
963        // Inner WHERE only references columns from b — no correlation to outer a
964        let sql = "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b WHERE b.x = b.y)";
965        let result = parse_and_unnest(sql);
966        assert!(
967            result.contains("EXISTS"),
968            "Same-table predicate is not correlation: {result}"
969        );
970    }
971
972    // ─────────────────────────────────────────────────────────────────
973    // DISTINCT behaviour in derived tables
974    // ─────────────────────────────────────────────────────────────────
975
976    #[test]
977    fn test_exists_produces_distinct_derived_table() {
978        let sql = "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b WHERE b.id = a.id)";
979        let result = parse_and_unnest(sql);
980        assert!(
981            result.contains("DISTINCT"),
982            "Derived table should use DISTINCT: {result}"
983        );
984    }
985
986    #[test]
987    fn test_in_produces_distinct_derived_table() {
988        let sql = "SELECT a.id FROM a WHERE a.id IN (SELECT b.id FROM b)";
989        let result = parse_and_unnest(sql);
990        assert!(
991            result.contains("DISTINCT"),
992            "IN-derived table should use DISTINCT: {result}"
993        );
994    }
995
996    // ─────────────────────────────────────────────────────────────────
997    // NOT IN with inner WHERE (should propagate inner WHERE into derived table)
998    // ─────────────────────────────────────────────────────────────────
999
1000    #[test]
1001    fn test_not_in_preserves_inner_where() {
1002        let sql = "SELECT a.id FROM a WHERE a.id NOT IN (SELECT b.id FROM b WHERE b.active = 1)";
1003        let result = parse_and_unnest(sql);
1004        assert!(result.contains("LEFT JOIN"), "Expected LEFT JOIN: {result}");
1005        assert!(result.contains("IS NULL"), "Expected IS NULL: {result}");
1006        assert!(
1007            result.contains("active"),
1008            "Inner WHERE should be preserved: {result}"
1009        );
1010    }
1011
1012    // ─────────────────────────────────────────────────────────────────
1013    // AliasGen produces sequential aliases
1014    // ─────────────────────────────────────────────────────────────────
1015
1016    #[test]
1017    fn test_alias_gen_sequential() {
1018        let mut alias_gen = AliasGen::new();
1019        assert_eq!(alias_gen.next(), "_u0");
1020        assert_eq!(alias_gen.next(), "_u1");
1021        assert_eq!(alias_gen.next(), "_u2");
1022    }
1023}