Skip to main content

sqlglot_rust/optimizer/
unnest_subqueries.rs

1/// Unnest and decorrelate subqueries.
2///
3/// Transforms correlated subqueries in WHERE clauses into equivalent JOINs
4/// when safe to do so. This is a common optimization because most query engines
5/// execute joins more efficiently than correlated subqueries.
6///
7/// ## Supported rewrites
8///
9/// | Pattern                               | Rewrite                             |
10/// |---------------------------------------|-------------------------------------|
11/// | `WHERE EXISTS (SELECT … WHERE a = b)` | `INNER JOIN (SELECT …) ON a = b`   |
12/// | `WHERE NOT EXISTS (…)`                | `LEFT JOIN … WHERE _u.col IS NULL`  |
13/// | `WHERE x IN (SELECT col FROM …)`      | `INNER JOIN (SELECT DISTINCT …)`    |
14/// | `WHERE x NOT IN (SELECT col FROM …)`  | `LEFT JOIN … WHERE _u.col IS NULL`  |
15///
16/// ## Safety
17///
18/// The pass bails out (leaves the subquery unchanged) when:
19/// - The subquery has no correlation predicates (equality on outer columns).
20/// - The subquery is inside a function/expression in the SELECT list rather
21///   than in a WHERE predicate — matching the fix for Python sqlglot#7295.
22/// - The correlated predicate involves non-equality operators that would
23///   require LATERAL / APPLY for correctness.
24use crate::ast::*;
25
26/// Apply subquery unnesting to a statement.
27///
28/// Returns the statement unchanged if no subqueries can be safely unnested.
29pub fn unnest_subqueries(statement: Statement) -> Statement {
30    match statement {
31        Statement::Select(sel) => Statement::Select(unnest_select(sel)),
32        other => other,
33    }
34}
35
36/// Counter for generating unique aliases for unnested subquery tables.
37struct AliasGen {
38    counter: usize,
39}
40
41impl AliasGen {
42    fn new() -> Self {
43        Self { counter: 0 }
44    }
45
46    fn next(&mut self) -> String {
47        let alias = format!("_u{}", self.counter);
48        self.counter += 1;
49        alias
50    }
51}
52
53fn unnest_select(mut sel: SelectStatement) -> SelectStatement {
54    let mut alias_gen = AliasGen::new();
55
56    // Only unnest subqueries found in the WHERE clause (safe position).
57    // Subqueries in SELECT columns that are inside functions (COALESCE, etc.)
58    // are NOT unnested to avoid the Python sqlglot#7295 crash scenario.
59    if let Some(where_clause) = sel.where_clause.take() {
60        let (new_where, new_joins) = unnest_where(where_clause, &mut alias_gen);
61        sel.where_clause = new_where;
62        sel.joins.extend(new_joins);
63    }
64
65    sel
66}
67
68/// Process a WHERE clause expression, extracting subqueries that can be
69/// rewritten as joins. Returns the residual WHERE clause (if any) and
70/// a list of new JoinClauses.
71fn unnest_where(expr: Expr, alias_gen: &mut AliasGen) -> (Option<Expr>, Vec<JoinClause>) {
72    let mut joins = Vec::new();
73    let residual = unnest_expr(expr, &mut joins, alias_gen);
74    (residual, joins)
75}
76
77/// Recursively process an expression in the WHERE clause.
78/// Returns `None` if the expression was fully consumed (replaced by a join).
79fn unnest_expr(expr: Expr, joins: &mut Vec<JoinClause>, alias_gen: &mut AliasGen) -> Option<Expr> {
80    match expr {
81        // AND: try to unnest each side independently
82        Expr::BinaryOp {
83            left,
84            op: BinaryOperator::And,
85            right,
86        } => {
87            let left_result = unnest_expr(*left, joins, alias_gen);
88            let right_result = unnest_expr(*right, joins, alias_gen);
89            match (left_result, right_result) {
90                (Some(l), Some(r)) => Some(Expr::BinaryOp {
91                    left: Box::new(l),
92                    op: BinaryOperator::And,
93                    right: Box::new(r),
94                }),
95                (Some(l), None) => Some(l),
96                (None, Some(r)) => Some(r),
97                (None, None) => None,
98            }
99        }
100
101        // EXISTS (SELECT ... WHERE outer.col = inner.col)
102        Expr::Exists { subquery, negated } => {
103            let subquery_inner = *subquery;
104            if let Some((join, residual)) =
105                try_unnest_exists(subquery_inner.clone(), negated, alias_gen)
106            {
107                joins.push(join);
108                residual
109            } else {
110                Some(Expr::Exists {
111                    subquery: Box::new(subquery_inner),
112                    negated,
113                })
114            }
115        }
116
117        // NOT EXISTS parsed as UnaryOp(Not, Exists { negated: false })
118        Expr::UnaryOp {
119            op: UnaryOperator::Not,
120            expr,
121        } if matches!(expr.as_ref(), Expr::Exists { negated: false, .. }) => {
122            if let Expr::Exists { subquery, .. } = *expr {
123                let subquery_inner = *subquery;
124                if let Some((join, residual)) =
125                    try_unnest_exists(subquery_inner.clone(), true, alias_gen)
126                {
127                    joins.push(join);
128                    residual
129                } else {
130                    Some(Expr::UnaryOp {
131                        op: UnaryOperator::Not,
132                        expr: Box::new(Expr::Exists {
133                            subquery: Box::new(subquery_inner),
134                            negated: false,
135                        }),
136                    })
137                }
138            } else {
139                unreachable!()
140            }
141        }
142
143        // col IN (SELECT ...)
144        Expr::InSubquery {
145            expr: lhs,
146            subquery,
147            negated,
148        } => {
149            let lhs_inner = *lhs;
150            let subquery_inner = *subquery;
151            if let Some((join, residual)) = try_unnest_in_subquery(
152                lhs_inner.clone(),
153                subquery_inner.clone(),
154                negated,
155                alias_gen,
156            ) {
157                joins.push(join);
158                residual
159            } else {
160                Some(Expr::InSubquery {
161                    expr: Box::new(lhs_inner),
162                    subquery: Box::new(subquery_inner),
163                    negated,
164                })
165            }
166        }
167
168        // Nested parenthesized expression — unwrap, try to unnest, re-wrap if needed
169        Expr::Nested(inner) => {
170            let result = unnest_expr(*inner, joins, alias_gen);
171            result.map(|e| {
172                if e.is_literal() || matches!(e, Expr::Column { .. }) {
173                    e
174                } else {
175                    Expr::Nested(Box::new(e))
176                }
177            })
178        }
179
180        // Everything else: leave as-is
181        other => Some(other),
182    }
183}
184
185// ─────────────────────────────────────────────────────────────────────
186// EXISTS / NOT EXISTS → JOIN
187// ─────────────────────────────────────────────────────────────────────
188
189/// Try to rewrite `[NOT] EXISTS (SELECT ... FROM t WHERE correlations)`
190/// into a join. Returns `None` if the subquery cannot be safely unnested.
191fn try_unnest_exists(
192    subquery: Statement,
193    negated: bool,
194    alias_gen: &mut AliasGen,
195) -> Option<(JoinClause, Option<Expr>)> {
196    let inner_select = match &subquery {
197        Statement::Select(sel) => sel,
198        _ => return None,
199    };
200
201    // Extract correlation predicates from the inner WHERE clause
202    let inner_where = inner_select.where_clause.as_ref()?;
203    let (eq_preds, non_eq_preds) = extract_correlation_predicates(inner_where);
204
205    // Must have at least one equality correlation
206    if eq_preds.is_empty() {
207        return None;
208    }
209
210    // Bail out if there are non-equality correlations (would need LATERAL)
211    if !non_eq_preds.is_empty() {
212        return None;
213    }
214
215    let alias = alias_gen.next();
216
217    // Build the ON condition from equality predicates
218    let on_condition = build_join_on(&eq_preds, &alias);
219
220    // Build the derived table from the inner SELECT, stripping correlation predicates
221    let derived = build_derived_table_from_exists(subquery, &eq_preds, &alias);
222
223    if negated {
224        // NOT EXISTS → LEFT JOIN ... WHERE _u.col IS NULL
225        // Pick any column from the inner select as the NULL-check sentinel
226        let null_check_col = sentinel_column(&alias);
227
228        let join = JoinClause {
229            join_type: JoinType::Left,
230            table: derived,
231            on: Some(on_condition),
232            using: vec![],
233        };
234        let residual = Some(Expr::IsNull {
235            expr: Box::new(null_check_col),
236            negated: false,
237        });
238        Some((join, residual))
239    } else {
240        // EXISTS → INNER JOIN (deduplicated via GROUP BY or DISTINCT in the derived table)
241        let join = JoinClause {
242            join_type: JoinType::Inner,
243            table: derived,
244            on: Some(on_condition),
245            using: vec![],
246        };
247        Some((join, None))
248    }
249}
250
251// ─────────────────────────────────────────────────────────────────────
252// IN / NOT IN subquery → JOIN
253// ─────────────────────────────────────────────────────────────────────
254
255/// Try to rewrite `col [NOT] IN (SELECT x FROM t WHERE ...)` into a join.
256fn try_unnest_in_subquery(
257    lhs: Expr,
258    subquery: Statement,
259    negated: bool,
260    alias_gen: &mut AliasGen,
261) -> Option<(JoinClause, Option<Expr>)> {
262    let inner_select = match &subquery {
263        Statement::Select(sel) => sel,
264        _ => return None,
265    };
266
267    // The inner SELECT must project exactly one column
268    if inner_select.columns.len() != 1 {
269        return None;
270    }
271
272    let alias = alias_gen.next();
273    let inner_col_alias = "_col0".to_string();
274
275    // Build ON condition: outer_col = _u._col0
276    let on_condition = Expr::BinaryOp {
277        left: Box::new(lhs),
278        op: BinaryOperator::Eq,
279        right: Box::new(Expr::Column {
280            table: Some(alias.clone()),
281            name: inner_col_alias.clone(),
282            quote_style: QuoteStyle::None,
283            table_quote_style: QuoteStyle::None,
284        }),
285    };
286
287    // Build a derived table: (SELECT DISTINCT <col> AS _col0 FROM ... WHERE ...) AS _uN
288    let derived = build_derived_table_from_in(subquery, &inner_col_alias, &alias);
289
290    if negated {
291        // NOT IN → LEFT JOIN ... WHERE _uN._col0 IS NULL
292        let null_check = Expr::IsNull {
293            expr: Box::new(Expr::Column {
294                table: Some(alias.clone()),
295                name: inner_col_alias,
296                quote_style: QuoteStyle::None,
297                table_quote_style: QuoteStyle::None,
298            }),
299            negated: false,
300        };
301
302        let join = JoinClause {
303            join_type: JoinType::Left,
304            table: derived,
305            on: Some(on_condition),
306            using: vec![],
307        };
308        Some((join, Some(null_check)))
309    } else {
310        // IN → INNER JOIN
311        let join = JoinClause {
312            join_type: JoinType::Inner,
313            table: derived,
314            on: Some(on_condition),
315            using: vec![],
316        };
317        Some((join, None))
318    }
319}
320
321// ─────────────────────────────────────────────────────────────────────
322// Helpers: correlation detection
323// ─────────────────────────────────────────────────────────────────────
324
325/// A correlation predicate extracted from a subquery's WHERE clause.
326/// Represents `outer_col = inner_col`.
327#[derive(Debug, Clone)]
328struct CorrelationPredicate {
329    /// The column reference from the outer query
330    outer_col: Expr,
331    /// The column reference from the inner query
332    inner_col: Expr,
333}
334
335/// Examine a WHERE expression and extract equality correlation predicates
336/// (where one side references an outer table and the other an inner table).
337///
338/// Returns (equality_correlations, non_equality_predicates_referencing_outer_tables).
339///
340/// A predicate is classified as "correlated" if it contains column references
341/// with table qualifiers that likely come from the outer scope. Since we don't
342/// have full scope analysis, we use a heuristic: columns with a table qualifier
343/// that appears as a correlation candidate.
344fn extract_correlation_predicates(expr: &Expr) -> (Vec<CorrelationPredicate>, Vec<Expr>) {
345    let mut eq_preds = Vec::new();
346    let mut non_eq_preds = Vec::new();
347
348    collect_correlation_predicates(expr, &mut eq_preds, &mut non_eq_preds);
349
350    (eq_preds, non_eq_preds)
351}
352
353fn collect_correlation_predicates(
354    expr: &Expr,
355    eq_preds: &mut Vec<CorrelationPredicate>,
356    non_eq_preds: &mut Vec<Expr>,
357) {
358    match expr {
359        Expr::BinaryOp {
360            left,
361            op: BinaryOperator::And,
362            right,
363        } => {
364            collect_correlation_predicates(left, eq_preds, non_eq_preds);
365            collect_correlation_predicates(right, eq_preds, non_eq_preds);
366        }
367
368        Expr::BinaryOp {
369            left,
370            op: BinaryOperator::Eq,
371            right,
372        } => {
373            // Check if this is a correlation: one side has a table qualifier,
374            // and they reference different tables
375            if let (Some((l_table, _l_name)), Some((r_table, _r_name))) =
376                (extract_column_ref(left), extract_column_ref(right))
377            {
378                if l_table == r_table {
379                    // Same table — not a correlation
380                } else {
381                    eq_preds.push(CorrelationPredicate {
382                        outer_col: *left.clone(),
383                        inner_col: *right.clone(),
384                    });
385                    return;
386                }
387            }
388            // Not a correlation — check if it references outer tables
389            if has_potential_outer_reference(expr) {
390                non_eq_preds.push(expr.clone());
391            }
392        }
393
394        // Non-equality comparisons that reference outer columns
395        Expr::BinaryOp {
396            op:
397                BinaryOperator::Lt
398                | BinaryOperator::Gt
399                | BinaryOperator::LtEq
400                | BinaryOperator::GtEq
401                | BinaryOperator::Neq,
402            ..
403        } => {
404            if is_cross_table_predicate(expr) {
405                non_eq_preds.push(expr.clone());
406            }
407        }
408
409        _ => {}
410    }
411}
412
413/// Extract (table, column_name) from a column reference if it has a table qualifier.
414fn extract_column_ref(expr: &Expr) -> Option<(String, String)> {
415    match expr {
416        Expr::Column {
417            table: Some(t),
418            name,
419            ..
420        } => Some((t.clone(), name.clone())),
421        _ => None,
422    }
423}
424
425/// Check whether an expression contains column references from different tables
426/// (heuristic for cross-scope references without full scope analysis).
427fn is_cross_table_predicate(expr: &Expr) -> bool {
428    let mut tables = Vec::new();
429    expr.walk(&mut |e| {
430        if let Expr::Column { table: Some(t), .. } = e
431            && !tables.iter().any(|existing: &String| existing == t)
432        {
433            tables.push(t.clone());
434        }
435        true
436    });
437    tables.len() > 1
438}
439
440/// Heuristic: does this expression reference columns from more than one table?
441fn has_potential_outer_reference(expr: &Expr) -> bool {
442    is_cross_table_predicate(expr)
443}
444
445// ─────────────────────────────────────────────────────────────────────
446// Helpers: building derived tables and join conditions
447// ─────────────────────────────────────────────────────────────────────
448
449/// Build a JOIN ON clause from equality correlation predicates,
450/// rewriting inner column references to use the new alias.
451fn build_join_on(preds: &[CorrelationPredicate], alias: &str) -> Expr {
452    let conditions: Vec<Expr> = preds
453        .iter()
454        .map(|p| {
455            let rewritten_inner = rewrite_column_table(&p.inner_col, alias);
456            Expr::BinaryOp {
457                left: Box::new(p.outer_col.clone()),
458                op: BinaryOperator::Eq,
459                right: Box::new(rewritten_inner),
460            }
461        })
462        .collect();
463
464    and_all(conditions)
465}
466
467/// Combine multiple expressions with AND.
468fn and_all(mut exprs: Vec<Expr>) -> Expr {
469    assert!(
470        !exprs.is_empty(),
471        "and_all requires at least one expression"
472    );
473    if exprs.len() == 1 {
474        return exprs.remove(0);
475    }
476    let first = exprs.remove(0);
477    exprs.into_iter().fold(first, |acc, e| Expr::BinaryOp {
478        left: Box::new(acc),
479        op: BinaryOperator::And,
480        right: Box::new(e),
481    })
482}
483
484/// Rewrite a column's table qualifier to the given alias.
485fn rewrite_column_table(expr: &Expr, new_table: &str) -> Expr {
486    match expr {
487        Expr::Column {
488            name, quote_style, ..
489        } => Expr::Column {
490            table: Some(new_table.to_string()),
491            name: name.clone(),
492            quote_style: *quote_style,
493            table_quote_style: QuoteStyle::None,
494        },
495        other => other.clone(),
496    }
497}
498
499/// Build a derived table source from an EXISTS subquery.
500///
501/// Strips the correlation predicates from the inner WHERE clause and wraps
502/// the result as `(SELECT DISTINCT 1 AS _sentinel, <join_keys> FROM ... WHERE <residual>) AS _uN`.
503fn build_derived_table_from_exists(
504    subquery: Statement,
505    eq_preds: &[CorrelationPredicate],
506    alias: &str,
507) -> TableSource {
508    let mut inner_select = match subquery {
509        Statement::Select(sel) => sel,
510        _ => unreachable!("Caller ensures this is a SELECT"),
511    };
512
513    // Remove correlation predicates from the inner WHERE
514    if let Some(where_clause) = inner_select.where_clause.take() {
515        inner_select.where_clause = strip_correlation_predicates(where_clause, eq_preds);
516    }
517
518    // Replace the SELECT list with DISTINCT 1 AS _sentinel
519    // to deduplicate rows for EXISTS semantics
520    inner_select.distinct = true;
521    inner_select.columns = vec![SelectItem::Expr {
522        expr: Expr::Number("1".to_string()),
523        alias: Some("_sentinel".to_string()),
524        alias_quote_style: QuoteStyle::None,
525    }];
526
527    TableSource::Subquery {
528        query: Box::new(Statement::Select(inner_select)),
529        alias: Some(alias.to_string()),
530        alias_quote_style: QuoteStyle::None,
531    }
532}
533
534/// Build a derived table from an IN-subquery.
535///
536/// Wraps the inner query as `(SELECT DISTINCT <col> AS _col0 FROM ...) AS _uN`.
537fn build_derived_table_from_in(
538    subquery: Statement,
539    col_alias: &str,
540    table_alias: &str,
541) -> TableSource {
542    let mut inner_select = match subquery {
543        Statement::Select(sel) => sel,
544        _ => unreachable!("Caller ensures this is a SELECT"),
545    };
546
547    // Ensure DISTINCT to avoid row multiplication
548    inner_select.distinct = true;
549
550    // Alias the single projected column
551    if let Some(SelectItem::Expr { alias, .. }) = inner_select.columns.first_mut() {
552        *alias = Some(col_alias.to_string());
553    }
554
555    TableSource::Subquery {
556        query: Box::new(Statement::Select(inner_select)),
557        alias: Some(table_alias.to_string()),
558        alias_quote_style: QuoteStyle::None,
559    }
560}
561
562/// Create a sentinel column reference for NULL-checks in anti-joins.
563fn sentinel_column(alias: &str) -> Expr {
564    Expr::Column {
565        table: Some(alias.to_string()),
566        name: "_sentinel".to_string(),
567        quote_style: QuoteStyle::None,
568        table_quote_style: QuoteStyle::None,
569    }
570}
571
572/// Strip the given correlation predicates from a WHERE expression,
573/// returning the residual expression (or None if the entire WHERE was correlations).
574fn strip_correlation_predicates(expr: Expr, eq_preds: &[CorrelationPredicate]) -> Option<Expr> {
575    match expr {
576        Expr::BinaryOp {
577            left,
578            op: BinaryOperator::And,
579            right,
580        } => {
581            let left_result = strip_correlation_predicates(*left, eq_preds);
582            let right_result = strip_correlation_predicates(*right, eq_preds);
583            match (left_result, right_result) {
584                (Some(l), Some(r)) => Some(Expr::BinaryOp {
585                    left: Box::new(l),
586                    op: BinaryOperator::And,
587                    right: Box::new(r),
588                }),
589                (Some(l), None) => Some(l),
590                (None, Some(r)) => Some(r),
591                (None, None) => None,
592            }
593        }
594
595        Expr::BinaryOp {
596            ref left,
597            op: BinaryOperator::Eq,
598            ref right,
599        } => {
600            // Check if this matches any of the correlation predicates
601            for pred in eq_preds {
602                if (*left.as_ref() == pred.outer_col && *right.as_ref() == pred.inner_col)
603                    || (*left.as_ref() == pred.inner_col && *right.as_ref() == pred.outer_col)
604                {
605                    return None; // Strip this predicate
606                }
607            }
608            Some(expr)
609        }
610
611        other => Some(other),
612    }
613}
614
615#[cfg(test)]
616mod tests {
617    use super::*;
618    use crate::dialects::Dialect;
619    use crate::generator::generate;
620    use crate::parser::Parser;
621
622    fn parse_and_unnest(sql: &str) -> String {
623        let stmt = Parser::new(sql).unwrap().parse_statement().unwrap();
624        let unnested = unnest_subqueries(stmt);
625        generate(&unnested, Dialect::Ansi)
626    }
627
628    #[test]
629    fn test_exists_to_inner_join() {
630        let sql = "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b WHERE b.id = a.id)";
631        let result = parse_and_unnest(sql);
632        // Should be rewritten to an INNER JOIN
633        assert!(
634            result.contains("INNER JOIN"),
635            "Expected INNER JOIN in: {result}"
636        );
637        assert!(
638            result.contains("_u0"),
639            "Expected derived alias _u0 in: {result}"
640        );
641        assert!(
642            !result.contains("EXISTS"),
643            "Should not contain EXISTS: {result}"
644        );
645    }
646
647    #[test]
648    fn test_not_exists_to_left_join() {
649        let sql = "SELECT a.id FROM a WHERE NOT EXISTS (SELECT 1 FROM b WHERE b.id = a.id)";
650        let result = parse_and_unnest(sql);
651        assert!(
652            result.contains("LEFT JOIN"),
653            "Expected LEFT JOIN in: {result}"
654        );
655        assert!(
656            result.contains("IS NULL"),
657            "Expected IS NULL check in: {result}"
658        );
659        assert!(
660            !result.contains("NOT EXISTS"),
661            "Should not contain NOT EXISTS: {result}"
662        );
663    }
664
665    #[test]
666    fn test_in_subquery_to_inner_join() {
667        let sql = "SELECT a.id FROM a WHERE a.id IN (SELECT b.id FROM b)";
668        let result = parse_and_unnest(sql);
669        assert!(
670            result.contains("INNER JOIN"),
671            "Expected INNER JOIN in: {result}"
672        );
673        assert!(!result.contains(" IN "), "Should not contain IN: {result}");
674    }
675
676    #[test]
677    fn test_not_in_subquery_to_left_join() {
678        let sql = "SELECT a.id FROM a WHERE a.id NOT IN (SELECT b.id FROM b)";
679        let result = parse_and_unnest(sql);
680        assert!(
681            result.contains("LEFT JOIN"),
682            "Expected LEFT JOIN in: {result}"
683        );
684        assert!(
685            result.contains("IS NULL"),
686            "Expected IS NULL check in: {result}"
687        );
688    }
689
690    #[test]
691    fn test_no_correlation_not_unnested() {
692        // Subquery without correlation should not be unnested
693        let sql = "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b WHERE b.x > 10)";
694        let result = parse_and_unnest(sql);
695        assert!(
696            result.contains("EXISTS"),
697            "Uncorrelated EXISTS should remain: {result}"
698        );
699    }
700
701    #[test]
702    fn test_non_equality_correlation_not_unnested() {
703        // Subquery with non-equality correlation (< instead of =) should NOT be unnested
704        // This is the scenario described in Python sqlglot#7295
705        let sql =
706            "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b WHERE b.val < a.val AND b.id = a.id)";
707        let result = parse_and_unnest(sql);
708        assert!(
709            result.contains("EXISTS"),
710            "Subquery with non-eq correlation should not be unnested: {result}"
711        );
712    }
713
714    #[test]
715    fn test_subquery_in_select_not_unnested() {
716        // Correlated scalar subquery inside COALESCE in SELECT list
717        // This is the exact scenario from Python sqlglot#7295 — must NOT be touched
718        let sql =
719            "SELECT COALESCE((SELECT MAX(b.val) FROM b WHERE b.id = a.id), a.val) AS result FROM a";
720        let result = parse_and_unnest(sql);
721        // The subquery should remain in the SELECT, NOT be moved to a JOIN
722        assert!(
723            !result.contains("JOIN"),
724            "Subquery in SELECT should not become a JOIN: {result}"
725        );
726    }
727
728    #[test]
729    fn test_exists_with_additional_where() {
730        // EXISTS with both correlation and a local predicate
731        let sql = "SELECT a.id FROM a WHERE a.x > 5 AND EXISTS (SELECT 1 FROM b WHERE b.id = a.id)";
732        let result = parse_and_unnest(sql);
733        assert!(
734            result.contains("INNER JOIN"),
735            "Expected INNER JOIN in: {result}"
736        );
737        assert!(
738            result.contains("a.x > 5") || result.contains("a.x >"),
739            "Should keep non-subquery predicate: {result}"
740        );
741    }
742
743    #[test]
744    fn test_non_select_statement_unchanged() {
745        let sql = "INSERT INTO t (a) VALUES (1)";
746        let stmt = Parser::new(sql).unwrap().parse_statement().unwrap();
747        let result = unnest_subqueries(stmt.clone());
748        assert_eq!(
749            format!("{result:?}"),
750            format!("{stmt:?}"),
751            "Non-SELECT statements should pass through unchanged"
752        );
753    }
754
755    // ─────────────────────────────────────────────────────────────────
756    // Multiple correlation predicates
757    // ─────────────────────────────────────────────────────────────────
758
759    #[test]
760    fn test_exists_multiple_correlations() {
761        // Two equality correlations → JOIN ON with AND
762        let sql =
763            "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b WHERE b.id = a.id AND b.org = a.org)";
764        let result = parse_and_unnest(sql);
765        assert!(
766            result.contains("INNER JOIN"),
767            "Expected INNER JOIN in: {result}"
768        );
769        assert!(
770            !result.contains("EXISTS"),
771            "Should not contain EXISTS: {result}"
772        );
773        // The ON clause must have both correlation columns joined with AND
774        assert!(
775            result.contains(" AND "),
776            "ON clause should have AND for multiple correlations: {result}"
777        );
778        assert!(result.contains(".id"), "ON should reference id: {result}");
779        assert!(result.contains(".org"), "ON should reference org: {result}");
780    }
781
782    // ─────────────────────────────────────────────────────────────────
783    // Multiple subqueries in WHERE
784    // ─────────────────────────────────────────────────────────────────
785
786    #[test]
787    fn test_multiple_subqueries_in_where() {
788        // Two different subqueries in the same WHERE, both should be unnested
789        let sql = "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b WHERE b.id = a.id) AND a.id IN (SELECT c.id FROM c)";
790        let result = parse_and_unnest(sql);
791        // Both should become joins
792        assert!(
793            !result.contains("EXISTS"),
794            "EXISTS should be unnested: {result}"
795        );
796        assert!(!result.contains(" IN "), "IN should be unnested: {result}");
797        // Should have two joins with different aliases
798        assert!(result.contains("_u0"), "Expected first alias _u0: {result}");
799        assert!(
800            result.contains("_u1"),
801            "Expected second alias _u1: {result}"
802        );
803    }
804
805    // ─────────────────────────────────────────────────────────────────
806    // Residual inner WHERE after stripping correlations
807    // ─────────────────────────────────────────────────────────────────
808
809    #[test]
810    fn test_exists_with_inner_residual_where() {
811        // Inner subquery has a correlation AND a local (non-correlated) predicate
812        // The local predicate should remain in the derived table's WHERE
813        let sql =
814            "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b WHERE b.id = a.id AND b.active = 1)";
815        let result = parse_and_unnest(sql);
816        assert!(
817            result.contains("INNER JOIN"),
818            "Expected INNER JOIN in: {result}"
819        );
820        assert!(
821            !result.contains("EXISTS"),
822            "Should not contain EXISTS: {result}"
823        );
824        // The non-correlated predicate b.active = 1 should be preserved in the subquery
825        assert!(
826            result.contains("active") && result.contains("1"),
827            "Inner residual WHERE should be preserved: {result}"
828        );
829    }
830
831    // ─────────────────────────────────────────────────────────────────
832    // Nested parenthesized subqueries
833    // ─────────────────────────────────────────────────────────────────
834
835    #[test]
836    fn test_parenthesized_exists() {
837        // EXISTS wrapped in extra parentheses
838        let sql = "SELECT a.id FROM a WHERE (EXISTS (SELECT 1 FROM b WHERE b.id = a.id))";
839        let result = parse_and_unnest(sql);
840        assert!(
841            result.contains("INNER JOIN"),
842            "Expected INNER JOIN in: {result}"
843        );
844        assert!(
845            !result.contains("EXISTS"),
846            "Should not contain EXISTS: {result}"
847        );
848    }
849
850    // ─────────────────────────────────────────────────────────────────
851    // IN subquery with multiple projected columns (bail out)
852    // ─────────────────────────────────────────────────────────────────
853
854    #[test]
855    fn test_in_subquery_multi_column_not_unnested() {
856        // IN (SELECT col1, col2 ...) — multi-column, should NOT be unnested
857        let sql = "SELECT a.id FROM a WHERE a.id IN (SELECT b.id, b.name FROM b)";
858        let result = parse_and_unnest(sql);
859        assert!(
860            result.contains(" IN "),
861            "Multi-column IN should remain: {result}"
862        );
863    }
864
865    // ─────────────────────────────────────────────────────────────────
866    // OR with subqueries (should NOT unnest — not safe)
867    // ─────────────────────────────────────────────────────────────────
868
869    #[test]
870    fn test_or_with_exists_not_unnested() {
871        // EXISTS in an OR branch — cannot safely unnest
872        let sql = "SELECT a.id FROM a WHERE a.x > 1 OR EXISTS (SELECT 1 FROM b WHERE b.id = a.id)";
873        let result = parse_and_unnest(sql);
874        assert!(
875            result.contains("EXISTS"),
876            "EXISTS in OR should remain: {result}"
877        );
878    }
879
880    // ─────────────────────────────────────────────────────────────────
881    // Scalar subquery in WHERE (not EXISTS / IN — should stay)
882    // ─────────────────────────────────────────────────────────────────
883
884    #[test]
885    fn test_scalar_subquery_in_where_not_unnested() {
886        // WHERE col = (SELECT ...) — scalar subquery comparison, not handled
887        let sql = "SELECT a.id FROM a WHERE a.val = (SELECT MAX(b.val) FROM b WHERE b.id = a.id)";
888        let result = parse_and_unnest(sql);
889        assert!(
890            !result.contains("JOIN"),
891            "Scalar subquery in WHERE should not become JOIN: {result}"
892        );
893    }
894
895    // ─────────────────────────────────────────────────────────────────
896    // Exact reproducer from Python sqlglot#7295
897    // ─────────────────────────────────────────────────────────────────
898
899    #[test]
900    fn test_sqlglot_issue_7295_exact_reproducer() {
901        // This is the exact SQL from Python sqlglot issue #7295.
902        // The correlated subquery is inside COALESCE in the SELECT list
903        // with a non-equality correlation (b.val < a.val).
904        // Must NOT crash and must NOT modify the query.
905        let sql = "SELECT COALESCE((SELECT MAX(b.val) FROM t AS b WHERE b.val < a.val AND b.id = a.id), a.val) AS result FROM t AS a";
906        let result = parse_and_unnest(sql);
907        assert!(
908            !result.contains("JOIN"),
909            "Issue #7295 query must NOT be rewritten to JOIN: {result}"
910        );
911        assert!(
912            result.contains("COALESCE"),
913            "COALESCE should remain: {result}"
914        );
915    }
916
917    // ─────────────────────────────────────────────────────────────────
918    // No WHERE clause at all
919    // ─────────────────────────────────────────────────────────────────
920
921    #[test]
922    fn test_no_where_clause_unchanged() {
923        let sql = "SELECT a.id FROM a";
924        let result = parse_and_unnest(sql);
925        assert_eq!(result, "SELECT a.id FROM a", "No WHERE should be unchanged");
926    }
927
928    // ─────────────────────────────────────────────────────────────────
929    // WHERE with no subqueries
930    // ─────────────────────────────────────────────────────────────────
931
932    #[test]
933    fn test_where_without_subqueries_unchanged() {
934        let sql = "SELECT a.id FROM a WHERE a.x > 1 AND a.y = 2";
935        let result = parse_and_unnest(sql);
936        assert!(
937            !result.contains("JOIN"),
938            "No subqueries, no joins should be added: {result}"
939        );
940        assert!(
941            result.contains("a.x > 1"),
942            "Original predicates should remain: {result}"
943        );
944    }
945
946    // ─────────────────────────────────────────────────────────────────
947    // EXISTS with no inner WHERE (uncorrelated — bail out)
948    // ─────────────────────────────────────────────────────────────────
949
950    #[test]
951    fn test_exists_no_where_not_unnested() {
952        let sql = "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b)";
953        let result = parse_and_unnest(sql);
954        assert!(
955            result.contains("EXISTS"),
956            "EXISTS without inner WHERE should remain: {result}"
957        );
958    }
959
960    // ─────────────────────────────────────────────────────────────────
961    // Inner subquery has only same-table predicates (no cross-table correlation)
962    // ─────────────────────────────────────────────────────────────────
963
964    #[test]
965    fn test_exists_same_table_predicate_not_unnested() {
966        // Inner WHERE only references columns from b — no correlation to outer a
967        let sql = "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b WHERE b.x = b.y)";
968        let result = parse_and_unnest(sql);
969        assert!(
970            result.contains("EXISTS"),
971            "Same-table predicate is not correlation: {result}"
972        );
973    }
974
975    // ─────────────────────────────────────────────────────────────────
976    // DISTINCT behaviour in derived tables
977    // ─────────────────────────────────────────────────────────────────
978
979    #[test]
980    fn test_exists_produces_distinct_derived_table() {
981        let sql = "SELECT a.id FROM a WHERE EXISTS (SELECT 1 FROM b WHERE b.id = a.id)";
982        let result = parse_and_unnest(sql);
983        assert!(
984            result.contains("DISTINCT"),
985            "Derived table should use DISTINCT: {result}"
986        );
987    }
988
989    #[test]
990    fn test_in_produces_distinct_derived_table() {
991        let sql = "SELECT a.id FROM a WHERE a.id IN (SELECT b.id FROM b)";
992        let result = parse_and_unnest(sql);
993        assert!(
994            result.contains("DISTINCT"),
995            "IN-derived table should use DISTINCT: {result}"
996        );
997    }
998
999    // ─────────────────────────────────────────────────────────────────
1000    // NOT IN with inner WHERE (should propagate inner WHERE into derived table)
1001    // ─────────────────────────────────────────────────────────────────
1002
1003    #[test]
1004    fn test_not_in_preserves_inner_where() {
1005        let sql = "SELECT a.id FROM a WHERE a.id NOT IN (SELECT b.id FROM b WHERE b.active = 1)";
1006        let result = parse_and_unnest(sql);
1007        assert!(result.contains("LEFT JOIN"), "Expected LEFT JOIN: {result}");
1008        assert!(result.contains("IS NULL"), "Expected IS NULL: {result}");
1009        assert!(
1010            result.contains("active"),
1011            "Inner WHERE should be preserved: {result}"
1012        );
1013    }
1014
1015    // ─────────────────────────────────────────────────────────────────
1016    // AliasGen produces sequential aliases
1017    // ─────────────────────────────────────────────────────────────────
1018
1019    #[test]
1020    fn test_alias_gen_sequential() {
1021        let mut alias_gen = AliasGen::new();
1022        assert_eq!(alias_gen.next(), "_u0");
1023        assert_eq!(alias_gen.next(), "_u1");
1024        assert_eq!(alias_gen.next(), "_u2");
1025    }
1026}