Skip to main content

sqlglot_rust/optimizer/
scope_analysis.rs

1//! Scope analysis for SQL queries.
2//!
3//! Provides a [`Scope`] struct that tracks the sources, columns, and
4//! relationships within a SQL query. This is the foundation for
5//! optimizer passes like qualify_columns, pushdown_predicates,
6//! annotate_types, and column lineage analysis.
7//!
8//! Inspired by Python sqlglot's `optimizer/scope.py`.
9//!
10//! # Example
11//!
12//! ```rust
13//! use sqlglot_rust::parser::parse;
14//! use sqlglot_rust::dialects::Dialect;
15//! use sqlglot_rust::optimizer::scope_analysis::{build_scope, ScopeType};
16//!
17//! let ast = parse("SELECT a, b FROM t WHERE a > 1", Dialect::Ansi).unwrap();
18//! let scope = build_scope(&ast);
19//! assert_eq!(scope.scope_type, ScopeType::Root);
20//! assert!(!scope.columns.is_empty());
21//! ```
22
23use std::collections::HashMap;
24
25use crate::ast::*;
26
27// ═══════════════════════════════════════════════════════════════════════
28// Scope types
29// ═══════════════════════════════════════════════════════════════════════
30
31/// The kind of scope a query fragment lives in.
32#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
33pub enum ScopeType {
34    /// The outermost query.
35    Root,
36    /// A scalar or lateral subquery in a WHERE / SELECT / HAVING expression.
37    Subquery,
38    /// A derived table (subquery in FROM).
39    DerivedTable,
40    /// A CTE definition (`WITH name AS (...)`).
41    Cte,
42    /// One branch of a UNION / INTERSECT / EXCEPT.
43    Union,
44    /// A user-defined table function (UDTF) / LATERAL.
45    Udtf,
46}
47
48// ═══════════════════════════════════════════════════════════════════════
49// Source — either a table reference or a child scope
50// ═══════════════════════════════════════════════════════════════════════
51
52/// A source within a scope. Can be a concrete table or a reference to a
53/// child scope (derived table, CTE, etc.).
54#[derive(Debug, Clone)]
55pub enum Source {
56    /// A direct table reference.
57    Table(TableRef),
58    /// A child scope (derived table, CTE, UDTF).
59    Scope(Box<Scope>),
60}
61
62// ═══════════════════════════════════════════════════════════════════════
63// Column reference — a resolved column encountered in the scope
64// ═══════════════════════════════════════════════════════════════════════
65
66/// A column reference encountered during scope analysis.
67#[derive(Debug, Clone, PartialEq)]
68pub struct ColumnRef {
69    /// Optional table qualifier.
70    pub table: Option<String>,
71    /// The column name.
72    pub name: String,
73}
74
75// ═══════════════════════════════════════════════════════════════════════
76// Scope
77// ═══════════════════════════════════════════════════════════════════════
78
79/// Represents a single query scope and its relationships.
80///
81/// A scope is the context created by a query level (a SELECT, each branch
82/// of a UNION, each CTE definition, etc.). It tracks what tables are
83/// visible, what columns are referenced, and how nested scopes relate.
84#[derive(Debug, Clone)]
85pub struct Scope {
86    /// What kind of scope this is.
87    pub scope_type: ScopeType,
88
89    /// Mapping of source name/alias → [`Source`].
90    /// For tables this is `alias.unwrap_or(name) → Source::Table(...)`.
91    /// For derived tables / CTEs it is `alias → Source::Scope(...)`.
92    pub sources: HashMap<String, Source>,
93
94    /// All column references found *directly* in this scope (not in
95    /// child subquery scopes).
96    pub columns: Vec<ColumnRef>,
97
98    /// Columns that reference an *outer* scope (correlation).
99    pub external_columns: Vec<ColumnRef>,
100
101    /// Child scopes created by derived tables (subqueries in FROM).
102    pub derived_table_scopes: Vec<Scope>,
103
104    /// Child scopes created by scalar subqueries (in SELECT / WHERE / HAVING).
105    pub subquery_scopes: Vec<Scope>,
106
107    /// Child scopes for each branch of a UNION / INTERSECT / EXCEPT.
108    pub union_scopes: Vec<Scope>,
109
110    /// Child scopes for CTE definitions.
111    pub cte_scopes: Vec<Scope>,
112
113    /// Sources that are actually referenced by columns in SELECT (subset
114    /// of `sources`). Keyed the same way as `sources`.
115    pub selected_sources: HashMap<String, Source>,
116
117    /// Whether this scope contains correlated references to an outer scope.
118    pub is_correlated: bool,
119
120    /// The SQL expression/statement this scope was built from (optional,
121    /// kept for diagnostics).
122    expression: Option<ScopeExpression>,
123}
124
125/// Thin wrapper so we can attach the originating AST node for debugging.
126#[derive(Debug, Clone)]
127#[allow(dead_code)]
128enum ScopeExpression {
129    Statement(Statement),
130}
131
132impl Scope {
133    fn new(scope_type: ScopeType) -> Self {
134        Self {
135            scope_type,
136            sources: HashMap::new(),
137            columns: Vec::new(),
138            external_columns: Vec::new(),
139            derived_table_scopes: Vec::new(),
140            subquery_scopes: Vec::new(),
141            union_scopes: Vec::new(),
142            cte_scopes: Vec::new(),
143            selected_sources: HashMap::new(),
144            is_correlated: false,
145            expression: None,
146        }
147    }
148
149    /// Names (aliases / table names) of all sources visible in this scope.
150    #[must_use]
151    pub fn source_names(&self) -> Vec<&str> {
152        self.sources.keys().map(String::as_str).collect()
153    }
154
155    /// Iterate over all child scopes (derived tables, subqueries, unions,
156    /// CTEs) in a flat list.
157    #[must_use]
158    pub fn child_scopes(&self) -> Vec<&Scope> {
159        let mut children: Vec<&Scope> = Vec::new();
160        children.extend(self.derived_table_scopes.iter());
161        children.extend(self.subquery_scopes.iter());
162        children.extend(self.union_scopes.iter());
163        children.extend(self.cte_scopes.iter());
164        children
165    }
166
167    /// Walk through all scopes in the tree (pre-order).
168    pub fn walk<F>(&self, visitor: &mut F)
169    where
170        F: FnMut(&Scope),
171    {
172        visitor(self);
173        for child in self.child_scopes() {
174            child.walk(visitor);
175        }
176    }
177}
178
179// ═══════════════════════════════════════════════════════════════════════
180// Public API
181// ═══════════════════════════════════════════════════════════════════════
182
183/// Build the scope tree from a parsed SQL statement.
184///
185/// The returned [`Scope`] is the root scope; child scopes (CTEs, derived
186/// tables, subqueries, unions) are reachable via the corresponding fields.
187#[must_use]
188pub fn build_scope(statement: &Statement) -> Scope {
189    let mut scope = Scope::new(ScopeType::Root);
190    scope.expression = Some(ScopeExpression::Statement(statement.clone()));
191    build_scope_inner(statement, &mut scope, ScopeType::Root);
192    resolve_selected_sources(&mut scope);
193    detect_correlation(&mut scope, &[]);
194    scope
195}
196
197/// Find all expressions in a scope that match a predicate, respecting
198/// scope boundaries (does not descend into child scopes).
199///
200/// This is the equivalent of Python sqlglot's `find_all_in_scope`.
201#[must_use]
202pub fn find_all_in_scope<'a, F>(scope: &'a Scope, predicate: &F) -> Vec<&'a ColumnRef>
203where
204    F: Fn(&ColumnRef) -> bool,
205{
206    scope.columns.iter().filter(|c| predicate(c)).collect()
207}
208
209// ═══════════════════════════════════════════════════════════════════════
210// Internal scope-building logic
211// ═══════════════════════════════════════════════════════════════════════
212
213fn build_scope_inner(statement: &Statement, scope: &mut Scope, _scope_type: ScopeType) {
214    match statement {
215        Statement::Select(sel) => build_select_scope(sel, scope),
216        Statement::SetOperation(set_op) => build_set_operation_scope(set_op, scope),
217        Statement::CreateView(cv) => {
218            // Treat the view body as a root-like scope
219            build_scope_inner(&cv.query, scope, ScopeType::Root);
220        }
221        Statement::Insert(ins) => {
222            if let InsertSource::Query(q) = &ins.source {
223                let mut sub = Scope::new(ScopeType::Subquery);
224                build_scope_inner(q, &mut sub, ScopeType::Subquery);
225                resolve_selected_sources(&mut sub);
226                scope.subquery_scopes.push(sub);
227            }
228        }
229        Statement::Delete(del) => {
230            // Collect columns from WHERE
231            if let Some(wh) = &del.where_clause {
232                collect_columns_from_expr(wh, scope);
233            }
234        }
235        Statement::Update(upd) => {
236            // Collect columns from assignments and WHERE
237            for (_col, expr) in &upd.assignments {
238                collect_columns_from_expr(expr, scope);
239            }
240            if let Some(wh) = &upd.where_clause {
241                collect_columns_from_expr(wh, scope);
242            }
243        }
244        Statement::Explain(expl) => {
245            build_scope_inner(&expl.statement, scope, _scope_type);
246        }
247        // Other statement types don't introduce meaningful scopes
248        _ => {}
249    }
250}
251
252/// Build scope information for a SELECT statement.
253fn build_select_scope(sel: &SelectStatement, scope: &mut Scope) {
254    // ── 1. Process CTEs ──────────────────────────────────────────────
255    for cte in &sel.ctes {
256        let mut cte_scope = Scope::new(ScopeType::Cte);
257        cte_scope.expression = Some(ScopeExpression::Statement(*cte.query.clone()));
258        build_scope_inner(&cte.query, &mut cte_scope, ScopeType::Cte);
259        resolve_selected_sources(&mut cte_scope);
260
261        // The CTE name is visible as a source in the outer scope
262        scope
263            .sources
264            .insert(cte.name.clone(), Source::Scope(Box::new(cte_scope.clone())));
265        scope.cte_scopes.push(cte_scope);
266    }
267
268    // ── 2. Process FROM source ───────────────────────────────────────
269    if let Some(from) = &sel.from {
270        process_table_source(&from.source, scope);
271    }
272
273    // ── 3. Process JOINs ─────────────────────────────────────────────
274    for join in &sel.joins {
275        process_table_source(&join.table, scope);
276        if let Some(on) = &join.on {
277            collect_columns_from_expr(on, scope);
278        }
279    }
280
281    // ── 4. Process SELECT columns ────────────────────────────────────
282    for item in &sel.columns {
283        match item {
284            SelectItem::Expr { expr, .. } => {
285                collect_columns_from_expr(expr, scope);
286                collect_subqueries_from_expr(expr, scope);
287            }
288            SelectItem::QualifiedWildcard { table } => {
289                // table.* — record as a column reference so selected_sources
290                // picks it up
291                scope.columns.push(ColumnRef {
292                    table: Some(table.clone()),
293                    name: "*".to_string(),
294                });
295            }
296            SelectItem::Wildcard => {}
297        }
298    }
299
300    // ── 5. Process WHERE ─────────────────────────────────────────────
301    if let Some(wh) = &sel.where_clause {
302        collect_columns_from_expr(wh, scope);
303        collect_subqueries_from_expr(wh, scope);
304    }
305
306    // ── 6. Process GROUP BY ──────────────────────────────────────────
307    for expr in &sel.group_by {
308        collect_columns_from_expr(expr, scope);
309    }
310
311    // ── 7. Process HAVING ────────────────────────────────────────────
312    if let Some(having) = &sel.having {
313        collect_columns_from_expr(having, scope);
314        collect_subqueries_from_expr(having, scope);
315    }
316
317    // ── 8. Process ORDER BY ──────────────────────────────────────────
318    for item in &sel.order_by {
319        collect_columns_from_expr(&item.expr, scope);
320    }
321
322    // ── 9. Process QUALIFY ───────────────────────────────────────────
323    if let Some(qualify) = &sel.qualify {
324        collect_columns_from_expr(qualify, scope);
325        collect_subqueries_from_expr(qualify, scope);
326    }
327}
328
329/// Build scope for UNION / INTERSECT / EXCEPT.
330fn build_set_operation_scope(set_op: &SetOperationStatement, scope: &mut Scope) {
331    // Each branch gets its own Union scope
332    let mut left_scope = Scope::new(ScopeType::Union);
333    build_scope_inner(&set_op.left, &mut left_scope, ScopeType::Union);
334    resolve_selected_sources(&mut left_scope);
335    scope.union_scopes.push(left_scope);
336
337    let mut right_scope = Scope::new(ScopeType::Union);
338    build_scope_inner(&set_op.right, &mut right_scope, ScopeType::Union);
339    resolve_selected_sources(&mut right_scope);
340    scope.union_scopes.push(right_scope);
341
342    // ORDER BY and LIMIT on the set operation itself
343    for item in &set_op.order_by {
344        collect_columns_from_expr(&item.expr, scope);
345    }
346}
347
348/// Register a table source in the scope and recurse into derived tables.
349fn process_table_source(source: &TableSource, scope: &mut Scope) {
350    match source {
351        TableSource::Table(table_ref) => {
352            let key = table_ref
353                .alias
354                .as_deref()
355                .unwrap_or(&table_ref.name)
356                .to_string();
357            scope.sources.insert(key, Source::Table(table_ref.clone()));
358        }
359        TableSource::Subquery { query, alias } => {
360            let mut dt_scope = Scope::new(ScopeType::DerivedTable);
361            dt_scope.expression = Some(ScopeExpression::Statement(*query.clone()));
362            build_scope_inner(query, &mut dt_scope, ScopeType::DerivedTable);
363            resolve_selected_sources(&mut dt_scope);
364
365            if let Some(alias) = alias {
366                scope
367                    .sources
368                    .insert(alias.clone(), Source::Scope(Box::new(dt_scope.clone())));
369            }
370            scope.derived_table_scopes.push(dt_scope);
371        }
372        TableSource::TableFunction { alias, .. } => {
373            if let Some(alias) = alias {
374                // UDTF — register the alias but we can't descend further
375                scope.sources.insert(
376                    alias.clone(),
377                    Source::Table(TableRef {
378                        catalog: None,
379                        schema: None,
380                        name: alias.clone(),
381                        alias: None,
382                        name_quote_style: QuoteStyle::None,
383                    }),
384                );
385            }
386        }
387        TableSource::Lateral { source } => {
388            process_table_source(source, scope);
389        }
390        TableSource::Unnest { alias, .. } => {
391            if let Some(alias) = alias {
392                scope.sources.insert(
393                    alias.clone(),
394                    Source::Table(TableRef {
395                        catalog: None,
396                        schema: None,
397                        name: alias.clone(),
398                        alias: None,
399                        name_quote_style: QuoteStyle::None,
400                    }),
401                );
402            }
403        }
404    }
405}
406
407// ═══════════════════════════════════════════════════════════════════════
408// Column collection — stays within scope boundaries
409// ═══════════════════════════════════════════════════════════════════════
410
411/// Collect column references from an expression, stopping at subquery
412/// boundaries (those create their own scopes).
413fn collect_columns_from_expr(expr: &Expr, scope: &mut Scope) {
414    expr.walk(&mut |e| {
415        match e {
416            Expr::Column { table, name, .. } => {
417                scope.columns.push(ColumnRef {
418                    table: table.clone(),
419                    name: name.clone(),
420                });
421                true
422            }
423            // Don't descend into subqueries — they create their own scope
424            Expr::Subquery(_) | Expr::Exists { .. } | Expr::InSubquery { .. } => false,
425            _ => true,
426        }
427    });
428}
429
430/// Collect scalar and EXISTS subqueries from an expression as child scopes.
431fn collect_subqueries_from_expr(expr: &Expr, scope: &mut Scope) {
432    match expr {
433        Expr::Subquery(stmt) => {
434            let mut sub = Scope::new(ScopeType::Subquery);
435            sub.expression = Some(ScopeExpression::Statement(*stmt.clone()));
436            build_scope_inner(stmt, &mut sub, ScopeType::Subquery);
437            resolve_selected_sources(&mut sub);
438            scope.subquery_scopes.push(sub);
439        }
440        Expr::Exists { subquery, .. } => {
441            let mut sub = Scope::new(ScopeType::Subquery);
442            sub.expression = Some(ScopeExpression::Statement(*subquery.clone()));
443            build_scope_inner(subquery, &mut sub, ScopeType::Subquery);
444            resolve_selected_sources(&mut sub);
445            scope.subquery_scopes.push(sub);
446        }
447        Expr::InSubquery {
448            expr: left,
449            subquery,
450            ..
451        } => {
452            // The left-hand expression belongs to the *current* scope
453            collect_columns_from_expr(left, scope);
454
455            let mut sub = Scope::new(ScopeType::Subquery);
456            sub.expression = Some(ScopeExpression::Statement(*subquery.clone()));
457            build_scope_inner(subquery, &mut sub, ScopeType::Subquery);
458            resolve_selected_sources(&mut sub);
459            scope.subquery_scopes.push(sub);
460        }
461        _ => {
462            // Walk children to find nested subqueries
463            walk_expr_for_subqueries(expr, scope);
464        }
465    }
466}
467
468/// Walk child expressions looking for subqueries, without double-processing
469/// the top-level node.
470fn walk_expr_for_subqueries(expr: &Expr, scope: &mut Scope) {
471    match expr {
472        Expr::BinaryOp { left, right, .. } => {
473            collect_subqueries_from_expr(left, scope);
474            collect_subqueries_from_expr(right, scope);
475        }
476        Expr::UnaryOp { expr: inner, .. } => {
477            collect_subqueries_from_expr(inner, scope);
478        }
479        Expr::Function { args, filter, .. } => {
480            for arg in args {
481                collect_subqueries_from_expr(arg, scope);
482            }
483            if let Some(f) = filter {
484                collect_subqueries_from_expr(f, scope);
485            }
486        }
487        Expr::Nested(inner) => {
488            collect_subqueries_from_expr(inner, scope);
489        }
490        Expr::Case {
491            operand,
492            when_clauses,
493            else_clause,
494        } => {
495            if let Some(op) = operand {
496                collect_subqueries_from_expr(op, scope);
497            }
498            for (cond, result) in when_clauses {
499                collect_subqueries_from_expr(cond, scope);
500                collect_subqueries_from_expr(result, scope);
501            }
502            if let Some(el) = else_clause {
503                collect_subqueries_from_expr(el, scope);
504            }
505        }
506        Expr::Between {
507            expr: inner,
508            low,
509            high,
510            ..
511        } => {
512            collect_subqueries_from_expr(inner, scope);
513            collect_subqueries_from_expr(low, scope);
514            collect_subqueries_from_expr(high, scope);
515        }
516        Expr::InList {
517            expr: inner, list, ..
518        } => {
519            collect_subqueries_from_expr(inner, scope);
520            for item in list {
521                collect_subqueries_from_expr(item, scope);
522            }
523        }
524        Expr::Cast { expr: inner, .. } | Expr::TryCast { expr: inner, .. } => {
525            collect_subqueries_from_expr(inner, scope);
526        }
527        Expr::Coalesce(items) | Expr::ArrayLiteral(items) | Expr::Tuple(items) => {
528            for item in items {
529                collect_subqueries_from_expr(item, scope);
530            }
531        }
532        Expr::If {
533            condition,
534            true_val,
535            false_val,
536        } => {
537            collect_subqueries_from_expr(condition, scope);
538            collect_subqueries_from_expr(true_val, scope);
539            if let Some(fv) = false_val {
540                collect_subqueries_from_expr(fv, scope);
541            }
542        }
543        Expr::IsNull { expr: inner, .. } | Expr::IsBool { expr: inner, .. } => {
544            collect_subqueries_from_expr(inner, scope);
545        }
546        Expr::Like {
547            expr: inner,
548            pattern,
549            ..
550        }
551        | Expr::ILike {
552            expr: inner,
553            pattern,
554            ..
555        } => {
556            collect_subqueries_from_expr(inner, scope);
557            collect_subqueries_from_expr(pattern, scope);
558        }
559        Expr::Alias { expr: inner, .. } | Expr::Collate { expr: inner, .. } => {
560            collect_subqueries_from_expr(inner, scope);
561        }
562        Expr::NullIf {
563            expr: inner,
564            r#else,
565        } => {
566            collect_subqueries_from_expr(inner, scope);
567            collect_subqueries_from_expr(r#else, scope);
568        }
569        Expr::AnyOp {
570            expr: inner, right, ..
571        }
572        | Expr::AllOp {
573            expr: inner, right, ..
574        } => {
575            collect_subqueries_from_expr(inner, scope);
576            collect_subqueries_from_expr(right, scope);
577        }
578        Expr::ArrayIndex { expr: inner, index } => {
579            collect_subqueries_from_expr(inner, scope);
580            collect_subqueries_from_expr(index, scope);
581        }
582        Expr::JsonAccess {
583            expr: inner, path, ..
584        } => {
585            collect_subqueries_from_expr(inner, scope);
586            collect_subqueries_from_expr(path, scope);
587        }
588        Expr::Lambda { body, .. } => {
589            collect_subqueries_from_expr(body, scope);
590        }
591        Expr::Extract { expr: inner, .. } | Expr::Interval { value: inner, .. } => {
592            collect_subqueries_from_expr(inner, scope);
593        }
594        // Leaf nodes — nothing to do
595        _ => {}
596    }
597}
598
599// ═══════════════════════════════════════════════════════════════════════
600// Selected-source resolution
601// ═══════════════════════════════════════════════════════════════════════
602
603/// Populate `selected_sources` by checking which sources are actually
604/// referenced by the scope's column list.
605#[allow(clippy::collapsible_if)]
606fn resolve_selected_sources(scope: &mut Scope) {
607    for col in &scope.columns {
608        if let Some(table) = &col.table {
609            if let Some(source) = scope.sources.get(table) {
610                scope
611                    .selected_sources
612                    .entry(table.clone())
613                    .or_insert_with(|| source.clone());
614            }
615        }
616    }
617}
618
619// ═══════════════════════════════════════════════════════════════════════
620// Correlation detection
621// ═══════════════════════════════════════════════════════════════════════
622
623/// Detect correlated references: columns in child scopes that reference
624/// tables from an outer scope. Populates `external_columns` and
625/// `is_correlated` on each child scope.
626fn detect_correlation(scope: &mut Scope, outer_source_names: &[String]) {
627    // Source names visible in this scope
628    let mut visible: Vec<String> = outer_source_names.to_vec();
629    visible.extend(scope.sources.keys().cloned());
630
631    // Process each category of child scopes
632    detect_correlation_in_children(&mut scope.subquery_scopes, &visible);
633    detect_correlation_in_children(&mut scope.derived_table_scopes, &visible);
634    detect_correlation_in_children(&mut scope.union_scopes, &visible);
635    detect_correlation_in_children(&mut scope.cte_scopes, &visible);
636}
637
638#[allow(clippy::collapsible_if)]
639fn detect_correlation_in_children(children: &mut [Scope], outer_names: &[String]) {
640    for child in children.iter_mut() {
641        // A column in the child is external (correlated) if its table
642        // qualifier matches an outer source but NOT a source in the child.
643        for col in &child.columns {
644            if let Some(table) = &col.table {
645                if outer_names.contains(table) && !child.sources.contains_key(table) {
646                    child.external_columns.push(col.clone());
647                    child.is_correlated = true;
648                }
649            }
650        }
651
652        // Recurse into the child's own children
653        detect_correlation(child, outer_names);
654    }
655}
656
657// ═══════════════════════════════════════════════════════════════════════
658// Tests
659// ═══════════════════════════════════════════════════════════════════════
660
661#[cfg(test)]
662mod tests {
663    use super::*;
664    use crate::dialects::Dialect;
665    use crate::parser::parse;
666
667    /// Helper: parse SQL and build scope in one step.
668    fn scope_for(sql: &str) -> Scope {
669        let ast = parse(sql, Dialect::Ansi).unwrap();
670        build_scope(&ast)
671    }
672
673    // ── Basic SELECT ─────────────────────────────────────────────────
674
675    #[test]
676    fn test_simple_select() {
677        let scope = scope_for("SELECT a, b FROM t WHERE a > 1");
678        assert_eq!(scope.scope_type, ScopeType::Root);
679        assert!(scope.sources.contains_key("t"));
680        // Should have columns a, b, a (from WHERE)
681        assert!(scope.columns.len() >= 2);
682        assert!(scope.external_columns.is_empty());
683        assert!(!scope.is_correlated);
684    }
685
686    #[test]
687    fn test_aliased_table() {
688        let scope = scope_for("SELECT t1.x FROM my_table t1");
689        assert!(scope.sources.contains_key("t1"));
690        assert!(!scope.sources.contains_key("my_table"));
691    }
692
693    // ── JOINs ────────────────────────────────────────────────────────
694
695    #[test]
696    fn test_join_sources() {
697        let scope = scope_for("SELECT a.id, b.val FROM alpha a JOIN beta b ON a.id = b.id");
698        assert!(scope.sources.contains_key("a"));
699        assert!(scope.sources.contains_key("b"));
700        // ON condition columns
701        let on_cols: Vec<_> = scope.columns.iter().filter(|c| c.name == "id").collect();
702        assert!(on_cols.len() >= 2); // a.id, b.id from ON + maybe SELECT
703    }
704
705    // ── Derived tables ───────────────────────────────────────────────
706
707    #[test]
708    fn test_derived_table() {
709        let scope = scope_for("SELECT sub.x FROM (SELECT a AS x FROM t) sub");
710        assert!(scope.sources.contains_key("sub"));
711        assert_eq!(scope.derived_table_scopes.len(), 1);
712
713        let dt = &scope.derived_table_scopes[0];
714        assert_eq!(dt.scope_type, ScopeType::DerivedTable);
715        assert!(dt.sources.contains_key("t"));
716    }
717
718    // ── CTEs ─────────────────────────────────────────────────────────
719
720    #[test]
721    fn test_cte_scope() {
722        let scope = scope_for("WITH cte AS (SELECT id FROM t) SELECT cte.id FROM cte");
723        assert!(scope.sources.contains_key("cte"));
724        assert_eq!(scope.cte_scopes.len(), 1);
725
726        let cte = &scope.cte_scopes[0];
727        assert_eq!(cte.scope_type, ScopeType::Cte);
728        assert!(cte.sources.contains_key("t"));
729    }
730
731    #[test]
732    fn test_multiple_ctes() {
733        let scope = scope_for(
734            "WITH a AS (SELECT 1 AS x), b AS (SELECT 2 AS y) \
735             SELECT a.x, b.y FROM a, b",
736        );
737        assert_eq!(scope.cte_scopes.len(), 2);
738        assert!(scope.sources.contains_key("a"));
739        assert!(scope.sources.contains_key("b"));
740    }
741
742    // ── UNION / INTERSECT / EXCEPT ───────────────────────────────────
743
744    #[test]
745    fn test_union_scopes() {
746        let scope = scope_for("SELECT a FROM t1 UNION ALL SELECT b FROM t2");
747        assert_eq!(scope.union_scopes.len(), 2);
748
749        let left = &scope.union_scopes[0];
750        assert_eq!(left.scope_type, ScopeType::Union);
751        assert!(left.sources.contains_key("t1"));
752
753        let right = &scope.union_scopes[1];
754        assert!(right.sources.contains_key("t2"));
755    }
756
757    // ── Scalar subqueries ────────────────────────────────────────────
758
759    #[test]
760    fn test_scalar_subquery() {
761        let scope = scope_for("SELECT (SELECT MAX(x) FROM t2) AS mx FROM t1");
762        assert_eq!(scope.subquery_scopes.len(), 1);
763        let sub = &scope.subquery_scopes[0];
764        assert_eq!(sub.scope_type, ScopeType::Subquery);
765        assert!(sub.sources.contains_key("t2"));
766    }
767
768    // ── EXISTS ───────────────────────────────────────────────────────
769
770    #[test]
771    fn test_exists_subquery() {
772        let scope =
773            scope_for("SELECT a FROM t1 WHERE EXISTS (SELECT 1 FROM t2 WHERE t2.id = t1.id)");
774        assert_eq!(scope.subquery_scopes.len(), 1);
775        let sub = &scope.subquery_scopes[0];
776        assert!(sub.sources.contains_key("t2"));
777        // t1.id inside the subquery references outer scope ⇒ correlated
778        assert!(sub.is_correlated);
779        assert!(!sub.external_columns.is_empty());
780        let ext = &sub.external_columns[0];
781        assert_eq!(ext.table.as_deref(), Some("t1"));
782        assert_eq!(ext.name, "id");
783    }
784
785    // ── IN subquery ──────────────────────────────────────────────────
786
787    #[test]
788    fn test_in_subquery() {
789        let scope = scope_for("SELECT a FROM t1 WHERE a IN (SELECT b FROM t2)");
790        assert_eq!(scope.subquery_scopes.len(), 1);
791        let sub = &scope.subquery_scopes[0];
792        assert!(sub.sources.contains_key("t2"));
793        // Not correlated — no outer reference
794        assert!(!sub.is_correlated);
795    }
796
797    // ── Correlated subquery ──────────────────────────────────────────
798
799    #[test]
800    fn test_correlated_subquery() {
801        let scope =
802            scope_for("SELECT a FROM t1 WHERE a = (SELECT MAX(b) FROM t2 WHERE t2.fk = t1.id)");
803        assert_eq!(scope.subquery_scopes.len(), 1);
804        let sub = &scope.subquery_scopes[0];
805        assert!(sub.is_correlated);
806        assert!(
807            sub.external_columns
808                .iter()
809                .any(|c| c.table.as_deref() == Some("t1"))
810        );
811    }
812
813    // ── Nested subqueries ────────────────────────────────────────────
814
815    #[test]
816    fn test_nested_subqueries() {
817        let scope = scope_for(
818            "SELECT a FROM t1 WHERE a IN (SELECT b FROM t2 WHERE b > (SELECT MIN(c) FROM t3))",
819        );
820        // t1's scope should have 1 subquery (the IN subquery)
821        assert_eq!(scope.subquery_scopes.len(), 1);
822
823        let in_sub = &scope.subquery_scopes[0];
824        assert!(in_sub.sources.contains_key("t2"));
825        // The nested scalar subquery inside the IN subquery
826        assert_eq!(in_sub.subquery_scopes.len(), 1);
827        let inner = &in_sub.subquery_scopes[0];
828        assert!(inner.sources.contains_key("t3"));
829    }
830
831    // ── Selected sources ─────────────────────────────────────────────
832
833    #[test]
834    fn test_selected_sources() {
835        let scope = scope_for("SELECT a.x FROM alpha a JOIN beta b ON a.id = b.id");
836        // Column a.x references source "a", so it should be in selected_sources
837        assert!(scope.selected_sources.contains_key("a"));
838    }
839
840    // ── find_all_in_scope ────────────────────────────────────────────
841
842    #[test]
843    fn test_find_all_in_scope() {
844        let scope = scope_for("SELECT t.a, t.b, s.c FROM t JOIN s ON t.id = s.id");
845        let t_cols = find_all_in_scope(&scope, &|c| c.table.as_deref() == Some("t"));
846        // t.a, t.b, t.id
847        assert!(t_cols.len() >= 3);
848    }
849
850    // ── Scope walk ───────────────────────────────────────────────────
851
852    #[test]
853    fn test_scope_walk() {
854        let scope = scope_for(
855            "WITH cte AS (SELECT 1 AS a) \
856             SELECT * FROM cte WHERE EXISTS (SELECT 1 FROM t)",
857        );
858        let mut count = 0;
859        scope.walk(&mut |_| count += 1);
860        // root + cte + exists subquery = 3
861        assert!(count >= 3);
862    }
863
864    // ── Qualified wildcard ───────────────────────────────────────────
865
866    #[test]
867    fn test_qualified_wildcard() {
868        let scope = scope_for("SELECT t.* FROM t");
869        assert!(
870            scope
871                .columns
872                .iter()
873                .any(|c| c.table.as_deref() == Some("t") && c.name == "*")
874        );
875        assert!(scope.selected_sources.contains_key("t"));
876    }
877}