Skip to main content

flowscope_core/linter/rules/
st_003.rs

1//! LINT_ST_003: Unused CTE.
2//!
3//! A CTE (WITH clause) is defined but never referenced in the query body
4//! or subsequent CTEs. This is likely dead code.
5
6use crate::linter::rule::{LintContext, LintRule};
7use crate::types::{issue_codes, Issue};
8use sqlparser::ast::*;
9use std::collections::HashSet;
10
11pub struct UnusedCte;
12
13impl LintRule for UnusedCte {
14    fn code(&self) -> &'static str {
15        issue_codes::LINT_ST_003
16    }
17
18    fn name(&self) -> &'static str {
19        "Unused CTE"
20    }
21
22    fn description(&self) -> &'static str {
23        "Query defines a CTE (common-table expression) but does not use it."
24    }
25
26    fn check(&self, stmt: &Statement, ctx: &LintContext) -> Vec<Issue> {
27        let query = match stmt {
28            Statement::Query(q) => q,
29            Statement::Insert(ins) => {
30                if let Some(ref source) = ins.source {
31                    source
32                } else {
33                    return Vec::new();
34                }
35            }
36            Statement::CreateView { query, .. } => query,
37            Statement::CreateTable(create) => {
38                if let Some(ref q) = create.query {
39                    q
40                } else {
41                    return Vec::new();
42                }
43            }
44            Statement::Delete(delete) => {
45                let mut issues = Vec::new();
46                check_delete_for_nested_ctes(delete, ctx, &mut issues);
47                return issues;
48            }
49            _ => return Vec::new(),
50        };
51
52        let mut issues = Vec::new();
53        check_query_unused_ctes(query, ctx, &mut issues);
54        issues
55    }
56}
57
58/// Checks a query for unused CTEs, including nested WITH clauses inside CTE
59/// bodies.
60fn check_query_unused_ctes(query: &Query, ctx: &LintContext, issues: &mut Vec<Issue>) {
61    let with = match &query.with {
62        Some(w) => w,
63        None => {
64            // Even without a top-level WITH, the body may contain nested CTEs.
65            check_set_expr_for_nested_ctes(&query.body, ctx, issues);
66            return;
67        }
68    };
69
70    // Collect table references from the query body only (the body's own
71    // references, not inner CTE definitions which are a separate scope).
72    let mut referenced = HashSet::new();
73    collect_table_refs(&query.body, &mut referenced);
74    if let Some(order_by) = &query.order_by {
75        collect_order_by_refs(order_by, &mut referenced);
76    }
77
78    // Each CTE can reference earlier CTEs; collect those refs too.
79    for (i, cte) in with.cte_tables.iter().enumerate() {
80        let mut cte_refs = HashSet::new();
81        collect_query_refs(&cte.query, &mut cte_refs);
82        for later_cte in &with.cte_tables[i + 1..] {
83            collect_query_refs(&later_cte.query, &mut cte_refs);
84        }
85        referenced.extend(cte_refs);
86    }
87
88    for (i, cte) in with.cte_tables.iter().enumerate() {
89        let name_upper = cte.alias.name.value.to_uppercase();
90        if !referenced.contains(&name_upper) {
91            let referenced_by_later = with.cte_tables[i + 1..].iter().any(|later| {
92                let mut refs = HashSet::new();
93                collect_query_refs(&later.query, &mut refs);
94                refs.contains(&name_upper)
95            });
96            if referenced_by_later {
97                continue;
98            }
99
100            let span = find_cte_name_span(&cte.alias.name, ctx);
101            let mut issue = Issue::warning(
102                issue_codes::LINT_ST_003,
103                format!(
104                    "CTE '{}' is defined but never referenced.",
105                    cte.alias.name.value
106                ),
107            )
108            .with_statement(ctx.statement_index);
109            if let Some(s) = span {
110                issue = issue.with_span(s);
111            }
112            issues.push(issue);
113        }
114
115        // Recursively check nested CTEs inside this CTE's body.
116        check_query_unused_ctes(&cte.query, ctx, issues);
117    }
118
119    // Also check nested CTEs in the main body (e.g. subqueries with WITH).
120    check_set_expr_for_nested_ctes(&query.body, ctx, issues);
121}
122
123/// Walks a set expression looking for nested queries that might contain WITH
124/// clauses to check.
125fn check_set_expr_for_nested_ctes(expr: &SetExpr, ctx: &LintContext, issues: &mut Vec<Issue>) {
126    match expr {
127        SetExpr::Select(select) => {
128            for item in &select.from {
129                check_relation_for_nested_ctes(&item.relation, ctx, issues);
130                for join in &item.joins {
131                    check_relation_for_nested_ctes(&join.relation, ctx, issues);
132                }
133            }
134            // Check subqueries in projections and predicates.
135            for item in &select.projection {
136                if let SelectItem::UnnamedExpr(e) | SelectItem::ExprWithAlias { expr: e, .. } = item
137                {
138                    check_expr_for_nested_ctes(e, ctx, issues);
139                }
140            }
141            if let Some(sel) = &select.selection {
142                check_expr_for_nested_ctes(sel, ctx, issues);
143            }
144        }
145        SetExpr::Query(q) => check_query_unused_ctes(q, ctx, issues),
146        SetExpr::SetOperation { left, right, .. } => {
147            check_set_expr_for_nested_ctes(left, ctx, issues);
148            check_set_expr_for_nested_ctes(right, ctx, issues);
149        }
150        _ => {}
151    }
152}
153
154/// Checks a DELETE statement for CTEs inside USING and FROM subqueries.
155fn check_delete_for_nested_ctes(delete: &Delete, ctx: &LintContext, issues: &mut Vec<Issue>) {
156    if let Some(using) = &delete.using {
157        for twj in using {
158            check_relation_for_nested_ctes(&twj.relation, ctx, issues);
159            for join in &twj.joins {
160                check_relation_for_nested_ctes(&join.relation, ctx, issues);
161            }
162        }
163    }
164    let from_tables = match &delete.from {
165        FromTable::WithFromKeyword(tables) | FromTable::WithoutKeyword(tables) => tables,
166    };
167    for twj in from_tables {
168        check_relation_for_nested_ctes(&twj.relation, ctx, issues);
169        for join in &twj.joins {
170            check_relation_for_nested_ctes(&join.relation, ctx, issues);
171        }
172    }
173}
174
175fn check_relation_for_nested_ctes(
176    relation: &TableFactor,
177    ctx: &LintContext,
178    issues: &mut Vec<Issue>,
179) {
180    if let TableFactor::Derived { subquery, .. } = relation {
181        check_query_unused_ctes(subquery, ctx, issues);
182    }
183}
184
185fn check_expr_for_nested_ctes(expr: &Expr, ctx: &LintContext, issues: &mut Vec<Issue>) {
186    match expr {
187        Expr::Subquery(q) | Expr::Exists { subquery: q, .. } => {
188            check_query_unused_ctes(q, ctx, issues);
189        }
190        Expr::InSubquery { subquery, expr, .. } => {
191            check_query_unused_ctes(subquery, ctx, issues);
192            check_expr_for_nested_ctes(expr, ctx, issues);
193        }
194        Expr::BinaryOp { left, right, .. } => {
195            check_expr_for_nested_ctes(left, ctx, issues);
196            check_expr_for_nested_ctes(right, ctx, issues);
197        }
198        Expr::Nested(inner) => check_expr_for_nested_ctes(inner, ctx, issues),
199        _ => {}
200    }
201}
202
203/// Collects all table references from a query, including nested CTE bodies.
204fn collect_query_refs(query: &Query, refs: &mut HashSet<String>) {
205    if let Some(w) = &query.with {
206        for cte in &w.cte_tables {
207            collect_query_refs(&cte.query, refs);
208        }
209    }
210    collect_table_refs(&query.body, refs);
211    if let Some(order_by) = &query.order_by {
212        collect_order_by_refs(order_by, refs);
213    }
214}
215
216fn collect_statement_refs(stmt: &Statement, refs: &mut HashSet<String>) {
217    match stmt {
218        Statement::Query(query) => collect_query_refs(query, refs),
219        Statement::Insert(insert) => {
220            if let Some(source) = &insert.source {
221                collect_query_refs(source, refs);
222            }
223        }
224        Statement::CreateView { query, .. } => collect_query_refs(query, refs),
225        Statement::CreateTable(create) => {
226            if let Some(query) = &create.query {
227                collect_query_refs(query, refs);
228            }
229        }
230        Statement::Update {
231            table,
232            from,
233            selection,
234            ..
235        } => {
236            collect_relation_refs(&table.relation, refs);
237            for join in &table.joins {
238                collect_relation_refs(&join.relation, refs);
239                collect_join_constraint_refs(&join.join_operator, refs);
240            }
241            if let Some(from_kind) = from {
242                let tables = match from_kind {
243                    UpdateTableFromKind::BeforeSet(t) | UpdateTableFromKind::AfterSet(t) => t,
244                };
245                for twj in tables {
246                    collect_relation_refs(&twj.relation, refs);
247                    for join in &twj.joins {
248                        collect_relation_refs(&join.relation, refs);
249                        collect_join_constraint_refs(&join.join_operator, refs);
250                    }
251                }
252            }
253            if let Some(sel) = selection {
254                collect_expr_table_refs(sel, refs);
255            }
256        }
257        Statement::Delete(delete) => {
258            if let Some(using) = &delete.using {
259                for twj in using {
260                    collect_relation_refs(&twj.relation, refs);
261                    for join in &twj.joins {
262                        collect_relation_refs(&join.relation, refs);
263                        collect_join_constraint_refs(&join.join_operator, refs);
264                    }
265                }
266            }
267            if let Some(sel) = &delete.selection {
268                collect_expr_table_refs(sel, refs);
269            }
270        }
271        _ => {}
272    }
273}
274
275/// Recursively collects uppercase table/CTE names referenced in a set expression.
276fn collect_table_refs(expr: &SetExpr, refs: &mut HashSet<String>) {
277    match expr {
278        SetExpr::Select(select) => {
279            for item in &select.from {
280                collect_relation_refs(&item.relation, refs);
281                for join in &item.joins {
282                    collect_relation_refs(&join.relation, refs);
283                    collect_join_constraint_refs(&join.join_operator, refs);
284                }
285            }
286            // Check subqueries in SELECT and predicate expressions.
287            for item in &select.projection {
288                if let SelectItem::UnnamedExpr(expr) | SelectItem::ExprWithAlias { expr, .. } = item
289                {
290                    collect_expr_table_refs(expr, refs);
291                }
292            }
293            if let Some(prewhere) = &select.prewhere {
294                collect_expr_table_refs(prewhere, refs);
295            }
296            if let Some(ref selection) = select.selection {
297                collect_expr_table_refs(selection, refs);
298            }
299            if let Some(ref having) = select.having {
300                collect_expr_table_refs(having, refs);
301            }
302            if let Some(ref qualify) = select.qualify {
303                collect_expr_table_refs(qualify, refs);
304            }
305            if let GroupByExpr::Expressions(exprs, _) = &select.group_by {
306                for expr in exprs {
307                    collect_expr_table_refs(expr, refs);
308                }
309            }
310            for sort_expr in &select.sort_by {
311                collect_expr_table_refs(&sort_expr.expr, refs);
312            }
313        }
314        SetExpr::Query(q) => {
315            collect_query_refs(q, refs);
316            // Also check subquery CTEs
317            if let Some(w) = &q.with {
318                for cte in &w.cte_tables {
319                    collect_query_refs(&cte.query, refs);
320                }
321            }
322        }
323        SetExpr::SetOperation { left, right, .. } => {
324            collect_table_refs(left, refs);
325            collect_table_refs(right, refs);
326        }
327        SetExpr::Insert(stmt)
328        | SetExpr::Update(stmt)
329        | SetExpr::Delete(stmt)
330        | SetExpr::Merge(stmt) => {
331            collect_statement_refs(stmt, refs);
332        }
333        _ => {}
334    }
335}
336
337/// Collects table/CTE references from subqueries inside expressions.
338fn collect_expr_table_refs(expr: &Expr, refs: &mut HashSet<String>) {
339    match expr {
340        Expr::InSubquery { subquery, expr, .. } => {
341            collect_query_refs(subquery, refs);
342            if let Some(w) = &subquery.with {
343                for cte in &w.cte_tables {
344                    collect_query_refs(&cte.query, refs);
345                }
346            }
347            collect_expr_table_refs(expr, refs);
348        }
349        Expr::Subquery(subquery) | Expr::Exists { subquery, .. } => {
350            collect_query_refs(subquery, refs);
351            if let Some(w) = &subquery.with {
352                for cte in &w.cte_tables {
353                    collect_query_refs(&cte.query, refs);
354                }
355            }
356        }
357        Expr::BinaryOp { left, right, .. } => {
358            collect_expr_table_refs(left, refs);
359            collect_expr_table_refs(right, refs);
360        }
361        Expr::UnaryOp { expr: inner, .. }
362        | Expr::Nested(inner)
363        | Expr::IsNull(inner)
364        | Expr::IsNotNull(inner) => {
365            collect_expr_table_refs(inner, refs);
366        }
367        Expr::InList { expr, list, .. } => {
368            collect_expr_table_refs(expr, refs);
369            for item in list {
370                collect_expr_table_refs(item, refs);
371            }
372        }
373        Expr::Between {
374            expr, low, high, ..
375        } => {
376            collect_expr_table_refs(expr, refs);
377            collect_expr_table_refs(low, refs);
378            collect_expr_table_refs(high, refs);
379        }
380        Expr::Case {
381            operand,
382            conditions,
383            else_result,
384            ..
385        } => {
386            if let Some(op) = operand {
387                collect_expr_table_refs(op, refs);
388            }
389            for case_when in conditions {
390                collect_expr_table_refs(&case_when.condition, refs);
391                collect_expr_table_refs(&case_when.result, refs);
392            }
393            if let Some(el) = else_result {
394                collect_expr_table_refs(el, refs);
395            }
396        }
397        Expr::Cast { expr: inner, .. } => {
398            collect_expr_table_refs(inner, refs);
399        }
400        Expr::Function(func) => {
401            if let FunctionArguments::List(arg_list) = &func.args {
402                for arg in &arg_list.args {
403                    match arg {
404                        FunctionArg::Unnamed(FunctionArgExpr::Expr(e))
405                        | FunctionArg::Named {
406                            arg: FunctionArgExpr::Expr(e),
407                            ..
408                        } => collect_expr_table_refs(e, refs),
409                        _ => {}
410                    }
411                }
412            }
413        }
414        _ => {}
415    }
416}
417
418fn collect_relation_refs(relation: &TableFactor, refs: &mut HashSet<String>) {
419    match relation {
420        TableFactor::Table { name, .. } => {
421            // Use the last part of the name (table name) for CTE matching
422            if let Some(part) = name.0.last() {
423                let value = part
424                    .as_ident()
425                    .map(|ident| ident.value.clone())
426                    .unwrap_or_else(|| part.to_string());
427                refs.insert(value.to_uppercase());
428            }
429        }
430        TableFactor::Derived { subquery, .. } => {
431            collect_query_refs(subquery, refs);
432            if let Some(w) = &subquery.with {
433                for cte in &w.cte_tables {
434                    collect_query_refs(&cte.query, refs);
435                }
436            }
437        }
438        TableFactor::NestedJoin {
439            table_with_joins, ..
440        } => {
441            collect_relation_refs(&table_with_joins.relation, refs);
442            for join in &table_with_joins.joins {
443                collect_relation_refs(&join.relation, refs);
444                collect_join_constraint_refs(&join.join_operator, refs);
445            }
446        }
447        _ => {}
448    }
449}
450
451fn collect_order_by_refs(order_by: &OrderBy, refs: &mut HashSet<String>) {
452    if let OrderByKind::Expressions(order_exprs) = &order_by.kind {
453        for order_expr in order_exprs {
454            collect_expr_table_refs(&order_expr.expr, refs);
455        }
456    }
457}
458
459fn collect_join_constraint_refs(join_operator: &JoinOperator, refs: &mut HashSet<String>) {
460    let constraint = match join_operator {
461        JoinOperator::Join(c)
462        | JoinOperator::Inner(c)
463        | JoinOperator::LeftOuter(c)
464        | JoinOperator::RightOuter(c)
465        | JoinOperator::FullOuter(c)
466        | JoinOperator::LeftSemi(c)
467        | JoinOperator::RightSemi(c)
468        | JoinOperator::LeftAnti(c)
469        | JoinOperator::RightAnti(c) => c,
470        _ => return,
471    };
472    if let JoinConstraint::On(expr) = constraint {
473        collect_expr_table_refs(expr, refs);
474    }
475}
476
477fn find_cte_name_span(name: &Ident, ctx: &LintContext) -> Option<crate::types::Span> {
478    ident_span_in_statement(name, ctx)
479}
480
481fn ident_span_in_statement(name: &Ident, ctx: &LintContext) -> Option<crate::types::Span> {
482    use crate::analyzer::helpers::line_col_to_offset;
483
484    let start = line_col_to_offset(
485        ctx.sql,
486        name.span.start.line as usize,
487        name.span.start.column as usize,
488    )?;
489    let end = line_col_to_offset(
490        ctx.sql,
491        name.span.end.line as usize,
492        name.span.end.column as usize,
493    )?;
494
495    if start >= end {
496        return None;
497    }
498
499    if start < ctx.statement_range.start || end > ctx.statement_range.end {
500        return None;
501    }
502
503    Some(crate::types::Span::new(start, end))
504}
505
506#[cfg(test)]
507mod tests {
508    use super::*;
509    use crate::parser::parse_sql;
510
511    fn check_sql(sql: &str) -> Vec<Issue> {
512        let stmts = parse_sql(sql).unwrap();
513        let rule = UnusedCte;
514        let ctx = LintContext {
515            sql,
516            statement_range: 0..sql.len(),
517            statement_index: 0,
518        };
519        let mut issues = Vec::new();
520        for stmt in &stmts {
521            issues.extend(rule.check(stmt, &ctx));
522        }
523        issues
524    }
525
526    #[test]
527    fn test_unused_cte_detected() {
528        let issues = check_sql("WITH unused AS (SELECT 1) SELECT 2");
529        assert_eq!(issues.len(), 1);
530        assert_eq!(issues[0].code, "LINT_ST_003");
531        assert!(issues[0].message.contains("unused"));
532    }
533
534    #[test]
535    fn test_unused_cte_span_matches_cte_name() {
536        let sql = "WITH unused AS (SELECT 1) SELECT 2";
537        let issues = check_sql(sql);
538        let span = issues[0].span.expect("span");
539        assert_eq!(&sql[span.start..span.end], "unused");
540    }
541
542    #[test]
543    fn test_used_cte_ok() {
544        let issues = check_sql("WITH my_cte AS (SELECT 1) SELECT * FROM my_cte");
545        assert!(issues.is_empty());
546    }
547
548    #[test]
549    fn test_cte_referenced_by_later_cte() {
550        let issues = check_sql("WITH a AS (SELECT 1), b AS (SELECT * FROM a) SELECT * FROM b");
551        assert!(issues.is_empty());
552    }
553
554    // --- Edge cases adopted from sqlfluff ST03 (structure.unused_cte) ---
555
556    #[test]
557    fn test_no_cte_ok() {
558        let issues = check_sql("SELECT * FROM t");
559        assert!(issues.is_empty());
560    }
561
562    #[test]
563    fn test_multiple_ctes_all_used() {
564        let issues = check_sql(
565            "WITH cte1 AS (SELECT a FROM t), cte2 AS (SELECT b FROM t) \
566             SELECT cte1.a, cte2.b FROM cte1 JOIN cte2 ON cte1.a = cte2.b",
567        );
568        assert!(issues.is_empty());
569    }
570
571    #[test]
572    fn test_multiple_ctes_one_unused() {
573        let issues = check_sql(
574            "WITH cte1 AS (SELECT a FROM t), cte2 AS (SELECT b FROM t), cte3 AS (SELECT c FROM t) \
575             SELECT * FROM cte1 JOIN cte3 ON cte1.a = cte3.c",
576        );
577        assert_eq!(issues.len(), 1);
578        assert!(issues[0].message.contains("cte2"));
579    }
580
581    #[test]
582    fn test_cte_used_in_subquery() {
583        let issues = check_sql(
584            "WITH cte AS (SELECT id FROM t) \
585             SELECT * FROM t2 WHERE id IN (SELECT id FROM cte)",
586        );
587        assert!(issues.is_empty());
588    }
589
590    #[test]
591    fn test_cte_used_in_exists_subquery() {
592        let issues = check_sql(
593            "WITH cte AS (SELECT id FROM t) \
594             SELECT 1 WHERE EXISTS (SELECT 1 FROM cte)",
595        );
596        assert!(issues.is_empty());
597    }
598
599    #[test]
600    fn test_cte_in_insert() {
601        let issues = check_sql("INSERT INTO target WITH unused AS (SELECT 1) SELECT 2");
602        assert_eq!(issues.len(), 1);
603    }
604    #[test]
605    fn test_with_insert_ctes_used_ok() {
606        let issues = check_sql(
607            "WITH a AS (SELECT 1), b AS (SELECT * FROM a) \
608             INSERT INTO target SELECT * FROM b",
609        );
610        assert!(
611            issues.is_empty(),
612            "expected no unused CTEs, got: {issues:#?}"
613        );
614    }
615
616    #[test]
617    fn test_cte_in_create_view() {
618        let issues = check_sql("CREATE VIEW v AS WITH unused AS (SELECT 1) SELECT 2");
619        assert_eq!(issues.len(), 1);
620    }
621
622    #[test]
623    fn test_chained_ctes_three_levels() {
624        let issues = check_sql(
625            "WITH a AS (SELECT 1), b AS (SELECT * FROM a), c AS (SELECT * FROM b) \
626             SELECT * FROM c",
627        );
628        assert!(issues.is_empty());
629    }
630
631    #[test]
632    fn test_cte_case_insensitive() {
633        let issues = check_sql("WITH My_Cte AS (SELECT 1) SELECT * FROM my_cte");
634        assert!(issues.is_empty());
635    }
636
637    #[test]
638    fn test_cte_used_in_join() {
639        let issues = check_sql(
640            "WITH cte AS (SELECT id FROM t) \
641             SELECT * FROM t2 JOIN cte ON t2.id = cte.id",
642        );
643        assert!(issues.is_empty());
644    }
645
646    #[test]
647    fn test_all_ctes_unused() {
648        let issues = check_sql("WITH a AS (SELECT 1), b AS (SELECT 2) SELECT 3");
649        assert_eq!(issues.len(), 2);
650    }
651
652    #[test]
653    fn test_update_cte_used_in_from() {
654        // SQLFluff: test_pass_update_cte
655        let sql = "\
656            WITH cte AS (SELECT id, name, description FROM table1) \
657            UPDATE table2 SET name = cte.name, description = cte.description \
658            FROM cte WHERE table2.id = cte.id";
659        assert!(check_sql(sql).is_empty());
660    }
661
662    #[test]
663    fn test_nested_cte_unused() {
664        // SQLFluff: test_fail_nested_cte
665        let sql = "WITH a AS (WITH b AS (SELECT 1 FROM foo) SELECT 1) SELECT * FROM a";
666        let issues = check_sql(sql);
667        assert_eq!(issues.len(), 1);
668        assert!(issues[0].message.contains("b"));
669    }
670
671    #[test]
672    fn test_nested_with_cte_used() {
673        // SQLFluff: test_pass_nested_with_cte
674        let sql = "\
675            WITH example_cte AS (SELECT 1), \
676            container_cte AS (\
677                WITH nested_cte AS (SELECT * FROM example_cte) \
678                SELECT * FROM nested_cte\
679            ) SELECT * FROM container_cte";
680        assert!(check_sql(sql).is_empty());
681    }
682
683    #[test]
684    fn test_snowflake_delete_cte() {
685        // SQLFluff: test_snowflake_delete_cte
686        // CTE inside a derived table (USING subquery) is unused.
687        let sql = "\
688            DELETE FROM MYTABLE1 \
689            USING (\
690                WITH MYCTE AS (SELECT COLUMN2 FROM MYTABLE3) \
691                SELECT COLUMN3 FROM MYTABLE3\
692            ) X \
693            WHERE COLUMN1 = X.COLUMN3";
694        let issues = check_sql(sql);
695        assert_eq!(issues.len(), 1);
696        assert!(issues[0].message.to_uppercase().contains("MYCTE"));
697    }
698}