Skip to main content

sqruff_lib/rules/structure/
st11.rs

1use hashbrown::HashMap;
2use smol_str::StrExt;
3use sqruff_lib_core::dialects::syntax::{SyntaxKind, SyntaxSet};
4use sqruff_lib_core::parser::segments::ErasedSegment;
5use sqruff_lib_core::parser::segments::from::FromExpressionElementSegment;
6use sqruff_lib_core::parser::segments::object_reference::{
7    ObjectReferenceKind, ObjectReferenceSegment,
8};
9use sqruff_lib_core::utils::analysis::query::Query;
10
11use crate::core::config::Value;
12use crate::core::rules::context::RuleContext;
13use crate::core::rules::crawlers::{Crawler, SegmentSeekerCrawler};
14use crate::core::rules::{Erased, ErasedRule, LintResult, Rule, RuleGroups};
15
16#[derive(Debug, Default, Clone)]
17pub struct RuleST11;
18
19impl Rule for RuleST11 {
20    fn load_from_config(&self, _config: &HashMap<String, Value>) -> Result<ErasedRule, String> {
21        Ok(RuleST11.erased())
22    }
23
24    fn name(&self) -> &'static str {
25        "structure.unused_join"
26    }
27
28    fn description(&self) -> &'static str {
29        "Joined table not referenced in query."
30    }
31
32    fn long_description(&self) -> &'static str {
33        r#"
34**Anti-pattern**
35
36In this example, the table ``bar`` is included in the ``JOIN`` clause
37but no columns from it are referenced elsewhere in the query.
38
39```sql
40SELECT
41    foo.a,
42    foo.b
43FROM foo
44LEFT JOIN bar ON foo.a = bar.a
45```
46
47**Best practice**
48
49Remove the join, or use the table.
50
51```sql
52SELECT
53    foo.a,
54    foo.b,
55    bar.c
56FROM foo
57LEFT JOIN bar ON foo.a = bar.a
58```
59"#
60    }
61
62    fn groups(&self) -> &'static [RuleGroups] {
63        &[RuleGroups::All, RuleGroups::Structure]
64    }
65
66    fn eval(&self, context: &RuleContext) -> Vec<LintResult> {
67        let segment = &context.segment;
68
69        // Extract joined tables and which are outer joins.
70        let joined_tables = match self.extract_references_from_select(segment) {
71            Some(tables) => tables,
72            None => return Vec::new(),
73        };
74
75        if joined_tables.is_empty() {
76            return Vec::new();
77        }
78
79        // Now scan all the other clauses for table references.
80        let mut table_references = hashbrown::HashSet::new();
81        let reference_clause_types: &[SyntaxKind] = &[
82            SyntaxKind::SelectClause,
83            SyntaxKind::WhereClause,
84            SyntaxKind::GroupbyClause,
85            SyntaxKind::OrderbyClause,
86            SyntaxKind::HavingClause,
87            SyntaxKind::QualifyClause,
88        ];
89
90        for clause_type in reference_clause_types {
91            let clause = segment.child(&SyntaxSet::new(&[*clause_type]));
92            if let Some(clause) = clause {
93                // Extract all column references from this clause.
94                for col_ref in clause.recursive_crawl(
95                    const { &SyntaxSet::new(&[SyntaxKind::ColumnReference]) },
96                    true,
97                    &SyntaxSet::EMPTY,
98                    true,
99                ) {
100                    let obj_ref = ObjectReferenceSegment(col_ref, ObjectReferenceKind::Object);
101                    let parts = obj_ref.iter_raw_references();
102                    if parts.len() < 2 {
103                        // Unqualified reference found - abort for this SELECT
104                        // because we can't resolve which table it belongs to.
105                        return Vec::new();
106                    }
107                    // The table qualifier is the second-to-last part.
108                    let table_part = &parts[parts.len() - 2].part;
109                    table_references.insert(
110                        table_part
111                            .to_uppercase()
112                            .trim_matches(|c| {
113                                c == '"' || c == '\'' || c == '`' || c == '[' || c == ']'
114                            })
115                            .to_string(),
116                    );
117                }
118            }
119        }
120
121        // Also check for wildcards (e.g., table.* or *)
122        let query = Query::from_segment(segment, context.dialect, None);
123        let inner = query.inner.borrow();
124        for selectable in &inner.selectables {
125            for wcinfo in selectable.wildcard_info() {
126                for table in &wcinfo.tables {
127                    table_references.insert(
128                        table
129                            .to_uppercase()
130                            .trim_matches(|c: char| {
131                                c == '"' || c == '\'' || c == '`' || c == '[' || c == ']'
132                            })
133                            .to_string(),
134                    );
135                }
136            }
137        }
138
139        // Now check which joined tables are not referenced.
140        let mut results = Vec::new();
141        for (tbl_ref, segment) in &joined_tables {
142            if !table_references.contains(tbl_ref.as_str()) {
143                results.push(LintResult::new(
144                    Some(segment.clone()),
145                    Vec::new(),
146                    Some(format!(
147                        "Joined table '{}' not referenced elsewhere in query.",
148                        segment.raw()
149                    )),
150                    None,
151                ));
152            }
153        }
154
155        results
156    }
157
158    fn crawl_behaviour(&self) -> Crawler {
159        SegmentSeekerCrawler::new(const { SyntaxSet::new(&[SyntaxKind::SelectStatement]) }).into()
160    }
161}
162
163impl RuleST11 {
164    /// Extract the alias/name from a from_expression_element.
165    fn extract_reference_from_expression(&self, segment: &ErasedSegment) -> String {
166        let alias = FromExpressionElementSegment(segment.clone()).eventual_alias();
167        let ref_str = alias.ref_str.as_str();
168        if ref_str.is_empty() {
169            return String::new();
170        }
171        ref_str
172            .to_uppercase()
173            .trim_matches(|c| c == '"' || c == '\'' || c == '`' || c == '[' || c == ']')
174            .to_string()
175    }
176
177    /// Extract tables from column references within a segment, yielding
178    /// the uppercase table qualifier. If allow_unqualified is false and an
179    /// unqualified reference is found, returns None.
180    fn extract_referenced_tables(
181        &self,
182        segment: &ErasedSegment,
183        allow_unqualified: bool,
184    ) -> Option<Vec<String>> {
185        let mut tables = Vec::new();
186        for col_ref in segment.recursive_crawl(
187            const { &SyntaxSet::new(&[SyntaxKind::ColumnReference]) },
188            true,
189            &SyntaxSet::EMPTY,
190            true,
191        ) {
192            let obj_ref = ObjectReferenceSegment(col_ref, ObjectReferenceKind::Object);
193            let parts = obj_ref.iter_raw_references();
194            if parts.len() < 2 {
195                if allow_unqualified {
196                    continue;
197                } else {
198                    return None;
199                }
200            }
201            let table_part = &parts[parts.len() - 2].part;
202            tables.push(
203                table_part
204                    .to_uppercase()
205                    .trim_matches(|c| c == '"' || c == '\'' || c == '`' || c == '[' || c == ']')
206                    .to_string(),
207            );
208        }
209        Some(tables)
210    }
211
212    /// Extract the list of (uppercase_table_ref, segment) for tables brought in
213    /// via FROM/JOIN that are candidates for being unused. Only explicit OUTER
214    /// joins (LEFT, RIGHT, FULL) are flagged. Returns None if there are fewer
215    /// than 2 tables overall (single table queries are not checked).
216    fn extract_references_from_select(
217        &self,
218        segment: &ErasedSegment,
219    ) -> Option<Vec<(String, ErasedSegment)>> {
220        let from_clause = segment.child(const { &SyntaxSet::new(&[SyntaxKind::FromClause]) })?;
221
222        let mut joined_tables: Vec<(String, ErasedSegment)> = Vec::new();
223        let mut referenced_tables: Vec<String> = Vec::new();
224        let mut total_table_count = 0;
225
226        for from_expression in
227            from_clause.children(const { &SyntaxSet::new(&[SyntaxKind::FromExpression]) })
228        {
229            // Handle the main FROM expression elements (implicit cross joins).
230            let from_elements: Vec<_> = from_expression
231                .children(const { &SyntaxSet::new(&[SyntaxKind::FromExpressionElement]) })
232                .cloned()
233                .collect();
234
235            if from_elements.len() > 1 {
236                // Implicit cross join - don't add FROM tables as candidates.
237                total_table_count += from_elements.len();
238            } else {
239                for elem in &from_elements {
240                    let ref_str = self.extract_reference_from_expression(elem);
241                    if !ref_str.is_empty() {
242                        // FROM tables are not candidates for being unused (they
243                        // aren't joins), but we still track them for the count.
244                        total_table_count += 1;
245                    }
246                }
247            }
248
249            // Handle JOIN clauses.
250            for join_clause in
251                from_expression.children(const { &SyntaxSet::new(&[SyntaxKind::JoinClause]) })
252            {
253                // Check if this is an outer join (LEFT, RIGHT, FULL).
254                let join_keywords: hashbrown::HashSet<String> = join_clause
255                    .children(const { &SyntaxSet::new(&[SyntaxKind::Keyword]) })
256                    .map(|kw| kw.raw().to_uppercase_smolstr().to_string())
257                    .collect();
258
259                let is_outer = join_keywords.contains("LEFT")
260                    || join_keywords.contains("RIGHT")
261                    || join_keywords.contains("FULL");
262
263                let mut this_clause_refs = Vec::new();
264
265                for from_elem in join_clause
266                    .children(const { &SyntaxSet::new(&[SyntaxKind::FromExpressionElement]) })
267                {
268                    let ref_str = self.extract_reference_from_expression(from_elem);
269                    total_table_count += 1;
270
271                    if !ref_str.is_empty() && is_outer {
272                        joined_tables.push((ref_str.clone(), from_elem.clone()));
273                        this_clause_refs.push(ref_str);
274                    }
275
276                    // Check for table references within the from_expression_element
277                    // (e.g., UNNEST(ft.generic_array) references ft).
278                    if let Some(refs) = self.extract_referenced_tables(from_elem, true) {
279                        for tbl_ref in refs {
280                            if !this_clause_refs.contains(&tbl_ref) {
281                                referenced_tables.push(tbl_ref);
282                            }
283                        }
284                    }
285                }
286
287                // Check ON condition for references to other tables.
288                for join_on_condition in
289                    join_clause.children(const { &SyntaxSet::new(&[SyntaxKind::JoinOnCondition]) })
290                {
291                    if let Some(refs) = self.extract_referenced_tables(join_on_condition, true) {
292                        for tbl_ref in refs {
293                            if !this_clause_refs.contains(&tbl_ref) {
294                                referenced_tables.push(tbl_ref);
295                            }
296                        }
297                    }
298                }
299            }
300        }
301
302        // If there's only one table total, don't flag anything.
303        if total_table_count <= 1 {
304            return None;
305        }
306
307        // Remove tables that are referenced in other join clauses.
308        let result: Vec<_> = joined_tables
309            .into_iter()
310            .filter(|(ref_str, _)| !referenced_tables.contains(ref_str))
311            .collect();
312
313        Some(result)
314    }
315}