sqruff_lib/rules/structure/
st07.rs1use ahash::AHashMap;
2use itertools::Itertools;
3use smol_str::{SmolStr, ToSmolStr};
4use sqruff_lib_core::dialects::init::DialectKind;
5use sqruff_lib_core::dialects::syntax::{SyntaxKind, SyntaxSet};
6use sqruff_lib_core::lint_fix::LintFix;
7use sqruff_lib_core::parser::segments::{ErasedSegment, SegmentBuilder, Tables};
8use sqruff_lib_core::utils::analysis::select::get_select_statement_info;
9use sqruff_lib_core::utils::functional::segments::Segments;
10
11use crate::core::config::Value;
12use crate::core::rules::context::RuleContext;
13use crate::core::rules::crawlers::{Crawler, SegmentSeekerCrawler};
14use crate::core::rules::{Erased as _, ErasedRule, LintResult, Rule, RuleGroups};
15use crate::utils::functional::context::FunctionalContext;
16
/// Lint rule ST07 (`structure.using`): reports joins written with `USING`.
///
/// The rule is a stateless unit type; it has no configurable options.
#[derive(Debug, Default, Clone)]
pub struct RuleST07;
19
20impl Rule for RuleST07 {
21 fn load_from_config(&self, _config: &AHashMap<String, Value>) -> Result<ErasedRule, String> {
22 Ok(RuleST07.erased())
23 }
24
25 fn name(&self) -> &'static str {
26 "structure.using"
27 }
28
29 fn description(&self) -> &'static str {
30 "Prefer specifying join keys instead of using ``USING``."
31 }
32
33 fn long_description(&self) -> &'static str {
34 r"
35**Anti-pattern**
36
37```sql
38SELECT
39 table_a.field_1,
40 table_b.field_2
41FROM
42 table_a
43INNER JOIN table_b USING (id)
44```
45
46**Best practice**
47
48Specify the keys directly
49
50```sql
51SELECT
52 table_a.field_1,
53 table_b.field_2
54FROM
55 table_a
56INNER JOIN table_b
57 ON table_a.id = table_b.id
58```"
59 }
60
61 fn groups(&self) -> &'static [RuleGroups] {
62 &[RuleGroups::All, RuleGroups::Structure]
63 }
64
65 fn dialect_skip(&self) -> &'static [DialectKind] {
66 &[DialectKind::Clickhouse]
67 }
68
69 fn eval(&self, context: &RuleContext) -> Vec<LintResult> {
70 let functional_context = FunctionalContext::new(context);
71 let segment = functional_context.segment();
72 let parent_stack = functional_context.parent_stack();
73
74 let usings = segment.children_where(|it: &ErasedSegment| it.is_keyword("using"));
75 let using_anchor = usings.first();
76
77 let Some(using_anchor) = using_anchor else {
78 return Vec::new();
79 };
80
81 let unfixable_result = LintResult::new(
82 using_anchor.clone().into(),
83 Vec::new(),
84 Some("Found USING statement. Expected only ON statements.".into()),
85 None,
86 );
87
88 let tables_in_join = parent_stack
89 .last()
90 .unwrap()
91 .segments()
92 .iter()
93 .filter(|it| {
94 matches!(
95 it.get_type(),
96 SyntaxKind::JoinClause | SyntaxKind::FromExpressionElement
97 )
98 })
99 .cloned()
100 .collect_vec();
101
102 if segment.get(0, None) != tables_in_join.get(1).cloned() {
103 return vec![unfixable_result];
104 }
105
106 let stmts = parent_stack
107 .find_last_where(|it: &ErasedSegment| it.is_type(SyntaxKind::SelectStatement));
108 let parent_select = stmts.first();
109
110 let Some(parent_select) = parent_select else {
111 return vec![unfixable_result];
112 };
113
114 let select_info = get_select_statement_info(parent_select, context.dialect.into(), true);
115 let mut table_aliases =
116 select_info.map_or(Vec::new(), |select_info| select_info.table_aliases);
117 table_aliases.retain(|it| !it.ref_str.is_empty());
118
119 if table_aliases.len() < 2 {
120 return vec![unfixable_result];
121 }
122
123 let (to_delete, insert_after_anchor) = extract_deletion_sequence_and_anchor(&segment);
124
125 let [table_a, table_b, ..] = &table_aliases[..] else {
126 unreachable!()
127 };
128
129 let mut edit_segments = vec![
130 SegmentBuilder::keyword(context.tables.next_id(), "ON"),
131 SegmentBuilder::whitespace(context.tables.next_id(), " "),
132 ];
133
134 edit_segments.append(&mut generate_join_conditions(
135 context.tables,
136 context.dialect.name,
137 &table_a.ref_str,
138 &table_b.ref_str,
139 extract_cols_from_using(segment, using_anchor),
140 ));
141
142 let mut fixes = Vec::with_capacity(1 + to_delete.len());
143
144 fixes.push(LintFix::create_before(insert_after_anchor, edit_segments));
145 fixes.extend(to_delete.into_iter().map(LintFix::delete));
146
147 vec![LintResult::new(
148 using_anchor.clone().into(),
149 fixes,
150 None,
151 None,
152 )]
153 }
154
155 fn is_fix_compatible(&self) -> bool {
156 true
157 }
158
159 fn crawl_behaviour(&self) -> Crawler {
160 SegmentSeekerCrawler::new(const { SyntaxSet::new(&[SyntaxKind::JoinClause]) }).into()
161 }
162}
163
164fn extract_cols_from_using(join_clause: Segments, using_segs: &ErasedSegment) -> Vec<SmolStr> {
165 join_clause
166 .children_all()
167 .after(using_segs)
168 .filter(|it: &ErasedSegment| it.is_type(SyntaxKind::Bracketed))
169 .head()
170 .children_where(|it: &ErasedSegment| {
171 it.is_type(SyntaxKind::Identifier) || it.is_type(SyntaxKind::NakedIdentifier)
172 })
173 .into_iter()
174 .map(|it| it.raw().to_smolstr())
175 .collect()
176}
177
178fn generate_join_conditions(
179 tables: &Tables,
180 dialect: DialectKind,
181 table_a_ref: &str,
182 table_b_ref: &str,
183 columns: Vec<SmolStr>,
184) -> Vec<ErasedSegment> {
185 let mut edit_segments = Vec::new();
186
187 for col in columns {
188 edit_segments.extend_from_slice(&[
189 create_col_reference(tables, dialect, table_a_ref, &col),
190 SegmentBuilder::whitespace(tables.next_id(), " "),
191 SegmentBuilder::token(tables.next_id(), "=", SyntaxKind::Symbol).finish(),
192 SegmentBuilder::whitespace(tables.next_id(), " "),
193 create_col_reference(tables, dialect, table_b_ref, &col),
194 SegmentBuilder::whitespace(tables.next_id(), " "),
195 SegmentBuilder::keyword(tables.next_id(), "AND"),
196 SegmentBuilder::whitespace(tables.next_id(), " "),
197 ]);
198 }
199
200 edit_segments
201 .get(..edit_segments.len().saturating_sub(3))
202 .map_or(Vec::new(), ToOwned::to_owned)
203 .clone()
204}
205
206fn extract_deletion_sequence_and_anchor(
207 join_clause: &Segments,
208) -> (Vec<ErasedSegment>, ErasedSegment) {
209 let mut insert_anchor = None;
210 let mut to_delete = Vec::new();
211
212 for seg in join_clause.children_all() {
213 if seg.raw().eq_ignore_ascii_case("USING") {
214 to_delete.push(seg.clone());
215 continue;
216 }
217
218 if to_delete.is_empty() {
219 continue;
220 }
221
222 if to_delete.last().unwrap().is_type(SyntaxKind::Bracketed) {
223 insert_anchor = Some(seg);
224 break;
225 }
226
227 to_delete.push(seg);
228 }
229
230 (to_delete, insert_anchor.unwrap())
231}
232
233fn create_col_reference(
234 tables: &Tables,
235 dialect: DialectKind,
236 table_ref: &str,
237 column_name: &str,
238) -> ErasedSegment {
239 SegmentBuilder::node(
240 tables.next_id(),
241 SyntaxKind::ColumnReference,
242 dialect,
243 vec![
244 SegmentBuilder::token(tables.next_id(), table_ref, SyntaxKind::NakedIdentifier)
245 .finish(),
246 SegmentBuilder::symbol(tables.next_id(), "."),
247 SegmentBuilder::token(tables.next_id(), column_name, SyntaxKind::NakedIdentifier)
248 .finish(),
249 ],
250 )
251 .finish()
252}