1use crate::linter::rule::{LintContext, LintRule};
7use crate::types::{issue_codes, Issue, IssueAutofixApplicability, IssuePatchEdit, Span};
8use sqlparser::ast::{Spanned, *};
9use sqlparser::tokenizer::{Span as SqlParserSpan, Token, TokenWithSpan, Tokenizer, Whitespace};
10
11pub struct AvoidUsingJoin;
12
13impl LintRule for AvoidUsingJoin {
14 fn code(&self) -> &'static str {
15 issue_codes::LINT_ST_007
16 }
17
18 fn name(&self) -> &'static str {
19 "Avoid USING in JOIN"
20 }
21
22 fn description(&self) -> &'static str {
23 "Prefer specifying join keys instead of using 'USING'."
24 }
25
26 fn check(&self, stmt: &Statement, ctx: &LintContext) -> Vec<Issue> {
27 let mut issues = Vec::new();
28 check_statement(stmt, ctx, &mut issues);
29 issues
30 }
31}
32
33fn check_statement(stmt: &Statement, ctx: &LintContext, issues: &mut Vec<Issue>) {
34 match stmt {
35 Statement::Query(q) => check_query(q, ctx, issues),
36 Statement::Insert(ins) => {
37 if let Some(ref source) = ins.source {
38 check_query(source, ctx, issues);
39 }
40 }
41 Statement::CreateView { query, .. } => check_query(query, ctx, issues),
42 Statement::CreateTable(create) => {
43 if let Some(ref q) = create.query {
44 check_query(q, ctx, issues);
45 }
46 }
47 _ => {}
48 }
49}
50
51fn check_query(query: &Query, ctx: &LintContext, issues: &mut Vec<Issue>) {
52 if let Some(ref with) = query.with {
53 for cte in &with.cte_tables {
54 check_query(&cte.query, ctx, issues);
55 }
56 }
57 check_set_expr(&query.body, ctx, issues);
58}
59
60fn check_set_expr(body: &SetExpr, ctx: &LintContext, issues: &mut Vec<Issue>) {
61 match body {
62 SetExpr::Select(select) => {
63 for from_item in &select.from {
64 let mut left_ref = table_factor_reference_name(&from_item.relation);
65 check_table_factor(&from_item.relation, ctx, issues);
66 for join in &from_item.joins {
67 let right_ref = table_factor_reference_name(&join.relation);
68 if let Some(issue) = using_join_issue(
69 ctx,
70 &join.join_operator,
71 left_ref.as_deref(),
72 right_ref.as_deref(),
73 ) {
74 issues.push(issue);
75 }
76 check_table_factor(&join.relation, ctx, issues);
77
78 if join_uses_using(&join.join_operator) {
79 left_ref = None;
83 } else if right_ref.is_some() {
84 left_ref = right_ref;
85 }
86 }
87 }
88 }
89 SetExpr::Query(q) => check_query(q, ctx, issues),
90 SetExpr::SetOperation { left, right, .. } => {
91 check_set_expr(left, ctx, issues);
92 check_set_expr(right, ctx, issues);
93 }
94 _ => {}
95 }
96}
97
98fn check_table_factor(relation: &TableFactor, ctx: &LintContext, issues: &mut Vec<Issue>) {
99 match relation {
100 TableFactor::Derived { subquery, .. } => check_query(subquery, ctx, issues),
101 TableFactor::NestedJoin {
102 table_with_joins, ..
103 } => {
104 let mut left_ref = table_factor_reference_name(&table_with_joins.relation);
105 check_table_factor(&table_with_joins.relation, ctx, issues);
106 for join in &table_with_joins.joins {
107 let right_ref = table_factor_reference_name(&join.relation);
108 if let Some(issue) = using_join_issue(
109 ctx,
110 &join.join_operator,
111 left_ref.as_deref(),
112 right_ref.as_deref(),
113 ) {
114 issues.push(issue);
115 }
116 check_table_factor(&join.relation, ctx, issues);
117
118 if join_uses_using(&join.join_operator) {
119 left_ref = None;
120 } else if right_ref.is_some() {
121 left_ref = right_ref;
122 }
123 }
124 }
125 _ => {}
126 }
127}
128
129fn using_join_issue(
130 ctx: &LintContext,
131 join_operator: &JoinOperator,
132 left_ref: Option<&str>,
133 right_ref: Option<&str>,
134) -> Option<Issue> {
135 let constraint = join_constraint(join_operator)?;
136 let JoinConstraint::Using(columns) = constraint else {
137 return None;
138 };
139
140 let mut issue = Issue::warning(
141 issue_codes::LINT_ST_007,
142 "Avoid JOIN ... USING (...); prefer explicit ON conditions.",
143 )
144 .with_statement(ctx.statement_index);
145
146 if let Some((span, replacement)) =
147 using_join_autofix(ctx, constraint, columns, left_ref, right_ref)
148 {
149 issue = issue.with_span(span).with_autofix_edits(
150 IssueAutofixApplicability::Safe,
151 vec![IssuePatchEdit::new(span, replacement)],
152 );
153 }
154
155 Some(issue)
156}
157
158fn join_constraint(op: &JoinOperator) -> Option<&JoinConstraint> {
159 match op {
160 JoinOperator::Join(constraint)
161 | JoinOperator::Inner(constraint)
162 | JoinOperator::Left(constraint)
163 | JoinOperator::LeftOuter(constraint)
164 | JoinOperator::Right(constraint)
165 | JoinOperator::RightOuter(constraint)
166 | JoinOperator::FullOuter(constraint)
167 | JoinOperator::CrossJoin(constraint)
168 | JoinOperator::Semi(constraint)
169 | JoinOperator::LeftSemi(constraint)
170 | JoinOperator::RightSemi(constraint)
171 | JoinOperator::Anti(constraint)
172 | JoinOperator::LeftAnti(constraint)
173 | JoinOperator::RightAnti(constraint)
174 | JoinOperator::StraightJoin(constraint) => Some(constraint),
175 JoinOperator::AsOf { constraint, .. } => Some(constraint),
176 JoinOperator::CrossApply | JoinOperator::OuterApply => None,
177 }
178}
179
180fn join_uses_using(op: &JoinOperator) -> bool {
181 matches!(join_constraint(op), Some(JoinConstraint::Using(_)))
182}
183
184#[derive(Clone)]
185struct PositionedToken {
186 token: Token,
187 start: usize,
188 end: usize,
189}
190
191fn using_join_autofix(
192 ctx: &LintContext,
193 constraint: &JoinConstraint,
194 columns: &[ObjectName],
195 left_ref: Option<&str>,
196 right_ref: Option<&str>,
197) -> Option<(Span, String)> {
198 let left_ref = left_ref?;
199 let right_ref = right_ref?;
200 let replacement = format!(
201 "ON {}",
202 using_columns_to_on_expr(columns, left_ref, right_ref)?
203 );
204
205 let (constraint_start, constraint_end) = constraint_statement_offsets(ctx, constraint)?;
206 let span = locate_using_clause_span(ctx, constraint_start, constraint_end)?;
207 if span_contains_comment(ctx, span) {
208 return None;
209 }
210
211 Some((span, replacement))
212}
213
214fn using_columns_to_on_expr(
215 columns: &[ObjectName],
216 left_ref: &str,
217 right_ref: &str,
218) -> Option<String> {
219 let mut combined: Option<Expr> = None;
220
221 for object_name in columns {
222 let column_ident = object_name
223 .0
224 .last()
225 .and_then(|part| part.as_ident())
226 .cloned()?;
227
228 let equality = Expr::BinaryOp {
229 left: Box::new(Expr::CompoundIdentifier(vec![
230 Ident::new(left_ref),
231 column_ident.clone(),
232 ])),
233 op: BinaryOperator::Eq,
234 right: Box::new(Expr::CompoundIdentifier(vec![
235 Ident::new(right_ref),
236 column_ident,
237 ])),
238 };
239
240 combined = Some(match combined {
241 Some(prev) => Expr::BinaryOp {
242 left: Box::new(prev),
243 op: BinaryOperator::And,
244 right: Box::new(equality),
245 },
246 None => equality,
247 });
248 }
249
250 Some(combined?.to_string())
251}
252
253fn constraint_statement_offsets(
254 ctx: &LintContext,
255 constraint: &JoinConstraint,
256) -> Option<(usize, usize)> {
257 if let Some((start, end)) = sqlparser_span_offsets(ctx.statement_sql(), constraint.span()) {
258 return Some((start, end));
259 }
260
261 let (start, end) = sqlparser_span_offsets(ctx.sql, constraint.span())?;
262 if start < ctx.statement_range.start || end > ctx.statement_range.end {
263 return None;
264 }
265 Some((
266 start - ctx.statement_range.start,
267 end - ctx.statement_range.start,
268 ))
269}
270
271fn locate_using_clause_span(
272 ctx: &LintContext,
273 constraint_start: usize,
274 constraint_end: usize,
275) -> Option<Span> {
276 let tokens = positioned_statement_tokens(ctx)?;
277 if tokens.is_empty() {
278 return None;
279 }
280
281 let abs_constraint_start = ctx.statement_range.start + constraint_start;
282 let abs_constraint_end = ctx.statement_range.start + constraint_end;
283
284 let using_indexes = tokens
285 .iter()
286 .enumerate()
287 .filter_map(|(idx, token)| {
288 (token.start <= abs_constraint_start && token_word_equals(&token.token, "USING"))
289 .then_some(idx)
290 })
291 .collect::<Vec<_>>();
292
293 for using_idx in using_indexes.into_iter().rev() {
294 let Some(lparen_idx) = next_non_trivia_token_index(&tokens, using_idx + 1) else {
295 continue;
296 };
297 if !matches!(tokens[lparen_idx].token, Token::LParen) {
298 continue;
299 }
300
301 let mut depth = 0usize;
302 let mut close_idx = None;
303 for (idx, token) in tokens.iter().enumerate().skip(lparen_idx) {
304 match token.token {
305 Token::LParen => depth += 1,
306 Token::RParen => {
307 if depth == 0 {
308 break;
309 }
310 depth -= 1;
311 if depth == 0 {
312 close_idx = Some(idx);
313 break;
314 }
315 }
316 _ => {}
317 }
318 }
319
320 let Some(close_idx) = close_idx else {
321 continue;
322 };
323
324 let span = Span::new(tokens[using_idx].start, tokens[close_idx].end);
325 if span.start <= abs_constraint_start && span.end >= abs_constraint_end {
326 return Some(span);
327 }
328 }
329
330 None
331}
332
333fn next_non_trivia_token_index(tokens: &[PositionedToken], start: usize) -> Option<usize> {
334 tokens
335 .iter()
336 .enumerate()
337 .skip(start)
338 .find_map(|(idx, token)| (!is_trivia(&token.token)).then_some(idx))
339}
340
341fn table_factor_reference_name(relation: &TableFactor) -> Option<String> {
342 match relation {
343 TableFactor::Table { name, alias, .. } => {
344 if let Some(alias) = alias {
345 Some(alias.name.value.clone())
346 } else {
347 name.0
348 .last()
349 .and_then(|part| part.as_ident())
350 .map(|ident| ident.value.clone())
351 }
352 }
353 TableFactor::Derived { alias, .. }
354 | TableFactor::TableFunction { alias, .. }
355 | TableFactor::Function { alias, .. }
356 | TableFactor::UNNEST { alias, .. }
357 | TableFactor::JsonTable { alias, .. }
358 | TableFactor::OpenJsonTable { alias, .. }
359 | TableFactor::NestedJoin { alias, .. }
360 | TableFactor::Pivot { alias, .. }
361 | TableFactor::Unpivot { alias, .. } => alias.as_ref().map(|a| a.name.value.clone()),
362 _ => None,
363 }
364}
365
366fn positioned_statement_tokens(ctx: &LintContext) -> Option<Vec<PositionedToken>> {
367 let from_document_tokens = ctx.with_document_tokens(|tokens| {
368 if tokens.is_empty() {
369 return None;
370 }
371
372 let mut positioned = Vec::new();
373 for token in tokens {
374 let (start, end) = token_with_span_offsets(ctx.sql, token)?;
375 if start < ctx.statement_range.start || end > ctx.statement_range.end {
376 continue;
377 }
378 positioned.push(PositionedToken {
379 token: token.token.clone(),
380 start,
381 end,
382 });
383 }
384 Some(positioned)
385 });
386
387 if let Some(tokens) = from_document_tokens {
388 return Some(tokens);
389 }
390
391 let tokens = tokenize_with_spans(ctx.statement_sql(), ctx.dialect())?;
392 let mut positioned = Vec::new();
393 for token in tokens {
394 let (start, end) = token_with_span_offsets(ctx.statement_sql(), &token)?;
395 positioned.push(PositionedToken {
396 token: token.token,
397 start: ctx.statement_range.start + start,
398 end: ctx.statement_range.start + end,
399 });
400 }
401 Some(positioned)
402}
403
404fn span_contains_comment(ctx: &LintContext, span: Span) -> bool {
405 positioned_statement_tokens(ctx).is_some_and(|tokens| {
406 tokens.iter().any(|token| {
407 token.start >= span.start && token.end <= span.end && is_comment_token(&token.token)
408 })
409 })
410}
411
412fn tokenize_with_spans(sql: &str, dialect: crate::types::Dialect) -> Option<Vec<TokenWithSpan>> {
413 let dialect = dialect.to_sqlparser_dialect();
414 let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
415 tokenizer.tokenize_with_location().ok()
416}
417
418fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
419 let start = line_col_to_offset(
420 sql,
421 token.span.start.line as usize,
422 token.span.start.column as usize,
423 )?;
424 let end = line_col_to_offset(
425 sql,
426 token.span.end.line as usize,
427 token.span.end.column as usize,
428 )?;
429 Some((start, end))
430}
431
432fn sqlparser_span_offsets(sql: &str, span: SqlParserSpan) -> Option<(usize, usize)> {
433 if span.start.line == 0 || span.start.column == 0 || span.end.line == 0 || span.end.column == 0
434 {
435 return None;
436 }
437
438 let start = line_col_to_offset(sql, span.start.line as usize, span.start.column as usize)?;
439 let end = line_col_to_offset(sql, span.end.line as usize, span.end.column as usize)?;
440 (end >= start).then_some((start, end))
441}
442
443fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
444 if line == 0 || column == 0 {
445 return None;
446 }
447
448 let mut current_line = 1usize;
449 let mut line_start = 0usize;
450 for (idx, ch) in sql.char_indices() {
451 if current_line == line {
452 break;
453 }
454 if ch == '\n' {
455 current_line += 1;
456 line_start = idx + ch.len_utf8();
457 }
458 }
459 if current_line != line {
460 return None;
461 }
462
463 let mut current_column = 1usize;
464 for (rel_idx, ch) in sql[line_start..].char_indices() {
465 if current_column == column {
466 return Some(line_start + rel_idx);
467 }
468 if ch == '\n' {
469 return None;
470 }
471 current_column += 1;
472 }
473 if current_column == column {
474 return Some(sql.len());
475 }
476 None
477}
478
479fn token_word_equals(token: &Token, word: &str) -> bool {
480 match token {
481 Token::Word(w) => w.value.eq_ignore_ascii_case(word),
482 _ => false,
483 }
484}
485
486fn is_trivia(token: &Token) -> bool {
487 matches!(token, Token::Whitespace(_))
488}
489
490fn is_comment_token(token: &Token) -> bool {
491 matches!(
492 token,
493 Token::Whitespace(Whitespace::SingleLineComment { .. } | Whitespace::MultiLineComment(_))
494 )
495}
496
497#[cfg(test)]
498mod tests {
499 use super::*;
500 use crate::parser::parse_sql;
501 use crate::types::{IssueAutofixApplicability, IssuePatchEdit};
502
503 fn check_sql(sql: &str) -> Vec<Issue> {
504 let stmts = parse_sql(sql).unwrap();
505 let rule = AvoidUsingJoin;
506 let ctx = LintContext {
507 sql,
508 statement_range: 0..sql.len(),
509 statement_index: 0,
510 };
511 let mut issues = Vec::new();
512 for stmt in &stmts {
513 issues.extend(rule.check(stmt, &ctx));
514 }
515 issues
516 }
517
518 fn apply_edits(sql: &str, edits: &[IssuePatchEdit]) -> String {
519 let mut output = sql.to_string();
520 let mut ordered = edits.iter().collect::<Vec<_>>();
521 ordered.sort_by_key(|edit| edit.span.start);
522 for edit in ordered.into_iter().rev() {
523 output.replace_range(edit.span.start..edit.span.end, &edit.replacement);
524 }
525 output
526 }
527
528 #[test]
529 fn test_using_join_detected() {
530 let sql = "SELECT * FROM a JOIN b USING (id)";
531 let issues = check_sql(sql);
532 assert_eq!(issues.len(), 1);
533 assert_eq!(issues[0].code, "LINT_ST_007");
534
535 let autofix = issues[0]
536 .autofix
537 .as_ref()
538 .expect("expected ST007 core autofix metadata");
539 assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
540 assert_eq!(autofix.edits.len(), 1);
541 let fixed = apply_edits(sql, &autofix.edits);
542 assert_eq!(fixed, "SELECT * FROM a JOIN b ON a.id = b.id");
543 }
544
545 #[test]
546 fn test_on_join_ok() {
547 let issues = check_sql("SELECT * FROM a JOIN b ON a.id = b.id");
548 assert!(issues.is_empty());
549 }
550
551 #[test]
552 fn test_using_join_comment_blocks_safe_autofix() {
553 let issues = check_sql("SELECT * FROM a JOIN b USING (id /*keep*/)");
554 assert_eq!(issues.len(), 1);
555 assert!(
556 issues[0].autofix.is_none(),
557 "comment-bearing USING join should not emit safe autofix metadata"
558 );
559 }
560
561 #[test]
562 fn test_using_join_subquery_alias_autofix() {
563 let sql = "select x.a from (SELECT 1 AS a) AS x inner join y using (id)";
564 let issues = check_sql(sql);
565 assert_eq!(issues.len(), 1);
566 let autofix = issues[0]
567 .autofix
568 .as_ref()
569 .expect("expected ST007 core autofix metadata");
570 let fixed = apply_edits(sql, &autofix.edits);
571 assert_eq!(
572 fixed,
573 "select x.a from (SELECT 1 AS a) AS x inner join y ON x.id = y.id"
574 );
575 }
576
577 #[test]
578 fn test_using_chain_stops_after_first_using_fix() {
579 let sql = "select x.a\nfrom x\ninner join y using(id, foo)\ninner join z using(id)";
580 let issues = check_sql(sql);
581 assert_eq!(issues.len(), 2);
582
583 let all_edits: Vec<IssuePatchEdit> = issues
584 .iter()
585 .filter_map(|issue| issue.autofix.as_ref())
586 .flat_map(|autofix| autofix.edits.iter().cloned())
587 .collect();
588 let fixed = apply_edits(sql, &all_edits);
589 assert_eq!(
590 fixed,
591 "select x.a\nfrom x\ninner join y ON x.id = y.id AND x.foo = y.foo\ninner join z using(id)"
592 );
593 assert!(
594 issues[1].autofix.is_none(),
595 "second USING join should remain unfixed for safety"
596 );
597 }
598}