Skip to main content

flowscope_core/linter/rules/
st_007.rs

1//! LINT_ST_007: Avoid JOIN ... USING (...).
2//!
3//! USING can hide which side a column originates from and may create ambiguity
4//! in complex joins. Prefer explicit ON conditions.
5
6use crate::linter::rule::{LintContext, LintRule};
7use crate::types::{issue_codes, Issue, IssueAutofixApplicability, IssuePatchEdit, Span};
8use sqlparser::ast::{Spanned, *};
9use sqlparser::tokenizer::{Span as SqlParserSpan, Token, TokenWithSpan, Tokenizer, Whitespace};
10
11pub struct AvoidUsingJoin;
12
13impl LintRule for AvoidUsingJoin {
14    fn code(&self) -> &'static str {
15        issue_codes::LINT_ST_007
16    }
17
18    fn name(&self) -> &'static str {
19        "Avoid USING in JOIN"
20    }
21
22    fn description(&self) -> &'static str {
23        "Prefer specifying join keys instead of using 'USING'."
24    }
25
26    fn check(&self, stmt: &Statement, ctx: &LintContext) -> Vec<Issue> {
27        let mut issues = Vec::new();
28        check_statement(stmt, ctx, &mut issues);
29        issues
30    }
31}
32
33fn check_statement(stmt: &Statement, ctx: &LintContext, issues: &mut Vec<Issue>) {
34    match stmt {
35        Statement::Query(q) => check_query(q, ctx, issues),
36        Statement::Insert(ins) => {
37            if let Some(ref source) = ins.source {
38                check_query(source, ctx, issues);
39            }
40        }
41        Statement::CreateView { query, .. } => check_query(query, ctx, issues),
42        Statement::CreateTable(create) => {
43            if let Some(ref q) = create.query {
44                check_query(q, ctx, issues);
45            }
46        }
47        _ => {}
48    }
49}
50
51fn check_query(query: &Query, ctx: &LintContext, issues: &mut Vec<Issue>) {
52    if let Some(ref with) = query.with {
53        for cte in &with.cte_tables {
54            check_query(&cte.query, ctx, issues);
55        }
56    }
57    check_set_expr(&query.body, ctx, issues);
58}
59
60fn check_set_expr(body: &SetExpr, ctx: &LintContext, issues: &mut Vec<Issue>) {
61    match body {
62        SetExpr::Select(select) => {
63            for from_item in &select.from {
64                let mut left_ref = table_factor_reference_name(&from_item.relation);
65                check_table_factor(&from_item.relation, ctx, issues);
66                for join in &from_item.joins {
67                    let right_ref = table_factor_reference_name(&join.relation);
68                    if let Some(issue) = using_join_issue(
69                        ctx,
70                        &join.join_operator,
71                        left_ref.as_deref(),
72                        right_ref.as_deref(),
73                    ) {
74                        issues.push(issue);
75                    }
76                    check_table_factor(&join.relation, ctx, issues);
77
78                    if join_uses_using(&join.join_operator) {
79                        // SQLFluff parity: once a USING join is encountered, the
80                        // left side becomes a merged relation and follow-up USING
81                        // joins are not safely autofixable.
82                        left_ref = None;
83                    } else if right_ref.is_some() {
84                        left_ref = right_ref;
85                    }
86                }
87            }
88        }
89        SetExpr::Query(q) => check_query(q, ctx, issues),
90        SetExpr::SetOperation { left, right, .. } => {
91            check_set_expr(left, ctx, issues);
92            check_set_expr(right, ctx, issues);
93        }
94        _ => {}
95    }
96}
97
98fn check_table_factor(relation: &TableFactor, ctx: &LintContext, issues: &mut Vec<Issue>) {
99    match relation {
100        TableFactor::Derived { subquery, .. } => check_query(subquery, ctx, issues),
101        TableFactor::NestedJoin {
102            table_with_joins, ..
103        } => {
104            let mut left_ref = table_factor_reference_name(&table_with_joins.relation);
105            check_table_factor(&table_with_joins.relation, ctx, issues);
106            for join in &table_with_joins.joins {
107                let right_ref = table_factor_reference_name(&join.relation);
108                if let Some(issue) = using_join_issue(
109                    ctx,
110                    &join.join_operator,
111                    left_ref.as_deref(),
112                    right_ref.as_deref(),
113                ) {
114                    issues.push(issue);
115                }
116                check_table_factor(&join.relation, ctx, issues);
117
118                if join_uses_using(&join.join_operator) {
119                    left_ref = None;
120                } else if right_ref.is_some() {
121                    left_ref = right_ref;
122                }
123            }
124        }
125        _ => {}
126    }
127}
128
129fn using_join_issue(
130    ctx: &LintContext,
131    join_operator: &JoinOperator,
132    left_ref: Option<&str>,
133    right_ref: Option<&str>,
134) -> Option<Issue> {
135    let constraint = join_constraint(join_operator)?;
136    let JoinConstraint::Using(columns) = constraint else {
137        return None;
138    };
139
140    let mut issue = Issue::warning(
141        issue_codes::LINT_ST_007,
142        "Avoid JOIN ... USING (...); prefer explicit ON conditions.",
143    )
144    .with_statement(ctx.statement_index);
145
146    if let Some((span, replacement)) =
147        using_join_autofix(ctx, constraint, columns, left_ref, right_ref)
148    {
149        issue = issue.with_span(span).with_autofix_edits(
150            IssueAutofixApplicability::Safe,
151            vec![IssuePatchEdit::new(span, replacement)],
152        );
153    }
154
155    Some(issue)
156}
157
158fn join_constraint(op: &JoinOperator) -> Option<&JoinConstraint> {
159    match op {
160        JoinOperator::Join(constraint)
161        | JoinOperator::Inner(constraint)
162        | JoinOperator::Left(constraint)
163        | JoinOperator::LeftOuter(constraint)
164        | JoinOperator::Right(constraint)
165        | JoinOperator::RightOuter(constraint)
166        | JoinOperator::FullOuter(constraint)
167        | JoinOperator::CrossJoin(constraint)
168        | JoinOperator::Semi(constraint)
169        | JoinOperator::LeftSemi(constraint)
170        | JoinOperator::RightSemi(constraint)
171        | JoinOperator::Anti(constraint)
172        | JoinOperator::LeftAnti(constraint)
173        | JoinOperator::RightAnti(constraint)
174        | JoinOperator::StraightJoin(constraint) => Some(constraint),
175        JoinOperator::AsOf { constraint, .. } => Some(constraint),
176        JoinOperator::CrossApply | JoinOperator::OuterApply => None,
177    }
178}
179
180fn join_uses_using(op: &JoinOperator) -> bool {
181    matches!(join_constraint(op), Some(JoinConstraint::Using(_)))
182}
183
184#[derive(Clone)]
185struct PositionedToken {
186    token: Token,
187    start: usize,
188    end: usize,
189}
190
191fn using_join_autofix(
192    ctx: &LintContext,
193    constraint: &JoinConstraint,
194    columns: &[ObjectName],
195    left_ref: Option<&str>,
196    right_ref: Option<&str>,
197) -> Option<(Span, String)> {
198    let left_ref = left_ref?;
199    let right_ref = right_ref?;
200    let replacement = format!(
201        "ON {}",
202        using_columns_to_on_expr(columns, left_ref, right_ref)?
203    );
204
205    let (constraint_start, constraint_end) = constraint_statement_offsets(ctx, constraint)?;
206    let span = locate_using_clause_span(ctx, constraint_start, constraint_end)?;
207    if span_contains_comment(ctx, span) {
208        return None;
209    }
210
211    Some((span, replacement))
212}
213
214fn using_columns_to_on_expr(
215    columns: &[ObjectName],
216    left_ref: &str,
217    right_ref: &str,
218) -> Option<String> {
219    let mut combined: Option<Expr> = None;
220
221    for object_name in columns {
222        let column_ident = object_name
223            .0
224            .last()
225            .and_then(|part| part.as_ident())
226            .cloned()?;
227
228        let equality = Expr::BinaryOp {
229            left: Box::new(Expr::CompoundIdentifier(vec![
230                Ident::new(left_ref),
231                column_ident.clone(),
232            ])),
233            op: BinaryOperator::Eq,
234            right: Box::new(Expr::CompoundIdentifier(vec![
235                Ident::new(right_ref),
236                column_ident,
237            ])),
238        };
239
240        combined = Some(match combined {
241            Some(prev) => Expr::BinaryOp {
242                left: Box::new(prev),
243                op: BinaryOperator::And,
244                right: Box::new(equality),
245            },
246            None => equality,
247        });
248    }
249
250    Some(combined?.to_string())
251}
252
253fn constraint_statement_offsets(
254    ctx: &LintContext,
255    constraint: &JoinConstraint,
256) -> Option<(usize, usize)> {
257    if let Some((start, end)) = sqlparser_span_offsets(ctx.statement_sql(), constraint.span()) {
258        return Some((start, end));
259    }
260
261    let (start, end) = sqlparser_span_offsets(ctx.sql, constraint.span())?;
262    if start < ctx.statement_range.start || end > ctx.statement_range.end {
263        return None;
264    }
265    Some((
266        start - ctx.statement_range.start,
267        end - ctx.statement_range.start,
268    ))
269}
270
271fn locate_using_clause_span(
272    ctx: &LintContext,
273    constraint_start: usize,
274    constraint_end: usize,
275) -> Option<Span> {
276    let tokens = positioned_statement_tokens(ctx)?;
277    if tokens.is_empty() {
278        return None;
279    }
280
281    let abs_constraint_start = ctx.statement_range.start + constraint_start;
282    let abs_constraint_end = ctx.statement_range.start + constraint_end;
283
284    let using_indexes = tokens
285        .iter()
286        .enumerate()
287        .filter_map(|(idx, token)| {
288            (token.start <= abs_constraint_start && token_word_equals(&token.token, "USING"))
289                .then_some(idx)
290        })
291        .collect::<Vec<_>>();
292
293    for using_idx in using_indexes.into_iter().rev() {
294        let Some(lparen_idx) = next_non_trivia_token_index(&tokens, using_idx + 1) else {
295            continue;
296        };
297        if !matches!(tokens[lparen_idx].token, Token::LParen) {
298            continue;
299        }
300
301        let mut depth = 0usize;
302        let mut close_idx = None;
303        for (idx, token) in tokens.iter().enumerate().skip(lparen_idx) {
304            match token.token {
305                Token::LParen => depth += 1,
306                Token::RParen => {
307                    if depth == 0 {
308                        break;
309                    }
310                    depth -= 1;
311                    if depth == 0 {
312                        close_idx = Some(idx);
313                        break;
314                    }
315                }
316                _ => {}
317            }
318        }
319
320        let Some(close_idx) = close_idx else {
321            continue;
322        };
323
324        let span = Span::new(tokens[using_idx].start, tokens[close_idx].end);
325        if span.start <= abs_constraint_start && span.end >= abs_constraint_end {
326            return Some(span);
327        }
328    }
329
330    None
331}
332
333fn next_non_trivia_token_index(tokens: &[PositionedToken], start: usize) -> Option<usize> {
334    tokens
335        .iter()
336        .enumerate()
337        .skip(start)
338        .find_map(|(idx, token)| (!is_trivia(&token.token)).then_some(idx))
339}
340
341fn table_factor_reference_name(relation: &TableFactor) -> Option<String> {
342    match relation {
343        TableFactor::Table { name, alias, .. } => {
344            if let Some(alias) = alias {
345                Some(alias.name.value.clone())
346            } else {
347                name.0
348                    .last()
349                    .and_then(|part| part.as_ident())
350                    .map(|ident| ident.value.clone())
351            }
352        }
353        TableFactor::Derived { alias, .. }
354        | TableFactor::TableFunction { alias, .. }
355        | TableFactor::Function { alias, .. }
356        | TableFactor::UNNEST { alias, .. }
357        | TableFactor::JsonTable { alias, .. }
358        | TableFactor::OpenJsonTable { alias, .. }
359        | TableFactor::NestedJoin { alias, .. }
360        | TableFactor::Pivot { alias, .. }
361        | TableFactor::Unpivot { alias, .. } => alias.as_ref().map(|a| a.name.value.clone()),
362        _ => None,
363    }
364}
365
366fn positioned_statement_tokens(ctx: &LintContext) -> Option<Vec<PositionedToken>> {
367    let from_document_tokens = ctx.with_document_tokens(|tokens| {
368        if tokens.is_empty() {
369            return None;
370        }
371
372        let mut positioned = Vec::new();
373        for token in tokens {
374            let (start, end) = token_with_span_offsets(ctx.sql, token)?;
375            if start < ctx.statement_range.start || end > ctx.statement_range.end {
376                continue;
377            }
378            positioned.push(PositionedToken {
379                token: token.token.clone(),
380                start,
381                end,
382            });
383        }
384        Some(positioned)
385    });
386
387    if let Some(tokens) = from_document_tokens {
388        return Some(tokens);
389    }
390
391    let tokens = tokenize_with_spans(ctx.statement_sql(), ctx.dialect())?;
392    let mut positioned = Vec::new();
393    for token in tokens {
394        let (start, end) = token_with_span_offsets(ctx.statement_sql(), &token)?;
395        positioned.push(PositionedToken {
396            token: token.token,
397            start: ctx.statement_range.start + start,
398            end: ctx.statement_range.start + end,
399        });
400    }
401    Some(positioned)
402}
403
404fn span_contains_comment(ctx: &LintContext, span: Span) -> bool {
405    positioned_statement_tokens(ctx).is_some_and(|tokens| {
406        tokens.iter().any(|token| {
407            token.start >= span.start && token.end <= span.end && is_comment_token(&token.token)
408        })
409    })
410}
411
412fn tokenize_with_spans(sql: &str, dialect: crate::types::Dialect) -> Option<Vec<TokenWithSpan>> {
413    let dialect = dialect.to_sqlparser_dialect();
414    let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
415    tokenizer.tokenize_with_location().ok()
416}
417
418fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
419    let start = line_col_to_offset(
420        sql,
421        token.span.start.line as usize,
422        token.span.start.column as usize,
423    )?;
424    let end = line_col_to_offset(
425        sql,
426        token.span.end.line as usize,
427        token.span.end.column as usize,
428    )?;
429    Some((start, end))
430}
431
432fn sqlparser_span_offsets(sql: &str, span: SqlParserSpan) -> Option<(usize, usize)> {
433    if span.start.line == 0 || span.start.column == 0 || span.end.line == 0 || span.end.column == 0
434    {
435        return None;
436    }
437
438    let start = line_col_to_offset(sql, span.start.line as usize, span.start.column as usize)?;
439    let end = line_col_to_offset(sql, span.end.line as usize, span.end.column as usize)?;
440    (end >= start).then_some((start, end))
441}
442
443fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
444    if line == 0 || column == 0 {
445        return None;
446    }
447
448    let mut current_line = 1usize;
449    let mut line_start = 0usize;
450    for (idx, ch) in sql.char_indices() {
451        if current_line == line {
452            break;
453        }
454        if ch == '\n' {
455            current_line += 1;
456            line_start = idx + ch.len_utf8();
457        }
458    }
459    if current_line != line {
460        return None;
461    }
462
463    let mut current_column = 1usize;
464    for (rel_idx, ch) in sql[line_start..].char_indices() {
465        if current_column == column {
466            return Some(line_start + rel_idx);
467        }
468        if ch == '\n' {
469            return None;
470        }
471        current_column += 1;
472    }
473    if current_column == column {
474        return Some(sql.len());
475    }
476    None
477}
478
479fn token_word_equals(token: &Token, word: &str) -> bool {
480    match token {
481        Token::Word(w) => w.value.eq_ignore_ascii_case(word),
482        _ => false,
483    }
484}
485
486fn is_trivia(token: &Token) -> bool {
487    matches!(token, Token::Whitespace(_))
488}
489
490fn is_comment_token(token: &Token) -> bool {
491    matches!(
492        token,
493        Token::Whitespace(Whitespace::SingleLineComment { .. } | Whitespace::MultiLineComment(_))
494    )
495}
496
497#[cfg(test)]
498mod tests {
499    use super::*;
500    use crate::parser::parse_sql;
501    use crate::types::{IssueAutofixApplicability, IssuePatchEdit};
502
503    fn check_sql(sql: &str) -> Vec<Issue> {
504        let stmts = parse_sql(sql).unwrap();
505        let rule = AvoidUsingJoin;
506        let ctx = LintContext {
507            sql,
508            statement_range: 0..sql.len(),
509            statement_index: 0,
510        };
511        let mut issues = Vec::new();
512        for stmt in &stmts {
513            issues.extend(rule.check(stmt, &ctx));
514        }
515        issues
516    }
517
518    fn apply_edits(sql: &str, edits: &[IssuePatchEdit]) -> String {
519        let mut output = sql.to_string();
520        let mut ordered = edits.iter().collect::<Vec<_>>();
521        ordered.sort_by_key(|edit| edit.span.start);
522        for edit in ordered.into_iter().rev() {
523            output.replace_range(edit.span.start..edit.span.end, &edit.replacement);
524        }
525        output
526    }
527
528    #[test]
529    fn test_using_join_detected() {
530        let sql = "SELECT * FROM a JOIN b USING (id)";
531        let issues = check_sql(sql);
532        assert_eq!(issues.len(), 1);
533        assert_eq!(issues[0].code, "LINT_ST_007");
534
535        let autofix = issues[0]
536            .autofix
537            .as_ref()
538            .expect("expected ST007 core autofix metadata");
539        assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
540        assert_eq!(autofix.edits.len(), 1);
541        let fixed = apply_edits(sql, &autofix.edits);
542        assert_eq!(fixed, "SELECT * FROM a JOIN b ON a.id = b.id");
543    }
544
545    #[test]
546    fn test_on_join_ok() {
547        let issues = check_sql("SELECT * FROM a JOIN b ON a.id = b.id");
548        assert!(issues.is_empty());
549    }
550
551    #[test]
552    fn test_using_join_comment_blocks_safe_autofix() {
553        let issues = check_sql("SELECT * FROM a JOIN b USING (id /*keep*/)");
554        assert_eq!(issues.len(), 1);
555        assert!(
556            issues[0].autofix.is_none(),
557            "comment-bearing USING join should not emit safe autofix metadata"
558        );
559    }
560
561    #[test]
562    fn test_using_join_subquery_alias_autofix() {
563        let sql = "select x.a from (SELECT 1 AS a) AS x inner join y using (id)";
564        let issues = check_sql(sql);
565        assert_eq!(issues.len(), 1);
566        let autofix = issues[0]
567            .autofix
568            .as_ref()
569            .expect("expected ST007 core autofix metadata");
570        let fixed = apply_edits(sql, &autofix.edits);
571        assert_eq!(
572            fixed,
573            "select x.a from (SELECT 1 AS a) AS x inner join y ON x.id = y.id"
574        );
575    }
576
577    #[test]
578    fn test_using_chain_stops_after_first_using_fix() {
579        let sql = "select x.a\nfrom x\ninner join y using(id, foo)\ninner join z using(id)";
580        let issues = check_sql(sql);
581        assert_eq!(issues.len(), 2);
582
583        let all_edits: Vec<IssuePatchEdit> = issues
584            .iter()
585            .filter_map(|issue| issue.autofix.as_ref())
586            .flat_map(|autofix| autofix.edits.iter().cloned())
587            .collect();
588        let fixed = apply_edits(sql, &all_edits);
589        assert_eq!(
590            fixed,
591            "select x.a\nfrom x\ninner join y ON x.id = y.id AND x.foo = y.foo\ninner join z using(id)"
592        );
593        assert!(
594            issues[1].autofix.is_none(),
595            "second USING join should remain unfixed for safety"
596        );
597    }
598}