Skip to main content

flowscope_core/linter/rules/
am_008.rs

1//! LINT_AM_008: Ambiguous JOIN condition.
2//!
3//! SQLFluff AM08 parity: detect implicit cross joins where JOIN-like operators
4//! omit ON/USING/NATURAL conditions.
5
6use crate::linter::rule::{LintContext, LintRule};
7use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit, Span};
8use sqlparser::ast::{JoinConstraint, JoinOperator, Select, Statement, TableFactor};
9use sqlparser::tokenizer::{Token, TokenWithSpan, Tokenizer, Whitespace};
10
11use super::semantic_helpers::visit_selects_in_statement;
12
13pub struct AmbiguousJoinCondition;
14
15impl LintRule for AmbiguousJoinCondition {
16    fn code(&self) -> &'static str {
17        issue_codes::LINT_AM_008
18    }
19
20    fn name(&self) -> &'static str {
21        "Ambiguous join condition"
22    }
23
24    fn description(&self) -> &'static str {
25        "Implicit cross join detected."
26    }
27
28    fn check(&self, statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
29        let mut violations = 0usize;
30
31        visit_selects_in_statement(statement, &mut |select| {
32            violations += count_implicit_cross_join_violations(select);
33        });
34
35        // Subtract join types the AST doesn't distinguish but the token
36        // scanner can identify (e.g., DuckDB POSITIONAL JOIN).
37        let positional_joins = count_positional_joins_in_context(ctx);
38        violations = violations.saturating_sub(positional_joins);
39
40        let mut autofix_candidates = am008_autofix_candidates_for_context(ctx);
41        autofix_candidates.sort_by_key(|candidate| candidate.span.start);
42        let candidates_align = autofix_candidates.len() == violations;
43
44        (0..violations)
45            .map(|index| {
46                let mut issue =
47                    Issue::warning(issue_codes::LINT_AM_008, "Implicit cross join detected.")
48                        .with_statement(ctx.statement_index);
49                if candidates_align {
50                    let candidate = &autofix_candidates[index];
51                    issue = issue.with_span(candidate.span).with_autofix_edits(
52                        IssueAutofixApplicability::Safe,
53                        candidate.edits.clone(),
54                    );
55                }
56                issue
57            })
58            .collect()
59    }
60}
61
62fn count_implicit_cross_join_violations(select: &Select) -> usize {
63    // SQLFluff AM08 defers JOIN+WHERE patterns to CV12.
64    if select.selection.is_some() {
65        return 0;
66    }
67
68    let mut violations = 0usize;
69
70    for table in &select.from {
71        for join in &table.joins {
72            if !operator_requires_join_condition(&join.join_operator) {
73                continue;
74            }
75
76            if join_constraint_is_explicit(&join.join_operator) {
77                continue;
78            }
79
80            if is_unnest_join_target(&join.relation) {
81                continue;
82            }
83
84            violations += 1;
85        }
86    }
87
88    violations
89}
90
91fn operator_requires_join_condition(join_operator: &JoinOperator) -> bool {
92    matches!(
93        join_operator,
94        JoinOperator::Join(_)
95            | JoinOperator::Inner(_)
96            | JoinOperator::Left(_)
97            | JoinOperator::LeftOuter(_)
98            | JoinOperator::Right(_)
99            | JoinOperator::RightOuter(_)
100            | JoinOperator::FullOuter(_)
101            | JoinOperator::StraightJoin(_)
102    )
103}
104
105fn join_constraint_is_explicit(join_operator: &JoinOperator) -> bool {
106    let Some(constraint) = join_constraint(join_operator) else {
107        return false;
108    };
109
110    matches!(
111        constraint,
112        JoinConstraint::On(_) | JoinConstraint::Using(_) | JoinConstraint::Natural
113    )
114}
115
116fn join_constraint(join_operator: &JoinOperator) -> Option<&JoinConstraint> {
117    match join_operator {
118        JoinOperator::Join(constraint)
119        | JoinOperator::Inner(constraint)
120        | JoinOperator::Left(constraint)
121        | JoinOperator::LeftOuter(constraint)
122        | JoinOperator::Right(constraint)
123        | JoinOperator::RightOuter(constraint)
124        | JoinOperator::FullOuter(constraint)
125        | JoinOperator::CrossJoin(constraint)
126        | JoinOperator::Semi(constraint)
127        | JoinOperator::LeftSemi(constraint)
128        | JoinOperator::RightSemi(constraint)
129        | JoinOperator::Anti(constraint)
130        | JoinOperator::LeftAnti(constraint)
131        | JoinOperator::RightAnti(constraint)
132        | JoinOperator::StraightJoin(constraint) => Some(constraint),
133        JoinOperator::AsOf { constraint, .. } => Some(constraint),
134        JoinOperator::CrossApply | JoinOperator::OuterApply => None,
135    }
136}
137
138fn is_unnest_join_target(table_factor: &TableFactor) -> bool {
139    matches!(table_factor, TableFactor::UNNEST { .. })
140}
141
142#[derive(Clone, Debug)]
143struct PositionedToken {
144    token: Token,
145    start: usize,
146    end: usize,
147}
148
149#[derive(Clone, Debug)]
150struct Am008AutofixCandidate {
151    span: Span,
152    edits: Vec<IssuePatchEdit>,
153}
154
155#[derive(Clone, Copy, Debug)]
156struct JoinOperatorTokenSpan {
157    start_position: usize,
158    end_position: usize,
159}
160
161/// Count `POSITIONAL JOIN` occurrences in the statement's token stream.
162///
163/// sqlparser doesn't model POSITIONAL JOIN as a distinct operator — it parses
164/// `POSITIONAL` as a table alias and `JOIN` as a bare join. This function
165/// detects the pattern at the token level so the AST violation count can be
166/// corrected.
167fn count_positional_joins_in_context(ctx: &LintContext) -> usize {
168    let sql = ctx.statement_sql();
169    // Quick textual check to avoid tokenization when not needed.
170    if !sql.to_ascii_uppercase().contains("POSITIONAL") {
171        return 0;
172    }
173
174    let Some(tokens) = tokenize_with_spans(sql, ctx.dialect()) else {
175        return 0;
176    };
177    let mut count = 0;
178    let mut prev_word: Option<String> = None;
179    for token in &tokens {
180        match &token.token {
181            Token::Word(w) => {
182                let upper = w.value.to_ascii_uppercase();
183                if upper == "JOIN" && prev_word.as_deref() == Some("POSITIONAL") {
184                    count += 1;
185                }
186                prev_word = Some(upper);
187            }
188            t if is_trivia(t) => {}
189            _ => {
190                prev_word = None;
191            }
192        }
193    }
194    count
195}
196
197fn am008_autofix_candidates_for_context(ctx: &LintContext) -> Vec<Am008AutofixCandidate> {
198    let from_document_tokens = ctx.with_document_tokens(|tokens| {
199        if tokens.is_empty() {
200            return None;
201        }
202
203        let mut positioned = Vec::new();
204        for token in tokens {
205            let (start, end) = token_with_span_offsets(ctx.sql, token)?;
206            if start < ctx.statement_range.start || end > ctx.statement_range.end {
207                continue;
208            }
209            positioned.push(PositionedToken {
210                token: token.token.clone(),
211                start,
212                end,
213            });
214        }
215
216        Some(positioned)
217    });
218
219    if let Some(tokens) = from_document_tokens {
220        return am008_autofix_candidates_from_positioned_tokens(&tokens);
221    }
222
223    let Some(tokens) = tokenize_with_spans(ctx.statement_sql(), ctx.dialect()) else {
224        return Vec::new();
225    };
226
227    let mut positioned = Vec::new();
228    for token in &tokens {
229        let Some((start, end)) = token_with_span_offsets(ctx.statement_sql(), token) else {
230            continue;
231        };
232        positioned.push(PositionedToken {
233            token: token.token.clone(),
234            start: ctx.statement_range.start + start,
235            end: ctx.statement_range.start + end,
236        });
237    }
238
239    am008_autofix_candidates_from_positioned_tokens(&positioned)
240}
241
242fn am008_autofix_candidates_from_positioned_tokens(
243    tokens: &[PositionedToken],
244) -> Vec<Am008AutofixCandidate> {
245    if tokens.is_empty() {
246        return Vec::new();
247    }
248
249    let significant_indexes: Vec<usize> = tokens
250        .iter()
251        .enumerate()
252        .filter_map(|(index, token)| (!is_trivia(&token.token)).then_some(index))
253        .collect();
254    if significant_indexes.is_empty() {
255        return Vec::new();
256    }
257
258    let max_end = tokens.last().map_or(0, |token| token.end);
259    let mut candidates = Vec::new();
260    for position in 0..significant_indexes.len() {
261        let Some(operator_span) =
262            join_operator_token_span_at(tokens, &significant_indexes, position)
263        else {
264            continue;
265        };
266
267        if has_explicit_join_constraint(tokens, &significant_indexes, operator_span) {
268            continue;
269        }
270        if relation_starts_with_unnest(tokens, &significant_indexes, operator_span.end_position) {
271            continue;
272        }
273
274        let start_index = significant_indexes[operator_span.start_position];
275        let end_index = significant_indexes[operator_span.end_position];
276        let span = Span::new(tokens[start_index].start, tokens[end_index].end);
277        if span.start >= span.end || span.end > max_end {
278            continue;
279        }
280        if operator_span_contains_comment(tokens, span) {
281            continue;
282        }
283
284        candidates.push(Am008AutofixCandidate {
285            span,
286            edits: vec![IssuePatchEdit::new(span, "CROSS JOIN")],
287        });
288    }
289
290    candidates
291}
292
293fn join_operator_token_span_at(
294    tokens: &[PositionedToken],
295    significant_indexes: &[usize],
296    position: usize,
297) -> Option<JoinOperatorTokenSpan> {
298    let token = &tokens[*significant_indexes.get(position)?].token;
299
300    if token_word_equals(token, "STRAIGHT_JOIN") {
301        return Some(JoinOperatorTokenSpan {
302            start_position: position,
303            end_position: position,
304        });
305    }
306
307    if token_word_equals(token, "STRAIGHT")
308        && token_is_word_at(tokens, significant_indexes, position + 1, "JOIN")
309    {
310        return Some(JoinOperatorTokenSpan {
311            start_position: position,
312            end_position: position + 1,
313        });
314    }
315
316    if token_word_equals(token, "INNER")
317        && token_is_word_at(tokens, significant_indexes, position + 1, "JOIN")
318    {
319        return Some(JoinOperatorTokenSpan {
320            start_position: position,
321            end_position: position + 1,
322        });
323    }
324
325    if is_outer_join_side_keyword(token) {
326        if token_is_word_at(tokens, significant_indexes, position + 1, "OUTER")
327            && token_is_word_at(tokens, significant_indexes, position + 2, "JOIN")
328        {
329            return Some(JoinOperatorTokenSpan {
330                start_position: position,
331                end_position: position + 2,
332            });
333        }
334
335        if token_is_word_at(tokens, significant_indexes, position + 1, "JOIN") {
336            return Some(JoinOperatorTokenSpan {
337                start_position: position,
338                end_position: position + 1,
339            });
340        }
341    }
342
343    if token_word_equals(token, "JOIN")
344        && !previous_token_is_join_modifier(tokens, significant_indexes, position)
345    {
346        return Some(JoinOperatorTokenSpan {
347            start_position: position,
348            end_position: position,
349        });
350    }
351
352    None
353}
354
355fn token_is_word_at(
356    tokens: &[PositionedToken],
357    significant_indexes: &[usize],
358    position: usize,
359    expected_upper: &str,
360) -> bool {
361    significant_indexes
362        .get(position)
363        .and_then(|index| tokens.get(*index))
364        .is_some_and(|token| token_word_equals(&token.token, expected_upper))
365}
366
367fn previous_token_is_join_modifier(
368    tokens: &[PositionedToken],
369    significant_indexes: &[usize],
370    position: usize,
371) -> bool {
372    if position == 0 {
373        return false;
374    }
375
376    let previous = &tokens[significant_indexes[position - 1]].token;
377    token_word_equals(previous, "INNER")
378        || token_word_equals(previous, "LEFT")
379        || token_word_equals(previous, "RIGHT")
380        || token_word_equals(previous, "FULL")
381        || token_word_equals(previous, "CROSS")
382        || token_word_equals(previous, "NATURAL")
383        || token_word_equals(previous, "OUTER")
384        || token_word_equals(previous, "SEMI")
385        || token_word_equals(previous, "ANTI")
386        || token_word_equals(previous, "ASOF")
387        || token_word_equals(previous, "STRAIGHT")
388        || token_word_equals(previous, "STRAIGHT_JOIN")
389        || token_word_equals(previous, "POSITIONAL")
390}
391
392fn has_explicit_join_constraint(
393    tokens: &[PositionedToken],
394    significant_indexes: &[usize],
395    operator_span: JoinOperatorTokenSpan,
396) -> bool {
397    if operator_span.start_position > 0
398        && token_is_word_at(
399            tokens,
400            significant_indexes,
401            operator_span.start_position - 1,
402            "NATURAL",
403        )
404    {
405        return true;
406    }
407
408    // DuckDB POSITIONAL JOIN matches rows by position; no ON/USING needed.
409    if operator_span.start_position > 0
410        && token_is_word_at(
411            tokens,
412            significant_indexes,
413            operator_span.start_position - 1,
414            "POSITIONAL",
415        )
416    {
417        return true;
418    }
419
420    let mut depth = 0usize;
421    for position in (operator_span.end_position + 1)..significant_indexes.len() {
422        let token = &tokens[significant_indexes[position]].token;
423
424        match token {
425            Token::LParen => {
426                depth += 1;
427                continue;
428            }
429            Token::RParen => {
430                if depth == 0 {
431                    break;
432                }
433                depth -= 1;
434                continue;
435            }
436            _ => {}
437        }
438
439        if depth > 0 {
440            continue;
441        }
442
443        if token_word_equals(token, "ON") || token_word_equals(token, "USING") {
444            return true;
445        }
446
447        if token_word_equals(token, "NATURAL")
448            || is_clause_boundary_token(token)
449            || matches!(token, Token::Comma | Token::SemiColon)
450            || join_operator_token_span_at(tokens, significant_indexes, position).is_some()
451        {
452            break;
453        }
454    }
455
456    false
457}
458
459fn relation_starts_with_unnest(
460    tokens: &[PositionedToken],
461    significant_indexes: &[usize],
462    operator_end_position: usize,
463) -> bool {
464    for position in (operator_end_position + 1)..significant_indexes.len() {
465        let token = &tokens[significant_indexes[position]].token;
466        if token_word_equals(token, "LATERAL") {
467            continue;
468        }
469        return token_word_equals(token, "UNNEST");
470    }
471
472    false
473}
474
475fn is_clause_boundary_token(token: &Token) -> bool {
476    token_word_equals(token, "WHERE")
477        || token_word_equals(token, "GROUP")
478        || token_word_equals(token, "HAVING")
479        || token_word_equals(token, "QUALIFY")
480        || token_word_equals(token, "ORDER")
481        || token_word_equals(token, "LIMIT")
482        || token_word_equals(token, "FETCH")
483        || token_word_equals(token, "OFFSET")
484        || token_word_equals(token, "UNION")
485        || token_word_equals(token, "EXCEPT")
486        || token_word_equals(token, "INTERSECT")
487        || token_word_equals(token, "WINDOW")
488        || token_word_equals(token, "RETURNING")
489        || token_word_equals(token, "CONNECT")
490        || token_word_equals(token, "START")
491        || token_word_equals(token, "MODEL")
492}
493
494fn operator_span_contains_comment(tokens: &[PositionedToken], span: Span) -> bool {
495    tokens
496        .iter()
497        .any(|token| token.start >= span.start && token.end <= span.end && is_comment(&token.token))
498}
499
500fn tokenize_with_spans(sql: &str, dialect: Dialect) -> Option<Vec<TokenWithSpan>> {
501    let dialect = dialect.to_sqlparser_dialect();
502    let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
503    tokenizer.tokenize_with_location().ok()
504}
505
506fn token_word_equals(token: &Token, expected_upper: &str) -> bool {
507    matches!(token, Token::Word(word) if word.value.eq_ignore_ascii_case(expected_upper))
508}
509
510fn is_outer_join_side_keyword(token: &Token) -> bool {
511    token_word_equals(token, "LEFT")
512        || token_word_equals(token, "RIGHT")
513        || token_word_equals(token, "FULL")
514}
515
516fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
517    let start = line_col_to_offset(
518        sql,
519        token.span.start.line as usize,
520        token.span.start.column as usize,
521    )?;
522    let end = line_col_to_offset(
523        sql,
524        token.span.end.line as usize,
525        token.span.end.column as usize,
526    )?;
527    Some((start, end))
528}
529
530fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
531    if line == 0 || column == 0 {
532        return None;
533    }
534
535    let mut current_line = 1usize;
536    let mut current_col = 1usize;
537
538    for (offset, ch) in sql.char_indices() {
539        if current_line == line && current_col == column {
540            return Some(offset);
541        }
542
543        if ch == '\n' {
544            current_line += 1;
545            current_col = 1;
546        } else {
547            current_col += 1;
548        }
549    }
550
551    if current_line == line && current_col == column {
552        return Some(sql.len());
553    }
554
555    None
556}
557
558fn is_trivia(token: &Token) -> bool {
559    matches!(
560        token,
561        Token::Whitespace(
562            Whitespace::Space
563                | Whitespace::Newline
564                | Whitespace::Tab
565                | Whitespace::SingleLineComment { .. }
566                | Whitespace::MultiLineComment(_)
567        )
568    )
569}
570
571fn is_comment(token: &Token) -> bool {
572    matches!(
573        token,
574        Token::Whitespace(Whitespace::SingleLineComment { .. } | Whitespace::MultiLineComment(_))
575    )
576}
577
578#[cfg(test)]
579mod tests {
580    use super::*;
581    use crate::parser::parse_sql;
582    use crate::types::IssueAutofixApplicability;
583
584    fn run(sql: &str) -> Vec<Issue> {
585        let statements = parse_sql(sql).expect("parse");
586        let rule = AmbiguousJoinCondition;
587        statements
588            .iter()
589            .enumerate()
590            .flat_map(|(index, statement)| {
591                rule.check(
592                    statement,
593                    &LintContext {
594                        sql,
595                        statement_range: 0..sql.len(),
596                        statement_index: index,
597                    },
598                )
599            })
600            .collect()
601    }
602
603    // --- Edge cases adopted from sqlfluff AM08 ---
604
605    #[test]
606    fn flags_missing_on_clause_for_inner_join() {
607        let issues = run("SELECT foo.a, bar.b FROM foo INNER JOIN bar");
608        assert_eq!(issues.len(), 1);
609        assert_eq!(issues[0].code, issue_codes::LINT_AM_008);
610    }
611
612    #[test]
613    fn flags_missing_on_clause_for_left_join() {
614        let issues = run("SELECT foo.a, bar.b FROM foo left join bar");
615        assert_eq!(issues.len(), 1);
616    }
617
618    #[test]
619    fn flags_each_missing_join_condition_in_join_chain() {
620        let issues =
621            run("SELECT foo.a, bar.b FROM foo left join bar left join baz on foo.x = bar.y");
622        assert_eq!(issues.len(), 1);
623
624        let issues =
625            run("SELECT foo.a, bar.b FROM foo left join bar on foo.x = bar.y left join baz");
626        assert_eq!(issues.len(), 1);
627    }
628
629    #[test]
630    fn does_not_flag_join_without_on_when_where_clause_exists() {
631        let issues = run("SELECT foo.a, bar.b FROM foo left join bar where foo.x = bar.y");
632        assert!(issues.is_empty());
633
634        let issues = run("SELECT foo.a, bar.b FROM foo JOIN bar WHERE foo.a = bar.a OR foo.x = 3");
635        assert!(issues.is_empty());
636    }
637
638    #[test]
639    fn allows_explicit_join_conditions() {
640        let issues = run("SELECT foo.a, bar.b FROM foo INNER JOIN bar ON 1=1");
641        assert!(issues.is_empty());
642
643        let issues = run("SELECT foo.id, bar.id FROM foo LEFT JOIN bar USING (id)");
644        assert!(issues.is_empty());
645
646        let issues = run("SELECT foo.x FROM foo NATURAL JOIN bar");
647        assert!(issues.is_empty());
648    }
649
650    #[test]
651    fn allows_explicit_cross_join() {
652        let issues = run("SELECT foo.a, bar.b FROM foo CROSS JOIN bar");
653        assert!(issues.is_empty());
654    }
655
656    #[test]
657    fn ignores_unnest_joins() {
658        let issues = run("SELECT t.id FROM t INNER JOIN UNNEST(t.items) AS item");
659        assert!(issues.is_empty());
660    }
661
662    #[test]
663    fn inner_join_without_condition_emits_safe_autofix_patch() {
664        let sql = "SELECT foo.a, bar.b FROM foo INNER JOIN bar";
665        let issues = run(sql);
666        assert_eq!(issues.len(), 1);
667
668        let autofix = issues[0]
669            .autofix
670            .as_ref()
671            .expect("expected AM008 core autofix metadata");
672        assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
673        assert_eq!(autofix.edits.len(), 1);
674        assert_eq!(autofix.edits[0].replacement, "CROSS JOIN");
675        assert_eq!(
676            &sql[autofix.edits[0].span.start..autofix.edits[0].span.end],
677            "INNER JOIN"
678        );
679    }
680
681    #[test]
682    fn bare_join_without_condition_emits_safe_autofix_patch() {
683        let sql = "SELECT foo.a, bar.b FROM foo JOIN bar";
684        let issues = run(sql);
685        assert_eq!(issues.len(), 1);
686
687        let autofix = issues[0]
688            .autofix
689            .as_ref()
690            .expect("expected AM008 core autofix metadata for bare JOIN");
691        assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
692        assert_eq!(autofix.edits.len(), 1);
693        assert_eq!(autofix.edits[0].replacement, "CROSS JOIN");
694        assert_eq!(
695            &sql[autofix.edits[0].span.start..autofix.edits[0].span.end],
696            "JOIN"
697        );
698    }
699
700    #[test]
701    fn join_operator_comment_blocks_safe_autofix_metadata() {
702        let sql = "SELECT foo.a, bar.b FROM foo LEFT /*keep*/ JOIN bar";
703        let issues = run(sql);
704        assert_eq!(issues.len(), 1);
705        assert!(
706            issues[0].autofix.is_none(),
707            "comment-bearing join operator span should remain unpatched"
708        );
709    }
710
711    #[test]
712    fn allows_positional_join_duckdb() {
713        let issues = run("SELECT foo.a, bar.b FROM foo POSITIONAL JOIN bar");
714        assert!(issues.is_empty());
715    }
716}