Skip to main content

sqrust_rules/structure/
subquery_in_join_condition.rs

1use sqrust_core::{Diagnostic, FileContext, Rule};
2
3use crate::capitalisation::{is_word_char, SkipMap};
4
5pub struct SubqueryInJoinCondition;
6
7impl Rule for SubqueryInJoinCondition {
8    fn name(&self) -> &'static str {
9        "Structure/SubqueryInJoinCondition"
10    }
11
12    fn check(&self, ctx: &FileContext) -> Vec<Diagnostic> {
13        if !ctx.parse_errors.is_empty() {
14            return Vec::new();
15        }
16
17        find_subqueries_in_join_on(&ctx.source, self.name())
18    }
19}
20
21// ── source-level scan ─────────────────────────────────────────────────────────
22
23/// Scans source for occurrences of `ON` keyword followed (after optional
24/// whitespace/newlines) by `(SELECT`. Emits one diagnostic per occurrence.
25///
26/// The scan is a state machine:
27/// 1. Find a JOIN keyword (marks that we're inside a JOIN chain)
28/// 2. Look for the next ON keyword after JOIN
29/// 3. After ON, skip whitespace; if `(SELECT` follows, emit a diagnostic
30///
31/// We re-scan the whole source to find all JOIN...ON...subquery patterns.
32fn find_subqueries_in_join_on(source: &str, rule: &'static str) -> Vec<Diagnostic> {
33    let bytes = source.as_bytes();
34    let len = bytes.len();
35    let skip_map = SkipMap::build(source);
36
37    let mut diags = Vec::new();
38
39    // Find all ON keyword positions where the ON is preceded by a JOIN clause.
40    // Strategy: walk through source, find each ON keyword that is a whole word
41    // and is code (not inside a string/comment). For each such ON, scan forward
42    // skipping whitespace. If we find `(SELECT` (case-insensitive), emit a
43    // diagnostic at the position of the `ON` keyword.
44    //
45    // To avoid false positives (ON that is not a join ON), we check that there
46    // was a JOIN keyword somewhere before this ON in the same statement context.
47    let join_keywords: &[&[u8]] = &[
48        b"JOIN",
49        b"INNER JOIN",
50        b"LEFT JOIN",
51        b"RIGHT JOIN",
52        b"FULL JOIN",
53        b"CROSS JOIN",
54        b"LEFT OUTER JOIN",
55        b"RIGHT OUTER JOIN",
56        b"FULL OUTER JOIN",
57    ];
58
59    // Collect byte offsets of all JOIN keyword positions.
60    let join_offsets = collect_keyword_offsets(bytes, len, &skip_map, b"JOIN");
61
62    // Collect byte offsets of all ON keyword positions.
63    let on_offsets = collect_keyword_offsets(bytes, len, &skip_map, b"ON");
64
65    // For each ON, check if there is a JOIN somewhere before it (within the
66    // statement). Then check if (SELECT follows the ON.
67    for &on_pos in &on_offsets {
68        // There must be at least one JOIN before this ON.
69        let has_prior_join = join_offsets.iter().any(|&j| j < on_pos);
70        if !has_prior_join {
71            continue;
72        }
73
74        // Skip whitespace and newlines after the ON keyword.
75        let after_on = on_pos + 2; // len("ON") == 2
76        let mut scan = after_on;
77        while scan < len && (bytes[scan] == b' ' || bytes[scan] == b'\t' || bytes[scan] == b'\n' || bytes[scan] == b'\r') {
78            scan += 1;
79        }
80
81        // Check for `(SELECT` (case-insensitive).
82        if scan + 7 <= len
83            && bytes[scan] == b'('
84            && bytes[scan + 1..scan + 7].eq_ignore_ascii_case(b"SELECT")
85        {
86            // Ensure the S-E-L-E-C-T is followed by a non-word char (i.e. it's
87            // really the SELECT keyword, not something like `(SELECTX`).
88            let after_select = scan + 7;
89            let select_ends = after_select >= len || !is_word_char(bytes[after_select]);
90            if select_ends {
91                let (line, col) = offset_to_line_col(source, on_pos);
92                diags.push(Diagnostic {
93                    rule,
94                    message: "Subquery in JOIN ON condition may prevent index use; consider pre-computing as a CTE or derived table".to_string(),
95                    line,
96                    col,
97                });
98            }
99        }
100    }
101
102    // Suppress unused variable warning — join_keywords is used for documentation intent
103    let _ = join_keywords;
104
105    diags
106}
107
108/// Collect byte offsets of all whole-word occurrences of `keyword` in `bytes`
109/// that are code (not inside strings/comments) according to `skip_map`.
110fn collect_keyword_offsets(
111    bytes: &[u8],
112    len: usize,
113    skip_map: &SkipMap,
114    keyword: &[u8],
115) -> Vec<usize> {
116    let kw_len = keyword.len();
117    let mut offsets = Vec::new();
118    let mut i = 0;
119
120    while i + kw_len <= len {
121        if !skip_map.is_code(i) {
122            i += 1;
123            continue;
124        }
125
126        // Word boundary before.
127        let before_ok = i == 0 || !is_word_char(bytes[i - 1]);
128        if !before_ok {
129            i += 1;
130            continue;
131        }
132
133        // Case-insensitive match.
134        let matches = bytes[i..i + kw_len]
135            .iter()
136            .zip(keyword.iter())
137            .all(|(a, b)| a.eq_ignore_ascii_case(b));
138
139        if matches {
140            let after = i + kw_len;
141            let after_ok = after >= len || !is_word_char(bytes[after]);
142            let all_code = (i..i + kw_len).all(|k| skip_map.is_code(k));
143
144            if after_ok && all_code {
145                offsets.push(i);
146                i += kw_len;
147                continue;
148            }
149        }
150
151        i += 1;
152    }
153
154    offsets
155}
156
157// ── helpers ───────────────────────────────────────────────────────────────────
158
159/// Converts a byte offset to a 1-indexed (line, col) pair.
160fn offset_to_line_col(source: &str, offset: usize) -> (usize, usize) {
161    let before = &source[..offset];
162    let line = before.chars().filter(|&c| c == '\n').count() + 1;
163    let col = before.rfind('\n').map(|p| offset - p - 1).unwrap_or(offset) + 1;
164    (line, col)
165}