Skip to main content

sqrust_rules/lint/
duplicate_condition.rs

1use sqrust_core::{Diagnostic, FileContext, Rule};
2
3pub struct DuplicateCondition;
4
5impl Rule for DuplicateCondition {
6    fn name(&self) -> &'static str {
7        "Lint/DuplicateCondition"
8    }
9
10    fn check(&self, ctx: &FileContext) -> Vec<Diagnostic> {
11        let source = &ctx.source;
12        let bytes = source.as_bytes();
13        let skip = build_skip(bytes);
14
15        let mut diags = Vec::new();
16
17        // Find all WHERE and HAVING clauses and check each for duplicate conditions.
18        check_clauses(source, &skip, &mut diags);
19
20        diags
21    }
22}
23
24/// Scans the source for WHERE and HAVING clauses and emits a diagnostic for
25/// each duplicate condition found within a clause.
26fn check_clauses(source: &str, skip: &[bool], diags: &mut Vec<Diagnostic>) {
27    let bytes = source.as_bytes();
28    let len = bytes.len();
29    // Uppercase copy for case-insensitive keyword matching.
30    let upper: Vec<u8> = bytes.iter().map(|b| b.to_ascii_uppercase()).collect();
31
32    let mut i = 0usize;
33    while i < len {
34        if skip[i] {
35            i += 1;
36            continue;
37        }
38
39        // Try to match WHERE or HAVING at position i.
40        let clause_start_opt = if let Some(after) = match_keyword_at(&upper, skip, i, len, b"WHERE") {
41            Some((after, i))
42        } else if let Some(after) = match_keyword_at(&upper, skip, i, len, b"HAVING") {
43            Some((after, i))
44        } else {
45            None
46        };
47
48        if let Some((after_kw, kw_start)) = clause_start_opt {
49            // Extract clause text: from after_kw until the next clause-terminating
50            // keyword (outside strings/comments) or end of source.
51            let clause_end = find_clause_end(&upper, skip, after_kw, len);
52            let clause_source = &source[after_kw..clause_end];
53            let clause_skip = &skip[after_kw..clause_end];
54
55            // Check for duplicates within this clause.
56            check_clause_for_duplicates(
57                source,
58                clause_source,
59                clause_skip,
60                after_kw,
61                kw_start,
62                diags,
63            );
64
65            // Advance past the keyword to avoid re-matching it.
66            i = after_kw;
67            continue;
68        }
69
70        i += 1;
71    }
72}
73
74/// Returns the end offset (exclusive) of a WHERE/HAVING clause body, i.e.
75/// the offset at which the next statement-terminating keyword begins, or the
76/// end of source.
77///
78/// Terminating keywords: GROUP, ORDER, HAVING, LIMIT, UNION, EXCEPT,
79/// INTERSECT, `;`.
80fn find_clause_end(upper: &[u8], skip: &[bool], start: usize, len: usize) -> usize {
81    let terminators: &[&[u8]] = &[
82        b"GROUP", b"ORDER", b"HAVING", b"LIMIT", b"UNION", b"EXCEPT", b"INTERSECT",
83    ];
84    let mut i = start;
85    while i < len {
86        if skip[i] {
87            i += 1;
88            continue;
89        }
90        // Semicolon terminates the clause.
91        if upper[i] == b';' {
92            return i;
93        }
94        // Check each terminating keyword.
95        for kw in terminators {
96            if match_keyword_at(upper, skip, i, len, kw).is_some() {
97                return i;
98            }
99        }
100        i += 1;
101    }
102    len
103}
104
105/// Splits a clause body on ` AND ` and ` OR ` (case-insensitive, spaces
106/// required), normalises each piece, then reports any duplicate that appears
107/// for the second or subsequent time.
108fn check_clause_for_duplicates(
109    full_source: &str,
110    clause_text: &str,
111    clause_skip: &[bool],
112    clause_offset: usize,  // byte offset of clause_text within full_source
113    _kw_start: usize,      // position of the WHERE/HAVING keyword (unused for now)
114    diags: &mut Vec<Diagnostic>,
115) {
116    // Split the clause on AND / OR connectives.
117    // We use a simple approach: split the lowercased clause on " and " and " or "
118    // (with spaces), then map the pieces back to their original offsets.
119    let conditions = split_clause(clause_text, clause_skip);
120
121    // Normalize each condition and track where we've seen it.
122    // seen: map from normalized form to the first raw occurrence.
123    let mut seen: Vec<(String, usize)> = Vec::new(); // (normalized, source_offset)
124
125    for (raw, local_offset) in conditions {
126        let normalized = normalize_condition(&raw);
127        if normalized.is_empty() {
128            continue;
129        }
130        let source_offset = clause_offset + local_offset;
131
132        let already_seen = seen.iter().any(|(norm, _)| norm == &normalized);
133        if already_seen {
134            // This is a duplicate — report the position of this occurrence.
135            let (line, col) = offset_to_line_col(full_source, source_offset);
136            diags.push(Diagnostic {
137                rule: "Lint/DuplicateCondition",
138                message: "Duplicate condition in WHERE/HAVING clause".to_string(),
139                line,
140                col,
141            });
142        } else {
143            seen.push((normalized, source_offset));
144        }
145    }
146}
147
148/// Splits `text` into individual conditions by scanning for word-boundary
149/// `AND` and `OR` connectives that are not inside skip regions.
150/// Returns a list of `(raw_condition_text, byte_offset_within_text)` pairs.
151fn split_clause<'a>(text: &'a str, skip: &[bool]) -> Vec<(String, usize)> {
152    let bytes = text.as_bytes();
153    let len = bytes.len();
154    // Uppercase for keyword matching.
155    let upper: Vec<u8> = bytes.iter().map(|b| b.to_ascii_uppercase()).collect();
156
157    let mut conditions: Vec<(String, usize)> = Vec::new();
158    let mut segment_start = 0usize;
159    let mut i = 0usize;
160
161    while i < len {
162        let skip_here = skip.get(i).copied().unwrap_or(false);
163        if skip_here {
164            i += 1;
165            continue;
166        }
167
168        // Try to match AND or OR at position i.
169        let split_end = if let Some(after) = match_keyword_at(&upper, skip, i, len, b"AND") {
170            Some(after)
171        } else if let Some(after) = match_keyword_at(&upper, skip, i, len, b"OR") {
172            Some(after)
173        } else {
174            None
175        };
176
177        if let Some(after_kw) = split_end {
178            // Push the segment from segment_start to i (before the connective).
179            let segment = text[segment_start..i].to_string();
180            conditions.push((segment, segment_start));
181            segment_start = after_kw;
182            i = after_kw;
183            continue;
184        }
185
186        i += 1;
187    }
188
189    // Push the trailing segment.
190    if segment_start < len {
191        let segment = text[segment_start..len].to_string();
192        conditions.push((segment, segment_start));
193    }
194
195    conditions
196}
197
/// Normalises a condition for duplicate detection.
///
/// Outside quoted regions:
/// - characters are lowercased,
/// - each run of whitespace collapses to a single space,
/// - leading and trailing whitespace is removed.
///
/// Text inside single quotes, double quotes or backticks is copied verbatim
/// (case and spacing preserved), so conditions that differ only inside a
/// literal or quoted identifier, such as `name = 'Bob'` and `name = 'bob'`,
/// are not reported as duplicates of each other. Inside a single-quoted
/// string a doubled quote (`''`) is treated as an escaped quote. An
/// unterminated quote extends to the end of the input.
fn normalize_condition(raw: &str) -> String {
    let mut result = String::with_capacity(raw.len());
    // Set when whitespace was seen after some output; the space is only
    // emitted once another character follows, which trims the tail for free.
    let mut pending_space = false;
    let mut chars = raw.chars().peekable();

    while let Some(ch) = chars.next() {
        if ch.is_whitespace() {
            pending_space = !result.is_empty();
            continue;
        }
        if pending_space {
            result.push(' ');
            pending_space = false;
        }

        match ch {
            '\'' | '"' | '`' => {
                // Copy the quoted region untouched, including its delimiters.
                result.push(ch);
                while let Some(inner) = chars.next() {
                    result.push(inner);
                    if inner != ch {
                        continue;
                    }
                    if ch == '\'' && chars.peek() == Some(&'\'') {
                        // `''` is an escaped quote, not the end of the string.
                        result.push('\'');
                        chars.next();
                    } else {
                        break;
                    }
                }
            }
            _ => result.extend(ch.to_lowercase()),
        }
    }

    result
}
224
/// Tests whether keyword `kw` occurs at byte offset `pos` of `upper`.
///
/// * `upper` - ASCII-uppercased source bytes.
/// * `skip` - per-byte mask; if the byte at `pos` is masked nothing matches
///   (offsets past the end of `skip` count as not masked).
/// * `len` - number of bytes of `upper` to consider; values larger than
///   `upper.len()` are clamped.
/// * `kw` - the keyword in uppercase ASCII.
///
/// Returns `Some(offset_just_after_keyword)` when the keyword is present and
/// is delimited by word boundaries on both sides, otherwise `None`.
///
/// A word character is an ASCII letter, digit, `_`, or any non-ASCII byte.
/// Non-ASCII bytes are parts of multi-byte UTF-8 characters, so counting them
/// as word characters prevents a keyword from matching inside an identifier
/// such as `señor`.
fn match_keyword_at(
    upper: &[u8],
    skip: &[bool],
    pos: usize,
    len: usize,
    kw: &[u8],
) -> Option<usize> {
    fn is_word_byte(b: u8) -> bool {
        b.is_ascii_alphanumeric() || b == b'_' || !b.is_ascii()
    }

    // Never look past the real buffer, whatever the caller passed.
    let len = len.min(upper.len());
    let after_pos = pos.checked_add(kw.len())?;
    if after_pos > len {
        return None;
    }
    if skip.get(pos).copied().unwrap_or(false) {
        return None;
    }
    if &upper[pos..after_pos] != kw {
        return None;
    }

    let before_ok = pos == 0 || !is_word_byte(upper[pos - 1]);
    let after_ok = after_pos >= len || !is_word_byte(upper[after_pos]);
    if before_ok && after_ok {
        Some(after_pos)
    } else {
        None
    }
}
261
/// Converts a byte offset in `source` to a 1-indexed `(line, col)` pair.
///
/// `col` counts bytes (not characters) from the start of the line. An offset
/// past the end of `source` is clamped to the end, and an offset that falls
/// inside a multi-byte character is moved back to the start of that
/// character, so the function never panics and line and column are always
/// computed from the same position.
fn offset_to_line_col(source: &str, offset: usize) -> (usize, usize) {
    let mut end = offset.min(source.len());
    // Offset 0 is always a boundary, so this loop terminates.
    while !source.is_char_boundary(end) {
        end -= 1;
    }

    let before = &source[..end];
    let line = before.bytes().filter(|&b| b == b'\n').count() + 1;
    let col = match before.rfind('\n') {
        // Bytes between the last newline and `end`, plus one for 1-indexing.
        Some(newline) => end - newline,
        None => end + 1,
    };
    (line, col)
}
269
/// Produces a per-byte mask of `bytes`: an entry is `true` when the byte lies
/// inside one of the regions below, delimiters included.
///
/// - line comment: `--` up to, but not including, the next `\n`
/// - block comment: `/* ... */`
/// - single-quoted string: `'...'`, where `''` is an escaped quote
/// - double-quoted identifier: `"..."`
/// - backtick identifier: `` `...` ``
///
/// A region that is never closed extends to the end of the input.
fn build_skip(bytes: &[u8]) -> Vec<bool> {
    let len = bytes.len();
    let mut skip = vec![false; len];
    let mut pos = 0usize;

    while pos < len {
        let current = bytes[pos];
        let next = bytes.get(pos + 1).copied();

        // Exclusive end of the masked region starting at `pos`, if one
        // starts here at all.
        let region_end = match (current, next) {
            // Line comment: stop at the newline, which stays unmasked.
            (b'-', Some(b'-')) => Some(
                bytes[pos..]
                    .iter()
                    .position(|&b| b == b'\n')
                    .map_or(len, |off| pos + off),
            ),
            // Block comment: the terminator is searched for after the opener,
            // so `/*/` is not a complete comment.
            (b'/', Some(b'*')) => Some(
                bytes[pos + 2..]
                    .windows(2)
                    .position(|pair| pair[0] == b'*' && pair[1] == b'/')
                    .map_or(len, |off| pos + 2 + off + 2),
            ),
            // String literal: a doubled quote is an escape, a lone quote closes.
            (b'\'', _) => {
                let mut cursor = pos + 1;
                loop {
                    if cursor >= len {
                        break;
                    }
                    if bytes[cursor] != b'\'' {
                        cursor += 1;
                    } else if bytes.get(cursor + 1) == Some(&b'\'') {
                        cursor += 2;
                    } else {
                        cursor += 1;
                        break;
                    }
                }
                Some(cursor)
            }
            // Quoted identifier: runs to the next occurrence of the same
            // delimiter, with no escape handling.
            (b'"', _) | (b'`', _) => Some(
                bytes[pos + 1..]
                    .iter()
                    .position(|&b| b == current)
                    .map_or(len, |off| pos + 1 + off + 1),
            ),
            _ => None,
        };

        match region_end {
            Some(end) => {
                for flag in &mut skip[pos..end] {
                    *flag = true;
                }
                pos = end;
            }
            None => pos += 1,
        }
    }

    skip
}