Skip to main content

sqrust_rules/convention/
distinct_parenthesis.rs

1use sqrust_core::{Diagnostic, FileContext, Rule};
2
/// Lint rule that reports `DISTINCT` written like a function call, i.e.
/// `DISTINCT(col)` instead of `DISTINCT col`.
///
/// The rule holds no state, so it is a unit struct; the derives make it
/// printable in debug output and trivially constructible and copyable.
#[derive(Debug, Clone, Copy, Default)]
pub struct DistinctParenthesis;
4
/// Translates a byte `offset` into `source` to a 1-based `(line, col)` pair.
///
/// The line is one plus the number of `\n` characters before `offset`. The
/// column is one plus the number of bytes between the start of that line and
/// `offset`, so it counts bytes rather than characters.
///
/// # Panics
///
/// Panics if `offset` is past the end of `source` or does not fall on a
/// `char` boundary.
fn line_col(source: &str, offset: usize) -> (usize, usize) {
    let prefix = &source[..offset];
    let line = 1 + prefix.matches('\n').count();
    // Byte index of the first byte on the line that contains `offset`.
    let line_start = match prefix.rfind('\n') {
        Some(newline) => newline + 1,
        None => 0,
    };
    (line, offset - line_start + 1)
}
12
/// Computes a per-byte mask over `source` marking everything that is not
/// plain SQL code.
///
/// The returned vector has exactly one entry per input byte. An entry is
/// `true` when the byte belongs to one of:
///
/// * a line comment (`--` up to, but not including, the terminating `\n`),
/// * a block comment (`/* ... */`, delimiters included, no nesting),
/// * a single-quoted string, where `''` inside it is an escaped quote,
/// * a double-quoted or backtick-quoted identifier.
///
/// A construct that is never closed extends to the end of the input.
fn build_skip(source: &[u8]) -> Vec<bool> {
    // Marks the region that starts with the delimiter at `open` and runs up to
    // and including the next `delim` byte (or to end of input when there is
    // none). Returns the index just past the marked region.
    fn mark_quoted(source: &[u8], mask: &mut [bool], open: usize, delim: u8) -> usize {
        let body = open + 1;
        let end = source[body..]
            .iter()
            .position(|&b| b == delim)
            .map_or(source.len(), |rel| body + rel + 1);
        mask[open..end].fill(true);
        end
    }

    let total = source.len();
    let mut mask = vec![false; total];
    let mut pos = 0;

    while pos < total {
        let rest = &source[pos..];

        // Each branch yields the position at which scanning resumes.
        pos = if rest.starts_with(b"--") {
            // The newline itself stays unmasked.
            let end = rest
                .iter()
                .position(|&b| b == b'\n')
                .map_or(total, |rel| pos + rel);
            mask[pos..end].fill(true);
            end
        } else if rest.starts_with(b"/*") {
            // Search for the closer strictly after the opener so that `/*/`
            // is not treated as a complete comment.
            let end = rest[2..]
                .windows(2)
                .position(|pair| pair == b"*/")
                .map_or(total, |rel| pos + 2 + rel + 2);
            mask[pos..end].fill(true);
            end
        } else {
            match source[pos] {
                b'\'' => {
                    let mut end = pos + 1;
                    loop {
                        match source[end..].iter().position(|&b| b == b'\'') {
                            // Unterminated string: everything to EOF is literal.
                            None => {
                                end = total;
                                break;
                            }
                            Some(rel) => {
                                end += rel + 1;
                                if source.get(end) == Some(&b'\'') {
                                    // Doubled quote: still inside the string.
                                    end += 1;
                                } else {
                                    break;
                                }
                            }
                        }
                    }
                    mask[pos..end].fill(true);
                    end
                }
                b'"' | b'`' => mark_quoted(source, &mut mask, pos, source[pos]),
                _ => pos + 1,
            }
        };
    }

    mask
}
107
/// Reports whether `ch` is an ASCII letter, an ASCII digit, or an underscore.
#[inline]
fn is_word_char(ch: u8) -> bool {
    matches!(ch, b'_' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z')
}
113
114impl Rule for DistinctParenthesis {
115    fn name(&self) -> &'static str {
116        "Convention/DistinctParenthesis"
117    }
118
119    fn check(&self, ctx: &FileContext) -> Vec<Diagnostic> {
120        let source = &ctx.source;
121        let bytes = source.as_bytes();
122        let len = bytes.len();
123        let skip = build_skip(bytes);
124
125        const DISTINCT: &[u8] = b"DISTINCT";
126        const DISTINCT_LEN: usize = 8;
127
128        let mut diags = Vec::new();
129        let mut i = 0;
130
131        while i < len {
132            // Skip positions that are inside strings/comments
133            if skip[i] {
134                i += 1;
135                continue;
136            }
137
138            // Look for the start of a word token matching DISTINCT (case-insensitive)
139            if is_word_char(bytes[i]) {
140                // Word boundary: not preceded by a word character
141                let preceded_by_word = i > 0 && is_word_char(bytes[i - 1]);
142                if preceded_by_word {
143                    i += 1;
144                    continue;
145                }
146
147                // Find the end of this word
148                let word_start = i;
149                let mut word_end = i;
150                while word_end < len && is_word_char(bytes[word_end]) {
151                    word_end += 1;
152                }
153
154                // Must be exactly 8 chars long and all code (not inside skip)
155                if word_end - word_start == DISTINCT_LEN
156                    && (word_start..word_end).all(|k| !skip[k])
157                {
158                    // Case-insensitive match against "DISTINCT"
159                    let matches_distinct = bytes[word_start..word_end]
160                        .iter()
161                        .zip(DISTINCT.iter())
162                        .all(|(&a, &b)| a.eq_ignore_ascii_case(&b));
163
164                    if matches_distinct {
165                        // Check what precedes DISTINCT (skip backwards over whitespace)
166                        // If preceded by '(' it's inside a function like COUNT(DISTINCT ...)
167                        // — that's not a violation.
168                        let mut back = word_start;
169                        while back > 0 && bytes[back - 1].is_ascii_whitespace() {
170                            back -= 1;
171                        }
172                        let preceded_by_open_paren = back > 0 && bytes[back - 1] == b'(';
173
174                        if !preceded_by_open_paren {
175                            // Now look past DISTINCT for '('
176                            let mut j = word_end;
177                            // Skip whitespace after DISTINCT
178                            while j < len && bytes[j].is_ascii_whitespace() && !skip[j] {
179                                j += 1;
180                            }
181                            // If the next code character is '(', it's a violation
182                            if j < len && !skip[j] && bytes[j] == b'(' {
183                                let (line, col) = line_col(source, j);
184                                diags.push(Diagnostic {
185                                    rule: self.name(),
186                                    message: "DISTINCT is not a function; write DISTINCT col instead of DISTINCT(col)".to_string(),
187                                    line,
188                                    col,
189                                });
190                            }
191                        }
192
193                        i = word_end;
194                        continue;
195                    }
196                }
197
198                i = word_end;
199                continue;
200            }
201
202            i += 1;
203        }
204
205        diags
206    }
207
208    fn fix(&self, _ctx: &FileContext) -> Option<String> {
209        // Fix is complex due to matching the closing parenthesis correctly.
210        // Return None — flag only.
211        None
212    }
213}