// flowscope_core/linter/rules/st_008.rs
use crate::linter::rule::{LintContext, LintRule};
use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit, Span};
use sqlparser::ast::Statement;
use sqlparser::keywords::Keyword;
use sqlparser::tokenizer::{Token, TokenWithSpan, Tokenizer, Whitespace};

12pub struct StructureDistinct;
13
14impl LintRule for StructureDistinct {
15 fn code(&self) -> &'static str {
16 issue_codes::LINT_ST_008
17 }
18
19 fn name(&self) -> &'static str {
20 "Structure distinct"
21 }
22
23 fn description(&self) -> &'static str {
24 "'DISTINCT' used with parentheses."
25 }
26
27 fn check(&self, _statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
28 let candidates = st008_autofix_candidates(ctx.statement_sql(), ctx.dialect());
29
30 candidates
31 .into_iter()
32 .map(|candidate| {
33 Issue::info(issue_codes::LINT_ST_008, "DISTINCT used with parentheses.")
34 .with_statement(ctx.statement_index)
35 .with_span(candidate.span)
36 .with_autofix_edits(IssueAutofixApplicability::Safe, candidate.edits)
37 })
38 .collect()
39 }
40}
41
/// One `DISTINCT(...)` occurrence together with the patch edits that rewrite
/// it into the unparenthesized (or properly spaced) form.
#[derive(Clone, Debug)]
struct St008AutofixCandidate {
    /// Byte span of the `DISTINCT` keyword itself; used as the issue location.
    span: Span,
    /// Text edits implementing the autofix for this occurrence.
    edits: Vec<IssuePatchEdit>,
}
47
/// Scans `sql` for `DISTINCT(...)` occurrences and builds an autofix
/// candidate (keyword span + patch edits) for each one that can be fixed
/// safely. Works on raw tokens, so it is independent of the parsed AST.
fn st008_autofix_candidates(sql: &str, dialect: Dialect) -> Vec<St008AutofixCandidate> {
    // If the SQL cannot be tokenized there is nothing to report.
    let Some(tokens) = tokenized(sql, dialect) else {
        return Vec::new();
    };

    let mut candidates = Vec::new();
    for distinct_index in 0..tokens.len() {
        if !is_distinct_keyword(&tokens[distinct_index].token) {
            continue;
        }

        // First token after DISTINCT, skipping whitespace/comments.
        let Some(next_index) = next_non_trivia_index(&tokens, distinct_index + 1) else {
            continue;
        };

        // `DISTINCT ON (...)` (PostgreSQL) is a different construct; skip it.
        if matches!(&tokens[next_index].token, Token::Word(word) if word.keyword == Keyword::ON) {
            continue;
        }

        let left_paren_index = next_index;
        if !matches!(tokens[left_paren_index].token, Token::LParen) {
            continue;
        }

        let has_space_before_paren =
            has_whitespace_between(&tokens, distinct_index, left_paren_index);

        // Find the matching `)` and note whether the parenthesized section has
        // a top-level comma (several projection items) or a `SELECT` keyword
        // (a subquery) — both make paren removal unsafe, so we bail out.
        let Some((right_paren_index, has_projection_comma, has_subquery)) =
            find_matching_distinct_rparen(&tokens, left_paren_index)
        else {
            continue;
        };
        if has_projection_comma || has_subquery {
            continue;
        }

        // Parens can be dropped entirely only when the token after `)` cannot
        // extend the expression (separator, clause keyword, or end of input).
        let paren_removable = next_token_allows_paren_removal(&tokens, right_paren_index + 1);

        // `DISTINCT (expr) * c` style: already spaced and the parens are
        // needed for grouping — nothing to fix here.
        if !paren_removable && has_space_before_paren {
            continue;
        }

        // Translate the relevant tokens into byte offsets within `sql`.
        let Some((distinct_start, distinct_end)) =
            token_with_span_offsets(sql, &tokens[distinct_index])
        else {
            continue;
        };
        let Some((_, left_paren_end)) = token_with_span_offsets(sql, &tokens[left_paren_index])
        else {
            continue;
        };
        let Some((right_paren_start, right_paren_end)) =
            token_with_span_offsets(sql, &tokens[right_paren_index])
        else {
            continue;
        };
        // Sanity check: offsets must be ordered or the edits would corrupt text.
        if left_paren_end < distinct_end || right_paren_end <= right_paren_start {
            continue;
        }

        let edits = if paren_removable {
            // Replace everything from the end of DISTINCT through `(` with a
            // single space, and delete the matching `)`.
            vec![
                IssuePatchEdit::new(Span::new(distinct_end, left_paren_end), " "),
                IssuePatchEdit::new(Span::new(right_paren_start, right_paren_end), ""),
            ]
        } else {
            // Parens must stay (grouping); just insert a space after DISTINCT.
            vec![IssuePatchEdit::new(
                Span::new(distinct_end, distinct_end),
                " ",
            )]
        };

        candidates.push(St008AutofixCandidate {
            span: Span::new(distinct_start, distinct_end),
            edits,
        });
    }

    candidates
}
134
135fn is_distinct_keyword(token: &Token) -> bool {
136 matches!(token, Token::Word(word) if word.keyword == Keyword::DISTINCT)
137}
138
139fn tokenized(sql: &str, dialect: Dialect) -> Option<Vec<TokenWithSpan>> {
140 let dialect = dialect.to_sqlparser_dialect();
141 let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
142 tokenizer.tokenize_with_location().ok()
143}
144
145fn find_matching_distinct_rparen(
148 tokens: &[TokenWithSpan],
149 left_paren_index: usize,
150) -> Option<(usize, bool, bool)> {
151 let mut depth = 0usize;
152 let mut has_projection_comma = false;
153 let mut has_subquery = false;
154
155 for (index, token) in tokens.iter().enumerate().skip(left_paren_index) {
156 if is_trivia_token(&token.token) {
157 continue;
158 }
159
160 match &token.token {
161 Token::LParen => {
162 depth += 1;
163 }
164 Token::RParen => {
165 if depth == 0 {
166 return None;
167 }
168 depth -= 1;
169 if depth == 0 {
170 return Some((index, has_projection_comma, has_subquery));
171 }
172 }
173 Token::Comma if depth == 1 => {
174 has_projection_comma = true;
175 }
176 Token::Word(word) if depth == 1 && word.keyword == Keyword::SELECT => {
177 has_subquery = true;
178 }
179 _ => {}
180 }
181 }
182
183 None
184}
185
186fn next_token_allows_paren_removal(tokens: &[TokenWithSpan], start: usize) -> bool {
190 let Some(index) = next_non_trivia_index(tokens, start) else {
191 return true;
192 };
193
194 match &tokens[index].token {
195 Token::Comma | Token::SemiColon | Token::RParen => true,
198 Token::Word(word) => {
199 matches!(
200 word.keyword,
201 Keyword::FROM
202 | Keyword::WHERE
203 | Keyword::GROUP
204 | Keyword::HAVING
205 | Keyword::QUALIFY
206 | Keyword::ORDER
207 | Keyword::LIMIT
208 | Keyword::FETCH
209 | Keyword::OFFSET
210 | Keyword::UNION
211 | Keyword::EXCEPT
212 | Keyword::INTERSECT
213 | Keyword::WINDOW
214 | Keyword::INTO
215 )
216 }
217 _ => false,
218 }
219}
220
221fn has_whitespace_between(tokens: &[TokenWithSpan], start: usize, end: usize) -> bool {
222 (start + 1..end).any(|i| is_trivia_token(&tokens[i].token))
223}
224
225fn next_non_trivia_index(tokens: &[TokenWithSpan], mut index: usize) -> Option<usize> {
226 while index < tokens.len() {
227 if !is_trivia_token(&tokens[index].token) {
228 return Some(index);
229 }
230 index += 1;
231 }
232 None
233}
234
235fn is_trivia_token(token: &Token) -> bool {
236 matches!(
237 token,
238 Token::Whitespace(
239 Whitespace::Space
240 | Whitespace::Newline
241 | Whitespace::Tab
242 | Whitespace::SingleLineComment { .. }
243 | Whitespace::MultiLineComment(_)
244 )
245 )
246}
247
248fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
249 let start = line_col_to_offset(
250 sql,
251 token.span.start.line as usize,
252 token.span.start.column as usize,
253 )?;
254 let end = line_col_to_offset(
255 sql,
256 token.span.end.line as usize,
257 token.span.end.column as usize,
258 )?;
259 Some((start, end))
260}
261
/// Maps a 1-based (line, column) position to a byte offset in `sql`.
///
/// A position exactly one past the final character maps to `sql.len()` so
/// that end-of-token spans at the end of input resolve. Returns `None` for
/// zero coordinates or positions outside the text.
fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
    if line == 0 || column == 0 {
        return None;
    }

    let (mut cur_line, mut cur_col) = (1usize, 1usize);

    for (offset, ch) in sql.char_indices() {
        if (cur_line, cur_col) == (line, column) {
            return Some(offset);
        }

        match ch {
            '\n' => {
                cur_line += 1;
                cur_col = 1;
            }
            _ => cur_col += 1,
        }
    }

    // One-past-the-end position: valid, maps to the end of the string.
    ((cur_line, cur_col) == (line, column)).then_some(sql.len())
}
289
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::parse_sql;
    use crate::types::IssueAutofixApplicability;

    /// Parses `sql` and runs the ST-008 rule over every statement,
    /// collecting all reported issues.
    fn run(sql: &str) -> Vec<Issue> {
        let statements = parse_sql(sql).expect("parse");
        let rule = StructureDistinct;
        statements
            .iter()
            .enumerate()
            .flat_map(|(index, statement)| {
                rule.check(
                    statement,
                    &LintContext {
                        sql,
                        statement_range: 0..sql.len(),
                        statement_index: index,
                    },
                )
            })
            .collect()
    }

    /// Applies an issue's autofix edits to `sql`. Edits are applied
    /// right-to-left so earlier spans stay valid as text shrinks/grows.
    /// Returns `None` when the issue carries no autofix.
    fn apply_issue_autofix(sql: &str, issue: &Issue) -> Option<String> {
        let autofix = issue.autofix.as_ref()?;
        let mut edits = autofix.edits.clone();
        edits.sort_by(|left, right| right.span.start.cmp(&left.span.start));

        let mut out = sql.to_string();
        for edit in edits {
            out.replace_range(edit.span.start..edit.span.end, &edit.replacement);
        }
        Some(out)
    }

    #[test]
    fn flags_distinct_parenthesized_projection() {
        let issues = run("SELECT DISTINCT(a) FROM t");
        assert_eq!(issues.len(), 1);
        assert_eq!(issues[0].code, issue_codes::LINT_ST_008);
    }

    #[test]
    fn does_not_flag_normal_distinct_projection() {
        let issues = run("SELECT DISTINCT a FROM t");
        assert!(issues.is_empty());
    }

    #[test]
    fn flags_multiple_projections_removes_parens() {
        let sql = "SELECT DISTINCT(a), b\n";
        let issues = run(sql);
        assert_eq!(issues.len(), 1);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(fixed, "SELECT DISTINCT a, b\n");
    }

    #[test]
    fn flags_in_nested_select_scope() {
        let issues = run("SELECT * FROM (SELECT DISTINCT(a) FROM t) AS sub");
        assert_eq!(issues.len(), 1);
    }

    #[test]
    fn emits_safe_autofix_for_distinct_parenthesized_projection() {
        let sql = "SELECT DISTINCT(a) FROM t";
        let issues = run(sql);
        assert_eq!(issues.len(), 1);

        // Removal path: one edit for the `(`, one for the `)`.
        let autofix = issues[0].autofix.as_ref().expect("autofix metadata");
        assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
        assert_eq!(autofix.edits.len(), 2);

        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(fixed, "SELECT DISTINCT a FROM t");
    }

    #[test]
    fn adds_space_when_parens_needed_for_grouping() {
        // Parens group `a + b` against `* c`, so only a space is inserted.
        let sql = "SELECT DISTINCT(a + b) * c";
        let issues = run(sql);
        assert_eq!(issues.len(), 1);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(fixed, "SELECT DISTINCT (a + b) * c");
    }

    #[test]
    fn does_not_flag_distinct_with_space_and_needed_parens() {
        assert!(run("SELECT DISTINCT (a + b) * c").is_empty());
    }

    #[test]
    fn flags_distinct_space_paren_single_column() {
        let sql = "SELECT DISTINCT (a)";
        let issues = run(sql);
        assert_eq!(issues.len(), 1);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(fixed, "SELECT DISTINCT a");
    }

    #[test]
    fn flags_distinct_inside_count() {
        let sql = "SELECT COUNT(DISTINCT(unique_key))\n";
        let issues = run(sql);
        assert_eq!(issues.len(), 1);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(fixed, "SELECT COUNT(DISTINCT unique_key)\n");
    }

    #[test]
    fn flags_distinct_concat_inside_count() {
        let sql = "SELECT COUNT(DISTINCT(CONCAT(col1, '-', col2, '-', col3)))\n";
        let issues = run(sql);
        assert_eq!(issues.len(), 1);
        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
        assert_eq!(
            fixed,
            "SELECT COUNT(DISTINCT CONCAT(col1, '-', col2, '-', col3))\n"
        );
    }

    #[test]
    fn does_not_flag_distinct_on_postgres() {
        // `DISTINCT ON (...)` is a PostgreSQL construct, not a paren misuse.
        assert!(run("SELECT DISTINCT ON(bcolor) bcolor, fcolor FROM t").is_empty());
    }
425
426 #[test]
427 fn does_not_flag_distinct_subquery_inside_count() {
428 let sql = "SELECT COUNT(DISTINCT(SELECT ANY_VALUE(id) FROM UNNEST(tag) t))";
430 assert!(run(sql).is_empty());
431 }
432}