flowscope_core/linter/rules/
am_001.rs1use crate::linter::rule::{LintContext, LintRule};
7use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit};
8use sqlparser::ast::*;
9use sqlparser::keywords::Keyword;
10use sqlparser::tokenizer::{Token, TokenWithSpan, Tokenizer, Whitespace};
11
12pub struct DistinctWithGroupBy;
13
14impl LintRule for DistinctWithGroupBy {
15 fn code(&self) -> &'static str {
16 issue_codes::LINT_AM_001
17 }
18
19 fn name(&self) -> &'static str {
20 "DISTINCT with GROUP BY"
21 }
22
23 fn description(&self) -> &'static str {
24 "Ambiguous use of 'DISTINCT' in a 'SELECT' statement with 'GROUP BY'."
25 }
26
27 fn check(&self, stmt: &Statement, ctx: &LintContext) -> Vec<Issue> {
28 let mut issues = Vec::new();
29 check_statement(stmt, ctx, &mut issues);
30
31 let mut distinct_ranges = distinct_removal_ranges(ctx.statement_sql(), ctx.dialect());
32 if distinct_ranges.ranges.len() == issues.len() {
33 for issue in &mut issues {
34 if let Some((start, end)) = distinct_ranges.next() {
35 let span = ctx.span_from_statement_offset(start, end);
36 *issue = issue.clone().with_span(span).with_autofix_edits(
37 IssueAutofixApplicability::Safe,
38 vec![IssuePatchEdit::new(span, "")],
39 );
40 }
41 }
42 }
43
44 issues
45 }
46}
47
48fn check_statement(stmt: &Statement, ctx: &LintContext, issues: &mut Vec<Issue>) {
49 match stmt {
50 Statement::Query(q) => check_query(q, ctx, issues),
51 Statement::Insert(ins) => {
52 if let Some(ref source) = ins.source {
53 check_query(source, ctx, issues);
54 }
55 }
56 Statement::CreateView { query, .. } => check_query(query, ctx, issues),
57 Statement::CreateTable(create) => {
58 if let Some(ref q) = create.query {
59 check_query(q, ctx, issues);
60 }
61 }
62 _ => {}
63 }
64}
65
66fn check_query(query: &Query, ctx: &LintContext, issues: &mut Vec<Issue>) {
67 if let Some(ref with) = query.with {
68 for cte in &with.cte_tables {
69 check_query(&cte.query, ctx, issues);
70 }
71 }
72 check_set_expr(&query.body, ctx, issues);
73}
74
75fn check_set_expr(body: &SetExpr, ctx: &LintContext, issues: &mut Vec<Issue>) {
76 match body {
77 SetExpr::Select(select) => {
78 let has_distinct = matches!(
79 select.distinct,
80 Some(Distinct::Distinct) | Some(Distinct::On(_))
81 );
82 let has_group_by = match &select.group_by {
83 GroupByExpr::All(_) => true,
84 GroupByExpr::Expressions(exprs, _) => !exprs.is_empty(),
85 };
86
87 if has_distinct && has_group_by {
88 issues.push(
89 Issue::warning(
90 issue_codes::LINT_AM_001,
91 "DISTINCT is redundant when GROUP BY is present.",
92 )
93 .with_statement(ctx.statement_index),
94 );
95 }
96
97 for from_item in &select.from {
99 check_table_factor(&from_item.relation, ctx, issues);
100 for join in &from_item.joins {
101 check_table_factor(&join.relation, ctx, issues);
102 }
103 }
104 }
105 SetExpr::Query(q) => check_query(q, ctx, issues),
106 SetExpr::SetOperation { left, right, .. } => {
107 check_set_expr(left, ctx, issues);
108 check_set_expr(right, ctx, issues);
109 }
110 _ => {}
111 }
112}
113
114fn check_table_factor(relation: &TableFactor, ctx: &LintContext, issues: &mut Vec<Issue>) {
115 match relation {
116 TableFactor::Derived { subquery, .. } => check_query(subquery, ctx, issues),
117 TableFactor::NestedJoin {
118 table_with_joins, ..
119 } => {
120 check_table_factor(&table_with_joins.relation, ctx, issues);
121 for join in &table_with_joins.joins {
122 check_table_factor(&join.relation, ctx, issues);
123 }
124 }
125 _ => {}
126 }
127}
128
129struct DistinctRemovalRanges {
130 ranges: Vec<(usize, usize)>,
131 index: usize,
132}
133
134impl DistinctRemovalRanges {
135 fn next(&mut self) -> Option<(usize, usize)> {
136 let range = self.ranges.get(self.index).copied();
137 if range.is_some() {
138 self.index += 1;
139 }
140 range
141 }
142}
143
144fn distinct_removal_ranges(sql: &str, dialect: Dialect) -> DistinctRemovalRanges {
145 let Some(tokens) = tokenized(sql, dialect) else {
146 return DistinctRemovalRanges {
147 ranges: Vec::new(),
148 index: 0,
149 };
150 };
151
152 let mut ranges = Vec::new();
153 for distinct_index in 0..tokens.len() {
154 if !is_distinct_keyword(&tokens[distinct_index].token) {
155 continue;
156 }
157
158 let phrase_end_index = if let Some(on_index) =
159 next_non_trivia_index(&tokens, distinct_index + 1)
160 {
161 if is_on_keyword(&tokens[on_index].token) {
162 let Some(left_paren_index) = next_non_trivia_index(&tokens, on_index + 1) else {
163 continue;
164 };
165 if !matches!(tokens[left_paren_index].token, Token::LParen) {
166 continue;
167 }
168 let Some(right_paren_index) = find_matching_rparen(&tokens, left_paren_index)
169 else {
170 continue;
171 };
172 right_paren_index
173 } else {
174 distinct_index
175 }
176 } else {
177 distinct_index
178 };
179
180 let Some((start, _)) = token_with_span_offsets(sql, &tokens[distinct_index]) else {
181 continue;
182 };
183 let end = match next_non_trivia_index(&tokens, phrase_end_index + 1) {
184 Some(next_index) => match token_with_span_offsets(sql, &tokens[next_index]) {
185 Some((next_start, _)) => next_start,
186 None => continue,
187 },
188 None => match token_with_span_offsets(sql, &tokens[phrase_end_index]) {
189 Some((_, phrase_end)) => phrase_end,
190 None => continue,
191 },
192 };
193
194 if start < end {
195 ranges.push((start, end));
196 }
197 }
198
199 DistinctRemovalRanges { ranges, index: 0 }
200}
201
202fn tokenized(sql: &str, dialect: Dialect) -> Option<Vec<TokenWithSpan>> {
203 let dialect = dialect.to_sqlparser_dialect();
204 let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
205 tokenizer.tokenize_with_location().ok()
206}
207
208fn is_distinct_keyword(token: &Token) -> bool {
209 matches!(token, Token::Word(word) if word.keyword == Keyword::DISTINCT)
210}
211
212fn is_on_keyword(token: &Token) -> bool {
213 matches!(token, Token::Word(word) if word.keyword == Keyword::ON)
214}
215
216fn next_non_trivia_index(tokens: &[TokenWithSpan], mut index: usize) -> Option<usize> {
217 while index < tokens.len() {
218 if !is_trivia_token(&tokens[index].token) {
219 return Some(index);
220 }
221 index += 1;
222 }
223 None
224}
225
226fn find_matching_rparen(tokens: &[TokenWithSpan], left_paren_index: usize) -> Option<usize> {
227 let mut depth = 0usize;
228
229 for (index, token) in tokens.iter().enumerate().skip(left_paren_index) {
230 if is_trivia_token(&token.token) {
231 continue;
232 }
233
234 match token.token {
235 Token::LParen => {
236 depth += 1;
237 }
238 Token::RParen => {
239 if depth == 0 {
240 return None;
241 }
242 depth -= 1;
243 if depth == 0 {
244 return Some(index);
245 }
246 }
247 _ => {}
248 }
249 }
250
251 None
252}
253
254fn is_trivia_token(token: &Token) -> bool {
255 matches!(
256 token,
257 Token::Whitespace(
258 Whitespace::Space
259 | Whitespace::Newline
260 | Whitespace::Tab
261 | Whitespace::SingleLineComment { .. }
262 | Whitespace::MultiLineComment(_)
263 )
264 )
265}
266
267fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
268 let start = line_col_to_offset(
269 sql,
270 token.span.start.line as usize,
271 token.span.start.column as usize,
272 )?;
273 let end = line_col_to_offset(
274 sql,
275 token.span.end.line as usize,
276 token.span.end.column as usize,
277 )?;
278 Some((start, end))
279}
280
281fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
282 if line == 0 || column == 0 {
283 return None;
284 }
285
286 let mut current_line = 1usize;
287 let mut current_col = 1usize;
288
289 for (offset, ch) in sql.char_indices() {
290 if current_line == line && current_col == column {
291 return Some(offset);
292 }
293
294 if ch == '\n' {
295 current_line += 1;
296 current_col = 1;
297 } else {
298 current_col += 1;
299 }
300 }
301
302 if current_line == line && current_col == column {
303 return Some(sql.len());
304 }
305
306 None
307}
308
309#[cfg(test)]
310mod tests {
311 use super::*;
312 use crate::parser::parse_sql;
313 use crate::types::IssueAutofixApplicability;
314
315 fn check_sql(sql: &str) -> Vec<Issue> {
316 let stmts = parse_sql(sql).unwrap();
317 let rule = DistinctWithGroupBy;
318 let ctx = LintContext {
319 sql,
320 statement_range: 0..sql.len(),
321 statement_index: 0,
322 };
323 let mut issues = Vec::new();
324 for stmt in &stmts {
325 issues.extend(rule.check(stmt, &ctx));
326 }
327 issues
328 }
329
330 fn apply_issue_autofix(sql: &str, issue: &Issue) -> Option<String> {
331 let autofix = issue.autofix.as_ref()?;
332 let mut edits = autofix.edits.clone();
333 edits.sort_by(|left, right| right.span.start.cmp(&left.span.start));
334
335 let mut out = sql.to_string();
336 for edit in edits {
337 out.replace_range(edit.span.start..edit.span.end, &edit.replacement);
338 }
339 Some(out)
340 }
341
342 #[test]
343 fn test_distinct_with_group_by_detected() {
344 let issues = check_sql("SELECT DISTINCT col FROM t GROUP BY col");
345 assert_eq!(issues.len(), 1);
346 assert_eq!(issues[0].code, "LINT_AM_001");
347 }
348
349 #[test]
350 fn test_distinct_without_group_by_ok() {
351 let issues = check_sql("SELECT DISTINCT col FROM t");
352 assert!(issues.is_empty());
353 }
354
355 #[test]
356 fn test_group_by_without_distinct_ok() {
357 let issues = check_sql("SELECT col FROM t GROUP BY col");
358 assert!(issues.is_empty());
359 }
360
361 #[test]
364 fn test_distinct_group_by_in_subquery() {
365 let issues = check_sql("SELECT * FROM (SELECT DISTINCT a FROM t GROUP BY a) AS sub");
366 assert_eq!(issues.len(), 1);
367 }
368
369 #[test]
370 fn test_distinct_group_by_in_cte() {
371 let issues =
372 check_sql("WITH cte AS (SELECT DISTINCT a FROM t GROUP BY a) SELECT * FROM cte");
373 assert_eq!(issues.len(), 1);
374 }
375
376 #[test]
377 fn test_distinct_group_by_in_create_view() {
378 let issues = check_sql("CREATE VIEW v AS SELECT DISTINCT a FROM t GROUP BY a");
379 assert_eq!(issues.len(), 1);
380 }
381
382 #[test]
383 fn test_distinct_group_by_in_insert() {
384 let issues = check_sql("INSERT INTO target SELECT DISTINCT a FROM t GROUP BY a");
385 assert_eq!(issues.len(), 1);
386 }
387
388 #[test]
389 fn test_no_distinct_no_group_by_ok() {
390 let issues = check_sql("SELECT a, b FROM t");
391 assert!(issues.is_empty());
392 }
393
394 #[test]
395 fn test_distinct_group_by_in_union_branch() {
396 let issues = check_sql("SELECT a FROM t UNION ALL SELECT DISTINCT b FROM t2 GROUP BY b");
397 assert_eq!(issues.len(), 1);
398 }
399
400 #[test]
401 fn test_distinct_group_by_emits_safe_autofix_patch() {
402 let sql = "SELECT DISTINCT a FROM t GROUP BY a";
403 let issues = check_sql(sql);
404 assert_eq!(issues.len(), 1);
405
406 let autofix = issues[0].autofix.as_ref().expect("autofix metadata");
407 assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
408 assert_eq!(autofix.edits.len(), 1);
409
410 let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
411 assert_eq!(fixed, "SELECT a FROM t GROUP BY a");
412 }
413}