Skip to main content

flowscope_core/linter/rules/
st_008.rs

1//! LINT_ST_008: Structure distinct.
2//!
3//! SQLFluff ST08 parity: `SELECT DISTINCT(<expr>)` should be rewritten to
4//! `SELECT DISTINCT <expr>`.
5
6use crate::linter::rule::{LintContext, LintRule};
7use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit, Span};
8use sqlparser::ast::Statement;
9use sqlparser::keywords::Keyword;
10use sqlparser::tokenizer::{Token, TokenWithSpan, Tokenizer, Whitespace};
11
12pub struct StructureDistinct;
13
14impl LintRule for StructureDistinct {
15    fn code(&self) -> &'static str {
16        issue_codes::LINT_ST_008
17    }
18
19    fn name(&self) -> &'static str {
20        "Structure distinct"
21    }
22
23    fn description(&self) -> &'static str {
24        "'DISTINCT' used with parentheses."
25    }
26
27    fn check(&self, _statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
28        let candidates = st008_autofix_candidates(ctx.statement_sql(), ctx.dialect());
29
30        candidates
31            .into_iter()
32            .map(|candidate| {
33                Issue::info(issue_codes::LINT_ST_008, "DISTINCT used with parentheses.")
34                    .with_statement(ctx.statement_index)
35                    .with_span(candidate.span)
36                    .with_autofix_edits(IssueAutofixApplicability::Safe, candidate.edits)
37            })
38            .collect()
39    }
40}
41
42#[derive(Clone, Debug)]
43struct St008AutofixCandidate {
44    span: Span,
45    edits: Vec<IssuePatchEdit>,
46}
47
48fn st008_autofix_candidates(sql: &str, dialect: Dialect) -> Vec<St008AutofixCandidate> {
49    let Some(tokens) = tokenized(sql, dialect) else {
50        return Vec::new();
51    };
52
53    let mut candidates = Vec::new();
54    for distinct_index in 0..tokens.len() {
55        if !is_distinct_keyword(&tokens[distinct_index].token) {
56            continue;
57        }
58
59        let Some(next_index) = next_non_trivia_index(&tokens, distinct_index + 1) else {
60            continue;
61        };
62
63        // Skip `DISTINCT ON(...)` — valid Postgres syntax.
64        if matches!(&tokens[next_index].token, Token::Word(word) if word.keyword == Keyword::ON) {
65            continue;
66        }
67
68        let left_paren_index = next_index;
69        if !matches!(tokens[left_paren_index].token, Token::LParen) {
70            continue;
71        }
72
73        // Check if there's already a space between DISTINCT and `(`.
74        let has_space_before_paren =
75            has_whitespace_between(&tokens, distinct_index, left_paren_index);
76
77        let Some((right_paren_index, has_projection_comma, has_subquery)) =
78            find_matching_distinct_rparen(&tokens, left_paren_index)
79        else {
80            continue;
81        };
82        if has_projection_comma || has_subquery {
83            continue;
84        }
85
86        // Determine whether parens can be removed or only a space is needed.
87        let paren_removable = next_token_allows_paren_removal(&tokens, right_paren_index + 1);
88
89        // If parens are needed and there's already a space, no violation.
90        if !paren_removable && has_space_before_paren {
91            continue;
92        }
93
94        let Some((distinct_start, distinct_end)) =
95            token_with_span_offsets(sql, &tokens[distinct_index])
96        else {
97            continue;
98        };
99        let Some((_, left_paren_end)) = token_with_span_offsets(sql, &tokens[left_paren_index])
100        else {
101            continue;
102        };
103        let Some((right_paren_start, right_paren_end)) =
104            token_with_span_offsets(sql, &tokens[right_paren_index])
105        else {
106            continue;
107        };
108        if left_paren_end < distinct_end || right_paren_end <= right_paren_start {
109            continue;
110        }
111
112        let edits = if paren_removable {
113            // Remove parentheses: `DISTINCT(x)` → `DISTINCT x`
114            vec![
115                IssuePatchEdit::new(Span::new(distinct_end, left_paren_end), " "),
116                IssuePatchEdit::new(Span::new(right_paren_start, right_paren_end), ""),
117            ]
118        } else {
119            // Keep parentheses but add space: `DISTINCT(x) * y` → `DISTINCT (x) * y`
120            vec![IssuePatchEdit::new(
121                Span::new(distinct_end, distinct_end),
122                " ",
123            )]
124        };
125
126        candidates.push(St008AutofixCandidate {
127            span: Span::new(distinct_start, distinct_end),
128            edits,
129        });
130    }
131
132    candidates
133}
134
135fn is_distinct_keyword(token: &Token) -> bool {
136    matches!(token, Token::Word(word) if word.keyword == Keyword::DISTINCT)
137}
138
139fn tokenized(sql: &str, dialect: Dialect) -> Option<Vec<TokenWithSpan>> {
140    let dialect = dialect.to_sqlparser_dialect();
141    let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
142    tokenizer.tokenize_with_location().ok()
143}
144
145/// Finds the matching `)` for the opening `(` at `left_paren_index`.
146/// Returns `(index, has_comma, has_subquery)`.
147fn find_matching_distinct_rparen(
148    tokens: &[TokenWithSpan],
149    left_paren_index: usize,
150) -> Option<(usize, bool, bool)> {
151    let mut depth = 0usize;
152    let mut has_projection_comma = false;
153    let mut has_subquery = false;
154
155    for (index, token) in tokens.iter().enumerate().skip(left_paren_index) {
156        if is_trivia_token(&token.token) {
157            continue;
158        }
159
160        match &token.token {
161            Token::LParen => {
162                depth += 1;
163            }
164            Token::RParen => {
165                if depth == 0 {
166                    return None;
167                }
168                depth -= 1;
169                if depth == 0 {
170                    return Some((index, has_projection_comma, has_subquery));
171                }
172            }
173            Token::Comma if depth == 1 => {
174                has_projection_comma = true;
175            }
176            Token::Word(word) if depth == 1 && word.keyword == Keyword::SELECT => {
177                has_subquery = true;
178            }
179            _ => {}
180        }
181    }
182
183    None
184}
185
186/// Returns true when the parentheses around the DISTINCT expression can be
187/// safely removed.  When the next meaningful token after the closing paren is
188/// an operator, the parens serve as grouping and should be kept.
189fn next_token_allows_paren_removal(tokens: &[TokenWithSpan], start: usize) -> bool {
190    let Some(index) = next_non_trivia_index(tokens, start) else {
191        return true;
192    };
193
194    match &tokens[index].token {
195        // A comma means there are more projections — parens can still be removed
196        // since they only wrap the first projection item.
197        Token::Comma | Token::SemiColon | Token::RParen => true,
198        Token::Word(word) => {
199            matches!(
200                word.keyword,
201                Keyword::FROM
202                    | Keyword::WHERE
203                    | Keyword::GROUP
204                    | Keyword::HAVING
205                    | Keyword::QUALIFY
206                    | Keyword::ORDER
207                    | Keyword::LIMIT
208                    | Keyword::FETCH
209                    | Keyword::OFFSET
210                    | Keyword::UNION
211                    | Keyword::EXCEPT
212                    | Keyword::INTERSECT
213                    | Keyword::WINDOW
214                    | Keyword::INTO
215            )
216        }
217        _ => false,
218    }
219}
220
221fn has_whitespace_between(tokens: &[TokenWithSpan], start: usize, end: usize) -> bool {
222    (start + 1..end).any(|i| is_trivia_token(&tokens[i].token))
223}
224
225fn next_non_trivia_index(tokens: &[TokenWithSpan], mut index: usize) -> Option<usize> {
226    while index < tokens.len() {
227        if !is_trivia_token(&tokens[index].token) {
228            return Some(index);
229        }
230        index += 1;
231    }
232    None
233}
234
235fn is_trivia_token(token: &Token) -> bool {
236    matches!(
237        token,
238        Token::Whitespace(
239            Whitespace::Space
240                | Whitespace::Newline
241                | Whitespace::Tab
242                | Whitespace::SingleLineComment { .. }
243                | Whitespace::MultiLineComment(_)
244        )
245    )
246}
247
248fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
249    let start = line_col_to_offset(
250        sql,
251        token.span.start.line as usize,
252        token.span.start.column as usize,
253    )?;
254    let end = line_col_to_offset(
255        sql,
256        token.span.end.line as usize,
257        token.span.end.column as usize,
258    )?;
259    Some((start, end))
260}
261
262fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
263    if line == 0 || column == 0 {
264        return None;
265    }
266
267    let mut current_line = 1usize;
268    let mut current_col = 1usize;
269
270    for (offset, ch) in sql.char_indices() {
271        if current_line == line && current_col == column {
272            return Some(offset);
273        }
274
275        if ch == '\n' {
276            current_line += 1;
277            current_col = 1;
278        } else {
279            current_col += 1;
280        }
281    }
282
283    if current_line == line && current_col == column {
284        return Some(sql.len());
285    }
286
287    None
288}
289
290#[cfg(test)]
291mod tests {
292    use super::*;
293    use crate::parser::parse_sql;
294    use crate::types::IssueAutofixApplicability;
295
296    fn run(sql: &str) -> Vec<Issue> {
297        let statements = parse_sql(sql).expect("parse");
298        let rule = StructureDistinct;
299        statements
300            .iter()
301            .enumerate()
302            .flat_map(|(index, statement)| {
303                rule.check(
304                    statement,
305                    &LintContext {
306                        sql,
307                        statement_range: 0..sql.len(),
308                        statement_index: index,
309                    },
310                )
311            })
312            .collect()
313    }
314
315    fn apply_issue_autofix(sql: &str, issue: &Issue) -> Option<String> {
316        let autofix = issue.autofix.as_ref()?;
317        let mut edits = autofix.edits.clone();
318        edits.sort_by(|left, right| right.span.start.cmp(&left.span.start));
319
320        let mut out = sql.to_string();
321        for edit in edits {
322            out.replace_range(edit.span.start..edit.span.end, &edit.replacement);
323        }
324        Some(out)
325    }
326
327    #[test]
328    fn flags_distinct_parenthesized_projection() {
329        let issues = run("SELECT DISTINCT(a) FROM t");
330        assert_eq!(issues.len(), 1);
331        assert_eq!(issues[0].code, issue_codes::LINT_ST_008);
332    }
333
334    #[test]
335    fn does_not_flag_normal_distinct_projection() {
336        let issues = run("SELECT DISTINCT a FROM t");
337        assert!(issues.is_empty());
338    }
339
340    #[test]
341    fn flags_multiple_projections_removes_parens() {
342        // SQLFluff: test_fail_distinct_with_parenthesis_6
343        let sql = "SELECT DISTINCT(a), b\n";
344        let issues = run(sql);
345        assert_eq!(issues.len(), 1);
346        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
347        assert_eq!(fixed, "SELECT DISTINCT a, b\n");
348    }
349
350    #[test]
351    fn flags_in_nested_select_scope() {
352        let issues = run("SELECT * FROM (SELECT DISTINCT(a) FROM t) AS sub");
353        assert_eq!(issues.len(), 1);
354    }
355
356    #[test]
357    fn emits_safe_autofix_for_distinct_parenthesized_projection() {
358        let sql = "SELECT DISTINCT(a) FROM t";
359        let issues = run(sql);
360        assert_eq!(issues.len(), 1);
361
362        let autofix = issues[0].autofix.as_ref().expect("autofix metadata");
363        assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
364        assert_eq!(autofix.edits.len(), 2);
365
366        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
367        assert_eq!(fixed, "SELECT DISTINCT a FROM t");
368    }
369
370    #[test]
371    fn adds_space_when_parens_needed_for_grouping() {
372        // SQLFluff: test_fail_distinct_with_parenthesis_2
373        let sql = "SELECT DISTINCT(a + b) * c";
374        let issues = run(sql);
375        assert_eq!(issues.len(), 1);
376        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
377        assert_eq!(fixed, "SELECT DISTINCT (a + b) * c");
378    }
379
380    #[test]
381    fn does_not_flag_distinct_with_space_and_needed_parens() {
382        // SQLFluff: test_fail_distinct_with_parenthesis_4
383        assert!(run("SELECT DISTINCT (a + b) * c").is_empty());
384    }
385
386    #[test]
387    fn flags_distinct_space_paren_single_column() {
388        // SQLFluff: test_fail_distinct_with_parenthesis_3
389        let sql = "SELECT DISTINCT (a)";
390        let issues = run(sql);
391        assert_eq!(issues.len(), 1);
392        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
393        assert_eq!(fixed, "SELECT DISTINCT a");
394    }
395
396    #[test]
397    fn flags_distinct_inside_count() {
398        // SQLFluff: test_fail_distinct_column_inside_count
399        let sql = "SELECT COUNT(DISTINCT(unique_key))\n";
400        let issues = run(sql);
401        assert_eq!(issues.len(), 1);
402        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
403        assert_eq!(fixed, "SELECT COUNT(DISTINCT unique_key)\n");
404    }
405
406    #[test]
407    fn flags_distinct_concat_inside_count() {
408        // SQLFluff: test_fail_distinct_concat_inside_count
409        let sql = "SELECT COUNT(DISTINCT(CONCAT(col1, '-', col2, '-', col3)))\n";
410        let issues = run(sql);
411        assert_eq!(issues.len(), 1);
412        let fixed = apply_issue_autofix(sql, &issues[0]).expect("apply autofix");
413        assert_eq!(
414            fixed,
415            "SELECT COUNT(DISTINCT CONCAT(col1, '-', col2, '-', col3))\n"
416        );
417    }
418
419    #[test]
420    fn does_not_flag_distinct_on_postgres() {
421        // SQLFluff: test_fail_distinct_with_parenthesis_7
422        // DISTINCT ON(...) is valid Postgres syntax, not flagged.
423        assert!(run("SELECT DISTINCT ON(bcolor) bcolor, fcolor FROM t").is_empty());
424    }
425
426    #[test]
427    fn does_not_flag_distinct_subquery_inside_count() {
428        // SQLFluff: test_pass_distinct_subquery_inside_count
429        let sql = "SELECT COUNT(DISTINCT(SELECT ANY_VALUE(id) FROM UNNEST(tag) t))";
430        assert!(run(sql).is_empty());
431    }
432}