// rigsql_rules/ambiguous/am01.rs

1use rigsql_core::{Segment, SegmentType};
2
3use crate::rule::{CrawlType, Rule, RuleContext, RuleGroup};
4use crate::violation::LintViolation;
5
/// Lint rule AM01: flags `DISTINCT` in a query that also has `GROUP BY`.
///
/// Grouping already collapses the result to one row per combination of the
/// grouped columns, so a `DISTINCT` on top of it adds nothing and makes the
/// intent of the query harder to read.
///
/// The rule carries no state; it can be created with `RuleAM01` or
/// `RuleAM01::default()`.
#[derive(Default, Debug)]
pub struct RuleAM01;
12
13impl Rule for RuleAM01 {
14    fn code(&self) -> &'static str {
15        "AM01"
16    }
17    fn name(&self) -> &'static str {
18        "ambiguous.distinct"
19    }
20    fn description(&self) -> &'static str {
21        "DISTINCT used with GROUP BY."
22    }
23    fn explanation(&self) -> &'static str {
24        "Using DISTINCT together with GROUP BY is redundant because GROUP BY already \
25         deduplicates the result set for the grouped columns. This combination is \
26         ambiguous and suggests the author may not fully understand the query semantics."
27    }
28    fn groups(&self) -> &[RuleGroup] {
29        &[RuleGroup::Ambiguous]
30    }
31    fn is_fixable(&self) -> bool {
32        false
33    }
34
35    fn crawl_type(&self) -> CrawlType {
36        CrawlType::Segment(vec![SegmentType::SelectStatement])
37    }
38
39    fn eval(&self, ctx: &RuleContext) -> Vec<LintViolation> {
40        let children = ctx.segment.children();
41
42        // Check if there's a GROUP BY clause
43        let has_group_by = children
44            .iter()
45            .any(|c| c.segment_type() == SegmentType::GroupByClause);
46
47        if !has_group_by {
48            return vec![];
49        }
50
51        // Check if the SelectClause has a DISTINCT keyword
52        let select_clause = children
53            .iter()
54            .find(|c| c.segment_type() == SegmentType::SelectClause);
55
56        if let Some(select) = select_clause {
57            let distinct_token = find_distinct_keyword(select);
58            if let Some(span) = distinct_token {
59                return vec![LintViolation::new(
60                    self.code(),
61                    "DISTINCT is redundant when used with GROUP BY.",
62                    span,
63                )];
64            }
65        }
66
67        vec![]
68    }
69}
70
71fn find_distinct_keyword(select_clause: &Segment) -> Option<rigsql_core::Span> {
72    for child in select_clause.children() {
73        if let Segment::Token(t) = child {
74            if t.segment_type == SegmentType::Keyword
75                && t.token.text.eq_ignore_ascii_case("DISTINCT")
76            {
77                return Some(t.token.span);
78            }
79        }
80    }
81    None
82}
83
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_utils::lint_sql;

    /// DISTINCT combined with GROUP BY yields exactly one violation that
    /// mentions DISTINCT.
    #[test]
    fn test_am01_flags_distinct_with_group_by() {
        let found = lint_sql("SELECT DISTINCT a FROM t GROUP BY a", RuleAM01);
        assert_eq!(found.len(), 1);
        assert!(found[0].message.contains("DISTINCT"));
    }

    /// DISTINCT on its own is not reported.
    #[test]
    fn test_am01_accepts_distinct_without_group_by() {
        let found = lint_sql("SELECT DISTINCT a FROM t", RuleAM01);
        assert!(found.is_empty());
    }

    /// GROUP BY on its own is not reported.
    #[test]
    fn test_am01_accepts_group_by_without_distinct() {
        let found = lint_sql("SELECT a FROM t GROUP BY a", RuleAM01);
        assert!(found.is_empty());
    }
}