Skip to main content

rigsql_rules/ambiguous/
am01.rs

1use rigsql_core::{Segment, SegmentType};
2
3use crate::rule::{CrawlType, Rule, RuleContext, RuleGroup};
4use crate::violation::LintViolation;
5
6/// AM01: DISTINCT used with GROUP BY is redundant.
7///
8/// GROUP BY already produces unique rows for the grouped columns,
9/// so adding DISTINCT is ambiguous and potentially misleading.
10#[derive(Debug, Default)]
11pub struct RuleAM01;
12
13impl Rule for RuleAM01 {
14    fn code(&self) -> &'static str {
15        "AM01"
16    }
17    fn name(&self) -> &'static str {
18        "ambiguous.distinct"
19    }
20    fn description(&self) -> &'static str {
21        "DISTINCT used with GROUP BY."
22    }
23    fn explanation(&self) -> &'static str {
24        "Using DISTINCT together with GROUP BY is redundant because GROUP BY already \
25         deduplicates the result set for the grouped columns. This combination is \
26         ambiguous and suggests the author may not fully understand the query semantics."
27    }
28    fn groups(&self) -> &[RuleGroup] {
29        &[RuleGroup::Ambiguous]
30    }
31    fn is_fixable(&self) -> bool {
32        false
33    }
34
35    fn crawl_type(&self) -> CrawlType {
36        CrawlType::Segment(vec![SegmentType::SelectStatement])
37    }
38
39    fn eval(&self, ctx: &RuleContext) -> Vec<LintViolation> {
40        let children = ctx.segment.children();
41
42        // Check if there's a GROUP BY clause
43        let has_group_by = children
44            .iter()
45            .any(|c| c.segment_type() == SegmentType::GroupByClause);
46
47        if !has_group_by {
48            return vec![];
49        }
50
51        // Check if the SelectClause has a DISTINCT keyword
52        let select_clause = children
53            .iter()
54            .find(|c| c.segment_type() == SegmentType::SelectClause);
55
56        if let Some(select) = select_clause {
57            let distinct_token = find_distinct_keyword(select);
58            if let Some(span) = distinct_token {
59                return vec![LintViolation::with_msg_key(
60                    self.code(),
61                    "DISTINCT is redundant when used with GROUP BY.",
62                    span,
63                    "rules.AM01.msg",
64                    vec![],
65                )];
66            }
67        }
68
69        vec![]
70    }
71}
72
73fn find_distinct_keyword(select_clause: &Segment) -> Option<rigsql_core::Span> {
74    for child in select_clause.children() {
75        if let Segment::Token(t) = child {
76            if t.segment_type == SegmentType::Keyword
77                && t.token.text.eq_ignore_ascii_case("DISTINCT")
78            {
79                return Some(t.token.span);
80            }
81        }
82    }
83    None
84}
85
86#[cfg(test)]
87mod tests {
88    use super::*;
89    use crate::test_utils::lint_sql;
90
91    #[test]
92    fn test_am01_flags_distinct_with_group_by() {
93        let violations = lint_sql("SELECT DISTINCT a FROM t GROUP BY a", RuleAM01);
94        assert_eq!(violations.len(), 1);
95        assert!(violations[0].message.contains("DISTINCT"));
96    }
97
98    #[test]
99    fn test_am01_accepts_distinct_without_group_by() {
100        let violations = lint_sql("SELECT DISTINCT a FROM t", RuleAM01);
101        assert_eq!(violations.len(), 0);
102    }
103
104    #[test]
105    fn test_am01_accepts_group_by_without_distinct() {
106        let violations = lint_sql("SELECT a FROM t GROUP BY a", RuleAM01);
107        assert_eq!(violations.len(), 0);
108    }
109}