Skip to main content

alint_rules/
unique_by.rs

1//! `unique_by` — flag any group of files (matching `select:`) that share
2//! the same rendered `key`. The key is a path template evaluated per
3//! matched file; default is `{basename}` (catches any two files with the
4//! same name regardless of directory).
5//!
6//! Canonical shape — every Rust source stem must be unique repo-wide:
7//!
8//! ```yaml
9//! - id: unique-rs-stems
10//!   kind: unique_by
11//!   select: "**/*.rs"
12//!   key: "{stem}"
13//!   level: warning
14//! ```
15//!
16//! Violations are emitted **one per collision group**, anchored on the
17//! lexicographically-first path of the group; the message enumerates
18//! every colliding file. For groups of N, that is one violation (not N),
19//! because the collision is a single fact.
20
21use std::collections::BTreeMap;
22use std::path::PathBuf;
23
24use alint_core::template::{PathTokens, render_message, render_path};
25use alint_core::{Context, Error, Level, Result, Rule, RuleSpec, Scope, Violation};
26use serde::Deserialize;
27
28#[derive(Debug, Deserialize)]
29#[serde(deny_unknown_fields)]
30struct Options {
31    select: String,
32    #[serde(default = "default_key")]
33    key: String,
34}
35
/// Key template used when the rule spec does not provide `key`: the
/// `{basename}` placeholder.
fn default_key() -> String {
    String::from("{basename}")
}
39
40#[derive(Debug)]
41pub struct UniqueByRule {
42    id: String,
43    level: Level,
44    policy_url: Option<String>,
45    message: Option<String>,
46    select_scope: Scope,
47    key_template: String,
48}
49
50impl Rule for UniqueByRule {
51    fn id(&self) -> &str {
52        &self.id
53    }
54    fn level(&self) -> Level {
55        self.level
56    }
57    fn policy_url(&self) -> Option<&str> {
58        self.policy_url.as_deref()
59    }
60
61    fn requires_full_index(&self) -> bool {
62        // Cross-file: detecting duplicate keys is only valid over
63        // the full set. A new file in the diff might collide with
64        // an unchanged-but-existing file elsewhere — invisible if
65        // we only see the diff. Per roadmap, opts out of
66        // `--changed` filtering.
67        true
68    }
69
70    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
71        // BTreeMap gives a stable (sorted) iteration order → deterministic output.
72        let mut groups: BTreeMap<String, Vec<PathBuf>> = BTreeMap::new();
73        for entry in ctx.index.files() {
74            if !self.select_scope.matches(&entry.path) {
75                continue;
76            }
77            let tokens = PathTokens::from_path(&entry.path);
78            let key = render_path(&self.key_template, &tokens);
79            if key.is_empty() {
80                // Skip files whose key renders to the empty string — likely a
81                // missing component like `{parent_name}` on a root-level file.
82                continue;
83            }
84            groups.entry(key).or_default().push(entry.path.clone());
85        }
86        let mut violations = Vec::new();
87        for (key, mut paths) in groups {
88            if paths.len() <= 1 {
89                continue;
90            }
91            paths.sort();
92            let anchor = paths[0].clone();
93            let msg = self.format_message(&key, &paths);
94            violations.push(Violation::new(msg).with_path(anchor));
95        }
96        Ok(violations)
97    }
98}
99
100impl UniqueByRule {
101    fn format_message(&self, key: &str, paths: &[PathBuf]) -> String {
102        let paths_joined = paths
103            .iter()
104            .map(|p| p.display().to_string())
105            .collect::<Vec<_>>()
106            .join(", ");
107        if let Some(user) = self.message.as_deref() {
108            let key_str = key.to_string();
109            let paths_str = paths_joined.clone();
110            let count = paths.len().to_string();
111            return render_message(user, |ns, k| match (ns, k) {
112                ("ctx", "key") => Some(key_str.clone()),
113                ("ctx", "paths") => Some(paths_str.clone()),
114                ("ctx", "count") => Some(count.clone()),
115                _ => None,
116            });
117        }
118        format!(
119            "duplicate key {:?} shared by {} file(s): {}",
120            key,
121            paths.len(),
122            paths_joined,
123        )
124    }
125}
126
127pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
128    let opts: Options = spec
129        .deserialize_options()
130        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
131    if opts.key.trim().is_empty() {
132        return Err(Error::rule_config(
133            &spec.id,
134            "unique_by `key` must not be empty",
135        ));
136    }
137    let select_scope = Scope::from_patterns(&[opts.select])?;
138    Ok(Box::new(UniqueByRule {
139        id: spec.id.clone(),
140        level: spec.level,
141        policy_url: spec.policy_url.clone(),
142        message: spec.message.clone(),
143        select_scope,
144        key_template: opts.key,
145    }))
146}
147
#[cfg(test)]
mod tests {
    use super::*;
    use alint_core::{FileEntry, FileIndex};
    use std::path::Path;

    /// Builds an in-memory index with one non-directory entry per path.
    fn index(files: &[&str]) -> FileIndex {
        let entries = files
            .iter()
            .map(|p| FileEntry {
                path: PathBuf::from(p),
                is_dir: false,
                size: 1,
            })
            .collect();
        FileIndex { entries }
    }

    /// Rule with the given selection pattern and key template, and no
    /// custom message.
    fn rule(select: &str, key: &str) -> UniqueByRule {
        UniqueByRule {
            id: "t".into(),
            level: Level::Error,
            policy_url: None,
            message: None,
            select_scope: Scope::from_patterns(&[select.to_string()]).unwrap(),
            key_template: key.to_string(),
        }
    }

    /// Rule selecting every file and keyed by the default template.
    fn rule_with_default_key() -> UniqueByRule {
        rule("**/*", &default_key())
    }

    /// Evaluates `rule` against an index made of `files`.
    fn eval(rule: &UniqueByRule, files: &[&str]) -> Vec<Violation> {
        let idx = index(files);
        let ctx = Context {
            root: Path::new("/"),
            index: &idx,
            registry: None,
            facts: None,
            vars: None,
            git_tracked: None,
            git_blame: None,
        };
        rule.evaluate(&ctx).unwrap()
    }

    #[test]
    fn passes_when_every_key_unique() {
        let v = eval(
            &rule("**/*.rs", "{stem}"),
            &["src/foo.rs", "src/bar.rs", "tests/baz.rs"],
        );
        assert!(v.is_empty(), "unexpected: {v:?}");
    }

    #[test]
    fn flags_stem_collision() {
        let colliding = ["src/mod1/foo.rs", "src/mod2/foo.rs"];
        let v = eval(&rule("**/*.rs", "{stem}"), &colliding);
        assert_eq!(v.len(), 1);
        let only = &v[0];
        // The violation is anchored on the smallest path of the group.
        assert_eq!(only.path.as_deref(), Some(Path::new("src/mod1/foo.rs")));
        // Every member of the group is named in the message.
        for member in colliding {
            assert!(only.message.contains(member));
        }
    }

    #[test]
    fn one_violation_per_group_regardless_of_group_size() {
        let files = [
            "src/a/foo.rs",
            "src/b/foo.rs",
            "src/c/foo.rs", // 3-way collision on "foo"
            "src/bar.rs",   // unique
        ];
        let v = eval(&rule("**/*.rs", "{stem}"), &files);
        assert_eq!(v.len(), 1);
        assert!(v[0].message.contains('3'));
    }

    #[test]
    fn multiple_independent_groups() {
        let files = [
            "src/a/foo.rs",
            "src/b/foo.rs", // group "foo"
            "tests/bar.rs",
            "integration/bar.rs", // group "bar"
            "src/solo.rs",
        ];
        let v = eval(&rule("**/*.rs", "{stem}"), &files);
        assert_eq!(v.len(), 2);
    }

    #[test]
    fn default_key_is_basename() {
        // With the default `{basename}` key, a collision needs the same
        // file name, extension included.
        let v = eval(&rule_with_default_key(), &["src/a/mod.rs", "src/b/mod.rs"]);
        assert_eq!(v.len(), 1);
    }

    #[test]
    fn different_extensions_same_stem_are_not_colliding_by_basename() {
        let v = eval(&rule_with_default_key(), &["src/foo.rs", "src/foo.md"]);
        assert!(v.is_empty());
    }

    #[test]
    fn empty_key_rendering_skips_entry() {
        // `{parent_name}` renders to "" for a root-level file, so neither
        // file is grouped.
        let v = eval(&rule("*.md", "{parent_name}"), &["README.md", "CHANGELOG.md"]);
        assert!(v.is_empty());
    }

    #[test]
    fn message_template_substitution() {
        let r = UniqueByRule {
            message: Some("{{ctx.count}} files share stem {{ctx.key}}".into()),
            ..rule("**/*.rs", "{stem}")
        };
        let v = eval(&r, &["a/foo.rs", "b/foo.rs"]);
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].message, "2 files share stem foo");
    }
}