Skip to main content

alint_rules/
unique_by.rs

1//! `unique_by` — flag any group of files (matching `select:`) that share
2//! the same rendered `key`. The key is a path template evaluated per
3//! matched file; default is `{basename}` (catches any two files with the
4//! same name regardless of directory).
5//!
6//! Canonical shape — every Rust source stem must be unique repo-wide:
7//!
8//! ```yaml
9//! - id: unique-rs-stems
10//!   kind: unique_by
11//!   select: "**/*.rs"
12//!   key: "{stem}"
13//!   level: warning
14//! ```
15//!
16//! Violations are emitted **one per collision group**, anchored on the
17//! lexicographically-first path of the group; the message enumerates
18//! every colliding file. For groups of N, that is one violation (not N),
19//! because the collision is a single fact.
20
21use std::collections::BTreeMap;
22
23use alint_core::template::{PathTokens, render_message, render_path};
24use alint_core::{Context, Error, Level, Result, Rule, RuleSpec, Scope, Violation};
25use serde::Deserialize;
26
27#[derive(Debug, Deserialize)]
28#[serde(deny_unknown_fields)]
29struct Options {
30    select: String,
31    #[serde(default = "default_key")]
32    key: String,
33}
34
/// Key template applied when the rule configuration omits `key`.
fn default_key() -> String {
    String::from("{basename}")
}
38
39#[derive(Debug)]
40pub struct UniqueByRule {
41    id: String,
42    level: Level,
43    policy_url: Option<String>,
44    message: Option<String>,
45    select_scope: Scope,
46    key_template: String,
47}
48
49impl Rule for UniqueByRule {
50    fn id(&self) -> &str {
51        &self.id
52    }
53    fn level(&self) -> Level {
54        self.level
55    }
56    fn policy_url(&self) -> Option<&str> {
57        self.policy_url.as_deref()
58    }
59
60    fn requires_full_index(&self) -> bool {
61        // Cross-file: detecting duplicate keys is only valid over
62        // the full set. A new file in the diff might collide with
63        // an unchanged-but-existing file elsewhere — invisible if
64        // we only see the diff. Per roadmap, opts out of
65        // `--changed` filtering.
66        true
67    }
68
69    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
70        // BTreeMap gives a stable (sorted) iteration order →
71        // deterministic output. Storing `Arc<Path>` re-uses the
72        // walker's per-file allocation rather than copying bytes
73        // through a `PathBuf`.
74        let mut groups: BTreeMap<String, Vec<std::sync::Arc<std::path::Path>>> = BTreeMap::new();
75        for entry in ctx.index.files() {
76            if !self.select_scope.matches(&entry.path) {
77                continue;
78            }
79            let tokens = PathTokens::from_path(&entry.path);
80            let key = render_path(&self.key_template, &tokens);
81            if key.is_empty() {
82                // Skip files whose key renders to the empty string — likely a
83                // missing component like `{parent_name}` on a root-level file.
84                continue;
85            }
86            groups.entry(key).or_default().push(entry.path.clone());
87        }
88        let mut violations = Vec::new();
89        for (key, mut paths) in groups {
90            if paths.len() <= 1 {
91                continue;
92            }
93            paths.sort();
94            let anchor = paths[0].clone();
95            let msg = self.format_message(&key, &paths);
96            violations.push(Violation::new(msg).with_path(anchor));
97        }
98        Ok(violations)
99    }
100}
101
102impl UniqueByRule {
103    fn format_message(&self, key: &str, paths: &[std::sync::Arc<std::path::Path>]) -> String {
104        let paths_joined = paths
105            .iter()
106            .map(|p| p.display().to_string())
107            .collect::<Vec<_>>()
108            .join(", ");
109        if let Some(user) = self.message.as_deref() {
110            let key_str = key.to_string();
111            let paths_str = paths_joined.clone();
112            let count = paths.len().to_string();
113            return render_message(user, |ns, k| match (ns, k) {
114                ("ctx", "key") => Some(key_str.clone()),
115                ("ctx", "paths") => Some(paths_str.clone()),
116                ("ctx", "count") => Some(count.clone()),
117                _ => None,
118            });
119        }
120        format!(
121            "duplicate key {:?} shared by {} file(s): {}",
122            key,
123            paths.len(),
124            paths_joined,
125        )
126    }
127}
128
129pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
130    let opts: Options = spec
131        .deserialize_options()
132        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
133    if opts.key.trim().is_empty() {
134        return Err(Error::rule_config(
135            &spec.id,
136            "unique_by `key` must not be empty",
137        ));
138    }
139    let select_scope = Scope::from_patterns(&[opts.select])?;
140    Ok(Box::new(UniqueByRule {
141        id: spec.id.clone(),
142        level: spec.level,
143        policy_url: spec.policy_url.clone(),
144        message: spec.message.clone(),
145        select_scope,
146        key_template: opts.key,
147    }))
148}
149
#[cfg(test)]
mod tests {
    use super::*;
    use alint_core::{FileEntry, FileIndex};
    use std::path::Path;

    /// In-memory index holding one regular-file entry per given path.
    fn index(files: &[&str]) -> FileIndex {
        let entries = files
            .iter()
            .map(|p| FileEntry {
                path: Path::new(p).into(),
                is_dir: false,
                size: 1,
            })
            .collect();
        FileIndex { entries }
    }

    /// Error-level rule with the given selector and key template and no
    /// custom message.
    fn rule(select: &str, key: &str) -> UniqueByRule {
        let select_scope = Scope::from_patterns(&[select.to_string()]).unwrap();
        UniqueByRule {
            id: "t".into(),
            level: Level::Error,
            policy_url: None,
            message: None,
            select_scope,
            key_template: key.to_string(),
        }
    }

    /// Evaluates `rule` against an index built from `files`.
    fn eval(rule: &UniqueByRule, files: &[&str]) -> Vec<Violation> {
        let idx = index(files);
        let ctx = Context {
            root: Path::new("/"),
            index: &idx,
            registry: None,
            facts: None,
            vars: None,
            git_tracked: None,
            git_blame: None,
        };
        rule.evaluate(&ctx).unwrap()
    }

    #[test]
    fn passes_when_every_key_unique() {
        let found = eval(
            &rule("**/*.rs", "{stem}"),
            &["src/foo.rs", "src/bar.rs", "tests/baz.rs"],
        );
        assert!(found.is_empty(), "unexpected: {found:?}");
    }

    #[test]
    fn flags_stem_collision() {
        let found = eval(
            &rule("**/*.rs", "{stem}"),
            &["src/mod1/foo.rs", "src/mod2/foo.rs"],
        );
        assert_eq!(found.len(), 1);
        let only = &found[0];
        // The anchor is the smallest path of the collision group.
        assert_eq!(only.path.as_deref(), Some(Path::new("src/mod1/foo.rs")));
        assert!(only.message.contains("src/mod1/foo.rs"));
        assert!(only.message.contains("src/mod2/foo.rs"));
    }

    #[test]
    fn one_violation_per_group_regardless_of_group_size() {
        // Three files collide on "foo"; "bar" is unique.
        let files = ["src/a/foo.rs", "src/b/foo.rs", "src/c/foo.rs", "src/bar.rs"];
        let found = eval(&rule("**/*.rs", "{stem}"), &files);
        assert_eq!(found.len(), 1);
        assert!(found[0].message.contains('3'));
    }

    #[test]
    fn multiple_independent_groups() {
        // Two groups ("foo" and "bar") plus one file with a unique stem.
        let files = [
            "src/a/foo.rs",
            "src/b/foo.rs",
            "tests/bar.rs",
            "integration/bar.rs",
            "src/solo.rs",
        ];
        let found = eval(&rule("**/*.rs", "{stem}"), &files);
        assert_eq!(found.len(), 2);
    }

    #[test]
    fn default_key_is_basename() {
        // With the default `{basename}` key a collision needs the full file
        // name, extension included, to be identical.
        let r = rule("**/*", &default_key());
        let found = eval(&r, &["src/a/mod.rs", "src/b/mod.rs"]);
        assert_eq!(found.len(), 1);
    }

    #[test]
    fn different_extensions_same_stem_are_not_colliding_by_basename() {
        let r = rule("**/*", &default_key());
        let found = eval(&r, &["src/foo.rs", "src/foo.md"]);
        assert!(found.is_empty());
    }

    #[test]
    fn empty_key_rendering_skips_entry() {
        // `{parent_name}` on a root-level file renders to "" and the file is
        // left out of every group.
        let found = eval(&rule("*.md", "{parent_name}"), &["README.md", "CHANGELOG.md"]);
        assert!(found.is_empty());
    }

    #[test]
    fn message_template_substitution() {
        let r = UniqueByRule {
            message: Some("{{ctx.count}} files share stem {{ctx.key}}".into()),
            ..rule("**/*.rs", "{stem}")
        };
        let found = eval(&r, &["a/foo.rs", "b/foo.rs"]);
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].message, "2 files share stem foo");
    }
}