Skip to main content

alint_rules/
unique_by.rs

1//! `unique_by` — flag any group of files (matching `select:`) that share
2//! the same rendered `key`. The key is a path template evaluated per
3//! matched file; default is `{basename}` (catches any two files with the
4//! same name regardless of directory).
5//!
6//! Canonical shape — every Rust source stem must be unique repo-wide:
7//!
8//! ```yaml
9//! - id: unique-rs-stems
10//!   kind: unique_by
11//!   select: "**/*.rs"
12//!   key: "{stem}"
13//!   level: warning
14//! ```
15//!
16//! Violations are emitted **one per collision group**, anchored on the
17//! lexicographically-first path of the group; the message enumerates
18//! every colliding file. For groups of N, that is one violation (not N),
19//! because the collision is a single fact.
20
21use std::collections::BTreeMap;
22
23use alint_core::template::{PathTokens, render_message, render_path};
24use alint_core::{Context, Error, Level, Result, Rule, RuleSpec, Scope, Violation};
25use serde::Deserialize;
26
/// Rule-specific options deserialized from the rule spec by `build`.
///
/// Unknown fields are rejected (`deny_unknown_fields`), so a misspelled
/// option surfaces as a configuration error instead of being ignored.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct Options {
    /// Pattern selecting the files to check; `build` hands it to
    /// `Scope::from_patterns`.
    select: String,
    /// Path template rendered once per selected file to produce the
    /// grouping key. Falls back to `default_key()` when omitted.
    #[serde(default = "default_key")]
    key: String,
}
34
/// Serde default for `Options::key`: group files by their full file name.
fn default_key() -> String {
    String::from("{basename}")
}
38
/// A configured `unique_by` rule: flags groups of selected files whose
/// rendered key is identical.
#[derive(Debug)]
pub struct UniqueByRule {
    // `id`, `level`, `policy_url` and `message` are copied from the
    // `RuleSpec` in `build`; presumably they are what
    // `rule_common_impl!` exposes through the `Rule` trait — confirm
    // against the macro definition.
    id: String,
    level: Level,
    policy_url: Option<String>,
    /// Optional user-supplied message template. When set it replaces the
    /// default text and may reference `ctx.key`, `ctx.paths` and
    /// `ctx.count`.
    message: Option<String>,
    /// Scope built from the `select` option; only matching files are grouped.
    select_scope: Scope,
    /// Path template rendered per file to produce the grouping key.
    key_template: String,
}
48
49impl Rule for UniqueByRule {
50    alint_core::rule_common_impl!();
51
52    fn requires_full_index(&self) -> bool {
53        // Cross-file: detecting duplicate keys is only valid over
54        // the full set. A new file in the diff might collide with
55        // an unchanged-but-existing file elsewhere — invisible if
56        // we only see the diff. Per roadmap, opts out of
57        // `--changed` filtering.
58        true
59    }
60
61    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
62        // BTreeMap gives a stable (sorted) iteration order →
63        // deterministic output. Storing `Arc<Path>` re-uses the
64        // walker's per-file allocation rather than copying bytes
65        // through a `PathBuf`.
66        let mut groups: BTreeMap<String, Vec<std::sync::Arc<std::path::Path>>> = BTreeMap::new();
67        for entry in ctx.index.files() {
68            if !self.select_scope.matches(&entry.path, ctx.index) {
69                continue;
70            }
71            let tokens = PathTokens::from_path(&entry.path);
72            let key = render_path(&self.key_template, &tokens);
73            if key.is_empty() {
74                // Skip files whose key renders to the empty string — likely a
75                // missing component like `{parent_name}` on a root-level file.
76                continue;
77            }
78            groups.entry(key).or_default().push(entry.path.clone());
79        }
80        let mut violations = Vec::new();
81        for (key, mut paths) in groups {
82            if paths.len() <= 1 {
83                continue;
84            }
85            paths.sort();
86            let anchor = paths[0].clone();
87            let msg = self.format_message(&key, &paths);
88            violations.push(Violation::new(msg).with_path(anchor));
89        }
90        Ok(violations)
91    }
92}
93
94impl UniqueByRule {
95    fn format_message(&self, key: &str, paths: &[std::sync::Arc<std::path::Path>]) -> String {
96        let paths_joined = paths
97            .iter()
98            .map(|p| p.display().to_string())
99            .collect::<Vec<_>>()
100            .join(", ");
101        if let Some(user) = self.message.as_deref() {
102            let key_str = key.to_string();
103            let paths_str = paths_joined.clone();
104            let count = paths.len().to_string();
105            return render_message(user, |ns, k| match (ns, k) {
106                ("ctx", "key") => Some(key_str.clone()),
107                ("ctx", "paths") => Some(paths_str.clone()),
108                ("ctx", "count") => Some(count.clone()),
109                _ => None,
110            });
111        }
112        format!(
113            "duplicate key {:?} shared by {} file(s): {}",
114            key,
115            paths.len(),
116            paths_joined,
117        )
118    }
119}
120
121pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
122    alint_core::reject_scope_filter_on_cross_file(spec, "unique_by")?;
123    let opts: Options = spec
124        .deserialize_options()
125        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
126    if opts.key.trim().is_empty() {
127        return Err(Error::rule_config(
128            &spec.id,
129            "unique_by `key` must not be empty",
130        ));
131    }
132    let select_scope = Scope::from_patterns(&[opts.select])?;
133    Ok(Box::new(UniqueByRule {
134        id: spec.id.clone(),
135        level: spec.level,
136        policy_url: spec.policy_url.clone(),
137        message: spec.message.clone(),
138        select_scope,
139        key_template: opts.key,
140    }))
141}
142
#[cfg(test)]
mod tests {
    use super::*;
    use alint_core::{FileEntry, FileIndex};
    use std::path::Path;

    /// Builds an in-memory index holding one non-directory entry per path.
    fn index(files: &[&str]) -> FileIndex {
        FileIndex::from_entries(
            files
                .iter()
                .map(|p| FileEntry {
                    path: Path::new(p).into(),
                    is_dir: false,
                    size: 1,
                })
                .collect(),
        )
    }

    /// Single place where tests construct a rule, so a new struct field
    /// only has to be filled in here.
    fn rule_with_message(select: &str, key: &str, message: Option<&str>) -> UniqueByRule {
        UniqueByRule {
            id: "t".into(),
            level: Level::Error,
            policy_url: None,
            message: message.map(String::from),
            select_scope: Scope::from_patterns(&[select.to_string()]).unwrap(),
            key_template: key.to_string(),
        }
    }

    /// Rule with the built-in message.
    fn rule(select: &str, key: &str) -> UniqueByRule {
        rule_with_message(select, key, None)
    }

    /// Evaluates `rule` against an index made of `files`.
    fn eval(rule: &UniqueByRule, files: &[&str]) -> Vec<Violation> {
        let idx = index(files);
        let ctx = Context {
            root: Path::new("/"),
            index: &idx,
            registry: None,
            facts: None,
            vars: None,
            git_tracked: None,
            git_blame: None,
        };
        rule.evaluate(&ctx).unwrap()
    }

    #[test]
    fn requires_full_index_is_true() {
        // Cross-file rule: must always see the whole tree.
        assert!(rule("**/*.rs", "{stem}").requires_full_index());
    }

    #[test]
    fn passes_when_every_key_unique() {
        let r = rule("**/*.rs", "{stem}");
        let v = eval(&r, &["src/foo.rs", "src/bar.rs", "tests/baz.rs"]);
        assert!(v.is_empty(), "unexpected: {v:?}");
    }

    #[test]
    fn flags_stem_collision() {
        let r = rule("**/*.rs", "{stem}");
        let v = eval(&r, &["src/mod1/foo.rs", "src/mod2/foo.rs"]);
        assert_eq!(v.len(), 1);
        // Anchor is lex-smallest of the collision group.
        assert_eq!(v[0].path.as_deref(), Some(Path::new("src/mod1/foo.rs")));
        assert!(v[0].message.contains("src/mod1/foo.rs"));
        assert!(v[0].message.contains("src/mod2/foo.rs"));
    }

    #[test]
    fn one_violation_per_group_regardless_of_group_size() {
        let r = rule("**/*.rs", "{stem}");
        let v = eval(
            &r,
            &[
                "src/a/foo.rs",
                "src/b/foo.rs",
                "src/c/foo.rs", // 3-way collision on "foo"
                "src/bar.rs",   // unique
            ],
        );
        assert_eq!(v.len(), 1);
        // Match the count in context so a stray '3' elsewhere in the
        // message (e.g. inside a path) cannot satisfy the assertion.
        assert!(
            v[0].message.contains("3 file(s)"),
            "unexpected message: {}",
            v[0].message,
        );
    }

    #[test]
    fn multiple_independent_groups() {
        let r = rule("**/*.rs", "{stem}");
        let v = eval(
            &r,
            &[
                "src/a/foo.rs",
                "src/b/foo.rs", // group "foo"
                "tests/bar.rs",
                "integration/bar.rs", // group "bar"
                "src/solo.rs",
            ],
        );
        assert_eq!(v.len(), 2);
        // Groups come out in sorted key order ("bar" before "foo"),
        // each anchored on the smallest path of its group.
        assert_eq!(v[0].path.as_deref(), Some(Path::new("integration/bar.rs")));
        assert_eq!(v[1].path.as_deref(), Some(Path::new("src/a/foo.rs")));
    }

    #[test]
    fn default_key_is_basename() {
        // No key option = default {basename}: collisions require identical
        // filename including extension.
        let r = rule("**/*", &default_key());
        let v = eval(&r, &["src/a/mod.rs", "src/b/mod.rs"]);
        assert_eq!(v.len(), 1);
    }

    #[test]
    fn different_extensions_same_stem_are_not_colliding_by_basename() {
        let r = rule("**/*", &default_key());
        let v = eval(&r, &["src/foo.rs", "src/foo.md"]);
        assert!(v.is_empty());
    }

    #[test]
    fn empty_key_rendering_skips_entry() {
        // `{parent_name}` on a root-level file renders to "" — excluded.
        let r = rule("*.md", "{parent_name}");
        let v = eval(&r, &["README.md", "CHANGELOG.md"]);
        assert!(v.is_empty());
    }

    #[test]
    fn message_template_substitution() {
        let r = rule_with_message(
            "**/*.rs",
            "{stem}",
            Some("{{ctx.count}} files share stem {{ctx.key}}"),
        );
        let v = eval(&r, &["a/foo.rs", "b/foo.rs"]);
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].message, "2 files share stem foo");
    }

    #[test]
    fn build_rejects_scope_filter_on_cross_file_rule() {
        // unique_by is a cross-file rule (requires_full_index =
        // true); scope_filter is per-file-rules-only. The build
        // path must reject it with a clear message pointing at
        // the for_each_dir + when_iter: alternative.
        let yaml = r#"
id: t
kind: unique_by
select: "**/*.rs"
key: "{stem}"
level: error
scope_filter:
  has_ancestor: Cargo.toml
"#;
        let spec = crate::test_support::spec_yaml(yaml);
        let err = build(&spec).unwrap_err().to_string();
        assert!(
            err.contains("scope_filter is supported on per-file rules only"),
            "expected per-file-only message, got: {err}",
        );
        assert!(
            err.contains("unique_by"),
            "expected message to name the cross-file kind, got: {err}",
        );
    }
}