Skip to main content

alint_rules/
unique_by.rs

1//! `unique_by` — flag any group of files (matching `select:`) that share
2//! the same rendered `key`. The key is a path template evaluated per
3//! matched file; default is `{basename}` (catches any two files with the
4//! same name regardless of directory).
5//!
6//! Canonical shape — every Rust source stem must be unique repo-wide:
7//!
8//! ```yaml
9//! - id: unique-rs-stems
10//!   kind: unique_by
11//!   select: "**/*.rs"
12//!   key: "{stem}"
13//!   level: warning
14//! ```
15//!
16//! Violations are emitted **one per collision group**, anchored on the
17//! lexicographically-first path of the group; the message enumerates
18//! every colliding file. For groups of N, that is one violation (not N),
19//! because the collision is a single fact.
20
21use std::collections::BTreeMap;
22
23use alint_core::template::{PathTokens, render_message, render_path};
24use alint_core::{Context, Error, Level, Result, Rule, RuleSpec, Scope, Violation};
25use serde::Deserialize;
26
27#[derive(Debug, Deserialize)]
28#[serde(deny_unknown_fields)]
29struct Options {
30    select: String,
31    #[serde(default = "default_key")]
32    key: String,
33}
34
/// Key template used when a rule does not specify `key`: the file's
/// basename.
fn default_key() -> String {
    String::from("{basename}")
}
38
39#[derive(Debug)]
40pub struct UniqueByRule {
41    id: String,
42    level: Level,
43    policy_url: Option<String>,
44    message: Option<String>,
45    select_scope: Scope,
46    key_template: String,
47}
48
49impl Rule for UniqueByRule {
50    fn id(&self) -> &str {
51        &self.id
52    }
53    fn level(&self) -> Level {
54        self.level
55    }
56    fn policy_url(&self) -> Option<&str> {
57        self.policy_url.as_deref()
58    }
59
60    fn requires_full_index(&self) -> bool {
61        // Cross-file: detecting duplicate keys is only valid over
62        // the full set. A new file in the diff might collide with
63        // an unchanged-but-existing file elsewhere — invisible if
64        // we only see the diff. Per roadmap, opts out of
65        // `--changed` filtering.
66        true
67    }
68
69    fn evaluate(&self, ctx: &Context<'_>) -> Result<Vec<Violation>> {
70        // BTreeMap gives a stable (sorted) iteration order →
71        // deterministic output. Storing `Arc<Path>` re-uses the
72        // walker's per-file allocation rather than copying bytes
73        // through a `PathBuf`.
74        let mut groups: BTreeMap<String, Vec<std::sync::Arc<std::path::Path>>> = BTreeMap::new();
75        for entry in ctx.index.files() {
76            if !self.select_scope.matches(&entry.path, ctx.index) {
77                continue;
78            }
79            let tokens = PathTokens::from_path(&entry.path);
80            let key = render_path(&self.key_template, &tokens);
81            if key.is_empty() {
82                // Skip files whose key renders to the empty string — likely a
83                // missing component like `{parent_name}` on a root-level file.
84                continue;
85            }
86            groups.entry(key).or_default().push(entry.path.clone());
87        }
88        let mut violations = Vec::new();
89        for (key, mut paths) in groups {
90            if paths.len() <= 1 {
91                continue;
92            }
93            paths.sort();
94            let anchor = paths[0].clone();
95            let msg = self.format_message(&key, &paths);
96            violations.push(Violation::new(msg).with_path(anchor));
97        }
98        Ok(violations)
99    }
100}
101
102impl UniqueByRule {
103    fn format_message(&self, key: &str, paths: &[std::sync::Arc<std::path::Path>]) -> String {
104        let paths_joined = paths
105            .iter()
106            .map(|p| p.display().to_string())
107            .collect::<Vec<_>>()
108            .join(", ");
109        if let Some(user) = self.message.as_deref() {
110            let key_str = key.to_string();
111            let paths_str = paths_joined.clone();
112            let count = paths.len().to_string();
113            return render_message(user, |ns, k| match (ns, k) {
114                ("ctx", "key") => Some(key_str.clone()),
115                ("ctx", "paths") => Some(paths_str.clone()),
116                ("ctx", "count") => Some(count.clone()),
117                _ => None,
118            });
119        }
120        format!(
121            "duplicate key {:?} shared by {} file(s): {}",
122            key,
123            paths.len(),
124            paths_joined,
125        )
126    }
127}
128
129pub fn build(spec: &RuleSpec) -> Result<Box<dyn Rule>> {
130    alint_core::reject_scope_filter_on_cross_file(spec, "unique_by")?;
131    let opts: Options = spec
132        .deserialize_options()
133        .map_err(|e| Error::rule_config(&spec.id, format!("invalid options: {e}")))?;
134    if opts.key.trim().is_empty() {
135        return Err(Error::rule_config(
136            &spec.id,
137            "unique_by `key` must not be empty",
138        ));
139    }
140    let select_scope = Scope::from_patterns(&[opts.select])?;
141    Ok(Box::new(UniqueByRule {
142        id: spec.id.clone(),
143        level: spec.level,
144        policy_url: spec.policy_url.clone(),
145        message: spec.message.clone(),
146        select_scope,
147        key_template: opts.key,
148    }))
149}
150
#[cfg(test)]
mod tests {
    use super::*;
    use alint_core::{FileEntry, FileIndex};
    use std::path::Path;

    /// Builds an index holding one non-directory entry of size 1 per path.
    fn index(files: &[&str]) -> FileIndex {
        let entries = files
            .iter()
            .map(|raw| FileEntry {
                path: Path::new(raw).into(),
                is_dir: false,
                size: 1,
            })
            .collect();
        FileIndex::from_entries(entries)
    }

    /// Error-level rule with id `t`, the given selector and key template,
    /// and an optional custom message template.
    fn rule_with_message(select: &str, key: &str, message: Option<&str>) -> UniqueByRule {
        UniqueByRule {
            id: "t".into(),
            level: Level::Error,
            policy_url: None,
            message: message.map(str::to_owned),
            select_scope: Scope::from_patterns(&[select.to_string()]).unwrap(),
            key_template: key.to_string(),
        }
    }

    /// Same as [`rule_with_message`] but without a custom message.
    fn rule(select: &str, key: &str) -> UniqueByRule {
        rule_with_message(select, key, None)
    }

    /// Evaluates `rule` against an index built from `files`.
    fn eval(rule: &UniqueByRule, files: &[&str]) -> Vec<Violation> {
        let idx = index(files);
        let ctx = Context {
            root: Path::new("/"),
            index: &idx,
            registry: None,
            facts: None,
            vars: None,
            git_tracked: None,
            git_blame: None,
        };
        rule.evaluate(&ctx).unwrap()
    }

    #[test]
    fn passes_when_every_key_unique() {
        let files = ["src/foo.rs", "src/bar.rs", "tests/baz.rs"];
        let v = eval(&rule("**/*.rs", "{stem}"), &files);
        assert!(v.is_empty(), "unexpected: {v:?}");
    }

    #[test]
    fn flags_stem_collision() {
        let files = ["src/mod1/foo.rs", "src/mod2/foo.rs"];
        let v = eval(&rule("**/*.rs", "{stem}"), &files);
        assert_eq!(v.len(), 1);
        // The group is anchored on its smallest path.
        assert_eq!(v[0].path.as_deref(), Some(Path::new("src/mod1/foo.rs")));
        for expected in files {
            assert!(v[0].message.contains(expected));
        }
    }

    #[test]
    fn one_violation_per_group_regardless_of_group_size() {
        let files = [
            "src/a/foo.rs",
            "src/b/foo.rs",
            "src/c/foo.rs", // three files collide on "foo"
            "src/bar.rs",   // no collision
        ];
        let v = eval(&rule("**/*.rs", "{stem}"), &files);
        assert_eq!(v.len(), 1);
        assert!(v[0].message.contains('3'));
    }

    #[test]
    fn multiple_independent_groups() {
        let files = [
            // group "foo"
            "src/a/foo.rs",
            "src/b/foo.rs",
            // group "bar"
            "tests/bar.rs",
            "integration/bar.rs",
            // not part of any group
            "src/solo.rs",
        ];
        let v = eval(&rule("**/*.rs", "{stem}"), &files);
        assert_eq!(v.len(), 2);
    }

    #[test]
    fn default_key_is_basename() {
        // With the default `{basename}` key, a collision needs the same
        // file name including its extension.
        let r = rule("**/*", &default_key());
        let v = eval(&r, &["src/a/mod.rs", "src/b/mod.rs"]);
        assert_eq!(v.len(), 1);
    }

    #[test]
    fn different_extensions_same_stem_are_not_colliding_by_basename() {
        let r = rule("**/*", &default_key());
        let v = eval(&r, &["src/foo.rs", "src/foo.md"]);
        assert!(v.is_empty());
    }

    #[test]
    fn empty_key_rendering_skips_entry() {
        // `{parent_name}` renders to "" for root-level files, so they are
        // left out of grouping.
        let r = rule("*.md", "{parent_name}");
        let v = eval(&r, &["README.md", "CHANGELOG.md"]);
        assert!(v.is_empty());
    }

    #[test]
    fn message_template_substitution() {
        let r = rule_with_message(
            "**/*.rs",
            "{stem}",
            Some("{{ctx.count}} files share stem {{ctx.key}}"),
        );
        let v = eval(&r, &["a/foo.rs", "b/foo.rs"]);
        assert_eq!(v.len(), 1);
        assert_eq!(v[0].message, "2 files share stem foo");
    }

    #[test]
    fn build_rejects_scope_filter_on_cross_file_rule() {
        // unique_by is a cross-file rule (requires_full_index = true),
        // while scope_filter is for per-file rules only. The build path
        // must reject it with a clear message that names the rule kind.
        let yaml = r#"
id: t
kind: unique_by
select: "**/*.rs"
key: "{stem}"
level: error
scope_filter:
  has_ancestor: Cargo.toml
"#;
        let spec = crate::test_support::spec_yaml(yaml);
        let err = build(&spec).unwrap_err().to_string();
        assert!(
            err.contains("scope_filter is supported on per-file rules only"),
            "expected per-file-only message, got: {err}",
        );
        assert!(
            err.contains("unique_by"),
            "expected message to name the cross-file kind, got: {err}",
        );
    }
}