Skip to main content

dupes_core/
ignore.rs

1use std::collections::HashSet;
2use std::path::{Path, PathBuf};
3
4use serde::{Deserialize, Serialize};
5
6use crate::fingerprint::Fingerprint;
7use crate::grouper::DuplicateGroup;
8
9const IGNORE_FILE_NAME: &str = ".dupes-ignore.toml";
10
11/// An entry in the ignore file.
12#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
13pub struct IgnoreEntry {
14    /// The fingerprint of the duplicated code.
15    pub fingerprint: String,
16    /// Optional reason for ignoring.
17    #[serde(default, skip_serializing_if = "Option::is_none")]
18    pub reason: Option<String>,
19    /// Names of the code units in the group (for documentation).
20    #[serde(default, skip_serializing_if = "Vec::is_empty")]
21    pub members: Vec<String>,
22}
23
24/// The ignore file structure.
25#[derive(Debug, Clone, Serialize, Deserialize, Default)]
26pub struct IgnoreFile {
27    #[serde(default)]
28    pub ignore: Vec<IgnoreEntry>,
29}
30
31/// Get the path to the ignore file for a project root.
32#[must_use]
33pub fn ignore_file_path(root: &Path) -> PathBuf {
34    root.join(IGNORE_FILE_NAME)
35}
36
37/// Load the ignore file from disk.
38#[must_use]
39pub fn load_ignore_file(root: &Path) -> IgnoreFile {
40    let path = ignore_file_path(root);
41    if !path.exists() {
42        return IgnoreFile::default();
43    }
44    std::fs::read_to_string(&path).map_or_else(
45        |_| IgnoreFile::default(),
46        |content| toml::from_str(&content).unwrap_or_default(),
47    )
48}
49
50/// Save the ignore file to disk.
51pub fn save_ignore_file(root: &Path, ignore_file: &IgnoreFile) -> std::io::Result<()> {
52    let path = ignore_file_path(root);
53    let content = toml::to_string_pretty(ignore_file)
54        .map_err(|e| std::io::Error::other(format!("Failed to serialize ignore file: {e}")))?;
55    std::fs::write(path, content)
56}
57
58/// Add an ignore entry for a fingerprint.
59pub fn add_ignore(
60    ignore_file: &mut IgnoreFile,
61    fingerprint: &Fingerprint,
62    reason: Option<String>,
63    members: Vec<String>,
64) {
65    let fp_hex = fingerprint.to_hex();
66    // Don't add duplicates
67    if ignore_file.ignore.iter().any(|e| e.fingerprint == fp_hex) {
68        return;
69    }
70    ignore_file.ignore.push(IgnoreEntry {
71        fingerprint: fp_hex,
72        reason,
73        members,
74    });
75}
76
77/// Remove an ignore entry by fingerprint.
78pub fn remove_ignore(ignore_file: &mut IgnoreFile, fingerprint: &str) -> bool {
79    let initial_len = ignore_file.ignore.len();
80    ignore_file.ignore.retain(|e| e.fingerprint != fingerprint);
81    ignore_file.ignore.len() < initial_len
82}
83
84/// Check if a fingerprint is ignored.
85#[must_use]
86pub fn is_ignored(ignore_file: &IgnoreFile, fingerprint: &Fingerprint) -> bool {
87    let fp_hex = fingerprint.to_hex();
88    ignore_file.ignore.iter().any(|e| e.fingerprint == fp_hex)
89}
90
91/// Filter out ignored groups from a list of duplicate groups.
92#[must_use]
93pub fn filter_ignored(
94    groups: Vec<DuplicateGroup>,
95    ignore_file: &IgnoreFile,
96) -> Vec<DuplicateGroup> {
97    groups
98        .into_iter()
99        .filter(|g| !is_ignored(ignore_file, &g.fingerprint))
100        .collect()
101}
102
103/// Find ignore entries whose fingerprint doesn't match any live group.
104#[must_use]
105pub fn find_stale_entries<'a>(
106    ignore_file: &'a IgnoreFile,
107    live_fingerprints: &HashSet<Fingerprint>,
108) -> Vec<&'a IgnoreEntry> {
109    ignore_file
110        .ignore
111        .iter()
112        .filter(|entry| {
113            !Fingerprint::from_hex(&entry.fingerprint)
114                .is_some_and(|fp| live_fingerprints.contains(&fp)) // invalid hex is always stale
115        })
116        .collect()
117}
118
119/// Remove and return stale ignore entries.
120pub fn remove_stale_entries(
121    ignore_file: &mut IgnoreFile,
122    live_fingerprints: &HashSet<Fingerprint>,
123) -> Vec<IgnoreEntry> {
124    let mut stale = Vec::new();
125    let mut live = Vec::new();
126    for entry in ignore_file.ignore.drain(..) {
127        let is_live = Fingerprint::from_hex(&entry.fingerprint)
128            .is_some_and(|fp| live_fingerprints.contains(&fp));
129        if is_live {
130            live.push(entry);
131        } else {
132            stale.push(entry);
133        }
134    }
135    ignore_file.ignore = live;
136    stale
137}
138
/// Unit tests for loading, saving, editing and pruning the ignore file.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::node::{LiteralKind, NodeKind, NormalizedNode};
    use tempfile::TempDir;

    /// Fingerprint of a lone integer-literal leaf; the "primary" fingerprint
    /// used throughout these tests.
    fn test_fingerprint() -> Fingerprint {
        Fingerprint::from_node(&NormalizedNode::leaf(NodeKind::Literal(LiteralKind::Int)))
    }

    /// Fingerprint of an empty block node; used wherever a second
    /// fingerprint is needed alongside [`test_fingerprint`].
    fn block_fingerprint() -> Fingerprint {
        Fingerprint::from_node(&NormalizedNode::with_children(NodeKind::Block, vec![]))
    }

    #[test]
    fn load_nonexistent_returns_default() {
        let tmp = TempDir::new().unwrap();
        let ignore = load_ignore_file(tmp.path());
        assert!(ignore.ignore.is_empty());
    }

    #[test]
    fn load_malformed_returns_default() {
        // A file that exists but is not valid TOML falls back to the default
        // instead of surfacing an error.
        let tmp = TempDir::new().unwrap();
        std::fs::write(ignore_file_path(tmp.path()), "this is = = not valid toml").unwrap();
        let ignore = load_ignore_file(tmp.path());
        assert!(ignore.ignore.is_empty());
    }

    #[test]
    fn roundtrip_save_and_load() {
        let tmp = TempDir::new().unwrap();
        let fp = test_fingerprint();
        let mut ignore = IgnoreFile::default();
        add_ignore(
            &mut ignore,
            &fp,
            Some("test reason".to_string()),
            vec!["foo".to_string(), "bar".to_string()],
        );
        save_ignore_file(tmp.path(), &ignore).unwrap();
        let loaded = load_ignore_file(tmp.path());
        assert_eq!(loaded.ignore.len(), 1);
        assert_eq!(loaded.ignore[0].fingerprint, fp.to_hex());
        assert_eq!(loaded.ignore[0].reason, Some("test reason".to_string()));
        assert_eq!(loaded.ignore[0].members, vec!["foo", "bar"]);
    }

    #[test]
    fn add_ignore_deduplicates() {
        let fp = test_fingerprint();
        let mut ignore = IgnoreFile::default();
        add_ignore(&mut ignore, &fp, None, vec![]);
        add_ignore(&mut ignore, &fp, None, vec![]);
        assert_eq!(ignore.ignore.len(), 1);
    }

    #[test]
    fn add_ignore_keeps_first_entry_on_duplicate() {
        // A second add for the same fingerprint must not overwrite the
        // reason or members recorded by the first.
        let fp = test_fingerprint();
        let mut ignore = IgnoreFile::default();
        add_ignore(&mut ignore, &fp, Some("first".to_string()), vec!["a".to_string()]);
        add_ignore(&mut ignore, &fp, Some("second".to_string()), vec!["b".to_string()]);
        assert_eq!(ignore.ignore.len(), 1);
        assert_eq!(ignore.ignore[0].reason, Some("first".to_string()));
        assert_eq!(ignore.ignore[0].members, vec!["a"]);
    }

    #[test]
    fn remove_ignore_works() {
        let fp = test_fingerprint();
        let mut ignore = IgnoreFile::default();
        add_ignore(&mut ignore, &fp, None, vec![]);
        assert!(remove_ignore(&mut ignore, &fp.to_hex()));
        assert!(ignore.ignore.is_empty());
    }

    #[test]
    fn remove_nonexistent_returns_false() {
        let mut ignore = IgnoreFile::default();
        assert!(!remove_ignore(&mut ignore, "nonexistent"));
    }

    #[test]
    fn is_ignored_works() {
        let fp = test_fingerprint();
        let mut ignore = IgnoreFile::default();
        assert!(!is_ignored(&ignore, &fp));
        add_ignore(&mut ignore, &fp, None, vec![]);
        assert!(is_ignored(&ignore, &fp));
    }

    #[test]
    fn filter_ignored_removes_matching_groups() {
        let fp = test_fingerprint();
        let mut ignore = IgnoreFile::default();
        add_ignore(&mut ignore, &fp, None, vec![]);

        let groups = vec![
            DuplicateGroup {
                fingerprint: fp,
                members: vec![],
                similarity: 1.0,
            },
            DuplicateGroup {
                fingerprint: Fingerprint::from_node(&NormalizedNode::leaf(NodeKind::Opaque)),
                members: vec![],
                similarity: 1.0,
            },
        ];

        let filtered = filter_ignored(groups, &ignore);
        assert_eq!(filtered.len(), 1);
    }

    #[test]
    fn filter_ignored_removes_near_duplicates_with_matching_fingerprint() {
        let fp = test_fingerprint();
        let mut ignore = IgnoreFile::default();
        add_ignore(&mut ignore, &fp, None, vec![]);

        let groups = vec![DuplicateGroup {
            fingerprint: fp,
            members: vec![],
            similarity: 0.85,
        }];

        let filtered = filter_ignored(groups, &ignore);
        assert!(filtered.is_empty());
    }

    #[test]
    fn filter_ignored_keeps_near_duplicates_without_matching_entry() {
        let fp = test_fingerprint();
        let other_fp = block_fingerprint();
        let mut ignore = IgnoreFile::default();
        add_ignore(&mut ignore, &other_fp, None, vec![]);

        let groups = vec![DuplicateGroup {
            fingerprint: fp,
            members: vec![],
            similarity: 0.85,
        }];

        let filtered = filter_ignored(groups, &ignore);
        assert_eq!(filtered.len(), 1);
    }

    #[test]
    fn find_stale_entries_identifies_stale_vs_live() {
        let live_fp = test_fingerprint();
        let stale_fp = block_fingerprint();

        let mut ignore = IgnoreFile::default();
        add_ignore(&mut ignore, &live_fp, Some("live".to_string()), vec![]);
        add_ignore(&mut ignore, &stale_fp, Some("stale".to_string()), vec![]);

        let live_set = HashSet::from([live_fp]);

        let stale = find_stale_entries(&ignore, &live_set);
        assert_eq!(stale.len(), 1);
        assert_eq!(stale[0].reason, Some("stale".to_string()));
    }

    #[test]
    fn find_stale_entries_treats_invalid_hex_as_stale() {
        // An entry whose fingerprint string cannot be parsed can never match
        // a live group, so it must be reported as stale.
        let ignore = IgnoreFile {
            ignore: vec![IgnoreEntry {
                fingerprint: "zz-not-hex".to_string(),
                reason: None,
                members: vec![],
            }],
        };
        let live_set = HashSet::from([test_fingerprint()]);

        let stale = find_stale_entries(&ignore, &live_set);
        assert_eq!(stale.len(), 1);
        assert_eq!(stale[0].fingerprint, "zz-not-hex");
    }

    #[test]
    fn remove_stale_entries_removes_only_stale() {
        let live_fp = test_fingerprint();
        let stale_fp = block_fingerprint();

        let mut ignore = IgnoreFile::default();
        add_ignore(&mut ignore, &live_fp, Some("live".to_string()), vec![]);
        add_ignore(&mut ignore, &stale_fp, Some("stale".to_string()), vec![]);

        let live_set = HashSet::from([live_fp]);

        let removed = remove_stale_entries(&mut ignore, &live_set);
        assert_eq!(removed.len(), 1);
        assert_eq!(removed[0].reason, Some("stale".to_string()));
        assert_eq!(ignore.ignore.len(), 1);
        assert_eq!(ignore.ignore[0].reason, Some("live".to_string()));
    }

    #[test]
    fn ignore_file_path_is_correct() {
        let path = ignore_file_path(Path::new("/project"));
        assert_eq!(path, PathBuf::from("/project/.dupes-ignore.toml"));
    }
}