1use std::collections::HashSet;
2use std::path::{Path, PathBuf};
3
4use serde::{Deserialize, Serialize};
5
6use crate::fingerprint::Fingerprint;
7use crate::grouper::DuplicateGroup;
8
/// File name of the ignore list; [`ignore_file_path`] joins it onto the root
/// directory it is given.
const IGNORE_FILE_NAME: &str = ".dupes-ignore.toml";
10
/// One entry of the ignore file.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct IgnoreEntry {
    /// Fingerprint as a hex string, in the form produced by
    /// `Fingerprint::to_hex` and parsed back with `Fingerprint::from_hex`.
    pub fingerprint: String,
    /// Optional free-form reason. A missing key deserializes to `None`, and
    /// `None` is left out of the serialized output.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub reason: Option<String>,
    /// Strings describing the group's members. Nothing in this module
    /// interprets them — presumably they are for human readers (TODO confirm).
    /// A missing key deserializes to an empty list, and an empty list is left
    /// out of the serialized output.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub members: Vec<String>,
}
23
24#[derive(Debug, Clone, Serialize, Deserialize, Default)]
26pub struct IgnoreFile {
27 #[serde(default)]
28 pub ignore: Vec<IgnoreEntry>,
29}
30
31#[must_use]
33pub fn ignore_file_path(root: &Path) -> PathBuf {
34 root.join(IGNORE_FILE_NAME)
35}
36
37#[must_use]
39pub fn load_ignore_file(root: &Path) -> IgnoreFile {
40 let path = ignore_file_path(root);
41 if !path.exists() {
42 return IgnoreFile::default();
43 }
44 std::fs::read_to_string(&path).map_or_else(
45 |_| IgnoreFile::default(),
46 |content| toml::from_str(&content).unwrap_or_default(),
47 )
48}
49
50pub fn save_ignore_file(root: &Path, ignore_file: &IgnoreFile) -> std::io::Result<()> {
52 let path = ignore_file_path(root);
53 let content = toml::to_string_pretty(ignore_file)
54 .map_err(|e| std::io::Error::other(format!("Failed to serialize ignore file: {e}")))?;
55 std::fs::write(path, content)
56}
57
58pub fn add_ignore(
60 ignore_file: &mut IgnoreFile,
61 fingerprint: &Fingerprint,
62 reason: Option<String>,
63 members: Vec<String>,
64) {
65 let fp_hex = fingerprint.to_hex();
66 if ignore_file.ignore.iter().any(|e| e.fingerprint == fp_hex) {
68 return;
69 }
70 ignore_file.ignore.push(IgnoreEntry {
71 fingerprint: fp_hex,
72 reason,
73 members,
74 });
75}
76
77pub fn remove_ignore(ignore_file: &mut IgnoreFile, fingerprint: &str) -> bool {
79 let initial_len = ignore_file.ignore.len();
80 ignore_file.ignore.retain(|e| e.fingerprint != fingerprint);
81 ignore_file.ignore.len() < initial_len
82}
83
84#[must_use]
86pub fn is_ignored(ignore_file: &IgnoreFile, fingerprint: &Fingerprint) -> bool {
87 let fp_hex = fingerprint.to_hex();
88 ignore_file.ignore.iter().any(|e| e.fingerprint == fp_hex)
89}
90
91#[must_use]
93pub fn filter_ignored(
94 groups: Vec<DuplicateGroup>,
95 ignore_file: &IgnoreFile,
96) -> Vec<DuplicateGroup> {
97 groups
98 .into_iter()
99 .filter(|g| !is_ignored(ignore_file, &g.fingerprint))
100 .collect()
101}
102
103#[must_use]
105pub fn find_stale_entries<'a>(
106 ignore_file: &'a IgnoreFile,
107 live_fingerprints: &HashSet<Fingerprint>,
108) -> Vec<&'a IgnoreEntry> {
109 ignore_file
110 .ignore
111 .iter()
112 .filter(|entry| {
113 !Fingerprint::from_hex(&entry.fingerprint)
114 .is_some_and(|fp| live_fingerprints.contains(&fp)) })
116 .collect()
117}
118
119pub fn remove_stale_entries(
121 ignore_file: &mut IgnoreFile,
122 live_fingerprints: &HashSet<Fingerprint>,
123) -> Vec<IgnoreEntry> {
124 let mut stale = Vec::new();
125 let mut live = Vec::new();
126 for entry in ignore_file.ignore.drain(..) {
127 let is_live = Fingerprint::from_hex(&entry.fingerprint)
128 .is_some_and(|fp| live_fingerprints.contains(&fp));
129 if is_live {
130 live.push(entry);
131 } else {
132 stale.push(entry);
133 }
134 }
135 ignore_file.ignore = live;
136 stale
137}
138
#[cfg(test)]
mod tests {
    use super::*;
    use crate::node::{LiteralKind, NodeKind, NormalizedNode};
    use tempfile::TempDir;

    /// Fingerprint of an integer-literal leaf node; the default fixture.
    fn test_fingerprint() -> Fingerprint {
        let node = NormalizedNode::leaf(NodeKind::Literal(LiteralKind::Int));
        Fingerprint::from_node(&node)
    }

    /// Fingerprint of a childless block node. The tests below rely on it
    /// differing from `test_fingerprint()`.
    fn block_fingerprint() -> Fingerprint {
        let node = NormalizedNode::with_children(NodeKind::Block, vec![]);
        Fingerprint::from_node(&node)
    }

    /// Ignore file holding a single entry for `fp` with no reason and no members.
    fn ignoring(fp: &Fingerprint) -> IgnoreFile {
        let mut file = IgnoreFile::default();
        add_ignore(&mut file, fp, None, vec![]);
        file
    }

    /// Member-less group with similarity 1.0.
    fn exact_group(fingerprint: Fingerprint) -> DuplicateGroup {
        DuplicateGroup {
            fingerprint,
            members: vec![],
            similarity: 1.0,
        }
    }

    /// Member-less group with similarity 0.85.
    fn near_group(fingerprint: Fingerprint) -> DuplicateGroup {
        DuplicateGroup {
            fingerprint,
            members: vec![],
            similarity: 0.85,
        }
    }

    /// Ignore file with one entry tagged "live" and one tagged "stale",
    /// together with a live set that contains only the former's fingerprint.
    fn live_and_stale_fixture() -> (IgnoreFile, HashSet<Fingerprint>) {
        let live_fp = test_fingerprint();
        let stale_fp = block_fingerprint();

        let mut file = IgnoreFile::default();
        add_ignore(&mut file, &live_fp, Some("live".to_string()), vec![]);
        add_ignore(&mut file, &stale_fp, Some("stale".to_string()), vec![]);

        (file, HashSet::from([live_fp]))
    }

    #[test]
    fn load_nonexistent_returns_default() {
        let dir = TempDir::new().unwrap();
        assert!(load_ignore_file(dir.path()).ignore.is_empty());
    }

    #[test]
    fn roundtrip_save_and_load() {
        let dir = TempDir::new().unwrap();
        let fp = test_fingerprint();

        let mut original = IgnoreFile::default();
        add_ignore(
            &mut original,
            &fp,
            Some("test reason".to_string()),
            vec!["foo".to_string(), "bar".to_string()],
        );
        save_ignore_file(dir.path(), &original).unwrap();

        let loaded = load_ignore_file(dir.path());
        assert_eq!(loaded.ignore.len(), 1);
        let entry = &loaded.ignore[0];
        assert_eq!(entry.fingerprint, fp.to_hex());
        assert_eq!(entry.reason.as_deref(), Some("test reason"));
        assert_eq!(entry.members, ["foo", "bar"]);
    }

    #[test]
    fn add_ignore_deduplicates() {
        let fp = test_fingerprint();
        let mut file = ignoring(&fp);
        add_ignore(&mut file, &fp, None, vec![]);
        assert_eq!(file.ignore.len(), 1);
    }

    #[test]
    fn remove_ignore_works() {
        let fp = test_fingerprint();
        let mut file = ignoring(&fp);
        let removed = remove_ignore(&mut file, &fp.to_hex());
        assert!(removed);
        assert!(file.ignore.is_empty());
    }

    #[test]
    fn remove_nonexistent_returns_false() {
        let mut file = IgnoreFile::default();
        let removed = remove_ignore(&mut file, "nonexistent");
        assert!(!removed);
    }

    #[test]
    fn is_ignored_works() {
        let fp = test_fingerprint();
        let mut file = IgnoreFile::default();
        assert!(!is_ignored(&file, &fp));

        add_ignore(&mut file, &fp, None, vec![]);
        assert!(is_ignored(&file, &fp));
    }

    #[test]
    fn filter_ignored_removes_matching_groups() {
        let fp = test_fingerprint();
        let file = ignoring(&fp);
        let opaque_fp = Fingerprint::from_node(&NormalizedNode::leaf(NodeKind::Opaque));

        let groups = vec![exact_group(fp), exact_group(opaque_fp)];

        assert_eq!(filter_ignored(groups, &file).len(), 1);
    }

    #[test]
    fn filter_ignored_removes_near_duplicates_with_matching_fingerprint() {
        let fp = test_fingerprint();
        let file = ignoring(&fp);

        let remaining = filter_ignored(vec![near_group(fp)], &file);
        assert!(remaining.is_empty());
    }

    #[test]
    fn filter_ignored_keeps_near_duplicates_without_matching_entry() {
        let file = ignoring(&block_fingerprint());

        let remaining = filter_ignored(vec![near_group(test_fingerprint())], &file);
        assert_eq!(remaining.len(), 1);
    }

    #[test]
    fn find_stale_entries_identifies_stale_vs_live() {
        let (file, live) = live_and_stale_fixture();

        let stale = find_stale_entries(&file, &live);
        assert_eq!(stale.len(), 1);
        assert_eq!(stale[0].reason.as_deref(), Some("stale"));
    }

    #[test]
    fn remove_stale_entries_removes_only_stale() {
        let (mut file, live) = live_and_stale_fixture();

        let removed = remove_stale_entries(&mut file, &live);
        assert_eq!(removed.len(), 1);
        assert_eq!(removed[0].reason.as_deref(), Some("stale"));
        assert_eq!(file.ignore.len(), 1);
        assert_eq!(file.ignore[0].reason.as_deref(), Some("live"));
    }

    #[test]
    fn ignore_file_path_is_correct() {
        let expected = PathBuf::from("/project/.dupes-ignore.toml");
        assert_eq!(ignore_file_path(Path::new("/project")), expected);
    }
}