1use anyhow::{Context, Result};
10use chrono::{DateTime, Utc};
11use serde::{Deserialize, Serialize};
12use sha2::{Digest, Sha256};
13use std::collections::HashSet;
14use std::path::{Path, PathBuf};
15
/// Schema version written into freshly created ignore files, and the highest
/// version that `IgnoreManager::load` accepts.
const IGNORE_FILE_VERSION: u32 = 1;
18
19#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
21pub struct FileRange {
22 pub file: PathBuf,
23 pub start_line: usize,
24 pub end_line: usize,
25}
26
27impl std::fmt::Display for FileRange {
28 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
29 write!(
30 f,
31 "{}:{}-{}",
32 self.file.display(),
33 self.start_line,
34 self.end_line
35 )
36 }
37}
38
39impl FileRange {
40 pub fn parse(s: &str) -> Result<Self> {
42 let parts: Vec<&str> = s.rsplitn(2, ':').collect();
43 if parts.len() != 2 {
44 anyhow::bail!("Invalid file range format: {}", s);
45 }
46
47 let file = PathBuf::from(parts[1]);
48 let range_parts: Vec<&str> = parts[0].split('-').collect();
49
50 if range_parts.len() != 2 {
51 anyhow::bail!("Invalid line range format: {}", s);
52 }
53
54 let start_line = range_parts[0]
55 .parse()
56 .context("Invalid start line number")?;
57 let end_line = range_parts[1].parse().context("Invalid end line number")?;
58
59 if start_line > end_line {
60 anyhow::bail!(
61 "Start line ({}) must be <= end line ({})",
62 start_line,
63 end_line
64 );
65 }
66
67 Ok(FileRange {
68 file,
69 start_line,
70 end_line,
71 })
72 }
73}
74
75#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
77pub struct IgnoreEntry {
78 pub id: String,
81
82 pub files: Vec<FileRange>,
84
85 pub reason: String,
87
88 pub added_by: String,
90
91 pub added_at: DateTime<Utc>,
93}
94
95impl IgnoreEntry {
96 pub fn new(id: String, files: Vec<FileRange>, reason: String, added_by: String) -> Self {
98 Self {
99 id,
100 files,
101 reason,
102 added_by,
103 added_at: Utc::now(),
104 }
105 }
106
107 pub fn matches_id(&self, duplicate_id: &str) -> bool {
109 self.id == duplicate_id
110 }
111}
112
113#[derive(Debug, Clone, Serialize, Deserialize)]
115pub struct IgnoreFile {
116 pub version: u32,
117 pub ignores: Vec<IgnoreEntry>,
118}
119
120impl Default for IgnoreFile {
121 fn default() -> Self {
122 Self {
123 version: IGNORE_FILE_VERSION,
124 ignores: Vec::new(),
125 }
126 }
127}
128
129pub struct IgnoreManager {
131 ignore_file_path: PathBuf,
132 ignore_file: IgnoreFile,
133 ignored_ids: HashSet<String>,
134}
135
136impl IgnoreManager {
137 pub fn new(directory: &Path) -> Self {
139 let ignore_file_path = directory.join(".polydup-ignore");
140 Self {
141 ignore_file_path,
142 ignore_file: IgnoreFile::default(),
143 ignored_ids: HashSet::new(),
144 }
145 }
146
147 pub fn load(&mut self) -> Result<()> {
149 if !self.ignore_file_path.exists() {
150 return Ok(());
152 }
153
154 let contents = std::fs::read_to_string(&self.ignore_file_path)
155 .context("Failed to read .polydup-ignore file")?;
156
157 self.ignore_file =
158 toml::from_str(&contents).context("Failed to parse .polydup-ignore file")?;
159
160 if self.ignore_file.version > IGNORE_FILE_VERSION {
162 anyhow::bail!(
163 "Unsupported .polydup-ignore version: {} (expected <= {})",
164 self.ignore_file.version,
165 IGNORE_FILE_VERSION
166 );
167 }
168
169 self.ignored_ids = self
171 .ignore_file
172 .ignores
173 .iter()
174 .map(|entry| entry.id.clone())
175 .collect();
176
177 Ok(())
178 }
179
180 pub fn save(&self) -> Result<()> {
182 let contents =
183 toml::to_string_pretty(&self.ignore_file).context("Failed to serialize ignore file")?;
184
185 std::fs::write(&self.ignore_file_path, contents)
186 .context("Failed to write .polydup-ignore file")?;
187
188 Ok(())
189 }
190
191 pub fn is_ignored(&self, duplicate_id: &str) -> bool {
193 self.ignored_ids.contains(duplicate_id)
194 }
195
196 pub fn add_ignore(&mut self, entry: IgnoreEntry) {
198 self.ignored_ids.insert(entry.id.clone());
199 self.ignore_file.ignores.push(entry);
200 }
201
202 pub fn remove_ignore(&mut self, duplicate_id: &str) -> bool {
204 if let Some(pos) = self
205 .ignore_file
206 .ignores
207 .iter()
208 .position(|e| e.id == duplicate_id)
209 {
210 self.ignore_file.ignores.remove(pos);
211 self.ignored_ids.remove(duplicate_id);
212 true
213 } else {
214 false
215 }
216 }
217
218 pub fn list_ignores(&self) -> &[IgnoreEntry] {
220 &self.ignore_file.ignores
221 }
222
223 pub fn count(&self) -> usize {
225 self.ignore_file.ignores.len()
226 }
227}
228
229pub fn compute_duplicate_id(normalized_tokens: &[String]) -> String {
236 let mut hasher = Sha256::new();
237
238 for token in normalized_tokens {
240 hasher.update(token.as_bytes());
241 hasher.update(b"\n"); }
243
244 let result = hasher.finalize();
245 format!("sha256:{}", hex::encode(result))
246}
247
248pub fn compute_symmetric_duplicate_id(
253 normalized_tokens1: &[String],
254 normalized_tokens2: &[String],
255) -> String {
256 let id1 = compute_duplicate_id(normalized_tokens1);
257 let id2 = compute_duplicate_id(normalized_tokens2);
258
259 if id1 == id2 {
261 return id1;
262 }
263
264 let (first, second) = if id1 <= id2 { (id1, id2) } else { (id2, id1) };
265
266 let mut hasher = Sha256::new();
267 hasher.update(first.as_bytes());
268 hasher.update(b"\n");
269 hasher.update(second.as_bytes());
270
271 let result = hasher.finalize();
272 format!("sha256:{}", hex::encode(result))
273}
274
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns string literals into the owned token vector the hashing
    /// functions expect.
    fn toks(words: &[&str]) -> Vec<String> {
        words.iter().map(|&word| word.to_owned()).collect()
    }

    /// Builds a manager rooted at the system temp directory; these tests
    /// never touch the file system.
    fn temp_manager() -> IgnoreManager {
        IgnoreManager::new(&std::env::temp_dir())
    }

    #[test]
    fn test_file_range_parse() {
        let FileRange {
            file,
            start_line,
            end_line,
        } = FileRange::parse("src/main.rs:10-25").unwrap();

        assert_eq!(file, PathBuf::from("src/main.rs"));
        assert_eq!(start_line, 10);
        assert_eq!(end_line, 25);
    }

    #[test]
    fn test_file_range_parse_invalid() {
        // No separator, no range, half a range, and a reversed range.
        let rejected = [
            "invalid",
            "src/main.rs",
            "src/main.rs:10",
            "src/main.rs:25-10",
        ];
        for input in rejected.iter() {
            assert!(FileRange::parse(input).is_err());
        }
    }

    #[test]
    fn test_file_range_display() {
        let rendered = FileRange {
            file: PathBuf::from("src/lib.rs"),
            start_line: 5,
            end_line: 15,
        }
        .to_string();

        assert_eq!(rendered, "src/lib.rs:5-15");
    }

    #[test]
    fn test_compute_duplicate_id() {
        let id1 = compute_duplicate_id(&toks(&["fn", "$$ID", "$$NUM"]));
        let id2 = compute_duplicate_id(&toks(&["fn", "$$ID", "$$NUM"]));
        let id3 = compute_duplicate_id(&toks(&["fn", "$$ID", "$$STR"]));

        assert_eq!(id1, id2, "Same tokens should produce same ID");
        assert_ne!(id1, id3, "Different tokens should produce different IDs");
        assert!(id1.starts_with("sha256:"), "ID should have sha256 prefix");
    }

    #[test]
    fn test_compute_duplicate_id_symmetric_same_tokens() {
        let tokens = toks(&["a", "b"]);

        let symmetric = compute_symmetric_duplicate_id(&tokens, &tokens);
        let single = compute_duplicate_id(&tokens);

        assert_eq!(
            symmetric, single,
            "Symmetric ID should match legacy ID when windows are identical"
        );
    }

    #[test]
    fn test_compute_duplicate_id_symmetric_order_independent() {
        let tokens_a = toks(&["a", "b", "c"]);
        let tokens_b = toks(&["a", "b", "c", "d"]);

        let id1 = compute_symmetric_duplicate_id(&tokens_a, &tokens_b);
        let id2 = compute_symmetric_duplicate_id(&tokens_b, &tokens_a);

        assert_eq!(id1, id2, "Symmetric ID should ignore argument order");
        assert_ne!(
            id1,
            compute_duplicate_id(&tokens_a),
            "Should incorporate both windows when they differ"
        );
    }

    #[test]
    fn test_ignore_entry_creation() {
        let files = vec![FileRange {
            file: PathBuf::from("src/main.rs"),
            start_line: 1,
            end_line: 10,
        }];

        let entry = IgnoreEntry::new(
            "sha256:abc123".to_string(),
            files.clone(),
            "License header".to_string(),
            "user@example.com".to_string(),
        );

        assert_eq!(entry.id, "sha256:abc123");
        assert_eq!(entry.files, files);
        assert_eq!(entry.reason, "License header");
        assert_eq!(entry.added_by, "user@example.com");
    }

    #[test]
    fn test_ignore_manager_basic() {
        let mut manager = temp_manager();

        // Starts empty.
        assert_eq!(manager.count(), 0);
        assert!(!manager.is_ignored("sha256:test"));

        // Adding an entry makes exactly that ID ignored.
        manager.add_ignore(IgnoreEntry::new(
            "sha256:test".to_string(),
            vec![],
            "Test".to_string(),
            "test@example.com".to_string(),
        ));

        assert_eq!(manager.count(), 1);
        assert!(manager.is_ignored("sha256:test"));
        assert!(!manager.is_ignored("sha256:other"));

        // Removing it restores the empty state.
        assert!(manager.remove_ignore("sha256:test"));
        assert_eq!(manager.count(), 0);
        assert!(!manager.is_ignored("sha256:test"));
    }

    #[test]
    fn test_ignore_manager_remove_nonexistent() {
        let mut manager = temp_manager();

        assert!(!manager.remove_ignore("sha256:nonexistent"));
    }
}