1use chrono::{DateTime, Utc};
10use serde::{Deserialize, Serialize};
11use sha2::{Digest, Sha256};
12use std::collections::HashSet;
13use std::path::{Path, PathBuf};
14
15use crate::error::{PolyDupError, Result};
16
/// Format version of the `.polydup-ignore` file.
///
/// `IgnoreFile::default` stamps new files with this value, and `IgnoreManager::load`
/// rejects any file whose `version` is greater than it.
const IGNORE_FILE_VERSION: u32 = 1;
19
20#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
22pub struct FileRange {
23 pub file: PathBuf,
24 pub start_line: usize,
25 pub end_line: usize,
26}
27
28impl std::fmt::Display for FileRange {
29 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
30 write!(
31 f,
32 "{}:{}-{}",
33 self.file.display(),
34 self.start_line,
35 self.end_line
36 )
37 }
38}
39
40impl FileRange {
41 pub fn parse(s: &str) -> Result<Self> {
43 let parts: Vec<&str> = s.rsplitn(2, ':').collect();
44 if parts.len() != 2 {
45 return Err(PolyDupError::IgnoreRule(format!(
46 "Invalid file range format: {}",
47 s
48 )));
49 }
50
51 let file = PathBuf::from(parts[1]);
52 let range_parts: Vec<&str> = parts[0].split('-').collect();
53
54 if range_parts.len() != 2 {
55 return Err(PolyDupError::IgnoreRule(format!(
56 "Invalid line range format: {}",
57 s
58 )));
59 }
60
61 let start_line = range_parts[0]
62 .parse()
63 .map_err(|_| PolyDupError::IgnoreRule("Invalid start line number".to_string()))?;
64 let end_line = range_parts[1]
65 .parse()
66 .map_err(|_| PolyDupError::IgnoreRule("Invalid end line number".to_string()))?;
67
68 if start_line > end_line {
69 return Err(PolyDupError::IgnoreRule(format!(
70 "Start line ({}) must be <= end line ({})",
71 start_line, end_line
72 )));
73 }
74
75 Ok(FileRange {
76 file,
77 start_line,
78 end_line,
79 })
80 }
81}
82
83#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
85pub struct IgnoreEntry {
86 pub id: String,
89
90 pub files: Vec<FileRange>,
92
93 pub reason: String,
95
96 pub added_by: String,
98
99 pub added_at: DateTime<Utc>,
101}
102
103impl IgnoreEntry {
104 pub fn new(id: String, files: Vec<FileRange>, reason: String, added_by: String) -> Self {
106 Self {
107 id,
108 files,
109 reason,
110 added_by,
111 added_at: Utc::now(),
112 }
113 }
114
115 pub fn matches_id(&self, duplicate_id: &str) -> bool {
117 self.id == duplicate_id
118 }
119}
120
121#[derive(Debug, Clone, Serialize, Deserialize)]
123pub struct IgnoreFile {
124 pub version: u32,
125 pub ignores: Vec<IgnoreEntry>,
126}
127
128impl Default for IgnoreFile {
129 fn default() -> Self {
130 Self {
131 version: IGNORE_FILE_VERSION,
132 ignores: Vec::new(),
133 }
134 }
135}
136
137pub struct IgnoreManager {
139 ignore_file_path: PathBuf,
140 ignore_file: IgnoreFile,
141 ignored_ids: HashSet<String>,
142}
143
144impl IgnoreManager {
145 pub fn new(directory: &Path) -> Self {
147 let ignore_file_path = directory.join(".polydup-ignore");
148 Self {
149 ignore_file_path,
150 ignore_file: IgnoreFile::default(),
151 ignored_ids: HashSet::new(),
152 }
153 }
154
155 pub fn load(&mut self) -> Result<()> {
157 if !self.ignore_file_path.exists() {
158 return Ok(());
160 }
161
162 let contents = std::fs::read_to_string(&self.ignore_file_path).map_err(PolyDupError::Io)?;
163
164 self.ignore_file = toml::from_str(&contents).map_err(|e| {
165 PolyDupError::Parsing(format!("Failed to parse .polydup-ignore file: {}", e))
166 })?;
167
168 if self.ignore_file.version > IGNORE_FILE_VERSION {
170 return Err(PolyDupError::Config(format!(
171 "Unsupported .polydup-ignore version: {} (expected <= {})",
172 self.ignore_file.version, IGNORE_FILE_VERSION
173 )));
174 }
175
176 self.ignored_ids = self
178 .ignore_file
179 .ignores
180 .iter()
181 .map(|entry| entry.id.clone())
182 .collect();
183
184 Ok(())
185 }
186
187 pub fn save(&self) -> Result<()> {
189 let contents = toml::to_string_pretty(&self.ignore_file).map_err(|e| {
190 PolyDupError::Parsing(format!("Failed to serialize ignore file: {}", e))
191 })?;
192
193 std::fs::write(&self.ignore_file_path, contents).map_err(PolyDupError::Io)?;
194
195 Ok(())
196 }
197
198 pub fn is_ignored(&self, duplicate_id: &str) -> bool {
200 self.ignored_ids.contains(duplicate_id)
201 }
202
203 pub fn add_ignore(&mut self, entry: IgnoreEntry) {
205 self.ignored_ids.insert(entry.id.clone());
206 self.ignore_file.ignores.push(entry);
207 }
208
209 pub fn remove_ignore(&mut self, duplicate_id: &str) -> bool {
211 if let Some(pos) = self
212 .ignore_file
213 .ignores
214 .iter()
215 .position(|e| e.id == duplicate_id)
216 {
217 self.ignore_file.ignores.remove(pos);
218 self.ignored_ids.remove(duplicate_id);
219 true
220 } else {
221 false
222 }
223 }
224
225 pub fn list_ignores(&self) -> &[IgnoreEntry] {
227 &self.ignore_file.ignores
228 }
229
230 pub fn count(&self) -> usize {
232 self.ignore_file.ignores.len()
233 }
234}
235
236pub fn compute_duplicate_id(normalized_tokens: &[String]) -> String {
243 let mut hasher = Sha256::new();
244
245 for token in normalized_tokens {
247 hasher.update(token.as_bytes());
248 hasher.update(b"\n"); }
250
251 let result = hasher.finalize();
252 format!("sha256:{}", hex::encode(result))
253}
254
255pub fn compute_symmetric_duplicate_id(
260 normalized_tokens1: &[String],
261 normalized_tokens2: &[String],
262) -> String {
263 let id1 = compute_duplicate_id(normalized_tokens1);
264 let id2 = compute_duplicate_id(normalized_tokens2);
265
266 if id1 == id2 {
268 return id1;
269 }
270
271 let (first, second) = if id1 <= id2 { (id1, id2) } else { (id2, id1) };
272
273 let mut hasher = Sha256::new();
274 hasher.update(first.as_bytes());
275 hasher.update(b"\n");
276 hasher.update(second.as_bytes());
277
278 let result = hasher.finalize();
279 format!("sha256:{}", hex::encode(result))
280}
281
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns string literals into the owned token vectors the ID functions expect.
    fn toks(words: &[&str]) -> Vec<String> {
        words.iter().map(|w| w.to_string()).collect()
    }

    /// A well-formed `file:start-end` string yields the matching `FileRange`.
    #[test]
    fn test_file_range_parse() {
        let parsed = FileRange::parse("src/main.rs:10-25").unwrap();
        let expected = FileRange {
            file: PathBuf::from("src/main.rs"),
            start_line: 10,
            end_line: 25,
        };
        assert_eq!(parsed, expected);
    }

    /// Missing separator, missing range, single line number and reversed range all fail.
    #[test]
    fn test_file_range_parse_invalid() {
        let malformed = [
            "invalid",
            "src/main.rs",
            "src/main.rs:10",
            "src/main.rs:25-10",
        ];
        for input in malformed.iter() {
            assert!(FileRange::parse(input).is_err());
        }
    }

    /// `Display` renders `file:start-end`.
    #[test]
    fn test_file_range_display() {
        let rendered = FileRange {
            file: PathBuf::from("src/lib.rs"),
            start_line: 5,
            end_line: 15,
        }
        .to_string();
        assert_eq!(rendered, "src/lib.rs:5-15");
    }

    /// Equal token sequences share an ID; different sequences do not.
    #[test]
    fn test_compute_duplicate_id() {
        let id_a = compute_duplicate_id(&toks(&["fn", "$$ID", "$$NUM"]));
        let id_a_again = compute_duplicate_id(&toks(&["fn", "$$ID", "$$NUM"]));
        let id_b = compute_duplicate_id(&toks(&["fn", "$$ID", "$$STR"]));

        assert_eq!(id_a, id_a_again, "Same tokens should produce same ID");
        assert_ne!(id_a, id_b, "Different tokens should produce different IDs");
        assert!(id_a.starts_with("sha256:"), "ID should have sha256 prefix");
    }

    /// With identical windows the symmetric ID collapses to the single-window ID.
    #[test]
    fn test_compute_duplicate_id_symmetric_same_tokens() {
        let window = toks(&["a", "b"]);

        let pair_id = compute_symmetric_duplicate_id(&window, &window);
        let single_id = compute_duplicate_id(&window);

        assert_eq!(
            pair_id, single_id,
            "Symmetric ID should match legacy ID when windows are identical"
        );
    }

    /// Swapping the arguments must not change the symmetric ID.
    #[test]
    fn test_compute_duplicate_id_symmetric_order_independent() {
        let short = toks(&["a", "b", "c"]);
        let long = toks(&["a", "b", "c", "d"]);

        let forward = compute_symmetric_duplicate_id(&short, &long);
        let backward = compute_symmetric_duplicate_id(&long, &short);

        assert_eq!(forward, backward, "Symmetric ID should ignore argument order");
        assert_ne!(
            forward,
            compute_duplicate_id(&short),
            "Should incorporate both windows when they differ"
        );
    }

    /// `IgnoreEntry::new` stores every argument in the corresponding field.
    #[test]
    fn test_ignore_entry_creation() {
        let ranges = vec![FileRange {
            file: PathBuf::from("src/main.rs"),
            start_line: 1,
            end_line: 10,
        }];

        let created = IgnoreEntry::new(
            String::from("sha256:abc123"),
            ranges.clone(),
            String::from("License header"),
            String::from("user@example.com"),
        );

        assert_eq!(created.id, "sha256:abc123");
        assert_eq!(created.files, ranges);
        assert_eq!(created.reason, "License header");
        assert_eq!(created.added_by, "user@example.com");
    }

    /// Add / query / remove round trip, entirely in memory (the temp dir is never written).
    #[test]
    fn test_ignore_manager_basic() {
        let dir = std::env::temp_dir();
        let mut mgr = IgnoreManager::new(&dir);

        // Fresh manager: nothing registered.
        assert_eq!(mgr.count(), 0);
        assert!(!mgr.is_ignored("sha256:test"));

        mgr.add_ignore(IgnoreEntry::new(
            String::from("sha256:test"),
            Vec::new(),
            String::from("Test"),
            String::from("test@example.com"),
        ));

        // Only the added ID is reported as ignored.
        assert_eq!(mgr.count(), 1);
        assert!(mgr.is_ignored("sha256:test"));
        assert!(!mgr.is_ignored("sha256:other"));

        // Removal succeeds and restores the empty state.
        assert!(mgr.remove_ignore("sha256:test"));
        assert_eq!(mgr.count(), 0);
        assert!(!mgr.is_ignored("sha256:test"));
    }

    /// Removing an unknown ID reports `false`.
    #[test]
    fn test_ignore_manager_remove_nonexistent() {
        let dir = std::env::temp_dir();
        let mut mgr = IgnoreManager::new(&dir);

        assert!(!mgr.remove_ignore("sha256:nonexistent"));
    }
}