1#![allow(dead_code)]
22#![allow(clippy::cast_precision_loss)]
23
24use std::collections::HashMap;
25use std::path::{Path, PathBuf};
26
27use serde::{Deserialize, Serialize};
28
29use crate::{DedupError, DedupResult};
30
31#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
37pub struct FileState {
38 pub size: u64,
40 pub mtime_secs: u64,
42 pub content_hash: Option<String>,
44}
45
46impl FileState {
47 pub fn from_path(path: &Path) -> DedupResult<Self> {
53 let meta = std::fs::metadata(path)?;
54 let size = meta.len();
55 let mtime_secs = meta
56 .modified()
57 .ok()
58 .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
59 .map(|d| d.as_secs())
60 .unwrap_or(0);
61 Ok(Self {
62 size,
63 mtime_secs,
64 content_hash: None,
65 })
66 }
67
68 #[must_use]
70 pub fn matches(&self, other: &Self) -> bool {
71 self.size == other.size && self.mtime_secs == other.mtime_secs
72 }
73}
74
/// Result of comparing a candidate file with the index.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum FileChange {
    /// The path has no entry in the index.
    New,
    /// The path is tracked, but its current state no longer matches the stored one.
    Modified,
    /// The path is tracked and its current state matches the stored one.
    Unchanged,
    /// The path is considered gone: either it is tracked but was not offered
    /// as a candidate, or its state could not be read during a scan.
    Deleted,
}
91
92impl FileChange {
93 #[must_use]
95 pub fn needs_processing(self) -> bool {
96 matches!(self, Self::New | Self::Modified)
97 }
98
99 #[must_use]
101 pub fn label(self) -> &'static str {
102 match self {
103 Self::New => "new",
104 Self::Modified => "modified",
105 Self::Unchanged => "unchanged",
106 Self::Deleted => "deleted",
107 }
108 }
109}
110
111#[derive(Debug, Clone)]
117pub struct ScanResult {
118 pub to_process: Vec<PathBuf>,
120 pub unchanged: Vec<PathBuf>,
122 pub deleted: Vec<PathBuf>,
124 pub changes: Vec<(PathBuf, FileChange)>,
126}
127
128impl ScanResult {
129 #[must_use]
131 pub fn total_examined(&self) -> usize {
132 self.to_process.len() + self.unchanged.len()
133 }
134
135 #[must_use]
137 pub fn processing_ratio(&self) -> f64 {
138 let total = self.total_examined();
139 if total == 0 {
140 return 0.0;
141 }
142 self.to_process.len() as f64 / total as f64
143 }
144
145 #[must_use]
147 pub fn new_count(&self) -> usize {
148 self.changes
149 .iter()
150 .filter(|(_, c)| *c == FileChange::New)
151 .count()
152 }
153
154 #[must_use]
156 pub fn modified_count(&self) -> usize {
157 self.changes
158 .iter()
159 .filter(|(_, c)| *c == FileChange::Modified)
160 .count()
161 }
162
163 #[must_use]
165 pub fn summary(&self) -> String {
166 format!(
167 "{} to process ({} new, {} modified), {} unchanged, {} deleted",
168 self.to_process.len(),
169 self.new_count(),
170 self.modified_count(),
171 self.unchanged.len(),
172 self.deleted.len(),
173 )
174 }
175}
176
177#[derive(Debug, Clone, Serialize, Deserialize)]
186pub struct IncrementalIndex {
187 files: HashMap<String, FileState>,
189 last_scan_epoch: u64,
191 scan_count: u64,
193}
194
195impl IncrementalIndex {
196 #[must_use]
198 pub fn new() -> Self {
199 Self {
200 files: HashMap::new(),
201 last_scan_epoch: 0,
202 scan_count: 0,
203 }
204 }
205
206 #[must_use]
208 pub fn tracked_count(&self) -> usize {
209 self.files.len()
210 }
211
212 #[must_use]
214 pub fn scan_count(&self) -> u64 {
215 self.scan_count
216 }
217
218 #[must_use]
220 pub fn last_scan_epoch(&self) -> u64 {
221 self.last_scan_epoch
222 }
223
224 pub fn classify(&self, path: &Path) -> DedupResult<(FileChange, FileState)> {
230 let current = FileState::from_path(path)?;
231 let key = path.to_string_lossy().to_string();
232
233 let change = match self.files.get(&key) {
234 Some(stored) if stored.matches(¤t) => FileChange::Unchanged,
235 Some(_) => FileChange::Modified,
236 None => FileChange::New,
237 };
238
239 Ok((change, current))
240 }
241
242 pub fn scan(&self, candidates: &[PathBuf]) -> ScanResult {
249 let mut to_process = Vec::new();
250 let mut unchanged = Vec::new();
251 let mut changes = Vec::new();
252
253 let candidate_set: std::collections::HashSet<String> = candidates
254 .iter()
255 .map(|p| p.to_string_lossy().to_string())
256 .collect();
257
258 for path in candidates {
259 match self.classify(path) {
260 Ok((change, _state)) => {
261 if change.needs_processing() {
262 to_process.push(path.clone());
263 } else {
264 unchanged.push(path.clone());
265 }
266 changes.push((path.clone(), change));
267 }
268 Err(_) => {
269 changes.push((path.clone(), FileChange::Deleted));
271 }
272 }
273 }
274
275 let mut deleted = Vec::new();
277 for key in self.files.keys() {
278 if !candidate_set.contains(key) {
279 deleted.push(PathBuf::from(key));
280 changes.push((PathBuf::from(key), FileChange::Deleted));
281 }
282 }
283
284 ScanResult {
285 to_process,
286 unchanged,
287 deleted,
288 changes,
289 }
290 }
291
292 pub fn commit(&mut self, paths: &[PathBuf]) {
296 for path in paths {
297 let key = path.to_string_lossy().to_string();
298 if let Ok(state) = FileState::from_path(path) {
299 self.files.insert(key, state);
300 }
301 }
302 self.last_scan_epoch = std::time::SystemTime::now()
303 .duration_since(std::time::UNIX_EPOCH)
304 .unwrap_or_default()
305 .as_secs();
306 self.scan_count += 1;
307 }
308
309 pub fn commit_file(&mut self, path: &Path, state: FileState) {
311 let key = path.to_string_lossy().to_string();
312 self.files.insert(key, state);
313 }
314
315 pub fn prune_deleted(&mut self, deleted: &[PathBuf]) {
317 for path in deleted {
318 let key = path.to_string_lossy().to_string();
319 self.files.remove(&key);
320 }
321 }
322
323 #[must_use]
325 pub fn get_state(&self, path: &Path) -> Option<&FileState> {
326 let key = path.to_string_lossy().to_string();
327 self.files.get(&key)
328 }
329
330 #[must_use]
332 pub fn is_tracked(&self, path: &Path) -> bool {
333 let key = path.to_string_lossy().to_string();
334 self.files.contains_key(&key)
335 }
336
337 pub fn to_json(&self) -> DedupResult<String> {
343 serde_json::to_string_pretty(self)
344 .map_err(|e| DedupError::Hash(format!("JSON serialise: {e}")))
345 }
346
347 pub fn from_json(json: &str) -> DedupResult<Self> {
353 serde_json::from_str(json).map_err(|e| DedupError::Hash(format!("JSON deserialise: {e}")))
354 }
355
356 pub fn save_to_file(&self, path: &Path) -> DedupResult<()> {
362 let json = self.to_json()?;
363 std::fs::write(path, json)?;
364 Ok(())
365 }
366
367 pub fn load_from_file(path: &Path) -> DedupResult<Self> {
373 let json = std::fs::read_to_string(path)?;
374 Self::from_json(&json)
375 }
376
377 pub fn clear(&mut self) {
379 self.files.clear();
380 self.last_scan_epoch = 0;
381 self.scan_count = 0;
382 }
383
384 #[must_use]
386 pub fn tracked_paths(&self) -> Vec<String> {
387 self.files.keys().cloned().collect()
388 }
389
390 pub fn merge(&mut self, other: &IncrementalIndex) {
392 for (key, state) in &other.files {
393 self.files.insert(key.clone(), state.clone());
394 }
395 self.last_scan_epoch = self.last_scan_epoch.max(other.last_scan_epoch);
396 }
397}
398
399impl Default for IncrementalIndex {
400 fn default() -> Self {
401 Self::new()
402 }
403}
404
#[cfg(test)]
mod tests {
    //! Unit tests for the incremental index. Filesystem-backed tests work in
    //! per-test scratch directories below the system temp directory.

    use super::*;

    /// Returns a scratch directory below the system temp dir, creating it if
    /// possible. A creation failure is ignored here and surfaces as soon as
    /// the test writes its first file.
    fn scratch_dir(name: &str) -> PathBuf {
        let dir = std::env::temp_dir().join(name);
        let _ = std::fs::create_dir_all(&dir);
        dir
    }

    /// Best-effort removal of a scratch directory.
    fn remove_scratch(dir: &Path) {
        let _ = std::fs::remove_dir_all(dir);
    }

    /// Writes `content` to `dir/name` and returns the full path.
    fn make_temp_file(dir: &Path, name: &str, content: &[u8]) -> PathBuf {
        let path = dir.join(name);
        std::fs::write(&path, content).expect("write temp file");
        path
    }

    /// Shorthand for building a `FileState` literal.
    fn state(size: u64, mtime_secs: u64, hash: Option<&str>) -> FileState {
        FileState {
            size,
            mtime_secs,
            content_hash: hash.map(String::from),
        }
    }

    #[test]
    fn test_file_state_from_path() {
        let dir = scratch_dir("oximedia_dedup_incr_state");
        let path = make_temp_file(&dir, "test_state.bin", &[0u8; 100]);

        let snapshot = FileState::from_path(&path).expect("should read state");
        assert_eq!(snapshot.size, 100);
        assert!(snapshot.mtime_secs > 0);
        assert!(snapshot.content_hash.is_none());

        remove_scratch(&dir);
    }

    #[test]
    fn test_file_state_matches() {
        let a = state(1000, 12345, None);

        // A differing hash alone must not break the match.
        let b = state(1000, 12345, Some("abc"));
        assert!(a.matches(&b));

        // A differing size must.
        let c = state(2000, 12345, None);
        assert!(!a.matches(&c));
    }

    #[test]
    fn test_file_change_needs_processing() {
        assert!(FileChange::New.needs_processing());
        assert!(FileChange::Modified.needs_processing());
        assert!(!FileChange::Unchanged.needs_processing());
        assert!(!FileChange::Deleted.needs_processing());
    }

    #[test]
    fn test_file_change_labels() {
        let expected = [
            (FileChange::New, "new"),
            (FileChange::Modified, "modified"),
            (FileChange::Unchanged, "unchanged"),
            (FileChange::Deleted, "deleted"),
        ];
        for (change, label) in expected {
            assert_eq!(change.label(), label);
        }
    }

    #[test]
    fn test_incremental_index_new_empty() {
        let idx = IncrementalIndex::new();
        assert_eq!(idx.tracked_count(), 0);
        assert_eq!(idx.scan_count(), 0);
        assert_eq!(idx.last_scan_epoch(), 0);
    }

    #[test]
    fn test_classify_new_file() {
        let dir = scratch_dir("oximedia_dedup_incr_new");
        let path = make_temp_file(&dir, "new_file.bin", &[1u8; 50]);

        let idx = IncrementalIndex::new();
        let (change, snapshot) = idx.classify(&path).expect("classify");
        assert_eq!(change, FileChange::New);
        assert_eq!(snapshot.size, 50);

        remove_scratch(&dir);
    }

    #[test]
    fn test_classify_unchanged_file() {
        let dir = scratch_dir("oximedia_dedup_incr_unchanged");
        let path = make_temp_file(&dir, "unchanged.bin", &[2u8; 75]);

        let mut idx = IncrementalIndex::new();
        idx.commit(std::slice::from_ref(&path));

        let (change, _) = idx.classify(&path).expect("classify");
        assert_eq!(change, FileChange::Unchanged);

        remove_scratch(&dir);
    }

    #[test]
    fn test_classify_modified_file() {
        let dir = scratch_dir("oximedia_dedup_incr_modified");
        let path = make_temp_file(&dir, "modifiable.bin", &[3u8; 100]);

        let mut idx = IncrementalIndex::new();
        idx.commit(std::slice::from_ref(&path));

        // Rewrite with a different length so the change is visible through
        // the size even if the mtime second does not advance.
        std::fs::write(&path, [4u8; 200]).expect("rewrite");

        let (change, _) = idx.classify(&path).expect("classify");
        assert_eq!(change, FileChange::Modified);

        remove_scratch(&dir);
    }

    #[test]
    fn test_scan_mixed_files() {
        let dir = scratch_dir("oximedia_dedup_incr_scan");
        let tracked = make_temp_file(&dir, "existing.bin", &[5u8; 60]);
        let untracked = make_temp_file(&dir, "new_one.bin", &[6u8; 80]);

        let mut idx = IncrementalIndex::new();
        idx.commit(std::slice::from_ref(&tracked));

        let result = idx.scan(&[tracked.clone(), untracked.clone()]);
        assert_eq!(result.unchanged.len(), 1);
        assert_eq!(result.to_process.len(), 1);
        assert_eq!(result.to_process[0], untracked);
        assert_eq!(result.new_count(), 1);
        assert_eq!(result.modified_count(), 0);
        assert!(result.summary().contains("1 to process"));

        remove_scratch(&dir);
    }

    #[test]
    fn test_scan_detects_deleted_files() {
        let dir = scratch_dir("oximedia_dedup_incr_deleted");
        let dropped = make_temp_file(&dir, "will_delete.bin", &[7u8; 40]);
        let kept = make_temp_file(&dir, "stays.bin", &[8u8; 40]);

        let mut idx = IncrementalIndex::new();
        idx.commit(&[dropped.clone(), kept.clone()]);

        // Only `kept` is offered as a candidate, so `dropped` must be reported.
        let result = idx.scan(std::slice::from_ref(&kept));
        assert_eq!(result.deleted.len(), 1);
        assert_eq!(result.deleted[0], dropped);

        remove_scratch(&dir);
    }

    #[test]
    fn test_prune_deleted() {
        let mut idx = IncrementalIndex::new();
        idx.files
            .insert("/old/file.bin".to_string(), state(100, 0, None));
        assert_eq!(idx.tracked_count(), 1);

        idx.prune_deleted(&[PathBuf::from("/old/file.bin")]);
        assert_eq!(idx.tracked_count(), 0);
    }

    #[test]
    fn test_commit_updates_scan_count() {
        let dir = scratch_dir("oximedia_dedup_incr_commit");
        let file = make_temp_file(&dir, "commit_test.bin", &[9u8; 30]);

        let mut idx = IncrementalIndex::new();
        assert_eq!(idx.scan_count(), 0);

        idx.commit(&[file]);
        assert_eq!(idx.scan_count(), 1);
        assert!(idx.last_scan_epoch() > 0);

        remove_scratch(&dir);
    }

    #[test]
    fn test_json_roundtrip() {
        let mut idx = IncrementalIndex::new();
        idx.files.insert(
            "/some/file.mp4".to_string(),
            state(999, 1_700_000_000, Some("abcd1234")),
        );
        idx.scan_count = 5;
        idx.last_scan_epoch = 1_700_000_100;

        let json = idx.to_json().expect("serialise");
        let restored = IncrementalIndex::from_json(&json).expect("deserialise");

        assert_eq!(restored.tracked_count(), 1);
        assert_eq!(restored.scan_count(), 5);
        assert_eq!(restored.last_scan_epoch(), 1_700_000_100);

        let stored = restored
            .get_state(Path::new("/some/file.mp4"))
            .expect("state should exist");
        assert_eq!(stored.size, 999);
        assert_eq!(stored.content_hash.as_deref(), Some("abcd1234"));
    }

    #[test]
    fn test_save_and_load_file() {
        let dir = scratch_dir("oximedia_dedup_incr_persist");
        let index_path = dir.join("dedup_index.json");

        let mut idx = IncrementalIndex::new();
        idx.files
            .insert("video.mp4".to_string(), state(500, 12345, None));

        idx.save_to_file(&index_path).expect("save");
        let loaded = IncrementalIndex::load_from_file(&index_path).expect("load");
        assert_eq!(loaded.tracked_count(), 1);
        assert!(loaded.is_tracked(Path::new("video.mp4")));

        remove_scratch(&dir);
    }

    #[test]
    fn test_merge_indices() {
        let mut left = IncrementalIndex::new();
        left.files.insert("a.mp4".to_string(), state(100, 1, None));
        left.last_scan_epoch = 100;

        let mut right = IncrementalIndex::new();
        right.files.insert("b.mp4".to_string(), state(200, 2, None));
        right.last_scan_epoch = 200;

        left.merge(&right);
        assert_eq!(left.tracked_count(), 2);
        assert!(left.is_tracked(Path::new("a.mp4")));
        assert!(left.is_tracked(Path::new("b.mp4")));
        assert_eq!(left.last_scan_epoch(), 200);
    }

    #[test]
    fn test_commit_file_with_hash() {
        let mut idx = IncrementalIndex::new();
        idx.commit_file(
            Path::new("/media/video.mp4"),
            state(1024, 1_700_000_000, Some("deadbeef")),
        );

        let stored = idx
            .get_state(Path::new("/media/video.mp4"))
            .expect("should exist");
        assert_eq!(stored.content_hash.as_deref(), Some("deadbeef"));
    }

    #[test]
    fn test_clear_index() {
        let mut idx = IncrementalIndex::new();
        idx.files.insert("x.mp4".to_string(), state(1, 1, None));
        idx.scan_count = 10;

        idx.clear();
        assert_eq!(idx.tracked_count(), 0);
        assert_eq!(idx.scan_count(), 0);
    }

    #[test]
    fn test_tracked_paths() {
        let mut idx = IncrementalIndex::new();
        idx.files.insert("a.mp4".to_string(), state(1, 1, None));
        idx.files.insert("b.mp4".to_string(), state(2, 2, None));

        // Map iteration order is unspecified, so sort before comparing.
        let mut paths = idx.tracked_paths();
        paths.sort();
        assert_eq!(paths, vec!["a.mp4", "b.mp4"]);
    }

    #[test]
    fn test_processing_ratio() {
        let result = ScanResult {
            to_process: ["a", "b"].iter().map(PathBuf::from).collect(),
            unchanged: ["c", "d", "e"].iter().map(PathBuf::from).collect(),
            deleted: Vec::new(),
            changes: Vec::new(),
        };
        assert!((result.processing_ratio() - 0.4).abs() < f64::EPSILON);
    }

    #[test]
    fn test_processing_ratio_empty() {
        let result = ScanResult {
            to_process: Vec::new(),
            unchanged: Vec::new(),
            deleted: Vec::new(),
            changes: Vec::new(),
        };
        assert_eq!(result.processing_ratio(), 0.0);
    }

    #[test]
    fn test_full_incremental_workflow() {
        let dir = scratch_dir("oximedia_dedup_incr_workflow");

        // First scan: both files are unknown to the index.
        let f1 = make_temp_file(&dir, "video1.bin", &[10u8; 100]);
        let f2 = make_temp_file(&dir, "video2.bin", &[20u8; 200]);

        let mut idx = IncrementalIndex::new();
        let scan1 = idx.scan(&[f1.clone(), f2.clone()]);
        assert_eq!(scan1.to_process.len(), 2);
        assert_eq!(scan1.new_count(), 2);

        idx.commit(&scan1.to_process);

        // Second scan: nothing changed on disk.
        let scan2 = idx.scan(&[f1.clone(), f2.clone()]);
        assert_eq!(scan2.to_process.len(), 0);
        assert_eq!(scan2.unchanged.len(), 2);

        // Third scan: one file rewritten, one file added.
        std::fs::write(&f1, [11u8; 150]).expect("modify f1");
        let f3 = make_temp_file(&dir, "video3.bin", &[30u8; 300]);

        let scan3 = idx.scan(&[f1.clone(), f2.clone(), f3.clone()]);
        assert_eq!(scan3.to_process.len(), 2);
        assert_eq!(scan3.unchanged.len(), 1);
        assert_eq!(scan3.modified_count(), 1);
        assert_eq!(scan3.new_count(), 1);

        idx.commit(&scan3.to_process);
        assert_eq!(idx.scan_count(), 2);
        assert_eq!(idx.tracked_count(), 3);

        // Fourth scan: f2 is no longer offered, so it is reported as deleted.
        let scan4 = idx.scan(&[f1.clone(), f3.clone()]);
        assert_eq!(scan4.deleted.len(), 1);
        idx.prune_deleted(&scan4.deleted);
        assert_eq!(idx.tracked_count(), 2);

        remove_scratch(&dir);
    }
}