Skip to main content

oximedia_dedup/
incremental.rs

1//! Incremental deduplication: only scan new or modified files.
2//!
3//! Tracks file state (path, size, modification timestamp) across sessions so
4//! that subsequent dedup passes only process files that have been added or
5//! changed since the last scan.  This dramatically reduces work for large,
6//! slowly-evolving media libraries.
7//!
8//! # Design
9//!
10//! [`IncrementalIndex`] maintains an in-memory map from file path to
11//! [`FileState`] (size + mtime).  On each scan cycle:
12//!
13//! 1. Walk the candidate file list.
14//! 2. Compare each file's current state against the stored state.
15//! 3. Classify as **New**, **Modified**, or **Unchanged**.
16//! 4. Return only New/Modified files for processing by the dedup pipeline.
17//! 5. After processing, update the index with the new state.
18//!
19//! The index can be serialised to / deserialised from JSON for persistence.
20
21#![allow(dead_code)]
22#![allow(clippy::cast_precision_loss)]
23
24use std::collections::HashMap;
25use std::path::{Path, PathBuf};
26
27use serde::{Deserialize, Serialize};
28
29use crate::{DedupError, DedupResult};
30
31// ---------------------------------------------------------------------------
32// FileState
33// ---------------------------------------------------------------------------
34
35/// Snapshot of a file's identity-relevant metadata.
36#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
37pub struct FileState {
38    /// File size in bytes.
39    pub size: u64,
40    /// Modification time as seconds since UNIX epoch.
41    pub mtime_secs: u64,
42    /// BLAKE3 content hash hex (computed on first scan; re-verified on change).
43    pub content_hash: Option<String>,
44}
45
46impl FileState {
47    /// Read the current state of `path` from the filesystem.
48    ///
49    /// # Errors
50    ///
51    /// Returns `DedupError::Io` if metadata cannot be read.
52    pub fn from_path(path: &Path) -> DedupResult<Self> {
53        let meta = std::fs::metadata(path)?;
54        let size = meta.len();
55        let mtime_secs = meta
56            .modified()
57            .ok()
58            .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
59            .map(|d| d.as_secs())
60            .unwrap_or(0);
61        Ok(Self {
62            size,
63            mtime_secs,
64            content_hash: None,
65        })
66    }
67
68    /// Returns `true` if the file appears unchanged compared to `other`.
69    #[must_use]
70    pub fn matches(&self, other: &Self) -> bool {
71        self.size == other.size && self.mtime_secs == other.mtime_secs
72    }
73}
74
75// ---------------------------------------------------------------------------
76// FileChange
77// ---------------------------------------------------------------------------
78
/// How a file's state relates to what the index last recorded for it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileChange {
    /// Not present in the index.
    New,
    /// Present in the index, but size or mtime differs.
    Modified,
    /// Present in the index with identical size and mtime.
    Unchanged,
    /// Present in the index but no longer available.
    Deleted,
}

impl FileChange {
    /// Whether a file with this classification has to go through the dedup
    /// pipeline again.
    ///
    /// Only [`FileChange::New`] and [`FileChange::Modified`] do.
    #[must_use]
    pub fn needs_processing(self) -> bool {
        // Spelled out per variant so that adding a variant forces a decision here.
        match self {
            FileChange::New | FileChange::Modified => true,
            FileChange::Unchanged | FileChange::Deleted => false,
        }
    }

    /// Short lowercase name of the variant, suitable for logs and reports.
    #[must_use]
    pub fn label(self) -> &'static str {
        match self {
            FileChange::Deleted => "deleted",
            FileChange::Unchanged => "unchanged",
            FileChange::Modified => "modified",
            FileChange::New => "new",
        }
    }
}
110
111// ---------------------------------------------------------------------------
112// ScanResult
113// ---------------------------------------------------------------------------
114
115/// Result of an incremental scan.
116#[derive(Debug, Clone)]
117pub struct ScanResult {
118    /// Files that need processing (new or modified).
119    pub to_process: Vec<PathBuf>,
120    /// Files that are unchanged.
121    pub unchanged: Vec<PathBuf>,
122    /// Files that were deleted since the last scan.
123    pub deleted: Vec<PathBuf>,
124    /// Per-file change classification.
125    pub changes: Vec<(PathBuf, FileChange)>,
126}
127
128impl ScanResult {
129    /// Total files examined.
130    #[must_use]
131    pub fn total_examined(&self) -> usize {
132        self.to_process.len() + self.unchanged.len()
133    }
134
135    /// Fraction of files that need processing (0.0 - 1.0).
136    #[must_use]
137    pub fn processing_ratio(&self) -> f64 {
138        let total = self.total_examined();
139        if total == 0 {
140            return 0.0;
141        }
142        self.to_process.len() as f64 / total as f64
143    }
144
145    /// Number of new files.
146    #[must_use]
147    pub fn new_count(&self) -> usize {
148        self.changes
149            .iter()
150            .filter(|(_, c)| *c == FileChange::New)
151            .count()
152    }
153
154    /// Number of modified files.
155    #[must_use]
156    pub fn modified_count(&self) -> usize {
157        self.changes
158            .iter()
159            .filter(|(_, c)| *c == FileChange::Modified)
160            .count()
161    }
162
163    /// Human-readable summary.
164    #[must_use]
165    pub fn summary(&self) -> String {
166        format!(
167            "{} to process ({} new, {} modified), {} unchanged, {} deleted",
168            self.to_process.len(),
169            self.new_count(),
170            self.modified_count(),
171            self.unchanged.len(),
172            self.deleted.len(),
173        )
174    }
175}
176
177// ---------------------------------------------------------------------------
178// IncrementalIndex
179// ---------------------------------------------------------------------------
180
181/// Persistent index for incremental deduplication.
182///
183/// Tracks which files have been seen and their state at the time of the last
184/// scan, enabling subsequent scans to skip unchanged files.
185#[derive(Debug, Clone, Serialize, Deserialize)]
186pub struct IncrementalIndex {
187    /// Map from canonical file path string to its last-known state.
188    files: HashMap<String, FileState>,
189    /// Epoch timestamp of the last completed scan.
190    last_scan_epoch: u64,
191    /// Number of scans performed.
192    scan_count: u64,
193}
194
195impl IncrementalIndex {
196    /// Create a new, empty index.
197    #[must_use]
198    pub fn new() -> Self {
199        Self {
200            files: HashMap::new(),
201            last_scan_epoch: 0,
202            scan_count: 0,
203        }
204    }
205
206    /// Number of tracked files.
207    #[must_use]
208    pub fn tracked_count(&self) -> usize {
209        self.files.len()
210    }
211
212    /// Number of scans completed.
213    #[must_use]
214    pub fn scan_count(&self) -> u64 {
215        self.scan_count
216    }
217
218    /// Epoch of the last scan.
219    #[must_use]
220    pub fn last_scan_epoch(&self) -> u64 {
221        self.last_scan_epoch
222    }
223
224    /// Classify a single file against the stored state.
225    ///
226    /// # Errors
227    ///
228    /// Returns an error if the file's metadata cannot be read.
229    pub fn classify(&self, path: &Path) -> DedupResult<(FileChange, FileState)> {
230        let current = FileState::from_path(path)?;
231        let key = path.to_string_lossy().to_string();
232
233        let change = match self.files.get(&key) {
234            Some(stored) if stored.matches(&current) => FileChange::Unchanged,
235            Some(_) => FileChange::Modified,
236            None => FileChange::New,
237        };
238
239        Ok((change, current))
240    }
241
242    /// Perform an incremental scan over a list of candidate paths.
243    ///
244    /// Classifies each file, identifies deleted files (tracked but not in
245    /// the candidate list), and returns a [`ScanResult`].
246    ///
247    /// **Does not** update the index -- call `commit` after processing.
248    pub fn scan(&self, candidates: &[PathBuf]) -> ScanResult {
249        let mut to_process = Vec::new();
250        let mut unchanged = Vec::new();
251        let mut changes = Vec::new();
252
253        let candidate_set: std::collections::HashSet<String> = candidates
254            .iter()
255            .map(|p| p.to_string_lossy().to_string())
256            .collect();
257
258        for path in candidates {
259            match self.classify(path) {
260                Ok((change, _state)) => {
261                    if change.needs_processing() {
262                        to_process.push(path.clone());
263                    } else {
264                        unchanged.push(path.clone());
265                    }
266                    changes.push((path.clone(), change));
267                }
268                Err(_) => {
269                    // File cannot be read; treat as deleted/inaccessible
270                    changes.push((path.clone(), FileChange::Deleted));
271                }
272            }
273        }
274
275        // Find files that were tracked but are no longer in the candidate list.
276        let mut deleted = Vec::new();
277        for key in self.files.keys() {
278            if !candidate_set.contains(key) {
279                deleted.push(PathBuf::from(key));
280                changes.push((PathBuf::from(key), FileChange::Deleted));
281            }
282        }
283
284        ScanResult {
285            to_process,
286            unchanged,
287            deleted,
288            changes,
289        }
290    }
291
292    /// Commit processed files to the index, updating their state.
293    ///
294    /// Call this after successfully processing the files from a scan.
295    pub fn commit(&mut self, paths: &[PathBuf]) {
296        for path in paths {
297            let key = path.to_string_lossy().to_string();
298            if let Ok(state) = FileState::from_path(path) {
299                self.files.insert(key, state);
300            }
301        }
302        self.last_scan_epoch = std::time::SystemTime::now()
303            .duration_since(std::time::UNIX_EPOCH)
304            .unwrap_or_default()
305            .as_secs();
306        self.scan_count += 1;
307    }
308
309    /// Commit a single file with an explicit state (e.g. with content hash).
310    pub fn commit_file(&mut self, path: &Path, state: FileState) {
311        let key = path.to_string_lossy().to_string();
312        self.files.insert(key, state);
313    }
314
315    /// Remove deleted files from the index.
316    pub fn prune_deleted(&mut self, deleted: &[PathBuf]) {
317        for path in deleted {
318            let key = path.to_string_lossy().to_string();
319            self.files.remove(&key);
320        }
321    }
322
323    /// Get the stored state for a file.
324    #[must_use]
325    pub fn get_state(&self, path: &Path) -> Option<&FileState> {
326        let key = path.to_string_lossy().to_string();
327        self.files.get(&key)
328    }
329
330    /// Check if a file is tracked.
331    #[must_use]
332    pub fn is_tracked(&self, path: &Path) -> bool {
333        let key = path.to_string_lossy().to_string();
334        self.files.contains_key(&key)
335    }
336
337    /// Serialise the index to JSON.
338    ///
339    /// # Errors
340    ///
341    /// Returns an error if serialisation fails.
342    pub fn to_json(&self) -> DedupResult<String> {
343        serde_json::to_string_pretty(self)
344            .map_err(|e| DedupError::Hash(format!("JSON serialise: {e}")))
345    }
346
347    /// Deserialise an index from JSON.
348    ///
349    /// # Errors
350    ///
351    /// Returns an error if the JSON is invalid.
352    pub fn from_json(json: &str) -> DedupResult<Self> {
353        serde_json::from_str(json).map_err(|e| DedupError::Hash(format!("JSON deserialise: {e}")))
354    }
355
356    /// Save the index to a file.
357    ///
358    /// # Errors
359    ///
360    /// Returns an I/O error if the file cannot be written.
361    pub fn save_to_file(&self, path: &Path) -> DedupResult<()> {
362        let json = self.to_json()?;
363        std::fs::write(path, json)?;
364        Ok(())
365    }
366
367    /// Load the index from a file.
368    ///
369    /// # Errors
370    ///
371    /// Returns an error if the file cannot be read or parsed.
372    pub fn load_from_file(path: &Path) -> DedupResult<Self> {
373        let json = std::fs::read_to_string(path)?;
374        Self::from_json(&json)
375    }
376
377    /// Clear the entire index.
378    pub fn clear(&mut self) {
379        self.files.clear();
380        self.last_scan_epoch = 0;
381        self.scan_count = 0;
382    }
383
384    /// Return all tracked file paths.
385    #[must_use]
386    pub fn tracked_paths(&self) -> Vec<String> {
387        self.files.keys().cloned().collect()
388    }
389
390    /// Merge another index into this one. Files in `other` override this index.
391    pub fn merge(&mut self, other: &IncrementalIndex) {
392        for (key, state) in &other.files {
393            self.files.insert(key.clone(), state.clone());
394        }
395        self.last_scan_epoch = self.last_scan_epoch.max(other.last_scan_epoch);
396    }
397}
398
399impl Default for IncrementalIndex {
400    fn default() -> Self {
401        Self::new()
402    }
403}
404
405// ---------------------------------------------------------------------------
406// Tests
407// ---------------------------------------------------------------------------
408
#[cfg(test)]
mod tests {
    use super::*;

    /// Per-test scratch directory under the system temp dir.
    ///
    /// The directory name includes the process id so concurrent test runs do
    /// not share state, creation failures abort the test immediately, and the
    /// directory is removed on drop -- including when an assertion panics.
    struct ScratchDir {
        root: PathBuf,
    }

    impl ScratchDir {
        /// Create (or recreate) the scratch directory identified by `tag`.
        fn new(tag: &str) -> Self {
            let root = std::env::temp_dir().join(format!(
                "oximedia_dedup_incr_{tag}_{}",
                std::process::id()
            ));
            // Leftovers from an aborted earlier run must not leak into this one.
            let _ = std::fs::remove_dir_all(&root);
            std::fs::create_dir_all(&root).expect("create scratch dir");
            Self { root }
        }

        /// Path of `name` inside the scratch directory (nothing is created).
        fn join(&self, name: &str) -> PathBuf {
            self.root.join(name)
        }

        /// Write `content` to `name` inside the scratch directory.
        fn file(&self, name: &str, content: &[u8]) -> PathBuf {
            let path = self.join(name);
            std::fs::write(&path, content).expect("write temp file");
            path
        }
    }

    impl Drop for ScratchDir {
        fn drop(&mut self) {
            // Best-effort cleanup; a failure here must not mask the test result.
            let _ = std::fs::remove_dir_all(&self.root);
        }
    }

    /// Build a `FileState` literal without repeating the struct syntax.
    fn state(size: u64, mtime_secs: u64, hash: Option<&str>) -> FileState {
        FileState {
            size,
            mtime_secs,
            content_hash: hash.map(str::to_string),
        }
    }

    #[test]
    fn test_file_state_from_path() {
        let dir = ScratchDir::new("state");
        let path = dir.file("test_state.bin", &[0u8; 100]);

        let state = FileState::from_path(&path).expect("should read state");
        assert_eq!(state.size, 100);
        assert!(state.mtime_secs > 0);
        assert!(state.content_hash.is_none());
    }

    #[test]
    fn test_file_state_matches() {
        let a = state(1000, 12345, None);
        let b = state(1000, 12345, Some("abc"));
        assert!(a.matches(&b)); // content_hash not compared

        let c = state(2000, 12345, None);
        assert!(!a.matches(&c));
    }

    #[test]
    fn test_file_change_needs_processing() {
        assert!(FileChange::New.needs_processing());
        assert!(FileChange::Modified.needs_processing());
        assert!(!FileChange::Unchanged.needs_processing());
        assert!(!FileChange::Deleted.needs_processing());
    }

    #[test]
    fn test_file_change_labels() {
        assert_eq!(FileChange::New.label(), "new");
        assert_eq!(FileChange::Modified.label(), "modified");
        assert_eq!(FileChange::Unchanged.label(), "unchanged");
        assert_eq!(FileChange::Deleted.label(), "deleted");
    }

    #[test]
    fn test_incremental_index_new_empty() {
        let idx = IncrementalIndex::new();
        assert_eq!(idx.tracked_count(), 0);
        assert_eq!(idx.scan_count(), 0);
        assert_eq!(idx.last_scan_epoch(), 0);
    }

    #[test]
    fn test_classify_new_file() {
        let dir = ScratchDir::new("new");
        let path = dir.file("new_file.bin", &[1u8; 50]);

        let idx = IncrementalIndex::new();
        let (change, state) = idx.classify(&path).expect("classify");
        assert_eq!(change, FileChange::New);
        assert_eq!(state.size, 50);
    }

    #[test]
    fn test_classify_unchanged_file() {
        let dir = ScratchDir::new("unchanged");
        let path = dir.file("unchanged.bin", &[2u8; 75]);

        let mut idx = IncrementalIndex::new();
        idx.commit(std::slice::from_ref(&path));

        let (change, _) = idx.classify(&path).expect("classify");
        assert_eq!(change, FileChange::Unchanged);
    }

    #[test]
    fn test_classify_modified_file() {
        let dir = ScratchDir::new("modified");
        let path = dir.file("modifiable.bin", &[3u8; 100]);

        let mut idx = IncrementalIndex::new();
        idx.commit(std::slice::from_ref(&path));

        // Change the size so the modification is detected even when both
        // writes land within the same mtime second.
        std::fs::write(&path, [4u8; 200]).expect("rewrite");

        let (change, _) = idx.classify(&path).expect("classify");
        assert_eq!(change, FileChange::Modified);
    }

    #[test]
    fn test_scan_mixed_files() {
        let dir = ScratchDir::new("scan");

        let f1 = dir.file("existing.bin", &[5u8; 60]);
        let f2 = dir.file("new_one.bin", &[6u8; 80]);

        let mut idx = IncrementalIndex::new();
        idx.commit(std::slice::from_ref(&f1));

        let result = idx.scan(&[f1.clone(), f2.clone()]);
        assert_eq!(result.unchanged.len(), 1);
        assert_eq!(result.to_process.len(), 1);
        assert_eq!(result.to_process[0], f2);
        assert_eq!(result.new_count(), 1);
        assert_eq!(result.modified_count(), 0);
        assert!(result.summary().contains("1 to process"));
    }

    #[test]
    fn test_scan_detects_deleted_files() {
        let dir = ScratchDir::new("deleted");

        let f1 = dir.file("will_delete.bin", &[7u8; 40]);
        let f2 = dir.file("stays.bin", &[8u8; 40]);

        let mut idx = IncrementalIndex::new();
        idx.commit(&[f1.clone(), f2.clone()]);

        // Scan with only f2 in candidates (f1 is "deleted")
        let result = idx.scan(std::slice::from_ref(&f2));
        assert_eq!(result.deleted.len(), 1);
        assert_eq!(result.deleted[0], f1);
    }

    #[test]
    fn test_prune_deleted() {
        let mut idx = IncrementalIndex::new();
        idx.files
            .insert("/old/file.bin".to_string(), state(100, 0, None));
        assert_eq!(idx.tracked_count(), 1);

        idx.prune_deleted(&[PathBuf::from("/old/file.bin")]);
        assert_eq!(idx.tracked_count(), 0);
    }

    #[test]
    fn test_commit_updates_scan_count() {
        let dir = ScratchDir::new("commit");
        let f = dir.file("commit_test.bin", &[9u8; 30]);

        let mut idx = IncrementalIndex::new();
        assert_eq!(idx.scan_count(), 0);

        idx.commit(&[f]);
        assert_eq!(idx.scan_count(), 1);
        assert!(idx.last_scan_epoch() > 0);
    }

    #[test]
    fn test_commit_skips_unreadable_paths() {
        let dir = ScratchDir::new("commit_missing");
        let missing = dir.join("never_created.bin");

        let mut idx = IncrementalIndex::new();
        idx.commit(&[missing.clone()]);

        // The path is not recorded, but the commit itself still counts.
        assert!(!idx.is_tracked(&missing));
        assert_eq!(idx.tracked_count(), 0);
        assert_eq!(idx.scan_count(), 1);
    }

    #[test]
    fn test_json_roundtrip() {
        let mut idx = IncrementalIndex::new();
        idx.files.insert(
            "/some/file.mp4".to_string(),
            state(999, 1_700_000_000, Some("abcd1234")),
        );
        idx.scan_count = 5;
        idx.last_scan_epoch = 1_700_000_100;

        let json = idx.to_json().expect("serialise");
        let restored = IncrementalIndex::from_json(&json).expect("deserialise");

        assert_eq!(restored.tracked_count(), 1);
        assert_eq!(restored.scan_count(), 5);
        assert_eq!(restored.last_scan_epoch(), 1_700_000_100);

        let state = restored
            .get_state(Path::new("/some/file.mp4"))
            .expect("state should exist");
        assert_eq!(state.size, 999);
        assert_eq!(state.content_hash.as_deref(), Some("abcd1234"));
    }

    #[test]
    fn test_from_json_rejects_invalid_input() {
        assert!(IncrementalIndex::from_json("this is not json").is_err());
    }

    #[test]
    fn test_save_and_load_file() {
        let dir = ScratchDir::new("persist");
        let index_path = dir.join("dedup_index.json");

        let mut idx = IncrementalIndex::new();
        idx.files
            .insert("video.mp4".to_string(), state(500, 12345, None));

        idx.save_to_file(&index_path).expect("save");
        let loaded = IncrementalIndex::load_from_file(&index_path).expect("load");
        assert_eq!(loaded.tracked_count(), 1);
        assert!(loaded.is_tracked(Path::new("video.mp4")));
    }

    #[test]
    fn test_merge_indices() {
        let mut idx1 = IncrementalIndex::new();
        idx1.files.insert("a.mp4".to_string(), state(100, 1, None));
        idx1.last_scan_epoch = 100;

        let mut idx2 = IncrementalIndex::new();
        idx2.files.insert("b.mp4".to_string(), state(200, 2, None));
        idx2.last_scan_epoch = 200;

        idx1.merge(&idx2);
        assert_eq!(idx1.tracked_count(), 2);
        assert!(idx1.is_tracked(Path::new("a.mp4")));
        assert!(idx1.is_tracked(Path::new("b.mp4")));
        assert_eq!(idx1.last_scan_epoch(), 200);
    }

    #[test]
    fn test_commit_file_with_hash() {
        let mut idx = IncrementalIndex::new();
        idx.commit_file(
            Path::new("/media/video.mp4"),
            state(1024, 1_700_000_000, Some("deadbeef")),
        );

        let stored = idx
            .get_state(Path::new("/media/video.mp4"))
            .expect("should exist");
        assert_eq!(stored.content_hash.as_deref(), Some("deadbeef"));
    }

    #[test]
    fn test_clear_index() {
        let mut idx = IncrementalIndex::new();
        idx.files.insert("x.mp4".to_string(), state(1, 1, None));
        idx.scan_count = 10;
        idx.clear();
        assert_eq!(idx.tracked_count(), 0);
        assert_eq!(idx.scan_count(), 0);
    }

    #[test]
    fn test_tracked_paths() {
        let mut idx = IncrementalIndex::new();
        idx.files.insert("a.mp4".to_string(), state(1, 1, None));
        idx.files.insert("b.mp4".to_string(), state(2, 2, None));

        // Key order is unspecified, so sort before comparing.
        let mut paths = idx.tracked_paths();
        paths.sort();
        assert_eq!(paths, vec!["a.mp4", "b.mp4"]);
    }

    #[test]
    fn test_processing_ratio() {
        let result = ScanResult {
            to_process: vec![PathBuf::from("a"), PathBuf::from("b")],
            unchanged: vec![PathBuf::from("c"), PathBuf::from("d"), PathBuf::from("e")],
            deleted: Vec::new(),
            changes: Vec::new(),
        };
        assert!((result.processing_ratio() - 0.4).abs() < f64::EPSILON);
    }

    #[test]
    fn test_processing_ratio_empty() {
        let result = ScanResult {
            to_process: Vec::new(),
            unchanged: Vec::new(),
            deleted: Vec::new(),
            changes: Vec::new(),
        };
        assert_eq!(result.processing_ratio(), 0.0);
    }

    #[test]
    fn test_full_incremental_workflow() {
        let dir = ScratchDir::new("workflow");

        // Session 1: All files are new
        let f1 = dir.file("video1.bin", &[10u8; 100]);
        let f2 = dir.file("video2.bin", &[20u8; 200]);

        let mut idx = IncrementalIndex::new();
        let scan1 = idx.scan(&[f1.clone(), f2.clone()]);
        assert_eq!(scan1.to_process.len(), 2);
        assert_eq!(scan1.new_count(), 2);

        idx.commit(&scan1.to_process);

        // Session 2: No changes -> nothing to process
        let scan2 = idx.scan(&[f1.clone(), f2.clone()]);
        assert_eq!(scan2.to_process.len(), 0);
        assert_eq!(scan2.unchanged.len(), 2);

        // Session 3: Modify one file (different size), add a new one
        std::fs::write(&f1, [11u8; 150]).expect("modify f1");
        let f3 = dir.file("video3.bin", &[30u8; 300]);

        let scan3 = idx.scan(&[f1.clone(), f2.clone(), f3.clone()]);
        assert_eq!(scan3.to_process.len(), 2); // f1 (modified) + f3 (new)
        assert_eq!(scan3.unchanged.len(), 1); // f2
        assert_eq!(scan3.modified_count(), 1);
        assert_eq!(scan3.new_count(), 1);

        idx.commit(&scan3.to_process);
        assert_eq!(idx.scan_count(), 2);
        assert_eq!(idx.tracked_count(), 3);

        // Session 4: f2 drops out of the candidate list
        let scan4 = idx.scan(&[f1.clone(), f3.clone()]);
        assert_eq!(scan4.deleted.len(), 1);
        idx.prune_deleted(&scan4.deleted);
        assert_eq!(idx.tracked_count(), 2);
    }
}