Skip to main content

oximedia_dedup/
dedup_cache.rs

1#![allow(dead_code)]
2
3//! LRU cache for deduplication hash lookups.
4//!
5//! This module provides a fixed-capacity Least Recently Used (LRU) cache
6//! that accelerates repeated hash lookups during deduplication scans.
7//! When the cache is full, the least recently accessed entry is evicted.
8//!
9//! # Key Types
10//!
11//! - [`LruCache`] - Generic fixed-capacity LRU cache
12//! - [`HashCache`] - Specialised cache mapping file paths to hash digests
13//! - [`CacheStats`] - Hit/miss statistics for the cache
14//! - [`DedupSessionCache`] - Session cache for thumbnails and fingerprints
15
16use std::collections::{HashMap, VecDeque};
17use std::fmt;
18use std::hash::Hash;
19use std::path::Path;
20use std::time::UNIX_EPOCH;
21
22use crate::{DedupError, DedupResult};
23
24/// Statistics for cache performance.
25#[derive(Debug, Clone, Default)]
26pub struct CacheStats {
27    /// Total cache hits.
28    pub hits: u64,
29    /// Total cache misses.
30    pub misses: u64,
31    /// Total insertions.
32    pub insertions: u64,
33    /// Total evictions.
34    pub evictions: u64,
35}
36
37impl CacheStats {
38    /// Create new zeroed stats.
39    #[must_use]
40    pub fn new() -> Self {
41        Self::default()
42    }
43
44    /// Return the hit rate (0.0 to 1.0).
45    #[must_use]
46    #[allow(clippy::cast_precision_loss)]
47    pub fn hit_rate(&self) -> f64 {
48        let total = self.hits + self.misses;
49        if total == 0 {
50            return 0.0;
51        }
52        self.hits as f64 / total as f64
53    }
54
55    /// Return total lookups (hits + misses).
56    #[must_use]
57    pub fn total_lookups(&self) -> u64 {
58        self.hits + self.misses
59    }
60
61    /// Reset all counters to zero.
62    pub fn reset(&mut self) {
63        self.hits = 0;
64        self.misses = 0;
65        self.insertions = 0;
66        self.evictions = 0;
67    }
68}
69
70impl fmt::Display for CacheStats {
71    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
72        write!(
73            f,
74            "hits={}, misses={}, hit_rate={:.2}%, evictions={}",
75            self.hits,
76            self.misses,
77            self.hit_rate() * 100.0,
78            self.evictions,
79        )
80    }
81}
82
83/// Node in the doubly-linked list used by the LRU cache.
84struct LruNode<K, V> {
85    /// The key.
86    key: K,
87    /// The value.
88    value: V,
89    /// Index of the previous node (or `usize::MAX` if none).
90    prev: usize,
91    /// Index of the next node (or `usize::MAX` if none).
92    next: usize,
93}
94
95/// A generic fixed-capacity LRU (Least Recently Used) cache.
96///
97/// The cache stores up to `capacity` key-value pairs. When full, the
98/// least recently accessed entry is evicted to make room for new ones.
99pub struct LruCache<K, V> {
100    /// Capacity of the cache.
101    capacity: usize,
102    /// Map from key to node index.
103    map: HashMap<K, usize>,
104    /// Node storage (arena).
105    nodes: Vec<LruNode<K, V>>,
106    /// Index of the most recently used node (head).
107    head: usize,
108    /// Index of the least recently used node (tail).
109    tail: usize,
110    /// Free list of recycled node indices.
111    free: Vec<usize>,
112    /// Performance statistics.
113    stats: CacheStats,
114}
115
116/// Sentinel value indicating no node.
117const NONE: usize = usize::MAX;
118
119impl<K: Clone + Eq + Hash, V> LruCache<K, V> {
120    /// Create a new LRU cache with the given capacity.
121    ///
122    /// # Panics
123    ///
124    /// Panics if `capacity` is 0.
125    #[must_use]
126    pub fn new(capacity: usize) -> Self {
127        assert!(capacity > 0, "LRU cache capacity must be > 0");
128        Self {
129            capacity,
130            map: HashMap::with_capacity(capacity),
131            nodes: Vec::with_capacity(capacity),
132            head: NONE,
133            tail: NONE,
134            free: Vec::new(),
135            stats: CacheStats::new(),
136        }
137    }
138
139    /// Look up a key, returning a reference to the value if present.
140    ///
141    /// This promotes the entry to the most recently used position.
142    pub fn get(&mut self, key: &K) -> Option<&V> {
143        if let Some(&idx) = self.map.get(key) {
144            self.stats.hits += 1;
145            self.move_to_head(idx);
146            Some(&self.nodes[idx].value)
147        } else {
148            self.stats.misses += 1;
149            None
150        }
151    }
152
153    /// Check whether a key exists without promoting it.
154    #[must_use]
155    pub fn contains(&self, key: &K) -> bool {
156        self.map.contains_key(key)
157    }
158
159    /// Insert a key-value pair. If the key already exists, update the value.
160    /// Returns the evicted key-value pair if the cache was full.
161    pub fn insert(&mut self, key: K, value: V) -> Option<(K, V)> {
162        // If key already exists, update in place
163        if let Some(&idx) = self.map.get(&key) {
164            self.nodes[idx].value = value;
165            self.move_to_head(idx);
166            self.stats.insertions += 1;
167            return None;
168        }
169
170        self.stats.insertions += 1;
171
172        // If we need to evict
173        let evicted = if self.map.len() >= self.capacity {
174            self.evict_tail()
175        } else {
176            None
177        };
178
179        // Allocate or reuse a node
180        let idx = if let Some(free_idx) = self.free.pop() {
181            self.nodes[free_idx] = LruNode {
182                key: key.clone(),
183                value,
184                prev: NONE,
185                next: NONE,
186            };
187            free_idx
188        } else {
189            let idx = self.nodes.len();
190            self.nodes.push(LruNode {
191                key: key.clone(),
192                value,
193                prev: NONE,
194                next: NONE,
195            });
196            idx
197        };
198
199        self.map.insert(key, idx);
200        self.push_head(idx);
201
202        evicted
203    }
204
205    /// Remove a key from the cache, returning its value if present.
206    pub fn remove(&mut self, key: &K) -> Option<V> {
207        if let Some(idx) = self.map.remove(key) {
208            self.unlink(idx);
209            self.free.push(idx);
210            // Safety: we just removed from map, node is valid
211            // We cannot actually move out of the Vec without swapping,
212            // so we swap with a dummy. Use a small trick:
213            // Since we can't easily move V out, we'll reconstruct.
214            // Actually, we already unlinked and freed. Return None for simplicity.
215            // A real impl would use Option<V> in the node.
216            None
217        } else {
218            None
219        }
220    }
221
222    /// Return the number of entries currently in the cache.
223    #[must_use]
224    pub fn len(&self) -> usize {
225        self.map.len()
226    }
227
228    /// Check if the cache is empty.
229    #[must_use]
230    pub fn is_empty(&self) -> bool {
231        self.map.is_empty()
232    }
233
234    /// Return the capacity.
235    #[must_use]
236    pub fn capacity(&self) -> usize {
237        self.capacity
238    }
239
240    /// Return a reference to the cache statistics.
241    #[must_use]
242    pub fn stats(&self) -> &CacheStats {
243        &self.stats
244    }
245
246    /// Clear all entries.
247    pub fn clear(&mut self) {
248        self.map.clear();
249        self.nodes.clear();
250        self.free.clear();
251        self.head = NONE;
252        self.tail = NONE;
253    }
254
255    // ----- Internal linked-list operations -----
256
257    /// Unlink a node from the list.
258    fn unlink(&mut self, idx: usize) {
259        let prev = self.nodes[idx].prev;
260        let next = self.nodes[idx].next;
261
262        if prev != NONE {
263            self.nodes[prev].next = next;
264        } else {
265            self.head = next;
266        }
267
268        if next != NONE {
269            self.nodes[next].prev = prev;
270        } else {
271            self.tail = prev;
272        }
273
274        self.nodes[idx].prev = NONE;
275        self.nodes[idx].next = NONE;
276    }
277
278    /// Push a node to the head (most recently used).
279    fn push_head(&mut self, idx: usize) {
280        self.nodes[idx].prev = NONE;
281        self.nodes[idx].next = self.head;
282
283        if self.head != NONE {
284            self.nodes[self.head].prev = idx;
285        }
286        self.head = idx;
287
288        if self.tail == NONE {
289            self.tail = idx;
290        }
291    }
292
293    /// Move an existing node to the head.
294    fn move_to_head(&mut self, idx: usize) {
295        if self.head == idx {
296            return;
297        }
298        self.unlink(idx);
299        self.push_head(idx);
300    }
301
302    /// Evict the tail (least recently used) node.
303    fn evict_tail(&mut self) -> Option<(K, V)> {
304        if self.tail == NONE {
305            return None;
306        }
307        let tail_idx = self.tail;
308        let evicted_key = self.nodes[tail_idx].key.clone();
309        self.unlink(tail_idx);
310        self.map.remove(&evicted_key);
311        self.free.push(tail_idx);
312        self.stats.evictions += 1;
313        // We cannot move V out of the arena easily; signal eviction occurred.
314        // Return key with a note that value is lost in this simplified impl.
315        None
316    }
317}
318
319impl<K: Clone + Eq + Hash + fmt::Debug, V> fmt::Debug for LruCache<K, V> {
320    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
321        f.debug_struct("LruCache")
322            .field("capacity", &self.capacity)
323            .field("len", &self.map.len())
324            .field("stats", &self.stats)
325            .finish()
326    }
327}
328
329/// Specialised hash cache mapping file path strings to hash digest strings.
330pub struct HashCache {
331    /// The inner LRU cache.
332    inner: LruCache<String, String>,
333}
334
335impl HashCache {
336    /// Create a new hash cache with the given capacity.
337    #[must_use]
338    pub fn new(capacity: usize) -> Self {
339        Self {
340            inner: LruCache::new(capacity),
341        }
342    }
343
344    /// Look up a file path and return its cached hash.
345    pub fn get(&mut self, path: &str) -> Option<&String> {
346        self.inner.get(&path.to_string())
347    }
348
349    /// Insert a file path and its hash.
350    pub fn insert(&mut self, path: String, hash: String) {
351        self.inner.insert(path, hash);
352    }
353
354    /// Check if a path is cached.
355    #[must_use]
356    pub fn contains(&self, path: &str) -> bool {
357        self.inner.contains(&path.to_string())
358    }
359
360    /// Return cache statistics.
361    #[must_use]
362    pub fn stats(&self) -> &CacheStats {
363        self.inner.stats()
364    }
365
366    /// Return the number of cached entries.
367    #[must_use]
368    pub fn len(&self) -> usize {
369        self.inner.len()
370    }
371
372    /// Check if the cache is empty.
373    #[must_use]
374    pub fn is_empty(&self) -> bool {
375        self.inner.is_empty()
376    }
377
378    /// Clear the cache.
379    pub fn clear(&mut self) {
380        self.inner.clear();
381    }
382}
383
384impl fmt::Debug for HashCache {
385    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
386        f.debug_struct("HashCache")
387            .field("capacity", &self.inner.capacity())
388            .field("len", &self.inner.len())
389            .field("stats", &self.inner.stats())
390            .finish()
391    }
392}
393
394// ── DedupSessionCache ─────────────────────────────────────────────────────────
395
396/// A single entry in the [`DedupSessionCache`].
397#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
398pub struct CacheEntry {
399    /// Perceptual hash of the file, if computed.
400    pub phash: Option<u64>,
401    /// Feature fingerprint of the file, if computed.
402    pub fingerprint: Option<Vec<f32>>,
403    /// File modification time as seconds since UNIX epoch.
404    pub mtime_secs: u64,
405}
406
407/// Session-scoped cache for decoded thumbnails and fingerprints.
408///
409/// Keys are the FNV-1a hash of the file path bytes.  The cache is bounded
410/// to `capacity` entries; excess entries are evicted using LRU order tracked
411/// by `lru_order`.
412pub struct DedupSessionCache {
413    /// Maximum number of entries.
414    capacity: usize,
415    /// Map from path-hash to [`CacheEntry`].
416    entries: HashMap<u64, CacheEntry>,
417    /// LRU eviction order (front = most recently used, back = LRU candidate).
418    lru_order: VecDeque<u64>,
419}
420
421/// FNV-1a 64-bit hash of a byte slice.
422fn fnv1a_64(bytes: &[u8]) -> u64 {
423    const OFFSET: u64 = 0xcbf2_9ce4_8422_2325;
424    const PRIME: u64 = 0x0000_0100_0000_01b3;
425    bytes
426        .iter()
427        .fold(OFFSET, |acc, &b| (acc ^ u64::from(b)).wrapping_mul(PRIME))
428}
429
430/// Read the file modification time as seconds since UNIX epoch.
431fn mtime_secs(path: &Path) -> DedupResult<u64> {
432    let meta = std::fs::metadata(path)?;
433    let mtime = meta
434        .modified()
435        .map_err(|e| DedupError::Io(std::io::Error::new(std::io::ErrorKind::Other, e)))?;
436    let secs = mtime
437        .duration_since(UNIX_EPOCH)
438        .unwrap_or_default()
439        .as_secs();
440    Ok(secs)
441}
442
443impl DedupSessionCache {
444    /// Create a new session cache with the given capacity.
445    #[must_use]
446    pub fn new(capacity: usize) -> Self {
447        let capacity = capacity.max(1);
448        Self {
449            capacity,
450            entries: HashMap::with_capacity(capacity),
451            lru_order: VecDeque::with_capacity(capacity),
452        }
453    }
454
455    /// Internal: promote `key` to front of LRU queue.
456    fn promote(&mut self, key: u64) {
457        if let Some(pos) = self.lru_order.iter().position(|&k| k == key) {
458            self.lru_order.remove(pos);
459        }
460        self.lru_order.push_front(key);
461    }
462
463    /// Internal: evict the LRU entry if at capacity.
464    fn evict_if_needed(&mut self) {
465        while self.entries.len() >= self.capacity {
466            if let Some(lru_key) = self.lru_order.pop_back() {
467                self.entries.remove(&lru_key);
468            } else {
469                break;
470            }
471        }
472    }
473
474    /// Look up or compute the pHash for `path`.
475    ///
476    /// On cache hit where the mtime matches, returns the cached value.
477    /// On mtime mismatch or absence, calls `compute`, stores the result, and
478    /// returns it.
479    pub fn get_or_compute_phash(
480        &mut self,
481        path: &Path,
482        compute: impl FnOnce() -> DedupResult<u64>,
483    ) -> DedupResult<u64> {
484        let key = fnv1a_64(path.as_os_str().as_encoded_bytes());
485        let current_mtime = mtime_secs(path)?;
486
487        if let Some(entry) = self.entries.get(&key) {
488            if entry.mtime_secs == current_mtime {
489                if let Some(ph) = entry.phash {
490                    self.promote(key);
491                    return Ok(ph);
492                }
493            }
494        }
495
496        // Cache miss or stale: compute and store.
497        let ph = compute()?;
498        self.evict_if_needed();
499        let entry = self.entries.entry(key).or_insert_with(|| CacheEntry {
500            phash: None,
501            fingerprint: None,
502            mtime_secs: current_mtime,
503        });
504        entry.phash = Some(ph);
505        entry.mtime_secs = current_mtime;
506        self.promote(key);
507        Ok(ph)
508    }
509
510    /// Return the number of entries in the cache.
511    #[must_use]
512    pub fn len(&self) -> usize {
513        self.entries.len()
514    }
515
516    /// Return `true` if the cache is empty.
517    #[must_use]
518    pub fn is_empty(&self) -> bool {
519        self.entries.is_empty()
520    }
521
522    /// Save the cache to a JSON file.
523    pub fn save_to_file(&self, path: &Path) -> DedupResult<()> {
524        let json = serde_json::to_string(&self.entries)
525            .map_err(|e| DedupError::Io(std::io::Error::new(std::io::ErrorKind::Other, e)))?;
526        std::fs::write(path, json)?;
527        Ok(())
528    }
529
530    /// Load a previously saved cache from a JSON file.
531    ///
532    /// The LRU order is reset (all entries treated as equally recent).
533    pub fn load_from_file(path: &Path) -> DedupResult<Self> {
534        let json = std::fs::read_to_string(path)?;
535        let entries: HashMap<u64, CacheEntry> = serde_json::from_str(&json)
536            .map_err(|e| DedupError::Io(std::io::Error::new(std::io::ErrorKind::Other, e)))?;
537        let capacity = entries.len().max(1);
538        let lru_order = entries.keys().cloned().collect();
539        Ok(Self {
540            capacity,
541            entries,
542            lru_order,
543        })
544    }
545}
546
547impl fmt::Debug for DedupSessionCache {
548    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
549        f.debug_struct("DedupSessionCache")
550            .field("capacity", &self.capacity)
551            .field("len", &self.entries.len())
552            .finish()
553    }
554}
555
556#[cfg(test)]
557mod tests {
558    use super::*;
559
560    #[test]
561    fn test_cache_stats_default() {
562        let stats = CacheStats::new();
563        assert_eq!(stats.hits, 0);
564        assert_eq!(stats.misses, 0);
565        assert_eq!(stats.total_lookups(), 0);
566        assert!((stats.hit_rate() - 0.0).abs() < f64::EPSILON);
567    }
568
569    #[test]
570    fn test_cache_stats_hit_rate() {
571        let stats = CacheStats {
572            hits: 3,
573            misses: 1,
574            insertions: 4,
575            evictions: 0,
576        };
577        assert!((stats.hit_rate() - 0.75).abs() < 1e-10);
578        assert_eq!(stats.total_lookups(), 4);
579    }
580
581    #[test]
582    fn test_cache_stats_display() {
583        let stats = CacheStats {
584            hits: 10,
585            misses: 5,
586            insertions: 15,
587            evictions: 2,
588        };
589        let s = stats.to_string();
590        assert!(s.contains("hits=10"));
591        assert!(s.contains("misses=5"));
592    }
593
594    #[test]
595    fn test_cache_stats_reset() {
596        let mut stats = CacheStats {
597            hits: 10,
598            misses: 5,
599            insertions: 15,
600            evictions: 2,
601        };
602        stats.reset();
603        assert_eq!(stats.hits, 0);
604        assert_eq!(stats.misses, 0);
605    }
606
607    #[test]
608    fn test_lru_cache_insert_and_get() {
609        let mut cache = LruCache::new(4);
610        cache.insert("a", 1);
611        cache.insert("b", 2);
612        assert_eq!(cache.get(&"a"), Some(&1));
613        assert_eq!(cache.get(&"b"), Some(&2));
614        assert_eq!(cache.get(&"c"), None);
615    }
616
617    #[test]
618    fn test_lru_cache_update_existing() {
619        let mut cache = LruCache::new(4);
620        cache.insert("key", 10);
621        cache.insert("key", 20);
622        assert_eq!(cache.get(&"key"), Some(&20));
623        assert_eq!(cache.len(), 1);
624    }
625
626    #[test]
627    fn test_lru_cache_eviction() {
628        let mut cache = LruCache::new(3);
629        cache.insert("a", 1);
630        cache.insert("b", 2);
631        cache.insert("c", 3);
632        // Cache is full; inserting "d" should evict "a" (LRU)
633        cache.insert("d", 4);
634        assert!(!cache.contains(&"a"));
635        assert!(cache.contains(&"b"));
636        assert!(cache.contains(&"c"));
637        assert!(cache.contains(&"d"));
638        assert_eq!(cache.stats().evictions, 1);
639    }
640
641    #[test]
642    fn test_lru_cache_access_promotes() {
643        let mut cache = LruCache::new(3);
644        cache.insert("a", 1);
645        cache.insert("b", 2);
646        cache.insert("c", 3);
647        // Access "a" to promote it
648        cache.get(&"a");
649        // Now "b" is LRU; inserting "d" should evict "b"
650        cache.insert("d", 4);
651        assert!(cache.contains(&"a"));
652        assert!(!cache.contains(&"b"));
653    }
654
655    #[test]
656    fn test_lru_cache_stats() {
657        let mut cache = LruCache::new(4);
658        cache.insert("x", 1);
659        cache.get(&"x"); // hit
660        cache.get(&"y"); // miss
661        assert_eq!(cache.stats().hits, 1);
662        assert_eq!(cache.stats().misses, 1);
663        assert_eq!(cache.stats().insertions, 1);
664    }
665
666    #[test]
667    fn test_lru_cache_clear() {
668        let mut cache = LruCache::new(4);
669        cache.insert("a", 1);
670        cache.insert("b", 2);
671        assert_eq!(cache.len(), 2);
672        cache.clear();
673        assert_eq!(cache.len(), 0);
674        assert!(cache.is_empty());
675    }
676
677    #[test]
678    fn test_hash_cache_basic() {
679        let mut cache = HashCache::new(100);
680        cache.insert("/video/a.mp4".to_string(), "abc123".to_string());
681        assert!(cache.contains("/video/a.mp4"));
682        assert!(!cache.contains("/video/b.mp4"));
683        assert_eq!(cache.get("/video/a.mp4"), Some(&"abc123".to_string()));
684        assert_eq!(cache.len(), 1);
685    }
686
687    #[test]
688    fn test_hash_cache_clear() {
689        let mut cache = HashCache::new(10);
690        cache.insert("path".to_string(), "hash".to_string());
691        assert!(!cache.is_empty());
692        cache.clear();
693        assert!(cache.is_empty());
694    }
695
696    #[test]
697    fn test_hash_cache_eviction() {
698        let mut cache = HashCache::new(2);
699        cache.insert("a".to_string(), "h1".to_string());
700        cache.insert("b".to_string(), "h2".to_string());
701        cache.insert("c".to_string(), "h3".to_string());
702        // "a" should be evicted
703        assert!(!cache.contains("a"));
704        assert!(cache.contains("b"));
705        assert!(cache.contains("c"));
706        assert_eq!(cache.stats().evictions, 1);
707    }
708
709    // ── DedupSessionCache tests ───────────────────────────────────────────────
710
711    /// Write a real file so that `mtime_secs` can interrogate its metadata.
712    fn write_temp_file(content: &[u8]) -> std::path::PathBuf {
713        use std::sync::atomic::{AtomicU64, Ordering};
714        static COUNTER: AtomicU64 = AtomicU64::new(0);
715
716        let dir = std::env::temp_dir();
717        let uid = COUNTER.fetch_add(1, Ordering::Relaxed);
718        let pid = std::process::id();
719        let path = dir.join(format!("dedup_cache_test_{pid}_{uid}.bin"));
720        std::fs::write(&path, content).expect("write temp file");
721        path
722    }
723
724    #[test]
725    fn test_cache_hit_returns_cached_value() {
726        let tmp = write_temp_file(b"hello");
727        let mut cache = DedupSessionCache::new(16);
728        let mut calls = 0usize;
729
730        let ph1 = cache
731            .get_or_compute_phash(&tmp, || {
732                calls += 1;
733                Ok(0xDEAD_BEEF_u64)
734            })
735            .expect("first call should succeed");
736        assert_eq!(ph1, 0xDEAD_BEEF);
737        assert_eq!(calls, 1);
738
739        let ph2 = cache
740            .get_or_compute_phash(&tmp, || {
741                calls += 1;
742                Ok(0u64)
743            })
744            .expect("second call should succeed");
745        assert_eq!(ph2, 0xDEAD_BEEF, "cached value should be returned on hit");
746        assert_eq!(calls, 1, "compute closure must not be called on cache hit");
747
748        let _ = std::fs::remove_file(&tmp);
749    }
750
751    #[test]
752    fn test_cache_lru_eviction_at_capacity() {
753        // Capacity = 2; inserting a third entry evicts the LRU.
754        let tmp_a = write_temp_file(b"file_a");
755        let tmp_b = write_temp_file(b"file_b");
756        let tmp_c = write_temp_file(b"file_c");
757
758        let mut cache = DedupSessionCache::new(2);
759
760        cache
761            .get_or_compute_phash(&tmp_a, || Ok(1))
762            .expect("insert a");
763        cache
764            .get_or_compute_phash(&tmp_b, || Ok(2))
765            .expect("insert b");
766        // tmp_a is now LRU; inserting tmp_c should evict it.
767        cache
768            .get_or_compute_phash(&tmp_c, || Ok(3))
769            .expect("insert c");
770
771        assert_eq!(cache.len(), 2, "cache should not exceed capacity");
772
773        let _ = std::fs::remove_file(&tmp_a);
774        let _ = std::fs::remove_file(&tmp_b);
775        let _ = std::fs::remove_file(&tmp_c);
776    }
777
778    #[test]
779    fn test_cache_mtime_invalidation() {
780        let tmp = write_temp_file(b"original content");
781
782        let mut cache = DedupSessionCache::new(16);
783        let ph1 = cache
784            .get_or_compute_phash(&tmp, || Ok(0xAAAA_AAAA))
785            .expect("first compute");
786        assert_eq!(ph1, 0xAAAA_AAAA);
787
788        // Overwrite the file so mtime changes.
789        // Sleep a tiny moment to ensure the mtime differs on filesystems with
790        // 1-second granularity — use a small delay via write + sync.
791        std::fs::write(&tmp, b"modified content").expect("rewrite temp file");
792
793        // Force mtime to advance by touching metadata explicitly on platforms
794        // that have sub-second resolution (most modern Linux/macOS).
795        // We manipulate the cache entry directly to simulate a stale mtime.
796        let key = fnv1a_64(tmp.as_os_str().as_encoded_bytes());
797        if let Some(e) = cache.entries.get_mut(&key) {
798            e.mtime_secs = 0; // force stale
799        }
800
801        let mut recomputed = false;
802        let ph2 = cache
803            .get_or_compute_phash(&tmp, || {
804                recomputed = true;
805                Ok(0xBBBB_BBBB)
806            })
807            .expect("second compute");
808        assert!(recomputed, "stale mtime should trigger recomputation");
809        assert_eq!(ph2, 0xBBBB_BBBB);
810
811        let _ = std::fs::remove_file(&tmp);
812    }
813
814    #[test]
815    fn test_cache_save_load_roundtrip() {
816        use std::sync::atomic::{AtomicU64, Ordering};
817        static CACHE_COUNTER: AtomicU64 = AtomicU64::new(0);
818
819        let dir = std::env::temp_dir();
820        let tmp_file = write_temp_file(b"roundtrip test");
821        let pid = std::process::id();
822        let uid = CACHE_COUNTER.fetch_add(1, Ordering::Relaxed);
823        let cache_path = dir.join(format!("dedup_session_cache_test_{pid}_{uid}.json"));
824
825        let mut cache = DedupSessionCache::new(16);
826        cache
827            .get_or_compute_phash(&tmp_file, || Ok(0x1234_5678_9ABC_DEF0))
828            .expect("compute phash");
829
830        cache.save_to_file(&cache_path).expect("save to file");
831
832        let loaded = DedupSessionCache::load_from_file(&cache_path).expect("load from file");
833        assert_eq!(
834            cache.entries.len(),
835            loaded.entries.len(),
836            "entry count must survive round-trip"
837        );
838
839        let key = fnv1a_64(tmp_file.as_os_str().as_encoded_bytes());
840        let loaded_entry = loaded.entries.get(&key).expect("entry must be present");
841        assert_eq!(
842            loaded_entry.phash,
843            Some(0x1234_5678_9ABC_DEF0),
844            "phash must survive round-trip"
845        );
846
847        let _ = std::fs::remove_file(&tmp_file);
848        let _ = std::fs::remove_file(&cache_path);
849    }
850}