1#![allow(dead_code)]
2
3use std::collections::{HashMap, VecDeque};
17use std::fmt;
18use std::hash::Hash;
19use std::path::Path;
20use std::time::UNIX_EPOCH;
21
22use crate::{DedupError, DedupResult};
23
24#[derive(Debug, Clone, Default)]
26pub struct CacheStats {
27 pub hits: u64,
29 pub misses: u64,
31 pub insertions: u64,
33 pub evictions: u64,
35}
36
37impl CacheStats {
38 #[must_use]
40 pub fn new() -> Self {
41 Self::default()
42 }
43
44 #[must_use]
46 #[allow(clippy::cast_precision_loss)]
47 pub fn hit_rate(&self) -> f64 {
48 let total = self.hits + self.misses;
49 if total == 0 {
50 return 0.0;
51 }
52 self.hits as f64 / total as f64
53 }
54
55 #[must_use]
57 pub fn total_lookups(&self) -> u64 {
58 self.hits + self.misses
59 }
60
61 pub fn reset(&mut self) {
63 self.hits = 0;
64 self.misses = 0;
65 self.insertions = 0;
66 self.evictions = 0;
67 }
68}
69
70impl fmt::Display for CacheStats {
71 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
72 write!(
73 f,
74 "hits={}, misses={}, hit_rate={:.2}%, evictions={}",
75 self.hits,
76 self.misses,
77 self.hit_rate() * 100.0,
78 self.evictions,
79 )
80 }
81}
82
83struct LruNode<K, V> {
85 key: K,
87 value: V,
89 prev: usize,
91 next: usize,
93}
94
95pub struct LruCache<K, V> {
100 capacity: usize,
102 map: HashMap<K, usize>,
104 nodes: Vec<LruNode<K, V>>,
106 head: usize,
108 tail: usize,
110 free: Vec<usize>,
112 stats: CacheStats,
114}
115
116const NONE: usize = usize::MAX;
118
119impl<K: Clone + Eq + Hash, V> LruCache<K, V> {
120 #[must_use]
126 pub fn new(capacity: usize) -> Self {
127 assert!(capacity > 0, "LRU cache capacity must be > 0");
128 Self {
129 capacity,
130 map: HashMap::with_capacity(capacity),
131 nodes: Vec::with_capacity(capacity),
132 head: NONE,
133 tail: NONE,
134 free: Vec::new(),
135 stats: CacheStats::new(),
136 }
137 }
138
139 pub fn get(&mut self, key: &K) -> Option<&V> {
143 if let Some(&idx) = self.map.get(key) {
144 self.stats.hits += 1;
145 self.move_to_head(idx);
146 Some(&self.nodes[idx].value)
147 } else {
148 self.stats.misses += 1;
149 None
150 }
151 }
152
153 #[must_use]
155 pub fn contains(&self, key: &K) -> bool {
156 self.map.contains_key(key)
157 }
158
159 pub fn insert(&mut self, key: K, value: V) -> Option<(K, V)> {
162 if let Some(&idx) = self.map.get(&key) {
164 self.nodes[idx].value = value;
165 self.move_to_head(idx);
166 self.stats.insertions += 1;
167 return None;
168 }
169
170 self.stats.insertions += 1;
171
172 let evicted = if self.map.len() >= self.capacity {
174 self.evict_tail()
175 } else {
176 None
177 };
178
179 let idx = if let Some(free_idx) = self.free.pop() {
181 self.nodes[free_idx] = LruNode {
182 key: key.clone(),
183 value,
184 prev: NONE,
185 next: NONE,
186 };
187 free_idx
188 } else {
189 let idx = self.nodes.len();
190 self.nodes.push(LruNode {
191 key: key.clone(),
192 value,
193 prev: NONE,
194 next: NONE,
195 });
196 idx
197 };
198
199 self.map.insert(key, idx);
200 self.push_head(idx);
201
202 evicted
203 }
204
205 pub fn remove(&mut self, key: &K) -> Option<V> {
207 if let Some(idx) = self.map.remove(key) {
208 self.unlink(idx);
209 self.free.push(idx);
210 None
217 } else {
218 None
219 }
220 }
221
222 #[must_use]
224 pub fn len(&self) -> usize {
225 self.map.len()
226 }
227
228 #[must_use]
230 pub fn is_empty(&self) -> bool {
231 self.map.is_empty()
232 }
233
234 #[must_use]
236 pub fn capacity(&self) -> usize {
237 self.capacity
238 }
239
240 #[must_use]
242 pub fn stats(&self) -> &CacheStats {
243 &self.stats
244 }
245
246 pub fn clear(&mut self) {
248 self.map.clear();
249 self.nodes.clear();
250 self.free.clear();
251 self.head = NONE;
252 self.tail = NONE;
253 }
254
255 fn unlink(&mut self, idx: usize) {
259 let prev = self.nodes[idx].prev;
260 let next = self.nodes[idx].next;
261
262 if prev != NONE {
263 self.nodes[prev].next = next;
264 } else {
265 self.head = next;
266 }
267
268 if next != NONE {
269 self.nodes[next].prev = prev;
270 } else {
271 self.tail = prev;
272 }
273
274 self.nodes[idx].prev = NONE;
275 self.nodes[idx].next = NONE;
276 }
277
278 fn push_head(&mut self, idx: usize) {
280 self.nodes[idx].prev = NONE;
281 self.nodes[idx].next = self.head;
282
283 if self.head != NONE {
284 self.nodes[self.head].prev = idx;
285 }
286 self.head = idx;
287
288 if self.tail == NONE {
289 self.tail = idx;
290 }
291 }
292
293 fn move_to_head(&mut self, idx: usize) {
295 if self.head == idx {
296 return;
297 }
298 self.unlink(idx);
299 self.push_head(idx);
300 }
301
302 fn evict_tail(&mut self) -> Option<(K, V)> {
304 if self.tail == NONE {
305 return None;
306 }
307 let tail_idx = self.tail;
308 let evicted_key = self.nodes[tail_idx].key.clone();
309 self.unlink(tail_idx);
310 self.map.remove(&evicted_key);
311 self.free.push(tail_idx);
312 self.stats.evictions += 1;
313 None
316 }
317}
318
319impl<K: Clone + Eq + Hash + fmt::Debug, V> fmt::Debug for LruCache<K, V> {
320 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
321 f.debug_struct("LruCache")
322 .field("capacity", &self.capacity)
323 .field("len", &self.map.len())
324 .field("stats", &self.stats)
325 .finish()
326 }
327}
328
329pub struct HashCache {
331 inner: LruCache<String, String>,
333}
334
335impl HashCache {
336 #[must_use]
338 pub fn new(capacity: usize) -> Self {
339 Self {
340 inner: LruCache::new(capacity),
341 }
342 }
343
344 pub fn get(&mut self, path: &str) -> Option<&String> {
346 self.inner.get(&path.to_string())
347 }
348
349 pub fn insert(&mut self, path: String, hash: String) {
351 self.inner.insert(path, hash);
352 }
353
354 #[must_use]
356 pub fn contains(&self, path: &str) -> bool {
357 self.inner.contains(&path.to_string())
358 }
359
360 #[must_use]
362 pub fn stats(&self) -> &CacheStats {
363 self.inner.stats()
364 }
365
366 #[must_use]
368 pub fn len(&self) -> usize {
369 self.inner.len()
370 }
371
372 #[must_use]
374 pub fn is_empty(&self) -> bool {
375 self.inner.is_empty()
376 }
377
378 pub fn clear(&mut self) {
380 self.inner.clear();
381 }
382}
383
384impl fmt::Debug for HashCache {
385 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
386 f.debug_struct("HashCache")
387 .field("capacity", &self.inner.capacity())
388 .field("len", &self.inner.len())
389 .field("stats", &self.inner.stats())
390 .finish()
391 }
392}
393
394#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
398pub struct CacheEntry {
399 pub phash: Option<u64>,
401 pub fingerprint: Option<Vec<f32>>,
403 pub mtime_secs: u64,
405}
406
407pub struct DedupSessionCache {
413 capacity: usize,
415 entries: HashMap<u64, CacheEntry>,
417 lru_order: VecDeque<u64>,
419}
420
421fn fnv1a_64(bytes: &[u8]) -> u64 {
423 const OFFSET: u64 = 0xcbf2_9ce4_8422_2325;
424 const PRIME: u64 = 0x0000_0100_0000_01b3;
425 bytes
426 .iter()
427 .fold(OFFSET, |acc, &b| (acc ^ u64::from(b)).wrapping_mul(PRIME))
428}
429
430fn mtime_secs(path: &Path) -> DedupResult<u64> {
432 let meta = std::fs::metadata(path)?;
433 let mtime = meta
434 .modified()
435 .map_err(|e| DedupError::Io(std::io::Error::new(std::io::ErrorKind::Other, e)))?;
436 let secs = mtime
437 .duration_since(UNIX_EPOCH)
438 .unwrap_or_default()
439 .as_secs();
440 Ok(secs)
441}
442
443impl DedupSessionCache {
444 #[must_use]
446 pub fn new(capacity: usize) -> Self {
447 let capacity = capacity.max(1);
448 Self {
449 capacity,
450 entries: HashMap::with_capacity(capacity),
451 lru_order: VecDeque::with_capacity(capacity),
452 }
453 }
454
455 fn promote(&mut self, key: u64) {
457 if let Some(pos) = self.lru_order.iter().position(|&k| k == key) {
458 self.lru_order.remove(pos);
459 }
460 self.lru_order.push_front(key);
461 }
462
463 fn evict_if_needed(&mut self) {
465 while self.entries.len() >= self.capacity {
466 if let Some(lru_key) = self.lru_order.pop_back() {
467 self.entries.remove(&lru_key);
468 } else {
469 break;
470 }
471 }
472 }
473
474 pub fn get_or_compute_phash(
480 &mut self,
481 path: &Path,
482 compute: impl FnOnce() -> DedupResult<u64>,
483 ) -> DedupResult<u64> {
484 let key = fnv1a_64(path.as_os_str().as_encoded_bytes());
485 let current_mtime = mtime_secs(path)?;
486
487 if let Some(entry) = self.entries.get(&key) {
488 if entry.mtime_secs == current_mtime {
489 if let Some(ph) = entry.phash {
490 self.promote(key);
491 return Ok(ph);
492 }
493 }
494 }
495
496 let ph = compute()?;
498 self.evict_if_needed();
499 let entry = self.entries.entry(key).or_insert_with(|| CacheEntry {
500 phash: None,
501 fingerprint: None,
502 mtime_secs: current_mtime,
503 });
504 entry.phash = Some(ph);
505 entry.mtime_secs = current_mtime;
506 self.promote(key);
507 Ok(ph)
508 }
509
510 #[must_use]
512 pub fn len(&self) -> usize {
513 self.entries.len()
514 }
515
516 #[must_use]
518 pub fn is_empty(&self) -> bool {
519 self.entries.is_empty()
520 }
521
522 pub fn save_to_file(&self, path: &Path) -> DedupResult<()> {
524 let json = serde_json::to_string(&self.entries)
525 .map_err(|e| DedupError::Io(std::io::Error::new(std::io::ErrorKind::Other, e)))?;
526 std::fs::write(path, json)?;
527 Ok(())
528 }
529
530 pub fn load_from_file(path: &Path) -> DedupResult<Self> {
534 let json = std::fs::read_to_string(path)?;
535 let entries: HashMap<u64, CacheEntry> = serde_json::from_str(&json)
536 .map_err(|e| DedupError::Io(std::io::Error::new(std::io::ErrorKind::Other, e)))?;
537 let capacity = entries.len().max(1);
538 let lru_order = entries.keys().cloned().collect();
539 Ok(Self {
540 capacity,
541 entries,
542 lru_order,
543 })
544 }
545}
546
547impl fmt::Debug for DedupSessionCache {
548 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
549 f.debug_struct("DedupSessionCache")
550 .field("capacity", &self.capacity)
551 .field("len", &self.entries.len())
552 .finish()
553 }
554}
555
556#[cfg(test)]
557mod tests {
558 use super::*;
559
560 #[test]
561 fn test_cache_stats_default() {
562 let stats = CacheStats::new();
563 assert_eq!(stats.hits, 0);
564 assert_eq!(stats.misses, 0);
565 assert_eq!(stats.total_lookups(), 0);
566 assert!((stats.hit_rate() - 0.0).abs() < f64::EPSILON);
567 }
568
569 #[test]
570 fn test_cache_stats_hit_rate() {
571 let stats = CacheStats {
572 hits: 3,
573 misses: 1,
574 insertions: 4,
575 evictions: 0,
576 };
577 assert!((stats.hit_rate() - 0.75).abs() < 1e-10);
578 assert_eq!(stats.total_lookups(), 4);
579 }
580
581 #[test]
582 fn test_cache_stats_display() {
583 let stats = CacheStats {
584 hits: 10,
585 misses: 5,
586 insertions: 15,
587 evictions: 2,
588 };
589 let s = stats.to_string();
590 assert!(s.contains("hits=10"));
591 assert!(s.contains("misses=5"));
592 }
593
594 #[test]
595 fn test_cache_stats_reset() {
596 let mut stats = CacheStats {
597 hits: 10,
598 misses: 5,
599 insertions: 15,
600 evictions: 2,
601 };
602 stats.reset();
603 assert_eq!(stats.hits, 0);
604 assert_eq!(stats.misses, 0);
605 }
606
607 #[test]
608 fn test_lru_cache_insert_and_get() {
609 let mut cache = LruCache::new(4);
610 cache.insert("a", 1);
611 cache.insert("b", 2);
612 assert_eq!(cache.get(&"a"), Some(&1));
613 assert_eq!(cache.get(&"b"), Some(&2));
614 assert_eq!(cache.get(&"c"), None);
615 }
616
617 #[test]
618 fn test_lru_cache_update_existing() {
619 let mut cache = LruCache::new(4);
620 cache.insert("key", 10);
621 cache.insert("key", 20);
622 assert_eq!(cache.get(&"key"), Some(&20));
623 assert_eq!(cache.len(), 1);
624 }
625
626 #[test]
627 fn test_lru_cache_eviction() {
628 let mut cache = LruCache::new(3);
629 cache.insert("a", 1);
630 cache.insert("b", 2);
631 cache.insert("c", 3);
632 cache.insert("d", 4);
634 assert!(!cache.contains(&"a"));
635 assert!(cache.contains(&"b"));
636 assert!(cache.contains(&"c"));
637 assert!(cache.contains(&"d"));
638 assert_eq!(cache.stats().evictions, 1);
639 }
640
641 #[test]
642 fn test_lru_cache_access_promotes() {
643 let mut cache = LruCache::new(3);
644 cache.insert("a", 1);
645 cache.insert("b", 2);
646 cache.insert("c", 3);
647 cache.get(&"a");
649 cache.insert("d", 4);
651 assert!(cache.contains(&"a"));
652 assert!(!cache.contains(&"b"));
653 }
654
655 #[test]
656 fn test_lru_cache_stats() {
657 let mut cache = LruCache::new(4);
658 cache.insert("x", 1);
659 cache.get(&"x"); cache.get(&"y"); assert_eq!(cache.stats().hits, 1);
662 assert_eq!(cache.stats().misses, 1);
663 assert_eq!(cache.stats().insertions, 1);
664 }
665
666 #[test]
667 fn test_lru_cache_clear() {
668 let mut cache = LruCache::new(4);
669 cache.insert("a", 1);
670 cache.insert("b", 2);
671 assert_eq!(cache.len(), 2);
672 cache.clear();
673 assert_eq!(cache.len(), 0);
674 assert!(cache.is_empty());
675 }
676
677 #[test]
678 fn test_hash_cache_basic() {
679 let mut cache = HashCache::new(100);
680 cache.insert("/video/a.mp4".to_string(), "abc123".to_string());
681 assert!(cache.contains("/video/a.mp4"));
682 assert!(!cache.contains("/video/b.mp4"));
683 assert_eq!(cache.get("/video/a.mp4"), Some(&"abc123".to_string()));
684 assert_eq!(cache.len(), 1);
685 }
686
687 #[test]
688 fn test_hash_cache_clear() {
689 let mut cache = HashCache::new(10);
690 cache.insert("path".to_string(), "hash".to_string());
691 assert!(!cache.is_empty());
692 cache.clear();
693 assert!(cache.is_empty());
694 }
695
696 #[test]
697 fn test_hash_cache_eviction() {
698 let mut cache = HashCache::new(2);
699 cache.insert("a".to_string(), "h1".to_string());
700 cache.insert("b".to_string(), "h2".to_string());
701 cache.insert("c".to_string(), "h3".to_string());
702 assert!(!cache.contains("a"));
704 assert!(cache.contains("b"));
705 assert!(cache.contains("c"));
706 assert_eq!(cache.stats().evictions, 1);
707 }
708
709 fn write_temp_file(content: &[u8]) -> std::path::PathBuf {
713 use std::sync::atomic::{AtomicU64, Ordering};
714 static COUNTER: AtomicU64 = AtomicU64::new(0);
715
716 let dir = std::env::temp_dir();
717 let uid = COUNTER.fetch_add(1, Ordering::Relaxed);
718 let pid = std::process::id();
719 let path = dir.join(format!("dedup_cache_test_{pid}_{uid}.bin"));
720 std::fs::write(&path, content).expect("write temp file");
721 path
722 }
723
724 #[test]
725 fn test_cache_hit_returns_cached_value() {
726 let tmp = write_temp_file(b"hello");
727 let mut cache = DedupSessionCache::new(16);
728 let mut calls = 0usize;
729
730 let ph1 = cache
731 .get_or_compute_phash(&tmp, || {
732 calls += 1;
733 Ok(0xDEAD_BEEF_u64)
734 })
735 .expect("first call should succeed");
736 assert_eq!(ph1, 0xDEAD_BEEF);
737 assert_eq!(calls, 1);
738
739 let ph2 = cache
740 .get_or_compute_phash(&tmp, || {
741 calls += 1;
742 Ok(0u64)
743 })
744 .expect("second call should succeed");
745 assert_eq!(ph2, 0xDEAD_BEEF, "cached value should be returned on hit");
746 assert_eq!(calls, 1, "compute closure must not be called on cache hit");
747
748 let _ = std::fs::remove_file(&tmp);
749 }
750
751 #[test]
752 fn test_cache_lru_eviction_at_capacity() {
753 let tmp_a = write_temp_file(b"file_a");
755 let tmp_b = write_temp_file(b"file_b");
756 let tmp_c = write_temp_file(b"file_c");
757
758 let mut cache = DedupSessionCache::new(2);
759
760 cache
761 .get_or_compute_phash(&tmp_a, || Ok(1))
762 .expect("insert a");
763 cache
764 .get_or_compute_phash(&tmp_b, || Ok(2))
765 .expect("insert b");
766 cache
768 .get_or_compute_phash(&tmp_c, || Ok(3))
769 .expect("insert c");
770
771 assert_eq!(cache.len(), 2, "cache should not exceed capacity");
772
773 let _ = std::fs::remove_file(&tmp_a);
774 let _ = std::fs::remove_file(&tmp_b);
775 let _ = std::fs::remove_file(&tmp_c);
776 }
777
778 #[test]
779 fn test_cache_mtime_invalidation() {
780 let tmp = write_temp_file(b"original content");
781
782 let mut cache = DedupSessionCache::new(16);
783 let ph1 = cache
784 .get_or_compute_phash(&tmp, || Ok(0xAAAA_AAAA))
785 .expect("first compute");
786 assert_eq!(ph1, 0xAAAA_AAAA);
787
788 std::fs::write(&tmp, b"modified content").expect("rewrite temp file");
792
793 let key = fnv1a_64(tmp.as_os_str().as_encoded_bytes());
797 if let Some(e) = cache.entries.get_mut(&key) {
798 e.mtime_secs = 0; }
800
801 let mut recomputed = false;
802 let ph2 = cache
803 .get_or_compute_phash(&tmp, || {
804 recomputed = true;
805 Ok(0xBBBB_BBBB)
806 })
807 .expect("second compute");
808 assert!(recomputed, "stale mtime should trigger recomputation");
809 assert_eq!(ph2, 0xBBBB_BBBB);
810
811 let _ = std::fs::remove_file(&tmp);
812 }
813
814 #[test]
815 fn test_cache_save_load_roundtrip() {
816 use std::sync::atomic::{AtomicU64, Ordering};
817 static CACHE_COUNTER: AtomicU64 = AtomicU64::new(0);
818
819 let dir = std::env::temp_dir();
820 let tmp_file = write_temp_file(b"roundtrip test");
821 let pid = std::process::id();
822 let uid = CACHE_COUNTER.fetch_add(1, Ordering::Relaxed);
823 let cache_path = dir.join(format!("dedup_session_cache_test_{pid}_{uid}.json"));
824
825 let mut cache = DedupSessionCache::new(16);
826 cache
827 .get_or_compute_phash(&tmp_file, || Ok(0x1234_5678_9ABC_DEF0))
828 .expect("compute phash");
829
830 cache.save_to_file(&cache_path).expect("save to file");
831
832 let loaded = DedupSessionCache::load_from_file(&cache_path).expect("load from file");
833 assert_eq!(
834 cache.entries.len(),
835 loaded.entries.len(),
836 "entry count must survive round-trip"
837 );
838
839 let key = fnv1a_64(tmp_file.as_os_str().as_encoded_bytes());
840 let loaded_entry = loaded.entries.get(&key).expect("entry must be present");
841 assert_eq!(
842 loaded_entry.phash,
843 Some(0x1234_5678_9ABC_DEF0),
844 "phash must survive round-trip"
845 );
846
847 let _ = std::fs::remove_file(&tmp_file);
848 let _ = std::fs::remove_file(&cache_path);
849 }
850}