oximedia_dedup/
persistent_cache.rs1#![allow(dead_code)]
53#![allow(clippy::cast_precision_loss)]
54
55use std::collections::HashMap;
56use std::io::{self, BufReader, BufWriter};
57use std::path::{Path, PathBuf};
58
59use serde::{Deserialize, Serialize};
60
61#[derive(Debug, Clone, Serialize, Deserialize)]
67pub struct CachedEntry {
68 pub path: String,
70 pub blake3_hex: String,
72 pub phash: u64,
74 pub thumbnail: Option<Vec<u8>>,
77 pub modified_secs: u64,
79}
80
81impl CachedEntry {
82 #[must_use]
84 pub fn thumbnail_valid(&self) -> bool {
85 self.thumbnail
86 .as_ref()
87 .map(|t| t.len() == 64)
88 .unwrap_or(true) }
90}
91
92#[derive(Debug, Clone)]
100pub struct PersistentFingerprintCache {
101 cache_path: PathBuf,
103 entries: HashMap<String, CachedEntry>,
105 hits: u64,
107 misses: u64,
109}
110
111impl PersistentFingerprintCache {
112 #[must_use]
117 pub fn new(cache_path: PathBuf) -> Self {
118 Self {
119 cache_path,
120 entries: HashMap::new(),
121 hits: 0,
122 misses: 0,
123 }
124 }
125
126 pub fn load(cache_path: PathBuf) -> io::Result<Self> {
135 if !cache_path.exists() {
136 return Ok(Self::new(cache_path));
137 }
138 let file = std::fs::File::open(&cache_path)?;
139 let reader = BufReader::new(file);
140 let entries: HashMap<String, CachedEntry> =
141 serde_json::from_reader(reader).map_err(|e| {
142 io::Error::new(
143 io::ErrorKind::InvalidData,
144 format!("cache parse error: {e}"),
145 )
146 })?;
147 Ok(Self {
148 cache_path,
149 entries,
150 hits: 0,
151 misses: 0,
152 })
153 }
154
155 pub fn save(&self) -> io::Result<()> {
161 if let Some(parent) = self.cache_path.parent() {
163 std::fs::create_dir_all(parent)?;
164 }
165
166 let tmp_path = self.cache_path.with_extension("tmp");
168 {
169 let file = std::fs::File::create(&tmp_path)?;
170 let writer = BufWriter::new(file);
171 serde_json::to_writer(writer, &self.entries).map_err(|e| {
172 io::Error::new(io::ErrorKind::Other, format!("cache write error: {e}"))
173 })?;
174 }
175 std::fs::rename(&tmp_path, &self.cache_path)?;
176 Ok(())
177 }
178
179 pub fn insert(&mut self, entry: CachedEntry) {
181 self.entries.insert(entry.path.clone(), entry);
182 }
183
184 pub fn remove(&mut self, path: &str) -> Option<CachedEntry> {
186 self.entries.remove(path)
187 }
188
189 #[must_use]
191 pub fn len(&self) -> usize {
192 self.entries.len()
193 }
194
195 #[must_use]
197 pub fn is_empty(&self) -> bool {
198 self.entries.is_empty()
199 }
200
201 #[must_use]
205 pub fn get(&self, path: &str) -> Option<&CachedEntry> {
206 self.entries.get(path)
207 }
208
209 pub fn get_valid(&mut self, path: &str) -> Option<&CachedEntry> {
219 let entry = match self.entries.get(path) {
220 Some(e) => e,
221 None => {
222 self.misses += 1;
223 return None;
224 }
225 };
226
227 match compute_blake3_hex(Path::new(path)) {
229 Ok(current_hex) => {
230 if current_hex == entry.blake3_hex {
231 self.hits += 1;
232 self.entries.get(path)
233 } else {
234 self.misses += 1;
236 self.entries.remove(path);
237 None
238 }
239 }
240 Err(_) => {
241 self.misses += 1;
243 None
244 }
245 }
246 }
247
248 #[must_use]
250 pub fn hits(&self) -> u64 {
251 self.hits
252 }
253
254 #[must_use]
256 pub fn misses(&self) -> u64 {
257 self.misses
258 }
259
260 #[must_use]
262 pub fn hit_rate(&self) -> f64 {
263 let total = self.hits + self.misses;
264 if total == 0 {
265 return 0.0;
266 }
267 self.hits as f64 / total as f64
268 }
269
270 pub fn reset_stats(&mut self) {
272 self.hits = 0;
273 self.misses = 0;
274 }
275
276 pub fn evict_missing(&mut self) -> usize {
280 let before = self.entries.len();
281 self.entries.retain(|path, _| Path::new(path).exists());
282 before - self.entries.len()
283 }
284
285 pub fn evict_stale(&mut self) -> usize {
290 let paths: Vec<String> = self.entries.keys().cloned().collect();
291 let mut evicted = 0;
292 for path in paths {
293 let stale = if let Some(entry) = self.entries.get(&path) {
294 compute_blake3_hex(Path::new(&path))
295 .map(|h| h != entry.blake3_hex)
296 .unwrap_or(true) } else {
298 false
299 };
300 if stale {
301 self.entries.remove(&path);
302 evicted += 1;
303 }
304 }
305 evicted
306 }
307
308 pub fn merge_from(&mut self, other: &Self) {
312 for (path, entry) in &other.entries {
313 self.entries.insert(path.clone(), entry.clone());
314 }
315 }
316
317 pub fn iter(&self) -> impl Iterator<Item = (&String, &CachedEntry)> {
319 self.entries.iter()
320 }
321}
322
323fn compute_blake3_hex(path: &Path) -> io::Result<String> {
333 use std::io::Read;
334
335 let mut file = std::fs::File::open(path)?;
336 let mut hasher = blake3::Hasher::new();
337 let mut buf = vec![0u8; 65_536];
338 loop {
339 let n = file.read(&mut buf)?;
340 if n == 0 {
341 break;
342 }
343 hasher.update(&buf[..n]);
344 }
345 Ok(hasher.finalize().to_hex().to_string())
346}
347
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    /// Builds a path for `name` inside this module's scratch directory under
    /// the system temp dir. The directory itself is not created here.
    fn tmp_cache_path(name: &str) -> PathBuf {
        let mut location = std::env::temp_dir();
        location.push("oximedia_persistent_cache_tests");
        location.push(name);
        location
    }

    /// Returns an entry for `path` with a placeholder digest and no thumbnail.
    fn sample_entry(path: &str) -> CachedEntry {
        CachedEntry {
            path: path.to_string(),
            blake3_hex: "0".repeat(64),
            phash: 0xDEAD_BEEF_1234_5678,
            thumbnail: None,
            modified_secs: 1_700_000_000,
        }
    }

    /// Creates (or truncates) `target` and fills it with `contents`.
    fn overwrite(target: &Path, contents: &[u8]) {
        let mut handle = std::fs::File::create(target).expect("create");
        handle.write_all(contents).expect("write");
    }

    #[test]
    fn test_new_cache_is_empty() {
        let fresh = PersistentFingerprintCache::new(tmp_cache_path("new_empty.json"));
        assert!(fresh.is_empty());
        assert_eq!(fresh.len(), 0);
    }

    #[test]
    fn test_insert_and_get() {
        let mut store = PersistentFingerprintCache::new(tmp_cache_path("insert.json"));
        store.insert(sample_entry("/media/a.mp4"));
        // A present entry with the expected hash covers both original checks.
        let found = store.get("/media/a.mp4").map(|entry| entry.phash);
        assert_eq!(found, Some(0xDEAD_BEEF_1234_5678));
    }

    #[test]
    fn test_remove() {
        let mut store = PersistentFingerprintCache::new(tmp_cache_path("remove.json"));
        store.insert(sample_entry("/media/b.mp4"));
        let removed = store.remove("/media/b.mp4");
        assert!(removed.is_some());
        assert!(store.get("/media/b.mp4").is_none());
    }

    #[test]
    fn test_save_and_load_roundtrip() {
        let location = tmp_cache_path("roundtrip.json");
        let scratch_dir = location.parent().unwrap();
        // Failure to create the directory is ignored here; `save` reports it.
        let _ = std::fs::create_dir_all(scratch_dir);

        let mut original = PersistentFingerprintCache::new(location.clone());
        original.insert(sample_entry("/media/c.mp4"));
        original.save().expect("save should succeed");

        let restored = PersistentFingerprintCache::load(location).expect("load should succeed");
        assert_eq!(restored.len(), 1);
        assert!(restored.get("/media/c.mp4").is_some());
    }

    #[test]
    fn test_load_nonexistent_returns_empty() {
        let location = tmp_cache_path("nonexistent_xyzabc.json");
        // Make sure no leftover file from an earlier run is present.
        let _ = std::fs::remove_file(&location);
        let loaded = PersistentFingerprintCache::load(location).expect("should not fail");
        assert!(loaded.is_empty());
    }

    #[test]
    fn test_hit_miss_counters() {
        let mut store = PersistentFingerprintCache::new(tmp_cache_path("stats.json"));
        store.insert(sample_entry("/x.mp4"));
        // Plain `get` must leave both counters untouched.
        let _ = store.get("/x.mp4");
        assert_eq!(store.hits(), 0);
        assert_eq!(store.misses(), 0);
    }

    #[test]
    fn test_hit_rate_zero_on_no_lookups() {
        let store = PersistentFingerprintCache::new(tmp_cache_path("hitrate.json"));
        assert_eq!(store.hit_rate(), 0.0);
    }

    #[test]
    fn test_evict_missing_removes_nonexistent_paths() {
        let mut store = PersistentFingerprintCache::new(tmp_cache_path("evict.json"));
        store.insert(sample_entry("/definitely/does/not/exist/zzz.mp4"));
        assert_eq!(store.len(), 1);
        let removed = store.evict_missing();
        assert_eq!(removed, 1);
        assert!(store.is_empty());
    }

    #[test]
    fn test_evict_stale_removes_changed_files() {
        let scratch_dir = std::env::temp_dir().join("oximedia_pc_stale_test");
        let _ = std::fs::create_dir_all(&scratch_dir);
        let media = scratch_dir.join("media_file.bin");

        overwrite(&media, b"original content for hashing");
        let digest = compute_blake3_hex(&media).expect("hash ok");

        let mut store = PersistentFingerprintCache::new(tmp_cache_path("stale.json"));
        store.insert(CachedEntry {
            path: media.to_string_lossy().to_string(),
            blake3_hex: digest.clone(),
            phash: 0x1111,
            thumbnail: None,
            modified_secs: 0,
        });

        // Digest still matches the file: nothing should be evicted.
        let first_pass = store.evict_stale();
        assert_eq!(first_pass, 0, "file unchanged → no eviction");

        // Rewrite the file so its digest no longer matches the cached one.
        overwrite(&media, b"modified content, different bytes!");

        let second_pass = store.evict_stale();
        assert_eq!(second_pass, 1, "changed file → entry evicted");
        assert!(store.is_empty());

        let _ = std::fs::remove_file(&media);
    }

    #[test]
    fn test_merge_from() {
        let mut target = PersistentFingerprintCache::new(tmp_cache_path("merge_a.json"));
        let mut source = PersistentFingerprintCache::new(tmp_cache_path("merge_b.json"));
        target.insert(sample_entry("/file_a.mp4"));
        source.insert(sample_entry("/file_b.mp4"));
        target.merge_from(&source);
        assert_eq!(target.len(), 2);
        assert!(target.get("/file_a.mp4").is_some());
        assert!(target.get("/file_b.mp4").is_some());
    }

    #[test]
    fn test_thumbnail_valid_no_thumbnail() {
        let record = sample_entry("/x.mp4");
        assert!(record.thumbnail_valid());
    }

    #[test]
    fn test_thumbnail_valid_correct_size() {
        let mut record = sample_entry("/y.mp4");
        record.thumbnail = Some(vec![128u8; 64]);
        assert!(record.thumbnail_valid());
    }

    #[test]
    fn test_thumbnail_invalid_wrong_size() {
        let mut record = sample_entry("/z.mp4");
        record.thumbnail = Some(vec![0u8; 32]);
        assert!(!record.thumbnail_valid());
    }

    #[test]
    fn test_reset_stats() {
        let mut store = PersistentFingerprintCache::new(tmp_cache_path("reset.json"));
        // Looking up a path with no entry records a miss.
        let _ = store.get_valid("/nonexistent.mp4");
        assert!(store.misses() > 0);
        store.reset_stats();
        assert_eq!(store.misses(), 0);
        assert_eq!(store.hits(), 0);
    }
}