Skip to main content

simple_gal/
cache.rs

1//! Image processing cache for incremental builds.
2//!
3//! AVIF encoding is the bottleneck of the build pipeline — a single image
4//! at three responsive sizes can take several seconds through rav1e. This
5//! module lets the process stage skip encoding when the source image and
6//! encoding parameters haven't changed since the last build.
7//!
8//! # Design
9//!
10//! The cache targets only the expensive encoding operations
11//! ([`create_responsive_images`](crate::imaging::create_responsive_images) and
12//! [`create_thumbnail`](crate::imaging::create_thumbnail)). Everything else
13//! — dimension reads, IPTC metadata extraction, title/description resolution —
14//! always runs. This means metadata changes (e.g. updating an IPTC title in
15//! Lightroom) are picked up immediately without a cache bust.
16//!
17//! ## Cache keys
18//!
19//! The cache is **content-addressed**: lookups are by the combination of
20//! `source_hash` and `params_hash`, not by output file path. This means
21//! album renames, file renumbers, and slug changes do not invalidate the
22//! cache — only actual image content or encoding parameter changes do.
23//!
24//! - **`source_hash`**: SHA-256 of the source file contents. Content-based
25//!   rather than mtime-based so it survives `git checkout` (which resets
26//!   modification times). Computed once per source file and shared across all
27//!   its output variants.
28//!
29//! - **`params_hash`**: SHA-256 of the encoding parameters. For responsive
30//!   variants this includes (target width, quality). For thumbnails it includes
31//!   (aspect ratio, short edge, quality, sharpening). If any config value
32//!   changes, the params hash changes and the image is re-encoded.
33//!
34//! A cache hit requires:
35//! 1. An entry with matching `source_hash` and `params_hash` exists
36//! 2. The previously-written output file still exists on disk
37//!
38//! When a hit is found but the output path has changed (e.g. album renamed),
39//! the cached file is copied to the new location instead of re-encoding.
40//!
41//! ## Storage
42//!
43//! The cache manifest is a JSON file at `<output_dir>/.cache-manifest.json`.
44//! It lives alongside the processed images so it travels with the output
45//! directory when cached in CI (e.g. `actions/cache` on `dist/`).
46//!
47//! ## Bypassing the cache
48//!
49//! Pass `--no-cache` to the `build` or `process` command to force a full
50//! rebuild. This loads an empty manifest, so every image is re-encoded. The
51//! old output files are overwritten naturally.
52
53use sha2::{Digest, Sha256};
54use std::collections::{HashMap, HashSet};
55use std::fmt;
56use std::io;
57use std::path::{Path, PathBuf};
58
/// Name of the cache manifest file within the output directory.
///
/// Joined onto the caller-supplied output directory by `CacheManifest::load`,
/// `CacheManifest::save`, and `manifest_path`.
const MANIFEST_FILENAME: &str = ".cache-manifest.json";

/// Version of the cache manifest format. Bump this to invalidate all
/// existing caches when the format or key computation changes:
/// `CacheManifest::load` returns an empty manifest whenever the stored
/// `version` differs from this value.
const MANIFEST_VERSION: u32 = 1;
65
66/// A single cached output file.
67#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
68pub struct CacheEntry {
69    pub source_hash: String,
70    pub params_hash: String,
71}
72
73/// On-disk cache manifest mapping output paths to their cache entries.
74///
75/// Lookups go through a runtime `content_index` that maps
76/// `"{source_hash}:{params_hash}"` to the stored output path, making
77/// the cache resilient to album renames and file renumbering.
78#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
79pub struct CacheManifest {
80    pub version: u32,
81    pub entries: HashMap<String, CacheEntry>,
82    /// Runtime reverse index: `"{source_hash}:{params_hash}"` → output_path.
83    /// Built at load time, maintained on insert. Never serialized.
84    #[serde(skip)]
85    content_index: HashMap<String, String>,
86}
87
88impl CacheManifest {
89    /// Create an empty manifest (used for `--no-cache` or first build).
90    pub fn empty() -> Self {
91        Self {
92            version: MANIFEST_VERSION,
93            entries: HashMap::new(),
94            content_index: HashMap::new(),
95        }
96    }
97
98    /// Load from the output directory. Returns an empty manifest if the
99    /// file doesn't exist or can't be parsed (version mismatch, corruption).
100    pub fn load(output_dir: &Path) -> Self {
101        let path = output_dir.join(MANIFEST_FILENAME);
102        let content = match std::fs::read_to_string(&path) {
103            Ok(c) => c,
104            Err(_) => return Self::empty(),
105        };
106        let mut manifest: Self = match serde_json::from_str(&content) {
107            Ok(m) => m,
108            Err(_) => return Self::empty(),
109        };
110        if manifest.version != MANIFEST_VERSION {
111            return Self::empty();
112        }
113        manifest.content_index = build_content_index(&manifest.entries);
114        manifest
115    }
116
117    /// Save to the output directory.
118    pub fn save(&self, output_dir: &Path) -> io::Result<()> {
119        let path = output_dir.join(MANIFEST_FILENAME);
120        let json = serde_json::to_string_pretty(self)?;
121        std::fs::write(path, json)
122    }
123
124    /// Look up a cached output file by content hashes.
125    ///
126    /// Returns `Some(stored_output_path)` if an entry with matching
127    /// `source_hash` and `params_hash` exists **and** the file is still
128    /// on disk. The returned path may differ from the caller's expected
129    /// output path (e.g. after an album rename); the caller is responsible
130    /// for copying the file to the new location if needed.
131    pub fn find_cached(
132        &self,
133        source_hash: &str,
134        params_hash: &str,
135        output_dir: &Path,
136    ) -> Option<String> {
137        let content_key = format!("{}:{}", source_hash, params_hash);
138        let stored_path = self.content_index.get(&content_key)?;
139        if output_dir.join(stored_path).exists() {
140            Some(stored_path.clone())
141        } else {
142            None
143        }
144    }
145
146    /// Record a cache entry for an output file.
147    ///
148    /// If an entry with the same content (source_hash + params_hash) already
149    /// exists under a different output path, the old entry is removed to keep
150    /// the manifest clean when images move (e.g. album rename).
151    ///
152    /// If the output path already has an entry for *different* content (e.g.
153    /// image swap: file A moved to where B used to be), the old content's
154    /// `content_index` entry is removed so stale lookups don't return a file
155    /// whose content has been overwritten.
156    pub fn insert(&mut self, output_path: String, source_hash: String, params_hash: String) {
157        let content_key = format!("{}:{}", source_hash, params_hash);
158
159        // Remove stale entry if content moved to a new path
160        if let Some(old_path) = self.content_index.get(&content_key)
161            && *old_path != output_path
162        {
163            self.entries.remove(old_path.as_str());
164        }
165
166        // If this output path previously held different content, invalidate
167        // that content's lookup entry — the file on disk no longer matches.
168        if let Some(displaced) = self.entries.get(&output_path) {
169            let displaced_key = format!("{}:{}", displaced.source_hash, displaced.params_hash);
170            if displaced_key != content_key {
171                self.content_index.remove(&displaced_key);
172            }
173        }
174
175        self.content_index.insert(content_key, output_path.clone());
176        self.entries.insert(
177            output_path,
178            CacheEntry {
179                source_hash,
180                params_hash,
181            },
182        );
183    }
184
185    /// Remove all entries whose output path is not in `live_paths`, and
186    /// delete the corresponding files from `output_dir`.
187    ///
188    /// Call this after a full build to clean up processed files for images
189    /// that were deleted, renumbered, or belong to renamed/removed albums.
190    pub fn prune(&mut self, live_paths: &HashSet<String>, output_dir: &Path) -> u32 {
191        let stale: Vec<String> = self
192            .entries
193            .keys()
194            .filter(|p| !live_paths.contains(p.as_str()))
195            .cloned()
196            .collect();
197
198        let mut removed = 0u32;
199        for path in &stale {
200            if let Some(entry) = self.entries.remove(path) {
201                let content_key = format!("{}:{}", entry.source_hash, entry.params_hash);
202                self.content_index.remove(&content_key);
203            }
204            let file = output_dir.join(path);
205            if file.exists() {
206                let _ = std::fs::remove_file(&file);
207            }
208            removed += 1;
209        }
210        removed
211    }
212}
213
214/// Build the content_index reverse map from the entries map.
215fn build_content_index(entries: &HashMap<String, CacheEntry>) -> HashMap<String, String> {
216    entries
217        .iter()
218        .map(|(output_path, entry)| {
219            let content_key = format!("{}:{}", entry.source_hash, entry.params_hash);
220            (content_key, output_path.clone())
221        })
222        .collect()
223}
224
225/// SHA-256 hash of a file's contents, returned as a hex string.
226pub fn hash_file(path: &Path) -> io::Result<String> {
227    let bytes = std::fs::read(path)?;
228    let digest = Sha256::digest(&bytes);
229    Ok(format!("{:x}", digest))
230}
231
232/// SHA-256 hash of encoding parameters for a responsive variant.
233///
234/// Inputs: target width and quality. If any of these change, the
235/// previously cached output is invalid.
236pub fn hash_responsive_params(target_width: u32, quality: u32) -> String {
237    let mut hasher = Sha256::new();
238    hasher.update(b"responsive\0");
239    hasher.update(target_width.to_le_bytes());
240    hasher.update(quality.to_le_bytes());
241    format!("{:x}", hasher.finalize())
242}
243
244/// SHA-256 hash of encoding parameters for a thumbnail.
245///
246/// Inputs: aspect ratio, short edge size, quality, and sharpening
247/// settings. If any of these change, the thumbnail is re-generated.
248pub fn hash_thumbnail_params(
249    aspect: (u32, u32),
250    short_edge: u32,
251    quality: u32,
252    sharpening: Option<(f32, i32)>,
253) -> String {
254    let mut hasher = Sha256::new();
255    hasher.update(b"thumbnail\0");
256    hasher.update(aspect.0.to_le_bytes());
257    hasher.update(aspect.1.to_le_bytes());
258    hasher.update(short_edge.to_le_bytes());
259    hasher.update(quality.to_le_bytes());
260    match sharpening {
261        Some((sigma, threshold)) => {
262            hasher.update(b"\x01");
263            hasher.update(sigma.to_le_bytes());
264            hasher.update(threshold.to_le_bytes());
265        }
266        None => {
267            hasher.update(b"\x00");
268        }
269    }
270    format!("{:x}", hasher.finalize())
271}
272
/// Counters describing how a build run used the cache.
#[derive(Debug, Default)]
pub struct CacheStats {
    /// Number of recorded cache hits.
    pub hits: u32,
    /// Number of recorded copies.
    pub copies: u32,
    /// Number of recorded misses (reported as "encoded").
    pub misses: u32,
}

impl CacheStats {
    /// Count one cache hit.
    pub fn hit(&mut self) {
        self.hits += 1;
    }

    /// Count one copy.
    pub fn copy(&mut self) {
        self.copies += 1;
    }

    /// Count one miss.
    pub fn miss(&mut self) {
        self.misses += 1;
    }

    /// Sum of all three counters.
    pub fn total(&self) -> u32 {
        [self.hits, self.copies, self.misses].iter().sum()
    }
}

impl fmt::Display for CacheStats {
    /// Render a one-line summary.
    ///
    /// The "copied" figure appears only when at least one copy was counted;
    /// when there were neither hits nor copies, only the encoded count is
    /// printed.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            hits,
            copies,
            misses,
        } = self;

        if *copies > 0 {
            return write!(
                f,
                "{} cached, {} copied, {} encoded ({} total)",
                hits,
                copies,
                misses,
                self.total()
            );
        }
        if *hits > 0 {
            return write!(
                f,
                "{} cached, {} encoded ({} total)",
                hits,
                misses,
                self.total()
            );
        }
        write!(f, "{} encoded", misses)
    }
}
325
326/// Resolve the cache manifest path for an output directory.
327pub fn manifest_path(output_dir: &Path) -> PathBuf {
328    output_dir.join(MANIFEST_FILENAME)
329}
330
#[cfg(test)]
mod tests {
    //! Unit tests for the cache manifest, the hash helpers, and the stats
    //! formatter. Filesystem tests run inside a fresh temporary directory.

    use super::*;
    use std::fs;
    use tempfile::TempDir;

    // =========================================================================
    // CacheManifest basics
    // =========================================================================

    #[test]
    fn empty_manifest_has_no_entries() {
        let m = CacheManifest::empty();
        assert_eq!(m.version, MANIFEST_VERSION);
        assert!(m.entries.is_empty());
        assert!(m.content_index.is_empty());
    }

    #[test]
    fn find_cached_hit() {
        let tmp = TempDir::new().unwrap();
        let mut m = CacheManifest::empty();
        m.insert("a/b.avif".into(), "src123".into(), "prm456".into());

        let out = tmp.path().join("a");
        fs::create_dir_all(&out).unwrap();
        fs::write(out.join("b.avif"), "data").unwrap();

        assert_eq!(
            m.find_cached("src123", "prm456", tmp.path()),
            Some("a/b.avif".to_string())
        );
    }

    #[test]
    fn find_cached_miss_wrong_source_hash() {
        let tmp = TempDir::new().unwrap();
        let mut m = CacheManifest::empty();
        m.insert("out.avif".into(), "hash_a".into(), "params".into());
        fs::write(tmp.path().join("out.avif"), "data").unwrap();

        assert_eq!(m.find_cached("hash_b", "params", tmp.path()), None);
    }

    #[test]
    fn find_cached_miss_wrong_params_hash() {
        let tmp = TempDir::new().unwrap();
        let mut m = CacheManifest::empty();
        m.insert("out.avif".into(), "hash".into(), "params_a".into());
        fs::write(tmp.path().join("out.avif"), "data").unwrap();

        assert_eq!(m.find_cached("hash", "params_b", tmp.path()), None);
    }

    #[test]
    fn find_cached_miss_file_deleted() {
        let mut m = CacheManifest::empty();
        m.insert("gone.avif".into(), "h".into(), "p".into());
        let tmp = TempDir::new().unwrap();
        // File doesn't exist
        assert_eq!(m.find_cached("h", "p", tmp.path()), None);
    }

    #[test]
    fn find_cached_miss_no_entry() {
        let m = CacheManifest::empty();
        let tmp = TempDir::new().unwrap();
        assert_eq!(m.find_cached("h", "p", tmp.path()), None);
    }

    #[test]
    fn find_cached_returns_old_path_after_content_match() {
        let tmp = TempDir::new().unwrap();
        let mut m = CacheManifest::empty();
        m.insert(
            "old-album/01-800.avif".into(),
            "srchash".into(),
            "prmhash".into(),
        );

        let old_dir = tmp.path().join("old-album");
        fs::create_dir_all(&old_dir).unwrap();
        fs::write(old_dir.join("01-800.avif"), "avif data").unwrap();

        let result = m.find_cached("srchash", "prmhash", tmp.path());
        assert_eq!(result, Some("old-album/01-800.avif".to_string()));
    }

    #[test]
    fn insert_removes_stale_entry_on_path_change() {
        let mut m = CacheManifest::empty();
        m.insert("old-album/img-800.avif".into(), "src".into(), "prm".into());
        assert!(m.entries.contains_key("old-album/img-800.avif"));

        // Insert same content under new path
        m.insert("new-album/img-800.avif".into(), "src".into(), "prm".into());

        assert!(!m.entries.contains_key("old-album/img-800.avif"));
        assert!(m.entries.contains_key("new-album/img-800.avif"));
    }

    #[test]
    fn insert_invalidates_displaced_content_index() {
        let mut m = CacheManifest::empty();
        // Path "album/309-800.avif" holds content A
        m.insert(
            "album/309-800.avif".into(),
            "hash_A".into(),
            "params".into(),
        );
        assert_eq!(
            m.content_index.get("hash_A:params"),
            Some(&"album/309-800.avif".to_string())
        );

        // Now content B overwrites that path (image swap)
        m.insert(
            "album/309-800.avif".into(),
            "hash_B".into(),
            "params".into(),
        );

        // hash_A's content_index entry should be gone (file overwritten)
        assert_eq!(m.content_index.get("hash_A:params"), None);
        // hash_B points to the path
        assert_eq!(
            m.content_index.get("hash_B:params"),
            Some(&"album/309-800.avif".to_string())
        );
    }

    #[test]
    fn prune_removes_stale_entries_and_files() {
        let tmp = TempDir::new().unwrap();
        let mut m = CacheManifest::empty();
        m.insert("album/live.avif".into(), "s1".into(), "p1".into());
        m.insert("album/stale.avif".into(), "s2".into(), "p2".into());

        // Create both files on disk
        let dir = tmp.path().join("album");
        fs::create_dir_all(&dir).unwrap();
        fs::write(dir.join("live.avif"), "data").unwrap();
        fs::write(dir.join("stale.avif"), "data").unwrap();

        let mut live = HashSet::new();
        live.insert("album/live.avif".to_string());
        let removed = m.prune(&live, tmp.path());

        assert_eq!(removed, 1);
        assert!(m.entries.contains_key("album/live.avif"));
        assert!(!m.entries.contains_key("album/stale.avif"));
        assert!(dir.join("live.avif").exists());
        assert!(!dir.join("stale.avif").exists());
    }

    #[test]
    fn content_index_rebuilt_on_load() {
        let tmp = TempDir::new().unwrap();
        let mut m = CacheManifest::empty();
        m.insert("a/x.avif".into(), "s1".into(), "p1".into());
        m.insert("b/y.avif".into(), "s2".into(), "p2".into());
        m.save(tmp.path()).unwrap();

        let loaded = CacheManifest::load(tmp.path());
        assert_eq!(
            loaded.find_cached("s1", "p1", tmp.path()),
            None // files don't exist, but index was built
        );
        assert_eq!(
            loaded.content_index.get("s1:p1"),
            Some(&"a/x.avif".to_string())
        );
        assert_eq!(
            loaded.content_index.get("s2:p2"),
            Some(&"b/y.avif".to_string())
        );
    }

    // =========================================================================
    // Save / Load roundtrip
    // =========================================================================

    #[test]
    fn save_and_load_roundtrip() {
        let tmp = TempDir::new().unwrap();
        let mut m = CacheManifest::empty();
        m.insert("x.avif".into(), "s1".into(), "p1".into());
        m.insert("y.avif".into(), "s2".into(), "p2".into());

        m.save(tmp.path()).unwrap();
        let loaded = CacheManifest::load(tmp.path());

        assert_eq!(loaded.version, MANIFEST_VERSION);
        assert_eq!(loaded.entries.len(), 2);
        assert_eq!(
            loaded.entries["x.avif"],
            CacheEntry {
                source_hash: "s1".into(),
                params_hash: "p1".into()
            }
        );
    }

    #[test]
    fn load_missing_file_returns_empty() {
        let tmp = TempDir::new().unwrap();
        let m = CacheManifest::load(tmp.path());
        assert!(m.entries.is_empty());
    }

    #[test]
    fn load_corrupt_json_returns_empty() {
        let tmp = TempDir::new().unwrap();
        fs::write(tmp.path().join(MANIFEST_FILENAME), "not json").unwrap();
        let m = CacheManifest::load(tmp.path());
        assert!(m.entries.is_empty());
    }

    #[test]
    fn load_wrong_version_returns_empty() {
        let tmp = TempDir::new().unwrap();
        let json = format!(
            r#"{{"version": {}, "entries": {{"a": {{"source_hash":"h","params_hash":"p"}}}}}}"#,
            MANIFEST_VERSION + 1
        );
        fs::write(tmp.path().join(MANIFEST_FILENAME), json).unwrap();
        let m = CacheManifest::load(tmp.path());
        assert!(m.entries.is_empty());
    }

    // =========================================================================
    // Hash functions
    // =========================================================================

    #[test]
    fn hash_file_deterministic() {
        let tmp = TempDir::new().unwrap();
        let path = tmp.path().join("test.bin");
        fs::write(&path, b"hello world").unwrap();

        let h1 = hash_file(&path).unwrap();
        let h2 = hash_file(&path).unwrap();
        assert_eq!(h1, h2);
        assert_eq!(h1.len(), 64); // SHA-256 hex is 64 chars
    }

    #[test]
    fn hash_file_changes_with_content() {
        let tmp = TempDir::new().unwrap();
        let path = tmp.path().join("test.bin");

        fs::write(&path, b"version 1").unwrap();
        let h1 = hash_file(&path).unwrap();

        fs::write(&path, b"version 2").unwrap();
        let h2 = hash_file(&path).unwrap();

        assert_ne!(h1, h2);
    }

    #[test]
    fn hash_responsive_params_deterministic() {
        let h1 = hash_responsive_params(1400, 90);
        let h2 = hash_responsive_params(1400, 90);
        assert_eq!(h1, h2);
    }

    #[test]
    fn hash_responsive_params_varies_with_width() {
        assert_ne!(
            hash_responsive_params(800, 90),
            hash_responsive_params(1400, 90)
        );
    }

    #[test]
    fn hash_responsive_params_varies_with_quality() {
        assert_ne!(
            hash_responsive_params(800, 85),
            hash_responsive_params(800, 90)
        );
    }

    #[test]
    fn hash_thumbnail_params_deterministic() {
        let h1 = hash_thumbnail_params((4, 5), 400, 90, Some((0.5, 0)));
        let h2 = hash_thumbnail_params((4, 5), 400, 90, Some((0.5, 0)));
        assert_eq!(h1, h2);
    }

    #[test]
    fn hash_thumbnail_params_varies_with_aspect() {
        assert_ne!(
            hash_thumbnail_params((4, 5), 400, 90, None),
            hash_thumbnail_params((16, 9), 400, 90, None)
        );
    }

    #[test]
    fn hash_thumbnail_params_varies_with_sharpening() {
        assert_ne!(
            hash_thumbnail_params((4, 5), 400, 90, Some((0.5, 0))),
            hash_thumbnail_params((4, 5), 400, 90, None)
        );
    }

    // =========================================================================
    // CacheStats
    // =========================================================================

    // The stats values below are built with struct-update syntax rather than
    // `default()` followed by field assignments, which Clippy flags as
    // `field_reassign_with_default`.

    #[test]
    fn cache_stats_display_with_hits() {
        let s = CacheStats {
            hits: 5,
            misses: 2,
            ..Default::default()
        };
        assert_eq!(format!("{}", s), "5 cached, 2 encoded (7 total)");
    }

    #[test]
    fn cache_stats_display_with_copies() {
        let s = CacheStats {
            hits: 3,
            copies: 2,
            misses: 1,
        };
        assert_eq!(format!("{}", s), "3 cached, 2 copied, 1 encoded (6 total)");
    }

    #[test]
    fn cache_stats_display_no_hits() {
        let s = CacheStats {
            misses: 3,
            ..Default::default()
        };
        assert_eq!(format!("{}", s), "3 encoded");
    }
}
664}