Skip to main content

simple_gal/
cache.rs

1//! Image processing cache for incremental builds.
2//!
3//! AVIF encoding is the bottleneck of the build pipeline — a single image
4//! at three responsive sizes can take several seconds through rav1e. This
5//! module lets the process stage skip encoding when the source image and
6//! encoding parameters haven't changed since the last build.
7//!
8//! # Design
9//!
10//! The cache targets only the expensive encoding operations
11//! ([`create_responsive_images`](crate::imaging::create_responsive_images) and
12//! [`create_thumbnail`](crate::imaging::create_thumbnail)). Everything else
13//! — dimension reads, IPTC metadata extraction, title/description resolution —
14//! always runs. This means metadata changes (e.g. updating an IPTC title in
15//! Lightroom) are picked up immediately without a cache bust.
16//!
17//! ## Cache keys
18//!
19//! Each output file (e.g. `NY/001-Storm_1400.avif`) is keyed by two values:
20//!
21//! - **`source_hash`**: SHA-256 of the source file contents. Content-based
22//!   rather than mtime-based so it survives `git checkout` (which resets
23//!   modification times). Computed once per source file and shared across all
24//!   its output variants.
25//!
26//! - **`params_hash`**: SHA-256 of the encoding parameters. For responsive
27//!   variants this includes (target width, quality). For thumbnails it includes
28//!   (aspect ratio, short edge, quality, sharpening). If any config value
29//!   changes, the params hash changes and the image is re-encoded.
30//!
31//! A cache hit requires **all four** conditions:
32//! 1. Entry exists in the manifest
33//! 2. `source_hash` matches
34//! 3. `params_hash` matches
35//! 4. Output file exists on disk
36//!
37//! ## Storage
38//!
39//! The cache manifest is a JSON file at `<output_dir>/.cache-manifest.json`.
40//! It lives alongside the processed images so it travels with the output
41//! directory when cached in CI (e.g. `actions/cache` on `dist/`).
42//!
43//! ## Bypassing the cache
44//!
45//! Pass `--no-cache` to the `build` or `process` command to force a full
46//! rebuild. This loads an empty manifest, so every image is re-encoded. The
47//! old output files are overwritten naturally.
48
49use sha2::{Digest, Sha256};
50use std::collections::HashMap;
51use std::fmt;
52use std::io;
53use std::path::{Path, PathBuf};
54
/// File name of the JSON cache manifest, resolved relative to the output
/// directory.
const MANIFEST_FILENAME: &str = ".cache-manifest.json";

/// Format version stamped into every manifest.
///
/// A manifest read from disk with any other version is discarded, so bumping
/// this value invalidates all existing caches. Do so whenever the manifest
/// format or the cache-key computation changes.
const MANIFEST_VERSION: u32 = 1;
61
62/// A single cached output file.
63#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
64pub struct CacheEntry {
65    pub source_hash: String,
66    pub params_hash: String,
67}
68
69/// On-disk cache manifest mapping output paths to their cache entries.
70#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
71pub struct CacheManifest {
72    pub version: u32,
73    pub entries: HashMap<String, CacheEntry>,
74}
75
76impl CacheManifest {
77    /// Create an empty manifest (used for `--no-cache` or first build).
78    pub fn empty() -> Self {
79        Self {
80            version: MANIFEST_VERSION,
81            entries: HashMap::new(),
82        }
83    }
84
85    /// Load from the output directory. Returns an empty manifest if the
86    /// file doesn't exist or can't be parsed (version mismatch, corruption).
87    pub fn load(output_dir: &Path) -> Self {
88        let path = output_dir.join(MANIFEST_FILENAME);
89        let content = match std::fs::read_to_string(&path) {
90            Ok(c) => c,
91            Err(_) => return Self::empty(),
92        };
93        let manifest: Self = match serde_json::from_str(&content) {
94            Ok(m) => m,
95            Err(_) => return Self::empty(),
96        };
97        if manifest.version != MANIFEST_VERSION {
98            return Self::empty();
99        }
100        manifest
101    }
102
103    /// Save to the output directory.
104    pub fn save(&self, output_dir: &Path) -> io::Result<()> {
105        let path = output_dir.join(MANIFEST_FILENAME);
106        let json = serde_json::to_string_pretty(self)?;
107        std::fs::write(path, json)
108    }
109
110    /// Check whether an output file can be reused.
111    ///
112    /// Returns `true` only if the manifest has a matching entry (same
113    /// source hash, same params hash) **and** the output file still
114    /// exists on disk.
115    pub fn is_cached(
116        &self,
117        output_path: &str,
118        source_hash: &str,
119        params_hash: &str,
120        output_dir: &Path,
121    ) -> bool {
122        match self.entries.get(output_path) {
123            Some(entry) => {
124                entry.source_hash == source_hash
125                    && entry.params_hash == params_hash
126                    && output_dir.join(output_path).exists()
127            }
128            None => false,
129        }
130    }
131
132    /// Record a (possibly new) cache entry for an output file.
133    pub fn insert(&mut self, output_path: String, source_hash: String, params_hash: String) {
134        self.entries.insert(
135            output_path,
136            CacheEntry {
137                source_hash,
138                params_hash,
139            },
140        );
141    }
142}
143
144/// SHA-256 hash of a file's contents, returned as a hex string.
145pub fn hash_file(path: &Path) -> io::Result<String> {
146    let bytes = std::fs::read(path)?;
147    let digest = Sha256::digest(&bytes);
148    Ok(format!("{:x}", digest))
149}
150
151/// SHA-256 hash of encoding parameters for a responsive variant.
152///
153/// Inputs: target width and quality. If any of these change, the
154/// previously cached output is invalid.
155pub fn hash_responsive_params(target_width: u32, quality: u32) -> String {
156    let mut hasher = Sha256::new();
157    hasher.update(b"responsive\0");
158    hasher.update(target_width.to_le_bytes());
159    hasher.update(quality.to_le_bytes());
160    format!("{:x}", hasher.finalize())
161}
162
163/// SHA-256 hash of encoding parameters for a thumbnail.
164///
165/// Inputs: aspect ratio, short edge size, quality, and sharpening
166/// settings. If any of these change, the thumbnail is re-generated.
167pub fn hash_thumbnail_params(
168    aspect: (u32, u32),
169    short_edge: u32,
170    quality: u32,
171    sharpening: Option<(f32, i32)>,
172) -> String {
173    let mut hasher = Sha256::new();
174    hasher.update(b"thumbnail\0");
175    hasher.update(aspect.0.to_le_bytes());
176    hasher.update(aspect.1.to_le_bytes());
177    hasher.update(short_edge.to_le_bytes());
178    hasher.update(quality.to_le_bytes());
179    match sharpening {
180        Some((sigma, threshold)) => {
181            hasher.update(b"\x01");
182            hasher.update(sigma.to_le_bytes());
183            hasher.update(threshold.to_le_bytes());
184        }
185        None => {
186            hasher.update(b"\x00");
187        }
188    }
189    format!("{:x}", hasher.finalize())
190}
191
/// Hit/miss counters collected over one build run.
#[derive(Default, Debug)]
pub struct CacheStats {
    /// Number of hits recorded so far.
    pub hits: u32,
    /// Number of misses recorded so far.
    pub misses: u32,
}

impl CacheStats {
    /// Count one more cache hit.
    pub fn hit(&mut self) {
        self.hits += 1;
    }

    /// Count one more cache miss.
    pub fn miss(&mut self) {
        self.misses += 1;
    }

    /// Hits and misses combined.
    pub fn total(&self) -> u32 {
        self.hits + self.misses
    }
}

/// One-line summary: the short `"N encoded"` form when there were no hits,
/// otherwise the cached / encoded / total breakdown.
impl fmt::Display for CacheStats {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (cached, encoded) = (self.hits, self.misses);
        if cached == 0 {
            return write!(f, "{} encoded", encoded);
        }
        write!(
            f,
            "{} cached, {} encoded ({} total)",
            cached,
            encoded,
            self.total()
        )
    }
}
228
229/// Resolve the cache manifest path for an output directory.
230pub fn manifest_path(output_dir: &Path) -> PathBuf {
231    output_dir.join(MANIFEST_FILENAME)
232}
233
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Fresh scratch directory standing in for the output directory.
    fn scratch_dir() -> TempDir {
        TempDir::new().unwrap()
    }

    /// Manifest holding exactly one entry.
    fn manifest_with(output: &str, source_hash: &str, params_hash: &str) -> CacheManifest {
        let mut manifest = CacheManifest::empty();
        manifest.insert(output.into(), source_hash.into(), params_hash.into());
        manifest
    }

    /// Write a placeholder output file, creating parent directories as needed.
    fn touch(root: &Path, relative: &str) {
        let target = root.join(relative);
        if let Some(parent) = target.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(target, "data").unwrap();
    }

    // =========================================================================
    // CacheManifest basics
    // =========================================================================

    #[test]
    fn empty_manifest_has_no_entries() {
        let manifest = CacheManifest::empty();
        assert_eq!(manifest.version, MANIFEST_VERSION);
        assert!(manifest.entries.is_empty());
    }

    #[test]
    fn insert_and_lookup() {
        let dir = scratch_dir();
        let manifest = manifest_with("a/b.avif", "src123", "prm456");

        // The output must exist on disk for the lookup to count as a hit.
        touch(dir.path(), "a/b.avif");

        assert!(manifest.is_cached("a/b.avif", "src123", "prm456", dir.path()));
    }

    #[test]
    fn cache_miss_wrong_source_hash() {
        let dir = scratch_dir();
        let manifest = manifest_with("out.avif", "hash_a", "params");
        touch(dir.path(), "out.avif");

        assert!(!manifest.is_cached("out.avif", "hash_b", "params", dir.path()));
    }

    #[test]
    fn cache_miss_wrong_params_hash() {
        let dir = scratch_dir();
        let manifest = manifest_with("out.avif", "hash", "params_a");
        touch(dir.path(), "out.avif");

        assert!(!manifest.is_cached("out.avif", "hash", "params_b", dir.path()));
    }

    #[test]
    fn cache_miss_file_deleted() {
        let manifest = manifest_with("gone.avif", "h", "p");
        let dir = scratch_dir();

        // The entry matches, but nothing was ever written to disk.
        assert!(!manifest.is_cached("gone.avif", "h", "p", dir.path()));
    }

    #[test]
    fn cache_miss_no_entry() {
        let manifest = CacheManifest::empty();
        let dir = scratch_dir();
        assert!(!manifest.is_cached("nope.avif", "h", "p", dir.path()));
    }

    // =========================================================================
    // Save / Load roundtrip
    // =========================================================================

    #[test]
    fn save_and_load_roundtrip() {
        let dir = scratch_dir();
        let mut manifest = CacheManifest::empty();
        manifest.insert("x.avif".into(), "s1".into(), "p1".into());
        manifest.insert("y.avif".into(), "s2".into(), "p2".into());

        manifest.save(dir.path()).unwrap();
        let reloaded = CacheManifest::load(dir.path());

        let expected_x = CacheEntry {
            source_hash: "s1".into(),
            params_hash: "p1".into(),
        };
        assert_eq!(reloaded.version, MANIFEST_VERSION);
        assert_eq!(reloaded.entries.len(), 2);
        assert_eq!(reloaded.entries["x.avif"], expected_x);
    }

    #[test]
    fn load_missing_file_returns_empty() {
        let dir = scratch_dir();
        assert!(CacheManifest::load(dir.path()).entries.is_empty());
    }

    #[test]
    fn load_corrupt_json_returns_empty() {
        let dir = scratch_dir();
        fs::write(dir.path().join(MANIFEST_FILENAME), "not json").unwrap();

        assert!(CacheManifest::load(dir.path()).entries.is_empty());
    }

    #[test]
    fn load_wrong_version_returns_empty() {
        let dir = scratch_dir();

        // A well-formed manifest that claims a newer format version.
        let mut stale = manifest_with("a", "h", "p");
        stale.version = MANIFEST_VERSION + 1;
        let json = serde_json::to_string(&stale).unwrap();
        fs::write(dir.path().join(MANIFEST_FILENAME), json).unwrap();

        assert!(CacheManifest::load(dir.path()).entries.is_empty());
    }

    // =========================================================================
    // Hash functions
    // =========================================================================

    #[test]
    fn hash_file_deterministic() {
        let dir = scratch_dir();
        let file = dir.path().join("test.bin");
        fs::write(&file, b"hello world").unwrap();

        let first = hash_file(&file).unwrap();
        let second = hash_file(&file).unwrap();

        assert_eq!(first, second);
        // A SHA-256 digest rendered as hex is 64 characters long.
        assert_eq!(first.len(), 64);
    }

    #[test]
    fn hash_file_changes_with_content() {
        let dir = scratch_dir();
        let file = dir.path().join("test.bin");

        fs::write(&file, b"version 1").unwrap();
        let before = hash_file(&file).unwrap();

        fs::write(&file, b"version 2").unwrap();
        let after = hash_file(&file).unwrap();

        assert_ne!(before, after);
    }

    #[test]
    fn hash_responsive_params_deterministic() {
        assert_eq!(
            hash_responsive_params(1400, 90),
            hash_responsive_params(1400, 90)
        );
    }

    #[test]
    fn hash_responsive_params_varies_with_width() {
        let narrow = hash_responsive_params(800, 90);
        let wide = hash_responsive_params(1400, 90);
        assert_ne!(narrow, wide);
    }

    #[test]
    fn hash_responsive_params_varies_with_quality() {
        let lower = hash_responsive_params(800, 85);
        let higher = hash_responsive_params(800, 90);
        assert_ne!(lower, higher);
    }

    #[test]
    fn hash_thumbnail_params_deterministic() {
        assert_eq!(
            hash_thumbnail_params((4, 5), 400, 90, Some((0.5, 0))),
            hash_thumbnail_params((4, 5), 400, 90, Some((0.5, 0)))
        );
    }

    #[test]
    fn hash_thumbnail_params_varies_with_aspect() {
        let portrait = hash_thumbnail_params((4, 5), 400, 90, None);
        let widescreen = hash_thumbnail_params((16, 9), 400, 90, None);
        assert_ne!(portrait, widescreen);
    }

    #[test]
    fn hash_thumbnail_params_varies_with_sharpening() {
        let sharpened = hash_thumbnail_params((4, 5), 400, 90, Some((0.5, 0)));
        let plain = hash_thumbnail_params((4, 5), 400, 90, None);
        assert_ne!(sharpened, plain);
    }

    // =========================================================================
    // CacheStats
    // =========================================================================

    #[test]
    fn cache_stats_display_with_hits() {
        let stats = CacheStats { hits: 5, misses: 2 };
        assert_eq!(stats.to_string(), "5 cached, 2 encoded (7 total)");
    }

    #[test]
    fn cache_stats_display_no_hits() {
        let stats = CacheStats { hits: 0, misses: 3 };
        assert_eq!(stats.to_string(), "3 encoded");
    }
}