Skip to main content

shuck_cache/
lib.rs

1#![warn(missing_docs)]
2
3//! File-oriented cache keys and persistent package caches for Shuck.
4//!
5//! The types in this crate power the `shuck` CLI cache, but are generic enough to reuse in other
6//! Rust tooling that wants SHA-256-based cache partitioning and serialized per-file entries.
7use std::collections::{BTreeMap, BTreeSet};
8use std::fs::{self, File};
9use std::io::{self, BufReader, Write};
10use std::path::{Path, PathBuf};
11use std::time::{Duration, SystemTime, UNIX_EPOCH};
12
13use serde::Serialize;
14use serde::de::DeserializeOwned;
15use sha2::{Digest, Sha256};
16use tempfile::NamedTempFile;
17
/// Name of the legacy per-project cache directory used by older shuck releases.
pub const CACHE_DIR_NAME: &str = ".shuck_cache";

/// Longest time a cached file may go unseen before it is dropped when the cache is saved
/// (30 days).
const MAX_LAST_SEEN_AGE: Duration = {
    const SECONDS_PER_DAY: u64 = 24 * 60 * 60;
    Duration::from_secs(30 * SECONDS_PER_DAY)
};
22
23/// Returns the legacy cache directory that lives under a project root.
24pub fn legacy_cache_dir(project_root: &Path) -> PathBuf {
25    project_root.join(CACHE_DIR_NAME)
26}
27
28/// Reads the cached project root marker stored in a legacy cache file.
29pub fn read_project_root_from_cache_file(path: &Path) -> io::Result<Option<PathBuf>> {
30    let file = match File::open(path) {
31        Ok(file) => file,
32        Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(None),
33        Err(err) => return Err(err),
34    };
35
36    let mut reader = BufReader::new(file);
37    match bincode::serde::decode_from_std_read(&mut reader, bincode::config::standard()) {
38        Ok(project_root) => Ok(Some(project_root)),
39        Err(_) => Ok(None),
40    }
41}
42
43/// Trait for values that can contribute to a deterministic package cache key.
44#[allow(missing_docs)]
45pub trait CacheKey {
46    fn cache_key(&self, state: &mut CacheKeyHasher);
47}
48
49/// Incremental hasher used to build structured cache keys.
50pub struct CacheKeyHasher {
51    hasher: Sha256,
52}
53
54#[allow(missing_docs)]
55impl CacheKeyHasher {
56    #[must_use]
57    pub fn new() -> Self {
58        Self {
59            hasher: Sha256::new(),
60        }
61    }
62
63    pub fn write_tag(&mut self, tag: &[u8]) {
64        self.write_bytes(tag);
65    }
66
67    pub fn write_bool(&mut self, value: bool) {
68        self.hasher.update([u8::from(value)]);
69    }
70
71    pub fn write_u8(&mut self, value: u8) {
72        self.hasher.update([value]);
73    }
74
75    pub fn write_u32(&mut self, value: u32) {
76        self.hasher.update(value.to_le_bytes());
77    }
78
79    pub fn write_u64(&mut self, value: u64) {
80        self.hasher.update(value.to_le_bytes());
81    }
82
83    pub fn write_u128(&mut self, value: u128) {
84        self.hasher.update(value.to_le_bytes());
85    }
86
87    pub fn write_usize(&mut self, value: usize) {
88        self.write_u64(value as u64);
89    }
90
91    pub fn write_str(&mut self, value: &str) {
92        self.write_bytes(value.as_bytes());
93    }
94
95    pub fn write_bytes(&mut self, bytes: &[u8]) {
96        self.write_u64(bytes.len() as u64);
97        self.hasher.update(bytes);
98    }
99
100    #[must_use]
101    pub fn finish_hex(self) -> String {
102        let digest = self.hasher.finalize();
103        let mut out = String::with_capacity(digest.len() * 2);
104        for byte in digest {
105            use std::fmt::Write as _;
106            let _ = write!(&mut out, "{byte:02x}");
107        }
108        out
109    }
110}
111
112impl Default for CacheKeyHasher {
113    fn default() -> Self {
114        Self::new()
115    }
116}
117
118/// Returns the hex-encoded cache key for a value.
119#[must_use]
120pub fn cache_key_hex<T: CacheKey>(value: &T) -> String {
121    let mut hasher = CacheKeyHasher::new();
122    value.cache_key(&mut hasher);
123    hasher.finish_hex()
124}
125
126impl CacheKey for bool {
127    fn cache_key(&self, state: &mut CacheKeyHasher) {
128        state.write_bool(*self);
129    }
130}
131
132impl CacheKey for u8 {
133    fn cache_key(&self, state: &mut CacheKeyHasher) {
134        state.write_u8(*self);
135    }
136}
137
138impl CacheKey for u32 {
139    fn cache_key(&self, state: &mut CacheKeyHasher) {
140        state.write_u32(*self);
141    }
142}
143
144impl CacheKey for u64 {
145    fn cache_key(&self, state: &mut CacheKeyHasher) {
146        state.write_u64(*self);
147    }
148}
149
150impl CacheKey for u128 {
151    fn cache_key(&self, state: &mut CacheKeyHasher) {
152        state.write_u128(*self);
153    }
154}
155
156impl CacheKey for usize {
157    fn cache_key(&self, state: &mut CacheKeyHasher) {
158        state.write_usize(*self);
159    }
160}
161
162impl CacheKey for str {
163    fn cache_key(&self, state: &mut CacheKeyHasher) {
164        state.write_str(self);
165    }
166}
167
168impl CacheKey for String {
169    fn cache_key(&self, state: &mut CacheKeyHasher) {
170        self.as_str().cache_key(state);
171    }
172}
173
174impl<T: CacheKey + ?Sized> CacheKey for &T {
175    fn cache_key(&self, state: &mut CacheKeyHasher) {
176        (**self).cache_key(state);
177    }
178}
179
180impl<T: CacheKey> CacheKey for Option<T> {
181    fn cache_key(&self, state: &mut CacheKeyHasher) {
182        match self {
183            Some(value) => {
184                state.write_u8(1);
185                value.cache_key(state);
186            }
187            None => state.write_u8(0),
188        }
189    }
190}
191
192impl<T: CacheKey> CacheKey for [T] {
193    fn cache_key(&self, state: &mut CacheKeyHasher) {
194        state.write_usize(self.len());
195        for value in self {
196            value.cache_key(state);
197        }
198    }
199}
200
201impl<T: CacheKey> CacheKey for Vec<T> {
202    fn cache_key(&self, state: &mut CacheKeyHasher) {
203        self.as_slice().cache_key(state);
204    }
205}
206
207impl CacheKey for Path {
208    fn cache_key(&self, state: &mut CacheKeyHasher) {
209        state.write_str(&self.to_string_lossy());
210    }
211}
212
213impl CacheKey for PathBuf {
214    fn cache_key(&self, state: &mut CacheKeyHasher) {
215        self.as_path().cache_key(state);
216    }
217}
218
219impl<K, V> CacheKey for BTreeMap<K, V>
220where
221    K: CacheKey + Ord,
222    V: CacheKey,
223{
224    fn cache_key(&self, state: &mut CacheKeyHasher) {
225        state.write_usize(self.len());
226        for (key, value) in self {
227            key.cache_key(state);
228            value.cache_key(state);
229        }
230    }
231}
232
233/// File metadata used to validate cached entries against a filesystem path.
234#[allow(missing_docs)]
235#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
236pub struct FileCacheKey {
237    pub file_last_modified_ms: u128,
238    pub file_permissions_mode: u32,
239    pub file_size_bytes: u64,
240}
241
242#[allow(missing_docs)]
243impl FileCacheKey {
244    pub fn from_path(path: &Path) -> io::Result<Self> {
245        let metadata = path.metadata()?;
246        let file_last_modified_ms = metadata
247            .modified()
248            .and_then(|modified| {
249                modified
250                    .duration_since(UNIX_EPOCH)
251                    .map_err(io::Error::other)
252            })?
253            .as_millis();
254
255        #[cfg(unix)]
256        let file_permissions_mode = {
257            use std::os::unix::fs::PermissionsExt;
258            metadata.permissions().mode()
259        };
260
261        #[cfg(windows)]
262        let file_permissions_mode: u32 = u32::from(metadata.permissions().readonly());
263
264        Ok(Self {
265            file_last_modified_ms,
266            file_permissions_mode,
267            file_size_bytes: metadata.len(),
268        })
269    }
270}
271
272#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
273struct CachedFile<T> {
274    key: FileCacheKey,
275    last_seen_ms: u64,
276    data: T,
277}
278
279#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
280struct StoredPackage<T> {
281    project_root: PathBuf,
282    files: BTreeMap<PathBuf, CachedFile<T>>,
283}
284
285#[derive(Debug, Clone)]
286struct Change<T> {
287    key: FileCacheKey,
288    data: T,
289}
290
291/// On-disk cache for file-scoped analysis results within a package.
292#[derive(Debug, Clone)]
293pub struct PackageCache<T> {
294    path: PathBuf,
295    package: StoredPackage<T>,
296    seen_paths: BTreeSet<PathBuf>,
297    changes: BTreeMap<PathBuf, Change<T>>,
298    last_seen_ms: u64,
299}
300
301#[allow(missing_docs)]
302impl<T> PackageCache<T>
303where
304    T: Clone + Serialize + DeserializeOwned,
305{
306    pub fn open(
307        cache_root: &Path,
308        canonical_root: PathBuf,
309        tool_version: &str,
310        package_key: &impl CacheKey,
311    ) -> io::Result<Self> {
312        let key = cache_key_hex(package_key);
313        let path = cache_root.join(tool_version).join(format!("{key}.bin"));
314
315        let file = match File::open(&path) {
316            Ok(file) => file,
317            Err(err) if err.kind() == io::ErrorKind::NotFound => {
318                return Ok(Self::empty(path, canonical_root));
319            }
320            Err(err) => return Err(err),
321        };
322
323        let mut reader = BufReader::new(file);
324        let package: StoredPackage<T> =
325            match bincode::serde::decode_from_std_read(&mut reader, bincode::config::standard()) {
326                Ok(package) => package,
327                Err(_) => return Ok(Self::empty(path, canonical_root)),
328            };
329
330        if package.project_root != canonical_root {
331            return Ok(Self::empty(path, canonical_root));
332        }
333
334        Ok(Self {
335            path,
336            package,
337            seen_paths: BTreeSet::new(),
338            changes: BTreeMap::new(),
339            last_seen_ms: current_time_ms(),
340        })
341    }
342
343    #[must_use]
344    pub fn path(&self) -> &Path {
345        &self.path
346    }
347
348    pub fn get(&mut self, relative_path: &Path, key: &FileCacheKey) -> Option<T> {
349        let file = self.package.files.get(relative_path)?;
350        if &file.key != key {
351            return None;
352        }
353
354        self.seen_paths.insert(relative_path.to_path_buf());
355        Some(file.data.clone())
356    }
357
358    pub fn insert(&mut self, relative_path: PathBuf, key: FileCacheKey, data: T) {
359        self.seen_paths.insert(relative_path.clone());
360        self.changes.insert(relative_path, Change { key, data });
361    }
362
363    pub fn persist(mut self) -> io::Result<()> {
364        if !self.save() {
365            return Ok(());
366        }
367
368        let parent = self
369            .path
370            .parent()
371            .ok_or_else(|| io::Error::other("cache path has no parent directory"))?;
372        fs::create_dir_all(parent)?;
373
374        let mut temp_file = NamedTempFile::new_in(parent)?;
375        let encoded = bincode::serde::encode_to_vec(&self.package, bincode::config::standard())
376            .map_err(io::Error::other)?;
377        temp_file.write_all(&encoded)?;
378
379        match temp_file.persist(&self.path) {
380            Ok(_) => Ok(()),
381            Err(err) => Err(err.error),
382        }
383    }
384
385    fn empty(path: PathBuf, canonical_root: PathBuf) -> Self {
386        Self {
387            path,
388            package: StoredPackage {
389                project_root: canonical_root,
390                files: BTreeMap::new(),
391            },
392            seen_paths: BTreeSet::new(),
393            changes: BTreeMap::new(),
394            last_seen_ms: current_time_ms(),
395        }
396    }
397
398    fn save(&mut self) -> bool {
399        if self.seen_paths.is_empty() && self.changes.is_empty() {
400            return false;
401        }
402
403        let max_age_ms = MAX_LAST_SEEN_AGE.as_millis() as u64;
404        let now = self.last_seen_ms;
405
406        self.package
407            .files
408            .retain(|_, file| now.saturating_sub(file.last_seen_ms) <= max_age_ms);
409
410        for path in &self.seen_paths {
411            if let Some(change) = self.changes.remove(path) {
412                self.package.files.insert(
413                    path.clone(),
414                    CachedFile {
415                        key: change.key,
416                        last_seen_ms: now,
417                        data: change.data,
418                    },
419                );
420            } else if let Some(existing) = self.package.files.get_mut(path) {
421                existing.last_seen_ms = now;
422            }
423        }
424
425        for (path, change) in std::mem::take(&mut self.changes) {
426            self.package.files.insert(
427                path,
428                CachedFile {
429                    key: change.key,
430                    last_seen_ms: now,
431                    data: change.data,
432                },
433            );
434        }
435
436        true
437    }
438}
439
/// Returns the current wall-clock time in milliseconds since the Unix epoch.
///
/// A clock set before the epoch yields `0`.
fn current_time_ms() -> u64 {
    let since_epoch = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed,
        Err(_) => Duration::ZERO,
    };
    since_epoch.as_millis() as u64
}
446
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal settings type used as the package key in these tests.
    #[derive(Debug, Clone)]
    struct TestSettings {
        strict: bool,
        label: String,
    }

    impl CacheKey for TestSettings {
        fn cache_key(&self, state: &mut CacheKeyHasher) {
            state.write_tag(b"test-settings");
            self.strict.cache_key(state);
            self.label.cache_key(state);
        }
    }

    /// Settings labelled "alpha" with the given strictness.
    fn settings(strict: bool) -> TestSettings {
        TestSettings {
            strict,
            label: "alpha".to_string(),
        }
    }

    /// File key with mode `0o644` and the given modification time and size.
    fn file_key(modified_ms: u128, size_bytes: u64) -> FileCacheKey {
        FileCacheKey {
            file_last_modified_ms: modified_ms,
            file_permissions_mode: 0o644,
            file_size_bytes: size_bytes,
        }
    }

    /// Decodes the package stored at `path`.
    fn read_stored(path: &Path) -> StoredPackage<String> {
        let mut reader = BufReader::new(File::open(path).unwrap());
        bincode::serde::decode_from_std_read(&mut reader, bincode::config::standard()).unwrap()
    }

    /// Temporary cache and project directories shared by the persistence tests.
    struct Fixture {
        // Held so the directory lives until the end of the test.
        _tempdir: tempfile::TempDir,
        cache_root: PathBuf,
        canonical_root: PathBuf,
        settings: TestSettings,
    }

    impl Fixture {
        fn new() -> Self {
            let tempdir = tempfile::tempdir().unwrap();
            let cache_root = tempdir.path().join("cache");
            let storage_root = tempdir.path().join("project");
            fs::create_dir_all(&storage_root).unwrap();
            let canonical_root = fs::canonicalize(&storage_root).unwrap();

            Self {
                _tempdir: tempdir,
                cache_root,
                canonical_root,
                settings: settings(true),
            }
        }

        /// Opens the package cache for this fixture.
        fn open(&self) -> PackageCache<String> {
            PackageCache::<String>::open(
                &self.cache_root,
                self.canonical_root.clone(),
                "0.1.0",
                &self.settings,
            )
            .unwrap()
        }
    }

    #[test]
    fn cache_key_hashing_is_deterministic() {
        let subject = settings(true);

        let first = cache_key_hex(&subject);
        let second = cache_key_hex(&subject);

        assert_eq!(first, second);
    }

    #[test]
    fn cache_key_changes_when_settings_change() {
        let first = settings(true);
        let second = settings(false);

        assert_ne!(cache_key_hex(&first), cache_key_hex(&second));
    }

    #[test]
    fn package_cache_persists_and_reloads() {
        let fixture = Fixture::new();

        let mut cache = fixture.open();
        cache.insert(PathBuf::from("script.sh"), file_key(1, 2), "ok".to_string());
        let cache_path = cache.path().to_path_buf();
        cache.persist().unwrap();

        assert!(cache_path.is_file());

        let mut reopened = fixture.open();
        let value = reopened.get(Path::new("script.sh"), &file_key(1, 2));

        assert_eq!(value.as_deref(), Some("ok"));
    }

    #[test]
    fn persist_prunes_stale_entries() {
        let fixture = Fixture::new();

        let mut cache = fixture.open();
        cache.insert(
            PathBuf::from("stale.sh"),
            file_key(1, 5),
            "stale".to_string(),
        );
        let cache_path = cache.path().to_path_buf();
        cache.persist().unwrap();

        // Rewrite the stored entry so it looks like it was last seen at the epoch.
        let mut stored = read_stored(&cache_path);
        stored
            .files
            .get_mut(Path::new("stale.sh"))
            .unwrap()
            .last_seen_ms = 0;
        let encoded = bincode::serde::encode_to_vec(&stored, bincode::config::standard()).unwrap();
        fs::write(&cache_path, encoded).unwrap();

        let mut reopened = fixture.open();
        reopened.insert(
            PathBuf::from("fresh.sh"),
            file_key(2, 5),
            "fresh".to_string(),
        );
        reopened.persist().unwrap();

        let stored = read_stored(&cache_path);

        assert!(!stored.files.contains_key(Path::new("stale.sh")));
        assert!(stored.files.contains_key(Path::new("fresh.sh")));
    }

    #[test]
    fn cache_key_miss_when_only_file_size_changes() {
        let fixture = Fixture::new();

        let mut cache = fixture.open();
        cache.insert(PathBuf::from("script.sh"), file_key(1, 2), "ok".to_string());
        cache.persist().unwrap();

        let mut reopened = fixture.open();
        let value = reopened.get(Path::new("script.sh"), &file_key(1, 3));

        assert!(value.is_none());
    }

    #[test]
    fn reads_project_root_from_cache_file_without_knowing_payload_type() {
        let fixture = Fixture::new();

        let mut cache = fixture.open();
        cache.insert(PathBuf::from("script.sh"), file_key(1, 2), "ok".to_string());
        let cache_path = cache.path().to_path_buf();
        cache.persist().unwrap();

        let project_root = read_project_root_from_cache_file(&cache_path).unwrap();
        assert_eq!(project_root, Some(fixture.canonical_root.clone()));
    }
}