Skip to main content

shuck_cache/
lib.rs

1#![warn(missing_docs)]
2#![cfg_attr(not(test), warn(clippy::unwrap_used))]
3
4//! File-oriented cache keys and persistent package caches for Shuck.
5//!
6//! The types in this crate power the `shuck` CLI cache, but are generic enough to reuse in other
7//! Rust tooling that wants SHA-256-based cache partitioning and serialized per-file entries.
8use std::collections::{BTreeMap, BTreeSet};
9use std::fs::{self, File};
10use std::io::{self, BufReader, Write};
11use std::path::{Path, PathBuf};
12use std::time::{Duration, SystemTime, UNIX_EPOCH};
13
14use serde::Serialize;
15use serde::de::DeserializeOwned;
16use sha2::{Digest, Sha256};
17use tempfile::NamedTempFile;
18
/// Name of the legacy per-project cache directory used by older shuck releases.
pub const CACHE_DIR_NAME: &str = ".shuck_cache";

/// Retention window for cached entries: thirty days, expressed in seconds.
const MAX_LAST_SEEN_AGE: Duration = Duration::from_secs(60 * 60 * 24 * 30);
23
24/// Returns the legacy cache directory that lives under a project root.
25pub fn legacy_cache_dir(project_root: &Path) -> PathBuf {
26    project_root.join(CACHE_DIR_NAME)
27}
28
29/// Reads the cached project root marker stored in a legacy cache file.
30pub fn read_project_root_from_cache_file(path: &Path) -> io::Result<Option<PathBuf>> {
31    let file = match File::open(path) {
32        Ok(file) => file,
33        Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(None),
34        Err(err) => return Err(err),
35    };
36
37    let mut reader = BufReader::new(file);
38    match bincode::serde::decode_from_std_read(&mut reader, bincode::config::standard()) {
39        Ok(project_root) => Ok(Some(project_root)),
40        Err(_) => Ok(None),
41    }
42}
43
44/// Trait for values that can contribute to a deterministic package cache key.
45pub trait CacheKey {
46    /// Write this value into the structured cache-key hasher.
47    fn cache_key(&self, state: &mut CacheKeyHasher);
48}
49
50/// Incremental hasher used to build structured cache keys.
51pub struct CacheKeyHasher {
52    hasher: Sha256,
53}
54
55impl CacheKeyHasher {
56    /// Create an empty cache-key hasher.
57    #[must_use]
58    pub fn new() -> Self {
59        Self {
60            hasher: Sha256::new(),
61        }
62    }
63
64    /// Write a caller-defined domain tag into the key.
65    pub fn write_tag(&mut self, tag: &[u8]) {
66        self.write_bytes(tag);
67    }
68
69    /// Write a boolean value into the key.
70    pub fn write_bool(&mut self, value: bool) {
71        self.hasher.update([u8::from(value)]);
72    }
73
74    /// Write an unsigned 8-bit integer into the key.
75    pub fn write_u8(&mut self, value: u8) {
76        self.hasher.update([value]);
77    }
78
79    /// Write an unsigned 32-bit integer into the key.
80    pub fn write_u32(&mut self, value: u32) {
81        self.hasher.update(value.to_le_bytes());
82    }
83
84    /// Write an unsigned 64-bit integer into the key.
85    pub fn write_u64(&mut self, value: u64) {
86        self.hasher.update(value.to_le_bytes());
87    }
88
89    /// Write an unsigned 128-bit integer into the key.
90    pub fn write_u128(&mut self, value: u128) {
91        self.hasher.update(value.to_le_bytes());
92    }
93
94    /// Write a `usize` value into the key using a platform-independent encoding.
95    pub fn write_usize(&mut self, value: usize) {
96        self.write_u64(value as u64);
97    }
98
99    /// Write a UTF-8 string into the key.
100    pub fn write_str(&mut self, value: &str) {
101        self.write_bytes(value.as_bytes());
102    }
103
104    /// Write a byte slice into the key with a length prefix.
105    pub fn write_bytes(&mut self, bytes: &[u8]) {
106        self.write_u64(bytes.len() as u64);
107        self.hasher.update(bytes);
108    }
109
110    /// Finish the hasher and return a lowercase hex digest.
111    #[must_use]
112    pub fn finish_hex(self) -> String {
113        let digest = self.hasher.finalize();
114        let mut out = String::with_capacity(digest.len() * 2);
115        for byte in digest {
116            use std::fmt::Write as _;
117            let _ = write!(&mut out, "{byte:02x}");
118        }
119        out
120    }
121}
122
123impl Default for CacheKeyHasher {
124    fn default() -> Self {
125        Self::new()
126    }
127}
128
129/// Returns the hex-encoded cache key for a value.
130#[must_use]
131pub fn cache_key_hex<T: CacheKey>(value: &T) -> String {
132    let mut hasher = CacheKeyHasher::new();
133    value.cache_key(&mut hasher);
134    hasher.finish_hex()
135}
136
137impl CacheKey for bool {
138    fn cache_key(&self, state: &mut CacheKeyHasher) {
139        state.write_bool(*self);
140    }
141}
142
143impl CacheKey for u8 {
144    fn cache_key(&self, state: &mut CacheKeyHasher) {
145        state.write_u8(*self);
146    }
147}
148
149impl CacheKey for u32 {
150    fn cache_key(&self, state: &mut CacheKeyHasher) {
151        state.write_u32(*self);
152    }
153}
154
155impl CacheKey for u64 {
156    fn cache_key(&self, state: &mut CacheKeyHasher) {
157        state.write_u64(*self);
158    }
159}
160
161impl CacheKey for u128 {
162    fn cache_key(&self, state: &mut CacheKeyHasher) {
163        state.write_u128(*self);
164    }
165}
166
167impl CacheKey for usize {
168    fn cache_key(&self, state: &mut CacheKeyHasher) {
169        state.write_usize(*self);
170    }
171}
172
173impl CacheKey for str {
174    fn cache_key(&self, state: &mut CacheKeyHasher) {
175        state.write_str(self);
176    }
177}
178
179impl CacheKey for String {
180    fn cache_key(&self, state: &mut CacheKeyHasher) {
181        self.as_str().cache_key(state);
182    }
183}
184
185impl<T: CacheKey + ?Sized> CacheKey for &T {
186    fn cache_key(&self, state: &mut CacheKeyHasher) {
187        (**self).cache_key(state);
188    }
189}
190
191impl<T: CacheKey> CacheKey for Option<T> {
192    fn cache_key(&self, state: &mut CacheKeyHasher) {
193        match self {
194            Some(value) => {
195                state.write_u8(1);
196                value.cache_key(state);
197            }
198            None => state.write_u8(0),
199        }
200    }
201}
202
203impl<T: CacheKey> CacheKey for [T] {
204    fn cache_key(&self, state: &mut CacheKeyHasher) {
205        state.write_usize(self.len());
206        for value in self {
207            value.cache_key(state);
208        }
209    }
210}
211
212impl<T: CacheKey> CacheKey for Vec<T> {
213    fn cache_key(&self, state: &mut CacheKeyHasher) {
214        self.as_slice().cache_key(state);
215    }
216}
217
218impl CacheKey for Path {
219    fn cache_key(&self, state: &mut CacheKeyHasher) {
220        state.write_str(&self.to_string_lossy());
221    }
222}
223
224impl CacheKey for PathBuf {
225    fn cache_key(&self, state: &mut CacheKeyHasher) {
226        self.as_path().cache_key(state);
227    }
228}
229
230impl<K, V> CacheKey for BTreeMap<K, V>
231where
232    K: CacheKey + Ord,
233    V: CacheKey,
234{
235    fn cache_key(&self, state: &mut CacheKeyHasher) {
236        state.write_usize(self.len());
237        for (key, value) in self {
238            key.cache_key(state);
239            value.cache_key(state);
240        }
241    }
242}
243
244/// File metadata used to validate cached entries against a filesystem path.
245#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
246pub struct FileCacheKey {
247    /// File modification time as nanoseconds since the Unix epoch.
248    pub file_last_modified_ns: u128,
249    /// File creation time as nanoseconds since the Unix epoch, when available.
250    pub file_created_ns: Option<u128>,
251    /// File status-change time as nanoseconds since the Unix epoch, when available.
252    pub file_status_changed_ns: Option<u128>,
253    /// Platform device identifier, when available.
254    pub file_device_id: Option<u64>,
255    /// Platform file identifier such as an inode, when available.
256    pub file_id: Option<u64>,
257    /// Platform permission bits or readonly flag used to invalidate stale entries.
258    pub file_permissions_mode: u32,
259    /// File size in bytes.
260    pub file_size_bytes: u64,
261}
262
263impl FileCacheKey {
264    /// Read file metadata from `path` and convert it into a cache validation key.
265    pub fn from_path(path: &Path) -> io::Result<Self> {
266        let metadata = path.metadata()?;
267        let file_last_modified_ns = system_time_ns(metadata.modified()?)?;
268        let file_created_ns = metadata
269            .created()
270            .ok()
271            .and_then(|created| system_time_ns(created).ok());
272        let (file_status_changed_ns, file_device_id, file_id) =
273            platform_metadata_identity(&metadata);
274
275        #[cfg(unix)]
276        let file_permissions_mode = {
277            use std::os::unix::fs::PermissionsExt;
278            metadata.permissions().mode()
279        };
280
281        #[cfg(windows)]
282        let file_permissions_mode: u32 = u32::from(metadata.permissions().readonly());
283
284        Ok(Self {
285            file_last_modified_ns,
286            file_created_ns,
287            file_status_changed_ns,
288            file_device_id,
289            file_id,
290            file_permissions_mode,
291            file_size_bytes: metadata.len(),
292        })
293    }
294}
295
/// Converts `time` into nanoseconds elapsed since the Unix epoch.
///
/// A time earlier than the epoch is reported as an `io::Error` wrapping the underlying
/// `SystemTimeError`.
fn system_time_ns(time: SystemTime) -> io::Result<u128> {
    match time.duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => Ok(since_epoch.as_nanos()),
        Err(before_epoch) => Err(io::Error::other(before_epoch)),
    }
}
302
303#[cfg(unix)]
304fn platform_metadata_identity(metadata: &fs::Metadata) -> (Option<u128>, Option<u64>, Option<u64>) {
305    use std::os::unix::fs::MetadataExt;
306
307    (
308        unix_timestamp_ns(metadata.ctime(), metadata.ctime_nsec()),
309        Some(metadata.dev()),
310        Some(metadata.ino()),
311    )
312}
313
314#[cfg(not(unix))]
315fn platform_metadata_identity(_: &fs::Metadata) -> (Option<u128>, Option<u64>, Option<u64>) {
316    (None, None, None)
317}
318
/// Combines whole seconds and a nanosecond remainder into nanoseconds since the Unix epoch.
///
/// Returns `None` when either component is negative.
#[cfg(unix)]
fn unix_timestamp_ns(seconds: i64, nanoseconds: i64) -> Option<u128> {
    let whole_seconds = u128::try_from(seconds).ok()?;
    let subsec_nanos = u128::try_from(nanoseconds).ok()?;

    Some(whole_seconds * 1_000_000_000 + subsec_nanos)
}
327
328#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
329struct CachedFile<T> {
330    key: FileCacheKey,
331    last_seen_ms: u64,
332    data: T,
333}
334
335#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
336struct StoredPackage<T> {
337    project_root: PathBuf,
338    files: BTreeMap<PathBuf, CachedFile<T>>,
339}
340
341#[derive(Debug, Clone)]
342struct Change<T> {
343    key: FileCacheKey,
344    data: T,
345}
346
347/// On-disk cache for file-scoped analysis results within a package.
348#[derive(Debug, Clone)]
349pub struct PackageCache<T> {
350    path: PathBuf,
351    package: StoredPackage<T>,
352    seen_paths: BTreeSet<PathBuf>,
353    changes: BTreeMap<PathBuf, Change<T>>,
354    last_seen_ms: u64,
355}
356
357impl<T> PackageCache<T>
358where
359    T: Clone + Serialize + DeserializeOwned,
360{
361    /// Open a package cache file for a canonical project root and tool version.
362    ///
363    /// Corrupt, missing, or root-mismatched cache files are treated as empty caches.
364    pub fn open(
365        cache_root: &Path,
366        canonical_root: PathBuf,
367        tool_version: &str,
368        package_key: &impl CacheKey,
369    ) -> io::Result<Self> {
370        let key = cache_key_hex(package_key);
371        let path = cache_root.join(tool_version).join(format!("{key}.bin"));
372
373        let file = match File::open(&path) {
374            Ok(file) => file,
375            Err(err) if err.kind() == io::ErrorKind::NotFound => {
376                return Ok(Self::empty(path, canonical_root));
377            }
378            Err(err) => return Err(err),
379        };
380
381        let mut reader = BufReader::new(file);
382        let package: StoredPackage<T> =
383            match bincode::serde::decode_from_std_read(&mut reader, bincode::config::standard()) {
384                Ok(package) => package,
385                Err(_) => return Ok(Self::empty(path, canonical_root)),
386            };
387
388        if package.project_root != canonical_root {
389            return Ok(Self::empty(path, canonical_root));
390        }
391
392        Ok(Self {
393            path,
394            package,
395            seen_paths: BTreeSet::new(),
396            changes: BTreeMap::new(),
397            last_seen_ms: current_time_ms(),
398        })
399    }
400
401    /// Return the on-disk path backing this package cache.
402    #[must_use]
403    pub fn path(&self) -> &Path {
404        &self.path
405    }
406
407    /// Return a cached entry when `relative_path` still matches `key`.
408    pub fn get(&mut self, relative_path: &Path, key: &FileCacheKey) -> Option<T> {
409        let file = self.package.files.get(relative_path)?;
410        if &file.key != key {
411            return None;
412        }
413
414        self.seen_paths.insert(relative_path.to_path_buf());
415        Some(file.data.clone())
416    }
417
418    /// Insert or replace a cached entry for a package-relative path.
419    pub fn insert(&mut self, relative_path: PathBuf, key: FileCacheKey, data: T) {
420        self.seen_paths.insert(relative_path.clone());
421        self.changes.insert(relative_path, Change { key, data });
422    }
423
424    /// Persist touched and changed entries to disk.
425    ///
426    /// Untouched entries older than the retention window are pruned before writing.
427    pub fn persist(mut self) -> io::Result<()> {
428        if !self.save() {
429            return Ok(());
430        }
431
432        let parent = self
433            .path
434            .parent()
435            .ok_or_else(|| io::Error::other("cache path has no parent directory"))?;
436        fs::create_dir_all(parent)?;
437
438        let mut temp_file = NamedTempFile::new_in(parent)?;
439        let encoded = bincode::serde::encode_to_vec(&self.package, bincode::config::standard())
440            .map_err(io::Error::other)?;
441        temp_file.write_all(&encoded)?;
442
443        match temp_file.persist(&self.path) {
444            Ok(_) => Ok(()),
445            Err(err) => Err(err.error),
446        }
447    }
448
449    fn empty(path: PathBuf, canonical_root: PathBuf) -> Self {
450        Self {
451            path,
452            package: StoredPackage {
453                project_root: canonical_root,
454                files: BTreeMap::new(),
455            },
456            seen_paths: BTreeSet::new(),
457            changes: BTreeMap::new(),
458            last_seen_ms: current_time_ms(),
459        }
460    }
461
462    fn save(&mut self) -> bool {
463        if self.seen_paths.is_empty() && self.changes.is_empty() {
464            return false;
465        }
466
467        let max_age_ms = MAX_LAST_SEEN_AGE.as_millis() as u64;
468        let now = self.last_seen_ms;
469
470        self.package
471            .files
472            .retain(|_, file| now.saturating_sub(file.last_seen_ms) <= max_age_ms);
473
474        for path in &self.seen_paths {
475            if let Some(change) = self.changes.remove(path) {
476                self.package.files.insert(
477                    path.clone(),
478                    CachedFile {
479                        key: change.key,
480                        last_seen_ms: now,
481                        data: change.data,
482                    },
483                );
484            } else if let Some(existing) = self.package.files.get_mut(path) {
485                existing.last_seen_ms = now;
486            }
487        }
488
489        for (path, change) in std::mem::take(&mut self.changes) {
490            self.package.files.insert(
491                path,
492                CachedFile {
493                    key: change.key,
494                    last_seen_ms: now,
495                    data: change.data,
496                },
497            );
498        }
499
500        true
501    }
502}
503
/// Returns the current wall-clock time in milliseconds since the Unix epoch.
///
/// A clock set earlier than the epoch yields `0`.
fn current_time_ms() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_millis() as u64,
        Err(_) => 0,
    }
}
510
#[cfg(test)]
mod tests {
    use super::*;

    /// Tool version used by every cache opened in these tests.
    const TOOL_VERSION: &str = "0.1.0";

    #[derive(Debug, Clone)]
    struct TestSettings {
        strict: bool,
        label: String,
    }

    impl CacheKey for TestSettings {
        fn cache_key(&self, state: &mut CacheKeyHasher) {
            state.write_tag(b"test-settings");
            self.strict.cache_key(state);
            self.label.cache_key(state);
        }
    }

    /// Settings labelled `"alpha"` with the requested strictness.
    fn settings(strict: bool) -> TestSettings {
        TestSettings {
            strict,
            label: "alpha".to_string(),
        }
    }

    fn test_file_key(file_last_modified_ns: u128, file_size_bytes: u64) -> FileCacheKey {
        FileCacheKey {
            file_last_modified_ns,
            file_created_ns: Some(100),
            file_status_changed_ns: Some(200),
            file_device_id: Some(300),
            file_id: Some(400),
            file_permissions_mode: 0o644,
            file_size_bytes,
        }
    }

    /// Temporary directory holding a cache root and a canonicalized project root.
    struct Workspace {
        // Held only so the directory outlives the test body.
        _tempdir: tempfile::TempDir,
        cache_root: PathBuf,
        canonical_root: PathBuf,
        settings: TestSettings,
    }

    impl Workspace {
        fn new() -> Self {
            let tempdir = tempfile::tempdir().unwrap();
            let cache_root = tempdir.path().join("cache");
            let storage_root = tempdir.path().join("project");
            fs::create_dir_all(&storage_root).unwrap();
            let canonical_root = fs::canonicalize(&storage_root).unwrap();

            Self {
                _tempdir: tempdir,
                cache_root,
                canonical_root,
                settings: settings(true),
            }
        }

        /// Opens the package cache for this workspace.
        fn open(&self) -> PackageCache<String> {
            PackageCache::open(
                &self.cache_root,
                self.canonical_root.clone(),
                TOOL_VERSION,
                &self.settings,
            )
            .unwrap()
        }

        /// Opens the cache, inserts one entry, persists it, and returns the cache file path.
        fn persist_entry(&self, relative_path: &str, key: FileCacheKey, data: &str) -> PathBuf {
            let mut cache = self.open();
            cache.insert(PathBuf::from(relative_path), key, data.to_string());
            let cache_path = cache.path().to_path_buf();
            cache.persist().unwrap();
            cache_path
        }
    }

    /// Decodes the stored package straight from the cache file.
    fn read_stored(cache_path: &Path) -> StoredPackage<String> {
        let mut reader = BufReader::new(File::open(cache_path).unwrap());
        bincode::serde::decode_from_std_read(&mut reader, bincode::config::standard()).unwrap()
    }

    #[test]
    fn cache_key_hashing_is_deterministic() {
        let subject = settings(true);

        assert_eq!(cache_key_hex(&subject), cache_key_hex(&subject));
    }

    #[test]
    fn cache_key_changes_when_settings_change() {
        assert_ne!(cache_key_hex(&settings(true)), cache_key_hex(&settings(false)));
    }

    #[test]
    fn package_cache_persists_and_reloads() {
        let workspace = Workspace::new();
        let cache_path = workspace.persist_entry("script.sh", test_file_key(1, 2), "ok");

        assert!(cache_path.is_file());

        let mut reopened = workspace.open();
        let value = reopened.get(Path::new("script.sh"), &test_file_key(1, 2));

        assert_eq!(value.as_deref(), Some("ok"));
    }

    #[test]
    fn persist_prunes_stale_entries() {
        let workspace = Workspace::new();
        let cache_path = workspace.persist_entry("stale.sh", test_file_key(1, 5), "stale");

        // Rewrite the file so the entry looks as if it was last seen at the epoch.
        let mut stored = read_stored(&cache_path);
        stored
            .files
            .get_mut(Path::new("stale.sh"))
            .unwrap()
            .last_seen_ms = 0;
        let encoded = bincode::serde::encode_to_vec(&stored, bincode::config::standard()).unwrap();
        fs::write(&cache_path, encoded).unwrap();

        workspace.persist_entry("fresh.sh", test_file_key(2, 5), "fresh");

        let stored = read_stored(&cache_path);

        assert!(!stored.files.contains_key(Path::new("stale.sh")));
        assert!(stored.files.contains_key(Path::new("fresh.sh")));
    }

    #[test]
    fn cache_key_miss_when_only_file_size_changes() {
        let workspace = Workspace::new();
        workspace.persist_entry("script.sh", test_file_key(1, 2), "ok");

        let mut reopened = workspace.open();
        let value = reopened.get(Path::new("script.sh"), &test_file_key(1, 3));

        assert!(value.is_none());
    }

    #[test]
    fn cache_key_miss_when_only_submillisecond_mtime_changes() {
        let workspace = Workspace::new();
        workspace.persist_entry("script.sh", test_file_key(1_000_000, 2), "ok");

        let mut reopened = workspace.open();
        let value = reopened.get(Path::new("script.sh"), &test_file_key(1_000_001, 2));

        assert!(value.is_none());
    }

    #[test]
    fn reads_project_root_from_cache_file_without_knowing_payload_type() {
        let workspace = Workspace::new();
        let cache_path = workspace.persist_entry("script.sh", test_file_key(1, 2), "ok");

        let project_root = read_project_root_from_cache_file(&cache_path).unwrap();

        assert_eq!(project_root, Some(workspace.canonical_root.clone()));
    }
}