Skip to main content

shuck_cache/
lib.rs

1#![warn(missing_docs)]
2#![cfg_attr(not(test), warn(clippy::unwrap_used))]
3
4//! File-oriented cache keys and persistent package caches for Shuck.
5//!
6//! The types in this crate power the `shuck` CLI cache, but are generic enough to reuse in other
7//! Rust tooling that wants SHA-256-based cache partitioning and serialized per-file entries.
8use std::collections::{BTreeMap, BTreeSet};
9use std::fs::{self, File};
10use std::io::{self, BufReader, Write};
11use std::path::{Path, PathBuf};
12use std::time::{Duration, SystemTime, UNIX_EPOCH};
13
14use serde::Serialize;
15use serde::de::DeserializeOwned;
16use sha2::{Digest, Sha256};
17use tempfile::NamedTempFile;
18
/// Legacy per-project cache directory name used by older shuck releases.
pub const CACHE_DIR_NAME: &str = ".shuck_cache";

/// Longest time (30 days) a stored entry may go without being seen before it is dropped when
/// the cache is saved.
const MAX_LAST_SEEN_AGE: Duration = Duration::from_secs(30 * 24 * 60 * 60);
23
24/// Returns the legacy cache directory that lives under a project root.
25pub fn legacy_cache_dir(project_root: &Path) -> PathBuf {
26    project_root.join(CACHE_DIR_NAME)
27}
28
29/// Reads the cached project root marker stored in a legacy cache file.
30pub fn read_project_root_from_cache_file(path: &Path) -> io::Result<Option<PathBuf>> {
31    let file = match File::open(path) {
32        Ok(file) => file,
33        Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(None),
34        Err(err) => return Err(err),
35    };
36
37    let mut reader = BufReader::new(file);
38    match bincode::serde::decode_from_std_read(&mut reader, bincode::config::standard()) {
39        Ok(project_root) => Ok(Some(project_root)),
40        Err(_) => Ok(None),
41    }
42}
43
/// Trait for values that can contribute to a deterministic package cache key.
#[allow(missing_docs)]
pub trait CacheKey {
    /// Feeds this value's contribution to the cache key into `state`.
    fn cache_key(&self, state: &mut CacheKeyHasher);
}
49
/// Incremental hasher used to build structured cache keys.
pub struct CacheKeyHasher {
    /// Running SHA-256 state that every `write_*` call feeds into.
    hasher: Sha256,
}
54
55#[allow(missing_docs)]
56impl CacheKeyHasher {
57    #[must_use]
58    pub fn new() -> Self {
59        Self {
60            hasher: Sha256::new(),
61        }
62    }
63
64    pub fn write_tag(&mut self, tag: &[u8]) {
65        self.write_bytes(tag);
66    }
67
68    pub fn write_bool(&mut self, value: bool) {
69        self.hasher.update([u8::from(value)]);
70    }
71
72    pub fn write_u8(&mut self, value: u8) {
73        self.hasher.update([value]);
74    }
75
76    pub fn write_u32(&mut self, value: u32) {
77        self.hasher.update(value.to_le_bytes());
78    }
79
80    pub fn write_u64(&mut self, value: u64) {
81        self.hasher.update(value.to_le_bytes());
82    }
83
84    pub fn write_u128(&mut self, value: u128) {
85        self.hasher.update(value.to_le_bytes());
86    }
87
88    pub fn write_usize(&mut self, value: usize) {
89        self.write_u64(value as u64);
90    }
91
92    pub fn write_str(&mut self, value: &str) {
93        self.write_bytes(value.as_bytes());
94    }
95
96    pub fn write_bytes(&mut self, bytes: &[u8]) {
97        self.write_u64(bytes.len() as u64);
98        self.hasher.update(bytes);
99    }
100
101    #[must_use]
102    pub fn finish_hex(self) -> String {
103        let digest = self.hasher.finalize();
104        let mut out = String::with_capacity(digest.len() * 2);
105        for byte in digest {
106            use std::fmt::Write as _;
107            let _ = write!(&mut out, "{byte:02x}");
108        }
109        out
110    }
111}
112
113impl Default for CacheKeyHasher {
114    fn default() -> Self {
115        Self::new()
116    }
117}
118
119/// Returns the hex-encoded cache key for a value.
120#[must_use]
121pub fn cache_key_hex<T: CacheKey>(value: &T) -> String {
122    let mut hasher = CacheKeyHasher::new();
123    value.cache_key(&mut hasher);
124    hasher.finish_hex()
125}
126
127impl CacheKey for bool {
128    fn cache_key(&self, state: &mut CacheKeyHasher) {
129        state.write_bool(*self);
130    }
131}
132
133impl CacheKey for u8 {
134    fn cache_key(&self, state: &mut CacheKeyHasher) {
135        state.write_u8(*self);
136    }
137}
138
139impl CacheKey for u32 {
140    fn cache_key(&self, state: &mut CacheKeyHasher) {
141        state.write_u32(*self);
142    }
143}
144
145impl CacheKey for u64 {
146    fn cache_key(&self, state: &mut CacheKeyHasher) {
147        state.write_u64(*self);
148    }
149}
150
151impl CacheKey for u128 {
152    fn cache_key(&self, state: &mut CacheKeyHasher) {
153        state.write_u128(*self);
154    }
155}
156
157impl CacheKey for usize {
158    fn cache_key(&self, state: &mut CacheKeyHasher) {
159        state.write_usize(*self);
160    }
161}
162
163impl CacheKey for str {
164    fn cache_key(&self, state: &mut CacheKeyHasher) {
165        state.write_str(self);
166    }
167}
168
169impl CacheKey for String {
170    fn cache_key(&self, state: &mut CacheKeyHasher) {
171        self.as_str().cache_key(state);
172    }
173}
174
175impl<T: CacheKey + ?Sized> CacheKey for &T {
176    fn cache_key(&self, state: &mut CacheKeyHasher) {
177        (**self).cache_key(state);
178    }
179}
180
181impl<T: CacheKey> CacheKey for Option<T> {
182    fn cache_key(&self, state: &mut CacheKeyHasher) {
183        match self {
184            Some(value) => {
185                state.write_u8(1);
186                value.cache_key(state);
187            }
188            None => state.write_u8(0),
189        }
190    }
191}
192
193impl<T: CacheKey> CacheKey for [T] {
194    fn cache_key(&self, state: &mut CacheKeyHasher) {
195        state.write_usize(self.len());
196        for value in self {
197            value.cache_key(state);
198        }
199    }
200}
201
202impl<T: CacheKey> CacheKey for Vec<T> {
203    fn cache_key(&self, state: &mut CacheKeyHasher) {
204        self.as_slice().cache_key(state);
205    }
206}
207
208impl CacheKey for Path {
209    fn cache_key(&self, state: &mut CacheKeyHasher) {
210        state.write_str(&self.to_string_lossy());
211    }
212}
213
214impl CacheKey for PathBuf {
215    fn cache_key(&self, state: &mut CacheKeyHasher) {
216        self.as_path().cache_key(state);
217    }
218}
219
220impl<K, V> CacheKey for BTreeMap<K, V>
221where
222    K: CacheKey + Ord,
223    V: CacheKey,
224{
225    fn cache_key(&self, state: &mut CacheKeyHasher) {
226        state.write_usize(self.len());
227        for (key, value) in self {
228            key.cache_key(state);
229            value.cache_key(state);
230        }
231    }
232}
233
/// File metadata used to validate cached entries against a filesystem path.
#[allow(missing_docs)]
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct FileCacheKey {
    /// Modification time in nanoseconds since the Unix epoch.
    pub file_last_modified_ns: u128,
    /// Creation time in nanoseconds since the Unix epoch; `None` when it is unavailable or
    /// cannot be expressed relative to the epoch.
    pub file_created_ns: Option<u128>,
    /// Unix `ctime` in nanoseconds; `None` on non-Unix targets or when a component is negative.
    pub file_status_changed_ns: Option<u128>,
    /// Device ID reported by the filesystem on Unix; `None` on other targets.
    pub file_device_id: Option<u64>,
    /// Inode number on Unix; `None` on other targets.
    pub file_id: Option<u64>,
    /// Unix permission mode; on Windows, `1` when the file is read-only and `0` otherwise.
    pub file_permissions_mode: u32,
    /// File length in bytes as reported by the metadata.
    pub file_size_bytes: u64,
}
246
247#[allow(missing_docs)]
248impl FileCacheKey {
249    pub fn from_path(path: &Path) -> io::Result<Self> {
250        let metadata = path.metadata()?;
251        let file_last_modified_ns = system_time_ns(metadata.modified()?)?;
252        let file_created_ns = metadata
253            .created()
254            .ok()
255            .and_then(|created| system_time_ns(created).ok());
256        let (file_status_changed_ns, file_device_id, file_id) =
257            platform_metadata_identity(&metadata);
258
259        #[cfg(unix)]
260        let file_permissions_mode = {
261            use std::os::unix::fs::PermissionsExt;
262            metadata.permissions().mode()
263        };
264
265        #[cfg(windows)]
266        let file_permissions_mode: u32 = u32::from(metadata.permissions().readonly());
267
268        Ok(Self {
269            file_last_modified_ns,
270            file_created_ns,
271            file_status_changed_ns,
272            file_device_id,
273            file_id,
274            file_permissions_mode,
275            file_size_bytes: metadata.len(),
276        })
277    }
278}
279
/// Converts `time` to nanoseconds since the Unix epoch.
///
/// # Errors
///
/// Returns an `io::Error` wrapping the underlying error when `time` precedes the epoch.
fn system_time_ns(time: SystemTime) -> io::Result<u128> {
    match time.duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => Ok(since_epoch.as_nanos()),
        Err(before_epoch) => Err(io::Error::other(before_epoch)),
    }
}
286
287#[cfg(unix)]
288fn platform_metadata_identity(metadata: &fs::Metadata) -> (Option<u128>, Option<u64>, Option<u64>) {
289    use std::os::unix::fs::MetadataExt;
290
291    (
292        unix_timestamp_ns(metadata.ctime(), metadata.ctime_nsec()),
293        Some(metadata.dev()),
294        Some(metadata.ino()),
295    )
296}
297
/// Non-Unix fallback: no status-change time, device ID, or file ID is reported.
#[cfg(not(unix))]
fn platform_metadata_identity(
    _metadata: &fs::Metadata,
) -> (Option<u128>, Option<u64>, Option<u64>) {
    let unavailable = (None, None, None);
    unavailable
}
302
/// Combines a seconds/nanoseconds pair into total nanoseconds, or returns `None` when either
/// component is negative.
#[cfg(unix)]
fn unix_timestamp_ns(seconds: i64, nanoseconds: i64) -> Option<u128> {
    let both_non_negative = seconds >= 0 && nanoseconds >= 0;
    both_non_negative.then(|| (seconds as u128) * 1_000_000_000 + nanoseconds as u128)
}
311
/// A single stored cache entry: the payload plus the metadata used to validate and expire it.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
struct CachedFile<T> {
    /// File metadata the payload was stored under; a lookup only hits when it matches exactly.
    key: FileCacheKey,
    /// Timestamp (milliseconds since the Unix epoch) of the last save that touched this entry.
    last_seen_ms: u64,
    /// The cached payload.
    data: T,
}
318
/// The serialized form of a package cache.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
struct StoredPackage<T> {
    /// Project root the cache was written for; compared against the caller's root on open.
    project_root: PathBuf,
    /// Stored entries keyed by file path.
    files: BTreeMap<PathBuf, CachedFile<T>>,
}
324
/// An entry inserted during the current session that has not yet been merged into the package.
#[derive(Debug, Clone)]
struct Change<T> {
    /// File metadata the new payload is stored under.
    key: FileCacheKey,
    /// The new payload.
    data: T,
}
330
/// On-disk cache for file-scoped analysis results within a package.
#[derive(Debug, Clone)]
pub struct PackageCache<T> {
    /// Location of the cache file on disk.
    path: PathBuf,
    /// Entries loaded from disk, or an empty package when nothing usable was found.
    package: StoredPackage<T>,
    /// Paths that produced a cache hit or were inserted during this session.
    seen_paths: BTreeSet<PathBuf>,
    /// Entries inserted during this session, merged into `package` on save.
    changes: BTreeMap<PathBuf, Change<T>>,
    /// Timestamp (milliseconds since the Unix epoch) taken when this cache was opened; used as
    /// "now" when saving.
    last_seen_ms: u64,
}
340
341#[allow(missing_docs)]
342impl<T> PackageCache<T>
343where
344    T: Clone + Serialize + DeserializeOwned,
345{
346    pub fn open(
347        cache_root: &Path,
348        canonical_root: PathBuf,
349        tool_version: &str,
350        package_key: &impl CacheKey,
351    ) -> io::Result<Self> {
352        let key = cache_key_hex(package_key);
353        let path = cache_root.join(tool_version).join(format!("{key}.bin"));
354
355        let file = match File::open(&path) {
356            Ok(file) => file,
357            Err(err) if err.kind() == io::ErrorKind::NotFound => {
358                return Ok(Self::empty(path, canonical_root));
359            }
360            Err(err) => return Err(err),
361        };
362
363        let mut reader = BufReader::new(file);
364        let package: StoredPackage<T> =
365            match bincode::serde::decode_from_std_read(&mut reader, bincode::config::standard()) {
366                Ok(package) => package,
367                Err(_) => return Ok(Self::empty(path, canonical_root)),
368            };
369
370        if package.project_root != canonical_root {
371            return Ok(Self::empty(path, canonical_root));
372        }
373
374        Ok(Self {
375            path,
376            package,
377            seen_paths: BTreeSet::new(),
378            changes: BTreeMap::new(),
379            last_seen_ms: current_time_ms(),
380        })
381    }
382
383    #[must_use]
384    pub fn path(&self) -> &Path {
385        &self.path
386    }
387
388    pub fn get(&mut self, relative_path: &Path, key: &FileCacheKey) -> Option<T> {
389        let file = self.package.files.get(relative_path)?;
390        if &file.key != key {
391            return None;
392        }
393
394        self.seen_paths.insert(relative_path.to_path_buf());
395        Some(file.data.clone())
396    }
397
398    pub fn insert(&mut self, relative_path: PathBuf, key: FileCacheKey, data: T) {
399        self.seen_paths.insert(relative_path.clone());
400        self.changes.insert(relative_path, Change { key, data });
401    }
402
403    pub fn persist(mut self) -> io::Result<()> {
404        if !self.save() {
405            return Ok(());
406        }
407
408        let parent = self
409            .path
410            .parent()
411            .ok_or_else(|| io::Error::other("cache path has no parent directory"))?;
412        fs::create_dir_all(parent)?;
413
414        let mut temp_file = NamedTempFile::new_in(parent)?;
415        let encoded = bincode::serde::encode_to_vec(&self.package, bincode::config::standard())
416            .map_err(io::Error::other)?;
417        temp_file.write_all(&encoded)?;
418
419        match temp_file.persist(&self.path) {
420            Ok(_) => Ok(()),
421            Err(err) => Err(err.error),
422        }
423    }
424
425    fn empty(path: PathBuf, canonical_root: PathBuf) -> Self {
426        Self {
427            path,
428            package: StoredPackage {
429                project_root: canonical_root,
430                files: BTreeMap::new(),
431            },
432            seen_paths: BTreeSet::new(),
433            changes: BTreeMap::new(),
434            last_seen_ms: current_time_ms(),
435        }
436    }
437
438    fn save(&mut self) -> bool {
439        if self.seen_paths.is_empty() && self.changes.is_empty() {
440            return false;
441        }
442
443        let max_age_ms = MAX_LAST_SEEN_AGE.as_millis() as u64;
444        let now = self.last_seen_ms;
445
446        self.package
447            .files
448            .retain(|_, file| now.saturating_sub(file.last_seen_ms) <= max_age_ms);
449
450        for path in &self.seen_paths {
451            if let Some(change) = self.changes.remove(path) {
452                self.package.files.insert(
453                    path.clone(),
454                    CachedFile {
455                        key: change.key,
456                        last_seen_ms: now,
457                        data: change.data,
458                    },
459                );
460            } else if let Some(existing) = self.package.files.get_mut(path) {
461                existing.last_seen_ms = now;
462            }
463        }
464
465        for (path, change) in std::mem::take(&mut self.changes) {
466            self.package.files.insert(
467                path,
468                CachedFile {
469                    key: change.key,
470                    last_seen_ms: now,
471                    data: change.data,
472                },
473            );
474        }
475
476        true
477    }
478}
479
/// Returns the current wall-clock time in milliseconds since the Unix epoch, or `0` when the
/// system clock reports a time before the epoch.
fn current_time_ms() -> u64 {
    let since_epoch = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed,
        Err(_) => Duration::ZERO,
    };
    since_epoch.as_millis() as u64
}
486
#[cfg(test)]
mod tests {
    use super::*;

    const TOOL_VERSION: &str = "0.1.0";

    #[derive(Debug, Clone)]
    struct TestSettings {
        strict: bool,
        label: String,
    }

    impl CacheKey for TestSettings {
        fn cache_key(&self, state: &mut CacheKeyHasher) {
            state.write_tag(b"test-settings");
            self.strict.cache_key(state);
            self.label.cache_key(state);
        }
    }

    /// Settings labelled "alpha" with the given strictness.
    fn alpha_settings(strict: bool) -> TestSettings {
        TestSettings {
            strict,
            label: "alpha".to_string(),
        }
    }

    fn test_file_key(file_last_modified_ns: u128, file_size_bytes: u64) -> FileCacheKey {
        FileCacheKey {
            file_last_modified_ns,
            file_created_ns: Some(100),
            file_status_changed_ns: Some(200),
            file_device_id: Some(300),
            file_id: Some(400),
            file_permissions_mode: 0o644,
            file_size_bytes,
        }
    }

    /// Temporary cache and project directories plus the settings shared by the cache tests.
    struct Fixture {
        // Held only so the temporary directory outlives the test body.
        _tempdir: tempfile::TempDir,
        cache_root: PathBuf,
        canonical_root: PathBuf,
        settings: TestSettings,
    }

    impl Fixture {
        fn new() -> Self {
            let tempdir = tempfile::tempdir().unwrap();
            let cache_root = tempdir.path().join("cache");
            let storage_root = tempdir.path().join("project");
            fs::create_dir_all(&storage_root).unwrap();
            let canonical_root = fs::canonicalize(&storage_root).unwrap();

            Self {
                _tempdir: tempdir,
                cache_root,
                canonical_root,
                settings: alpha_settings(true),
            }
        }

        /// Opens the package cache for this fixture's project and settings.
        fn open(&self) -> PackageCache<String> {
            PackageCache::<String>::open(
                &self.cache_root,
                self.canonical_root.clone(),
                TOOL_VERSION,
                &self.settings,
            )
            .unwrap()
        }

        /// Opens the cache, inserts one entry, persists it, and returns the cache file path.
        fn persist_entry(&self, name: &str, key: FileCacheKey, data: &str) -> PathBuf {
            let mut cache = self.open();
            cache.insert(PathBuf::from(name), key, data.to_string());
            let cache_path = cache.path().to_path_buf();
            cache.persist().unwrap();
            cache_path
        }
    }

    /// Decodes the stored package directly from the cache file.
    fn read_stored(cache_path: &Path) -> StoredPackage<String> {
        let mut reader = BufReader::new(File::open(cache_path).unwrap());
        bincode::serde::decode_from_std_read(&mut reader, bincode::config::standard()).unwrap()
    }

    #[test]
    fn cache_key_hashing_is_deterministic() {
        let settings = alpha_settings(true);

        assert_eq!(cache_key_hex(&settings), cache_key_hex(&settings));
    }

    #[test]
    fn cache_key_changes_when_settings_change() {
        let strict = alpha_settings(true);
        let lenient = alpha_settings(false);

        assert_ne!(cache_key_hex(&strict), cache_key_hex(&lenient));
    }

    #[test]
    fn package_cache_persists_and_reloads() {
        let fixture = Fixture::new();
        let cache_path = fixture.persist_entry("script.sh", test_file_key(1, 2), "ok");

        assert!(cache_path.is_file());

        let mut reopened = fixture.open();
        let value = reopened.get(Path::new("script.sh"), &test_file_key(1, 2));

        assert_eq!(value.as_deref(), Some("ok"));
    }

    #[test]
    fn persist_prunes_stale_entries() {
        let fixture = Fixture::new();
        let cache_path = fixture.persist_entry("stale.sh", test_file_key(1, 5), "stale");

        // Backdate the stored entry so that it is older than the maximum allowed age.
        let mut stored = read_stored(&cache_path);
        stored
            .files
            .get_mut(Path::new("stale.sh"))
            .unwrap()
            .last_seen_ms = 0;
        let encoded = bincode::serde::encode_to_vec(&stored, bincode::config::standard()).unwrap();
        fs::write(&cache_path, encoded).unwrap();

        fixture.persist_entry("fresh.sh", test_file_key(2, 5), "fresh");

        let stored = read_stored(&cache_path);
        assert!(!stored.files.contains_key(Path::new("stale.sh")));
        assert!(stored.files.contains_key(Path::new("fresh.sh")));
    }

    #[test]
    fn cache_key_miss_when_only_file_size_changes() {
        let fixture = Fixture::new();
        fixture.persist_entry("script.sh", test_file_key(1, 2), "ok");

        let mut reopened = fixture.open();
        let value = reopened.get(Path::new("script.sh"), &test_file_key(1, 3));

        assert!(value.is_none());
    }

    #[test]
    fn cache_key_miss_when_only_submillisecond_mtime_changes() {
        let fixture = Fixture::new();
        fixture.persist_entry("script.sh", test_file_key(1_000_000, 2), "ok");

        let mut reopened = fixture.open();
        let value = reopened.get(Path::new("script.sh"), &test_file_key(1_000_001, 2));

        assert!(value.is_none());
    }

    #[test]
    fn reads_project_root_from_cache_file_without_knowing_payload_type() {
        let fixture = Fixture::new();
        let cache_path = fixture.persist_entry("script.sh", test_file_key(1, 2), "ok");

        let project_root = read_project_root_from_cache_file(&cache_path).unwrap();
        assert_eq!(project_root, Some(fixture.canonical_root.clone()));
    }
}