Skip to main content

shuck_cache/
lib.rs

1#![warn(missing_docs)]
2#![cfg_attr(not(test), warn(clippy::unwrap_used))]
3
4//! File-oriented cache keys and persistent package caches for Shuck.
5//!
6//! The types in this crate power the `shuck` CLI cache, but are generic enough to reuse in other
7//! Rust tooling that wants SHA-256-based cache partitioning and serialized per-file entries.
8use std::collections::{BTreeMap, BTreeSet};
9use std::fs::{self, File};
10use std::io::{self, BufReader, Write};
11use std::path::{Path, PathBuf};
12use std::time::{Duration, SystemTime, UNIX_EPOCH};
13
14use serde::Serialize;
15use serde::de::DeserializeOwned;
16use sha2::{Digest, Sha256};
17use tempfile::NamedTempFile;
18
/// Directory name that older shuck releases used for their per-project cache.
pub const CACHE_DIR_NAME: &str = ".shuck_cache";

/// Maximum age (30 days) of a cached file's last-seen timestamp before it is pruned on save.
const MAX_LAST_SEEN_AGE: Duration = Duration::from_secs(60 * 60 * 24 * 30);

/// Builds the path of the legacy cache directory located directly inside `project_root`.
pub fn legacy_cache_dir(project_root: &Path) -> PathBuf {
    let mut cache_dir = project_root.to_path_buf();
    cache_dir.push(CACHE_DIR_NAME);
    cache_dir
}
28
29/// Reads the cached project root marker stored in a legacy cache file.
30pub fn read_project_root_from_cache_file(path: &Path) -> io::Result<Option<PathBuf>> {
31    let file = match File::open(path) {
32        Ok(file) => file,
33        Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(None),
34        Err(err) => return Err(err),
35    };
36
37    let mut reader = BufReader::new(file);
38    match bincode::serde::decode_from_std_read(&mut reader, bincode::config::standard()) {
39        Ok(project_root) => Ok(Some(project_root)),
40        Err(_) => Ok(None),
41    }
42}
43
44/// Trait for values that can contribute to a deterministic package cache key.
45#[allow(missing_docs)]
46pub trait CacheKey {
47    fn cache_key(&self, state: &mut CacheKeyHasher);
48}
49
50/// Incremental hasher used to build structured cache keys.
51pub struct CacheKeyHasher {
52    hasher: Sha256,
53}
54
55#[allow(missing_docs)]
56impl CacheKeyHasher {
57    #[must_use]
58    pub fn new() -> Self {
59        Self {
60            hasher: Sha256::new(),
61        }
62    }
63
64    pub fn write_tag(&mut self, tag: &[u8]) {
65        self.write_bytes(tag);
66    }
67
68    pub fn write_bool(&mut self, value: bool) {
69        self.hasher.update([u8::from(value)]);
70    }
71
72    pub fn write_u8(&mut self, value: u8) {
73        self.hasher.update([value]);
74    }
75
76    pub fn write_u32(&mut self, value: u32) {
77        self.hasher.update(value.to_le_bytes());
78    }
79
80    pub fn write_u64(&mut self, value: u64) {
81        self.hasher.update(value.to_le_bytes());
82    }
83
84    pub fn write_u128(&mut self, value: u128) {
85        self.hasher.update(value.to_le_bytes());
86    }
87
88    pub fn write_usize(&mut self, value: usize) {
89        self.write_u64(value as u64);
90    }
91
92    pub fn write_str(&mut self, value: &str) {
93        self.write_bytes(value.as_bytes());
94    }
95
96    pub fn write_bytes(&mut self, bytes: &[u8]) {
97        self.write_u64(bytes.len() as u64);
98        self.hasher.update(bytes);
99    }
100
101    #[must_use]
102    pub fn finish_hex(self) -> String {
103        let digest = self.hasher.finalize();
104        let mut out = String::with_capacity(digest.len() * 2);
105        for byte in digest {
106            use std::fmt::Write as _;
107            let _ = write!(&mut out, "{byte:02x}");
108        }
109        out
110    }
111}
112
113impl Default for CacheKeyHasher {
114    fn default() -> Self {
115        Self::new()
116    }
117}
118
119/// Returns the hex-encoded cache key for a value.
120#[must_use]
121pub fn cache_key_hex<T: CacheKey>(value: &T) -> String {
122    let mut hasher = CacheKeyHasher::new();
123    value.cache_key(&mut hasher);
124    hasher.finish_hex()
125}
126
127impl CacheKey for bool {
128    fn cache_key(&self, state: &mut CacheKeyHasher) {
129        state.write_bool(*self);
130    }
131}
132
133impl CacheKey for u8 {
134    fn cache_key(&self, state: &mut CacheKeyHasher) {
135        state.write_u8(*self);
136    }
137}
138
139impl CacheKey for u32 {
140    fn cache_key(&self, state: &mut CacheKeyHasher) {
141        state.write_u32(*self);
142    }
143}
144
145impl CacheKey for u64 {
146    fn cache_key(&self, state: &mut CacheKeyHasher) {
147        state.write_u64(*self);
148    }
149}
150
151impl CacheKey for u128 {
152    fn cache_key(&self, state: &mut CacheKeyHasher) {
153        state.write_u128(*self);
154    }
155}
156
157impl CacheKey for usize {
158    fn cache_key(&self, state: &mut CacheKeyHasher) {
159        state.write_usize(*self);
160    }
161}
162
163impl CacheKey for str {
164    fn cache_key(&self, state: &mut CacheKeyHasher) {
165        state.write_str(self);
166    }
167}
168
169impl CacheKey for String {
170    fn cache_key(&self, state: &mut CacheKeyHasher) {
171        self.as_str().cache_key(state);
172    }
173}
174
175impl<T: CacheKey + ?Sized> CacheKey for &T {
176    fn cache_key(&self, state: &mut CacheKeyHasher) {
177        (**self).cache_key(state);
178    }
179}
180
181impl<T: CacheKey> CacheKey for Option<T> {
182    fn cache_key(&self, state: &mut CacheKeyHasher) {
183        match self {
184            Some(value) => {
185                state.write_u8(1);
186                value.cache_key(state);
187            }
188            None => state.write_u8(0),
189        }
190    }
191}
192
193impl<T: CacheKey> CacheKey for [T] {
194    fn cache_key(&self, state: &mut CacheKeyHasher) {
195        state.write_usize(self.len());
196        for value in self {
197            value.cache_key(state);
198        }
199    }
200}
201
202impl<T: CacheKey> CacheKey for Vec<T> {
203    fn cache_key(&self, state: &mut CacheKeyHasher) {
204        self.as_slice().cache_key(state);
205    }
206}
207
208impl CacheKey for Path {
209    fn cache_key(&self, state: &mut CacheKeyHasher) {
210        state.write_str(&self.to_string_lossy());
211    }
212}
213
214impl CacheKey for PathBuf {
215    fn cache_key(&self, state: &mut CacheKeyHasher) {
216        self.as_path().cache_key(state);
217    }
218}
219
220impl<K, V> CacheKey for BTreeMap<K, V>
221where
222    K: CacheKey + Ord,
223    V: CacheKey,
224{
225    fn cache_key(&self, state: &mut CacheKeyHasher) {
226        state.write_usize(self.len());
227        for (key, value) in self {
228            key.cache_key(state);
229            value.cache_key(state);
230        }
231    }
232}
233
234/// File metadata used to validate cached entries against a filesystem path.
235#[allow(missing_docs)]
236#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
237pub struct FileCacheKey {
238    pub file_last_modified_ms: u128,
239    pub file_permissions_mode: u32,
240    pub file_size_bytes: u64,
241}
242
243#[allow(missing_docs)]
244impl FileCacheKey {
245    pub fn from_path(path: &Path) -> io::Result<Self> {
246        let metadata = path.metadata()?;
247        let file_last_modified_ms = metadata
248            .modified()
249            .and_then(|modified| {
250                modified
251                    .duration_since(UNIX_EPOCH)
252                    .map_err(io::Error::other)
253            })?
254            .as_millis();
255
256        #[cfg(unix)]
257        let file_permissions_mode = {
258            use std::os::unix::fs::PermissionsExt;
259            metadata.permissions().mode()
260        };
261
262        #[cfg(windows)]
263        let file_permissions_mode: u32 = u32::from(metadata.permissions().readonly());
264
265        Ok(Self {
266            file_last_modified_ms,
267            file_permissions_mode,
268            file_size_bytes: metadata.len(),
269        })
270    }
271}
272
273#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
274struct CachedFile<T> {
275    key: FileCacheKey,
276    last_seen_ms: u64,
277    data: T,
278}
279
280#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
281struct StoredPackage<T> {
282    project_root: PathBuf,
283    files: BTreeMap<PathBuf, CachedFile<T>>,
284}
285
286#[derive(Debug, Clone)]
287struct Change<T> {
288    key: FileCacheKey,
289    data: T,
290}
291
292/// On-disk cache for file-scoped analysis results within a package.
293#[derive(Debug, Clone)]
294pub struct PackageCache<T> {
295    path: PathBuf,
296    package: StoredPackage<T>,
297    seen_paths: BTreeSet<PathBuf>,
298    changes: BTreeMap<PathBuf, Change<T>>,
299    last_seen_ms: u64,
300}
301
302#[allow(missing_docs)]
303impl<T> PackageCache<T>
304where
305    T: Clone + Serialize + DeserializeOwned,
306{
307    pub fn open(
308        cache_root: &Path,
309        canonical_root: PathBuf,
310        tool_version: &str,
311        package_key: &impl CacheKey,
312    ) -> io::Result<Self> {
313        let key = cache_key_hex(package_key);
314        let path = cache_root.join(tool_version).join(format!("{key}.bin"));
315
316        let file = match File::open(&path) {
317            Ok(file) => file,
318            Err(err) if err.kind() == io::ErrorKind::NotFound => {
319                return Ok(Self::empty(path, canonical_root));
320            }
321            Err(err) => return Err(err),
322        };
323
324        let mut reader = BufReader::new(file);
325        let package: StoredPackage<T> =
326            match bincode::serde::decode_from_std_read(&mut reader, bincode::config::standard()) {
327                Ok(package) => package,
328                Err(_) => return Ok(Self::empty(path, canonical_root)),
329            };
330
331        if package.project_root != canonical_root {
332            return Ok(Self::empty(path, canonical_root));
333        }
334
335        Ok(Self {
336            path,
337            package,
338            seen_paths: BTreeSet::new(),
339            changes: BTreeMap::new(),
340            last_seen_ms: current_time_ms(),
341        })
342    }
343
344    #[must_use]
345    pub fn path(&self) -> &Path {
346        &self.path
347    }
348
349    pub fn get(&mut self, relative_path: &Path, key: &FileCacheKey) -> Option<T> {
350        let file = self.package.files.get(relative_path)?;
351        if &file.key != key {
352            return None;
353        }
354
355        self.seen_paths.insert(relative_path.to_path_buf());
356        Some(file.data.clone())
357    }
358
359    pub fn insert(&mut self, relative_path: PathBuf, key: FileCacheKey, data: T) {
360        self.seen_paths.insert(relative_path.clone());
361        self.changes.insert(relative_path, Change { key, data });
362    }
363
364    pub fn persist(mut self) -> io::Result<()> {
365        if !self.save() {
366            return Ok(());
367        }
368
369        let parent = self
370            .path
371            .parent()
372            .ok_or_else(|| io::Error::other("cache path has no parent directory"))?;
373        fs::create_dir_all(parent)?;
374
375        let mut temp_file = NamedTempFile::new_in(parent)?;
376        let encoded = bincode::serde::encode_to_vec(&self.package, bincode::config::standard())
377            .map_err(io::Error::other)?;
378        temp_file.write_all(&encoded)?;
379
380        match temp_file.persist(&self.path) {
381            Ok(_) => Ok(()),
382            Err(err) => Err(err.error),
383        }
384    }
385
386    fn empty(path: PathBuf, canonical_root: PathBuf) -> Self {
387        Self {
388            path,
389            package: StoredPackage {
390                project_root: canonical_root,
391                files: BTreeMap::new(),
392            },
393            seen_paths: BTreeSet::new(),
394            changes: BTreeMap::new(),
395            last_seen_ms: current_time_ms(),
396        }
397    }
398
399    fn save(&mut self) -> bool {
400        if self.seen_paths.is_empty() && self.changes.is_empty() {
401            return false;
402        }
403
404        let max_age_ms = MAX_LAST_SEEN_AGE.as_millis() as u64;
405        let now = self.last_seen_ms;
406
407        self.package
408            .files
409            .retain(|_, file| now.saturating_sub(file.last_seen_ms) <= max_age_ms);
410
411        for path in &self.seen_paths {
412            if let Some(change) = self.changes.remove(path) {
413                self.package.files.insert(
414                    path.clone(),
415                    CachedFile {
416                        key: change.key,
417                        last_seen_ms: now,
418                        data: change.data,
419                    },
420                );
421            } else if let Some(existing) = self.package.files.get_mut(path) {
422                existing.last_seen_ms = now;
423            }
424        }
425
426        for (path, change) in std::mem::take(&mut self.changes) {
427            self.package.files.insert(
428                path,
429                CachedFile {
430                    key: change.key,
431                    last_seen_ms: now,
432                    data: change.data,
433                },
434            );
435        }
436
437        true
438    }
439}
440
/// Returns the milliseconds elapsed since the Unix epoch, or `0` when the system clock reports a
/// time before the epoch.
fn current_time_ms() -> u64 {
    let since_epoch: Duration = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default();
    since_epoch.as_millis() as u64
}
447
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal settings type used as the package key in these tests.
    #[derive(Debug, Clone)]
    struct TestSettings {
        strict: bool,
        label: String,
    }

    impl CacheKey for TestSettings {
        fn cache_key(&self, state: &mut CacheKeyHasher) {
            state.write_tag(b"test-settings");
            self.strict.cache_key(state);
            self.label.cache_key(state);
        }
    }

    /// Settings labelled "alpha" with the given strictness.
    fn alpha_settings(strict: bool) -> TestSettings {
        TestSettings {
            strict,
            label: "alpha".to_string(),
        }
    }

    /// A file key with mode `0o644` and the given modification time and size.
    fn file_key(modified_ms: u128, size_bytes: u64) -> FileCacheKey {
        FileCacheKey {
            file_last_modified_ms: modified_ms,
            file_permissions_mode: 0o644,
            file_size_bytes: size_bytes,
        }
    }

    /// Decodes the package stored at `cache_path`, panicking on any failure.
    fn read_stored(cache_path: &Path) -> StoredPackage<String> {
        let mut reader = BufReader::new(File::open(cache_path).unwrap());
        bincode::serde::decode_from_std_read(&mut reader, bincode::config::standard()).unwrap()
    }

    /// Temporary cache root plus a canonicalized project directory, with strict alpha settings.
    struct Fixture {
        // Held so the temporary directory lives as long as the fixture.
        _tempdir: tempfile::TempDir,
        cache_root: PathBuf,
        canonical_root: PathBuf,
        settings: TestSettings,
    }

    impl Fixture {
        fn new() -> Self {
            let tempdir = tempfile::tempdir().unwrap();
            let cache_root = tempdir.path().join("cache");
            let storage_root = tempdir.path().join("project");
            fs::create_dir_all(&storage_root).unwrap();
            let canonical_root = fs::canonicalize(&storage_root).unwrap();

            Self {
                _tempdir: tempdir,
                cache_root,
                canonical_root,
                settings: alpha_settings(true),
            }
        }

        /// Opens the package cache for tool version "0.1.0".
        fn open(&self) -> PackageCache<String> {
            PackageCache::<String>::open(
                &self.cache_root,
                self.canonical_root.clone(),
                "0.1.0",
                &self.settings,
            )
            .unwrap()
        }
    }

    #[test]
    fn cache_key_hashing_is_deterministic() {
        let settings = alpha_settings(true);

        let first = cache_key_hex(&settings);
        let second = cache_key_hex(&settings);

        assert_eq!(first, second);
    }

    #[test]
    fn cache_key_changes_when_settings_change() {
        let first = alpha_settings(true);
        let second = alpha_settings(false);

        assert_ne!(cache_key_hex(&first), cache_key_hex(&second));
    }

    #[test]
    fn package_cache_persists_and_reloads() {
        let fixture = Fixture::new();

        let mut cache = fixture.open();
        cache.insert(PathBuf::from("script.sh"), file_key(1, 2), "ok".to_string());
        let cache_path = cache.path().to_path_buf();
        cache.persist().unwrap();

        assert!(cache_path.is_file());

        let mut reopened = fixture.open();
        let value = reopened.get(Path::new("script.sh"), &file_key(1, 2));

        assert_eq!(value.as_deref(), Some("ok"));
    }

    #[test]
    fn persist_prunes_stale_entries() {
        let fixture = Fixture::new();

        let mut cache = fixture.open();
        cache.insert(PathBuf::from("stale.sh"), file_key(1, 5), "stale".to_string());
        let cache_path = cache.path().to_path_buf();
        cache.persist().unwrap();

        // Rewrite the file so the entry looks as if it was last seen at the epoch.
        let mut stored = read_stored(&cache_path);
        stored
            .files
            .get_mut(Path::new("stale.sh"))
            .unwrap()
            .last_seen_ms = 0;
        let encoded = bincode::serde::encode_to_vec(&stored, bincode::config::standard()).unwrap();
        fs::write(&cache_path, encoded).unwrap();

        let mut reopened = fixture.open();
        reopened.insert(PathBuf::from("fresh.sh"), file_key(2, 5), "fresh".to_string());
        reopened.persist().unwrap();

        let stored = read_stored(&cache_path);

        assert!(!stored.files.contains_key(Path::new("stale.sh")));
        assert!(stored.files.contains_key(Path::new("fresh.sh")));
    }

    #[test]
    fn cache_key_miss_when_only_file_size_changes() {
        let fixture = Fixture::new();

        let mut cache = fixture.open();
        cache.insert(PathBuf::from("script.sh"), file_key(1, 2), "ok".to_string());
        cache.persist().unwrap();

        let mut reopened = fixture.open();
        let value = reopened.get(Path::new("script.sh"), &file_key(1, 3));

        assert!(value.is_none());
    }

    #[test]
    fn reads_project_root_from_cache_file_without_knowing_payload_type() {
        let fixture = Fixture::new();

        let mut cache = fixture.open();
        cache.insert(PathBuf::from("script.sh"), file_key(1, 2), "ok".to_string());
        let cache_path = cache.path().to_path_buf();
        cache.persist().unwrap();

        let project_root = read_project_root_from_cache_file(&cache_path).unwrap();
        assert_eq!(project_root, Some(fixture.canonical_root.clone()));
    }
}