Skip to main content

fallow_extract/cache/
store.rs

1//! Cache store: load, save, and query cached module data.
2
3use std::path::Path;
4
5use rustc_hash::FxHashMap;
6
7use bitcode::{Decode, Encode};
8
9use super::types::{CACHE_VERSION, CachedModule, MAX_CACHE_SIZE};
10
11/// Cached module information stored on disk.
12#[derive(Debug, Encode, Decode)]
13pub struct CacheStore {
14    version: u32,
15    /// Map from file path to cached module data.
16    entries: FxHashMap<String, CachedModule>,
17}
18
19impl CacheStore {
20    /// Create a new empty cache.
21    #[must_use]
22    pub fn new() -> Self {
23        Self {
24            version: CACHE_VERSION,
25            entries: FxHashMap::default(),
26        }
27    }
28
29    /// Load cache from disk.
30    #[must_use]
31    pub fn load(cache_dir: &Path) -> Option<Self> {
32        let cache_file = cache_dir.join("cache.bin");
33        let data = std::fs::read(&cache_file).ok()?;
34        if data.len() > MAX_CACHE_SIZE {
35            tracing::warn!(
36                size_mb = data.len() / (1024 * 1024),
37                "Cache file exceeds size limit, ignoring"
38            );
39            return None;
40        }
41        let store: Self = bitcode::decode(&data).ok()?;
42        if store.version != CACHE_VERSION {
43            return None;
44        }
45        Some(store)
46    }
47
48    /// Save cache to disk.
49    ///
50    /// # Errors
51    ///
52    /// Returns an error string when the cache directory cannot be created
53    /// or the cache file cannot be written.
54    pub fn save(&self, cache_dir: &Path) -> Result<(), String> {
55        std::fs::create_dir_all(cache_dir)
56            .map_err(|e| format!("Failed to create cache dir: {e}"))?;
57        let cache_file = cache_dir.join("cache.bin");
58        let data = bitcode::encode(self);
59        std::fs::write(&cache_file, data).map_err(|e| format!("Failed to write cache: {e}"))?;
60        Ok(())
61    }
62
63    /// Look up a cached module by path and content hash.
64    /// Returns None if not cached or hash mismatch.
65    #[must_use]
66    pub fn get(&self, path: &Path, content_hash: u64) -> Option<&CachedModule> {
67        let key = path.to_string_lossy().to_string();
68        let entry = self.entries.get(&key)?;
69        if entry.content_hash == content_hash {
70            Some(entry)
71        } else {
72            None
73        }
74    }
75
76    /// Insert or update a cached module.
77    pub fn insert(&mut self, path: &Path, module: CachedModule) {
78        let key = path.to_string_lossy().to_string();
79        self.entries.insert(key, module);
80    }
81
82    /// Fast cache lookup using only file metadata (mtime + size).
83    ///
84    /// If the cached entry has matching mtime and size, the file content
85    /// almost certainly has not changed, so we can skip reading the file
86    /// entirely. This turns a cache hit from `stat() + read() + hash`
87    /// into just `stat()`.
88    #[must_use]
89    pub fn get_by_metadata(
90        &self,
91        path: &Path,
92        mtime_secs: u64,
93        file_size: u64,
94    ) -> Option<&CachedModule> {
95        let key = path.to_string_lossy().to_string();
96        let entry = self.entries.get(&key)?;
97        if entry.mtime_secs == mtime_secs && entry.file_size == file_size && mtime_secs > 0 {
98            Some(entry)
99        } else {
100            None
101        }
102    }
103
104    /// Look up a cached module by path only (ignoring hash).
105    /// Used to check whether a module's content hash matches without
106    /// requiring the caller to know the hash upfront.
107    #[must_use]
108    pub fn get_by_path_only(&self, path: &Path) -> Option<&CachedModule> {
109        let key = path.to_string_lossy().to_string();
110        self.entries.get(&key)
111    }
112
113    /// Remove cache entries for files that are no longer in the project.
114    /// Keeps the cache from growing unboundedly as files are deleted.
115    pub fn retain_paths(&mut self, files: &[fallow_types::discover::DiscoveredFile]) {
116        use rustc_hash::FxHashSet;
117        let current_paths: FxHashSet<String> = files
118            .iter()
119            .map(|f| f.path.to_string_lossy().to_string())
120            .collect();
121        self.entries.retain(|key, _| current_paths.contains(key));
122    }
123
124    /// Number of cached entries.
125    #[must_use]
126    pub fn len(&self) -> usize {
127        self.entries.len()
128    }
129
130    /// Whether cache is empty.
131    #[must_use]
132    pub fn is_empty(&self) -> bool {
133        self.entries.is_empty()
134    }
135}
136
137impl Default for CacheStore {
138    fn default() -> Self {
139        Self::new()
140    }
141}