Skip to main content

fallow_extract/cache/
store.rs

1//! Cache store: load, save, and query cached module data.
2
3use std::path::Path;
4
5use rustc_hash::FxHashMap;
6
7use bincode::{Decode, Encode};
8
9use super::types::{CACHE_VERSION, CachedModule, MAX_CACHE_SIZE};
10
11/// Cached module information stored on disk.
12#[derive(Debug, Encode, Decode)]
13pub struct CacheStore {
14    version: u32,
15    /// Map from file path to cached module data.
16    entries: FxHashMap<String, CachedModule>,
17}
18
19impl CacheStore {
20    /// Create a new empty cache.
21    #[must_use]
22    pub fn new() -> Self {
23        Self {
24            version: CACHE_VERSION,
25            entries: FxHashMap::default(),
26        }
27    }
28
29    /// Load cache from disk.
30    #[must_use]
31    pub fn load(cache_dir: &Path) -> Option<Self> {
32        let cache_file = cache_dir.join("cache.bin");
33        let data = std::fs::read(&cache_file).ok()?;
34        if data.len() > MAX_CACHE_SIZE {
35            tracing::warn!(
36                size_mb = data.len() / (1024 * 1024),
37                "Cache file exceeds size limit, ignoring"
38            );
39            return None;
40        }
41        let (store, _): (Self, usize) =
42            bincode::decode_from_slice(&data, bincode::config::standard()).ok()?;
43        if store.version != CACHE_VERSION {
44            return None;
45        }
46        Some(store)
47    }
48
49    /// Save cache to disk.
50    ///
51    /// # Errors
52    ///
53    /// Returns an error string when the cache directory cannot be created,
54    /// the cache cannot be serialized, or the cache file cannot be written.
55    pub fn save(&self, cache_dir: &Path) -> Result<(), String> {
56        std::fs::create_dir_all(cache_dir)
57            .map_err(|e| format!("Failed to create cache dir: {e}"))?;
58        let cache_file = cache_dir.join("cache.bin");
59        let data = bincode::encode_to_vec(self, bincode::config::standard())
60            .map_err(|e| format!("Failed to serialize cache: {e}"))?;
61        std::fs::write(&cache_file, data).map_err(|e| format!("Failed to write cache: {e}"))?;
62        Ok(())
63    }
64
65    /// Look up a cached module by path and content hash.
66    /// Returns None if not cached or hash mismatch.
67    #[must_use]
68    pub fn get(&self, path: &Path, content_hash: u64) -> Option<&CachedModule> {
69        let key = path.to_string_lossy().to_string();
70        let entry = self.entries.get(&key)?;
71        if entry.content_hash == content_hash {
72            Some(entry)
73        } else {
74            None
75        }
76    }
77
78    /// Insert or update a cached module.
79    pub fn insert(&mut self, path: &Path, module: CachedModule) {
80        let key = path.to_string_lossy().to_string();
81        self.entries.insert(key, module);
82    }
83
84    /// Fast cache lookup using only file metadata (mtime + size).
85    ///
86    /// If the cached entry has matching mtime and size, the file content
87    /// almost certainly has not changed, so we can skip reading the file
88    /// entirely. This turns a cache hit from `stat() + read() + hash`
89    /// into just `stat()`.
90    #[must_use]
91    pub fn get_by_metadata(
92        &self,
93        path: &Path,
94        mtime_secs: u64,
95        file_size: u64,
96    ) -> Option<&CachedModule> {
97        let key = path.to_string_lossy().to_string();
98        let entry = self.entries.get(&key)?;
99        if entry.mtime_secs == mtime_secs && entry.file_size == file_size && mtime_secs > 0 {
100            Some(entry)
101        } else {
102            None
103        }
104    }
105
106    /// Look up a cached module by path only (ignoring hash).
107    /// Used to check whether a module's content hash matches without
108    /// requiring the caller to know the hash upfront.
109    #[must_use]
110    pub fn get_by_path_only(&self, path: &Path) -> Option<&CachedModule> {
111        let key = path.to_string_lossy().to_string();
112        self.entries.get(&key)
113    }
114
115    /// Remove cache entries for files that are no longer in the project.
116    /// Keeps the cache from growing unboundedly as files are deleted.
117    pub fn retain_paths(&mut self, files: &[fallow_types::discover::DiscoveredFile]) {
118        use rustc_hash::FxHashSet;
119        let current_paths: FxHashSet<String> = files
120            .iter()
121            .map(|f| f.path.to_string_lossy().to_string())
122            .collect();
123        self.entries.retain(|key, _| current_paths.contains(key));
124    }
125
126    /// Number of cached entries.
127    #[must_use]
128    pub fn len(&self) -> usize {
129        self.entries.len()
130    }
131
132    /// Whether cache is empty.
133    #[must_use]
134    pub fn is_empty(&self) -> bool {
135        self.entries.is_empty()
136    }
137}
138
139impl Default for CacheStore {
140    fn default() -> Self {
141        Self::new()
142    }
143}