Skip to main content

fallow_extract/cache/
store.rs

1//! Cache store: load, save, and query cached module data.
2
3use std::path::Path;
4
5use rustc_hash::FxHashMap;
6
7use bitcode::{Decode, Encode};
8
9use super::types::{CACHE_VERSION, CachedModule, MAX_CACHE_SIZE};
10
11/// Cached module information stored on disk.
12#[derive(Debug, Encode, Decode)]
13pub struct CacheStore {
14    version: u32,
15    /// Map from file path to cached module data.
16    entries: FxHashMap<String, CachedModule>,
17}
18
19impl CacheStore {
20    /// Create a new empty cache.
21    #[must_use]
22    pub fn new() -> Self {
23        Self {
24            version: CACHE_VERSION,
25            entries: FxHashMap::default(),
26        }
27    }
28
29    /// Load cache from disk.
30    #[must_use]
31    pub fn load(cache_dir: &Path) -> Option<Self> {
32        let cache_file = cache_dir.join("cache.bin");
33        let data = std::fs::read(&cache_file).ok()?;
34        if data.len() > MAX_CACHE_SIZE {
35            tracing::warn!(
36                size_mb = data.len() / (1024 * 1024),
37                "Cache file exceeds size limit, ignoring"
38            );
39            return None;
40        }
41        let store: Self = bitcode::decode(&data).ok()?;
42        if store.version != CACHE_VERSION {
43            return None;
44        }
45        Some(store)
46    }
47
48    /// Save cache to disk.
49    ///
50    /// # Errors
51    ///
52    /// Returns an error string when the cache directory cannot be created
53    /// or the cache file cannot be written.
54    pub fn save(&self, cache_dir: &Path) -> Result<(), String> {
55        std::fs::create_dir_all(cache_dir)
56            .map_err(|e| format!("Failed to create cache dir: {e}"))?;
57        write_cache_gitignore(cache_dir)?;
58        let cache_file = cache_dir.join("cache.bin");
59        let data = bitcode::encode(self);
60        std::fs::write(&cache_file, data).map_err(|e| format!("Failed to write cache: {e}"))?;
61        Ok(())
62    }
63
64    /// Look up a cached module by path and content hash.
65    /// Returns None if not cached or hash mismatch.
66    #[must_use]
67    pub fn get(&self, path: &Path, content_hash: u64) -> Option<&CachedModule> {
68        let key = path.to_string_lossy().to_string();
69        let entry = self.entries.get(&key)?;
70        if entry.content_hash == content_hash {
71            Some(entry)
72        } else {
73            None
74        }
75    }
76
77    /// Insert or update a cached module.
78    pub fn insert(&mut self, path: &Path, module: CachedModule) {
79        let key = path.to_string_lossy().to_string();
80        self.entries.insert(key, module);
81    }
82
83    /// Fast cache lookup using only file metadata (mtime + size).
84    ///
85    /// If the cached entry has matching mtime and size, the file content
86    /// almost certainly has not changed, so we can skip reading the file
87    /// entirely. This turns a cache hit from `stat() + read() + hash`
88    /// into just `stat()`.
89    #[must_use]
90    pub fn get_by_metadata(
91        &self,
92        path: &Path,
93        mtime_secs: u64,
94        file_size: u64,
95    ) -> Option<&CachedModule> {
96        let key = path.to_string_lossy().to_string();
97        let entry = self.entries.get(&key)?;
98        if entry.mtime_secs == mtime_secs && entry.file_size == file_size && mtime_secs > 0 {
99            Some(entry)
100        } else {
101            None
102        }
103    }
104
105    /// Look up a cached module by path only (ignoring hash).
106    /// Used to check whether a module's content hash matches without
107    /// requiring the caller to know the hash upfront.
108    #[must_use]
109    pub fn get_by_path_only(&self, path: &Path) -> Option<&CachedModule> {
110        let key = path.to_string_lossy().to_string();
111        self.entries.get(&key)
112    }
113
114    /// Remove cache entries for files that are no longer in the project.
115    /// Keeps the cache from growing unboundedly as files are deleted.
116    pub fn retain_paths(&mut self, files: &[fallow_types::discover::DiscoveredFile]) {
117        use rustc_hash::FxHashSet;
118        let current_paths: FxHashSet<String> = files
119            .iter()
120            .map(|f| f.path.to_string_lossy().to_string())
121            .collect();
122        self.entries.retain(|key, _| current_paths.contains(key));
123    }
124
125    /// Number of cached entries.
126    #[must_use]
127    pub fn len(&self) -> usize {
128        self.entries.len()
129    }
130
131    /// Whether cache is empty.
132    #[must_use]
133    pub fn is_empty(&self) -> bool {
134        self.entries.is_empty()
135    }
136}
137
/// Write a `.gitignore` containing the single pattern `*` into `cache_dir`,
/// so Git ignores everything stored there. Any existing `.gitignore` in that
/// directory is overwritten.
///
/// Returns an error string when the file cannot be written.
fn write_cache_gitignore(cache_dir: &Path) -> Result<(), String> {
    let ignore_file = cache_dir.join(".gitignore");
    match std::fs::write(ignore_file, "*\n") {
        Ok(()) => Ok(()),
        Err(e) => Err(format!("Failed to write cache .gitignore: {e}")),
    }
}
142
143impl Default for CacheStore {
144    fn default() -> Self {
145        Self::new()
146    }
147}