Skip to main content

fallow_extract/cache/
store.rs

1//! Cache store: load, save, and query cached module data.
2
3use std::path::Path;
4
5use rustc_hash::FxHashMap;
6
7use bincode::{Decode, Encode};
8
9use super::types::{CACHE_VERSION, CachedModule, MAX_CACHE_SIZE};
10
11/// Cached module information stored on disk.
12#[derive(Debug, Encode, Decode)]
13pub struct CacheStore {
14    version: u32,
15    /// Map from file path to cached module data.
16    entries: FxHashMap<String, CachedModule>,
17}
18
19impl CacheStore {
20    /// Create a new empty cache.
21    pub fn new() -> Self {
22        Self {
23            version: CACHE_VERSION,
24            entries: FxHashMap::default(),
25        }
26    }
27
28    /// Load cache from disk.
29    pub fn load(cache_dir: &Path) -> Option<Self> {
30        let cache_file = cache_dir.join("cache.bin");
31        let data = std::fs::read(&cache_file).ok()?;
32        if data.len() > MAX_CACHE_SIZE {
33            tracing::warn!(
34                size_mb = data.len() / (1024 * 1024),
35                "Cache file exceeds size limit, ignoring"
36            );
37            return None;
38        }
39        let (store, _): (Self, usize) =
40            bincode::decode_from_slice(&data, bincode::config::standard()).ok()?;
41        if store.version != CACHE_VERSION {
42            return None;
43        }
44        Some(store)
45    }
46
47    /// Save cache to disk.
48    pub fn save(&self, cache_dir: &Path) -> Result<(), String> {
49        std::fs::create_dir_all(cache_dir)
50            .map_err(|e| format!("Failed to create cache dir: {e}"))?;
51        let cache_file = cache_dir.join("cache.bin");
52        let data = bincode::encode_to_vec(self, bincode::config::standard())
53            .map_err(|e| format!("Failed to serialize cache: {e}"))?;
54        std::fs::write(&cache_file, data).map_err(|e| format!("Failed to write cache: {e}"))?;
55        Ok(())
56    }
57
58    /// Look up a cached module by path and content hash.
59    /// Returns None if not cached or hash mismatch.
60    pub fn get(&self, path: &Path, content_hash: u64) -> Option<&CachedModule> {
61        let key = path.to_string_lossy().to_string();
62        let entry = self.entries.get(&key)?;
63        if entry.content_hash == content_hash {
64            Some(entry)
65        } else {
66            None
67        }
68    }
69
70    /// Insert or update a cached module.
71    pub fn insert(&mut self, path: &Path, module: CachedModule) {
72        let key = path.to_string_lossy().to_string();
73        self.entries.insert(key, module);
74    }
75
76    /// Fast cache lookup using only file metadata (mtime + size).
77    ///
78    /// If the cached entry has matching mtime and size, the file content
79    /// almost certainly has not changed, so we can skip reading the file
80    /// entirely. This turns a cache hit from `stat() + read() + hash`
81    /// into just `stat()`.
82    pub fn get_by_metadata(
83        &self,
84        path: &Path,
85        mtime_secs: u64,
86        file_size: u64,
87    ) -> Option<&CachedModule> {
88        let key = path.to_string_lossy().to_string();
89        let entry = self.entries.get(&key)?;
90        if entry.mtime_secs == mtime_secs && entry.file_size == file_size && mtime_secs > 0 {
91            Some(entry)
92        } else {
93            None
94        }
95    }
96
97    /// Look up a cached module by path only (ignoring hash).
98    /// Used to check whether a module's content hash matches without
99    /// requiring the caller to know the hash upfront.
100    pub fn get_by_path_only(&self, path: &Path) -> Option<&CachedModule> {
101        let key = path.to_string_lossy().to_string();
102        self.entries.get(&key)
103    }
104
105    /// Remove cache entries for files that are no longer in the project.
106    /// Keeps the cache from growing unboundedly as files are deleted.
107    pub fn retain_paths(&mut self, files: &[fallow_types::discover::DiscoveredFile]) {
108        use rustc_hash::FxHashSet;
109        let current_paths: FxHashSet<String> = files
110            .iter()
111            .map(|f| f.path.to_string_lossy().to_string())
112            .collect();
113        self.entries.retain(|key, _| current_paths.contains(key));
114    }
115
116    /// Number of cached entries.
117    pub fn len(&self) -> usize {
118        self.entries.len()
119    }
120
121    /// Whether cache is empty.
122    pub fn is_empty(&self) -> bool {
123        self.entries.is_empty()
124    }
125}
126
127impl Default for CacheStore {
128    fn default() -> Self {
129        Self::new()
130    }
131}