fallow_extract/cache/
store.rs1use std::path::Path;
4
5use fallow_types::source_fingerprint::SourceFingerprint;
6use rustc_hash::FxHashMap;
7
8use bitcode::{Decode, Encode};
9
10use super::types::{
11 CACHE_VERSION, CachedModule, DEFAULT_CACHE_MAX_SIZE, EVICTION_SIGNIFICANT_BPS,
12 EVICTION_TARGET_BPS, EVICTION_TRIGGER_BPS,
13};
14
15#[derive(Debug, Encode, Decode)]
17pub struct CacheStore {
18 version: u32,
19 config_hash: u64,
21 entries: FxHashMap<String, CachedModule>,
23}
24
25impl CacheStore {
26 #[must_use]
28 pub fn new() -> Self {
29 Self {
30 version: CACHE_VERSION,
31 config_hash: 0,
32 entries: FxHashMap::default(),
33 }
34 }
35
36 #[must_use]
41 pub fn load(
42 cache_dir: &Path,
43 expected_config_hash: u64,
44 max_size_bytes: usize,
45 ) -> Option<Self> {
46 let cache_file = cache_dir.join("cache.bin");
47 let data = std::fs::read(&cache_file).ok()?;
48 let safety_ceiling = max_size_bytes.max(DEFAULT_CACHE_MAX_SIZE);
49 if data.len() > safety_ceiling {
50 tracing::warn!(
51 size_mb = data.len() / (1024 * 1024),
52 ceiling_mb = safety_ceiling / (1024 * 1024),
53 "Cache file exceeds safety ceiling, ignoring"
54 );
55 return None;
56 }
57 let store: Self = match bitcode::decode(&data) {
58 Ok(s) => s,
59 Err(_) => {
60 tracing::info!(
61 "Cache format upgraded, rebuilding (one-time cost after version bump)"
62 );
63 return None;
64 }
65 };
66 if store.version != CACHE_VERSION {
67 tracing::info!("Cache format upgraded, rebuilding (one-time cost after version bump)");
68 return None;
69 }
70 if store.config_hash != expected_config_hash {
71 return None;
72 }
73 Some(store)
74 }
75
76 pub fn save(
78 &mut self,
79 cache_dir: &Path,
80 config_hash: u64,
81 max_size_bytes: usize,
82 ) -> Result<(), String> {
83 std::fs::create_dir_all(cache_dir)
84 .map_err(|e| format!("Failed to create cache dir: {e}"))?;
85 write_cache_gitignore(cache_dir)?;
86
87 self.config_hash = config_hash;
88 let initial_entries = self.entries.len();
89 let mut encoded = bitcode::encode(self);
90
91 let trigger = (max_size_bytes / 10_000).saturating_mul(EVICTION_TRIGGER_BPS);
92 if encoded.len() > trigger {
93 let target = (max_size_bytes / 10_000).saturating_mul(EVICTION_TARGET_BPS);
94 self.evict_lru_to_target(target);
95 encoded = bitcode::encode(self);
96 let evicted = initial_entries.saturating_sub(self.entries.len());
97 let final_size = encoded.len();
98 let significant_evicted =
99 initial_entries.saturating_mul(EVICTION_SIGNIFICANT_BPS) / 10_000;
100 if evicted >= significant_evicted && initial_entries > 0 {
101 tracing::info!(
102 evicted_entries = evicted,
103 remaining_entries = self.entries.len(),
104 final_size_kb = final_size / 1024,
105 max_size_kb = max_size_bytes / 1024,
106 "Cache eviction: removed oldest entries to stay under cap"
107 );
108 } else {
109 tracing::debug!(
110 evicted_entries = evicted,
111 remaining_entries = self.entries.len(),
112 final_size_kb = final_size / 1024,
113 max_size_kb = max_size_bytes / 1024,
114 "Cache eviction"
115 );
116 }
117 }
118
119 let cache_file = cache_dir.join("cache.bin");
120 atomic_write(&cache_file, &encoded)?;
121 Ok(())
122 }
123
124 fn evict_lru_to_target(&mut self, target_bytes: usize) {
127 let mut order: Vec<(u64, String)> = self
128 .entries
129 .iter()
130 .map(|(k, v)| (v.last_access_secs, k.clone()))
131 .collect();
132 order.sort();
133
134 const BATCH: usize = 100;
135 let mut idx = 0;
136 while idx < order.len() {
137 let batch_end = (idx + BATCH).min(order.len());
138 for (_, key) in &order[idx..batch_end] {
139 if self.entries.len() <= 1 {
140 break;
141 }
142 self.entries.remove(key);
143 }
144 idx = batch_end;
145
146 let encoded_size = bitcode::encode(self).len();
147 if encoded_size <= target_bytes || self.entries.len() <= 1 {
148 if encoded_size > target_bytes && self.entries.len() <= 1 {
149 tracing::warn!(
150 encoded_kb = encoded_size / 1024,
151 target_kb = target_bytes / 1024,
152 "Single cache entry exceeds configured max; cache will overshoot the cap"
153 );
154 }
155 return;
156 }
157 }
158 }
159
160 #[must_use]
163 pub fn get(&self, path: &Path, content_hash: u64) -> Option<&CachedModule> {
164 let key = path.to_string_lossy();
165 let entry = self.entries.get(key.as_ref())?;
166 if entry.content_hash == content_hash {
167 Some(entry)
168 } else {
169 None
170 }
171 }
172
173 pub fn insert(&mut self, path: &Path, module: CachedModule) {
175 let key = path.to_string_lossy().into_owned();
176 self.entries.insert(key, module);
177 }
178
179 #[must_use]
181 pub fn get_by_metadata(
182 &self,
183 path: &Path,
184 fingerprint: SourceFingerprint,
185 ) -> Option<&CachedModule> {
186 let key = path.to_string_lossy();
187 let entry = self.entries.get(key.as_ref())?;
188 if entry.source_fingerprint() == fingerprint && fingerprint.has_known_mtime() {
189 Some(entry)
190 } else {
191 None
192 }
193 }
194
195 #[must_use]
197 pub fn get_by_path_only(&self, path: &Path) -> Option<&CachedModule> {
198 let key = path.to_string_lossy();
199 self.entries.get(key.as_ref())
200 }
201
202 pub fn retain_paths(&mut self, files: &[fallow_types::discover::DiscoveredFile]) -> bool {
206 use rustc_hash::FxHashSet;
207 let current_paths: FxHashSet<String> = files
208 .iter()
209 .map(|f| f.path.to_string_lossy().to_string())
210 .collect();
211 let before = self.entries.len();
212 self.entries.retain(|key, _| current_paths.contains(key));
213 self.entries.len() != before
214 }
215
216 #[must_use]
218 pub fn len(&self) -> usize {
219 self.entries.len()
220 }
221
222 #[must_use]
224 pub fn is_empty(&self) -> bool {
225 self.entries.is_empty()
226 }
227}
228
/// Writes a `.gitignore` containing `*` into `cache_dir`, overwriting any
/// existing one.
///
/// # Errors
///
/// Returns a message describing the I/O failure when the file cannot be
/// written.
fn write_cache_gitignore(cache_dir: &Path) -> Result<(), String> {
    let gitignore_path = cache_dir.join(".gitignore");
    let outcome = std::fs::write(gitignore_path, "*\n");
    outcome.map_err(|e| format!("Failed to write cache .gitignore: {e}"))
}
233
/// Writes `data` to `cache_file` by first writing a sibling `<name>.tmp`
/// file and then renaming it over the destination, so the destination is
/// never left holding a partially written payload (this relies on the
/// platform's rename replacing the target in a single step).
///
/// If writing or renaming fails after the temp file was created, the temp
/// file is removed so failed saves do not leave stray files behind.
///
/// # Errors
///
/// Returns a message when `cache_file` has no filename component, or when
/// creating, writing, or renaming the temp file fails.
fn atomic_write(cache_file: &Path, data: &[u8]) -> Result<(), String> {
    use std::io::Write as _;

    let mut tmp_name = match cache_file.file_name() {
        Some(name) => name.to_os_string(),
        None => return Err("Cache file path has no filename component".to_owned()),
    };
    tmp_name.push(".tmp");
    let tmp_file = cache_file.with_file_name(tmp_name);

    let mut f = std::fs::File::create(&tmp_file)
        .map_err(|e| format!("Failed to create cache tmp: {e}"))?;
    let write_result = f
        .write_all(data)
        .map_err(|e| format!("Failed to write cache tmp: {e}"));
    if write_result.is_ok() {
        // Best-effort flush to disk; a failed sync is deliberately ignored.
        let _ = f.sync_all();
    }
    // Close the handle before renaming.
    drop(f);

    let result = write_result.and_then(|()| {
        std::fs::rename(&tmp_file, cache_file)
            .map_err(|e| format!("Failed to rename cache tmp into place: {e}"))
    });
    if result.is_err() {
        // Do not leave the temp file behind; the original error is what matters,
        // so a failure to clean up is ignored.
        let _ = std::fs::remove_file(&tmp_file);
    }
    result
}
258
259impl Default for CacheStore {
260 fn default() -> Self {
261 Self::new()
262 }
263}