fallow_extract/cache/
store.rs1use std::path::Path;
4
5use rustc_hash::FxHashMap;
6
7use bitcode::{Decode, Encode};
8
9use super::types::{
10 CACHE_VERSION, CachedModule, DEFAULT_CACHE_MAX_SIZE, EVICTION_SIGNIFICANT_BPS,
11 EVICTION_TARGET_BPS, EVICTION_TRIGGER_BPS,
12};
13
/// On-disk extraction cache: a version stamp, the hash of the configuration it was
/// built with, and one [`CachedModule`] per file path.
///
/// The struct is serialized as a whole with `bitcode` (see the `Encode`/`Decode`
/// derives), so field order and types are presumably part of the on-disk format —
/// NOTE(review): confirm `CACHE_VERSION` is bumped whenever this layout changes.
#[derive(Debug, Encode, Decode)]
pub struct CacheStore {
    /// Format version; `load` rejects a store whose value differs from `CACHE_VERSION`.
    version: u32,
    /// Hash of the configuration in effect when the store was last saved; `load`
    /// rejects a store whose hash differs from the caller's expected value.
    config_hash: u64,
    /// Cached modules keyed by the lossy UTF-8 string form of the file path.
    entries: FxHashMap<String, CachedModule>,
}
23
24impl CacheStore {
25 #[must_use]
27 pub fn new() -> Self {
28 Self {
29 version: CACHE_VERSION,
30 config_hash: 0,
31 entries: FxHashMap::default(),
32 }
33 }
34
35 #[must_use]
40 pub fn load(
41 cache_dir: &Path,
42 expected_config_hash: u64,
43 max_size_bytes: usize,
44 ) -> Option<Self> {
45 let cache_file = cache_dir.join("cache.bin");
46 let data = std::fs::read(&cache_file).ok()?;
47 let safety_ceiling = max_size_bytes.max(DEFAULT_CACHE_MAX_SIZE);
48 if data.len() > safety_ceiling {
49 tracing::warn!(
50 size_mb = data.len() / (1024 * 1024),
51 ceiling_mb = safety_ceiling / (1024 * 1024),
52 "Cache file exceeds safety ceiling, ignoring"
53 );
54 return None;
55 }
56 let store: Self = match bitcode::decode(&data) {
57 Ok(s) => s,
58 Err(_) => {
59 tracing::info!(
60 "Cache format upgraded, rebuilding (one-time cost after version bump)"
61 );
62 return None;
63 }
64 };
65 if store.version != CACHE_VERSION {
66 tracing::info!("Cache format upgraded, rebuilding (one-time cost after version bump)");
67 return None;
68 }
69 if store.config_hash != expected_config_hash {
70 return None;
71 }
72 Some(store)
73 }
74
75 pub fn save(
77 &mut self,
78 cache_dir: &Path,
79 config_hash: u64,
80 max_size_bytes: usize,
81 ) -> Result<(), String> {
82 std::fs::create_dir_all(cache_dir)
83 .map_err(|e| format!("Failed to create cache dir: {e}"))?;
84 write_cache_gitignore(cache_dir)?;
85
86 self.config_hash = config_hash;
87 let initial_entries = self.entries.len();
88 let mut encoded = bitcode::encode(self);
89
90 let trigger = (max_size_bytes / 10_000).saturating_mul(EVICTION_TRIGGER_BPS);
91 if encoded.len() > trigger {
92 let target = (max_size_bytes / 10_000).saturating_mul(EVICTION_TARGET_BPS);
93 self.evict_lru_to_target(target);
94 encoded = bitcode::encode(self);
95 let evicted = initial_entries.saturating_sub(self.entries.len());
96 let final_size = encoded.len();
97 let significant_evicted =
98 initial_entries.saturating_mul(EVICTION_SIGNIFICANT_BPS) / 10_000;
99 if evicted >= significant_evicted && initial_entries > 0 {
100 tracing::info!(
101 evicted_entries = evicted,
102 remaining_entries = self.entries.len(),
103 final_size_kb = final_size / 1024,
104 max_size_kb = max_size_bytes / 1024,
105 "Cache eviction: removed oldest entries to stay under cap"
106 );
107 } else {
108 tracing::debug!(
109 evicted_entries = evicted,
110 remaining_entries = self.entries.len(),
111 final_size_kb = final_size / 1024,
112 max_size_kb = max_size_bytes / 1024,
113 "Cache eviction"
114 );
115 }
116 }
117
118 let cache_file = cache_dir.join("cache.bin");
119 atomic_write(&cache_file, &encoded)?;
120 Ok(())
121 }
122
123 fn evict_lru_to_target(&mut self, target_bytes: usize) {
126 let mut order: Vec<(u64, String)> = self
127 .entries
128 .iter()
129 .map(|(k, v)| (v.last_access_secs, k.clone()))
130 .collect();
131 order.sort();
132
133 const BATCH: usize = 100;
134 let mut idx = 0;
135 while idx < order.len() {
136 let batch_end = (idx + BATCH).min(order.len());
137 for (_, key) in &order[idx..batch_end] {
138 if self.entries.len() <= 1 {
139 break;
140 }
141 self.entries.remove(key);
142 }
143 idx = batch_end;
144
145 let encoded_size = bitcode::encode(self).len();
146 if encoded_size <= target_bytes || self.entries.len() <= 1 {
147 if encoded_size > target_bytes && self.entries.len() <= 1 {
148 tracing::warn!(
149 encoded_kb = encoded_size / 1024,
150 target_kb = target_bytes / 1024,
151 "Single cache entry exceeds configured max; cache will overshoot the cap"
152 );
153 }
154 return;
155 }
156 }
157 }
158
159 #[must_use]
162 pub fn get(&self, path: &Path, content_hash: u64) -> Option<&CachedModule> {
163 let key = path.to_string_lossy();
164 let entry = self.entries.get(key.as_ref())?;
165 if entry.content_hash == content_hash {
166 Some(entry)
167 } else {
168 None
169 }
170 }
171
172 pub fn insert(&mut self, path: &Path, module: CachedModule) {
174 let key = path.to_string_lossy().into_owned();
175 self.entries.insert(key, module);
176 }
177
178 #[must_use]
180 pub fn get_by_metadata(
181 &self,
182 path: &Path,
183 mtime_secs: u64,
184 file_size: u64,
185 ) -> Option<&CachedModule> {
186 let key = path.to_string_lossy();
187 let entry = self.entries.get(key.as_ref())?;
188 if entry.mtime_secs == mtime_secs && entry.file_size == file_size && mtime_secs > 0 {
189 Some(entry)
190 } else {
191 None
192 }
193 }
194
195 #[must_use]
197 pub fn get_by_path_only(&self, path: &Path) -> Option<&CachedModule> {
198 let key = path.to_string_lossy();
199 self.entries.get(key.as_ref())
200 }
201
202 pub fn retain_paths(&mut self, files: &[fallow_types::discover::DiscoveredFile]) {
204 use rustc_hash::FxHashSet;
205 let current_paths: FxHashSet<String> = files
206 .iter()
207 .map(|f| f.path.to_string_lossy().to_string())
208 .collect();
209 self.entries.retain(|key, _| current_paths.contains(key));
210 }
211
212 #[must_use]
214 pub fn len(&self) -> usize {
215 self.entries.len()
216 }
217
218 #[must_use]
220 pub fn is_empty(&self) -> bool {
221 self.entries.is_empty()
222 }
223}
224
/// Writes a `.gitignore` containing the single pattern `*` into `cache_dir`,
/// overwriting any existing file of that name.
///
/// # Errors
///
/// Returns a human-readable message when the file cannot be written.
fn write_cache_gitignore(cache_dir: &Path) -> Result<(), String> {
    let ignore_path = cache_dir.join(".gitignore");
    match std::fs::write(&ignore_path, "*\n") {
        Ok(()) => Ok(()),
        Err(err) => Err(format!("Failed to write cache .gitignore: {err}")),
    }
}
229
/// Writes `data` to `cache_file` by writing a sibling `<file name>.tmp` first and
/// then renaming it over the destination, so readers never observe a partially
/// written cache file.
///
/// If writing or renaming fails, the temporary file is removed again (best effort)
/// so a failed save does not leave a stray `.tmp` behind.
///
/// # Errors
///
/// Returns a human-readable message when `cache_file` has no file-name component,
/// or when creating, writing, or renaming the temporary file fails.
fn atomic_write(cache_file: &Path, data: &[u8]) -> Result<(), String> {
    use std::io::Write as _;

    let name = cache_file
        .file_name()
        .ok_or_else(|| "Cache file path has no filename component".to_owned())?;
    let mut tmp_name = name.to_os_string();
    tmp_name.push(".tmp");
    let tmp_file = cache_file.with_file_name(tmp_name);

    // Scope the handle so it is closed before the rename below.
    let written = {
        let mut f = std::fs::File::create(&tmp_file)
            .map_err(|e| format!("Failed to create cache tmp: {e}"))?;
        let outcome = f
            .write_all(data)
            .map_err(|e| format!("Failed to write cache tmp: {e}"));
        if outcome.is_ok() {
            // Durability is best-effort: a failed fsync must not fail the save.
            let _ = f.sync_all();
        }
        outcome
    };

    let result = written.and_then(|()| {
        std::fs::rename(&tmp_file, cache_file)
            .map_err(|e| format!("Failed to rename cache tmp into place: {e}"))
    });
    if result.is_err() {
        // We created the temp file above, so it is ours to clean up.
        let _ = std::fs::remove_file(&tmp_file);
    }
    result
}
254
255impl Default for CacheStore {
256 fn default() -> Self {
257 Self::new()
258 }
259}