lean_ctx/core/
bm25_cache.rs1use std::path::{Path, PathBuf};
2use std::sync::Arc;
3use std::time::{Instant, SystemTime};
4
5use super::bm25_index::BM25Index;
6
/// Fallback cache lifetime in seconds, used by `ttl_secs` when the
/// `LEAN_CTX_BM25_CACHE_TTL` environment variable is unset or cannot be parsed.
const DEFAULT_TTL_SECS: u64 = 60;
8
/// Cheap identity of the on-disk index file: its modification time plus its length.
///
/// Two fingerprints are equal only when both fields match. The `Default` value
/// (`mtime: None`, `size: 0`) is what `index_fingerprint` returns when the file's
/// metadata cannot be read.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Default)]
pub struct IndexFingerprint {
    /// Last-modified time of the file; `None` when it is not available.
    mtime: Option<SystemTime>,
    /// Length of the file in bytes.
    size: u64,
}
20
21pub struct Bm25CacheEntry {
22 pub root: PathBuf,
23 pub index: Arc<BM25Index>,
24 pub loaded_at: Instant,
25 pub fingerprint: IndexFingerprint,
27}
28
29impl Bm25CacheEntry {
30 pub fn is_fresh(&self) -> bool {
31 if self.loaded_at.elapsed().as_secs() >= ttl_secs() {
32 return false;
33 }
34 index_fingerprint(&self.root) == self.fingerprint
37 }
38}
39
40pub(crate) fn index_fingerprint(root: &Path) -> IndexFingerprint {
42 match std::fs::metadata(BM25Index::index_file_path(root)) {
43 Ok(m) => IndexFingerprint {
44 mtime: m.modified().ok(),
45 size: m.len(),
46 },
47 Err(_) => IndexFingerprint::default(),
48 }
49}
50
51fn ttl_secs() -> u64 {
52 std::env::var("LEAN_CTX_BM25_CACHE_TTL")
53 .ok()
54 .and_then(|v| v.parse().ok())
55 .unwrap_or(DEFAULT_TTL_SECS)
56}
57
58pub type SharedBm25Cache = std::sync::Arc<std::sync::Mutex<Option<Bm25CacheEntry>>>;
59
60pub fn get_or_load(cache: &SharedBm25Cache, root: &Path) -> Arc<BM25Index> {
63 {
64 let guard = cache
65 .lock()
66 .unwrap_or_else(std::sync::PoisonError::into_inner);
67 if let Some(ref entry) = *guard {
68 if entry.root == root && entry.is_fresh() {
69 return Arc::clone(&entry.index);
70 }
71 }
72 }
73
74 let index = Arc::new(BM25Index::load_or_build_fast(root));
75
76 let mut guard = cache
77 .lock()
78 .unwrap_or_else(std::sync::PoisonError::into_inner);
79 *guard = Some(Bm25CacheEntry {
80 root: root.to_path_buf(),
81 index: Arc::clone(&index),
82 loaded_at: Instant::now(),
83 fingerprint: index_fingerprint(root),
84 });
85
86 index
87}
88
89pub fn get_or_background(cache: &SharedBm25Cache, root: &Path) -> Option<Arc<BM25Index>> {
92 let guard = cache
93 .lock()
94 .unwrap_or_else(std::sync::PoisonError::into_inner);
95 let entry = guard.as_ref()?;
96 if entry.root != root {
97 return None;
98 }
99
100 let idx = Arc::clone(&entry.index);
101
102 if !entry.is_fresh() {
103 let root_str = root.to_string_lossy().to_string();
104 let cache_clone = cache.clone();
105 let root_clone = root.to_path_buf();
106 std::thread::spawn(move || {
107 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
111 let rebuilt = BM25Index::load_or_build(&root_clone);
112 let rebuilt_fp = index_fingerprint(&root_clone);
113 let mut g = cache_clone
114 .lock()
115 .unwrap_or_else(std::sync::PoisonError::into_inner);
116 *g = Some(Bm25CacheEntry {
117 root: root_clone,
118 index: Arc::new(rebuilt),
119 loaded_at: Instant::now(),
120 fingerprint: rebuilt_fp,
121 });
122 }));
123 if result.is_ok() {
124 tracing::debug!("[bm25_cache: background refresh done for {root_str}]");
125 } else {
126 tracing::warn!(
127 "[bm25_cache: background refresh panicked for {root_str}; serving stale index]"
128 );
129 }
130 });
131 }
132
133 Some(idx)
134}
135
136pub fn unload(cache: &SharedBm25Cache) {
139 let mut guard = cache
140 .lock()
141 .unwrap_or_else(std::sync::PoisonError::into_inner);
142 if guard.is_some() {
143 *guard = None;
144 tracing::info!("[bm25_cache] unloaded index to free memory");
145 }
146}
147
148pub fn memory_usage(cache: &SharedBm25Cache) -> usize {
150 let guard = cache
151 .lock()
152 .unwrap_or_else(std::sync::PoisonError::into_inner);
153 guard.as_ref().map_or(0, |e| e.index.memory_usage_bytes())
154}
155
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Arc;

    /// Creates a cache whose slot is empty.
    fn empty_cache() -> SharedBm25Cache {
        Arc::new(std::sync::Mutex::new(None))
    }

    /// A second `get_or_load` for the same root yields an index with the same document count.
    #[test]
    fn fresh_cache_returns_same_instance() {
        let cache = empty_cache();
        let dir = tempfile::tempdir().unwrap();
        let project = dir.path();
        std::fs::write(project.join("main.rs"), "fn main() {}\n").unwrap();

        let first = get_or_load(&cache, project);
        assert!(first.doc_count > 0);

        let second = get_or_load(&cache, project);
        assert_eq!(first.doc_count, second.doc_count);
    }

    /// Loading a second root replaces the cached entry with one for that root.
    #[test]
    fn different_root_invalidates() {
        let cache = empty_cache();
        let dir_a = tempfile::tempdir().unwrap();
        let dir_b = tempfile::tempdir().unwrap();
        std::fs::write(dir_a.path().join("a.rs"), "fn a() {}\n").unwrap();
        std::fs::write(dir_b.path().join("b.rs"), "fn b() {}\n").unwrap();

        let _ = get_or_load(&cache, dir_a.path());
        let from_b = get_or_load(&cache, dir_b.path());

        let slot = cache.lock().unwrap();
        let cached = slot.as_ref().unwrap();
        assert_eq!(cached.root, dir_b.path());
        assert_eq!(cached.index.doc_count, from_b.doc_count);
    }

    /// With nothing cached, the non-blocking lookup reports a miss.
    #[test]
    fn get_or_background_returns_none_on_empty() {
        let cache = empty_cache();
        let dir = tempfile::tempdir().unwrap();
        let lookup = get_or_background(&cache, dir.path());
        assert!(lookup.is_none());
    }

    /// A directory that has no index file yields the default fingerprint.
    #[test]
    fn fingerprint_default_when_index_file_absent() {
        let dir = tempfile::tempdir().unwrap();
        let observed = index_fingerprint(dir.path());
        assert_eq!(observed, IndexFingerprint::default());
    }

    /// Equal mtimes are not enough: a different size makes fingerprints unequal.
    #[test]
    fn fingerprint_detects_size_change_under_equal_mtime() {
        let mtime = Some(SystemTime::UNIX_EPOCH);
        let small = IndexFingerprint { mtime, size: 100 };
        let large = IndexFingerprint { mtime, size: 200 };
        assert_ne!(small, large);
        assert_eq!(small, IndexFingerprint { mtime, size: 100 });
    }
}