lean_ctx/core/
bm25_cache.rs1use std::path::{Path, PathBuf};
2use std::sync::Arc;
3use std::time::Instant;
4
5use super::bm25_index::BM25Index;
6
/// Cache lifetime, in seconds, used when `LEAN_CTX_BM25_CACHE_TTL` is unset or unparsable.
const DEFAULT_TTL_SECS: u64 = 60;
8
9pub struct Bm25CacheEntry {
10 pub root: PathBuf,
11 pub index: Arc<BM25Index>,
12 pub loaded_at: Instant,
13}
14
15impl Bm25CacheEntry {
16 pub fn is_fresh(&self) -> bool {
17 self.loaded_at.elapsed().as_secs() < ttl_secs()
18 }
19}
20
21fn ttl_secs() -> u64 {
22 std::env::var("LEAN_CTX_BM25_CACHE_TTL")
23 .ok()
24 .and_then(|v| v.parse().ok())
25 .unwrap_or(DEFAULT_TTL_SECS)
26}
27
28pub type SharedBm25Cache = std::sync::Arc<std::sync::Mutex<Option<Bm25CacheEntry>>>;
29
30pub fn get_or_load(cache: &SharedBm25Cache, root: &Path) -> Arc<BM25Index> {
33 {
34 let guard = cache
35 .lock()
36 .unwrap_or_else(std::sync::PoisonError::into_inner);
37 if let Some(ref entry) = *guard {
38 if entry.root == root && entry.is_fresh() {
39 return Arc::clone(&entry.index);
40 }
41 }
42 }
43
44 let index = Arc::new(BM25Index::load_or_build_fast(root));
45
46 let mut guard = cache
47 .lock()
48 .unwrap_or_else(std::sync::PoisonError::into_inner);
49 *guard = Some(Bm25CacheEntry {
50 root: root.to_path_buf(),
51 index: Arc::clone(&index),
52 loaded_at: Instant::now(),
53 });
54
55 index
56}
57
58pub fn get_or_background(cache: &SharedBm25Cache, root: &Path) -> Option<Arc<BM25Index>> {
61 let guard = cache
62 .lock()
63 .unwrap_or_else(std::sync::PoisonError::into_inner);
64 let entry = guard.as_ref()?;
65 if entry.root != root {
66 return None;
67 }
68
69 let idx = Arc::clone(&entry.index);
70
71 if !entry.is_fresh() {
72 let root_str = root.to_string_lossy().to_string();
73 let cache_clone = cache.clone();
74 let root_clone = root.to_path_buf();
75 std::thread::spawn(move || {
76 let rebuilt = BM25Index::load_or_build(&root_clone);
77 let mut g = cache_clone
78 .lock()
79 .unwrap_or_else(std::sync::PoisonError::into_inner);
80 *g = Some(Bm25CacheEntry {
81 root: root_clone,
82 index: Arc::new(rebuilt),
83 loaded_at: Instant::now(),
84 });
85 tracing::debug!("[bm25_cache: background refresh done for {root_str}]");
86 });
87 }
88
89 Some(idx)
90}
91
92pub fn unload(cache: &SharedBm25Cache) {
95 let mut guard = cache
96 .lock()
97 .unwrap_or_else(std::sync::PoisonError::into_inner);
98 if guard.is_some() {
99 *guard = None;
100 tracing::info!("[bm25_cache] unloaded index to free memory");
101 }
102}
103
104pub fn memory_usage(cache: &SharedBm25Cache) -> usize {
106 let guard = cache
107 .lock()
108 .unwrap_or_else(std::sync::PoisonError::into_inner);
109 guard.as_ref().map_or(0, |e| e.index.memory_usage_bytes())
110}
111
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Arc;

    /// Creates a cache slot with nothing loaded.
    fn empty_cache() -> SharedBm25Cache {
        Arc::new(std::sync::Mutex::new(None))
    }

    #[test]
    fn fresh_cache_returns_same_instance() {
        let cache = empty_cache();
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        std::fs::write(root.join("main.rs"), "fn main() {}\n").unwrap();

        let idx1 = get_or_load(&cache, root);
        assert!(idx1.doc_count > 0);

        let idx2 = get_or_load(&cache, root);
        assert_eq!(idx1.doc_count, idx2.doc_count);
        // The second call must be served from the cache, i.e. hand back the
        // very same allocation. This relies on the TTL not elapsing between
        // the two calls (default: 60 s).
        assert!(
            Arc::ptr_eq(&idx1, &idx2),
            "second lookup should reuse the cached index"
        );
    }

    #[test]
    fn different_root_invalidates() {
        let cache = empty_cache();
        let tmp1 = tempfile::tempdir().unwrap();
        let tmp2 = tempfile::tempdir().unwrap();
        std::fs::write(tmp1.path().join("a.rs"), "fn a() {}\n").unwrap();
        std::fs::write(tmp2.path().join("b.rs"), "fn b() {}\n").unwrap();

        let _ = get_or_load(&cache, tmp1.path());
        let idx2 = get_or_load(&cache, tmp2.path());

        let guard = cache.lock().unwrap();
        let entry = guard.as_ref().unwrap();
        assert_eq!(entry.root, tmp2.path());
        assert_eq!(entry.index.doc_count, idx2.doc_count);
        // The cache must now hold exactly the index returned for the new root.
        assert!(Arc::ptr_eq(&entry.index, &idx2));
    }

    #[test]
    fn get_or_background_returns_none_on_empty() {
        let cache = empty_cache();
        let tmp = tempfile::tempdir().unwrap();
        assert!(get_or_background(&cache, tmp.path()).is_none());
    }
}