Skip to main content

lean_ctx/core/
bm25_cache.rs

1use std::path::{Path, PathBuf};
2use std::sync::Arc;
3use std::time::Instant;
4
5use super::bm25_index::BM25Index;
6
/// Time-to-live, in seconds, applied to a cached index when the
/// `LEAN_CTX_BM25_CACHE_TTL` environment variable is unset or unparsable.
const DEFAULT_TTL_SECS: u64 = 60;
8
9pub struct Bm25CacheEntry {
10    pub root: PathBuf,
11    pub index: Arc<BM25Index>,
12    pub loaded_at: Instant,
13}
14
15impl Bm25CacheEntry {
16    pub fn is_fresh(&self) -> bool {
17        self.loaded_at.elapsed().as_secs() < ttl_secs()
18    }
19}
20
21fn ttl_secs() -> u64 {
22    std::env::var("LEAN_CTX_BM25_CACHE_TTL")
23        .ok()
24        .and_then(|v| v.parse().ok())
25        .unwrap_or(DEFAULT_TTL_SECS)
26}
27
28pub type SharedBm25Cache = std::sync::Arc<std::sync::Mutex<Option<Bm25CacheEntry>>>;
29
30/// Get the BM25 index from cache if available and fresh, otherwise load/build,
31/// cache it, and return. Uses Arc to avoid cloning the entire index.
32pub fn get_or_load(cache: &SharedBm25Cache, root: &Path) -> Arc<BM25Index> {
33    {
34        let guard = cache
35            .lock()
36            .unwrap_or_else(std::sync::PoisonError::into_inner);
37        if let Some(ref entry) = *guard {
38            if entry.root == root && entry.is_fresh() {
39                return Arc::clone(&entry.index);
40            }
41        }
42    }
43
44    let index = Arc::new(BM25Index::load_or_build_fast(root));
45
46    let mut guard = cache
47        .lock()
48        .unwrap_or_else(std::sync::PoisonError::into_inner);
49    *guard = Some(Bm25CacheEntry {
50        root: root.to_path_buf(),
51        index: Arc::clone(&index),
52        loaded_at: Instant::now(),
53    });
54
55    index
56}
57
58/// Get index from cache (fresh or stale), triggering background rebuild if stale.
59/// Returns None only if no cache entry exists at all.
60pub fn get_or_background(cache: &SharedBm25Cache, root: &Path) -> Option<Arc<BM25Index>> {
61    let guard = cache
62        .lock()
63        .unwrap_or_else(std::sync::PoisonError::into_inner);
64    let entry = guard.as_ref()?;
65    if entry.root != root {
66        return None;
67    }
68
69    let idx = Arc::clone(&entry.index);
70
71    if !entry.is_fresh() {
72        let root_str = root.to_string_lossy().to_string();
73        let cache_clone = cache.clone();
74        let root_clone = root.to_path_buf();
75        std::thread::spawn(move || {
76            let rebuilt = BM25Index::load_or_build(&root_clone);
77            let mut g = cache_clone
78                .lock()
79                .unwrap_or_else(std::sync::PoisonError::into_inner);
80            *g = Some(Bm25CacheEntry {
81                root: root_clone,
82                index: Arc::new(rebuilt),
83                loaded_at: Instant::now(),
84            });
85            tracing::debug!("[bm25_cache: background refresh done for {root_str}]");
86        });
87    }
88
89    Some(idx)
90}
91
92/// Drops the cached BM25 index, freeing its heap memory.
93/// The index will be rebuilt from disk on the next search.
94pub fn unload(cache: &SharedBm25Cache) {
95    let mut guard = cache
96        .lock()
97        .unwrap_or_else(std::sync::PoisonError::into_inner);
98    if guard.is_some() {
99        *guard = None;
100        tracing::info!("[bm25_cache] unloaded index to free memory");
101    }
102}
103
104/// Returns the approximate heap memory used by the cached BM25 index, or 0.
105pub fn memory_usage(cache: &SharedBm25Cache) -> usize {
106    let guard = cache
107        .lock()
108        .unwrap_or_else(std::sync::PoisonError::into_inner);
109    guard.as_ref().map_or(0, |e| e.index.memory_usage_bytes())
110}
111
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Arc;

    /// Builds an empty shared cache for a test.
    fn empty_cache() -> SharedBm25Cache {
        Arc::new(std::sync::Mutex::new(None))
    }

    // NOTE: the identity assertions below assume the default TTL, i.e. that
    // `LEAN_CTX_BM25_CACHE_TTL` is not set to 0 in the test environment.

    #[test]
    fn fresh_cache_returns_same_instance() {
        let cache = empty_cache();
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path();
        std::fs::write(root.join("main.rs"), "fn main() {}\n").unwrap();

        let idx1 = get_or_load(&cache, root);
        assert!(idx1.doc_count > 0);

        let idx2 = get_or_load(&cache, root);
        // A fresh hit must hand back the very same allocation, not a rebuild.
        assert!(
            Arc::ptr_eq(&idx1, &idx2),
            "second lookup should reuse the cached Arc"
        );
        assert_eq!(idx1.doc_count, idx2.doc_count);
    }

    #[test]
    fn different_root_invalidates() {
        let cache = empty_cache();
        let tmp1 = tempfile::tempdir().unwrap();
        let tmp2 = tempfile::tempdir().unwrap();
        std::fs::write(tmp1.path().join("a.rs"), "fn a() {}\n").unwrap();
        std::fs::write(tmp2.path().join("b.rs"), "fn b() {}\n").unwrap();

        let _ = get_or_load(&cache, tmp1.path());
        let idx2 = get_or_load(&cache, tmp2.path());

        let guard = cache.lock().unwrap();
        let entry = guard.as_ref().unwrap();
        assert_eq!(entry.root, tmp2.path());
        assert_eq!(entry.index.doc_count, idx2.doc_count);
    }

    #[test]
    fn get_or_background_returns_none_on_empty() {
        let cache = empty_cache();
        let tmp = tempfile::tempdir().unwrap();
        assert!(get_or_background(&cache, tmp.path()).is_none());
    }

    #[test]
    fn get_or_background_returns_cached_index_when_fresh() {
        let cache = empty_cache();
        let tmp = tempfile::tempdir().unwrap();
        let other = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("main.rs"), "fn main() {}\n").unwrap();

        let loaded = get_or_load(&cache, tmp.path());

        let served = get_or_background(&cache, tmp.path()).expect("entry was just cached");
        assert!(Arc::ptr_eq(&loaded, &served));

        // An entry for another root must not be served.
        assert!(get_or_background(&cache, other.path()).is_none());
    }

    #[test]
    fn unload_clears_cache() {
        let cache = empty_cache();
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("main.rs"), "fn main() {}\n").unwrap();

        let _ = get_or_load(&cache, tmp.path());
        assert!(cache.lock().unwrap().is_some());

        unload(&cache);
        assert!(cache.lock().unwrap().is_none());
        assert_eq!(memory_usage(&cache), 0);
        assert!(get_or_background(&cache, tmp.path()).is_none());

        // Unloading an already-empty cache is a no-op.
        unload(&cache);
        assert!(cache.lock().unwrap().is_none());
    }
}