Skip to main content

lean_ctx/core/
storage_maintenance.rs

1//! Daemon-safe storage maintenance.
2//!
3//! Unlike the interactive `lean-ctx cache prune` (which prints per-file output),
4//! these routines are silent (tracing only) so they can run inside the MCP
5//! daemon without corrupting the stdio protocol. They enforce the disk budget
6//! that the field had been silently exceeding (see EPIC 6 / #2364): unbounded
7//! archive FTS growth and accumulated quarantined BM25 indexes.
8
9use std::path::PathBuf;
10
/// Result of a quiet maintenance pass.
///
/// Plain-data summary returned by `run_quiet`; all counters start at zero via
/// `Default`. Derives `PartialEq`/`Eq` so callers and tests can compare whole
/// results directly.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct MaintenanceResult {
    /// Number of quarantined BM25 index files reported as removed by the
    /// pruning pass.
    pub quarantined_removed: u32,
    /// Byte total reported by the pruning pass for quarantined index files.
    pub bytes_freed: u64,
    /// Value returned by the archive FTS cap enforcement; presumably the
    /// archive DB size in bytes after enforcement (it is logged as such).
    pub archive_db_bytes_after: u64,
}
18
/// File names that the pruning pass looks for (and deletes) inside each
/// subdirectory of the `vectors` data directory.
const QUARANTINED_FILES: &[&str] = &[
    "bm25_index.json.quarantined",
    "bm25_index.bin.quarantined",
    "bm25_index.bin.zst.quarantined",
];
24
25/// Remove accumulated quarantined BM25 index files. These are dead weight: an
26/// index is only quarantined when it failed a load/size check and was replaced.
27fn prune_quarantined_bm25() -> (u32, u64) {
28    let mut removed = 0u32;
29    let mut freed = 0u64;
30    let Ok(data_dir) = crate::core::data_dir::lean_ctx_data_dir() else {
31        return (removed, freed);
32    };
33    let vectors_dir = data_dir.join("vectors");
34    let Ok(entries) = std::fs::read_dir(&vectors_dir) else {
35        return (removed, freed);
36    };
37    for entry in entries.flatten() {
38        let dir = entry.path();
39        if !dir.is_dir() {
40            continue;
41        }
42        for q_name in QUARANTINED_FILES {
43            let q: PathBuf = dir.join(q_name);
44            if q.exists() {
45                if let Ok(meta) = std::fs::metadata(&q) {
46                    freed = freed.saturating_add(meta.len());
47                }
48                if std::fs::remove_file(&q).is_ok() {
49                    removed += 1;
50                }
51            }
52        }
53    }
54    (removed, freed)
55}
56
57/// Run a silent maintenance pass: prune quarantined BM25 indexes and enforce
58/// the archive FTS size cap. Safe to call from the MCP daemon.
59pub fn run_quiet() -> MaintenanceResult {
60    let (quarantined_removed, bytes_freed) = prune_quarantined_bm25();
61    let archive_db_bytes_after = crate::core::archive_fts::enforce_cap();
62    if quarantined_removed > 0 {
63        tracing::info!(
64            "storage maintenance: pruned {quarantined_removed} quarantined BM25 index file(s), \
65             freed {bytes_freed} bytes; archive DB now {archive_db_bytes_after} bytes"
66        );
67    }
68    MaintenanceResult {
69        quarantined_removed,
70        bytes_freed,
71        archive_db_bytes_after,
72    }
73}
74
#[cfg(test)]
mod tests {
    use super::*;

    /// Clears the `LEAN_CTX_DATA_DIR` override when dropped, so the variable
    /// is removed even if an assertion in the test panics.
    struct DataDirEnvGuard;

    impl Drop for DataDirEnvGuard {
        fn drop(&mut self) {
            std::env::remove_var("LEAN_CTX_DATA_DIR");
        }
    }

    /// A quarantined index is deleted and accounted for, while the live index
    /// sitting next to it is left untouched.
    #[test]
    fn prune_removes_quarantined_files() {
        // Declaration order is deliberate: locals drop in reverse, so the env
        // override is cleared first, then the temp dir is deleted, and the
        // env lock is released last.
        let _lock = crate::core::data_dir::test_env_lock();
        let tmp = tempfile::tempdir().unwrap();
        std::env::set_var("LEAN_CTX_DATA_DIR", tmp.path());
        let _env = DataDirEnvGuard;

        let idx_dir = tmp.path().join("vectors").join("proj_abc");
        std::fs::create_dir_all(&idx_dir).unwrap();
        std::fs::write(idx_dir.join("bm25_index.json.quarantined"), b"dead").unwrap();
        std::fs::write(idx_dir.join("bm25_index.bin"), b"live").unwrap();

        let (removed, freed) = prune_quarantined_bm25();
        assert_eq!(removed, 1);
        // Exactly the four bytes of "dead" were reclaimed.
        assert_eq!(freed, 4);
        assert!(!idx_dir.join("bm25_index.json.quarantined").exists());
        assert!(
            idx_dir.join("bm25_index.bin").exists(),
            "live index must be preserved"
        );
    }
}