Skip to main content

lean_ctx/core/
index_namespace.rs

1use std::path::{Path, PathBuf};
2
3use md5::{Digest, Md5};
4
5pub(crate) fn vectors_dir(project_root: &Path) -> PathBuf {
6    let hash = namespace_hash(project_root);
7    let legacy = legacy_vectors_hash(project_root);
8
9    if let Ok(data_dir) = crate::core::data_dir::lean_ctx_data_dir() {
10        let old_dir = data_dir.join("vectors").join(&legacy);
11        let new_dir = data_dir.join("vectors").join(&hash);
12        migrate_dir_if_needed(&old_dir, &new_dir);
13        return new_dir;
14    }
15
16    PathBuf::from(".").join("vectors").join(hash)
17}
18
19pub(crate) fn namespace_hash(project_root: &Path) -> String {
20    let seed = namespace_seed(project_root);
21    let mut hasher = Md5::new();
22    hasher.update(seed.as_bytes());
23    format!("{:x}", hasher.finalize())
24}
25
26fn namespace_seed(project_root: &Path) -> String {
27    let root_s = project_root.to_string_lossy().to_string();
28    let base = crate::core::project_hash::project_identity(&root_s)
29        .unwrap_or_else(|| crate::core::graph_index::normalize_project_root(&root_s));
30
31    if !branch_aware_enabled() {
32        return base;
33    }
34
35    let branch = git_branch(project_root).unwrap_or_else(|| "HEAD".to_string());
36    format!("{base}|branch:{branch}")
37}
38
/// Reports whether branch-aware index namespaces are switched on.
///
/// Reads the `LEANCTX_INDEX_BRANCH_AWARE` environment variable. After trimming
/// surrounding whitespace and lowercasing, the values `1`, `true`, `yes` and
/// `on` enable the feature; anything else — including an unset or non-Unicode
/// variable — leaves it disabled.
fn branch_aware_enabled() -> bool {
    const TRUTHY: [&str; 4] = ["1", "true", "yes", "on"];

    std::env::var("LEANCTX_INDEX_BRANCH_AWARE")
        .map(|raw| {
            let normalized = raw.trim().to_lowercase();
            TRUTHY.contains(&normalized.as_str())
        })
        .unwrap_or(false)
}
48
/// Asks `git` for the abbreviated name of `HEAD` inside `project_root`.
///
/// Runs `git rev-parse --abbrev-ref HEAD` with the working directory set to
/// `project_root` and stderr discarded. Returns `None` when the command cannot
/// be run, exits unsuccessfully, prints non-UTF-8 output, or prints nothing
/// but whitespace; otherwise returns the trimmed output.
fn git_branch(project_root: &Path) -> Option<String> {
    use std::process::{Command, Stdio};

    let output = Command::new("git")
        .args(["rev-parse", "--abbrev-ref", "HEAD"])
        .current_dir(project_root)
        .stdout(Stdio::piped())
        .stderr(Stdio::null())
        .output()
        .ok()?;

    if !output.status.success() {
        return None;
    }

    String::from_utf8(output.stdout)
        .ok()
        .map(|raw| raw.trim().to_string())
        .filter(|branch| !branch.is_empty())
}
68
69fn legacy_vectors_hash(project_root: &Path) -> String {
70    let mut hasher = Md5::new();
71    hasher.update(project_root.to_string_lossy().as_bytes());
72    format!("{:x}", hasher.finalize())
73}
74
75fn migrate_dir_if_needed(old_dir: &Path, new_dir: &Path) {
76    if old_dir == new_dir {
77        return;
78    }
79    if !old_dir.exists() || new_dir.exists() {
80        return;
81    }
82    if !verify_index_ownership(old_dir) {
83        tracing::warn!(
84            "lean-ctx: skipping index migration — ownership check failed for {old_dir:?}"
85        );
86        return;
87    }
88    if let Err(e) = copy_dir_contents(old_dir, new_dir) {
89        tracing::error!("lean-ctx: index migration failed: {e}");
90    }
91}
92
/// Sanity-checks the index marker file inside `dir` before a migration.
///
/// The marker is the first existing file among `bm25_index.bin.zst`,
/// `bm25_index.bin` and `bm25_index.json`, in that order. Despite the name,
/// the only thing inspected is the marker's size: the check fails when the
/// marker is empty or larger than 500,000,000 bytes. A missing marker or
/// unreadable metadata counts as a pass.
fn verify_index_ownership(dir: &Path) -> bool {
    const MARKER_NAMES: [&str; 3] = [
        "bm25_index.bin.zst",
        "bm25_index.bin",
        "bm25_index.json",
    ];
    const MAX_MARKER_BYTES: u64 = 500_000_000;

    let marker = MARKER_NAMES
        .iter()
        .map(|name| dir.join(name))
        .find(|candidate| candidate.exists());

    match marker.map(std::fs::metadata) {
        Some(Ok(meta)) => {
            let size = meta.len();
            size != 0 && size <= MAX_MARKER_BYTES
        }
        // No marker at all, or its metadata could not be read: allow.
        _ => true,
    }
}
112
/// Recursively copies everything inside `src` into `dst`.
///
/// `dst` (and any missing parents) is created first. Sub-directories are
/// copied recursively; every other entry is copied with `std::fs::copy`.
///
/// # Errors
///
/// Returns the stringified I/O error of the first operation that fails:
/// creating a destination directory, listing `src`, reading an individual
/// directory entry, or copying a file. Entry-level read errors are propagated
/// rather than skipped, so an `Ok(())` result means every listed entry was
/// copied.
fn copy_dir_contents(src: &Path, dst: &Path) -> Result<(), String> {
    std::fs::create_dir_all(dst).map_err(|e| e.to_string())?;
    for entry in std::fs::read_dir(src).map_err(|e| e.to_string())? {
        // Do not silently drop entries that failed to read: that would report
        // an incomplete copy as a success.
        let entry = entry.map_err(|e| e.to_string())?;
        let src_path = entry.path();
        let dst_path = dst.join(entry.file_name());
        if src_path.is_dir() {
            copy_dir_contents(&src_path, &dst_path)?;
        } else {
            std::fs::copy(&src_path, &dst_path).map_err(|e| e.to_string())?;
        }
    }
    Ok(())
}
126
#[cfg(test)]
mod tests {
    use super::*;

    /// Sets an environment variable for the duration of a test and restores
    /// the previous state (value or absence) when dropped, including when the
    /// test panics.
    struct EnvVarGuard {
        key: &'static str,
        previous: Option<std::ffi::OsString>,
    }

    impl EnvVarGuard {
        fn set(key: &'static str, value: &Path) -> Self {
            let previous = std::env::var_os(key);
            std::env::set_var(key, value);
            Self { key, previous }
        }
    }

    impl Drop for EnvVarGuard {
        fn drop(&mut self) {
            match self.previous.take() {
                Some(value) => std::env::set_var(self.key, value),
                None => std::env::remove_var(self.key),
            }
        }
    }

    /// Writes `<root>/.git/config` declaring a fixed `origin` remote URL.
    fn write_origin_remote(root: &Path) {
        let git_dir = root.join(".git");
        std::fs::create_dir_all(&git_dir).unwrap();
        std::fs::write(
            git_dir.join("config"),
            "[remote \"origin\"]\n\turl = git@github.com:user/my-repo.git\n",
        )
        .unwrap();
    }

    /// Two different directories with the same `origin` remote must hash to
    /// the same namespace.
    #[test]
    fn namespace_hash_is_stable_across_clones_with_same_git_remote() {
        let _env = crate::core::data_dir::test_env_lock();
        let a = tempfile::tempdir().unwrap();
        let b = tempfile::tempdir().unwrap();

        write_origin_remote(a.path());
        write_origin_remote(b.path());

        let ha = namespace_hash(a.path());
        let hb = namespace_hash(b.path());
        assert_eq!(ha, hb);
    }

    /// An index stored under the legacy path-hash directory must show up in
    /// the directory returned by `vectors_dir`.
    #[test]
    fn vectors_dir_migrates_legacy_path_hash_directory() {
        let _env = crate::core::data_dir::test_env_lock();
        let data_dir = tempfile::tempdir().unwrap();
        // Declared after the lock and the temp dir so it is dropped first:
        // the variable is restored before the directory it points at is
        // deleted, and (presumably) while the env lock is still held.
        let _data_dir_env = EnvVarGuard::set("LEAN_CTX_DATA_DIR", data_dir.path());

        let project = tempfile::tempdir().unwrap();
        write_origin_remote(project.path());

        let legacy = legacy_vectors_hash(project.path());
        let old_dir = data_dir.path().join("vectors").join(&legacy);
        std::fs::create_dir_all(&old_dir).unwrap();
        std::fs::write(old_dir.join("bm25_index.json"), "{\"doc_count\":0}").unwrap();

        let new_dir = vectors_dir(project.path());
        assert!(new_dir.exists());
        assert!(new_dir.join("bm25_index.json").exists());
    }
}