// lean_ctx/core/index_namespace.rs
use std::path::{Path, PathBuf};

use md5::{Digest, Md5};

5pub(crate) fn vectors_dir(project_root: &Path) -> PathBuf {
6 let hash = namespace_hash(project_root);
7 let legacy = legacy_vectors_hash(project_root);
8
9 if let Ok(data_dir) = crate::core::data_dir::lean_ctx_data_dir() {
10 let old_dir = data_dir.join("vectors").join(&legacy);
11 let new_dir = data_dir.join("vectors").join(&hash);
12 migrate_dir_if_needed(&old_dir, &new_dir);
13 return new_dir;
14 }
15
16 PathBuf::from(".").join("vectors").join(hash)
17}
18
19pub(crate) fn namespace_hash(project_root: &Path) -> String {
20 let seed = namespace_seed(project_root);
21 let mut hasher = Md5::new();
22 hasher.update(seed.as_bytes());
23 format!("{:x}", hasher.finalize())
24}
25
26fn namespace_seed(project_root: &Path) -> String {
27 let root_s = project_root.to_string_lossy().to_string();
28 let base = crate::core::project_hash::project_identity(&root_s)
29 .unwrap_or_else(|| crate::core::graph_index::normalize_project_root(&root_s));
30
31 if !branch_aware_enabled() {
32 return base;
33 }
34
35 let branch = git_branch(project_root).unwrap_or_else(|| "HEAD".to_string());
36 format!("{base}|branch:{branch}")
37}
38
/// Reports whether branch-aware index namespaces are switched on.
///
/// Reads `LEANCTX_INDEX_BRANCH_AWARE`; the value is trimmed and lowercased
/// and must be one of `1`, `true`, `yes` or `on`. An unset (or non-UTF-8)
/// variable, or any other value, yields `false`.
fn branch_aware_enabled() -> bool {
    const TRUTHY: [&str; 4] = ["1", "true", "yes", "on"];

    match std::env::var("LEANCTX_INDEX_BRANCH_AWARE") {
        Ok(raw) => {
            let flag = raw.trim().to_lowercase();
            TRUTHY.iter().any(|accepted| *accepted == flag)
        }
        Err(_) => false,
    }
}
48
/// Asks `git` for the abbreviated name of `HEAD` in `project_root`.
///
/// Runs `git rev-parse --abbrev-ref HEAD` with the working directory set to
/// `project_root` and stderr discarded. Returns `None` when the process
/// cannot be started, exits unsuccessfully, prints non-UTF-8 output, or
/// prints nothing but whitespace; otherwise returns the trimmed output.
fn git_branch(project_root: &Path) -> Option<String> {
    use std::process::{Command, Stdio};

    let output = Command::new("git")
        .args(["rev-parse", "--abbrev-ref", "HEAD"])
        .current_dir(project_root)
        .stdout(Stdio::piped())
        .stderr(Stdio::null())
        .output()
        .ok()
        .filter(|finished| finished.status.success())?;

    let text = String::from_utf8(output.stdout).ok()?;
    let branch = text.trim();
    (!branch.is_empty()).then(|| branch.to_string())
}
68
69fn legacy_vectors_hash(project_root: &Path) -> String {
70 let mut hasher = Md5::new();
71 hasher.update(project_root.to_string_lossy().as_bytes());
72 format!("{:x}", hasher.finalize())
73}
74
75fn migrate_dir_if_needed(old_dir: &Path, new_dir: &Path) {
76 if old_dir == new_dir {
77 return;
78 }
79 if !old_dir.exists() || new_dir.exists() {
80 return;
81 }
82 if !verify_index_ownership(old_dir) {
83 tracing::warn!(
84 "lean-ctx: skipping index migration — ownership check failed for {old_dir:?}"
85 );
86 return;
87 }
88 if let Err(e) = copy_dir_contents(old_dir, new_dir) {
89 tracing::error!("lean-ctx: index migration failed: {e}");
90 }
91}
92
/// Sanity-checks the `bm25_index.json` marker inside `dir` before migration.
///
/// Despite the name, the only thing inspected is the marker's size: the
/// check fails when the file is empty or larger than 500,000,000 bytes.
/// A marker that is missing, or whose metadata cannot be read, passes.
fn verify_index_ownership(dir: &Path) -> bool {
    const MAX_MARKER_BYTES: u64 = 500_000_000;

    match std::fs::metadata(dir.join("bm25_index.json")) {
        Ok(meta) => (1..=MAX_MARKER_BYTES).contains(&meta.len()),
        // Covers both "no marker" and "marker not readable".
        Err(_) => true,
    }
}
106
/// Recursively copies everything inside `src` into `dst`.
///
/// `dst` (and any missing parents) is created first. Entries for which
/// `Path::is_dir` is true are copied recursively; everything else is copied
/// with `std::fs::copy`, overwriting an existing destination file.
///
/// # Errors
///
/// Returns the stringified I/O error, prefixed with the failing operation and
/// path, for the first failure encountered. This includes directory entries
/// that cannot be read: they are reported instead of being skipped, so an
/// `Ok` result means every entry was copied.
fn copy_dir_contents(src: &Path, dst: &Path) -> Result<(), String> {
    let describe = |action: &str, path: &Path, err: std::io::Error| {
        format!("{action} {}: {err}", path.display())
    };

    std::fs::create_dir_all(dst).map_err(|e| describe("create directory", dst, e))?;

    let entries = std::fs::read_dir(src).map_err(|e| describe("read directory", src, e))?;
    for entry in entries {
        // Surface unreadable entries; silently dropping them would yield an
        // incomplete copy that still reports success.
        let entry = entry.map_err(|e| describe("read entry in", src, e))?;
        let src_path = entry.path();
        let dst_path = dst.join(entry.file_name());
        if src_path.is_dir() {
            copy_dir_contents(&src_path, &dst_path)?;
        } else {
            std::fs::copy(&src_path, &dst_path).map_err(|e| describe("copy", &src_path, e))?;
        }
    }
    Ok(())
}
120
#[cfg(test)]
mod tests {
    use super::*;

    /// Sets an environment variable for the lifetime of the guard and puts
    /// the previous state back on drop, including when the test panics.
    ///
    /// Without this, `LEAN_CTX_DATA_DIR` would keep pointing at a temp
    /// directory that has already been deleted once the test finishes.
    struct ScopedEnvVar {
        key: &'static str,
        previous: Option<std::ffi::OsString>,
    }

    impl ScopedEnvVar {
        fn set(key: &'static str, value: impl AsRef<std::ffi::OsStr>) -> Self {
            let previous = std::env::var_os(key);
            std::env::set_var(key, value);
            Self { key, previous }
        }
    }

    impl Drop for ScopedEnvVar {
        fn drop(&mut self) {
            match self.previous.take() {
                Some(value) => std::env::set_var(self.key, value),
                None => std::env::remove_var(self.key),
            }
        }
    }

    /// Creates a minimal `.git/config` with a fixed `origin` remote under `root`.
    fn write_git_remote(root: &Path) {
        let git_dir = root.join(".git");
        std::fs::create_dir_all(&git_dir).unwrap();
        std::fs::write(
            git_dir.join("config"),
            "[remote \"origin\"]\n\turl = git@github.com:user/my-repo.git\n",
        )
        .unwrap();
    }

    #[test]
    fn namespace_hash_is_stable_across_clones_with_same_git_remote() {
        let _env = crate::core::data_dir::test_env_lock();
        let a = tempfile::tempdir().unwrap();
        let b = tempfile::tempdir().unwrap();

        write_git_remote(a.path());
        write_git_remote(b.path());

        let ha = namespace_hash(a.path());
        let hb = namespace_hash(b.path());
        assert_eq!(ha, hb);
    }

    #[test]
    fn vectors_dir_migrates_legacy_path_hash_directory() {
        let _env = crate::core::data_dir::test_env_lock();
        let data_dir = tempfile::tempdir().unwrap();
        // Declared after the lock and the temp dir so it is dropped (and the
        // variable restored) before either of them goes away.
        let _data_dir_var = ScopedEnvVar::set("LEAN_CTX_DATA_DIR", data_dir.path());

        let project = tempfile::tempdir().unwrap();
        write_git_remote(project.path());

        let legacy = legacy_vectors_hash(project.path());
        let old_dir = data_dir.path().join("vectors").join(&legacy);
        std::fs::create_dir_all(&old_dir).unwrap();
        std::fs::write(old_dir.join("bm25_index.json"), "{\"doc_count\":0}").unwrap();

        let new_dir = vectors_dir(project.path());
        assert!(new_dir.exists());
        assert!(new_dir.join("bm25_index.json").exists());
    }
}