Skip to main content

lean_ctx/core/
project_hash.rs

1use std::collections::hash_map::DefaultHasher;
2use std::hash::{Hash, Hasher};
3use std::path::Path;
4
5/// Computes a composite hash from the project root path and any detected
6/// project identity markers (git remote, manifest file, etc.).
7///
8/// This prevents hash collisions when different projects share the same
9/// mount path (e.g. Docker volumes at `/workspace`).
10pub(crate) fn hash_project_root(root: &str) -> String {
11    let mut hasher = DefaultHasher::new();
12    root.hash(&mut hasher);
13
14    if let Some(identity) = project_identity(root) {
15        identity.hash(&mut hasher);
16    }
17
18    format!("{:016x}", hasher.finish())
19}
20
/// Path-only hash, the scheme in use before v3.3.2.
///
/// Retained so that knowledge directories created under the old scheme can
/// still be located for auto-migration. Returns the hasher output for `root`
/// alone, formatted as 16 zero-padded lowercase hex digits.
pub(crate) fn hash_path_only(root: &str) -> String {
    let mut state = DefaultHasher::new();
    Hash::hash(root, &mut state);
    let digest = state.finish();
    format!("{digest:016x}")
}
28
29/// Extracts a stable project identity string from well-known config files.
30///
31/// Checks (in priority order):
32///   1. `.git/config`   → remote "origin" URL
33///   2. `Cargo.toml`    → `[package] name`
34///   3. `package.json`  → `"name"` field
35///   4. `pyproject.toml`→ `[project] name`
36///   5. `go.mod`        → `module` path
37///   6. `composer.json` → `"name"` field
38///   7. `build.gradle`  / `build.gradle.kts` → existence as a marker
39///   8. `*.sln`         → first `.sln` filename
40///
41/// Returns `None` when no identity marker is found, in which case
42/// the hash falls back to path-only (same behaviour as pre-3.3.2).
43pub(crate) fn project_identity(root: &str) -> Option<String> {
44    let root = Path::new(root);
45
46    // Explicit identity file — highest priority. Ideal for Docker containers
47    // where the mount path (/workspace) is reused across different projects.
48    // Users create `.lean-ctx-id` with a unique name to disambiguate.
49    if let Some(id) = explicit_identity_file(root) {
50        return Some(format!("explicit:{id}"));
51    }
52    if let Some(url) = git_remote_url(root) {
53        return Some(format!("git:{url}"));
54    }
55    if let Some(name) = cargo_package_name(root) {
56        return Some(format!("cargo:{name}"));
57    }
58    if let Some(name) = npm_package_name(root) {
59        return Some(format!("npm:{name}"));
60    }
61    if let Some(name) = pyproject_name(root) {
62        return Some(format!("python:{name}"));
63    }
64    if let Some(module) = go_module(root) {
65        return Some(format!("go:{module}"));
66    }
67    if let Some(name) = composer_name(root) {
68        return Some(format!("composer:{name}"));
69    }
70    if let Some(name) = gradle_project(root) {
71        return Some(format!("gradle:{name}"));
72    }
73    if let Some(name) = dotnet_solution(root) {
74        return Some(format!("dotnet:{name}"));
75    }
76
77    None
78}
79
80/// Copies all files from `old_hash` dir to `new_hash` dir when the composite
81/// hash differs from the legacy path-only hash.  Leaves the old directory
82/// intact so sibling projects sharing the same mount path can still migrate
83/// their own data independently.
84pub(crate) fn migrate_if_needed(old_hash: &str, new_hash: &str, project_root: &str) {
85    if old_hash == new_hash {
86        return;
87    }
88
89    let Ok(data_dir) = crate::core::data_dir::lean_ctx_data_dir() else {
90        return;
91    };
92
93    let old_dir = data_dir.join("knowledge").join(old_hash);
94    let new_dir = data_dir.join("knowledge").join(new_hash);
95
96    if !old_dir.exists() || new_dir.exists() {
97        return;
98    }
99
100    if !verify_ownership(&old_dir, project_root) {
101        return;
102    }
103
104    if let Err(e) = copy_dir_contents(&old_dir, &new_dir) {
105        tracing::error!("lean-ctx: knowledge migration failed: {e}");
106    }
107}
108
109// ---------------------------------------------------------------------------
110// Identity detectors
111// ---------------------------------------------------------------------------
112
/// Reads the user-provided identity from `<root>/.lean-ctx-id`.
///
/// The content is trimmed; `None` is returned when the file cannot be read,
/// is blank after trimming, or the trimmed text is longer than 256 bytes.
fn explicit_identity_file(root: &Path) -> Option<String> {
    let raw = std::fs::read_to_string(root.join(".lean-ctx-id")).ok()?;
    let name = raw.trim();
    let acceptable = !name.is_empty() && name.len() <= 256;
    acceptable.then(|| name.to_owned())
}
122
123fn git_remote_url(root: &Path) -> Option<String> {
124    let config = root.join(".git").join("config");
125    let content = std::fs::read_to_string(config).ok()?;
126
127    let mut in_origin = false;
128    for line in content.lines() {
129        let trimmed = line.trim();
130        if trimmed.starts_with('[') {
131            in_origin = trimmed == r#"[remote "origin"]"#;
132            continue;
133        }
134        if in_origin {
135            if let Some(url) = trimmed.strip_prefix("url") {
136                let url = url.trim_start_matches([' ', '=']);
137                let url = url.trim();
138                if !url.is_empty() {
139                    return Some(normalize_git_url(url));
140                }
141            }
142        }
143    }
144    None
145}
146
/// Puts a git remote URL into a canonical form for identity comparison.
///
/// * a single trailing `.git` is removed (only one, so a repository whose
///   name itself ends in `.git` stays distinct);
/// * scp-style `git@host:path` becomes `host/path`;
/// * the result is lowercased.
///
/// URLs without the `git@` prefix (e.g. `https://…`) keep their scheme.
fn normalize_git_url(url: &str) -> String {
    let url = url.strip_suffix(".git").unwrap_or(url);
    let normalized = match url.strip_prefix("git@") {
        // Only the first ':' separates host from path in scp-style URLs.
        Some(rest) => rest.replacen(':', "/", 1),
        None => url.to_string(),
    };
    normalized.to_lowercase()
}
154
155fn cargo_package_name(root: &Path) -> Option<String> {
156    extract_toml_value(&root.join("Cargo.toml"), "name", Some("[package]"))
157}
158
159fn npm_package_name(root: &Path) -> Option<String> {
160    extract_json_string_field(&root.join("package.json"), "name")
161}
162
163fn pyproject_name(root: &Path) -> Option<String> {
164    extract_toml_value(&root.join("pyproject.toml"), "name", Some("[project]"))
165        .or_else(|| extract_toml_value(&root.join("pyproject.toml"), "name", Some("[tool.poetry]")))
166}
167
/// Returns the module path declared by the `module` directive in
/// `<root>/go.mod`.
///
/// Every line is scanned rather than just the first, because a `go.mod`
/// may open with blank lines or comments (for example a
/// `// Deprecated: …` notice) before the directive. The keyword must be
/// followed by whitespace, and a directive without a path is ignored.
///
/// Returns `None` when the file cannot be read or no such directive exists.
fn go_module(root: &Path) -> Option<String> {
    let content = std::fs::read_to_string(root.join("go.mod")).ok()?;
    content.lines().find_map(|line| {
        let rest = line.trim().strip_prefix("module")?;
        // Reject identifiers that merely start with "module" and a bare
        // "module" keyword with nothing after it.
        if !rest.starts_with(char::is_whitespace) {
            return None;
        }
        let module = rest.trim();
        if module.is_empty() {
            None
        } else {
            Some(module.to_string())
        }
    })
}
173
174fn composer_name(root: &Path) -> Option<String> {
175    extract_json_string_field(&root.join("composer.json"), "name")
176}
177
/// Returns the `rootProject.name` declared in the project's Gradle settings.
///
/// `settings.gradle` is preferred; `settings.gradle.kts` is used only when
/// the former does not exist. The first line that starts with
/// `rootProject.name` and carries a non-empty value wins; surrounding single
/// or double quotes are removed from the value.
///
/// Returns `None` when neither file exists, the chosen file cannot be read,
/// or it declares no name.
fn gradle_project(root: &Path) -> Option<String> {
    let candidates = [
        root.join("settings.gradle"),
        root.join("settings.gradle.kts"),
    ];
    // `find` is lazy, so the `.kts` variant is only probed when the Groovy
    // one is absent.
    let settings_file = candidates.iter().find(|candidate| candidate.exists())?;
    let text = std::fs::read_to_string(settings_file).ok()?;

    text.lines().find_map(|line| {
        let assigned = line.trim().strip_prefix("rootProject.name")?;
        let name = assigned
            .trim_start_matches([' ', '='])
            .trim()
            .trim_matches(['\'', '"']);
        if name.is_empty() {
            None
        } else {
            Some(name.to_string())
        }
    })
}
203
/// Returns the file stem of a `.sln` entry located directly in `root`.
///
/// Directory iteration order is platform and filesystem dependent, so when
/// several solution files are present the lexicographically smallest stem is
/// chosen; this keeps the identity (and therefore the project hash) stable
/// across runs. Entries that cannot be read or whose stem is not valid
/// UTF-8 are skipped.
///
/// Returns `None` when `root` cannot be listed or holds no `.sln` entry.
fn dotnet_solution(root: &Path) -> Option<String> {
    std::fs::read_dir(root)
        .ok()?
        .flatten()
        .map(|entry| entry.path())
        .filter(|path| path.extension().map_or(false, |ext| ext == "sln"))
        .filter_map(|path| {
            path.file_stem()
                .and_then(|stem| stem.to_str())
                .map(String::from)
        })
        .min()
}
219
220// ---------------------------------------------------------------------------
221// TOML / JSON helpers (lightweight, no extra deps)
222// ---------------------------------------------------------------------------
223
/// Minimal line-based lookup of `key = value` in a TOML file.
///
/// * `section` is the literal header line to search under, brackets
///   included (e.g. `"[package]"`). With `None`, only lines before the first
///   header line are considered.
/// * A line matches when it starts with `key`, optionally followed by
///   whitespace, then `=`. Surrounding double quotes are stripped from the
///   value; empty values are skipped.
///
/// Returns the first matching value, or `None` when the file cannot be read
/// or nothing matches. This is not a TOML parser — multi-line values, inline
/// comments and single-quoted strings are not interpreted.
fn extract_toml_value(path: &Path, key: &str, section: Option<&str>) -> Option<String> {
    let text = std::fs::read_to_string(path).ok()?;
    let wanted_header = section.unwrap_or("");
    let mut active = section.is_none();

    for raw in text.lines() {
        let line = raw.trim();

        // Any header line re-evaluates whether we are inside the target.
        if line.starts_with('[') {
            active = line == wanted_header;
            continue;
        }
        if !active {
            continue;
        }

        let Some(after_key) = line.strip_prefix(key) else {
            continue;
        };
        let Some(after_eq) = after_key.trim_start().strip_prefix('=') else {
            continue;
        };
        let value = after_eq.trim().trim_matches('"');
        if !value.is_empty() {
            return Some(value.to_string());
        }
    }
    None
}
251
/// Minimal line-based lookup of a string field in a JSON file.
///
/// Scans for the first line that (after trimming) starts with `"<field>"`,
/// skips the separating spaces and colons plus any opening quotes, and
/// returns the text up to the next double quote. Lines whose value is empty
/// or has no closing quote are skipped.
///
/// Returns `None` when the file cannot be read or no line matches. This is
/// not a JSON parser: nesting is not tracked and the key must start its own
/// line.
fn extract_json_string_field(path: &Path, field: &str) -> Option<String> {
    let text = std::fs::read_to_string(path).ok()?;
    let quoted_key = format!("\"{field}\"");

    text.lines().find_map(|line| {
        let after_key = line.trim().strip_prefix(quoted_key.as_str())?;
        let value = after_key
            .trim_start_matches([' ', ':'])
            .trim()
            .trim_start_matches('"');
        let closing = value.find('"')?;
        let name = &value[..closing];
        if name.is_empty() {
            None
        } else {
            Some(name.to_string())
        }
    })
}
270
271// ---------------------------------------------------------------------------
272// Migration helpers
273// ---------------------------------------------------------------------------
274
275fn verify_ownership(old_dir: &Path, project_root: &str) -> bool {
276    let knowledge_path = old_dir.join("knowledge.json");
277    let Ok(content) = std::fs::read_to_string(&knowledge_path) else {
278        return true;
279    };
280
281    let stored_root: Option<String> = serde_json::from_str::<serde_json::Value>(&content)
282        .ok()
283        .and_then(|v| v.get("project_root")?.as_str().map(String::from));
284
285    match stored_root {
286        Some(stored) if !stored.is_empty() => stored == project_root,
287        _ => true,
288    }
289}
290
/// Recursively copies everything inside `src` into `dst`, creating `dst`
/// (and any missing parents) first. `src` itself is left untouched.
///
/// # Errors
///
/// Returns the I/O error text of the first failure: creating `dst`, listing
/// `src`, reading a directory entry, or copying a file. Entry errors are
/// reported rather than skipped so that an incomplete copy is never mistaken
/// for success. Entries copied before the failure are not rolled back.
fn copy_dir_contents(src: &Path, dst: &Path) -> Result<(), String> {
    std::fs::create_dir_all(dst).map_err(|e| e.to_string())?;

    for entry in std::fs::read_dir(src).map_err(|e| e.to_string())? {
        let entry = entry.map_err(|e| e.to_string())?;
        let src_path = entry.path();
        let dst_path = dst.join(entry.file_name());

        // `Path::is_dir` follows symlinks, so a link to a directory is
        // copied as a directory tree.
        if src_path.is_dir() {
            copy_dir_contents(&src_path, &dst_path)?;
        } else {
            std::fs::copy(&src_path, &dst_path).map_err(|e| e.to_string())?;
        }
    }
    Ok(())
}
306
307// ---------------------------------------------------------------------------
308// Tests
309// ---------------------------------------------------------------------------
310
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    // The legacy hash is 16 characters long and stable across calls.
    #[test]
    fn path_only_matches_legacy_behaviour() {
        let h = hash_path_only("/workspace");
        assert_eq!(h.len(), 16);
        let h2 = hash_path_only("/workspace");
        assert_eq!(h, h2);
    }

    // An empty directory hashes like the legacy scheme; adding a git remote
    // must change the composite hash.
    #[test]
    fn composite_differs_when_identity_present() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path().to_str().unwrap();

        let old = hash_path_only(root);
        let no_identity = hash_project_root(root);
        assert_eq!(old, no_identity, "without identity, hashes must match");

        fs::create_dir_all(dir.path().join(".git")).unwrap();
        fs::write(
            dir.path().join(".git").join("config"),
            "[remote \"origin\"]\n\turl = git@github.com:user/my-repo.git\n",
        )
        .unwrap();

        let with_identity = hash_project_root(root);
        assert_ne!(old, with_identity, "identity must change hash");
    }

    // Simulates two repositories mounted at the same path: the composite
    // hash is rebuilt by hand from the shared path plus each repo's identity.
    #[test]
    fn docker_collision_avoided() {
        let dir_a = tempfile::tempdir().unwrap();
        let dir_b = tempfile::tempdir().unwrap();

        let shared_path = "/workspace";

        fs::create_dir_all(dir_a.path().join(".git")).unwrap();
        fs::write(
            dir_a.path().join(".git").join("config"),
            "[remote \"origin\"]\n\turl = git@github.com:user/repo-a.git\n",
        )
        .unwrap();

        fs::create_dir_all(dir_b.path().join(".git")).unwrap();
        fs::write(
            dir_b.path().join(".git").join("config"),
            "[remote \"origin\"]\n\turl = git@github.com:user/repo-b.git\n",
        )
        .unwrap();

        let hash_a = {
            let mut hasher = DefaultHasher::new();
            shared_path.hash(&mut hasher);
            let id = project_identity(dir_a.path().to_str().unwrap()).unwrap();
            id.hash(&mut hasher);
            format!("{:016x}", hasher.finish())
        };
        let hash_b = {
            let mut hasher = DefaultHasher::new();
            shared_path.hash(&mut hasher);
            let id = project_identity(dir_b.path().to_str().unwrap()).unwrap();
            id.hash(&mut hasher);
            format!("{:016x}", hasher.finish())
        };

        assert_ne!(
            hash_a, hash_b,
            "different repos at same path must produce different hashes"
        );
    }

    // scp-style URLs lose the `git@` prefix and use `/` after the host;
    // https URLs keep their scheme; everything is lowercased.
    #[test]
    fn git_url_normalization() {
        assert_eq!(
            normalize_git_url("git@github.com:User/Repo.git"),
            "github.com/user/repo"
        );
        assert_eq!(
            normalize_git_url("https://github.com/User/Repo.git"),
            "https://github.com/user/repo"
        );
        assert_eq!(
            normalize_git_url("git@gitlab.com:org/sub/project.git"),
            "gitlab.com/org/sub/project"
        );
    }

    // One test per manifest-based detector follows; each writes a minimal
    // manifest and checks the prefixed identity string.
    #[test]
    fn identity_from_cargo_toml() {
        let dir = tempfile::tempdir().unwrap();
        fs::write(
            dir.path().join("Cargo.toml"),
            "[package]\nname = \"my-crate\"\nversion = \"0.1.0\"\n",
        )
        .unwrap();

        let id = project_identity(dir.path().to_str().unwrap());
        assert_eq!(id, Some("cargo:my-crate".into()));
    }

    #[test]
    fn identity_from_package_json() {
        let dir = tempfile::tempdir().unwrap();
        fs::write(
            dir.path().join("package.json"),
            "{\n  \"name\": \"@scope/my-app\",\n  \"version\": \"1.0.0\"\n}\n",
        )
        .unwrap();

        let id = project_identity(dir.path().to_str().unwrap());
        assert_eq!(id, Some("npm:@scope/my-app".into()));
    }

    #[test]
    fn identity_from_pyproject() {
        let dir = tempfile::tempdir().unwrap();
        fs::write(
            dir.path().join("pyproject.toml"),
            "[project]\nname = \"my-python-lib\"\nversion = \"2.0\"\n",
        )
        .unwrap();

        let id = project_identity(dir.path().to_str().unwrap());
        assert_eq!(id, Some("python:my-python-lib".into()));
    }

    // Poetry projects keep the name under `[tool.poetry]` instead of
    // `[project]`.
    #[test]
    fn identity_from_poetry_pyproject() {
        let dir = tempfile::tempdir().unwrap();
        fs::write(
            dir.path().join("pyproject.toml"),
            "[tool.poetry]\nname = \"poetry-app\"\nversion = \"1.0\"\n",
        )
        .unwrap();

        let id = project_identity(dir.path().to_str().unwrap());
        assert_eq!(id, Some("python:poetry-app".into()));
    }

    #[test]
    fn identity_from_go_mod() {
        let dir = tempfile::tempdir().unwrap();
        fs::write(
            dir.path().join("go.mod"),
            "module github.com/user/myservice\n\ngo 1.21\n",
        )
        .unwrap();

        let id = project_identity(dir.path().to_str().unwrap());
        assert_eq!(id, Some("go:github.com/user/myservice".into()));
    }

    #[test]
    fn identity_from_composer() {
        let dir = tempfile::tempdir().unwrap();
        fs::write(
            dir.path().join("composer.json"),
            "{\n  \"name\": \"vendor/my-php-lib\"\n}\n",
        )
        .unwrap();

        let id = project_identity(dir.path().to_str().unwrap());
        assert_eq!(id, Some("composer:vendor/my-php-lib".into()));
    }

    #[test]
    fn identity_from_gradle() {
        let dir = tempfile::tempdir().unwrap();
        fs::write(
            dir.path().join("settings.gradle"),
            "rootProject.name = 'my-java-app'\n",
        )
        .unwrap();

        let id = project_identity(dir.path().to_str().unwrap());
        assert_eq!(id, Some("gradle:my-java-app".into()));
    }

    // The solution file's content is irrelevant; only its name is used.
    #[test]
    fn identity_from_dotnet_sln() {
        let dir = tempfile::tempdir().unwrap();
        fs::write(dir.path().join("MyApp.sln"), "").unwrap();

        let id = project_identity(dir.path().to_str().unwrap());
        assert_eq!(id, Some("dotnet:MyApp".into()));
    }

    // Priority: a git remote beats a Cargo manifest in the same directory.
    #[test]
    fn identity_git_takes_priority_over_cargo() {
        let dir = tempfile::tempdir().unwrap();
        fs::create_dir_all(dir.path().join(".git")).unwrap();
        fs::write(
            dir.path().join(".git").join("config"),
            "[remote \"origin\"]\n\turl = git@github.com:user/repo.git\n",
        )
        .unwrap();
        fs::write(
            dir.path().join("Cargo.toml"),
            "[package]\nname = \"my-crate\"\n",
        )
        .unwrap();

        let id = project_identity(dir.path().to_str().unwrap());
        assert_eq!(id, Some("git:github.com/user/repo".into()));
    }

    #[test]
    fn no_identity_for_empty_dir() {
        let dir = tempfile::tempdir().unwrap();
        let id = project_identity(dir.path().to_str().unwrap());
        assert!(id.is_none());
    }

    // The trailing newline in `.lean-ctx-id` is trimmed away.
    #[test]
    fn identity_from_lean_ctx_id() {
        let dir = tempfile::tempdir().unwrap();
        fs::write(dir.path().join(".lean-ctx-id"), "my-docker-project\n").unwrap();

        let id = project_identity(dir.path().to_str().unwrap());
        assert_eq!(id, Some("explicit:my-docker-project".into()));
    }

    // Priority: the explicit identity file beats a git remote.
    #[test]
    fn lean_ctx_id_takes_priority_over_git() {
        let dir = tempfile::tempdir().unwrap();
        fs::write(dir.path().join(".lean-ctx-id"), "override-name").unwrap();
        fs::create_dir_all(dir.path().join(".git")).unwrap();
        fs::write(
            dir.path().join(".git").join("config"),
            "[remote \"origin\"]\n\turl = git@github.com:user/repo.git\n",
        )
        .unwrap();

        let id = project_identity(dir.path().to_str().unwrap());
        assert_eq!(id, Some("explicit:override-name".into()));
    }

    #[test]
    fn docker_different_projects_same_path_with_lean_ctx_id() {
        let dir_a = tempfile::tempdir().unwrap();
        let dir_b = tempfile::tempdir().unwrap();

        fs::write(dir_a.path().join(".lean-ctx-id"), "project-alpha").unwrap();
        fs::write(dir_b.path().join(".lean-ctx-id"), "project-beta").unwrap();

        let id_a = project_identity(dir_a.path().to_str().unwrap());
        let id_b = project_identity(dir_b.path().to_str().unwrap());
        assert_ne!(id_a, id_b);
    }

    // A path that does not exist has no identity, so both schemes agree.
    #[test]
    fn fallback_hash_equals_legacy_when_no_identity() {
        let h_new = hash_project_root("/some/path/without/project");
        let h_old = hash_path_only("/some/path/without/project");
        assert_eq!(
            h_new, h_old,
            "must be backward-compatible when no identity is found"
        );
    }

    // Exercises the copy helper directly (not `migrate_if_needed`): files
    // appear in the new directory and the old directory is left in place.
    #[test]
    fn migration_copies_files() {
        let tmp = tempfile::tempdir().unwrap();
        let knowledge_base = tmp.path().join("knowledge");
        let old_hash = "aaaa000000000000";
        let new_hash = "bbbb111111111111";

        let old_dir = knowledge_base.join(old_hash);
        let new_dir = knowledge_base.join(new_hash);
        fs::create_dir_all(&old_dir).unwrap();
        fs::write(
            old_dir.join("knowledge.json"),
            r#"{"project_root":"/workspace"}"#,
        )
        .unwrap();
        fs::write(old_dir.join("gotchas.json"), "{}").unwrap();

        copy_dir_contents(&old_dir, &new_dir).unwrap();

        assert!(new_dir.join("knowledge.json").exists());
        assert!(new_dir.join("gotchas.json").exists());
        assert!(
            old_dir.join("knowledge.json").exists(),
            "old dir must remain intact"
        );
    }

    // Ownership checks: only a non-empty, different recorded root is
    // rejected; every other case is accepted.
    #[test]
    fn ownership_check_rejects_foreign_data() {
        let tmp = tempfile::tempdir().unwrap();
        let dir = tmp.path().join("knowledge").join("hash123");
        fs::create_dir_all(&dir).unwrap();
        fs::write(
            dir.join("knowledge.json"),
            r#"{"project_root":"/other/project"}"#,
        )
        .unwrap();

        assert!(!verify_ownership(&dir, "/workspace"));
    }

    #[test]
    fn ownership_check_accepts_matching_root() {
        let tmp = tempfile::tempdir().unwrap();
        let dir = tmp.path().join("knowledge").join("hash123");
        fs::create_dir_all(&dir).unwrap();
        fs::write(
            dir.join("knowledge.json"),
            r#"{"project_root":"/workspace"}"#,
        )
        .unwrap();

        assert!(verify_ownership(&dir, "/workspace"));
    }

    #[test]
    fn ownership_check_accepts_empty_stored_root() {
        let tmp = tempfile::tempdir().unwrap();
        let dir = tmp.path().join("knowledge").join("hash123");
        fs::create_dir_all(&dir).unwrap();
        fs::write(dir.join("knowledge.json"), r#"{"project_root":""}"#).unwrap();

        assert!(verify_ownership(&dir, "/workspace"));
    }

    #[test]
    fn ownership_check_accepts_missing_knowledge_json() {
        let tmp = tempfile::tempdir().unwrap();
        let dir = tmp.path().join("knowledge").join("hash123");
        fs::create_dir_all(&dir).unwrap();

        assert!(verify_ownership(&dir, "/workspace"));
    }
}