Skip to main content

lean_ctx/core/
project_hash.rs

1use std::collections::hash_map::DefaultHasher;
2use std::hash::{Hash, Hasher};
3use std::path::Path;
4
5/// Computes a composite hash from the project root path and any detected
6/// project identity markers (git remote, manifest file, etc.).
7///
8/// This prevents hash collisions when different projects share the same
9/// mount path (e.g. Docker volumes at `/workspace`).
10pub(crate) fn hash_project_root(root: &str) -> String {
11    let mut hasher = DefaultHasher::new();
12    root.hash(&mut hasher);
13
14    if let Some(identity) = project_identity(root) {
15        identity.hash(&mut hasher);
16    }
17
18    format!("{:016x}", hasher.finish())
19}
20
/// Path-only hash, i.e. the scheme used before v3.3.2.
///
/// Retained so knowledge directories created under the old scheme can be
/// located and auto-migrated. Returns the 64-bit digest of `root` as 16
/// zero-padded lowercase hex characters.
pub(crate) fn hash_path_only(root: &str) -> String {
    let mut state = DefaultHasher::new();
    root.hash(&mut state);
    let digest = state.finish();
    format!("{digest:016x}")
}
28
29/// Extracts a stable project identity string from well-known config files.
30///
31/// Checks (in priority order):
32///   1. `.git/config`   → remote "origin" URL
33///   2. `Cargo.toml`    → `[package] name`
34///   3. `package.json`  → `"name"` field
35///   4. `pyproject.toml`→ `[project] name`
36///   5. `go.mod`        → `module` path
37///   6. `composer.json` → `"name"` field
38///   7. `build.gradle`  / `build.gradle.kts` → existence as a marker
39///   8. `*.sln`         → first `.sln` filename
40///
41/// Returns `None` when no identity marker is found, in which case
42/// the hash falls back to path-only (same behaviour as pre-3.3.2).
43pub(crate) fn project_identity(root: &str) -> Option<String> {
44    let root = Path::new(root);
45
46    if let Some(url) = git_remote_url(root) {
47        return Some(format!("git:{url}"));
48    }
49    if let Some(name) = cargo_package_name(root) {
50        return Some(format!("cargo:{name}"));
51    }
52    if let Some(name) = npm_package_name(root) {
53        return Some(format!("npm:{name}"));
54    }
55    if let Some(name) = pyproject_name(root) {
56        return Some(format!("python:{name}"));
57    }
58    if let Some(module) = go_module(root) {
59        return Some(format!("go:{module}"));
60    }
61    if let Some(name) = composer_name(root) {
62        return Some(format!("composer:{name}"));
63    }
64    if let Some(name) = gradle_project(root) {
65        return Some(format!("gradle:{name}"));
66    }
67    if let Some(name) = dotnet_solution(root) {
68        return Some(format!("dotnet:{name}"));
69    }
70
71    None
72}
73
74/// Copies all files from `old_hash` dir to `new_hash` dir when the composite
75/// hash differs from the legacy path-only hash.  Leaves the old directory
76/// intact so sibling projects sharing the same mount path can still migrate
77/// their own data independently.
78pub(crate) fn migrate_if_needed(old_hash: &str, new_hash: &str, project_root: &str) {
79    if old_hash == new_hash {
80        return;
81    }
82
83    let Ok(data_dir) = crate::core::data_dir::lean_ctx_data_dir() else {
84        return;
85    };
86
87    let old_dir = data_dir.join("knowledge").join(old_hash);
88    let new_dir = data_dir.join("knowledge").join(new_hash);
89
90    if !old_dir.exists() || new_dir.exists() {
91        return;
92    }
93
94    if !verify_ownership(&old_dir, project_root) {
95        return;
96    }
97
98    if let Err(e) = copy_dir_contents(&old_dir, &new_dir) {
99        tracing::error!("lean-ctx: knowledge migration failed: {e}");
100    }
101}
102
103// ---------------------------------------------------------------------------
104// Identity detectors
105// ---------------------------------------------------------------------------
106
107fn git_remote_url(root: &Path) -> Option<String> {
108    let config = root.join(".git").join("config");
109    let content = std::fs::read_to_string(config).ok()?;
110
111    let mut in_origin = false;
112    for line in content.lines() {
113        let trimmed = line.trim();
114        if trimmed.starts_with('[') {
115            in_origin = trimmed == r#"[remote "origin"]"#;
116            continue;
117        }
118        if in_origin {
119            if let Some(url) = trimmed.strip_prefix("url") {
120                let url = url.trim_start_matches([' ', '=']);
121                let url = url.trim();
122                if !url.is_empty() {
123                    return Some(normalize_git_url(url));
124                }
125            }
126        }
127    }
128    None
129}
130
/// Canonicalises a git remote URL so equivalent spellings hash alike.
///
/// Steps, in order:
/// 1. strip every trailing `.git`,
/// 2. for scp-style URLs (`git@host:path`) drop the `git@` prefix and turn
///    the first `:` into `/`,
/// 3. lowercase the result.
///
/// URLs without a leading `git@` (e.g. `https://...`) keep their scheme.
fn normalize_git_url(url: &str) -> String {
    let without_suffix = url.trim_end_matches(".git");
    let canonical = match without_suffix.strip_prefix("git@") {
        Some(scp_like) => scp_like.replacen(':', "/", 1),
        None => without_suffix.to_string(),
    };
    canonical.to_lowercase()
}
138
139fn cargo_package_name(root: &Path) -> Option<String> {
140    extract_toml_value(&root.join("Cargo.toml"), "name", Some("[package]"))
141}
142
143fn npm_package_name(root: &Path) -> Option<String> {
144    extract_json_string_field(&root.join("package.json"), "name")
145}
146
147fn pyproject_name(root: &Path) -> Option<String> {
148    extract_toml_value(&root.join("pyproject.toml"), "name", Some("[project]"))
149        .or_else(|| extract_toml_value(&root.join("pyproject.toml"), "name", Some("[tool.poetry]")))
150}
151
/// Returns the module path declared by the `module` directive in
/// `<root>/go.mod`.
///
/// The whole file is scanned, so leading blank lines or `//` comments (such
/// as a license header) do not hide the directive. A line only counts when
/// the `module` keyword is followed by whitespace, which rules out unrelated
/// tokens that merely start with those letters. Everything after the keyword
/// is returned trimmed.
///
/// Returns `None` when the file cannot be read or declares no module path.
fn go_module(root: &Path) -> Option<String> {
    let content = std::fs::read_to_string(root.join("go.mod")).ok()?;

    content.lines().find_map(|line| {
        let rest = line.trim_start().strip_prefix("module")?;
        // Require a separator so e.g. `modulefoo` is not read as `foo`.
        if !rest.starts_with(char::is_whitespace) {
            return None;
        }
        let module = rest.trim();
        (!module.is_empty()).then(|| module.to_string())
    })
}
157
158fn composer_name(root: &Path) -> Option<String> {
159    extract_json_string_field(&root.join("composer.json"), "name")
160}
161
/// Returns the `rootProject.name` declared in the Gradle settings file of
/// `root`.
///
/// `settings.gradle` is preferred; `settings.gradle.kts` is consulted only
/// when the Groovy file does not exist. The first line starting with
/// `rootProject.name` that yields a non-empty value wins, with surrounding
/// single or double quotes removed. Returns `None` when neither file exists,
/// the chosen file cannot be read, or it declares no name.
fn gradle_project(root: &Path) -> Option<String> {
    let settings_file = ["settings.gradle", "settings.gradle.kts"]
        .iter()
        .map(|file_name| root.join(file_name))
        .find(|candidate| candidate.exists())?;

    let text = std::fs::read_to_string(settings_file).ok()?;
    text.lines().find_map(|line| {
        let assignment = line.trim().strip_prefix("rootProject.name")?;
        let name = assignment
            .trim_start_matches([' ', '='])
            .trim()
            .trim_matches(['\'', '"']);
        (!name.is_empty()).then(|| name.to_string())
    })
}
187
/// Returns the file stem of a `.sln` entry directly inside `root`.
///
/// Directory iteration order is platform- and filesystem-dependent, so when
/// several solution files are present the lexicographically smallest stem is
/// chosen. This keeps the identity, and therefore the project hash, stable
/// across runs and machines. Entries whose stem is not valid UTF-8 are
/// skipped instead of aborting the scan.
///
/// Returns `None` when `root` cannot be listed or contains no usable `.sln`
/// entry.
fn dotnet_solution(root: &Path) -> Option<String> {
    std::fs::read_dir(root)
        .ok()?
        .flatten()
        .map(|entry| entry.path())
        .filter(|path| path.extension().map_or(false, |ext| ext == "sln"))
        .filter_map(|path| path.file_stem().and_then(|stem| stem.to_str()).map(String::from))
        .min()
}
203
204// ---------------------------------------------------------------------------
205// TOML / JSON helpers (lightweight, no extra deps)
206// ---------------------------------------------------------------------------
207
/// Minimal line-based lookup of `key = value` in a TOML file.
///
/// * `section` — the exact header line (brackets included, e.g.
///   `"[package]"`) the key must appear under. With `None`, only keys that
///   precede the first header are considered.
///
/// Any line starting with `[` is treated as a header. The value is
/// everything after `=`, trimmed, with leading and trailing `"` removed;
/// trailing inline comments are not stripped. Empty values are skipped.
/// Returns `None` when the file cannot be read or no matching entry exists.
fn extract_toml_value(path: &Path, key: &str, section: Option<&str>) -> Option<String> {
    let text = std::fs::read_to_string(path).ok()?;
    let wanted_header = section.unwrap_or("");
    let mut active = section.is_none();

    for raw_line in text.lines() {
        let line = raw_line.trim();

        if line.starts_with('[') {
            active = line == wanted_header;
        } else if active {
            // Requiring `=` right after the key (modulo whitespace) keeps
            // longer keys sharing the prefix, e.g. `namespace`, from matching.
            let value = line
                .strip_prefix(key)
                .and_then(|after_key| after_key.trim_start().strip_prefix('='))
                .map(|after_eq| after_eq.trim().trim_matches('"'))
                .filter(|candidate| !candidate.is_empty());
            if let Some(found) = value {
                return Some(found.to_string());
            }
        }
    }
    None
}
235
/// Minimal line-based lookup of a string-valued `"field"` in a JSON file.
///
/// A line matches only when, after trimming, it *starts* with the quoted
/// field name; the value is the text between the next pair of double
/// quotes. The first line producing a non-empty value wins, at any nesting
/// depth. Single-line (minified) documents are therefore not matched.
/// Returns `None` when the file cannot be read or nothing matches.
fn extract_json_string_field(path: &Path, field: &str) -> Option<String> {
    let text = std::fs::read_to_string(path).ok()?;
    let quoted_key = format!("\"{field}\"");

    text.lines().find_map(|line| {
        let after_key = line.trim().strip_prefix(quoted_key.as_str())?;
        let value = after_key
            .trim_start_matches([' ', ':'])
            .trim()
            .trim_start_matches('"');
        // Everything up to the closing quote is the field value.
        let (name, _) = value.split_once('"')?;
        (!name.is_empty()).then(|| name.to_string())
    })
}
254
255// ---------------------------------------------------------------------------
256// Migration helpers
257// ---------------------------------------------------------------------------
258
259fn verify_ownership(old_dir: &Path, project_root: &str) -> bool {
260    let knowledge_path = old_dir.join("knowledge.json");
261    let Ok(content) = std::fs::read_to_string(&knowledge_path) else {
262        return true;
263    };
264
265    let stored_root: Option<String> = serde_json::from_str::<serde_json::Value>(&content)
266        .ok()
267        .and_then(|v| v.get("project_root")?.as_str().map(String::from));
268
269    match stored_root {
270        Some(stored) if !stored.is_empty() => stored == project_root,
271        _ => true,
272    }
273}
274
/// Recursively copies everything inside `src` into `dst`, creating `dst`
/// (and any missing parents) first. `src` itself is left untouched.
///
/// Paths for which `is_dir()` is true are recursed into; everything else is
/// copied with `std::fs::copy`.
///
/// # Errors
///
/// Stops at the first I/O failure and returns a message naming the path
/// involved. This includes failures while reading individual directory
/// entries, so an incomplete copy is never reported as success. Entries
/// copied before the failure remain in `dst`.
fn copy_dir_contents(src: &Path, dst: &Path) -> Result<(), String> {
    std::fs::create_dir_all(dst).map_err(|e| format!("create {}: {e}", dst.display()))?;

    let entries =
        std::fs::read_dir(src).map_err(|e| format!("read {}: {e}", src.display()))?;

    for entry in entries {
        let entry = entry.map_err(|e| format!("read entry in {}: {e}", src.display()))?;
        let src_path = entry.path();
        let dst_path = dst.join(entry.file_name());

        if src_path.is_dir() {
            copy_dir_contents(&src_path, &dst_path)?;
        } else {
            std::fs::copy(&src_path, &dst_path).map_err(|e| {
                format!(
                    "copy {} -> {}: {e}",
                    src_path.display(),
                    dst_path.display()
                )
            })?;
        }
    }
    Ok(())
}
290
291// ---------------------------------------------------------------------------
292// Tests
293// ---------------------------------------------------------------------------
294
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    // -- fixtures ----------------------------------------------------------

    /// Writes a minimal `.git/config` under `root` whose `origin` remote
    /// points at `url`.
    fn write_origin_remote(root: &Path, url: &str) {
        let git_dir = root.join(".git");
        fs::create_dir_all(&git_dir).unwrap();
        let config = format!("[remote \"origin\"]\n\turl = {url}\n");
        fs::write(git_dir.join("config"), config).unwrap();
    }

    /// Creates a temporary project holding a single `file` with `contents`
    /// and returns the identity detected for it.
    fn identity_for_file(file: &str, contents: &str) -> Option<String> {
        let project = tempfile::tempdir().unwrap();
        fs::write(project.path().join(file), contents).unwrap();
        project_identity(project.path().to_str().unwrap())
    }

    /// Builds `<tmp>/knowledge/hash123`, optionally seeding `knowledge.json`.
    /// The `TempDir` guard is returned so the directory outlives the call.
    fn knowledge_dir(knowledge_json: Option<&str>) -> (tempfile::TempDir, std::path::PathBuf) {
        let tmp = tempfile::tempdir().unwrap();
        let dir = tmp.path().join("knowledge").join("hash123");
        fs::create_dir_all(&dir).unwrap();
        if let Some(body) = knowledge_json {
            fs::write(dir.join("knowledge.json"), body).unwrap();
        }
        (tmp, dir)
    }

    /// Hashes `path` followed by `identity`, mirroring the composite scheme.
    fn composite_hash(path: &str, identity: &str) -> String {
        let mut hasher = DefaultHasher::new();
        path.hash(&mut hasher);
        identity.hash(&mut hasher);
        format!("{:016x}", hasher.finish())
    }

    // -- hashing -----------------------------------------------------------

    #[test]
    fn path_only_matches_legacy_behaviour() {
        let first = hash_path_only("/workspace");
        let second = hash_path_only("/workspace");
        assert_eq!(first.len(), 16);
        assert_eq!(first, second);
    }

    #[test]
    fn composite_differs_when_identity_present() {
        let project = tempfile::tempdir().unwrap();
        let root = project.path().to_str().unwrap();

        let legacy = hash_path_only(root);
        let bare = hash_project_root(root);
        assert_eq!(legacy, bare, "without identity, hashes must match");

        write_origin_remote(project.path(), "git@github.com:user/my-repo.git");

        let identified = hash_project_root(root);
        assert_ne!(legacy, identified, "identity must change hash");
    }

    #[test]
    fn docker_collision_avoided() {
        let project_a = tempfile::tempdir().unwrap();
        let project_b = tempfile::tempdir().unwrap();
        let shared_path = "/workspace";

        write_origin_remote(project_a.path(), "git@github.com:user/repo-a.git");
        write_origin_remote(project_b.path(), "git@github.com:user/repo-b.git");

        let id_a = project_identity(project_a.path().to_str().unwrap()).unwrap();
        let id_b = project_identity(project_b.path().to_str().unwrap()).unwrap();

        assert_ne!(
            composite_hash(shared_path, &id_a),
            composite_hash(shared_path, &id_b),
            "different repos at same path must produce different hashes"
        );
    }

    #[test]
    fn fallback_hash_equals_legacy_when_no_identity() {
        let path = "/some/path/without/project";
        assert_eq!(
            hash_project_root(path),
            hash_path_only(path),
            "must be backward-compatible when no identity is found"
        );
    }

    // -- identity detection ------------------------------------------------

    #[test]
    fn git_url_normalization() {
        let cases = [
            ("git@github.com:User/Repo.git", "github.com/user/repo"),
            (
                "https://github.com/User/Repo.git",
                "https://github.com/user/repo",
            ),
            (
                "git@gitlab.com:org/sub/project.git",
                "gitlab.com/org/sub/project",
            ),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(normalize_git_url(input), *expected);
        }
    }

    #[test]
    fn identity_from_cargo_toml() {
        let id = identity_for_file(
            "Cargo.toml",
            "[package]\nname = \"my-crate\"\nversion = \"0.1.0\"\n",
        );
        assert_eq!(id, Some("cargo:my-crate".into()));
    }

    #[test]
    fn identity_from_package_json() {
        let id = identity_for_file(
            "package.json",
            "{\n  \"name\": \"@scope/my-app\",\n  \"version\": \"1.0.0\"\n}\n",
        );
        assert_eq!(id, Some("npm:@scope/my-app".into()));
    }

    #[test]
    fn identity_from_pyproject() {
        let id = identity_for_file(
            "pyproject.toml",
            "[project]\nname = \"my-python-lib\"\nversion = \"2.0\"\n",
        );
        assert_eq!(id, Some("python:my-python-lib".into()));
    }

    #[test]
    fn identity_from_poetry_pyproject() {
        let id = identity_for_file(
            "pyproject.toml",
            "[tool.poetry]\nname = \"poetry-app\"\nversion = \"1.0\"\n",
        );
        assert_eq!(id, Some("python:poetry-app".into()));
    }

    #[test]
    fn identity_from_go_mod() {
        let id = identity_for_file("go.mod", "module github.com/user/myservice\n\ngo 1.21\n");
        assert_eq!(id, Some("go:github.com/user/myservice".into()));
    }

    #[test]
    fn identity_from_composer() {
        let id = identity_for_file("composer.json", "{\n  \"name\": \"vendor/my-php-lib\"\n}\n");
        assert_eq!(id, Some("composer:vendor/my-php-lib".into()));
    }

    #[test]
    fn identity_from_gradle() {
        let id = identity_for_file("settings.gradle", "rootProject.name = 'my-java-app'\n");
        assert_eq!(id, Some("gradle:my-java-app".into()));
    }

    #[test]
    fn identity_from_dotnet_sln() {
        let id = identity_for_file("MyApp.sln", "");
        assert_eq!(id, Some("dotnet:MyApp".into()));
    }

    #[test]
    fn identity_git_takes_priority_over_cargo() {
        let project = tempfile::tempdir().unwrap();
        write_origin_remote(project.path(), "git@github.com:user/repo.git");
        fs::write(
            project.path().join("Cargo.toml"),
            "[package]\nname = \"my-crate\"\n",
        )
        .unwrap();

        let id = project_identity(project.path().to_str().unwrap());
        assert_eq!(id, Some("git:github.com/user/repo".into()));
    }

    #[test]
    fn no_identity_for_empty_dir() {
        let project = tempfile::tempdir().unwrap();
        assert!(project_identity(project.path().to_str().unwrap()).is_none());
    }

    // -- migration ---------------------------------------------------------

    #[test]
    fn migration_copies_files() {
        let tmp = tempfile::tempdir().unwrap();
        let knowledge_base = tmp.path().join("knowledge");
        let old_dir = knowledge_base.join("aaaa000000000000");
        let new_dir = knowledge_base.join("bbbb111111111111");

        fs::create_dir_all(&old_dir).unwrap();
        fs::write(
            old_dir.join("knowledge.json"),
            r#"{"project_root":"/workspace"}"#,
        )
        .unwrap();
        fs::write(old_dir.join("gotchas.json"), "{}").unwrap();

        copy_dir_contents(&old_dir, &new_dir).unwrap();

        for copied in ["knowledge.json", "gotchas.json"].iter() {
            assert!(new_dir.join(copied).exists());
        }
        assert!(
            old_dir.join("knowledge.json").exists(),
            "old dir must remain intact"
        );
    }

    #[test]
    fn ownership_check_rejects_foreign_data() {
        let (_guard, dir) = knowledge_dir(Some(r#"{"project_root":"/other/project"}"#));
        assert!(!verify_ownership(&dir, "/workspace"));
    }

    #[test]
    fn ownership_check_accepts_matching_root() {
        let (_guard, dir) = knowledge_dir(Some(r#"{"project_root":"/workspace"}"#));
        assert!(verify_ownership(&dir, "/workspace"));
    }

    #[test]
    fn ownership_check_accepts_empty_stored_root() {
        let (_guard, dir) = knowledge_dir(Some(r#"{"project_root":""}"#));
        assert!(verify_ownership(&dir, "/workspace"));
    }

    #[test]
    fn ownership_check_accepts_missing_knowledge_json() {
        let (_guard, dir) = knowledge_dir(None);
        assert!(verify_ownership(&dir, "/workspace"));
    }
}