Skip to main content

lean_ctx/core/
project_hash.rs

1use std::collections::hash_map::DefaultHasher;
2use std::hash::{Hash, Hasher};
3use std::path::Path;
4
5/// Computes a composite hash from the project root path and any detected
6/// project identity markers (git remote, manifest file, etc.).
7///
8/// This prevents hash collisions when different projects share the same
9/// mount path (e.g. Docker volumes at `/workspace`).
10pub(crate) fn hash_project_root(root: &str) -> String {
11    let mut hasher = DefaultHasher::new();
12    root.hash(&mut hasher);
13
14    if let Some(identity) = project_identity(root) {
15        identity.hash(&mut hasher);
16    }
17
18    format!("{:016x}", hasher.finish())
19}
20
/// Legacy hash that depends on the root path alone (scheme used before v3.3.2).
///
/// Retained so knowledge directories created by older versions can still be
/// located and auto-migrated. Returns 16 zero-padded lowercase hex digits.
pub(crate) fn hash_path_only(root: &str) -> String {
    let mut state = DefaultHasher::new();
    root.hash(&mut state);
    let digest = state.finish();
    format!("{digest:016x}")
}
28
29/// Extracts a stable project identity string from well-known config files.
30///
31/// Checks (in priority order):
32///   1. `.git/config`   → remote "origin" URL
33///   2. `Cargo.toml`    → `[package] name`
34///   3. `package.json`  → `"name"` field
35///   4. `pyproject.toml`→ `[project] name`
36///   5. `go.mod`        → `module` path
37///   6. `composer.json` → `"name"` field
38///   7. `build.gradle`  / `build.gradle.kts` → existence as a marker
39///   8. `*.sln`         → first `.sln` filename
40///
41/// Returns `None` when no identity marker is found, in which case
42/// the hash falls back to path-only (same behaviour as pre-3.3.2).
43pub(crate) fn project_identity(root: &str) -> Option<String> {
44    let root = Path::new(root);
45
46    if let Some(url) = git_remote_url(root) {
47        return Some(format!("git:{url}"));
48    }
49    if let Some(name) = cargo_package_name(root) {
50        return Some(format!("cargo:{name}"));
51    }
52    if let Some(name) = npm_package_name(root) {
53        return Some(format!("npm:{name}"));
54    }
55    if let Some(name) = pyproject_name(root) {
56        return Some(format!("python:{name}"));
57    }
58    if let Some(module) = go_module(root) {
59        return Some(format!("go:{module}"));
60    }
61    if let Some(name) = composer_name(root) {
62        return Some(format!("composer:{name}"));
63    }
64    if let Some(name) = gradle_project(root) {
65        return Some(format!("gradle:{name}"));
66    }
67    if let Some(name) = dotnet_solution(root) {
68        return Some(format!("dotnet:{name}"));
69    }
70
71    None
72}
73
74/// Copies all files from `old_hash` dir to `new_hash` dir when the composite
75/// hash differs from the legacy path-only hash.  Leaves the old directory
76/// intact so sibling projects sharing the same mount path can still migrate
77/// their own data independently.
78pub(crate) fn migrate_if_needed(old_hash: &str, new_hash: &str, project_root: &str) {
79    if old_hash == new_hash {
80        return;
81    }
82
83    let data_dir = match crate::core::data_dir::lean_ctx_data_dir() {
84        Ok(d) => d,
85        Err(_) => return,
86    };
87
88    let old_dir = data_dir.join("knowledge").join(old_hash);
89    let new_dir = data_dir.join("knowledge").join(new_hash);
90
91    if !old_dir.exists() || new_dir.exists() {
92        return;
93    }
94
95    if !verify_ownership(&old_dir, project_root) {
96        return;
97    }
98
99    if let Err(e) = copy_dir_contents(&old_dir, &new_dir) {
100        eprintln!("lean-ctx: knowledge migration failed: {e}");
101    }
102}
103
104// ---------------------------------------------------------------------------
105// Identity detectors
106// ---------------------------------------------------------------------------
107
108fn git_remote_url(root: &Path) -> Option<String> {
109    let config = root.join(".git").join("config");
110    let content = std::fs::read_to_string(config).ok()?;
111
112    let mut in_origin = false;
113    for line in content.lines() {
114        let trimmed = line.trim();
115        if trimmed.starts_with('[') {
116            in_origin = trimmed == r#"[remote "origin"]"#;
117            continue;
118        }
119        if in_origin {
120            if let Some(url) = trimmed.strip_prefix("url") {
121                let url = url.trim_start_matches([' ', '=']);
122                let url = url.trim();
123                if !url.is_empty() {
124                    return Some(normalize_git_url(url));
125                }
126            }
127        }
128    }
129    None
130}
131
/// Canonicalizes a git remote URL so equivalent spellings hash alike.
///
/// Steps, in order:
///   1. drop a single trailing `.git` (only one — a repository whose own
///      name ends in `.git` must not collapse onto its shorter sibling);
///   2. rewrite the scp-like form `git@host:path` to `host/path`;
///   3. lowercase the result.
///
/// URLs with an explicit scheme (e.g. `https://`) keep that scheme.
fn normalize_git_url(url: &str) -> String {
    let url = url.strip_suffix(".git").unwrap_or(url);
    let url = match url.strip_prefix("git@") {
        // Only the first ':' separates host from path in the scp-like form.
        Some(host_and_path) => host_and_path.replacen(':', "/", 1),
        None => url.to_string(),
    };
    url.to_lowercase()
}
140
141fn cargo_package_name(root: &Path) -> Option<String> {
142    extract_toml_value(&root.join("Cargo.toml"), "name", Some("[package]"))
143}
144
145fn npm_package_name(root: &Path) -> Option<String> {
146    extract_json_string_field(&root.join("package.json"), "name")
147}
148
149fn pyproject_name(root: &Path) -> Option<String> {
150    extract_toml_value(&root.join("pyproject.toml"), "name", Some("[project]"))
151        .or_else(|| extract_toml_value(&root.join("pyproject.toml"), "name", Some("[tool.poetry]")))
152}
153
/// Returns the module path declared in `<root>/go.mod`.
///
/// The file is scanned for the first line of the form `module <path>`.
/// Lines before it (blank lines, `//` comments such as a deprecation
/// notice) are skipped, and a `//` comment following the path on the same
/// line is dropped. The keyword must be followed by whitespace, so an
/// identifier that merely starts with `module` is not mistaken for the
/// directive.
///
/// Returns `None` when the file cannot be read or no directive with a
/// non-empty path is found.
fn go_module(root: &Path) -> Option<String> {
    let content = std::fs::read_to_string(root.join("go.mod")).ok()?;

    content.lines().find_map(|line| {
        let rest = line.trim().strip_prefix("module")?;
        if !rest.starts_with(char::is_whitespace) {
            return None;
        }

        // Module paths never contain "//", so anything after it is a comment.
        let path = rest.split("//").next().unwrap_or(rest).trim();
        if path.is_empty() {
            None
        } else {
            Some(path.to_string())
        }
    })
}
159
160fn composer_name(root: &Path) -> Option<String> {
161    extract_json_string_field(&root.join("composer.json"), "name")
162}
163
/// Returns the Gradle root project name for `root`.
///
/// `settings.gradle` is preferred over `settings.gradle.kts`; whichever
/// exists first is the only file read. The name is taken from the first
/// line starting with `rootProject.name` whose value — after stripping
/// spaces, `=` and surrounding single/double quotes — is non-empty.
fn gradle_project(root: &Path) -> Option<String> {
    let settings_file = ["settings.gradle", "settings.gradle.kts"]
        .iter()
        .map(|file_name| root.join(file_name))
        .find(|candidate| candidate.exists())?;

    let text = std::fs::read_to_string(settings_file).ok()?;

    text.lines().find_map(|line| {
        let assignment = line.trim().strip_prefix("rootProject.name")?;
        let value = assignment
            .trim_start_matches([' ', '='])
            .trim()
            .trim_matches(['\'', '"']);
        if value.is_empty() {
            None
        } else {
            Some(value.to_string())
        }
    })
}
189
/// Returns the file stem of a `.sln` entry located directly in `root`.
///
/// Directory listing order is platform- and filesystem-dependent, so when
/// several solution files exist the lexicographically smallest stem is
/// chosen; this keeps the resulting identity stable between runs. Entries
/// whose stem is not valid UTF-8 are ignored.
///
/// Returns `None` when the directory cannot be read or holds no usable
/// `.sln` entry.
fn dotnet_solution(root: &Path) -> Option<String> {
    std::fs::read_dir(root)
        .ok()?
        .flatten()
        .map(|entry| entry.path())
        .filter(|path| path.extension().map_or(false, |ext| ext == "sln"))
        .filter_map(|path| path.file_stem().and_then(|stem| stem.to_str()).map(String::from))
        .min()
}
205
206// ---------------------------------------------------------------------------
207// TOML / JSON helpers (lightweight, no extra deps)
208// ---------------------------------------------------------------------------
209
/// Minimal line-based lookup of `key = value` in a TOML file.
///
/// * `path` — file to read; an unreadable file yields `None`.
/// * `key` — key name; the line must start with it and, after optional
///   whitespace, continue with `=`.
/// * `section` — exact header line (e.g. `"[package]"`) the key must live
///   under. With `None`, only keys appearing before the first header line
///   are considered.
///
/// Surrounding double quotes are stripped from the value. The first
/// non-empty match is returned. This is not a TOML parser: comments,
/// single-quoted and multi-line values are not interpreted.
fn extract_toml_value(path: &Path, key: &str, section: Option<&str>) -> Option<String> {
    let text = std::fs::read_to_string(path).ok()?;
    let wanted_header = section.unwrap_or("");
    let mut active = section.is_none();

    for raw_line in text.lines() {
        let line = raw_line.trim();

        if line.starts_with('[') {
            // Every header re-evaluates whether we are inside the target section.
            active = line == wanted_header;
        } else if active {
            let value = line
                .strip_prefix(key)
                .and_then(|after_key| after_key.trim_start().strip_prefix('='))
                .map(|after_eq| after_eq.trim().trim_matches('"'))
                .filter(|candidate| !candidate.is_empty());

            if let Some(found) = value {
                return Some(found.to_string());
            }
        }
    }
    None
}
237
/// Minimal line-based lookup of a string field in a JSON file.
///
/// Scans for the first line that (after trimming) starts with `"<field>"`,
/// skips the following spaces/colons and opening quote(s), and returns the
/// text up to the next `"`. Lines whose value is empty or has no closing
/// quote are skipped. An unreadable file yields `None`.
///
/// This is not a JSON parser: nesting is ignored and the field must sit at
/// the start of its own line.
fn extract_json_string_field(path: &Path, field: &str) -> Option<String> {
    let text = std::fs::read_to_string(path).ok()?;
    let quoted_key = format!("\"{field}\"");

    text.lines().find_map(|line| {
        let after_key = line.trim().strip_prefix(quoted_key.as_str())?;
        let value = after_key
            .trim_start_matches([' ', ':'])
            .trim()
            .trim_start_matches('"');
        // Everything before the closing quote is the field value.
        let (name, _) = value.split_once('"')?;
        if name.is_empty() {
            None
        } else {
            Some(name.to_string())
        }
    })
}
256
257// ---------------------------------------------------------------------------
258// Migration helpers
259// ---------------------------------------------------------------------------
260
261fn verify_ownership(old_dir: &Path, project_root: &str) -> bool {
262    let knowledge_path = old_dir.join("knowledge.json");
263    let content = match std::fs::read_to_string(&knowledge_path) {
264        Ok(c) => c,
265        Err(_) => return true, // No knowledge.json — safe to migrate gotchas etc.
266    };
267
268    let stored_root: Option<String> = serde_json::from_str::<serde_json::Value>(&content)
269        .ok()
270        .and_then(|v| v.get("project_root")?.as_str().map(String::from));
271
272    match stored_root {
273        Some(stored) if !stored.is_empty() => stored == project_root,
274        _ => true,
275    }
276}
277
/// Recursively copies everything under `src` into `dst`.
///
/// `dst` (and any missing parents) is created first. Sub-directories are
/// copied recursively, everything else via `std::fs::copy`. Directory
/// entries that cannot be read are skipped; any other I/O failure aborts
/// the copy and is returned as the error's display string. `src` itself is
/// left untouched.
fn copy_dir_contents(src: &Path, dst: &Path) -> Result<(), String> {
    fn describe(err: std::io::Error) -> String {
        err.to_string()
    }

    std::fs::create_dir_all(dst).map_err(describe)?;
    let listing = std::fs::read_dir(src).map_err(describe)?;

    listing.flatten().try_for_each(|item| {
        let from = item.path();
        let to = dst.join(item.file_name());

        if from.is_dir() {
            copy_dir_contents(&from, &to)
        } else {
            std::fs::copy(&from, &to)
                .map(|_bytes_copied| ())
                .map_err(describe)
        }
    })
}
293
294// ---------------------------------------------------------------------------
295// Tests
296// ---------------------------------------------------------------------------
297
// Unit tests for hashing, identity detection and the migration helpers.
// Fixtures are written into throw-away directories created with `tempfile`.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    // The legacy hash is 16 hex digits and repeatable for the same input.
    #[test]
    fn path_only_matches_legacy_behaviour() {
        let h = hash_path_only("/workspace");
        assert_eq!(h.len(), 16);
        let h2 = hash_path_only("/workspace");
        assert_eq!(h, h2);
    }

    // Without a marker the composite hash equals the legacy one; adding a
    // git remote to the same directory must change it.
    #[test]
    fn composite_differs_when_identity_present() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path().to_str().unwrap();

        let old = hash_path_only(root);
        let no_identity = hash_project_root(root);
        assert_eq!(old, no_identity, "without identity, hashes must match");

        fs::create_dir_all(dir.path().join(".git")).unwrap();
        fs::write(
            dir.path().join(".git").join("config"),
            "[remote \"origin\"]\n\turl = git@github.com:user/my-repo.git\n",
        )
        .unwrap();

        let with_identity = hash_project_root(root);
        assert_ne!(old, with_identity, "identity must change hash");
    }

    // Simulates two repositories mounted at the same path: the hash is
    // rebuilt by hand from a shared path string plus each repo's identity.
    #[test]
    fn docker_collision_avoided() {
        let dir_a = tempfile::tempdir().unwrap();
        let dir_b = tempfile::tempdir().unwrap();

        let shared_path = "/workspace";

        fs::create_dir_all(dir_a.path().join(".git")).unwrap();
        fs::write(
            dir_a.path().join(".git").join("config"),
            "[remote \"origin\"]\n\turl = git@github.com:user/repo-a.git\n",
        )
        .unwrap();

        fs::create_dir_all(dir_b.path().join(".git")).unwrap();
        fs::write(
            dir_b.path().join(".git").join("config"),
            "[remote \"origin\"]\n\turl = git@github.com:user/repo-b.git\n",
        )
        .unwrap();

        let hash_a = {
            let mut hasher = DefaultHasher::new();
            shared_path.hash(&mut hasher);
            let id = project_identity(dir_a.path().to_str().unwrap()).unwrap();
            id.hash(&mut hasher);
            format!("{:016x}", hasher.finish())
        };
        let hash_b = {
            let mut hasher = DefaultHasher::new();
            shared_path.hash(&mut hasher);
            let id = project_identity(dir_b.path().to_str().unwrap()).unwrap();
            id.hash(&mut hasher);
            format!("{:016x}", hasher.finish())
        };

        assert_ne!(
            hash_a, hash_b,
            "different repos at same path must produce different hashes"
        );
    }

    // scp-like URLs lose `git@` and the `:` separator; every URL loses the
    // `.git` suffix and is lowercased. The https scheme is kept.
    #[test]
    fn git_url_normalization() {
        assert_eq!(
            normalize_git_url("git@github.com:User/Repo.git"),
            "github.com/user/repo"
        );
        assert_eq!(
            normalize_git_url("https://github.com/User/Repo.git"),
            "https://github.com/user/repo"
        );
        assert_eq!(
            normalize_git_url("git@gitlab.com:org/sub/project.git"),
            "gitlab.com/org/sub/project"
        );
    }

    // Cargo.toml: `[package] name` becomes a `cargo:` identity.
    #[test]
    fn identity_from_cargo_toml() {
        let dir = tempfile::tempdir().unwrap();
        fs::write(
            dir.path().join("Cargo.toml"),
            "[package]\nname = \"my-crate\"\nversion = \"0.1.0\"\n",
        )
        .unwrap();

        let id = project_identity(dir.path().to_str().unwrap());
        assert_eq!(id, Some("cargo:my-crate".into()));
    }

    // package.json: the `"name"` field (scoped names included) becomes `npm:`.
    #[test]
    fn identity_from_package_json() {
        let dir = tempfile::tempdir().unwrap();
        fs::write(
            dir.path().join("package.json"),
            "{\n  \"name\": \"@scope/my-app\",\n  \"version\": \"1.0.0\"\n}\n",
        )
        .unwrap();

        let id = project_identity(dir.path().to_str().unwrap());
        assert_eq!(id, Some("npm:@scope/my-app".into()));
    }

    // pyproject.toml: `[project] name` becomes a `python:` identity.
    #[test]
    fn identity_from_pyproject() {
        let dir = tempfile::tempdir().unwrap();
        fs::write(
            dir.path().join("pyproject.toml"),
            "[project]\nname = \"my-python-lib\"\nversion = \"2.0\"\n",
        )
        .unwrap();

        let id = project_identity(dir.path().to_str().unwrap());
        assert_eq!(id, Some("python:my-python-lib".into()));
    }

    // pyproject.toml without `[project]`: falls back to `[tool.poetry] name`.
    #[test]
    fn identity_from_poetry_pyproject() {
        let dir = tempfile::tempdir().unwrap();
        fs::write(
            dir.path().join("pyproject.toml"),
            "[tool.poetry]\nname = \"poetry-app\"\nversion = \"1.0\"\n",
        )
        .unwrap();

        let id = project_identity(dir.path().to_str().unwrap());
        assert_eq!(id, Some("python:poetry-app".into()));
    }

    // go.mod: the `module` path becomes a `go:` identity.
    #[test]
    fn identity_from_go_mod() {
        let dir = tempfile::tempdir().unwrap();
        fs::write(
            dir.path().join("go.mod"),
            "module github.com/user/myservice\n\ngo 1.21\n",
        )
        .unwrap();

        let id = project_identity(dir.path().to_str().unwrap());
        assert_eq!(id, Some("go:github.com/user/myservice".into()));
    }

    // composer.json: the `"name"` field becomes a `composer:` identity.
    #[test]
    fn identity_from_composer() {
        let dir = tempfile::tempdir().unwrap();
        fs::write(
            dir.path().join("composer.json"),
            "{\n  \"name\": \"vendor/my-php-lib\"\n}\n",
        )
        .unwrap();

        let id = project_identity(dir.path().to_str().unwrap());
        assert_eq!(id, Some("composer:vendor/my-php-lib".into()));
    }

    // settings.gradle: `rootProject.name` (quotes stripped) becomes `gradle:`.
    #[test]
    fn identity_from_gradle() {
        let dir = tempfile::tempdir().unwrap();
        fs::write(
            dir.path().join("settings.gradle"),
            "rootProject.name = 'my-java-app'\n",
        )
        .unwrap();

        let id = project_identity(dir.path().to_str().unwrap());
        assert_eq!(id, Some("gradle:my-java-app".into()));
    }

    // A `.sln` file in the root contributes its file stem as `dotnet:`.
    #[test]
    fn identity_from_dotnet_sln() {
        let dir = tempfile::tempdir().unwrap();
        fs::write(dir.path().join("MyApp.sln"), "").unwrap();

        let id = project_identity(dir.path().to_str().unwrap());
        assert_eq!(id, Some("dotnet:MyApp".into()));
    }

    // When both a git remote and a Cargo manifest exist, git wins.
    #[test]
    fn identity_git_takes_priority_over_cargo() {
        let dir = tempfile::tempdir().unwrap();
        fs::create_dir_all(dir.path().join(".git")).unwrap();
        fs::write(
            dir.path().join(".git").join("config"),
            "[remote \"origin\"]\n\turl = git@github.com:user/repo.git\n",
        )
        .unwrap();
        fs::write(
            dir.path().join("Cargo.toml"),
            "[package]\nname = \"my-crate\"\n",
        )
        .unwrap();

        let id = project_identity(dir.path().to_str().unwrap());
        assert_eq!(id, Some("git:github.com/user/repo".into()));
    }

    // An empty directory has no identity marker at all.
    #[test]
    fn no_identity_for_empty_dir() {
        let dir = tempfile::tempdir().unwrap();
        let id = project_identity(dir.path().to_str().unwrap());
        assert!(id.is_none());
    }

    // A path with no detectable project must hash exactly like the legacy scheme.
    #[test]
    fn fallback_hash_equals_legacy_when_no_identity() {
        let h_new = hash_project_root("/some/path/without/project");
        let h_old = hash_path_only("/some/path/without/project");
        assert_eq!(
            h_new, h_old,
            "must be backward-compatible when no identity is found"
        );
    }

    // Exercises `copy_dir_contents` directly (not `migrate_if_needed`):
    // files appear in the new directory and the old one is left in place.
    #[test]
    fn migration_copies_files() {
        let tmp = tempfile::tempdir().unwrap();
        let knowledge_base = tmp.path().join("knowledge");
        let old_hash = "aaaa000000000000";
        let new_hash = "bbbb111111111111";

        let old_dir = knowledge_base.join(old_hash);
        let new_dir = knowledge_base.join(new_hash);
        fs::create_dir_all(&old_dir).unwrap();
        fs::write(
            old_dir.join("knowledge.json"),
            r#"{"project_root":"/workspace"}"#,
        )
        .unwrap();
        fs::write(old_dir.join("gotchas.json"), "{}").unwrap();

        copy_dir_contents(&old_dir, &new_dir).unwrap();

        assert!(new_dir.join("knowledge.json").exists());
        assert!(new_dir.join("gotchas.json").exists());
        assert!(
            old_dir.join("knowledge.json").exists(),
            "old dir must remain intact"
        );
    }

    // A stored, non-empty root that differs from ours blocks migration.
    #[test]
    fn ownership_check_rejects_foreign_data() {
        let tmp = tempfile::tempdir().unwrap();
        let dir = tmp.path().join("knowledge").join("hash123");
        fs::create_dir_all(&dir).unwrap();
        fs::write(
            dir.join("knowledge.json"),
            r#"{"project_root":"/other/project"}"#,
        )
        .unwrap();

        assert!(!verify_ownership(&dir, "/workspace"));
    }

    // A stored root equal to ours allows migration.
    #[test]
    fn ownership_check_accepts_matching_root() {
        let tmp = tempfile::tempdir().unwrap();
        let dir = tmp.path().join("knowledge").join("hash123");
        fs::create_dir_all(&dir).unwrap();
        fs::write(
            dir.join("knowledge.json"),
            r#"{"project_root":"/workspace"}"#,
        )
        .unwrap();

        assert!(verify_ownership(&dir, "/workspace"));
    }

    // An empty stored root is treated as "no owner recorded".
    #[test]
    fn ownership_check_accepts_empty_stored_root() {
        let tmp = tempfile::tempdir().unwrap();
        let dir = tmp.path().join("knowledge").join("hash123");
        fs::create_dir_all(&dir).unwrap();
        fs::write(dir.join("knowledge.json"), r#"{"project_root":""}"#).unwrap();

        assert!(verify_ownership(&dir, "/workspace"));
    }

    // With no knowledge.json at all, migration is allowed.
    #[test]
    fn ownership_check_accepts_missing_knowledge_json() {
        let tmp = tempfile::tempdir().unwrap();
        let dir = tmp.path().join("knowledge").join("hash123");
        fs::create_dir_all(&dir).unwrap();

        assert!(verify_ownership(&dir, "/workspace"));
    }
}