Skip to main content

codelens_engine/
project.rs

1use anyhow::{Context, Result, bail};
2use std::path::{Path, PathBuf};
3
/// A canonicalized directory that acts as the boundary of a project.
///
/// The wrapped path is only set through [`ProjectRoot::new`] and
/// [`ProjectRoot::new_exact`], both of which canonicalize it and verify that
/// it is a directory.
#[derive(Clone, Debug)]
pub struct ProjectRoot {
    /// Canonical, absolute path of the project directory.
    root: PathBuf,
}
8
/// File or directory names whose presence marks a directory as a project
/// root during upward root detection.
const ROOT_MARKERS: &[&str] = &[
    // Version control / CodeLens state
    ".git",
    ".codelens",
    // Gradle
    "build.gradle.kts",
    "build.gradle",
    // Node
    "package.json",
    // Python
    "pyproject.toml",
    // Rust
    "Cargo.toml",
    // Maven
    "pom.xml",
    // Go
    "go.mod",
];
20
21impl ProjectRoot {
22    /// Create a ProjectRoot, auto-detecting the actual root by walking up from
23    /// the given path until a root marker (.git, Cargo.toml, etc.) is found.
24    /// Falls back to the given path if no marker is found.
25    pub fn new(path: impl AsRef<Path>) -> Result<Self> {
26        let start = path.as_ref().canonicalize().with_context(|| {
27            format!("failed to resolve project root {}", path.as_ref().display())
28        })?;
29        if !start.is_dir() {
30            bail!("project root is not a directory: {}", start.display());
31        }
32        let root = detect_root(&start).unwrap_or_else(|| start.clone());
33        Ok(Self { root })
34    }
35
36    /// Create a ProjectRoot at the exact given path without auto-detection.
37    pub fn new_exact(path: impl AsRef<Path>) -> Result<Self> {
38        let root = path.as_ref().canonicalize().with_context(|| {
39            format!("failed to resolve project root {}", path.as_ref().display())
40        })?;
41        if !root.is_dir() {
42            bail!("project root is not a directory: {}", root.display());
43        }
44        Ok(Self { root })
45    }
46
47    pub fn as_path(&self) -> &Path {
48        &self.root
49    }
50
51    pub fn resolve(&self, relative_or_absolute: impl AsRef<Path>) -> Result<PathBuf> {
52        let path = relative_or_absolute.as_ref();
53        let candidate = if path.is_absolute() {
54            path.to_path_buf()
55        } else {
56            self.root.join(path)
57        };
58        let normalized = normalize_path(&candidate);
59        if !normalized.starts_with(&self.root) {
60            bail!(
61                "path escapes project root: {} (root: {})",
62                normalized.display(),
63                self.root.display()
64            );
65        }
66        // If the path exists, verify the real (symlink-resolved) path also stays within root
67        if normalized.exists()
68            && let Ok(real) = normalized.canonicalize()
69            && !real.starts_with(&self.root)
70        {
71            bail!(
72                "symlink escapes project root: {} → {} (root: {})",
73                normalized.display(),
74                real.display(),
75                self.root.display()
76            );
77        }
78        // Resolve symlinks so the returned path matches what's stored in the index.
79        if normalized.exists()
80            && let Ok(real) = normalized.canonicalize()
81            && real.starts_with(&self.root)
82        {
83            return Ok(real);
84        }
85        Ok(normalized)
86    }
87
88    pub fn to_relative(&self, path: impl AsRef<Path>) -> String {
89        let path = path.as_ref();
90        let canonical = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
91        canonical
92            .strip_prefix(&self.root)
93            .unwrap_or(&canonical)
94            .to_string_lossy()
95            .replace('\\', "/")
96    }
97}
98
99// ── Shared directory exclusion & file collection ────────────────────────
100
/// Path component names that are never descended into or indexed.
///
/// Matching is by exact component name (see `is_excluded`).
pub const EXCLUDED_DIRS: &[&str] = &[
    // ── VCS & IDE ──
    ".git",
    ".idea",
    ".vscode",
    ".cursor",
    ".claude",
    // ── Build output ──
    ".gradle",
    "build",
    "dist",
    "out",
    "node_modules",
    "vendor",
    "__pycache__",
    "target",
    ".next",
    // ── Virtual environments ──
    ".venv",
    "venv",
    ".tox",
    "env",
    // ── Caches (common polluters — can contain 40K+ symbols from deps) ──
    ".cache",
    ".ruff_cache",
    ".pytest_cache",
    ".mypy_cache",
    ".fastembed_cache",
    // ── Editor extensions (e.g. Antigravity/Windsurf bundled JS) ──
    ".antigravity",
    ".windsurf",
    // ── Cloud & external mounts ──
    "Library",
    // ── CodeLens runtime ──
    ".codelens",
];
137
138/// Returns `true` if any component of `path` matches an excluded directory.
139pub fn is_excluded(path: &Path) -> bool {
140    path.components().any(|component| {
141        let value = component.as_os_str().to_string_lossy();
142        EXCLUDED_DIRS.contains(&value.as_ref())
143    })
144}
145
146/// Walk `root` collecting files that pass `filter`, skipping excluded dirs.
147pub fn collect_files(root: &Path, filter: impl Fn(&Path) -> bool) -> Result<Vec<PathBuf>> {
148    use walkdir::WalkDir;
149    let mut files = Vec::new();
150    for entry in WalkDir::new(root)
151        .into_iter()
152        .filter_entry(|entry| !is_excluded(entry.path()))
153    {
154        let entry = entry?;
155        if entry.file_type().is_file() && filter(entry.path()) {
156            files.push(entry.path().to_path_buf());
157        }
158    }
159    Ok(files)
160}
161
162/// Walk `root` and return the canonical extension tag of the dominant
163/// source language by file count (e.g. `rs`, `py`, `ts`, `go`). Returns
164/// `None` when the project contains fewer than 3 source files in total,
165/// or when no single language holds a clear plurality.
166///
167/// v1.5 Phase 2j MCP follow-up. The engine helper walks the project
168/// once at activation time and hands the result to the MCP tool layer,
169/// which then exports `CODELENS_EMBED_HINT_AUTO_LANG=<lang>` so the
170/// engine's `auto_hint_should_enable` gate can consult
171/// `language_supports_nl_stack` on subsequent embedding calls.
172///
173/// Walk scope is capped (16 k files) to avoid pathological cases on
174/// very large monorepos — the goal is to classify the project by
175/// dominant language, not to enumerate every file. Directories in
176/// `EXCLUDED_DIRS` are skipped (same filter as `collect_files`). Only
177/// files with an extension recognised by the language registry are
178/// counted; build artefacts / README / Markdown are ignored.
179///
180/// The returned tag is the canonical extension string (e.g. `rs`,
181/// `py`) — exactly what `CODELENS_EMBED_HINT_AUTO_LANG` expects and
182/// what `crate::embedding::language_supports_nl_stack` accepts.
183pub fn compute_dominant_language(root: &Path) -> Option<String> {
184    use std::collections::HashMap;
185    use walkdir::WalkDir;
186
187    const WALK_CAP: usize = 16_384;
188    const MIN_FILES: usize = 3;
189
190    let mut counts: HashMap<String, usize> = HashMap::new();
191    let mut total = 0usize;
192
193    for entry in WalkDir::new(root)
194        .into_iter()
195        .filter_entry(|entry| !is_excluded(entry.path()))
196    {
197        let Ok(entry) = entry else {
198            continue;
199        };
200        if !entry.file_type().is_file() {
201            continue;
202        }
203        let Some(ext) = entry.path().extension() else {
204            continue;
205        };
206        let Some(ext_str) = ext.to_str() else {
207            continue;
208        };
209        let ext_lower = ext_str.to_ascii_lowercase();
210        // Only count extensions we know are source languages. This uses
211        // the language registry so future language additions stay in
212        // sync automatically. The import is local to avoid a cyclic
213        // module dependency with `lang_config`.
214        if crate::lang_registry::for_extension(&ext_lower).is_none() {
215            continue;
216        }
217        *counts.entry(ext_lower).or_insert(0) += 1;
218        total += 1;
219        if total >= WALK_CAP {
220            break;
221        }
222    }
223
224    if total < MIN_FILES {
225        return None;
226    }
227
228    // Find the extension with the highest count. A strict plurality is
229    // not required (return whichever wins) but the caller can use the
230    // count ratio via `compute_dominant_language_with_count` if they
231    // want to impose a threshold. For v1.5 Phase 2j we accept any
232    // plurality and let the downstream `language_supports_nl_stack`
233    // decide whether the tag maps to an allowed language.
234    counts
235        .into_iter()
236        .max_by_key(|(_, count)| *count)
237        .map(|(ext, _)| ext)
238}
239
240/// Walk up from `start` until a directory containing a root marker is found.
241fn detect_root(start: &Path) -> Option<PathBuf> {
242    let home = dirs_fallback();
243    let mut current = start.to_path_buf();
244    loop {
245        // `~/.codelens` stores global CodeLens state, so treating the home directory as an
246        // inferred project root causes unrelated folders to collapse onto `$HOME`.
247        // If the user really wants to operate on `$HOME`, they can pass it explicitly.
248        if current != start && Some(current.as_path()) == home.as_deref() {
249            break;
250        }
251        for marker in ROOT_MARKERS {
252            if current.join(marker).exists() {
253                return Some(current);
254            }
255        }
256        // Don't go above home directory
257        if Some(current.as_path()) == home.as_deref() {
258            break;
259        }
260        if !current.pop() {
261            break;
262        }
263    }
264    None
265}
266
/// Best-effort lookup of the current user's home directory.
///
/// Reads `HOME` first and falls back to `USERPROFILE` (the variable Windows
/// sets), skipping variables that are unset or empty. The value is
/// canonicalized when possible so it can be compared with other canonical
/// paths; if canonicalization fails the raw value is returned. Returns
/// `None` when neither variable holds a non-empty value.
fn dirs_fallback() -> Option<PathBuf> {
    ["HOME", "USERPROFILE"]
        .iter()
        .filter_map(|key| std::env::var_os(key))
        // An empty value would become an empty path that matches nothing.
        .find(|value| !value.is_empty())
        .map(PathBuf::from)
        .map(|home| home.canonicalize().unwrap_or(home))
}
272
273// ── Framework detection ─────────────────────────────────────────────────
274
275pub fn detect_frameworks(project: &Path) -> Vec<String> {
276    let mut frameworks = Vec::new();
277
278    // Python
279    if project.join("manage.py").exists() {
280        frameworks.push("django".into());
281    }
282    if has_dependency(project, "fastapi") {
283        frameworks.push("fastapi".into());
284    }
285    if has_dependency(project, "flask") {
286        frameworks.push("flask".into());
287    }
288
289    // JavaScript/TypeScript
290    if project.join("next.config.js").exists()
291        || project.join("next.config.mjs").exists()
292        || project.join("next.config.ts").exists()
293    {
294        frameworks.push("nextjs".into());
295    }
296    if has_node_dependency(project, "express") {
297        frameworks.push("express".into());
298    }
299    if has_node_dependency(project, "@nestjs/core") {
300        frameworks.push("nestjs".into());
301    }
302    if project.join("vite.config.ts").exists() || project.join("vite.config.js").exists() {
303        frameworks.push("vite".into());
304    }
305
306    // Rust
307    if project.join("Cargo.toml").exists() {
308        if has_cargo_dependency(project, "actix-web") {
309            frameworks.push("actix-web".into());
310        }
311        if has_cargo_dependency(project, "axum") {
312            frameworks.push("axum".into());
313        }
314        if has_cargo_dependency(project, "rocket") {
315            frameworks.push("rocket".into());
316        }
317    }
318
319    // Go
320    if has_go_dependency(project, "gin-gonic/gin") {
321        frameworks.push("gin".into());
322    }
323    if has_go_dependency(project, "gofiber/fiber") {
324        frameworks.push("fiber".into());
325    }
326
327    // Java/Kotlin
328    if has_gradle_or_maven_dependency(project, "spring-boot") {
329        frameworks.push("spring-boot".into());
330    }
331
332    frameworks
333}
334
/// Read `path` as UTF-8 text, returning `None` on any read error (missing
/// file, permission problem, invalid UTF-8, ...).
fn read_file_text(path: &Path) -> Option<String> {
    match std::fs::read_to_string(path) {
        Ok(text) => Some(text),
        Err(_) => None,
    }
}
338
339fn has_dependency(project: &Path, name: &str) -> bool {
340    let req = project.join("requirements.txt");
341    if let Some(text) = read_file_text(&req)
342        && text.contains(name)
343    {
344        return true;
345    }
346    let pyproject = project.join("pyproject.toml");
347    if let Some(text) = read_file_text(&pyproject)
348        && text.contains(name)
349    {
350        return true;
351    }
352    false
353}
354
355fn has_node_dependency(project: &Path, name: &str) -> bool {
356    let pkg = project.join("package.json");
357    if let Some(text) = read_file_text(&pkg) {
358        return text.contains(name);
359    }
360    false
361}
362
363fn has_cargo_dependency(project: &Path, name: &str) -> bool {
364    let cargo = project.join("Cargo.toml");
365    if let Some(text) = read_file_text(&cargo) {
366        return text.contains(name);
367    }
368    false
369}
370
371fn has_go_dependency(project: &Path, name: &str) -> bool {
372    let gomod = project.join("go.mod");
373    if let Some(text) = read_file_text(&gomod) {
374        return text.contains(name);
375    }
376    false
377}
378
379fn has_gradle_or_maven_dependency(project: &Path, name: &str) -> bool {
380    for file in &["build.gradle", "build.gradle.kts", "pom.xml"] {
381        if let Some(text) = read_file_text(&project.join(file))
382            && text.contains(name)
383        {
384            return true;
385        }
386    }
387    false
388}
389
390// ── Workspace/monorepo detection ────────────────────────────────────────
391
392#[derive(Debug, Clone, serde::Serialize)]
393pub struct WorkspacePackage {
394    pub name: String,
395    pub path: String,
396    pub package_type: String,
397}
398
399pub fn detect_workspace_packages(project: &Path) -> Vec<WorkspacePackage> {
400    let mut packages = Vec::new();
401
402    // Cargo workspace
403    let cargo_toml = project.join("Cargo.toml");
404    if cargo_toml.is_file()
405        && let Ok(content) = std::fs::read_to_string(&cargo_toml)
406        && content.contains("[workspace]")
407    {
408        for line in content.lines() {
409            let trimmed = line.trim().trim_matches('"').trim_matches(',');
410            if trimmed.contains("crates/") || trimmed.contains("packages/") {
411                let pattern = trimmed.trim_matches('"').trim_matches(',').trim();
412                if let Some(stripped) = pattern.strip_suffix("/*") {
413                    // Glob pattern: "crates/*" → scan directory
414                    let dir = project.join(stripped);
415                    if dir.is_dir() {
416                        for entry in std::fs::read_dir(&dir).into_iter().flatten().flatten() {
417                            if entry.path().join("Cargo.toml").is_file() {
418                                packages.push(WorkspacePackage {
419                                    name: entry.file_name().to_string_lossy().to_string(),
420                                    path: entry
421                                        .path()
422                                        .strip_prefix(project)
423                                        .unwrap_or(&entry.path())
424                                        .to_string_lossy()
425                                        .to_string(),
426                                    package_type: "cargo".to_string(),
427                                });
428                            }
429                        }
430                    }
431                } else {
432                    // Explicit path: "crates/codelens-core"
433                    let dir = project.join(pattern);
434                    if dir.join("Cargo.toml").is_file() {
435                        packages.push(WorkspacePackage {
436                            name: dir
437                                .file_name()
438                                .unwrap_or_default()
439                                .to_string_lossy()
440                                .to_string(),
441                            path: pattern.to_string(),
442                            package_type: "cargo".to_string(),
443                        });
444                    }
445                }
446            }
447        }
448    }
449
450    // npm workspace (package.json with "workspaces")
451    let pkg_json = project.join("package.json");
452    if pkg_json.is_file()
453        && let Ok(content) = std::fs::read_to_string(&pkg_json)
454        && content.contains("\"workspaces\"")
455    {
456        for dir_name in &["packages", "apps", "libs"] {
457            let dir = project.join(dir_name);
458            if dir.is_dir() {
459                for entry in std::fs::read_dir(&dir).into_iter().flatten().flatten() {
460                    if entry.path().join("package.json").is_file() {
461                        packages.push(WorkspacePackage {
462                            name: entry.file_name().to_string_lossy().to_string(),
463                            path: entry
464                                .path()
465                                .strip_prefix(project)
466                                .unwrap_or(&entry.path())
467                                .to_string_lossy()
468                                .to_string(),
469                            package_type: "npm".to_string(),
470                        });
471                    }
472                }
473            }
474        }
475    }
476
477    // Go workspace (go.work)
478    let go_work = project.join("go.work");
479    if go_work.is_file()
480        && let Ok(content) = std::fs::read_to_string(&go_work)
481    {
482        for line in content.lines() {
483            let trimmed = line.trim();
484            if !trimmed.starts_with("use")
485                && !trimmed.starts_with("go")
486                && !trimmed.starts_with("//")
487                && !trimmed.is_empty()
488                && trimmed != "("
489                && trimmed != ")"
490            {
491                let dir = project.join(trimmed);
492                if dir.join("go.mod").is_file() {
493                    packages.push(WorkspacePackage {
494                        name: trimmed.to_string(),
495                        path: trimmed.to_string(),
496                        package_type: "go".to_string(),
497                    });
498                }
499            }
500        }
501    }
502
503    packages
504}
505
/// Lexically normalize `path`: drop `.` components and resolve `..` against
/// the preceding component, without touching the filesystem (symlinks are
/// therefore not followed).
///
/// * `..` directly below a root or prefix stays at that root
///   (`/..` → `/`).
/// * `..` that has nothing to cancel in a relative path is preserved
///   (`../a` → `../a`), so a path that climbs out of its starting point is
///   never silently turned into one that appears to stay inside it.
fn normalize_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut normalized = PathBuf::new();
    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => match normalized.components().next_back() {
                // A regular name precedes: `a/..` cancels out.
                Some(Component::Normal(_)) => {
                    normalized.pop();
                }
                // Already at the top of an absolute path: stay there.
                Some(Component::RootDir) | Some(Component::Prefix(_)) => {}
                // Nothing left to cancel in a relative path: keep the `..`.
                Some(Component::ParentDir) | Some(Component::CurDir) | None => {
                    normalized.push("..");
                }
            },
            Component::Prefix(_) | Component::RootDir | Component::Normal(_) => {
                normalized.push(component.as_os_str());
            }
        }
    }
    normalized
}
519
#[cfg(test)]
mod tests {
    use super::ProjectRoot;
    use std::{
        env,
        ffi::OsString,
        fs,
        path::{Path, PathBuf},
        sync::{
            Mutex, MutexGuard, OnceLock,
            atomic::{AtomicUsize, Ordering},
        },
    };

    /// Temporarily points `HOME` at a test directory.
    ///
    /// Holds the module-wide environment lock for its whole lifetime and
    /// restores the previous `HOME` value on drop, so the environment is put
    /// back even when the test body panics.
    struct HomeOverride {
        previous: Option<OsString>,
        _lock: MutexGuard<'static, ()>,
    }

    impl HomeOverride {
        fn set(home: &Path) -> Self {
            // The mutex guards no data, so a poisoned lock (another test
            // panicked while holding it) is safe to reuse.
            let lock = env_lock()
                .lock()
                .unwrap_or_else(|poisoned| poisoned.into_inner());
            let previous = env::var_os("HOME");
            // SAFETY: tests in this module only mutate `HOME` while holding
            // `env_lock`, so no two writers interleave. This assumes no
            // foreign (non-Rust) code reads the environment concurrently.
            unsafe {
                env::set_var("HOME", home);
            }
            Self {
                previous,
                _lock: lock,
            }
        }
    }

    impl Drop for HomeOverride {
        fn drop(&mut self) {
            // SAFETY: `_lock` is still held here (fields are dropped after
            // this body runs); same assumptions as in `HomeOverride::set`.
            match self.previous.take() {
                Some(value) => unsafe { env::set_var("HOME", value) },
                None => unsafe { env::remove_var("HOME") },
            }
        }
    }

    #[test]
    fn rejects_path_escape() {
        let dir = tempfile_dir();
        let project = ProjectRoot::new(&dir).expect("project root");
        let err = project
            .resolve("../outside.txt")
            .expect_err("should reject escape");
        assert!(err.to_string().contains("escapes project root"));
    }

    #[test]
    fn makes_relative_paths() {
        let dir = tempfile_dir();
        let nested = dir.join("src/lib.rs");
        fs::create_dir_all(nested.parent().expect("parent")).expect("mkdir");
        fs::write(&nested, "fn main() {}\n").expect("write file");

        let project = ProjectRoot::new(&dir).expect("project root");
        assert_eq!(project.to_relative(&nested), "src/lib.rs");
    }

    #[test]
    fn does_not_promote_home_directory_from_global_codelens_marker() {
        let home = tempfile_dir();
        let nested = home.join("Downloads/codelens");
        fs::create_dir_all(home.join(".codelens")).expect("mkdir global codelens");
        fs::create_dir_all(&nested).expect("mkdir nested");

        let _home = HomeOverride::set(&home);
        let project = ProjectRoot::new(&nested).expect("project root");

        assert_eq!(
            project.as_path(),
            nested.canonicalize().expect("canonical nested").as_path()
        );
    }

    #[test]
    fn still_detects_project_root_before_home_directory() {
        let home = tempfile_dir();
        let project_root = home.join("workspace/app");
        let nested = project_root.join("src/features");
        fs::create_dir_all(home.join(".codelens")).expect("mkdir global codelens");
        fs::create_dir_all(&nested).expect("mkdir nested");
        fs::write(
            project_root.join("Cargo.toml"),
            "[package]\nname = \"demo\"\n",
        )
        .expect("write cargo");

        let _home = HomeOverride::set(&home);
        let project = ProjectRoot::new(&nested).expect("project root");

        assert_eq!(
            project.as_path(),
            project_root
                .canonicalize()
                .expect("canonical project root")
                .as_path()
        );
    }

    /// Labelled subdirectory inside a fresh `tempfile_dir()`, so a test's
    /// files are easy to identify on disk.
    fn fresh_test_dir(label: &str) -> PathBuf {
        let dir = tempfile_dir().join(label);
        fs::create_dir_all(&dir).expect("mkdir fresh test dir");
        dir
    }

    #[test]
    fn compute_dominant_language_picks_rust_for_rust_heavy_project() {
        let dir = fresh_test_dir("phase2j_rust_heavy");
        // 5 Rust files, 1 Python file, 1 unknown extension file
        fs::create_dir_all(dir.join("src")).expect("mkdir src");
        fs::write(dir.join("Cargo.toml"), "[package]\nname = \"x\"\n").expect("Cargo.toml");
        for name in ["a.rs", "b.rs", "c.rs", "d.rs", "e.rs"] {
            fs::write(dir.join("src").join(name), "pub fn f() {}\n").expect("write rs");
        }
        fs::write(dir.join("scripts.py"), "def f():\n    pass\n").expect("write py");
        fs::write(dir.join("README.md"), "# README\n").expect("write md");

        let lang = super::compute_dominant_language(&dir).expect("dominant lang");
        assert_eq!(lang, "rs", "expected rs dominant, got {lang}");
    }

    #[test]
    fn compute_dominant_language_picks_python_for_python_heavy_project() {
        let dir = fresh_test_dir("phase2j_python_heavy");
        // 4 Python files, 1 Rust file
        fs::create_dir_all(dir.join("pkg")).expect("mkdir pkg");
        for name in ["mod_a.py", "mod_b.py", "mod_c.py", "mod_d.py"] {
            fs::write(dir.join("pkg").join(name), "def f():\n    pass\n").expect("write py");
        }
        fs::write(dir.join("build.rs"), "fn main() {}\n").expect("write rs");

        let lang = super::compute_dominant_language(&dir).expect("dominant lang");
        assert_eq!(lang, "py", "expected py dominant, got {lang}");
    }

    #[test]
    fn compute_dominant_language_returns_none_below_min_file_count() {
        let dir = fresh_test_dir("phase2j_below_min");
        // Only 2 source files (below MIN_FILES = 3)
        fs::write(dir.join("only.rs"), "fn x() {}\n").expect("write rs");
        fs::write(dir.join("other.py"), "def y(): pass\n").expect("write py");

        let lang = super::compute_dominant_language(&dir);
        assert!(lang.is_none(), "expected None below 3 files, got {lang:?}");
    }

    #[test]
    fn compute_dominant_language_skips_excluded_dirs() {
        let dir = fresh_test_dir("phase2j_excluded_dirs");
        fs::create_dir_all(dir.join("src")).expect("mkdir src");
        fs::create_dir_all(dir.join("node_modules/foo")).expect("mkdir node_modules");
        fs::create_dir_all(dir.join("target")).expect("mkdir target");
        // 3 real Rust source files
        for name in ["a.rs", "b.rs", "c.rs"] {
            fs::write(dir.join("src").join(name), "fn f() {}\n").expect("write src rs");
        }
        // 10 fake JS files inside node_modules that must be skipped
        for i in 0..10 {
            fs::write(
                dir.join("node_modules/foo").join(format!("x{i}.js")),
                "module.exports = {};\n",
            )
            .expect("write node_modules js");
        }
        // 10 fake build artefacts in target/ that must be skipped
        for i in 0..10 {
            fs::write(
                dir.join("target").join(format!("build{i}.rs")),
                "fn f() {}\n",
            )
            .expect("write target rs");
        }

        let lang = super::compute_dominant_language(&dir).expect("dominant lang");
        // Only the 3 src/*.rs files should be counted — not the 10
        // node_modules JS files and not the 10 target build artefacts.
        assert_eq!(lang, "rs", "expected rs from src only, got {lang}");
    }

    /// Process-wide lock serialising the tests that mutate `HOME`.
    fn env_lock() -> &'static Mutex<()> {
        static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
        LOCK.get_or_init(|| Mutex::new(()))
    }

    /// Create a new, uniquely named directory under the system temp dir.
    ///
    /// The name combines a timestamp with the process id and a per-process
    /// counter: the timestamp alone can repeat when tests running in
    /// parallel ask for a directory within the same clock tick.
    fn tempfile_dir() -> PathBuf {
        static NEXT_ID: AtomicUsize = AtomicUsize::new(0);

        let nanos = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .expect("time")
            .as_nanos();
        let dir = env::temp_dir().join(format!(
            "codelens-core-project-{}-{}-{}",
            nanos,
            std::process::id(),
            NEXT_ID.fetch_add(1, Ordering::Relaxed)
        ));
        fs::create_dir_all(&dir).expect("create tempdir");
        dir
    }
}