Skip to main content

codelens_engine/
project.rs

1use anyhow::{Context, Result, bail};
2use globset::{Glob, GlobMatcher};
3use std::path::{Path, PathBuf};
4
/// A project root directory, stored as a canonicalized path.
///
/// Built with [`ProjectRoot::new`] (walks up to the nearest root marker) or
/// [`ProjectRoot::new_exact`] (uses the given directory as-is).
#[derive(Debug, Clone)]
pub struct ProjectRoot {
    // Canonical path of the root directory; both constructors derive it from
    // the output of `canonicalize()`.
    root: PathBuf,
}
9
/// File or directory names whose presence in a directory marks it as a
/// project root. Root detection probes them in this order at each level
/// while walking up from the starting directory.
const ROOT_MARKERS: &[&str] = &[
    ".git",
    ".codelens",
    "build.gradle.kts",
    "build.gradle",
    "package.json",
    "pyproject.toml",
    "Cargo.toml",
    "pom.xml",
    "go.mod",
];
21
22impl ProjectRoot {
23    /// Create a ProjectRoot, auto-detecting the actual root by walking up from
24    /// the given path until a root marker (.git, Cargo.toml, etc.) is found.
25    /// Falls back to the given path if no marker is found.
26    pub fn new(path: impl AsRef<Path>) -> Result<Self> {
27        let start = path.as_ref().canonicalize().with_context(|| {
28            format!("failed to resolve project root {}", path.as_ref().display())
29        })?;
30        if !start.is_dir() {
31            bail!("project root is not a directory: {}", start.display());
32        }
33        let root = detect_root(&start).unwrap_or_else(|| start.clone());
34        Ok(Self { root })
35    }
36
37    /// Create a ProjectRoot at the exact given path without auto-detection.
38    pub fn new_exact(path: impl AsRef<Path>) -> Result<Self> {
39        let root = path.as_ref().canonicalize().with_context(|| {
40            format!("failed to resolve project root {}", path.as_ref().display())
41        })?;
42        if !root.is_dir() {
43            bail!("project root is not a directory: {}", root.display());
44        }
45        Ok(Self { root })
46    }
47
48    pub fn as_path(&self) -> &Path {
49        &self.root
50    }
51
52    pub fn resolve(&self, relative_or_absolute: impl AsRef<Path>) -> Result<PathBuf> {
53        let path = relative_or_absolute.as_ref();
54        let candidate = if path.is_absolute() {
55            path.to_path_buf()
56        } else {
57            self.root.join(path)
58        };
59        let normalized = normalize_path(&candidate);
60        if !normalized.starts_with(&self.root) {
61            bail!(
62                "path escapes project root: {} (root: {})",
63                normalized.display(),
64                self.root.display()
65            );
66        }
67        // If the path exists, verify the real (symlink-resolved) path also stays within root
68        if normalized.exists()
69            && let Ok(real) = normalized.canonicalize()
70            && !real.starts_with(&self.root)
71        {
72            bail!(
73                "symlink escapes project root: {} → {} (root: {})",
74                normalized.display(),
75                real.display(),
76                self.root.display()
77            );
78        }
79        // Resolve symlinks so the returned path matches what's stored in the index.
80        if normalized.exists()
81            && let Ok(real) = normalized.canonicalize()
82            && real.starts_with(&self.root)
83        {
84            return Ok(real);
85        }
86        Ok(normalized)
87    }
88
89    pub fn to_relative(&self, path: impl AsRef<Path>) -> String {
90        let path = path.as_ref();
91        let canonical = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
92        canonical
93            .strip_prefix(&self.root)
94            .unwrap_or(&canonical)
95            .to_string_lossy()
96            .replace('\\', "/")
97    }
98}
99
100// ── Shared directory exclusion & file collection ────────────────────────
101
/// Directory names that are never walked or indexed. A path is excluded
/// when any of its components equals one of these names exactly.
pub const EXCLUDED_DIRS: &[&str] = &[
    // VCS & IDE
    ".git",
    ".idea",
    ".vscode",
    ".cursor",
    ".claude",
    ".claire",
    ".serena",
    ".superpowers",
    // Build output
    ".gradle",
    "build",
    "dist",
    "generated",
    "out",
    "node_modules",
    "vendor",
    "__pycache__",
    "target",
    ".next",
    "win-unpacked",
    // Virtual environments
    ".venv",
    "venv",
    ".tox",
    "env",
    // Caches (common polluters — can contain 40K+ symbols from deps)
    ".cache",
    ".ruff_cache",
    ".pytest_cache",
    ".mypy_cache",
    ".fastembed_cache",
    // Editor extensions (e.g. Antigravity/Windsurf bundled JS)
    ".antigravity",
    ".windsurf",
    // Cloud & external mounts
    "Library",
    // CodeLens runtime
    ".codelens",
    // Git worktrees (dev artifacts at top-level, e.g. `git worktree add
    // .worktrees/feature-x`). Indexing them duplicates symbols against
    // the main tree and pollutes `find_referencing_symbols` /
    // `semantic_search` results with stale branch versions.
    ".worktrees",
];
148
149/// Returns `true` if any component of `path` matches an excluded directory.
150pub fn is_excluded(path: &Path) -> bool {
151    if path.components().any(|component| {
152        let value = component.as_os_str().to_string_lossy();
153        EXCLUDED_DIRS.contains(&value.as_ref()) || value.starts_with("backup-")
154    }) {
155        return true;
156    }
157
158    path.file_name()
159        .and_then(|file_name| file_name.to_str())
160        .is_some_and(is_generated_or_lock_file)
161}
162
163/// Root-relative variant of [`is_excluded`]: only the components *below*
164/// `root` are matched against [`EXCLUDED_DIRS`], so a project legitimately
165/// rooted under an excluded-name ancestor (`~/.claude/worktrees/...`,
166/// `~/Library/...`, `~/dev/build/...`) is not silently emptied to zero
167/// files (#358). Walkers must pass the same `root` they hand to `WalkDir`
168/// so the prefix strips textually without canonicalization cost.
169///
170/// A `path` outside `root` falls back to whole-path matching — the
171/// fail-safe direction: the fallback can only exclude more, never leak an
172/// excluded directory back in.
173pub fn is_excluded_within(root: &Path, path: &Path) -> bool {
174    match path.strip_prefix(root) {
175        Ok(relative) => is_excluded(relative),
176        Err(_) => is_excluded(path),
177    }
178}
179
/// Returns `true` when `file_name` is a known lock file / licence dump or
/// carries one of the minified, bundled, or generated-source suffixes.
fn is_generated_or_lock_file(file_name: &str) -> bool {
    // Matched against the whole file name.
    const EXACT_NAMES: [&str; 6] = [
        "package-lock.json",
        "pnpm-lock.yaml",
        "yarn.lock",
        "bun.lock",
        "bun.lockb",
        "LICENSES.chromium.html",
    ];
    // Matched against the end of the file name.
    const SUFFIXES: [&str; 8] = [
        ".min.js",
        ".bundle.js",
        ".bundle.iife.js",
        "-bundle.js",
        ".gen.ts",
        ".gen.tsx",
        ".generated.ts",
        ".generated.tsx",
    ];

    EXACT_NAMES.contains(&file_name) || SUFFIXES.iter().any(|suffix| file_name.ends_with(*suffix))
}
198
199/// Walk `root` collecting files that pass `filter`, skipping excluded dirs.
200pub fn collect_files(root: &Path, filter: impl Fn(&Path) -> bool) -> Result<Vec<PathBuf>> {
201    use walkdir::WalkDir;
202    let project_excludes = ProjectExcludeConfig::load(root);
203    let mut files = Vec::new();
204    for entry in WalkDir::new(root).into_iter().filter_entry(|entry| {
205        !is_excluded_within(root, entry.path()) && !project_excludes.is_excluded(root, entry.path())
206    }) {
207        let entry = entry?;
208        if entry.file_type().is_file() && filter(entry.path()) {
209            files.push(entry.path().to_path_buf());
210        }
211    }
212    Ok(files)
213}
214
/// Per-project exclude patterns read from `.codelens/config.json`,
/// pre-compiled into glob matchers.
#[derive(Debug, Default)]
struct ProjectExcludeConfig {
    // One matcher per expanded pattern; empty when no config file or no
    // valid pattern was found.
    matchers: Vec<GlobMatcher>,
}
219
220impl ProjectExcludeConfig {
221    fn load(root: &Path) -> Self {
222        let config_path = root.join(".codelens/config.json");
223        let Ok(content) = std::fs::read_to_string(config_path) else {
224            return Self::default();
225        };
226        let Ok(json) = serde_json::from_str::<serde_json::Value>(&content) else {
227            return Self::default();
228        };
229        let mut patterns = Vec::new();
230        collect_string_array(&json, &["index", "exclude_paths"], &mut patterns);
231        collect_string_array(&json, &["index", "exclude"], &mut patterns);
232        collect_string_array(&json, &["exclude_paths"], &mut patterns);
233
234        let mut matchers = Vec::new();
235        for pattern in patterns {
236            for candidate in expand_exclude_pattern(&pattern) {
237                if let Ok(glob) = Glob::new(&candidate) {
238                    matchers.push(glob.compile_matcher());
239                }
240            }
241        }
242        Self { matchers }
243    }
244
245    fn is_excluded(&self, root: &Path, path: &Path) -> bool {
246        if self.matchers.is_empty() {
247            return false;
248        }
249        let relative = path
250            .strip_prefix(root)
251            .unwrap_or(path)
252            .to_string_lossy()
253            .replace('\\', "/");
254        self.matchers
255            .iter()
256            .any(|matcher| matcher.is_match(relative.as_str()))
257    }
258}
259
260fn collect_string_array(json: &serde_json::Value, path: &[&str], out: &mut Vec<String>) {
261    let mut current = json;
262    for segment in path {
263        let Some(next) = current.get(segment) else {
264            return;
265        };
266        current = next;
267    }
268    if let Some(values) = current.as_array() {
269        out.extend(
270            values
271                .iter()
272                .filter_map(|value| value.as_str())
273                .map(str::trim)
274                .filter(|value| !value.is_empty() && !value.starts_with('/'))
275                .map(ToOwned::to_owned),
276        );
277    }
278}
279
/// Turn one user-supplied exclude pattern into the glob strings to compile.
///
/// The pattern is trimmed, leading `./` is removed and `\` is replaced by
/// `/`. Empty patterns and patterns containing `..` are rejected (empty
/// result).
///
/// * A glob pattern (`*`, `?`, `[`, `{`) without a trailing slash is used
///   as-is.
/// * A plain path (`docs/api`) names either a file or a directory, so it
///   expands to the path itself plus `path/**`.
/// * A trailing slash (`dist/`, `**/gen/`) marks a directory. Walker paths
///   never end in `/`, so the slash is dropped and the pattern expands to
///   the directory itself plus everything below it; compiling the pattern
///   verbatim would never match anything.
fn expand_exclude_pattern(pattern: &str) -> Vec<String> {
    let normalized = pattern.trim().trim_start_matches("./").replace('\\', "/");
    if normalized.is_empty() || normalized.contains("..") {
        return Vec::new();
    }

    let is_directory_marker = normalized.ends_with('/');
    let base = normalized.trim_end_matches('/');
    if base.is_empty() {
        // The pattern consisted solely of slashes; there is nothing to match.
        return Vec::new();
    }

    let has_glob = base.contains(|c: char| matches!(c, '*' | '?' | '[' | '{'));
    if has_glob && !is_directory_marker {
        return vec![base.to_owned()];
    }
    vec![base.to_owned(), format!("{base}/**")]
}
294
295/// Walk `root` and return the canonical extension tag of the dominant
296/// source language by file count (e.g. `rs`, `py`, `ts`, `go`). Returns
297/// `None` when the project contains fewer than 3 source files in total,
298/// or when no single language holds a clear plurality.
299///
300/// v1.5 Phase 2j MCP follow-up. The engine helper walks the project
301/// once at activation time and hands the result to the MCP tool layer,
302/// which then exports `CODELENS_EMBED_HINT_AUTO_LANG=<lang>` so the
303/// engine's `auto_hint_should_enable` gate can consult
304/// `language_supports_nl_stack` on subsequent embedding calls.
305///
306/// Walk scope is capped (16 k files) to avoid pathological cases on
307/// very large monorepos — the goal is to classify the project by
308/// dominant language, not to enumerate every file. Directories in
309/// `EXCLUDED_DIRS` are skipped (same filter as `collect_files`). Only
310/// files with an extension recognised by the language registry are
311/// counted; build artefacts / README / Markdown are ignored.
312///
313/// The returned tag is the canonical extension string (e.g. `rs`,
314/// `py`) — exactly what `CODELENS_EMBED_HINT_AUTO_LANG` expects and
315/// what `crate::embedding::language_supports_nl_stack` accepts.
316pub fn compute_dominant_language(root: &Path) -> Option<String> {
317    use std::collections::HashMap;
318    use walkdir::WalkDir;
319
320    const WALK_CAP: usize = 16_384;
321    const MIN_FILES: usize = 3;
322
323    let mut counts: HashMap<String, usize> = HashMap::new();
324    let mut total = 0usize;
325
326    for entry in WalkDir::new(root)
327        .into_iter()
328        .filter_entry(|entry| !is_excluded_within(root, entry.path()))
329    {
330        let Ok(entry) = entry else {
331            continue;
332        };
333        if !entry.file_type().is_file() {
334            continue;
335        }
336        let Some(ext) = entry.path().extension() else {
337            continue;
338        };
339        let Some(ext_str) = ext.to_str() else {
340            continue;
341        };
342        let ext_lower = ext_str.to_ascii_lowercase();
343        // Only count extensions we know are source languages. This uses
344        // the language registry so future language additions stay in
345        // sync automatically. The import is local to avoid a cyclic
346        // module dependency with `lang_config`.
347        if crate::lang_registry::for_extension(&ext_lower).is_none() {
348            continue;
349        }
350        *counts.entry(ext_lower).or_insert(0) += 1;
351        total += 1;
352        if total >= WALK_CAP {
353            break;
354        }
355    }
356
357    if total < MIN_FILES {
358        return None;
359    }
360
361    // Find the extension with the highest count. A strict plurality is
362    // not required (return whichever wins) but the caller can use the
363    // count ratio via `compute_dominant_language_with_count` if they
364    // want to impose a threshold. For v1.5 Phase 2j we accept any
365    // plurality and let the downstream `language_supports_nl_stack`
366    // decide whether the tag maps to an allowed language.
367    counts
368        .into_iter()
369        .max_by_key(|(_, count)| *count)
370        .map(|(ext, _)| ext)
371}
372
373/// Walk up from `start` until a directory containing a root marker is found.
374fn detect_root(start: &Path) -> Option<PathBuf> {
375    let home = dirs_fallback();
376    let temp = temp_dir_fallback();
377    detect_root_with_bounds(start, home.as_deref(), temp.as_deref())
378}
379
380fn detect_root_with_bounds(
381    start: &Path,
382    home: Option<&Path>,
383    temp: Option<&Path>,
384) -> Option<PathBuf> {
385    let mut current = start.to_path_buf();
386    loop {
387        // `~/.codelens` stores global CodeLens state, so treating the home directory as an
388        // inferred project root causes unrelated folders to collapse onto `$HOME`.
389        // If the user really wants to operate on `$HOME`, they can pass it explicitly.
390        if current != start && Some(current.as_path()) == home {
391            break;
392        }
393        for marker in ROOT_MARKERS {
394            if marker == &".codelens" && current != start && is_temp_root(&current, temp) {
395                continue;
396            }
397            if current.join(marker).exists() {
398                return Some(current);
399            }
400        }
401        // Don't go above home directory
402        if Some(current.as_path()) == home {
403            break;
404        }
405        if !current.pop() {
406            break;
407        }
408    }
409    None
410}
411
/// The home directory taken from the `HOME` environment variable,
/// canonicalized when possible and returned as given otherwise. `None`
/// when `HOME` is unset.
fn dirs_fallback() -> Option<PathBuf> {
    let home = PathBuf::from(std::env::var_os("HOME")?);
    Some(match home.canonicalize() {
        Ok(resolved) => resolved,
        Err(_) => home,
    })
}
417
/// The system temp directory, canonicalized when possible and returned as
/// reported by `std::env::temp_dir` otherwise. Always `Some`.
fn temp_dir_fallback() -> Option<PathBuf> {
    let reported = std::env::temp_dir();
    let resolved = match reported.canonicalize() {
        Ok(real) => real,
        Err(_) => reported,
    };
    Some(resolved)
}
422
/// Returns `true` when `path` is the configured temp directory or the
/// canonical form of one of the well-known Unix temp directories.
fn is_temp_root(path: &Path, configured_temp: Option<&Path>) -> bool {
    const WELL_KNOWN_TEMP_DIRS: [&str; 3] = ["/tmp", "/private/tmp", "/var/tmp"];

    if configured_temp == Some(path) {
        return true;
    }
    for dir in WELL_KNOWN_TEMP_DIRS.iter() {
        // Directories that cannot be canonicalized (e.g. absent on this
        // platform) are simply not candidates.
        if let Ok(resolved) = Path::new(dir).canonicalize() {
            if resolved == path {
                return true;
            }
        }
    }
    false
}
432
433// ── Framework detection ─────────────────────────────────────────────────
434
435pub fn detect_frameworks(project: &Path) -> Vec<String> {
436    let mut frameworks = Vec::new();
437
438    // Python
439    if project.join("manage.py").exists() {
440        frameworks.push("django".into());
441    }
442    if has_dependency(project, "fastapi") {
443        frameworks.push("fastapi".into());
444    }
445    if has_dependency(project, "flask") {
446        frameworks.push("flask".into());
447    }
448
449    // JavaScript/TypeScript
450    if project.join("next.config.js").exists()
451        || project.join("next.config.mjs").exists()
452        || project.join("next.config.ts").exists()
453    {
454        frameworks.push("nextjs".into());
455    }
456    if has_node_dependency(project, "express") {
457        frameworks.push("express".into());
458    }
459    if has_node_dependency(project, "@nestjs/core") {
460        frameworks.push("nestjs".into());
461    }
462    if project.join("vite.config.ts").exists() || project.join("vite.config.js").exists() {
463        frameworks.push("vite".into());
464    }
465
466    // Rust
467    if project.join("Cargo.toml").exists() {
468        if has_cargo_dependency(project, "actix-web") {
469            frameworks.push("actix-web".into());
470        }
471        if has_cargo_dependency(project, "axum") {
472            frameworks.push("axum".into());
473        }
474        if has_cargo_dependency(project, "rocket") {
475            frameworks.push("rocket".into());
476        }
477    }
478
479    // Go
480    if has_go_dependency(project, "gin-gonic/gin") {
481        frameworks.push("gin".into());
482    }
483    if has_go_dependency(project, "gofiber/fiber") {
484        frameworks.push("fiber".into());
485    }
486
487    // Java/Kotlin
488    if has_gradle_or_maven_dependency(project, "spring-boot") {
489        frameworks.push("spring-boot".into());
490    }
491
492    frameworks
493}
494
/// Read `path` as UTF-8 text, returning `None` on any read error.
fn read_file_text(path: &Path) -> Option<String> {
    match std::fs::read_to_string(path) {
        Ok(text) => Some(text),
        Err(_) => None,
    }
}
498
499fn has_dependency(project: &Path, name: &str) -> bool {
500    let req = project.join("requirements.txt");
501    if let Some(text) = read_file_text(&req)
502        && text.contains(name)
503    {
504        return true;
505    }
506    let pyproject = project.join("pyproject.toml");
507    if let Some(text) = read_file_text(&pyproject)
508        && text.contains(name)
509    {
510        return true;
511    }
512    false
513}
514
515fn has_node_dependency(project: &Path, name: &str) -> bool {
516    let pkg = project.join("package.json");
517    if let Some(text) = read_file_text(&pkg) {
518        return text.contains(name);
519    }
520    false
521}
522
523fn has_cargo_dependency(project: &Path, name: &str) -> bool {
524    let cargo = project.join("Cargo.toml");
525    if let Some(text) = read_file_text(&cargo) {
526        return text.contains(name);
527    }
528    false
529}
530
531fn has_go_dependency(project: &Path, name: &str) -> bool {
532    let gomod = project.join("go.mod");
533    if let Some(text) = read_file_text(&gomod) {
534        return text.contains(name);
535    }
536    false
537}
538
539fn has_gradle_or_maven_dependency(project: &Path, name: &str) -> bool {
540    for file in &["build.gradle", "build.gradle.kts", "pom.xml"] {
541        if let Some(text) = read_file_text(&project.join(file))
542            && text.contains(name)
543        {
544            return true;
545        }
546    }
547    false
548}
549
550// ── Workspace/monorepo detection ────────────────────────────────────────
551
/// One package discovered inside a workspace/monorepo by
/// [`detect_workspace_packages`].
#[derive(Debug, Clone, serde::Serialize)]
pub struct WorkspacePackage {
    /// Package name: the package directory's name for Cargo and npm
    /// packages, the `go.work` entry text for Go modules.
    pub name: String,
    /// Package location: the path as written in the manifest, or the
    /// directory path relative to the project directory when discovered by
    /// scanning.
    pub path: String,
    /// Ecosystem tag: `"cargo"`, `"npm"` or `"go"`.
    pub package_type: String,
}
558
559pub fn detect_workspace_packages(project: &Path) -> Vec<WorkspacePackage> {
560    let mut packages = Vec::new();
561
562    // Cargo workspace
563    let cargo_toml = project.join("Cargo.toml");
564    if cargo_toml.is_file()
565        && let Ok(content) = std::fs::read_to_string(&cargo_toml)
566        && content.contains("[workspace]")
567    {
568        for line in content.lines() {
569            let trimmed = line.trim().trim_matches('"').trim_matches(',');
570            if !trimmed.contains("crates/") && !trimmed.contains("packages/") {
571                continue;
572            }
573            // Multi-line TOML arrays put one path per line and the existing
574            // contains() guard handles them. Single-line forms like
575            // `members = ["crates/foo", "crates/bar"]` collapse the whole
576            // array into one line, so split on `,` between the brackets and
577            // process each path independently.
578            let mut candidates: Vec<&str> = Vec::new();
579            if let (Some(start), Some(end)) = (trimmed.find('['), trimmed.rfind(']'))
580                && start < end
581            {
582                candidates.extend(trimmed[start + 1..end].split(','));
583            }
584            if candidates.is_empty() {
585                candidates.push(trimmed);
586            }
587            for raw in candidates {
588                let pattern = raw.trim().trim_matches('"').trim_matches(',').trim();
589                if pattern.is_empty()
590                    || (!pattern.contains("crates/") && !pattern.contains("packages/"))
591                {
592                    continue;
593                }
594                if let Some(stripped) = pattern.strip_suffix("/*") {
595                    // Glob pattern: "crates/*" → scan directory
596                    let dir = project.join(stripped);
597                    if dir.is_dir() {
598                        for entry in std::fs::read_dir(&dir).into_iter().flatten().flatten() {
599                            if entry.path().join("Cargo.toml").is_file() {
600                                packages.push(WorkspacePackage {
601                                    name: entry.file_name().to_string_lossy().to_string(),
602                                    path: entry
603                                        .path()
604                                        .strip_prefix(project)
605                                        .unwrap_or(&entry.path())
606                                        .to_string_lossy()
607                                        .to_string(),
608                                    package_type: "cargo".to_string(),
609                                });
610                            }
611                        }
612                    }
613                } else {
614                    // Explicit path: "crates/codelens-core"
615                    let dir = project.join(pattern);
616                    if dir.join("Cargo.toml").is_file() {
617                        packages.push(WorkspacePackage {
618                            name: dir
619                                .file_name()
620                                .unwrap_or_default()
621                                .to_string_lossy()
622                                .to_string(),
623                            path: pattern.to_string(),
624                            package_type: "cargo".to_string(),
625                        });
626                    }
627                }
628            }
629        }
630    }
631
632    // npm workspace (package.json with "workspaces")
633    let pkg_json = project.join("package.json");
634    if pkg_json.is_file()
635        && let Ok(content) = std::fs::read_to_string(&pkg_json)
636        && content.contains("\"workspaces\"")
637    {
638        for dir_name in &["packages", "apps", "libs"] {
639            let dir = project.join(dir_name);
640            if dir.is_dir() {
641                for entry in std::fs::read_dir(&dir).into_iter().flatten().flatten() {
642                    if entry.path().join("package.json").is_file() {
643                        packages.push(WorkspacePackage {
644                            name: entry.file_name().to_string_lossy().to_string(),
645                            path: entry
646                                .path()
647                                .strip_prefix(project)
648                                .unwrap_or(&entry.path())
649                                .to_string_lossy()
650                                .to_string(),
651                            package_type: "npm".to_string(),
652                        });
653                    }
654                }
655            }
656        }
657    }
658
659    // Go workspace (go.work)
660    let go_work = project.join("go.work");
661    if go_work.is_file()
662        && let Ok(content) = std::fs::read_to_string(&go_work)
663    {
664        for line in content.lines() {
665            let trimmed = line.trim();
666            if !trimmed.starts_with("use")
667                && !trimmed.starts_with("go")
668                && !trimmed.starts_with("//")
669                && !trimmed.is_empty()
670                && trimmed != "("
671                && trimmed != ")"
672            {
673                let dir = project.join(trimmed);
674                if dir.join("go.mod").is_file() {
675                    packages.push(WorkspacePackage {
676                        name: trimmed.to_string(),
677                        path: trimmed.to_string(),
678                        package_type: "go".to_string(),
679                    });
680                }
681            }
682        }
683    }
684
685    // Cargo.toml is parsed line-by-line for `crates/*` mentions, which
686    // double-counts paths listed in both `[workspace] members` and
687    // `[workspace] default-members`. Sort + dedup on the (path, name,
688    // package_type) tuple so callers receive each workspace package
689    // once regardless of how many sections reference it.
690    packages.sort_by(|a, b| {
691        a.path
692            .cmp(&b.path)
693            .then_with(|| a.name.cmp(&b.name))
694            .then_with(|| a.package_type.cmp(&b.package_type))
695    });
696    packages
697        .dedup_by(|a, b| a.path == b.path && a.name == b.name && a.package_type == b.package_type);
698    packages
699}
700
/// Lexically normalize `path` without touching the filesystem: `.`
/// components are dropped and each `..` removes the component before it
/// (a `..` with nothing to remove is discarded).
fn normalize_path(path: &Path) -> PathBuf {
    use std::path::Component;

    path.components()
        .fold(PathBuf::new(), |mut folded, part| {
            match part {
                Component::CurDir => {}
                Component::ParentDir => {
                    folded.pop();
                }
                other => folded.push(other.as_os_str()),
            }
            folded
        })
}
714
715#[cfg(test)]
716mod tests {
717    use super::{ProjectRoot, collect_files, is_excluded, is_excluded_within};
718    use std::{fs, path::Path};
719
720    #[test]
721    fn workspace_packages_dedup_when_members_and_default_members_share_paths() {
722        use super::detect_workspace_packages;
723        let (_td, temp) = tempfile_dir();
724        let crate_dir = temp.join("crates/foo");
725        fs::create_dir_all(&crate_dir).expect("mkdir crate");
726        fs::write(
727            crate_dir.join("Cargo.toml"),
728            "[package]\nname = \"foo\"\nversion = \"0.1.0\"\nedition = \"2021\"\n",
729        )
730        .expect("write crate cargo");
731        // Multi-line TOML array form mirrors how Cargo formats workspace
732        // members in real repos and is what the line-grep heuristic in
733        // `detect_workspace_packages` recognizes today. Same path appears
734        // in both `members` and `default-members` so dedup is the only
735        // thing under test.
736        fs::write(
737            temp.join("Cargo.toml"),
738            "[workspace]\nmembers = [\n    \"crates/foo\",\n]\ndefault-members = [\n    \"crates/foo\",\n]\n",
739        )
740        .expect("write root cargo");
741
742        let pkgs = detect_workspace_packages(&temp);
743        assert_eq!(
744            pkgs.len(),
745            1,
746            "members + default-members listing the same path should dedup, got {pkgs:?}"
747        );
748        assert_eq!(pkgs[0].name, "foo");
749        assert_eq!(pkgs[0].path, "crates/foo");
750        assert_eq!(pkgs[0].package_type, "cargo");
751    }
752
753    #[test]
754    fn workspace_packages_recognizes_single_line_toml_array() {
755        use super::detect_workspace_packages;
756        let (_td, temp) = tempfile_dir();
757        let crate_dir = temp.join("crates/foo");
758        fs::create_dir_all(&crate_dir).expect("mkdir crate");
759        fs::write(
760            crate_dir.join("Cargo.toml"),
761            "[package]\nname = \"foo\"\nversion = \"0.1.0\"\nedition = \"2021\"\n",
762        )
763        .expect("write crate cargo");
764        // Single-line TOML array form (`members = ["crates/foo"]`) — what
765        // single-crate workspaces in small repos tend to use.
766        fs::write(
767            temp.join("Cargo.toml"),
768            "[workspace]\nmembers = [\"crates/foo\"]\n",
769        )
770        .expect("write root cargo");
771
772        let pkgs = detect_workspace_packages(&temp);
773        assert_eq!(
774            pkgs.len(),
775            1,
776            "single-line members array should be recognized, got {pkgs:?}"
777        );
778        assert_eq!(pkgs[0].name, "foo");
779        assert_eq!(pkgs[0].path, "crates/foo");
780        assert_eq!(pkgs[0].package_type, "cargo");
781    }
782
783    #[test]
784    fn workspace_packages_handles_single_line_array_with_multiple_paths() {
785        use super::detect_workspace_packages;
786        let (_td, temp) = tempfile_dir();
787        for name in &["foo", "bar"] {
788            let crate_dir = temp.join("crates").join(name);
789            fs::create_dir_all(&crate_dir).expect("mkdir crate");
790            fs::write(
791                crate_dir.join("Cargo.toml"),
792                format!("[package]\nname = \"{name}\"\nversion = \"0.1.0\"\nedition = \"2021\"\n"),
793            )
794            .expect("write crate cargo");
795        }
796        fs::write(
797            temp.join("Cargo.toml"),
798            "[workspace]\nmembers = [\"crates/foo\", \"crates/bar\"]\n",
799        )
800        .expect("write root cargo");
801
802        let mut pkgs = detect_workspace_packages(&temp);
803        pkgs.sort_by(|a, b| a.path.cmp(&b.path));
804        assert_eq!(
805            pkgs.len(),
806            2,
807            "single-line array with two paths, got {pkgs:?}"
808        );
809        assert_eq!(pkgs[0].name, "bar");
810        assert_eq!(pkgs[1].name, "foo");
811    }
812
813    #[test]
814    fn excludes_agent_worktree_directories() {
815        // Regression guard: agent worktrees are copies of the source tree and
816        // must never appear in walks (dead_code, embedding, symbol indexing).
817        assert!(is_excluded(Path::new(
818            ".claire/worktrees/agent-abc/src/lib.rs"
819        )));
820        assert!(is_excluded(Path::new(
821            ".claude/worktrees/agent-xyz/main.rs"
822        )));
823        assert!(is_excluded(Path::new("project/.claire/anything.rs")));
824        assert!(is_excluded(Path::new("project/.serena/memories/index.md")));
825        assert!(is_excluded(Path::new(
826            "project/.superpowers/plans/phase-one.md"
827        )));
828        // Top-level `.worktrees/` (git worktree add target) — discovered
829        // during dogfooding where `find_referencing_symbols` returned only
830        // worktree paths and missed the main tree entirely.
831        assert!(is_excluded(Path::new(
832            ".worktrees/feature-x/crates/codelens-engine/src/lib.rs"
833        )));
834        assert!(is_excluded(Path::new(
835            "project/.worktrees/branch-y/src/main.rs"
836        )));
837        // And the usual suspects stay excluded.
838        assert!(is_excluded(Path::new("node_modules/foo/index.js")));
839        assert!(is_excluded(Path::new("target/debug/build.rs")));
840        assert!(is_excluded(Path::new(
841            "app/release/win-unpacked/resources/app.asar.unpacked/index.js"
842        )));
843        // Non-excluded paths should pass through.
844        assert!(!is_excluded(Path::new("crates/codelens-engine/src/lib.rs")));
845        assert!(!is_excluded(Path::new("src/claire_not_a_dir.rs")));
846        assert!(!is_excluded(Path::new("src/release_notes.ts")));
847    }
848
849    #[test]
850    fn root_relative_exclusion_ignores_excluded_name_ancestors() {
851        // #358 regression: a project legitimately rooted under an
852        // excluded-name ancestor (`~/.claude/...`, `~/Library/...`,
853        // `~/dev/build/...`) must not have its entire tree filtered.
854        let root = Path::new("/Users/u/.claude/jobs/abc/tmp/external-repos/django");
855        assert!(!is_excluded_within(root, &root.join("django/shortcuts.py")));
856        let lib_root = Path::new("/Users/u/Library/Mobile Documents/proj");
857        assert!(!is_excluded_within(lib_root, &lib_root.join("src/main.rs")));
858        let build_root = Path::new("/home/u/dev/build/service");
859        assert!(!is_excluded_within(
860            build_root,
861            &build_root.join("api/handler.go")
862        ));
863
864        // Exclusions BELOW the root still apply unchanged.
865        assert!(is_excluded_within(
866            root,
867            &root.join("node_modules/pkg/index.js")
868        ));
869        assert!(is_excluded_within(root, &root.join(".git/config")));
870        assert!(is_excluded_within(
871            lib_root,
872            &lib_root.join("target/debug/main.rs")
873        ));
874
875        // A path outside the root falls back to whole-path matching
876        // (fail-safe: excludes more, never less).
877        assert!(is_excluded_within(
878            root,
879            Path::new("/somewhere/else/node_modules/x.js")
880        ));
881        // The root itself (empty relative path) is never excluded.
882        assert!(!is_excluded_within(root, root));
883    }
884
885    #[test]
886    fn collect_files_indexes_project_rooted_under_dot_directory() {
887        // #358 end-to-end: collect_files on a temp project whose ancestors
888        // include a `.claude` component must still discover source files.
889        let temp = std::env::temp_dir().join(format!(
890            "codelens-358-{}-{:?}",
891            std::process::id(),
892            std::thread::current().id()
893        ));
894        let root = temp.join(".claude").join("worktrees").join("proj");
895        std::fs::create_dir_all(root.join("src")).expect("mkdir");
896        std::fs::create_dir_all(root.join("node_modules/dep")).expect("mkdir nm");
897        std::fs::write(root.join("src/lib.rs"), "pub fn f() {}\n").expect("write");
898        std::fs::write(root.join("node_modules/dep/x.js"), "x\n").expect("write nm");
899
900        let files = collect_files(&root, |p| {
901            p.extension().is_some_and(|e| e == "rs" || e == "js")
902        })
903        .expect("collect");
904        let rels: Vec<String> = files
905            .iter()
906            .map(|f| f.strip_prefix(&root).unwrap().to_string_lossy().to_string())
907            .collect();
908        assert!(
909            rels.contains(&"src/lib.rs".to_string()),
910            "source file under dot-dir-rooted project must be collected, got {rels:?}"
911        );
912        assert!(
913            !rels.iter().any(|r| r.contains("node_modules")),
914            "in-project exclusions must still apply, got {rels:?}"
915        );
916        let _ = std::fs::remove_dir_all(&temp);
917    }
918
919    #[test]
920    fn excludes_generated_lock_and_backup_artifacts() {
921        assert!(is_excluded(Path::new("package-lock.json")));
922        assert!(is_excluded(Path::new("app/pnpm-lock.yaml")));
923        assert!(is_excluded(Path::new("extension/background-bundle.js")));
924        assert!(is_excluded(Path::new("extension/shared.bundle.iife.js")));
925        assert!(is_excluded(Path::new("web/assets/app.min.js")));
926        assert!(is_excluded(Path::new(
927            "app/release/win-unpacked/LICENSES.chromium.html"
928        )));
929        assert!(is_excluded(Path::new("web/src/routeTree.gen.ts")));
930        assert!(is_excluded(Path::new("web/generated/schema.ts")));
931        assert!(is_excluded(Path::new(
932            "app/backup-20260214_171635_arch-improve/src/main.ts"
933        )));
934
935        assert!(!is_excluded(Path::new("src/background.ts")));
936        assert!(!is_excluded(Path::new("src/bundle-controller.ts")));
937        assert!(!is_excluded(Path::new("src/package-lock-handler.ts")));
938    }
939
940    #[test]
941    fn project_config_excludes_opt_in_vendor_paths() {
942        let (_td, temp) = tempfile_dir();
943        fs::create_dir_all(temp.join(".codelens")).expect("mkdir codelens");
944        fs::create_dir_all(temp.join("src")).expect("mkdir src");
945        fs::create_dir_all(temp.join("companion-core-v4.3.4/companion/lib")).expect("mkdir vendor");
946        fs::create_dir_all(temp.join("local-generated/nested")).expect("mkdir generated");
947        fs::write(
948            temp.join(".codelens/config.json"),
949            r#"{"index":{"exclude_paths":["companion-core-v4.3.4/**","local-generated"]}}"#,
950        )
951        .expect("write config");
952        fs::write(temp.join("src/service.ts"), "export const service = 1;\n").expect("write src");
953        fs::write(
954            temp.join("companion-core-v4.3.4/companion/lib/Registry.ts"),
955            "export const registry = 1;\n",
956        )
957        .expect("write vendor");
958        fs::write(
959            temp.join("local-generated/nested/output.ts"),
960            "export const generated = 1;\n",
961        )
962        .expect("write generated");
963
964        let files = collect_files(&temp, |path| {
965            path.extension().is_some_and(|ext| ext == "ts")
966        })
967        .expect("collect files");
968        let relative: Vec<String> = files
969            .iter()
970            .map(|path| {
971                path.strip_prefix(&temp)
972                    .expect("relative")
973                    .to_string_lossy()
974                    .replace('\\', "/")
975            })
976            .collect();
977        assert_eq!(relative, vec!["src/service.ts"]);
978        assert!(!is_excluded(Path::new(
979            "companion-core-v4.3.4/companion/lib/Registry.ts"
980        )));
981    }
982
983    #[test]
984    fn rejects_path_escape() {
985        let (_td, dir) = tempfile_dir();
986        let project = ProjectRoot::new_exact(&dir).expect("project root");
987        let err = project
988            .resolve("../outside.txt")
989            .expect_err("should reject escape");
990        assert!(err.to_string().contains("escapes project root"));
991    }
992
993    #[test]
994    fn makes_relative_paths() {
995        let (_td, dir) = tempfile_dir();
996        let nested = dir.join("src/lib.rs");
997        fs::create_dir_all(nested.parent().expect("parent")).expect("mkdir");
998        fs::write(&nested, "fn main() {}\n").expect("write file");
999
1000        let project = ProjectRoot::new_exact(&dir).expect("project root");
1001        assert_eq!(project.to_relative(&nested), "src/lib.rs");
1002    }
1003
1004    #[test]
1005    fn does_not_promote_home_directory_from_global_codelens_marker() {
1006        let (_td, home) = tempfile_dir();
1007        let nested = home.join("Downloads/codelens");
1008        fs::create_dir_all(home.join(".codelens")).expect("mkdir global codelens");
1009        fs::create_dir_all(&nested).expect("mkdir nested");
1010
1011        let detected = super::detect_root_with_bounds(
1012            &nested.canonicalize().expect("canonical nested"),
1013            Some(&home.canonicalize().expect("canonical home")),
1014            None,
1015        );
1016
1017        assert!(detected.is_none());
1018    }
1019
1020    #[test]
1021    fn does_not_promote_temp_directory_from_global_codelens_marker() {
1022        let (_td, temp_root) = tempfile_dir();
1023        let nested = temp_root.join("projectless-fixture");
1024        fs::create_dir_all(temp_root.join(".codelens")).expect("mkdir temp codelens");
1025        fs::create_dir_all(&nested).expect("mkdir nested");
1026
1027        let detected = super::detect_root_with_bounds(
1028            &nested.canonicalize().expect("canonical nested"),
1029            None,
1030            Some(&temp_root.canonicalize().expect("canonical temp")),
1031        );
1032
1033        assert!(detected.is_none());
1034    }
1035
1036    #[test]
1037    fn standard_tmp_paths_are_treated_as_global_temp_roots() {
1038        let tmp = Path::new("/tmp")
1039            .canonicalize()
1040            .expect("standard /tmp should exist");
1041        assert!(super::is_temp_root(&tmp, None));
1042    }
1043
1044    #[test]
1045    fn still_detects_project_root_before_home_directory() {
1046        let (_td, home) = tempfile_dir();
1047        let project_root = home.join("workspace/app");
1048        let nested = project_root.join("src/features");
1049        fs::create_dir_all(home.join(".codelens")).expect("mkdir global codelens");
1050        fs::create_dir_all(&nested).expect("mkdir nested");
1051        fs::write(
1052            project_root.join("Cargo.toml"),
1053            "[package]\nname = \"demo\"\n",
1054        )
1055        .expect("write cargo");
1056
1057        let detected = super::detect_root_with_bounds(
1058            &nested.canonicalize().expect("canonical nested"),
1059            Some(&home.canonicalize().expect("canonical home")),
1060            None,
1061        )
1062        .expect("project root");
1063
1064        assert_eq!(
1065            detected.as_path(),
1066            project_root
1067                .canonicalize()
1068                .expect("canonical project root")
1069                .as_path()
1070        );
1071    }
1072
1073    /// Unique per-test subdirectory inside `tempfile_dir()` to avoid
1074    /// parallel-execution collisions. Returns the `TempDir` guard so the
1075    /// directory survives until the caller drops it; otherwise `tempfile`
1076    /// cleans up at the end of this fn and downstream writes hit
1077    /// `NotFound`.
1078    fn fresh_test_dir(label: &str) -> (tempfile::TempDir, std::path::PathBuf) {
1079        let (td, base) = tempfile_dir();
1080        let dir = base.join(label);
1081        fs::create_dir_all(&dir).expect("mkdir fresh test dir");
1082        (td, dir)
1083    }
1084
1085    #[test]
1086    fn compute_dominant_language_picks_rust_for_rust_heavy_project() {
1087        let (_td, dir) = fresh_test_dir("phase2j_rust_heavy");
1088        // 5 Rust files, 1 Python file, 1 unknown extension file
1089        fs::create_dir_all(dir.join("src")).expect("mkdir src");
1090        fs::write(dir.join("Cargo.toml"), "[package]\nname = \"x\"\n").expect("Cargo.toml");
1091        for name in ["a.rs", "b.rs", "c.rs", "d.rs", "e.rs"] {
1092            fs::write(dir.join("src").join(name), "pub fn f() {}\n").expect("write rs");
1093        }
1094        fs::write(dir.join("scripts.py"), "def f():\n    pass\n").expect("write py");
1095        fs::write(dir.join("README.md"), "# README\n").expect("write md");
1096
1097        let lang = super::compute_dominant_language(&dir).expect("dominant lang");
1098        assert_eq!(lang, "rs", "expected rs dominant, got {lang}");
1099    }
1100
1101    #[test]
1102    fn compute_dominant_language_picks_python_for_python_heavy_project() {
1103        let (_td, dir) = fresh_test_dir("phase2j_python_heavy");
1104        // 4 Python files, 1 Rust file
1105        fs::create_dir_all(dir.join("pkg")).expect("mkdir pkg");
1106        for name in ["mod_a.py", "mod_b.py", "mod_c.py", "mod_d.py"] {
1107            fs::write(dir.join("pkg").join(name), "def f():\n    pass\n").expect("write py");
1108        }
1109        fs::write(dir.join("build.rs"), "fn main() {}\n").expect("write rs");
1110
1111        let lang = super::compute_dominant_language(&dir).expect("dominant lang");
1112        assert_eq!(lang, "py", "expected py dominant, got {lang}");
1113    }
1114
1115    #[test]
1116    fn compute_dominant_language_returns_none_below_min_file_count() {
1117        let (_td, dir) = fresh_test_dir("phase2j_below_min");
1118        // Only 2 source files (below MIN_FILES = 3)
1119        fs::write(dir.join("only.rs"), "fn x() {}\n").expect("write rs");
1120        fs::write(dir.join("other.py"), "def y(): pass\n").expect("write py");
1121
1122        let lang = super::compute_dominant_language(&dir);
1123        assert!(lang.is_none(), "expected None below 3 files, got {lang:?}");
1124    }
1125
1126    #[test]
1127    fn compute_dominant_language_skips_excluded_dirs() {
1128        let (_td, dir) = fresh_test_dir("phase2j_excluded_dirs");
1129        fs::create_dir_all(dir.join("src")).expect("mkdir src");
1130        fs::create_dir_all(dir.join("node_modules/foo")).expect("mkdir node_modules");
1131        fs::create_dir_all(dir.join("target")).expect("mkdir target");
1132        // 3 real Rust source files
1133        for name in ["a.rs", "b.rs", "c.rs"] {
1134            fs::write(dir.join("src").join(name), "fn f() {}\n").expect("write src rs");
1135        }
1136        // 10 fake JS files inside node_modules that must be skipped
1137        for i in 0..10 {
1138            fs::write(
1139                dir.join("node_modules/foo").join(format!("x{i}.js")),
1140                "module.exports = {};\n",
1141            )
1142            .expect("write node_modules js");
1143        }
1144        // 10 fake build artefacts in target/ that must be skipped
1145        for i in 0..10 {
1146            fs::write(
1147                dir.join("target").join(format!("build{i}.rs")),
1148                "fn f() {}\n",
1149            )
1150            .expect("write target rs");
1151        }
1152
1153        let lang = super::compute_dominant_language(&dir).expect("dominant lang");
1154        // Only the 3 src/*.rs files should be counted — not the 10
1155        // node_modules JS files and not the 10 target build artefacts.
1156        assert_eq!(lang, "rs", "expected rs from src only, got {lang}");
1157    }
1158
1159    fn tempfile_dir() -> (tempfile::TempDir, std::path::PathBuf) {
1160        let (td, dir) = crate::test_helpers::make_unique_temp_dir("codelens-core-project-");
1161        fs::create_dir_all(&dir).expect("create tempdir");
1162        (td, dir)
1163    }
1164}