Skip to main content

projd_core/
discover.rs

1//! Project root discovery for directories that may contain multiple independent
2//! projects (different VCS, nested vendored repos, monorepos, documentation
3//! sites, datasets, ...).
4//!
5//! `discover_roots` walks a directory tree and classifies each directory as
6//! one of `Root` (one or more recognized project root kinds), `Container`
7//! (a parent directory worth descending into), or `Skip` (build output,
8//! dependency caches, hidden tooling directories). Termination is driven by
9//! root recognition rather than depth; `max_depth` only guards against
10//! pathological filesystem structures.
11
12use std::collections::{BTreeMap, BTreeSet};
13use std::ffi::OsStr;
14use std::fs;
15use std::path::{Path, PathBuf};
16
17use anyhow::{Context, Result, bail};
18use serde::{Deserialize, Serialize};
19
20#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Ord, PartialOrd, Serialize, Deserialize)]
21#[serde(rename_all = "snake_case")]
22pub enum RootKind {
23    GitRepo,
24    HgRepo,
25    SvnRepo,
26    FossilRepo,
27    BzrRepo,
28    CargoWorkspace,
29    CargoPackage,
30    NpmPackage,
31    PnpmWorkspace,
32    YarnWorkspace,
33    LernaWorkspace,
34    TurboWorkspace,
35    PythonProject,
36    GoWorkspace,
37    GoModule,
38    MavenProject,
39    GradleWorkspace,
40    GradleProject,
41    CMakeProject,
42    RubyProject,
43    PhpProject,
44    ElixirProject,
45    DotnetProject,
46    MdBook,
47    MkDocs,
48    Jekyll,
49    Sphinx,
50    Docusaurus,
51    Hugo,
52    Gatsby,
53    Astro,
54    DocFx,
55    DvcDataset,
56}
57
58#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Ord, PartialOrd, Serialize, Deserialize)]
59#[serde(rename_all = "snake_case")]
60pub enum RootCategory {
61    Vcs,
62    Workspace,
63    Package,
64    Docs,
65    Data,
66}
67
68#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Ord, PartialOrd, Serialize, Deserialize)]
69#[serde(rename_all = "snake_case")]
70pub enum Confidence {
71    Weak,
72    Medium,
73    Strong,
74}
75
76#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
77pub struct DiscoveredRoot {
78    pub path: PathBuf,
79    pub kinds: Vec<RootKind>,
80    pub confidence: Confidence,
81}
82
83#[derive(Clone, Debug)]
84pub struct DiscoverOptions {
85    pub max_depth: usize,
86    pub min_confidence: Confidence,
87    pub include_kinds: Option<BTreeSet<RootKind>>,
88    pub expand_workspaces: bool,
89    pub nested_vcs: bool,
90}
91
92impl Default for DiscoverOptions {
93    fn default() -> Self {
94        Self {
95            max_depth: 8,
96            min_confidence: Confidence::Medium,
97            include_kinds: None,
98            expand_workspaces: false,
99            nested_vcs: false,
100        }
101    }
102}
103
104impl RootKind {
105    pub fn category(self) -> RootCategory {
106        use RootKind::*;
107        match self {
108            GitRepo | HgRepo | SvnRepo | FossilRepo | BzrRepo => RootCategory::Vcs,
109            CargoWorkspace | PnpmWorkspace | YarnWorkspace | LernaWorkspace | TurboWorkspace
110            | GoWorkspace | GradleWorkspace => RootCategory::Workspace,
111            CargoPackage | NpmPackage | PythonProject | GoModule | MavenProject | GradleProject
112            | CMakeProject | RubyProject | PhpProject | ElixirProject | DotnetProject => {
113                RootCategory::Package
114            }
115            MdBook | MkDocs | Jekyll | Sphinx | Docusaurus | Hugo | Gatsby | Astro | DocFx => {
116                RootCategory::Docs
117            }
118            DvcDataset => RootCategory::Data,
119        }
120    }
121
122    pub fn token(self) -> &'static str {
123        use RootKind::*;
124        match self {
125            GitRepo => "git_repo",
126            HgRepo => "hg_repo",
127            SvnRepo => "svn_repo",
128            FossilRepo => "fossil_repo",
129            BzrRepo => "bzr_repo",
130            CargoWorkspace => "cargo_workspace",
131            CargoPackage => "cargo_package",
132            NpmPackage => "npm_package",
133            PnpmWorkspace => "pnpm_workspace",
134            YarnWorkspace => "yarn_workspace",
135            LernaWorkspace => "lerna_workspace",
136            TurboWorkspace => "turbo_workspace",
137            PythonProject => "python_project",
138            GoWorkspace => "go_workspace",
139            GoModule => "go_module",
140            MavenProject => "maven_project",
141            GradleWorkspace => "gradle_workspace",
142            GradleProject => "gradle_project",
143            CMakeProject => "cmake_project",
144            RubyProject => "ruby_project",
145            PhpProject => "php_project",
146            ElixirProject => "elixir_project",
147            DotnetProject => "dotnet_project",
148            MdBook => "mdbook",
149            MkDocs => "mkdocs",
150            Jekyll => "jekyll",
151            Sphinx => "sphinx",
152            Docusaurus => "docusaurus",
153            Hugo => "hugo",
154            Gatsby => "gatsby",
155            Astro => "astro",
156            DocFx => "docfx",
157            DvcDataset => "dvc_dataset",
158        }
159    }
160
161    pub fn from_token(token: &str) -> Option<Self> {
162        let normalized = token.trim().to_ascii_lowercase().replace('-', "_");
163        match normalized.as_str() {
164            "git" | "git_repo" => Some(Self::GitRepo),
165            "hg" | "mercurial" | "hg_repo" => Some(Self::HgRepo),
166            "svn" | "subversion" | "svn_repo" => Some(Self::SvnRepo),
167            "fossil" | "fossil_repo" => Some(Self::FossilRepo),
168            "bzr" | "bazaar" | "bzr_repo" => Some(Self::BzrRepo),
169            "cargo" | "cargo_package" => Some(Self::CargoPackage),
170            "cargo_workspace" => Some(Self::CargoWorkspace),
171            "npm" | "npm_package" | "node" => Some(Self::NpmPackage),
172            "pnpm" | "pnpm_workspace" => Some(Self::PnpmWorkspace),
173            "yarn" | "yarn_workspace" => Some(Self::YarnWorkspace),
174            "lerna" | "lerna_workspace" => Some(Self::LernaWorkspace),
175            "turbo" | "turbo_workspace" => Some(Self::TurboWorkspace),
176            "python" | "pyproject" | "python_project" => Some(Self::PythonProject),
177            "go" | "go_module" => Some(Self::GoModule),
178            "go_workspace" => Some(Self::GoWorkspace),
179            "maven" | "maven_project" => Some(Self::MavenProject),
180            "gradle" | "gradle_project" => Some(Self::GradleProject),
181            "gradle_workspace" => Some(Self::GradleWorkspace),
182            "cmake" | "cmake_project" => Some(Self::CMakeProject),
183            "ruby" | "ruby_project" => Some(Self::RubyProject),
184            "php" | "composer" | "php_project" => Some(Self::PhpProject),
185            "elixir" | "mix" | "elixir_project" => Some(Self::ElixirProject),
186            "dotnet" | "csharp" | "dotnet_project" => Some(Self::DotnetProject),
187            "mdbook" => Some(Self::MdBook),
188            "mkdocs" => Some(Self::MkDocs),
189            "jekyll" => Some(Self::Jekyll),
190            "sphinx" => Some(Self::Sphinx),
191            "docusaurus" => Some(Self::Docusaurus),
192            "hugo" => Some(Self::Hugo),
193            "gatsby" => Some(Self::Gatsby),
194            "astro" => Some(Self::Astro),
195            "docfx" => Some(Self::DocFx),
196            "dvc" | "dvc_dataset" => Some(Self::DvcDataset),
197            _ => None,
198        }
199    }
200}
201
202impl Confidence {
203    pub fn token(self) -> &'static str {
204        match self {
205            Self::Weak => "weak",
206            Self::Medium => "medium",
207            Self::Strong => "strong",
208        }
209    }
210
211    pub fn from_token(token: &str) -> Option<Self> {
212        match token.trim().to_ascii_lowercase().as_str() {
213            "weak" => Some(Self::Weak),
214            "medium" => Some(Self::Medium),
215            "strong" => Some(Self::Strong),
216            _ => None,
217        }
218    }
219}
220
221pub fn discover_roots(
222    path: impl AsRef<Path>,
223    opts: &DiscoverOptions,
224) -> Result<Vec<DiscoveredRoot>> {
225    let input = path.as_ref();
226    let canonical = fs::canonicalize(input)
227        .with_context(|| format!("failed to resolve `{}`", input.display()))?;
228    if !canonical.is_dir() {
229        bail!("`{}` is not a directory", canonical.display());
230    }
231
232    let mut results: Vec<DiscoveredRoot> = Vec::new();
233    let mut stack: Vec<(PathBuf, usize)> = vec![(canonical, 0)];
234
235    while let Some((dir, depth)) = stack.pop() {
236        match classify_dir(&dir) {
237            DirClass::Skip => continue,
238            DirClass::Root { kinds, confidence } => {
239                let is_workspace = kinds
240                    .iter()
241                    .any(|kind| kind.category() == RootCategory::Workspace);
242                let is_vcs = kinds
243                    .iter()
244                    .any(|kind| kind.category() == RootCategory::Vcs);
245                results.push(DiscoveredRoot {
246                    path: dir.clone(),
247                    kinds,
248                    confidence,
249                });
250
251                let descend =
252                    (is_workspace && opts.expand_workspaces) || (is_vcs && opts.nested_vcs);
253                if !descend || depth >= opts.max_depth {
254                    continue;
255                }
256                push_children(&dir, depth, &mut stack);
257            }
258            DirClass::Container => {
259                if depth >= opts.max_depth {
260                    continue;
261                }
262                push_children(&dir, depth, &mut stack);
263            }
264        }
265    }
266
267    results.retain(|root| root.confidence >= opts.min_confidence);
268    if let Some(filter) = &opts.include_kinds {
269        results.retain(|root| root.kinds.iter().any(|kind| filter.contains(kind)));
270    }
271    results.sort_by(|left, right| left.path.cmp(&right.path));
272    Ok(results)
273}
274
275pub fn summarize_roots(roots: &[DiscoveredRoot]) -> DiscoverSummary {
276    let mut by_kind: BTreeMap<RootKind, usize> = BTreeMap::new();
277    let mut by_confidence: BTreeMap<Confidence, usize> = BTreeMap::new();
278    for root in roots {
279        *by_confidence.entry(root.confidence).or_insert(0) += 1;
280        for kind in &root.kinds {
281            *by_kind.entry(*kind).or_insert(0) += 1;
282        }
283    }
284    DiscoverSummary {
285        total: roots.len(),
286        by_kind,
287        by_confidence,
288    }
289}
290
291#[derive(Clone, Debug, Eq, PartialEq)]
292pub struct DiscoverSummary {
293    pub total: usize,
294    pub by_kind: BTreeMap<RootKind, usize>,
295    pub by_confidence: BTreeMap<Confidence, usize>,
296}
297
298pub fn render_discover_markdown(
299    root: &Path,
300    roots: &[DiscoveredRoot],
301    summary: &DiscoverSummary,
302) -> String {
303    let mut out = String::new();
304    out.push_str("# Projd Discover Report\n\n");
305    out.push_str(&format!("- Root: `{}`\n", root.display()));
306    out.push_str(&format!("- Total project roots: {}\n", summary.total));
307    if !summary.by_confidence.is_empty() {
308        let parts: Vec<String> = [Confidence::Strong, Confidence::Medium, Confidence::Weak]
309            .iter()
310            .filter_map(|level| {
311                summary
312                    .by_confidence
313                    .get(level)
314                    .map(|count| format!("{}: {}", level.token(), count))
315            })
316            .collect();
317        if !parts.is_empty() {
318            out.push_str(&format!("- Confidence: {}\n", parts.join(" / ")));
319        }
320    }
321    if !summary.by_kind.is_empty() {
322        out.push_str("- Kinds:\n");
323        for (kind, count) in &summary.by_kind {
324            out.push_str(&format!("  - {}: {}\n", kind.token(), count));
325        }
326    }
327    out.push('\n');
328
329    if roots.is_empty() {
330        out.push_str("No project roots found.\n");
331        return out;
332    }
333
334    out.push_str("| Path | Kinds | Confidence | Category |\n");
335    out.push_str("| --- | --- | --- | --- |\n");
336    for entry in roots {
337        let rel = relative_display(root, &entry.path);
338        let kinds: Vec<&'static str> = entry.kinds.iter().map(|k| k.token()).collect();
339        let mut cats: BTreeSet<&'static str> = BTreeSet::new();
340        for kind in &entry.kinds {
341            cats.insert(category_token(kind.category()));
342        }
343        out.push_str(&format!(
344            "| `{}` | {} | {} | {} |\n",
345            rel,
346            kinds.join(", "),
347            entry.confidence.token(),
348            cats.into_iter().collect::<Vec<_>>().join(", "),
349        ));
350    }
351    out
352}
353
354pub fn render_discover_json(
355    root: &Path,
356    roots: &[DiscoveredRoot],
357    summary: &DiscoverSummary,
358) -> Result<String> {
359    #[derive(Serialize)]
360    struct ReportView<'a> {
361        root: &'a Path,
362        total: usize,
363        by_kind: BTreeMap<&'static str, usize>,
364        by_confidence: BTreeMap<&'static str, usize>,
365        roots: Vec<RootView<'a>>,
366    }
367
368    #[derive(Serialize)]
369    struct RootView<'a> {
370        path: &'a Path,
371        relative_path: String,
372        kinds: Vec<&'static str>,
373        confidence: &'static str,
374        category: Vec<&'static str>,
375    }
376
377    let report = ReportView {
378        root,
379        total: summary.total,
380        by_kind: summary
381            .by_kind
382            .iter()
383            .map(|(kind, count)| (kind.token(), *count))
384            .collect(),
385        by_confidence: summary
386            .by_confidence
387            .iter()
388            .map(|(level, count)| (level.token(), *count))
389            .collect(),
390        roots: roots
391            .iter()
392            .map(|entry| {
393                let mut cats: BTreeSet<&'static str> = BTreeSet::new();
394                for kind in &entry.kinds {
395                    cats.insert(category_token(kind.category()));
396                }
397                RootView {
398                    path: &entry.path,
399                    relative_path: relative_display(root, &entry.path),
400                    kinds: entry.kinds.iter().map(|kind| kind.token()).collect(),
401                    confidence: entry.confidence.token(),
402                    category: cats.into_iter().collect(),
403                }
404            })
405            .collect(),
406    };
407
408    serde_json::to_string_pretty(&report).context("failed to serialize discover report as JSON")
409}
410
411pub fn category_token(category: RootCategory) -> &'static str {
412    match category {
413        RootCategory::Vcs => "vcs",
414        RootCategory::Workspace => "workspace",
415        RootCategory::Package => "package",
416        RootCategory::Docs => "docs",
417        RootCategory::Data => "data",
418    }
419}
420
/// Formats `target` relative to `base` for display.
///
/// Returns `"."` when the two paths are equal, the remaining path components
/// when `target` lies below `base`, and the full `target` path otherwise.
pub fn relative_display(base: &Path, target: &Path) -> String {
    let Ok(relative) = target.strip_prefix(base) else {
        return target.display().to_string();
    };
    if relative.as_os_str().is_empty() {
        String::from(".")
    } else {
        relative.display().to_string()
    }
}
430
431enum DirClass {
432    Root {
433        kinds: Vec<RootKind>,
434        confidence: Confidence,
435    },
436    Container,
437    Skip,
438}
439
440fn push_children(dir: &Path, depth: usize, stack: &mut Vec<(PathBuf, usize)>) {
441    let Ok(entries) = fs::read_dir(dir) else {
442        return;
443    };
444    let mut children: Vec<PathBuf> = Vec::new();
445    for entry in entries.flatten() {
446        let Ok(file_type) = entry.file_type() else {
447            continue;
448        };
449        if file_type.is_symlink() || !file_type.is_dir() {
450            continue;
451        }
452        if should_skip_directory(&entry.file_name()) {
453            continue;
454        }
455        children.push(entry.path());
456    }
457    children.sort();
458    for path in children.into_iter().rev() {
459        stack.push((path, depth + 1));
460    }
461}
462
463fn classify_dir(dir: &Path) -> DirClass {
464    if !dir.is_dir() {
465        return DirClass::Skip;
466    }
467    if dir.file_name().map(should_skip_directory).unwrap_or(false) {
468        return DirClass::Skip;
469    }
470
471    let mut kinds: Vec<RootKind> = Vec::new();
472    let mut confidence = Confidence::Weak;
473
474    let record =
475        |kind: RootKind, level: Confidence, kinds: &mut Vec<RootKind>, conf: &mut Confidence| {
476            kinds.push(kind);
477            if level > *conf {
478                *conf = level;
479            }
480        };
481
482    // VCS roots.
483    if dir.join(".git").exists() {
484        record(
485            RootKind::GitRepo,
486            Confidence::Strong,
487            &mut kinds,
488            &mut confidence,
489        );
490    }
491    if dir.join(".hg").is_dir() {
492        record(
493            RootKind::HgRepo,
494            Confidence::Strong,
495            &mut kinds,
496            &mut confidence,
497        );
498    }
499    if dir.join(".svn").is_dir() {
500        record(
501            RootKind::SvnRepo,
502            Confidence::Strong,
503            &mut kinds,
504            &mut confidence,
505        );
506    }
507    if dir.join(".fslckout").is_file() || dir.join("_FOSSIL_").is_file() {
508        record(
509            RootKind::FossilRepo,
510            Confidence::Strong,
511            &mut kinds,
512            &mut confidence,
513        );
514    }
515    if dir.join(".bzr").is_dir() {
516        record(
517            RootKind::BzrRepo,
518            Confidence::Strong,
519            &mut kinds,
520            &mut confidence,
521        );
522    }
523
524    // Cargo.
525    let cargo_toml = dir.join("Cargo.toml");
526    if cargo_toml.is_file() {
527        let value = read_toml(&cargo_toml);
528        let has_workspace = value.as_ref().and_then(|v| v.get("workspace")).is_some();
529        let has_package = value.as_ref().and_then(|v| v.get("package")).is_some();
530        if has_workspace {
531            record(
532                RootKind::CargoWorkspace,
533                Confidence::Strong,
534                &mut kinds,
535                &mut confidence,
536            );
537        }
538        if has_package {
539            record(
540                RootKind::CargoPackage,
541                Confidence::Strong,
542                &mut kinds,
543                &mut confidence,
544            );
545        }
546        if !has_workspace && !has_package {
547            record(
548                RootKind::CargoPackage,
549                Confidence::Medium,
550                &mut kinds,
551                &mut confidence,
552            );
553        }
554    }
555
556    // Node / JavaScript ecosystems.
557    let package_json = dir.join("package.json");
558    if package_json.is_file() {
559        record(
560            RootKind::NpmPackage,
561            Confidence::Strong,
562            &mut kinds,
563            &mut confidence,
564        );
565        if let Some(value) = read_json(&package_json) {
566            if value.get("workspaces").is_some() {
567                record(
568                    RootKind::YarnWorkspace,
569                    Confidence::Strong,
570                    &mut kinds,
571                    &mut confidence,
572                );
573            }
574        }
575    }
576    if dir.join("pnpm-workspace.yaml").is_file() || dir.join("pnpm-workspace.yml").is_file() {
577        record(
578            RootKind::PnpmWorkspace,
579            Confidence::Strong,
580            &mut kinds,
581            &mut confidence,
582        );
583    }
584    if dir.join("lerna.json").is_file() {
585        record(
586            RootKind::LernaWorkspace,
587            Confidence::Strong,
588            &mut kinds,
589            &mut confidence,
590        );
591    }
592    if dir.join("turbo.json").is_file() {
593        record(
594            RootKind::TurboWorkspace,
595            Confidence::Strong,
596            &mut kinds,
597            &mut confidence,
598        );
599    }
600
601    // Python.
602    if dir.join("pyproject.toml").is_file() {
603        record(
604            RootKind::PythonProject,
605            Confidence::Strong,
606            &mut kinds,
607            &mut confidence,
608        );
609    }
610
611    // Go.
612    if dir.join("go.work").is_file() {
613        record(
614            RootKind::GoWorkspace,
615            Confidence::Strong,
616            &mut kinds,
617            &mut confidence,
618        );
619    }
620    if dir.join("go.mod").is_file() {
621        record(
622            RootKind::GoModule,
623            Confidence::Strong,
624            &mut kinds,
625            &mut confidence,
626        );
627    }
628
629    // JVM.
630    if dir.join("pom.xml").is_file() {
631        record(
632            RootKind::MavenProject,
633            Confidence::Strong,
634            &mut kinds,
635            &mut confidence,
636        );
637    }
638    let gradle_settings =
639        dir.join("settings.gradle").is_file() || dir.join("settings.gradle.kts").is_file();
640    let gradle_build = dir.join("build.gradle").is_file() || dir.join("build.gradle.kts").is_file();
641    if gradle_settings {
642        record(
643            RootKind::GradleWorkspace,
644            Confidence::Strong,
645            &mut kinds,
646            &mut confidence,
647        );
648    } else if gradle_build {
649        record(
650            RootKind::GradleProject,
651            Confidence::Strong,
652            &mut kinds,
653            &mut confidence,
654        );
655    }
656
657    // C / C++.
658    if dir.join("CMakeLists.txt").is_file() {
659        record(
660            RootKind::CMakeProject,
661            Confidence::Strong,
662            &mut kinds,
663            &mut confidence,
664        );
665    }
666
667    // Ruby / PHP / Elixir.
668    if dir.join("Gemfile").is_file() {
669        record(
670            RootKind::RubyProject,
671            Confidence::Strong,
672            &mut kinds,
673            &mut confidence,
674        );
675    }
676    if dir.join("composer.json").is_file() {
677        record(
678            RootKind::PhpProject,
679            Confidence::Strong,
680            &mut kinds,
681            &mut confidence,
682        );
683    }
684    if dir.join("mix.exs").is_file() {
685        record(
686            RootKind::ElixirProject,
687            Confidence::Strong,
688            &mut kinds,
689            &mut confidence,
690        );
691    }
692
693    if dir_has_extension(dir, &["csproj", "fsproj", "vbproj", "sln"]) {
694        record(
695            RootKind::DotnetProject,
696            Confidence::Strong,
697            &mut kinds,
698            &mut confidence,
699        );
700    }
701
702    // Documentation / static site frameworks.
703    if dir.join("book.toml").is_file() {
704        record(
705            RootKind::MdBook,
706            Confidence::Medium,
707            &mut kinds,
708            &mut confidence,
709        );
710    }
711    if dir.join("mkdocs.yml").is_file() || dir.join("mkdocs.yaml").is_file() {
712        record(
713            RootKind::MkDocs,
714            Confidence::Medium,
715            &mut kinds,
716            &mut confidence,
717        );
718    }
719    if dir.join("_config.yml").is_file() {
720        record(
721            RootKind::Jekyll,
722            Confidence::Medium,
723            &mut kinds,
724            &mut confidence,
725        );
726    }
727    if dir.join("conf.py").is_file() && dir.join("index.rst").is_file() {
728        record(
729            RootKind::Sphinx,
730            Confidence::Medium,
731            &mut kinds,
732            &mut confidence,
733        );
734    }
735    if file_with_any_extension(dir, "docusaurus.config", &["js", "ts", "mjs", "cjs"]) {
736        record(
737            RootKind::Docusaurus,
738            Confidence::Medium,
739            &mut kinds,
740            &mut confidence,
741        );
742    }
743    if dir.join("hugo.toml").is_file()
744        || dir.join("hugo.yaml").is_file()
745        || dir.join("hugo.json").is_file()
746    {
747        record(
748            RootKind::Hugo,
749            Confidence::Medium,
750            &mut kinds,
751            &mut confidence,
752        );
753    }
754    if file_with_any_extension(dir, "gatsby-config", &["js", "ts", "mjs", "cjs"]) {
755        record(
756            RootKind::Gatsby,
757            Confidence::Medium,
758            &mut kinds,
759            &mut confidence,
760        );
761    }
762    if file_with_any_extension(dir, "astro.config", &["js", "ts", "mjs", "cjs"]) {
763        record(
764            RootKind::Astro,
765            Confidence::Medium,
766            &mut kinds,
767            &mut confidence,
768        );
769    }
770    if dir.join("docfx.json").is_file() {
771        record(
772            RootKind::DocFx,
773            Confidence::Medium,
774            &mut kinds,
775            &mut confidence,
776        );
777    }
778
779    // Data.
780    if dir.join("dvc.yaml").is_file() || dir.join(".dvc").is_dir() {
781        record(
782            RootKind::DvcDataset,
783            Confidence::Medium,
784            &mut kinds,
785            &mut confidence,
786        );
787    }
788
789    kinds.sort();
790    kinds.dedup();
791
792    if kinds.is_empty() {
793        DirClass::Container
794    } else {
795        DirClass::Root { kinds, confidence }
796    }
797}
798
799fn read_toml(path: &Path) -> Option<toml::Value> {
800    let content = fs::read_to_string(path).ok()?;
801    toml::from_str(&content).ok()
802}
803
804fn read_json(path: &Path) -> Option<serde_json::Value> {
805    serde_json::from_str(&fs::read_to_string(path).ok()?).ok()
806}
807
/// Reports whether `dir` directly contains a file whose extension is one of
/// `exts`.
///
/// Extensions are compared ASCII case-insensitively. Only entries that
/// resolve to regular files count, matching the `is_file()` requirement of
/// the other marker checks; a subdirectory that merely happens to be named
/// like `legacy.sln` is not a match. Returns `false` when `dir` cannot be
/// read; unreadable entries are ignored.
fn dir_has_extension(dir: &Path, exts: &[&str]) -> bool {
    let Ok(entries) = fs::read_dir(dir) else {
        return false;
    };
    entries.flatten().any(|entry| {
        let path = entry.path();
        let extension_matches = path
            .extension()
            .and_then(|ext| ext.to_str())
            .map_or(false, |ext| {
                exts.iter().any(|allowed| allowed.eq_ignore_ascii_case(ext))
            });
        // Check the cheap name test first; `is_file` hits the filesystem.
        extension_matches && path.is_file()
    })
}
823
/// Reports whether `dir` contains a file named `<stem>.<ext>` for any `ext`
/// in `exts`.
fn file_with_any_extension(dir: &Path, stem: &str, exts: &[&str]) -> bool {
    for ext in exts {
        let candidate = dir.join(format!("{stem}.{ext}"));
        if candidate.is_file() {
            return true;
        }
    }
    false
}
828
/// Reports whether a directory name is on the list of directories that are
/// never walked (VCS metadata, build output, dependency caches, editor and
/// tool state).
///
/// Matching is exact and case-sensitive. Names that are not valid UTF-8 are
/// never skipped.
fn should_skip_directory(name: &OsStr) -> bool {
    const SKIPPED_NAMES: &[&str] = &[
        ".git",
        ".hg",
        ".svn",
        ".bzr",
        "target",
        "node_modules",
        ".venv",
        "venv",
        "dist",
        "build",
        "out",
        "vendor",
        ".idea",
        ".vscode",
        ".cache",
        "__pycache__",
        ".gradle",
        ".tox",
        ".pytest_cache",
        ".mypy_cache",
        ".next",
        ".nuxt",
        ".turbo",
        ".parcel-cache",
        ".docusaurus",
        "_site",
        ".jekyll-cache",
        "Pods",
        "DerivedData",
    ];
    name.to_str()
        .map_or(false, |candidate| SKIPPED_NAMES.contains(&candidate))
}
866
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Minimal manifest for a Cargo package.
    const CARGO_PACKAGE: &str = "[package]\nname = \"m\"\nversion = \"0.1.0\"\n";

    /// Writes `content` to `dir/name`, creating missing parent directories.
    fn write_file(dir: &Path, name: &str, content: &str) {
        let path = dir.join(name);
        if let Some(parent) = path.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(path, content).unwrap();
    }

    /// Creates `dir/name`, including missing intermediate directories.
    fn make_dir(dir: &Path, name: &str) {
        fs::create_dir_all(dir.join(name)).unwrap();
    }

    /// Lays out a Cargo workspace at `root` with member packages `a` and `b`.
    fn write_cargo_workspace(root: &Path) {
        write_file(
            root,
            "Cargo.toml",
            "[workspace]\nmembers = [\"a\", \"b\"]\n",
        );
        for member in ["a", "b"] {
            write_file(&root.join(member), "Cargo.toml", CARGO_PACKAGE);
        }
    }

    #[test]
    fn empty_dir_yields_no_roots() {
        let tmp = tempdir().unwrap();
        let result = discover_roots(tmp.path(), &DiscoverOptions::default()).unwrap();
        assert!(result.is_empty());
    }

    #[test]
    fn single_cargo_package() {
        let tmp = tempdir().unwrap();
        write_file(tmp.path(), "Cargo.toml", CARGO_PACKAGE);
        let result = discover_roots(tmp.path(), &DiscoverOptions::default()).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].kinds.contains(&RootKind::CargoPackage));
        assert_eq!(result[0].confidence, Confidence::Strong);
    }

    #[test]
    fn workspace_default_does_not_expand() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        write_cargo_workspace(root);
        let result = discover_roots(root, &DiscoverOptions::default()).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].kinds.contains(&RootKind::CargoWorkspace));
    }

    #[test]
    fn workspace_expand_yields_members() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        write_cargo_workspace(root);
        let opts = DiscoverOptions {
            expand_workspaces: true,
            ..DiscoverOptions::default()
        };
        let result = discover_roots(root, &opts).unwrap();
        assert_eq!(result.len(), 3);
    }

    #[test]
    fn side_by_side_repos_are_separate_roots() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        for repo in ["repoA", "repoB"] {
            let dir = root.join(repo);
            fs::create_dir_all(&dir).unwrap();
            fs::create_dir(dir.join(".git")).unwrap();
        }
        let result = discover_roots(root, &DiscoverOptions::default()).unwrap();
        assert_eq!(result.len(), 2);
        assert!(result.iter().all(|r| r.kinds.contains(&RootKind::GitRepo)));
    }

    #[test]
    fn mdbook_default_passes_medium_filter() {
        let tmp = tempdir().unwrap();
        write_file(tmp.path(), "book.toml", "[book]\ntitle = \"x\"\n");
        let result = discover_roots(tmp.path(), &DiscoverOptions::default()).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].kinds.contains(&RootKind::MdBook));
        assert_eq!(result[0].confidence, Confidence::Medium);
    }

    #[test]
    fn min_confidence_strong_filters_mdbook() {
        let tmp = tempdir().unwrap();
        write_file(tmp.path(), "book.toml", "[book]\ntitle = \"x\"\n");
        let opts = DiscoverOptions {
            min_confidence: Confidence::Strong,
            ..DiscoverOptions::default()
        };
        let result = discover_roots(tmp.path(), &opts).unwrap();
        assert!(result.is_empty());
    }

    #[test]
    fn nested_vcs_default_skipped() {
        // Use a directory name that is NOT on the skip list (`vendor/` is), so
        // this exercises the `nested_vcs` gate rather than directory skipping.
        let tmp = tempdir().unwrap();
        let outer = tmp.path();
        fs::create_dir(outer.join(".git")).unwrap();
        make_dir(outer, "third_party/dep");
        fs::create_dir(outer.join("third_party/dep/.git")).unwrap();

        let result = discover_roots(outer, &DiscoverOptions::default()).unwrap();
        assert_eq!(result.len(), 1);

        let opts = DiscoverOptions {
            nested_vcs: true,
            ..DiscoverOptions::default()
        };
        let result = discover_roots(outer, &opts).unwrap();
        assert_eq!(result.len(), 2);
    }

    #[test]
    fn include_kind_filter() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        write_file(
            &root.join("a"),
            "Cargo.toml",
            "[package]\nname=\"a\"\nversion=\"0.1.0\"\n",
        );
        write_file(&root.join("b"), "package.json", "{\"name\":\"b\"}\n");

        let mut filter = BTreeSet::new();
        filter.insert(RootKind::NpmPackage);
        let opts = DiscoverOptions {
            include_kinds: Some(filter),
            ..DiscoverOptions::default()
        };
        let result = discover_roots(root, &opts).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].kinds.contains(&RootKind::NpmPackage));
    }

    #[test]
    fn skips_node_modules_and_target() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        // Hide a fake project inside each skipped directory; neither may be
        // reported.
        write_file(
            &root.join("node_modules").join("inner"),
            "package.json",
            "{\"name\":\"inner\"}\n",
        );
        write_file(
            &root.join("target").join("inner"),
            "Cargo.toml",
            CARGO_PACKAGE,
        );

        let result = discover_roots(root, &DiscoverOptions::default()).unwrap();
        assert!(result.is_empty());
    }

    #[test]
    fn from_token_round_trip() {
        for kind in [
            RootKind::GitRepo,
            RootKind::CargoPackage,
            RootKind::MdBook,
            RootKind::DvcDataset,
        ] {
            assert_eq!(RootKind::from_token(kind.token()), Some(kind));
        }
        assert_eq!(RootKind::from_token("git"), Some(RootKind::GitRepo));
        assert_eq!(RootKind::from_token("cargo"), Some(RootKind::CargoPackage));
        assert_eq!(RootKind::from_token("nope"), None);
    }
}