1use std::collections::{BTreeMap, BTreeSet};
13use std::ffi::OsStr;
14use std::fs;
15use std::path::{Path, PathBuf};
16
17use anyhow::{Context, Result, bail};
18use serde::{Deserialize, Serialize};
19
/// The kind of project root recognised in a directory.
///
/// Variants serialize with serde as `snake_case` names. The derived `Ord`
/// follows declaration order, which is also the order in which kinds end up
/// after the classifier sorts them, so do not reorder variants casually.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Ord, PartialOrd, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RootKind {
    /// A `.git` entry exists (file or directory).
    GitRepo,
    /// A `.hg` directory exists.
    HgRepo,
    /// A `.svn` directory exists.
    SvnRepo,
    /// A `.fslckout` or `_FOSSIL_` file exists.
    FossilRepo,
    /// A `.bzr` directory exists.
    BzrRepo,
    /// `Cargo.toml` with a top-level `workspace` key.
    CargoWorkspace,
    /// `Cargo.toml` with a top-level `package` key, or a `Cargo.toml` in which
    /// neither `workspace` nor `package` could be found.
    CargoPackage,
    /// A `package.json` file exists.
    NpmPackage,
    /// A `pnpm-workspace.yaml` / `pnpm-workspace.yml` file exists.
    PnpmWorkspace,
    /// `package.json` with a top-level `workspaces` key.
    YarnWorkspace,
    /// A `lerna.json` file exists.
    LernaWorkspace,
    /// A `turbo.json` file exists.
    TurboWorkspace,
    /// A `pyproject.toml` file exists.
    PythonProject,
    /// A `go.work` file exists.
    GoWorkspace,
    /// A `go.mod` file exists.
    GoModule,
    /// A `pom.xml` file exists.
    MavenProject,
    /// A `settings.gradle` / `settings.gradle.kts` file exists.
    GradleWorkspace,
    /// A `build.gradle` / `build.gradle.kts` file exists and no settings file does.
    GradleProject,
    /// A `CMakeLists.txt` file exists.
    CMakeProject,
    /// A `Gemfile` exists.
    RubyProject,
    /// A `composer.json` file exists.
    PhpProject,
    /// A `mix.exs` file exists.
    ElixirProject,
    /// The directory holds a `.csproj`, `.fsproj`, `.vbproj` or `.sln` entry.
    DotnetProject,
    /// A `book.toml` file exists.
    MdBook,
    /// A `mkdocs.yml` / `mkdocs.yaml` file exists.
    MkDocs,
    /// A `_config.yml` file exists.
    Jekyll,
    /// Both `conf.py` and `index.rst` exist.
    Sphinx,
    /// A `docusaurus.config.{js,ts,mjs,cjs}` file exists.
    Docusaurus,
    /// A `hugo.toml` / `hugo.yaml` / `hugo.json` file exists.
    Hugo,
    /// A `gatsby-config.{js,ts,mjs,cjs}` file exists.
    Gatsby,
    /// An `astro.config.{js,ts,mjs,cjs}` file exists.
    Astro,
    /// A `docfx.json` file exists.
    DocFx,
    /// A `dvc.yaml` file or a `.dvc` directory exists.
    DvcDataset,
}
57
/// Coarse grouping of [`RootKind`] values, as returned by `RootKind::category`.
///
/// Serialized with serde as `snake_case` names.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Ord, PartialOrd, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RootCategory {
    /// Version-control checkouts (git, hg, svn, fossil, bzr).
    Vcs,
    /// Multi-package workspaces (cargo, pnpm, yarn, lerna, turbo, go, gradle).
    Workspace,
    /// Single packages / projects of a build system.
    Package,
    /// Documentation site generators.
    Docs,
    /// Data projects (currently only DVC).
    Data,
}
67
/// How certain the classifier is that a directory really is a project root.
///
/// Variants are declared from least to most certain so that the derived `Ord`
/// can be used for threshold comparisons (`confidence >= min_confidence`).
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Ord, PartialOrd, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Confidence {
    /// Lowest level; the classifier's starting value before any marker is seen.
    Weak,
    /// Marker that is suggestive but can appear in non-root directories.
    Medium,
    /// Marker that identifies a project root unambiguously.
    Strong,
}
75
/// One project root found by `discover_roots`.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub struct DiscoveredRoot {
    /// Directory that was classified as a root.
    pub path: PathBuf,
    /// Every kind detected in `path`; the classifier sorts and de-duplicates this list.
    pub kinds: Vec<RootKind>,
    /// Highest confidence among the markers that matched.
    pub confidence: Confidence,
}
82
/// Tuning knobs for `discover_roots`.
#[derive(Clone, Debug)]
pub struct DiscoverOptions {
    /// Directories at this depth (the start directory is depth 0) are still
    /// classified, but their children are not visited.
    pub max_depth: usize,
    /// Roots whose confidence is below this level are dropped from the result.
    pub min_confidence: Confidence,
    /// When `Some`, only roots having at least one kind in the set are kept.
    pub include_kinds: Option<BTreeSet<RootKind>>,
    /// Keep walking below a root that has a workspace-category kind.
    pub expand_workspaces: bool,
    /// Keep walking below a root that has a VCS-category kind.
    pub nested_vcs: bool,
}
91
92impl Default for DiscoverOptions {
93 fn default() -> Self {
94 Self {
95 max_depth: 8,
96 min_confidence: Confidence::Medium,
97 include_kinds: None,
98 expand_workspaces: false,
99 nested_vcs: false,
100 }
101 }
102}
103
104impl RootKind {
105 pub fn category(self) -> RootCategory {
106 use RootKind::*;
107 match self {
108 GitRepo | HgRepo | SvnRepo | FossilRepo | BzrRepo => RootCategory::Vcs,
109 CargoWorkspace | PnpmWorkspace | YarnWorkspace | LernaWorkspace | TurboWorkspace
110 | GoWorkspace | GradleWorkspace => RootCategory::Workspace,
111 CargoPackage | NpmPackage | PythonProject | GoModule | MavenProject | GradleProject
112 | CMakeProject | RubyProject | PhpProject | ElixirProject | DotnetProject => {
113 RootCategory::Package
114 }
115 MdBook | MkDocs | Jekyll | Sphinx | Docusaurus | Hugo | Gatsby | Astro | DocFx => {
116 RootCategory::Docs
117 }
118 DvcDataset => RootCategory::Data,
119 }
120 }
121
122 pub fn token(self) -> &'static str {
123 use RootKind::*;
124 match self {
125 GitRepo => "git_repo",
126 HgRepo => "hg_repo",
127 SvnRepo => "svn_repo",
128 FossilRepo => "fossil_repo",
129 BzrRepo => "bzr_repo",
130 CargoWorkspace => "cargo_workspace",
131 CargoPackage => "cargo_package",
132 NpmPackage => "npm_package",
133 PnpmWorkspace => "pnpm_workspace",
134 YarnWorkspace => "yarn_workspace",
135 LernaWorkspace => "lerna_workspace",
136 TurboWorkspace => "turbo_workspace",
137 PythonProject => "python_project",
138 GoWorkspace => "go_workspace",
139 GoModule => "go_module",
140 MavenProject => "maven_project",
141 GradleWorkspace => "gradle_workspace",
142 GradleProject => "gradle_project",
143 CMakeProject => "cmake_project",
144 RubyProject => "ruby_project",
145 PhpProject => "php_project",
146 ElixirProject => "elixir_project",
147 DotnetProject => "dotnet_project",
148 MdBook => "mdbook",
149 MkDocs => "mkdocs",
150 Jekyll => "jekyll",
151 Sphinx => "sphinx",
152 Docusaurus => "docusaurus",
153 Hugo => "hugo",
154 Gatsby => "gatsby",
155 Astro => "astro",
156 DocFx => "docfx",
157 DvcDataset => "dvc_dataset",
158 }
159 }
160
161 pub fn from_token(token: &str) -> Option<Self> {
162 let normalized = token.trim().to_ascii_lowercase().replace('-', "_");
163 match normalized.as_str() {
164 "git" | "git_repo" => Some(Self::GitRepo),
165 "hg" | "mercurial" | "hg_repo" => Some(Self::HgRepo),
166 "svn" | "subversion" | "svn_repo" => Some(Self::SvnRepo),
167 "fossil" | "fossil_repo" => Some(Self::FossilRepo),
168 "bzr" | "bazaar" | "bzr_repo" => Some(Self::BzrRepo),
169 "cargo" | "cargo_package" => Some(Self::CargoPackage),
170 "cargo_workspace" => Some(Self::CargoWorkspace),
171 "npm" | "npm_package" | "node" => Some(Self::NpmPackage),
172 "pnpm" | "pnpm_workspace" => Some(Self::PnpmWorkspace),
173 "yarn" | "yarn_workspace" => Some(Self::YarnWorkspace),
174 "lerna" | "lerna_workspace" => Some(Self::LernaWorkspace),
175 "turbo" | "turbo_workspace" => Some(Self::TurboWorkspace),
176 "python" | "pyproject" | "python_project" => Some(Self::PythonProject),
177 "go" | "go_module" => Some(Self::GoModule),
178 "go_workspace" => Some(Self::GoWorkspace),
179 "maven" | "maven_project" => Some(Self::MavenProject),
180 "gradle" | "gradle_project" => Some(Self::GradleProject),
181 "gradle_workspace" => Some(Self::GradleWorkspace),
182 "cmake" | "cmake_project" => Some(Self::CMakeProject),
183 "ruby" | "ruby_project" => Some(Self::RubyProject),
184 "php" | "composer" | "php_project" => Some(Self::PhpProject),
185 "elixir" | "mix" | "elixir_project" => Some(Self::ElixirProject),
186 "dotnet" | "csharp" | "dotnet_project" => Some(Self::DotnetProject),
187 "mdbook" => Some(Self::MdBook),
188 "mkdocs" => Some(Self::MkDocs),
189 "jekyll" => Some(Self::Jekyll),
190 "sphinx" => Some(Self::Sphinx),
191 "docusaurus" => Some(Self::Docusaurus),
192 "hugo" => Some(Self::Hugo),
193 "gatsby" => Some(Self::Gatsby),
194 "astro" => Some(Self::Astro),
195 "docfx" => Some(Self::DocFx),
196 "dvc" | "dvc_dataset" => Some(Self::DvcDataset),
197 _ => None,
198 }
199 }
200}
201
202impl Confidence {
203 pub fn token(self) -> &'static str {
204 match self {
205 Self::Weak => "weak",
206 Self::Medium => "medium",
207 Self::Strong => "strong",
208 }
209 }
210
211 pub fn from_token(token: &str) -> Option<Self> {
212 match token.trim().to_ascii_lowercase().as_str() {
213 "weak" => Some(Self::Weak),
214 "medium" => Some(Self::Medium),
215 "strong" => Some(Self::Strong),
216 _ => None,
217 }
218 }
219}
220
221pub fn discover_roots(
222 path: impl AsRef<Path>,
223 opts: &DiscoverOptions,
224) -> Result<Vec<DiscoveredRoot>> {
225 let input = path.as_ref();
226 let canonical = fs::canonicalize(input)
227 .with_context(|| format!("failed to resolve `{}`", input.display()))?;
228 if !canonical.is_dir() {
229 bail!("`{}` is not a directory", canonical.display());
230 }
231
232 let mut results: Vec<DiscoveredRoot> = Vec::new();
233 let mut stack: Vec<(PathBuf, usize)> = vec![(canonical, 0)];
234
235 while let Some((dir, depth)) = stack.pop() {
236 match classify_dir(&dir) {
237 DirClass::Skip => continue,
238 DirClass::Root { kinds, confidence } => {
239 let is_workspace = kinds
240 .iter()
241 .any(|kind| kind.category() == RootCategory::Workspace);
242 let is_vcs = kinds
243 .iter()
244 .any(|kind| kind.category() == RootCategory::Vcs);
245 results.push(DiscoveredRoot {
246 path: dir.clone(),
247 kinds,
248 confidence,
249 });
250
251 let descend =
252 (is_workspace && opts.expand_workspaces) || (is_vcs && opts.nested_vcs);
253 if !descend || depth >= opts.max_depth {
254 continue;
255 }
256 push_children(&dir, depth, &mut stack);
257 }
258 DirClass::Container => {
259 if depth >= opts.max_depth {
260 continue;
261 }
262 push_children(&dir, depth, &mut stack);
263 }
264 }
265 }
266
267 results.retain(|root| root.confidence >= opts.min_confidence);
268 if let Some(filter) = &opts.include_kinds {
269 results.retain(|root| root.kinds.iter().any(|kind| filter.contains(kind)));
270 }
271 results.sort_by(|left, right| left.path.cmp(&right.path));
272 Ok(results)
273}
274
275pub fn summarize_roots(roots: &[DiscoveredRoot]) -> DiscoverSummary {
276 let mut by_kind: BTreeMap<RootKind, usize> = BTreeMap::new();
277 let mut by_confidence: BTreeMap<Confidence, usize> = BTreeMap::new();
278 for root in roots {
279 *by_confidence.entry(root.confidence).or_insert(0) += 1;
280 for kind in &root.kinds {
281 *by_kind.entry(*kind).or_insert(0) += 1;
282 }
283 }
284 DiscoverSummary {
285 total: roots.len(),
286 by_kind,
287 by_confidence,
288 }
289}
290
/// Aggregate counts over a list of discovered roots, as built by `summarize_roots`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct DiscoverSummary {
    /// Number of roots summarized.
    pub total: usize,
    /// Number of roots carrying each kind; a multi-kind root is counted under every kind it has.
    pub by_kind: BTreeMap<RootKind, usize>,
    /// Number of roots at each confidence level.
    pub by_confidence: BTreeMap<Confidence, usize>,
}
297
298pub fn render_discover_markdown(
299 root: &Path,
300 roots: &[DiscoveredRoot],
301 summary: &DiscoverSummary,
302) -> String {
303 let mut out = String::new();
304 out.push_str("# Projd Discover Report\n\n");
305 out.push_str(&format!("- Root: `{}`\n", root.display()));
306 out.push_str(&format!("- Total project roots: {}\n", summary.total));
307 if !summary.by_confidence.is_empty() {
308 let parts: Vec<String> = [Confidence::Strong, Confidence::Medium, Confidence::Weak]
309 .iter()
310 .filter_map(|level| {
311 summary
312 .by_confidence
313 .get(level)
314 .map(|count| format!("{}: {}", level.token(), count))
315 })
316 .collect();
317 if !parts.is_empty() {
318 out.push_str(&format!("- Confidence: {}\n", parts.join(" / ")));
319 }
320 }
321 if !summary.by_kind.is_empty() {
322 out.push_str("- Kinds:\n");
323 for (kind, count) in &summary.by_kind {
324 out.push_str(&format!(" - {}: {}\n", kind.token(), count));
325 }
326 }
327 out.push('\n');
328
329 if roots.is_empty() {
330 out.push_str("No project roots found.\n");
331 return out;
332 }
333
334 out.push_str("| Path | Kinds | Confidence | Category |\n");
335 out.push_str("| --- | --- | --- | --- |\n");
336 for entry in roots {
337 let rel = relative_display(root, &entry.path);
338 let kinds: Vec<&'static str> = entry.kinds.iter().map(|k| k.token()).collect();
339 let mut cats: BTreeSet<&'static str> = BTreeSet::new();
340 for kind in &entry.kinds {
341 cats.insert(category_token(kind.category()));
342 }
343 out.push_str(&format!(
344 "| `{}` | {} | {} | {} |\n",
345 rel,
346 kinds.join(", "),
347 entry.confidence.token(),
348 cats.into_iter().collect::<Vec<_>>().join(", "),
349 ));
350 }
351 out
352}
353
354pub fn render_discover_json(
355 root: &Path,
356 roots: &[DiscoveredRoot],
357 summary: &DiscoverSummary,
358) -> Result<String> {
359 #[derive(Serialize)]
360 struct ReportView<'a> {
361 root: &'a Path,
362 total: usize,
363 by_kind: BTreeMap<&'static str, usize>,
364 by_confidence: BTreeMap<&'static str, usize>,
365 roots: Vec<RootView<'a>>,
366 }
367
368 #[derive(Serialize)]
369 struct RootView<'a> {
370 path: &'a Path,
371 relative_path: String,
372 kinds: Vec<&'static str>,
373 confidence: &'static str,
374 category: Vec<&'static str>,
375 }
376
377 let report = ReportView {
378 root,
379 total: summary.total,
380 by_kind: summary
381 .by_kind
382 .iter()
383 .map(|(kind, count)| (kind.token(), *count))
384 .collect(),
385 by_confidence: summary
386 .by_confidence
387 .iter()
388 .map(|(level, count)| (level.token(), *count))
389 .collect(),
390 roots: roots
391 .iter()
392 .map(|entry| {
393 let mut cats: BTreeSet<&'static str> = BTreeSet::new();
394 for kind in &entry.kinds {
395 cats.insert(category_token(kind.category()));
396 }
397 RootView {
398 path: &entry.path,
399 relative_path: relative_display(root, &entry.path),
400 kinds: entry.kinds.iter().map(|kind| kind.token()).collect(),
401 confidence: entry.confidence.token(),
402 category: cats.into_iter().collect(),
403 }
404 })
405 .collect(),
406 };
407
408 serde_json::to_string_pretty(&report).context("failed to serialize discover report as JSON")
409}
410
411pub fn category_token(category: RootCategory) -> &'static str {
412 match category {
413 RootCategory::Vcs => "vcs",
414 RootCategory::Workspace => "workspace",
415 RootCategory::Package => "package",
416 RootCategory::Docs => "docs",
417 RootCategory::Data => "data",
418 }
419}
420
/// Formats `target` for display relative to `base`.
///
/// Returns `"."` when the two paths are equal, the remaining suffix when
/// `base` is a prefix of `target`, and the full `target` otherwise.
pub fn relative_display(base: &Path, target: &Path) -> String {
    let Ok(suffix) = target.strip_prefix(base) else {
        // Not underneath `base`: show the path as given.
        return target.display().to_string();
    };
    if suffix.as_os_str().is_empty() {
        String::from(".")
    } else {
        suffix.display().to_string()
    }
}
430
/// Result of classifying a single directory during the walk.
enum DirClass {
    /// At least one project marker matched.
    Root {
        /// Detected kinds, sorted and de-duplicated.
        kinds: Vec<RootKind>,
        /// Highest confidence among the matched markers.
        confidence: Confidence,
    },
    /// No marker matched; the directory may still contain roots further down.
    Container,
    /// Not a directory, or a directory whose name is on the skip list.
    Skip,
}
439
440fn push_children(dir: &Path, depth: usize, stack: &mut Vec<(PathBuf, usize)>) {
441 let Ok(entries) = fs::read_dir(dir) else {
442 return;
443 };
444 let mut children: Vec<PathBuf> = Vec::new();
445 for entry in entries.flatten() {
446 let Ok(file_type) = entry.file_type() else {
447 continue;
448 };
449 if file_type.is_symlink() || !file_type.is_dir() {
450 continue;
451 }
452 if should_skip_directory(&entry.file_name()) {
453 continue;
454 }
455 children.push(entry.path());
456 }
457 children.sort();
458 for path in children.into_iter().rev() {
459 stack.push((path, depth + 1));
460 }
461}
462
463fn classify_dir(dir: &Path) -> DirClass {
464 if !dir.is_dir() {
465 return DirClass::Skip;
466 }
467 if dir.file_name().map(should_skip_directory).unwrap_or(false) {
468 return DirClass::Skip;
469 }
470
471 let mut kinds: Vec<RootKind> = Vec::new();
472 let mut confidence = Confidence::Weak;
473
474 let record =
475 |kind: RootKind, level: Confidence, kinds: &mut Vec<RootKind>, conf: &mut Confidence| {
476 kinds.push(kind);
477 if level > *conf {
478 *conf = level;
479 }
480 };
481
482 if dir.join(".git").exists() {
484 record(
485 RootKind::GitRepo,
486 Confidence::Strong,
487 &mut kinds,
488 &mut confidence,
489 );
490 }
491 if dir.join(".hg").is_dir() {
492 record(
493 RootKind::HgRepo,
494 Confidence::Strong,
495 &mut kinds,
496 &mut confidence,
497 );
498 }
499 if dir.join(".svn").is_dir() {
500 record(
501 RootKind::SvnRepo,
502 Confidence::Strong,
503 &mut kinds,
504 &mut confidence,
505 );
506 }
507 if dir.join(".fslckout").is_file() || dir.join("_FOSSIL_").is_file() {
508 record(
509 RootKind::FossilRepo,
510 Confidence::Strong,
511 &mut kinds,
512 &mut confidence,
513 );
514 }
515 if dir.join(".bzr").is_dir() {
516 record(
517 RootKind::BzrRepo,
518 Confidence::Strong,
519 &mut kinds,
520 &mut confidence,
521 );
522 }
523
524 let cargo_toml = dir.join("Cargo.toml");
526 if cargo_toml.is_file() {
527 let value = read_toml(&cargo_toml);
528 let has_workspace = value.as_ref().and_then(|v| v.get("workspace")).is_some();
529 let has_package = value.as_ref().and_then(|v| v.get("package")).is_some();
530 if has_workspace {
531 record(
532 RootKind::CargoWorkspace,
533 Confidence::Strong,
534 &mut kinds,
535 &mut confidence,
536 );
537 }
538 if has_package {
539 record(
540 RootKind::CargoPackage,
541 Confidence::Strong,
542 &mut kinds,
543 &mut confidence,
544 );
545 }
546 if !has_workspace && !has_package {
547 record(
548 RootKind::CargoPackage,
549 Confidence::Medium,
550 &mut kinds,
551 &mut confidence,
552 );
553 }
554 }
555
556 let package_json = dir.join("package.json");
558 if package_json.is_file() {
559 record(
560 RootKind::NpmPackage,
561 Confidence::Strong,
562 &mut kinds,
563 &mut confidence,
564 );
565 if let Some(value) = read_json(&package_json) {
566 if value.get("workspaces").is_some() {
567 record(
568 RootKind::YarnWorkspace,
569 Confidence::Strong,
570 &mut kinds,
571 &mut confidence,
572 );
573 }
574 }
575 }
576 if dir.join("pnpm-workspace.yaml").is_file() || dir.join("pnpm-workspace.yml").is_file() {
577 record(
578 RootKind::PnpmWorkspace,
579 Confidence::Strong,
580 &mut kinds,
581 &mut confidence,
582 );
583 }
584 if dir.join("lerna.json").is_file() {
585 record(
586 RootKind::LernaWorkspace,
587 Confidence::Strong,
588 &mut kinds,
589 &mut confidence,
590 );
591 }
592 if dir.join("turbo.json").is_file() {
593 record(
594 RootKind::TurboWorkspace,
595 Confidence::Strong,
596 &mut kinds,
597 &mut confidence,
598 );
599 }
600
601 if dir.join("pyproject.toml").is_file() {
603 record(
604 RootKind::PythonProject,
605 Confidence::Strong,
606 &mut kinds,
607 &mut confidence,
608 );
609 }
610
611 if dir.join("go.work").is_file() {
613 record(
614 RootKind::GoWorkspace,
615 Confidence::Strong,
616 &mut kinds,
617 &mut confidence,
618 );
619 }
620 if dir.join("go.mod").is_file() {
621 record(
622 RootKind::GoModule,
623 Confidence::Strong,
624 &mut kinds,
625 &mut confidence,
626 );
627 }
628
629 if dir.join("pom.xml").is_file() {
631 record(
632 RootKind::MavenProject,
633 Confidence::Strong,
634 &mut kinds,
635 &mut confidence,
636 );
637 }
638 let gradle_settings =
639 dir.join("settings.gradle").is_file() || dir.join("settings.gradle.kts").is_file();
640 let gradle_build = dir.join("build.gradle").is_file() || dir.join("build.gradle.kts").is_file();
641 if gradle_settings {
642 record(
643 RootKind::GradleWorkspace,
644 Confidence::Strong,
645 &mut kinds,
646 &mut confidence,
647 );
648 } else if gradle_build {
649 record(
650 RootKind::GradleProject,
651 Confidence::Strong,
652 &mut kinds,
653 &mut confidence,
654 );
655 }
656
657 if dir.join("CMakeLists.txt").is_file() {
659 record(
660 RootKind::CMakeProject,
661 Confidence::Strong,
662 &mut kinds,
663 &mut confidence,
664 );
665 }
666
667 if dir.join("Gemfile").is_file() {
669 record(
670 RootKind::RubyProject,
671 Confidence::Strong,
672 &mut kinds,
673 &mut confidence,
674 );
675 }
676 if dir.join("composer.json").is_file() {
677 record(
678 RootKind::PhpProject,
679 Confidence::Strong,
680 &mut kinds,
681 &mut confidence,
682 );
683 }
684 if dir.join("mix.exs").is_file() {
685 record(
686 RootKind::ElixirProject,
687 Confidence::Strong,
688 &mut kinds,
689 &mut confidence,
690 );
691 }
692
693 if dir_has_extension(dir, &["csproj", "fsproj", "vbproj", "sln"]) {
694 record(
695 RootKind::DotnetProject,
696 Confidence::Strong,
697 &mut kinds,
698 &mut confidence,
699 );
700 }
701
702 if dir.join("book.toml").is_file() {
704 record(
705 RootKind::MdBook,
706 Confidence::Medium,
707 &mut kinds,
708 &mut confidence,
709 );
710 }
711 if dir.join("mkdocs.yml").is_file() || dir.join("mkdocs.yaml").is_file() {
712 record(
713 RootKind::MkDocs,
714 Confidence::Medium,
715 &mut kinds,
716 &mut confidence,
717 );
718 }
719 if dir.join("_config.yml").is_file() {
720 record(
721 RootKind::Jekyll,
722 Confidence::Medium,
723 &mut kinds,
724 &mut confidence,
725 );
726 }
727 if dir.join("conf.py").is_file() && dir.join("index.rst").is_file() {
728 record(
729 RootKind::Sphinx,
730 Confidence::Medium,
731 &mut kinds,
732 &mut confidence,
733 );
734 }
735 if file_with_any_extension(dir, "docusaurus.config", &["js", "ts", "mjs", "cjs"]) {
736 record(
737 RootKind::Docusaurus,
738 Confidence::Medium,
739 &mut kinds,
740 &mut confidence,
741 );
742 }
743 if dir.join("hugo.toml").is_file()
744 || dir.join("hugo.yaml").is_file()
745 || dir.join("hugo.json").is_file()
746 {
747 record(
748 RootKind::Hugo,
749 Confidence::Medium,
750 &mut kinds,
751 &mut confidence,
752 );
753 }
754 if file_with_any_extension(dir, "gatsby-config", &["js", "ts", "mjs", "cjs"]) {
755 record(
756 RootKind::Gatsby,
757 Confidence::Medium,
758 &mut kinds,
759 &mut confidence,
760 );
761 }
762 if file_with_any_extension(dir, "astro.config", &["js", "ts", "mjs", "cjs"]) {
763 record(
764 RootKind::Astro,
765 Confidence::Medium,
766 &mut kinds,
767 &mut confidence,
768 );
769 }
770 if dir.join("docfx.json").is_file() {
771 record(
772 RootKind::DocFx,
773 Confidence::Medium,
774 &mut kinds,
775 &mut confidence,
776 );
777 }
778
779 if dir.join("dvc.yaml").is_file() || dir.join(".dvc").is_dir() {
781 record(
782 RootKind::DvcDataset,
783 Confidence::Medium,
784 &mut kinds,
785 &mut confidence,
786 );
787 }
788
789 kinds.sort();
790 kinds.dedup();
791
792 if kinds.is_empty() {
793 DirClass::Container
794 } else {
795 DirClass::Root { kinds, confidence }
796 }
797}
798
799fn read_toml(path: &Path) -> Option<toml::Value> {
800 let content = fs::read_to_string(path).ok()?;
801 toml::from_str(&content).ok()
802}
803
804fn read_json(path: &Path) -> Option<serde_json::Value> {
805 serde_json::from_str(&fs::read_to_string(path).ok()?).ok()
806}
807
/// Reports whether `dir` directly contains a file whose extension is one of `exts`.
///
/// Extensions are compared without regard to ASCII case, so `App.CSPROJ`
/// matches `"csproj"`. Only entries that resolve to regular files count: a
/// sub-directory that happens to be named `legacy.sln` is not a project file
/// and must not turn its parent into a project root. Entries with no
/// extension or a non-UTF-8 extension never match. Returns `false` when `dir`
/// cannot be read.
fn dir_has_extension(dir: &Path, exts: &[&str]) -> bool {
    let Ok(entries) = fs::read_dir(dir) else {
        return false;
    };
    entries.flatten().any(|entry| {
        let path = entry.path();
        let extension_matches = path
            .extension()
            .and_then(|ext| ext.to_str())
            .map_or(false, |ext| {
                exts.iter().any(|allowed| allowed.eq_ignore_ascii_case(ext))
            });
        // Check the cheap name test first; `is_file` costs a metadata lookup.
        extension_matches && path.is_file()
    })
}
823
/// Reports whether `dir` contains a file named `<stem>.<ext>` for any `ext` in `exts`.
fn file_with_any_extension(dir: &Path, stem: &str, exts: &[&str]) -> bool {
    for ext in exts {
        let candidate = dir.join(format!("{stem}.{ext}"));
        if candidate.is_file() {
            return true;
        }
    }
    false
}
828
/// Reports whether a directory with this name is excluded from the walk.
///
/// The match is exact and case-sensitive. The list covers VCS metadata,
/// build output, dependency caches and editor state. Names that are not valid
/// UTF-8 are never skipped.
fn should_skip_directory(name: &OsStr) -> bool {
    const SKIPPED: [&str; 29] = [
        // VCS metadata
        ".git",
        ".hg",
        ".svn",
        ".bzr",
        // build output and vendored dependencies
        "target",
        "node_modules",
        ".venv",
        "venv",
        "dist",
        "build",
        "out",
        "vendor",
        // editor state and tool caches
        ".idea",
        ".vscode",
        ".cache",
        "__pycache__",
        ".gradle",
        ".tox",
        ".pytest_cache",
        ".mypy_cache",
        ".next",
        ".nuxt",
        ".turbo",
        ".parcel-cache",
        ".docusaurus",
        "_site",
        ".jekyll-cache",
        // Xcode / CocoaPods
        "Pods",
        "DerivedData",
    ];

    match name.to_str() {
        Some(text) => SKIPPED.contains(&text),
        None => false,
    }
}
866
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Writes `content` to `dir/name`, creating missing parent directories.
    fn write_file(dir: &Path, name: &str, content: &str) {
        let path = dir.join(name);
        if let Some(parent) = path.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(path, content).unwrap();
    }

    /// Creates `dir/name` (and any missing parents) as a directory.
    fn make_dir(dir: &Path, name: &str) {
        fs::create_dir_all(dir.join(name)).unwrap();
    }

    #[test]
    fn empty_dir_yields_no_roots() {
        let tmp = tempdir().unwrap();
        let result = discover_roots(tmp.path(), &DiscoverOptions::default()).unwrap();
        assert!(result.is_empty());
    }

    #[test]
    fn single_cargo_package() {
        let tmp = tempdir().unwrap();
        write_file(
            tmp.path(),
            "Cargo.toml",
            "[package]\nname = \"x\"\nversion = \"0.1.0\"\n",
        );
        let result = discover_roots(tmp.path(), &DiscoverOptions::default()).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].kinds.contains(&RootKind::CargoPackage));
        assert_eq!(result[0].confidence, Confidence::Strong);
    }

    #[test]
    fn workspace_default_does_not_expand() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        write_file(
            root,
            "Cargo.toml",
            "[workspace]\nmembers = [\"a\", \"b\"]\n",
        );
        for member in ["a", "b"] {
            write_file(
                &root.join(member),
                "Cargo.toml",
                "[package]\nname = \"m\"\nversion = \"0.1.0\"\n",
            );
        }
        let result = discover_roots(root, &DiscoverOptions::default()).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].kinds.contains(&RootKind::CargoWorkspace));
    }

    #[test]
    fn workspace_expand_yields_members() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        write_file(
            root,
            "Cargo.toml",
            "[workspace]\nmembers = [\"a\", \"b\"]\n",
        );
        for member in ["a", "b"] {
            write_file(
                &root.join(member),
                "Cargo.toml",
                "[package]\nname = \"m\"\nversion = \"0.1.0\"\n",
            );
        }
        let opts = DiscoverOptions {
            expand_workspaces: true,
            ..DiscoverOptions::default()
        };
        let result = discover_roots(root, &opts).unwrap();
        // The workspace itself plus its two members.
        assert_eq!(result.len(), 3);
    }

    #[test]
    fn side_by_side_repos_are_separate_roots() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        for repo in ["repoA", "repoB"] {
            let dir = root.join(repo);
            fs::create_dir_all(&dir).unwrap();
            fs::create_dir(dir.join(".git")).unwrap();
        }
        let result = discover_roots(root, &DiscoverOptions::default()).unwrap();
        assert_eq!(result.len(), 2);
        assert!(result.iter().all(|r| r.kinds.contains(&RootKind::GitRepo)));
    }

    #[test]
    fn mdbook_default_passes_medium_filter() {
        let tmp = tempdir().unwrap();
        write_file(tmp.path(), "book.toml", "[book]\ntitle = \"x\"\n");
        let result = discover_roots(tmp.path(), &DiscoverOptions::default()).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].kinds.contains(&RootKind::MdBook));
        assert_eq!(result[0].confidence, Confidence::Medium);
    }

    #[test]
    fn min_confidence_strong_filters_mdbook() {
        let tmp = tempdir().unwrap();
        write_file(tmp.path(), "book.toml", "[book]\ntitle = \"x\"\n");
        let opts = DiscoverOptions {
            min_confidence: Confidence::Strong,
            ..DiscoverOptions::default()
        };
        let result = discover_roots(tmp.path(), &opts).unwrap();
        assert!(result.is_empty());
    }

    #[test]
    fn nested_vcs_default_skipped() {
        let tmp = tempdir().unwrap();
        let outer = tmp.path();
        fs::create_dir(outer.join(".git")).unwrap();
        make_dir(outer, "third_party/dep");
        fs::create_dir(outer.join("third_party/dep/.git")).unwrap();

        // By default the walk stops at the outer repository.
        let result = discover_roots(outer, &DiscoverOptions::default()).unwrap();
        assert_eq!(result.len(), 1);

        // With `nested_vcs` the inner checkout is reported as well.
        let opts = DiscoverOptions {
            nested_vcs: true,
            ..DiscoverOptions::default()
        };
        let result = discover_roots(outer, &opts).unwrap();
        assert_eq!(result.len(), 2);
    }

    #[test]
    fn include_kind_filter() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();
        let a = root.join("a");
        let b = root.join("b");
        fs::create_dir_all(&a).unwrap();
        fs::create_dir_all(&b).unwrap();
        write_file(
            &a,
            "Cargo.toml",
            "[package]\nname=\"a\"\nversion=\"0.1.0\"\n",
        );
        write_file(&b, "package.json", "{\"name\":\"b\"}\n");

        let mut filter = BTreeSet::new();
        filter.insert(RootKind::NpmPackage);
        let opts = DiscoverOptions {
            include_kinds: Some(filter),
            ..DiscoverOptions::default()
        };
        let result = discover_roots(root, &opts).unwrap();
        assert_eq!(result.len(), 1);
        assert!(result[0].kinds.contains(&RootKind::NpmPackage));
    }

    #[test]
    fn skips_node_modules_and_target() {
        let tmp = tempdir().unwrap();
        let root = tmp.path();

        // A package buried under `node_modules` must not be reported.
        let buried = root.join("node_modules").join("inner");
        fs::create_dir_all(&buried).unwrap();
        write_file(&buried, "package.json", "{\"name\":\"inner\"}\n");

        // Likewise for a crate buried under `target`, as the test name promises.
        let built = root.join("target").join("package");
        write_file(
            &built,
            "Cargo.toml",
            "[package]\nname = \"built\"\nversion = \"0.1.0\"\n",
        );

        let result = discover_roots(root, &DiscoverOptions::default()).unwrap();
        assert!(result.is_empty());
    }

    #[test]
    fn from_token_round_trip() {
        for kind in [
            RootKind::GitRepo,
            RootKind::CargoPackage,
            RootKind::MdBook,
            RootKind::DvcDataset,
        ] {
            assert_eq!(RootKind::from_token(kind.token()), Some(kind));
        }
        assert_eq!(RootKind::from_token("git"), Some(RootKind::GitRepo));
        assert_eq!(RootKind::from_token("cargo"), Some(RootKind::CargoPackage));
        assert_eq!(RootKind::from_token("nope"), None);
    }
}