Skip to main content

standarbuild_detect/
discover.rs

1//! Recursive workspace scan: starting from a root directory, walk down up to
2//! `max_depth` levels, run every registered [`crate::Detector`], and build a
3//! [`DetectionResult`] containing both the projects and the workspace manifests
4//! found.
5//!
6//! After the walk, [`reconcile_members`] cross-references projects and
7//! workspaces: each project gets a `member_of` list of workspace roots that
8//! declare it as a member (multi-entry when the same root has overlapping
9//! workspace manifests, e.g. Cargo + Bazel claiming the same crate).
10
11use std::collections::HashMap;
12use std::path::{Path, PathBuf};
13
14use crate::detector::{DetectorHit, DetectorRegistry};
15use crate::kind::KindId;
16use crate::workspace::WorkspaceKindId;
17
/// Single project entry in a [`DetectionResult`].
///
/// One entry is recorded per project-producing detector hit, so a directory
/// matched by several detectors yields several entries that share the same
/// `absolute_path`.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct ProjectInfo {
    /// Project kind (Rust / Node / Bun / ...).
    pub kind: KindId,
    /// Human label, derived from manifest `name` or directory basename.
    /// May later be rewritten from `rel_path` when several projects end up
    /// with the same label.
    pub label: String,
    /// POSIX-style path relative to the scan root (`./packages/web`,
    /// `.` when the scan root itself is the project).
    pub rel_path: String,
    /// On-disk path of the project root, exactly as produced by the walk
    /// (the scan root extended with child directory names). It is absolute
    /// when the scan root handed to the scan is absolute.
    #[cfg_attr(feature = "serde", serde(serialize_with = "crate::path_norm::serialize_path"))]
    pub absolute_path: PathBuf,
    /// Files / patterns that triggered detection. Empty for entries of
    /// unknown kind, which no detector matched.
    pub signals: Vec<String>,
    /// Roots of the workspaces that declare this project as a member,
    /// found by comparing `absolute_path` with each workspace's `members`
    /// entries. Empty when the project is orphan (not part of any
    /// detected workspace). Multi-entry when overlapping workspaces
    /// claim the same project (rare; e.g. Cargo + Bazel).
    #[cfg_attr(feature = "serde", serde(serialize_with = "serialize_path_vec"))]
    pub member_of: Vec<PathBuf>,
}
41
/// Single workspace entry in a [`DetectionResult`].
///
/// One entry is recorded per workspace-producing detector hit; two
/// detectors matching the same directory yield two entries with the same
/// `root`.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct WorkspaceInfo {
    /// Workspace kind (Cargo / Npm / Pnpm / ...).
    pub kind: WorkspaceKindId,
    /// On-disk path of the workspace root (the directory where the manifest
    /// lives), as produced by the walk. Absolute when the scan root is.
    #[cfg_attr(feature = "serde", serde(serialize_with = "crate::path_norm::serialize_path"))]
    pub root: PathBuf,
    /// Paths to member project roots, copied unchanged from the detector
    /// hit (presumably in the order the manifest declares them — the
    /// ordering is decided by the detector, not here).
    #[cfg_attr(feature = "serde", serde(serialize_with = "serialize_path_vec"))]
    pub members: Vec<PathBuf>,
    /// Files / patterns that triggered detection.
    pub signals: Vec<String>,
}
59
/// Aggregated result of [`discover`] / [`discover_with`].
#[derive(Debug, Clone, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct DetectionResult {
    /// All projects found, in walk order. The walk is depth-first and
    /// pre-order: a directory's own hits are recorded before any of its
    /// subdirectories are entered, and sibling directories are visited
    /// sorted by file name.
    pub projects: Vec<ProjectInfo>,
    /// All workspace manifests found, in the same walk order.
    pub workspaces: Vec<WorkspaceInfo>,
}
70
71impl DetectionResult {
72    /// True when neither a project nor a workspace was detected.
73    pub fn is_empty(&self) -> bool {
74        self.projects.is_empty() && self.workspaces.is_empty()
75    }
76}
77
/// Strategy for choosing a project's `label`.
///
/// Whatever the strategy, labels shared by several projects may afterwards
/// be replaced by a label derived from the project's relative path.
#[derive(Debug, Clone, Copy)]
pub enum LabelStrategy {
    /// Use the directory basename verbatim.
    Basename,
    /// For Rust / Node / Bun projects, read `name` from `Cargo.toml` or
    /// `package.json`. Falls back to the basename for other kinds, and
    /// also when the manifest cannot be read or parsed or has no string
    /// `name`.
    PreferManifestName,
}
87
/// Tuning knobs for [`discover`] / [`discover_with`].
///
/// The `Default` implementation provides the defaults quoted on each field.
#[derive(Debug, Clone)]
pub struct DiscoverOptions {
    /// Max recursion depth. `0` = only the scan root itself (default: 4).
    pub max_depth: usize,
    /// Directory names to skip entirely (`target`, `node_modules`, ...).
    /// Each entry is compared for exact equality with a child directory's
    /// file name; the scan root itself is never skipped.
    pub skip_dirs: Vec<String>,
    /// Skip directories whose name starts with `.` (default: true).
    pub skip_dotdirs: bool,
    /// How to label projects (default: [`LabelStrategy::PreferManifestName`]).
    pub label_strategy: LabelStrategy,
    /// When `true`, also emit a `ProjectInfo { kind: UNKNOWN, ... }` for
    /// depth-1 directories that didn't match any detector. Useful when
    /// callers want to surface raw children even when they look empty.
    /// Has no effect when `max_depth` is `0`, because depth 1 is then
    /// never visited (default: true).
    pub include_unknown_at_depth_one: bool,
}
104
105impl Default for DiscoverOptions {
106    fn default() -> Self {
107        Self {
108            max_depth: 4,
109            skip_dirs: default_skip_dirs(),
110            skip_dotdirs: true,
111            label_strategy: LabelStrategy::PreferManifestName,
112            include_unknown_at_depth_one: true,
113        }
114    }
115}
116
/// Directory names that the default options exclude from the walk:
/// dependency caches, build outputs and Python virtual environments.
fn default_skip_dirs() -> Vec<String> {
    const NAMES: [&str; 8] = [
        "node_modules",
        "target",
        "dist",
        "build",
        "out",
        "__pycache__",
        ".venv",
        "venv",
    ];

    let mut dirs = Vec::with_capacity(NAMES.len());
    for name in NAMES.iter() {
        dirs.push(String::from(*name));
    }
    dirs
}
132
133/// Convenience wrapper around [`discover_with`] using the built-in
134/// [`DetectorRegistry`].
135pub fn discover(base_dir: &Path, opts: &DiscoverOptions) -> DetectionResult {
136    discover_with(base_dir, opts, &DetectorRegistry::with_builtins())
137}
138
139/// Recursive scan against an explicit registry. Extra registered
140/// detectors (custom kinds) participate exactly like the built-ins.
141pub fn discover_with(
142    base_dir: &Path,
143    opts: &DiscoverOptions,
144    registry: &DetectorRegistry,
145) -> DetectionResult {
146    let mut result = DetectionResult::default();
147
148    visit(base_dir, base_dir, 0, opts, registry, &mut result);
149    dedupe_labels(&mut result.projects);
150    reconcile_members(&mut result);
151    result
152}
153
154fn visit(
155    base_dir: &Path,
156    current: &Path,
157    depth: usize,
158    opts: &DiscoverOptions,
159    registry: &DetectorRegistry,
160    result: &mut DetectionResult,
161) {
162    let hits = registry.detect(current);
163    record_hits(base_dir, current, depth, opts, &hits, result);
164
165    if depth >= opts.max_depth {
166        return;
167    }
168    let Ok(entries) = std::fs::read_dir(current) else {
169        return;
170    };
171    let mut entries: Vec<_> = entries.flatten().collect();
172    entries.sort_by_key(|e| e.file_name());
173    for entry in entries {
174        let path = entry.path();
175        if !path.is_dir() {
176            continue;
177        }
178        let name = entry.file_name().to_string_lossy().into_owned();
179        if should_skip(&name, opts) {
180            continue;
181        }
182        visit(base_dir, &path, depth + 1, opts, registry, result);
183    }
184}
185
186fn record_hits(
187    base_dir: &Path,
188    current: &Path,
189    depth: usize,
190    opts: &DiscoverOptions,
191    hits: &[DetectorHit],
192    result: &mut DetectionResult,
193) {
194    if hits.is_empty() {
195        // Optionally surface unknown depth-1 dirs as orphan projects.
196        if depth == 1 && opts.include_unknown_at_depth_one {
197            let (label, rel_path) = label_and_relpath(base_dir, current, &KindId::UNKNOWN, opts);
198            result.projects.push(ProjectInfo {
199                kind: KindId::UNKNOWN,
200                label,
201                rel_path,
202                absolute_path: current.to_path_buf(),
203                signals: Vec::new(),
204                member_of: Vec::new(),
205            });
206        }
207        return;
208    }
209
210    for hit in hits {
211        match hit {
212            DetectorHit::Project { kind, signals } => {
213                let (label, rel_path) = label_and_relpath(base_dir, current, kind, opts);
214                result.projects.push(ProjectInfo {
215                    kind: kind.clone(),
216                    label,
217                    rel_path,
218                    absolute_path: current.to_path_buf(),
219                    signals: signals.clone(),
220                    member_of: Vec::new(),
221                });
222            }
223            DetectorHit::Workspace { kind, members, signals } => {
224                result.workspaces.push(WorkspaceInfo {
225                    kind: kind.clone(),
226                    root: current.to_path_buf(),
227                    members: members.clone(),
228                    signals: signals.clone(),
229                });
230            }
231            DetectorHit::Both {
232                project_kind,
233                workspace_kind,
234                members,
235                signals,
236            } => {
237                let (label, rel_path) =
238                    label_and_relpath(base_dir, current, project_kind, opts);
239                result.projects.push(ProjectInfo {
240                    kind: project_kind.clone(),
241                    label,
242                    rel_path,
243                    absolute_path: current.to_path_buf(),
244                    signals: signals.clone(),
245                    member_of: Vec::new(),
246                });
247                result.workspaces.push(WorkspaceInfo {
248                    kind: workspace_kind.clone(),
249                    root: current.to_path_buf(),
250                    members: members.clone(),
251                    signals: signals.clone(),
252                });
253            }
254        }
255    }
256}
257
258fn reconcile_members(result: &mut DetectionResult) {
259    // Build a lookup: absolute member path → list of workspace roots that
260    // declare it as a member.
261    let mut owner_of: HashMap<PathBuf, Vec<PathBuf>> = HashMap::new();
262    for ws in &result.workspaces {
263        for member_path in &ws.members {
264            owner_of
265                .entry(member_path.clone())
266                .or_default()
267                .push(ws.root.clone());
268        }
269    }
270    for project in &mut result.projects {
271        if let Some(roots) = owner_of.get(&project.absolute_path) {
272            project.member_of = roots.clone();
273        }
274    }
275}
276
277fn should_skip(name: &str, opts: &DiscoverOptions) -> bool {
278    if opts.skip_dotdirs && name.starts_with('.') {
279        return true;
280    }
281    opts.skip_dirs.iter().any(|d| d == name)
282}
283
284fn dedupe_labels(projects: &mut [ProjectInfo]) {
285    let mut seen: HashMap<String, usize> = HashMap::new();
286    for p in projects.iter() {
287        *seen.entry(p.label.clone()).or_insert(0) += 1;
288    }
289    for p in projects.iter_mut() {
290        if seen.get(&p.label).copied().unwrap_or(0) > 1 {
291            let rel = p.rel_path.trim_start_matches("./");
292            if !rel.is_empty() && rel != "." {
293                p.label = rel.replace('/', "-");
294            }
295        }
296    }
297}
298
299fn label_and_relpath(
300    base_dir: &Path,
301    dir: &Path,
302    kind: &KindId,
303    opts: &DiscoverOptions,
304) -> (String, String) {
305    let basename = dir
306        .file_name()
307        .and_then(|s| s.to_str())
308        .map(|s| s.to_string())
309        .unwrap_or_else(|| "root".to_string());
310    let rel = dir
311        .strip_prefix(base_dir)
312        .ok()
313        .map(|p| p.to_string_lossy().replace('\\', "/"))
314        .unwrap_or_default();
315    let rel_path = if rel.is_empty() { ".".to_string() } else { format!("./{}", rel) };
316    let label = label_for(&basename, dir, kind, opts.label_strategy);
317    (label, rel_path)
318}
319
320fn label_for(basename: &str, dir: &Path, kind: &KindId, strategy: LabelStrategy) -> String {
321    match strategy {
322        LabelStrategy::Basename => basename.to_string(),
323        LabelStrategy::PreferManifestName => match kind.as_str() {
324            "rust" => read_cargo_package_name(dir).unwrap_or_else(|| basename.to_string()),
325            "node" | "bun" => read_package_json_name(dir).unwrap_or_else(|| basename.to_string()),
326            _ => basename.to_string(),
327        },
328    }
329}
330
331fn read_cargo_package_name(dir: &Path) -> Option<String> {
332    let text = std::fs::read_to_string(dir.join("Cargo.toml")).ok()?;
333    let v: toml::Value = toml::from_str(&text).ok()?;
334    v.get("package")?
335        .get("name")?
336        .as_str()
337        .map(|s| s.to_string())
338}
339
340fn read_package_json_name(dir: &Path) -> Option<String> {
341    let text = std::fs::read_to_string(dir.join("package.json")).ok()?;
342    let v: serde_json::Value = serde_json::from_str(&text).ok()?;
343    v.get("name")?.as_str().map(|s| s.to_string())
344}
345
/// Serde `serialize_with` helper: writes `paths` as a sequence in which
/// every element is the path converted by `crate::path_norm::to_posix`.
#[cfg(feature = "serde")]
fn serialize_path_vec<S>(paths: &[PathBuf], s: S) -> Result<S::Ok, S::Error>
where
    S: serde::Serializer,
{
    use serde::ser::SerializeSeq;

    // The length is known up front, so pass it to the serializer as a hint.
    let mut seq = s.serialize_seq(Some(paths.len()))?;
    paths
        .iter()
        .try_for_each(|path| seq.serialize_element(&crate::path_norm::to_posix(path)))?;
    seq.end()
}