standarbuild-detect 0.3.0

Detect project kind (Rust, Node, Bun, Deno, Python, Lua, C/C++) AND workspace kind (Cargo, Npm/Pnpm/Yarn/Bun, Deno, Go, Lerna, Nx, Turborepo, Mira) in polyglot monorepos
Documentation
//! [`Detector`] trait + [`DetectorRegistry`] for composing project-AND-workspace
//! detection. Built-ins live in [`crate::builtin`]; downstream crates
//! implement `Detector` for any extra kind (project or workspace, or both).

use std::path::{Path, PathBuf};

use crate::kind::KindId;
use crate::workspace::WorkspaceKindId;

/// Result of probing a directory with a single [`Detector`].
///
/// - [`DetectorHit::Project`] — the dir is a project of some kind.
/// - [`DetectorHit::Workspace`] — the dir is a workspace manifest declaring
///   a set of member project roots.
/// - [`DetectorHit::Both`] — the same manifest declares both (e.g. a
///   `Cargo.toml` with `[package]` AND `[workspace]`).
///
/// With the `serde` feature enabled the enum also derives
/// `serde::Serialize`; the variant is written into a `type` tag field using
/// its lowercased name (`project`, `workspace`, `both`).
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", serde(tag = "type", rename_all = "lowercase"))]
pub enum DetectorHit {
    /// A standalone project — no workspace manifest detected by this
    /// detector at this dir.
    Project {
        /// What kind of project (Rust, Node, …).
        kind: KindId,
        /// Files / patterns that triggered detection.
        signals: Vec<String>,
    },
    /// A workspace manifest at this dir, declaring member project roots.
    Workspace {
        /// What kind of workspace organizer (Cargo, Npm, …).
        kind: WorkspaceKindId,
        /// Absolute paths to member project roots, in declared order.
        /// Empty when the manifest declares a workspace but enumerates
        /// no members (rare; the workspace then "contains itself" via
        /// the project hit at the same path).
        members: Vec<PathBuf>,
        /// Files / patterns that triggered detection.
        signals: Vec<String>,
    },
    /// Combined: the same on-disk artifact declares both project and
    /// workspace responsibilities. Typical for a Cargo workspace whose
    /// root is also a crate, or a Node monorepo whose root has a
    /// `package.json` with both `name` and `workspaces`.
    Both {
        /// Project facet kind.
        project_kind: KindId,
        /// Workspace facet kind.
        workspace_kind: WorkspaceKindId,
        /// Absolute paths to member project roots.
        members: Vec<PathBuf>,
        /// Shared signals (since both facets come from the same manifest).
        signals: Vec<String>,
    },
}

impl DetectorHit {
    /// Project kind carried by this hit.
    ///
    /// `Project` and `Both` hits yield `Some`; a workspace-only hit has no
    /// project facet and yields `None`.
    pub fn project_kind(&self) -> Option<&KindId> {
        match self {
            Self::Workspace { .. } => None,
            Self::Project { kind, .. } => Some(kind),
            Self::Both { project_kind, .. } => Some(project_kind),
        }
    }

    /// Workspace kind carried by this hit.
    ///
    /// `Workspace` and `Both` hits yield `Some`; a project-only hit has no
    /// workspace facet and yields `None`.
    pub fn workspace_kind(&self) -> Option<&WorkspaceKindId> {
        match self {
            Self::Project { .. } => None,
            Self::Workspace { kind, .. } => Some(kind),
            Self::Both { workspace_kind, .. } => Some(workspace_kind),
        }
    }

    /// Member paths declared by the workspace facet of this hit.
    ///
    /// A project-only hit declares no members, so it yields an empty slice.
    pub fn members(&self) -> &[PathBuf] {
        match self {
            Self::Project { .. } => &[],
            Self::Workspace { members, .. } | Self::Both { members, .. } => members.as_slice(),
        }
    }

    /// Signals recorded for this hit, whichever variant it is.
    pub fn signals(&self) -> &[String] {
        // Every variant carries a `signals` field, so one or-pattern covers
        // the whole enum.
        let triggers = match self {
            Self::Project { signals, .. }
            | Self::Workspace { signals, .. }
            | Self::Both { signals, .. } => signals,
        };
        triggers.as_slice()
    }
}

/// Detection contract — implement for any custom kind / workspace.
///
/// Implementors must be `Send + Sync`. Only [`Detector::name`] and
/// [`Detector::detect`] are required; the remaining methods have defaults.
pub trait Detector: Send + Sync {
    /// Stable identifier used for diagnostics and [`DetectorRegistry::remove`].
    /// Conventionally lowercase; e.g. `"rust"`, `"cargo-workspace"`,
    /// `"wgsl"`. Need not match any [`KindId`] / [`WorkspaceKindId`] slug.
    fn name(&self) -> &str;

    /// Probe `dir`. Return `Some(_)` if this detector recognises something
    /// at this directory; `None` otherwise.
    fn detect(&self, dir: &Path) -> Option<DetectorHit>;

    /// Tiebreaker when two detectors hit the SAME facet at the SAME dir
    /// (e.g. NodeDetector vs BunDetector both claim the project facet on
    /// a dir that has both `package.json` and `bun.lock`). Higher wins.
    /// Detectors with disjoint facets don't compete — both hits land in
    /// the final result.
    ///
    /// Built-in priorities (project facet): Rust=100, Bun=80, Deno=70,
    /// Node=50, Python=40, Lua=30, Cpp=20, C=10. Workspace facet uses
    /// similar shape: BunWs=80, PnpmWs=70, YarnWs=60, NpmWs=50, …
    ///
    /// The default implementation returns `0`.
    fn priority(&self) -> i32 {
        0
    }

    /// Declare the project [`KindId`] this detector advertises, if any.
    /// Used by registry introspection (e.g. schema validation) to warn
    /// when a user declares a project `type` that no registered detector
    /// recognises. Workspace-only detectors return `None`. Default
    /// returns `None` — dynamic detectors that emit different kinds
    /// based on runtime conditions can leave it that way.
    fn declared_project_kind(&self) -> Option<KindId> {
        None
    }

    /// Declare the workspace [`WorkspaceKindId`] this detector
    /// advertises, if any. Used by registry introspection and the
    /// `DetectionResult` consumers that want to query the set of
    /// workspace kinds the registry knows about.
    ///
    /// The default implementation returns `None`.
    fn declared_workspace_kind(&self) -> Option<WorkspaceKindId> {
        None
    }
}

/// Registry of registered [`Detector`]s. Use [`DetectorRegistry::with_builtins`]
/// for the default set, or [`DetectorRegistry::empty`] to build from scratch.
pub struct DetectorRegistry {
    // Boxed detectors, kept in the order they were added.
    detectors: Vec<Box<dyn Detector>>,
}

impl DetectorRegistry {
    /// Create a registry that holds no detectors. Populate it with `add`.
    pub fn empty() -> Self {
        Self { detectors: vec![] }
    }

    /// Create a registry and let [`crate::builtin`] register every detector
    /// this crate ships.
    pub fn with_builtins() -> Self {
        let mut registry = Self::empty();
        crate::builtin::register_all(&mut registry);
        registry
    }

    /// Register `d` after all previously registered detectors. Returns
    /// `&mut Self` so calls can be chained.
    pub fn add(&mut self, d: impl Detector + 'static) -> &mut Self {
        let boxed: Box<dyn Detector> = Box::new(d);
        self.detectors.push(boxed);
        self
    }

    /// Drop every detector whose `name()` equals `name` and report how many
    /// were dropped.
    pub fn remove(&mut self, name: &str) -> usize {
        let mut removed = 0;
        self.detectors.retain(|det| {
            let keep = det.name() != name;
            if !keep {
                removed += 1;
            }
            keep
        });
        removed
    }

    /// Names of the registered detectors, in registration order.
    pub fn names(&self) -> Vec<&str> {
        let mut listed = Vec::with_capacity(self.detectors.len());
        for det in &self.detectors {
            listed.push(det.name());
        }
        listed
    }

    /// Project [`KindId`]s advertised by the registered detectors.
    ///
    /// Each kind appears once, at the position of the first detector that
    /// declared it. Detectors whose `declared_project_kind()` is `None`
    /// contribute nothing.
    pub fn project_kinds(&self) -> Vec<KindId> {
        let mut seen = std::collections::HashSet::new();
        self.detectors
            .iter()
            .filter_map(|det| det.declared_project_kind())
            // `insert` reports whether the kind was new, which is exactly
            // the keep/skip decision.
            .filter(|kind| seen.insert(kind.clone()))
            .collect()
    }

    /// [`WorkspaceKindId`]s advertised by the registered detectors, each
    /// listed once in first-declared order.
    pub fn workspace_kinds(&self) -> Vec<WorkspaceKindId> {
        let mut seen = std::collections::HashSet::new();
        self.detectors
            .iter()
            .filter_map(|det| det.declared_workspace_kind())
            .filter(|kind| seen.insert(kind.clone()))
            .collect()
    }

    /// Probe `dir` with every registered detector and return the surviving
    /// hits, in registration order. Returns an empty vector when `dir` is
    /// not a directory.
    ///
    /// Disambiguation is done per facet:
    /// - **Project facet**: only one hit keeps its project facet — the one
    ///   from the highest-priority detector, regardless of project kind
    ///   (e.g. Bun=80 beats Node=50 when both fire).
    /// - **Workspace facet**: different `WorkspaceKindId`s may coexist at
    ///   the same dir; for each kind, the highest-priority detector keeps
    ///   the facet.
    ///
    /// On equal priority the detector registered first wins. A
    /// `DetectorHit::Both` competes on both facets; if it wins only one, it
    /// is returned as the corresponding single-facet variant.
    pub fn detect(&self, dir: &Path) -> Vec<DetectorHit> {
        use std::collections::{HashMap, HashSet};

        if !dir.is_dir() {
            return Vec::new();
        }

        // Every hit paired with the priority of the detector that made it.
        let raw: Vec<(i32, DetectorHit)> = self
            .detectors
            .iter()
            .filter_map(|det| det.detect(dir).map(|hit| (det.priority(), hit)))
            .collect();

        // Project facet: a single winner across all project kinds. The
        // comparison is strict, so earlier hits win ties.
        let mut top_project: Option<(i32, usize)> = None;
        for (idx, (prio, hit)) in raw.iter().enumerate() {
            if hit.project_kind().is_none() {
                continue;
            }
            let beats_current = top_project.map_or(true, |(best, _)| *prio > best);
            if beats_current {
                top_project = Some((*prio, idx));
            }
        }

        // Workspace facet: one winner per workspace kind, same tie rule.
        let mut top_workspace: HashMap<WorkspaceKindId, (i32, usize)> = HashMap::new();
        for (idx, (prio, hit)) in raw.iter().enumerate() {
            if let Some(kind) = hit.workspace_kind() {
                let beats_current = top_workspace
                    .get(kind)
                    .map_or(true, |(best, _)| prio > best);
                if beats_current {
                    top_workspace.insert(kind.clone(), (*prio, idx));
                }
            }
        }

        let project_idx = top_project.map(|(_, idx)| idx);
        let workspace_idxs: HashSet<usize> =
            top_workspace.values().map(|&(_, idx)| idx).collect();

        // Keep each hit only for the facets it won; a `Both` that won a
        // single facet is narrowed to that facet's variant.
        raw.into_iter()
            .enumerate()
            .filter_map(|(idx, (_, hit))| {
                let keep_project = project_idx == Some(idx);
                let keep_workspace = workspace_idxs.contains(&idx);
                match hit {
                    project @ DetectorHit::Project { .. } => {
                        if keep_project {
                            Some(project)
                        } else {
                            None
                        }
                    }
                    workspace @ DetectorHit::Workspace { .. } => {
                        if keep_workspace {
                            Some(workspace)
                        } else {
                            None
                        }
                    }
                    DetectorHit::Both {
                        project_kind,
                        workspace_kind,
                        members,
                        signals,
                    } => match (keep_project, keep_workspace) {
                        (true, true) => Some(DetectorHit::Both {
                            project_kind,
                            workspace_kind,
                            members,
                            signals,
                        }),
                        (true, false) => Some(DetectorHit::Project {
                            kind: project_kind,
                            signals,
                        }),
                        (false, true) => Some(DetectorHit::Workspace {
                            kind: workspace_kind,
                            members,
                            signals,
                        }),
                        (false, false) => None,
                    },
                }
            })
            .collect()
    }
}

impl Default for DetectorRegistry {
    fn default() -> Self {
        Self::with_builtins()
    }
}