standarbuild-detect 0.1.0

Detect project kind (Rust, Node, Bun, Deno, Python, Lua, C/C++) and scan polyglot monorepo workspaces
Documentation
//! Recursive workspace scan: starting from a root directory, walk down up to
//! `max_depth` levels and collect every [`Discovered`] project.

use std::collections::HashMap;
use std::path::{Path, PathBuf};

use crate::detect::detect_in_dir;
use crate::kind::ProjectKind;

/// One directory reported by [`discover`]: either a recognised project or,
/// when `include_unknown_at_depth_one` is set, a direct child of the scan root
/// whose kind is `Unknown`.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct Discovered {
	/// Display label chosen according to the [`LabelStrategy`]. When several
	/// entries end up with the same label, non-root entries are relabelled
	/// from their `rel_path` with `/` replaced by `-`.
	pub label: String,
	/// Location relative to the scan root: `"."` for the root itself,
	/// otherwise `"./<path>"` using forward slashes.
	pub rel_path: String,
	/// The scan root as passed to [`discover`], or a directory entry found
	/// below it. NOTE(review): nothing in this module canonicalises the path,
	/// so it is only absolute if the caller's `base_dir` is.
	#[cfg_attr(feature = "serde", serde(serialize_with = "crate::path_norm::serialize_path"))]
	pub absolute_path: PathBuf,
	/// Project kind returned by `detect_in_dir` for this directory.
	pub kind: ProjectKind,
	/// Second value returned by `detect_in_dir` for this directory
	/// (presumably the evidence that led to `kind` — confirm in `detect`).
	pub signals: Vec<String>,
}

/// How the `label` of a [`Discovered`] entry is derived.
#[derive(Debug, Clone, Copy)]
pub enum LabelStrategy {
	/// Use the directory basename verbatim.
	Basename,
	/// For Rust/Node/Bun projects, read `name` from `Cargo.toml` or
	/// `package.json`. Falls back to the basename for other kinds or when the
	/// manifest is unreadable.
	PreferManifestName,
}

/// Tunables for [`discover`]. `DiscoverOptions::default()` gives the stock
/// configuration.
#[derive(Debug, Clone)]
pub struct DiscoverOptions {
	/// How many directory levels below the scan root are inspected; direct
	/// children of the root are depth 1 (default: 4).
	pub max_depth: usize,
	/// Directory names that are never entered or reported. Matching is by
	/// exact, case-sensitive comparison against the directory's own name.
	pub skip_dirs: Vec<String>,
	/// Skip directories whose name starts with `.` (default: true).
	pub skip_dotdirs: bool,
	/// How labels are derived for discovered projects
	/// (default: `LabelStrategy::PreferManifestName`).
	pub label_strategy: LabelStrategy,
	/// If true, also include `depth == 1` directories whose kind is `Unknown`.
	/// Useful when callers want to surface raw children even when they look
	/// empty. Default: true (matches the original `standarbuild init` behavior).
	pub include_unknown_at_depth_one: bool,
}

impl Default for DiscoverOptions {
	fn default() -> Self {
		Self {
			max_depth: 4,
			skip_dirs: default_skip_dirs(),
			skip_dotdirs: true,
			label_strategy: LabelStrategy::PreferManifestName,
			include_unknown_at_depth_one: true,
		}
	}
}

/// Directory names skipped by default: dependency caches, build output and
/// Python virtual environments.
fn default_skip_dirs() -> Vec<String> {
	const NAMES: [&str; 8] = [
		"node_modules",
		"target",
		"dist",
		"build",
		"out",
		"__pycache__",
		".venv",
		"venv",
	];

	let mut dirs = Vec::with_capacity(NAMES.len());
	for name in NAMES.iter() {
		dirs.push(String::from(*name));
	}
	dirs
}

/// Scans `base_dir` and the directories below it for projects.
///
/// The root is reported first (with `rel_path` `"."`) when `detect_in_dir`
/// recognises it; its label is based on the directory basename, or `"root"`
/// when the path has no UTF-8 basename. Sub-directories are then collected by
/// `walk`, and clashing labels are disambiguated by `dedupe_labels`.
///
/// Returns the discovered entries; the vector is empty when nothing matched.
pub fn discover(base_dir: &Path, opts: &DiscoverOptions) -> Vec<Discovered> {
	let mut found = Vec::new();

	let (root_kind, root_signals) = detect_in_dir(base_dir);
	if root_kind != ProjectKind::Unknown {
		let root_name = match base_dir.file_name().and_then(|name| name.to_str()) {
			Some(name) => name.to_string(),
			None => "root".to_string(),
		};
		found.push(Discovered {
			label: label_for(&root_name, base_dir, root_kind, opts.label_strategy),
			rel_path: ".".to_string(),
			absolute_path: base_dir.to_path_buf(),
			kind: root_kind,
			signals: root_signals,
		});
	}

	// Direct children of the root are depth 1.
	walk(base_dir, base_dir, 1, opts, &mut found);
	dedupe_labels(&mut found);
	found
}

/// Recursively inspects the sub-directories of `current`, appending every
/// match to `out`.
///
/// * `base_dir` – scan root; every `rel_path` is expressed relative to it.
/// * `current` – directory whose children are examined by this call.
/// * `depth` – depth of those children below `base_dir` (direct children are
///   1); nothing is read once it exceeds `opts.max_depth`.
/// * `opts` – skip rules, label strategy and the depth-one `Unknown` switch.
/// * `out` – accumulator; a directory is pushed before its own descendants,
///   and siblings are visited in file-name order.
///
/// A directory is reported when `detect_in_dir` recognises it, or when it sits
/// at depth 1 and `opts.include_unknown_at_depth_one` is set. Every
/// non-skipped directory is descended into, whether or not it was reported.
/// Directories and entries that cannot be read are skipped silently.
fn walk(
	base_dir: &Path,
	current: &Path,
	depth: usize,
	opts: &DiscoverOptions,
	out: &mut Vec<Discovered>,
) {
	if depth > opts.max_depth {
		return;
	}
	let Ok(entries) = std::fs::read_dir(current) else {
		return;
	};
	let mut entries: Vec<_> = entries.flatten().collect();
	// `file_name()` allocates an `OsString`; cache it so each key is computed
	// once rather than on every comparison. The sort remains stable.
	entries.sort_by_cached_key(|e| e.file_name());
	for entry in entries {
		let path = entry.path();
		if !path.is_dir() {
			continue;
		}
		let name = entry.file_name().to_string_lossy().into_owned();
		if should_skip(&name, opts) {
			continue;
		}
		// Join the path components with `/` instead of rewriting every
		// backslash: on Unix a backslash is a legal file-name character and
		// must not be turned into a separator.
		let rel = path
			.strip_prefix(base_dir)
			.unwrap_or(&path)
			.components()
			.map(|c| c.as_os_str().to_string_lossy())
			.collect::<Vec<_>>()
			.join("/");
		let (kind, signals) = detect_in_dir(&path);
		if kind != ProjectKind::Unknown || (depth == 1 && opts.include_unknown_at_depth_one) {
			let label = label_for(&name, &path, kind, opts.label_strategy);
			out.push(Discovered {
				label,
				rel_path: format!("./{}", rel),
				absolute_path: path.clone(),
				kind,
				signals,
			});
		}
		walk(base_dir, &path, depth + 1, opts, out);
	}
}

/// Returns `true` when a directory called `name` must be ignored: either it is
/// a dot-directory and `opts.skip_dotdirs` is set, or `name` exactly equals an
/// entry of `opts.skip_dirs`.
fn should_skip(name: &str, opts: &DiscoverOptions) -> bool {
	let hidden = opts.skip_dotdirs && name.starts_with('.');
	hidden || opts.skip_dirs.iter().any(|skipped| skipped.as_str() == name)
}

/// Makes clashing labels distinguishable.
///
/// Labels are counted first. Every entry whose original label occurs more than
/// once is then relabelled from its `rel_path`: the leading `./` is dropped
/// and each `/` becomes `-`. The root entry (`rel_path` of `"."`) keeps its
/// label.
fn dedupe_labels(projects: &mut [Discovered]) {
	// Counts are taken up front so that relabelling one entry does not change
	// the verdict for the entries that follow it.
	let mut counts: HashMap<String, usize> = HashMap::new();
	for project in projects.iter() {
		let slot = counts.entry(project.label.clone()).or_insert(0);
		*slot += 1;
	}

	for project in projects.iter_mut() {
		let occurrences = counts.get(&project.label).copied().unwrap_or(0);
		if occurrences <= 1 {
			continue;
		}
		let rel = project.rel_path.trim_start_matches("./");
		if rel.is_empty() || rel == "." {
			continue;
		}
		project.label = rel.replace('/', "-");
	}
}

/// Picks the label for the project in `dir`.
///
/// With `LabelStrategy::Basename` the result is always `basename`. With
/// `LabelStrategy::PreferManifestName`, Rust projects use the package name
/// from `Cargo.toml` and Node/Bun projects use `name` from `package.json`;
/// every other kind, and any manifest that yields no name, falls back to
/// `basename`.
fn label_for(basename: &str, dir: &Path, kind: ProjectKind, strategy: LabelStrategy) -> String {
	let manifest_name = match strategy {
		LabelStrategy::Basename => None,
		LabelStrategy::PreferManifestName => match kind {
			ProjectKind::Rust => read_cargo_package_name(dir),
			ProjectKind::Node | ProjectKind::Bun => read_package_json_name(dir),
			_ => None,
		},
	};

	match manifest_name {
		Some(name) => name,
		None => basename.to_string(),
	}
}

/// Reads `package.name` from `dir/Cargo.toml`.
///
/// Returns `None` when the file cannot be read, is not valid TOML, or has no
/// string-valued `name` inside a `package` table.
fn read_cargo_package_name(dir: &Path) -> Option<String> {
	let manifest_path = dir.join("Cargo.toml");
	let contents = std::fs::read_to_string(manifest_path).ok()?;
	let manifest = toml::from_str::<toml::Value>(&contents).ok()?;

	let package = manifest.get("package")?;
	let name = package.get("name")?.as_str()?;
	Some(name.to_string())
}

/// Reads the top-level `name` from `dir/package.json`.
///
/// Returns `None` when the file cannot be read, is not valid JSON, or has no
/// string-valued `name` key.
fn read_package_json_name(dir: &Path) -> Option<String> {
	let manifest_path = dir.join("package.json");
	let contents = std::fs::read_to_string(manifest_path).ok()?;
	let manifest = serde_json::from_str::<serde_json::Value>(&contents).ok()?;

	let name = manifest.get("name")?.as_str()?;
	Some(name.to_string())
}