use std::collections::HashMap;
use std::path::{Path, PathBuf};
use crate::detector::DetectorRegistry;
use crate::kind::KindId;
/// One directory reported by [`discover`] / [`discover_with`].
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct Discovered {
/// Display label for the directory: its basename or a name read from its
/// manifest, depending on the [`LabelStrategy`]. Labels that occur more than
/// once in a result set may be rewritten by the de-duplication pass.
pub label: String,
/// `"."` for the base directory itself, otherwise the path relative to the
/// base directory, prefixed with `"./"` and with `\` replaced by `/`.
pub rel_path: String,
/// The directory's path as produced by the walk (`base_dir` itself or a
/// `read_dir` entry path below it). It is not canonicalized here, so it is
/// only absolute when the `base_dir` given by the caller is absolute.
#[cfg_attr(feature = "serde", serde(serialize_with = "crate::path_norm::serialize_path"))]
pub absolute_path: PathBuf,
/// Kind reported by the detector registry, or `KindId::UNKNOWN` for a
/// directory that is reported without a detector match.
pub kind: KindId,
/// Signals returned by the detector alongside the kind; empty when no
/// detector matched.
pub signals: Vec<String>,
}
/// How the label of a discovered directory is chosen.
#[derive(Debug, Clone, Copy)]
pub enum LabelStrategy {
/// Always use the directory's basename.
Basename,
/// Use the name declared in the directory's manifest when one can be read
/// (`Cargo.toml` `package.name` for kind `"rust"`, `package.json` `name`
/// for kinds `"node"` and `"bun"`); otherwise fall back to the basename.
PreferManifestName,
}
/// Tuning knobs for the directory walk performed by [`discover_with`].
#[derive(Debug, Clone)]
pub struct DiscoverOptions {
/// Deepest level that is scanned. Immediate children of the base directory
/// are depth 1; a directory's children are not read once their depth would
/// exceed this value. The base directory itself is always checked.
pub max_depth: usize,
/// Directory names that are neither reported nor descended into. Compared
/// against the directory's own name by exact string equality.
pub skip_dirs: Vec<String>,
/// When `true`, any directory whose name starts with `.` is skipped.
pub skip_dotdirs: bool,
/// How labels are derived for reported directories.
pub label_strategy: LabelStrategy,
/// When `true`, immediate children of the base directory are reported even
/// if no detector matches them; such entries get `KindId::UNKNOWN`.
pub include_unknown_at_depth_one: bool,
}
impl Default for DiscoverOptions {
fn default() -> Self {
Self {
max_depth: 4,
skip_dirs: default_skip_dirs(),
skip_dotdirs: true,
label_strategy: LabelStrategy::PreferManifestName,
include_unknown_at_depth_one: true,
}
}
}
/// Returns the directory names that are skipped by default: common
/// dependency, build-output and virtual-environment folders.
fn default_skip_dirs() -> Vec<String> {
    const NAMES: [&str; 8] = [
        "node_modules",
        "target",
        "dist",
        "build",
        "out",
        "__pycache__",
        ".venv",
        "venv",
    ];
    NAMES.iter().copied().map(String::from).collect()
}
/// Discovers directories under `base_dir` using the registry returned by
/// `DetectorRegistry::with_builtins()`.
///
/// This is a convenience wrapper around [`discover_with`].
pub fn discover(base_dir: &Path, opts: &DiscoverOptions) -> Vec<Discovered> {
    let builtin_registry = DetectorRegistry::with_builtins();
    discover_with(base_dir, opts, &builtin_registry)
}
/// Discovers directories under `base_dir` using the given detector `registry`.
///
/// The base directory itself is checked first and, if a detector matches it,
/// reported with `rel_path` `"."`. Its subdirectories are then walked starting
/// at depth 1, and finally labels that occur more than once are de-duplicated.
///
/// Unreadable directories are silently skipped by the walk, so this function
/// never fails; it returns whatever could be found.
pub fn discover_with(
    base_dir: &Path,
    opts: &DiscoverOptions,
    registry: &DetectorRegistry,
) -> Vec<Discovered> {
    let mut found = Vec::new();

    if let Some(root_match) = registry.detect(base_dir) {
        // `file_name()` is `None` for paths such as `.` or `/`, and `to_str()`
        // is `None` for non-UTF-8 names; both fall back to a fixed name.
        let root_name = match base_dir.file_name().and_then(|n| n.to_str()) {
            Some(name) => name.to_owned(),
            None => String::from("root"),
        };
        let label = label_for(&root_name, base_dir, &root_match.kind, opts.label_strategy);
        found.push(Discovered {
            label,
            rel_path: String::from("."),
            absolute_path: base_dir.to_path_buf(),
            kind: root_match.kind,
            signals: root_match.signals,
        });
    }

    walk(base_dir, base_dir, 1, opts, registry, &mut found);
    dedupe_labels(&mut found);
    found
}
/// Recursively scans the subdirectories of `current`, appending every reported
/// directory to `out`.
///
/// * `base_dir` – root of the whole discovery; used to compute `rel_path`.
/// * `current` – directory whose children are examined in this call.
/// * `depth` – depth of those children (1 for children of `base_dir`).
///
/// Children are visited in file-name order so the output is deterministic.
/// A child is reported when a detector matches it, or — at depth 1 with
/// `include_unknown_at_depth_one` set — even without a match, as
/// `KindId::UNKNOWN`. Every non-skipped child directory is descended into,
/// whether or not it was reported.
fn walk(
    base_dir: &Path,
    current: &Path,
    depth: usize,
    opts: &DiscoverOptions,
    registry: &DetectorRegistry,
    out: &mut Vec<Discovered>,
) {
    if depth > opts.max_depth {
        return;
    }

    // Best effort: an unreadable directory contributes nothing, and entries
    // that fail to be read are dropped by `flatten()`.
    let mut children: Vec<_> = match std::fs::read_dir(current) {
        Ok(iter) => iter.flatten().collect(),
        Err(_) => return,
    };
    children.sort_by_key(|child| child.file_name());

    let report_unknown = depth == 1 && opts.include_unknown_at_depth_one;

    for child in children {
        let child_path = child.path();
        if !child_path.is_dir() {
            continue;
        }
        let dir_name = child.file_name().to_string_lossy().into_owned();
        if should_skip(&dir_name, opts) {
            continue;
        }

        let reported = match registry.detect(&child_path) {
            Some(hit) => Some((hit.kind, hit.signals)),
            None if report_unknown => Some((KindId::UNKNOWN, Vec::new())),
            None => None,
        };

        if let Some((kind, signals)) = reported {
            // Relative path with forward slashes regardless of platform.
            let rel = child_path
                .strip_prefix(base_dir)
                .unwrap_or(&child_path)
                .to_string_lossy()
                .replace('\\', "/");
            let label = label_for(&dir_name, &child_path, &kind, opts.label_strategy);
            out.push(Discovered {
                label,
                rel_path: format!("./{}", rel),
                absolute_path: child_path.clone(),
                kind,
                signals,
            });
        }

        walk(base_dir, &child_path, depth + 1, opts, registry, out);
    }
}
/// Returns `true` when a directory called `name` must be neither reported nor
/// descended into: either it is a dot-directory and `skip_dotdirs` is set, or
/// its name equals one of the entries in `skip_dirs`.
fn should_skip(name: &str, opts: &DiscoverOptions) -> bool {
    let hidden = opts.skip_dotdirs && name.starts_with('.');
    hidden || opts.skip_dirs.iter().any(|skipped| skipped.as_str() == name)
}
/// Makes the labels in `projects` unique.
///
/// Labels that occur exactly once are left untouched. Every project whose
/// label is shared gets a label derived from its `rel_path` instead (leading
/// `./` removed, `/` replaced by `-`); an entry whose relative path is `.` or
/// empty has no useful path and keeps its original label as the starting point.
///
/// A path-derived label is not unique by construction: `./a/b` and `./a-b`
/// both map to `a-b`, and the result may equal the untouched label of another
/// project. Each replacement is therefore checked against all labels already
/// in use, and on a clash a numeric suffix (`-2`, `-3`, …) is appended until
/// it is free. Originally-unique labels are reserved first so they never
/// change; clashes among replacements are resolved in slice order.
fn dedupe_labels(projects: &mut [Discovered]) {
    let mut counts: HashMap<String, usize> = HashMap::new();
    for p in projects.iter() {
        *counts.entry(p.label.clone()).or_insert(0) += 1;
    }

    // Labels that were unique to begin with are never rewritten, so reserve
    // them before handing out any replacement.
    let mut taken: std::collections::HashSet<String> = projects
        .iter()
        .filter(|p| counts.get(&p.label).copied().unwrap_or(0) == 1)
        .map(|p| p.label.clone())
        .collect();

    for p in projects.iter_mut() {
        // `counts` is keyed by the original labels; `p.label` is still the
        // original here because it is only overwritten at the end of the loop.
        if counts.get(&p.label).copied().unwrap_or(0) <= 1 {
            continue;
        }

        let rel = p.rel_path.trim_start_matches("./");
        let base = if rel.is_empty() || rel == "." {
            p.label.clone()
        } else {
            rel.replace('/', "-")
        };

        // `insert` returns `false` when the value is already present.
        let mut candidate = base.clone();
        let mut suffix = 2usize;
        while !taken.insert(candidate.clone()) {
            candidate = format!("{}-{}", base, suffix);
            suffix += 1;
        }
        p.label = candidate;
    }
}
/// Chooses the label for the directory `dir`.
///
/// With `LabelStrategy::Basename` the result is always `basename`. With
/// `LabelStrategy::PreferManifestName` the manifest of the matching kind is
/// consulted (`Cargo.toml` for `"rust"`, `package.json` for `"node"` and
/// `"bun"`); if it is missing, unreadable or has no name, or the kind has no
/// known manifest, `basename` is used instead.
fn label_for(basename: &str, dir: &Path, kind: &KindId, strategy: LabelStrategy) -> String {
    let manifest_name = match strategy {
        LabelStrategy::Basename => None,
        LabelStrategy::PreferManifestName => match kind.as_str() {
            "rust" => read_cargo_package_name(dir),
            "node" | "bun" => read_package_json_name(dir),
            _ => None,
        },
    };
    manifest_name.unwrap_or_else(|| basename.to_owned())
}
/// Reads `package.name` from `dir/Cargo.toml`.
///
/// Returns `None` if the file cannot be read, is not valid TOML, or has no
/// string-valued `name` under `[package]`.
fn read_cargo_package_name(dir: &Path) -> Option<String> {
    let manifest_path = dir.join("Cargo.toml");
    let contents = std::fs::read_to_string(manifest_path).ok()?;
    let manifest = toml::from_str::<toml::Value>(&contents).ok()?;
    let name = manifest.get("package")?.get("name")?.as_str()?;
    Some(name.to_owned())
}
/// Reads the top-level `name` from `dir/package.json`.
///
/// Returns `None` if the file cannot be read, is not valid JSON, or has no
/// string-valued `name` field.
fn read_package_json_name(dir: &Path) -> Option<String> {
    let manifest_path = dir.join("package.json");
    let contents = std::fs::read_to_string(manifest_path).ok()?;
    let manifest = serde_json::from_str::<serde_json::Value>(&contents).ok()?;
    manifest
        .get("name")
        .and_then(serde_json::Value::as_str)
        .map(str::to_owned)
}