use std::collections::{HashMap, HashSet};
use std::path::Path;
use crate::error::LaunchError;
use crate::fs::{FileSystem, normalize_dir};
use crate::signal::{Signal, SignalOutput};
use crate::types::{
DirContext, Discovery, Language, LanguageConfig, Service, merge_env_vars, merge_string_vecs,
};
/// Runs the complete discovery pipeline through the [`FileSystem`] abstraction.
///
/// The tree under `root` is walked so that every signal can observe each
/// entry, each signal is then asked to produce its output, and the outputs
/// are assembled into a single [`Discovery`].
///
/// # Errors
///
/// Returns the [`LaunchError`] reported by the walk when a directory cannot
/// be listed.
pub fn discover(
    root: &Path,
    mut signals: Vec<Box<dyn Signal>>,
    fs: &dyn FileSystem,
) -> Result<Discovery, LaunchError> {
    walk(root, signals.as_mut_slice(), fs)?;
    let discovery = assemble(generate(signals.as_mut_slice(), fs), root);
    Ok(discovery)
}
/// Directory names that the walkers in this file never descend into.
///
/// The check is by exact entry name and is only applied to directories: a
/// directory whose name is in this set is neither reported to signals nor
/// traversed.
static EXCLUDED: phf::Set<&'static str> = phf::phf_set! {
"node_modules", ".git", ".svn", ".hg", "vendor",
"__pycache__", ".mypy_cache", ".pytest_cache", ".tox", ".nox",
"target", "dist", "build", "out",
".next", ".nuxt", ".svelte-kit", ".output",
".vercel", ".netlify", ".cache", ".turbo",
".idea", ".vscode", ".vs", ".fleet",
"coverage", ".nyc_output", ".parcel-cache", ".webpack",
"deps", "_build", ".gradle", ".mvn",
"tmp", "temp",
};
/// Depth-first traversal of `root` through the [`FileSystem`] abstraction.
///
/// Every entry that is not an excluded directory is shown to each signal
/// together with the directory that contains it; directories are then queued
/// for traversal themselves.
///
/// # Errors
///
/// Stops at the first directory that cannot be listed and returns
/// [`LaunchError::Filesystem`] carrying that directory's path.
fn walk(
    root: &Path,
    signals: &mut [Box<dyn Signal>],
    fs: &dyn FileSystem,
) -> Result<(), LaunchError> {
    let mut pending = vec![root.to_path_buf()];

    while let Some(current) = pending.pop() {
        let listing = match fs.read_dir(&current) {
            Ok(listing) => listing,
            Err(source) => {
                return Err(LaunchError::Filesystem {
                    path: current.clone(),
                    source,
                });
            }
        };

        for entry in listing {
            let skipped = entry.is_dir && EXCLUDED.contains(entry.name.as_str());
            if skipped {
                continue;
            }

            signals
                .iter_mut()
                .for_each(|signal| signal.observe(&current, &entry));

            if entry.is_dir {
                pending.push(entry.path);
            }
        }
    }

    Ok(())
}
/// Walks `root` on the local disk with the parallel `ignore` walker and feeds
/// every discovered entry to the signals.
///
/// Hidden files are included, global gitignore and `.git/info/exclude` rules
/// are turned off, and directories named in [`EXCLUDED`] are pruned. The root
/// itself (depth 0) is not reported. Paths handed to the signals are relative
/// to `root`; entries directly under the root are reported with `"."` as
/// their directory.
///
/// Entries are gathered from the walker threads first and then replayed to
/// the signals on the calling thread, sorted by directory and then by path,
/// so the observation order is the same on every run regardless of thread
/// scheduling.
///
/// # Errors
///
/// Currently always returns `Ok(())`: entries the walker fails to read are
/// skipped rather than reported.
pub fn walk_local(root: &Path, signals: &mut [Box<dyn Signal>]) -> Result<(), LaunchError> {
    use crate::fs::DirEntry;
    use std::path::PathBuf;
    use std::sync::{Arc, Mutex};

    let collected: Arc<Mutex<Vec<(PathBuf, DirEntry)>>> = Arc::new(Mutex::new(Vec::new()));

    ignore::WalkBuilder::new(root)
        .hidden(false)
        .git_global(false)
        .git_exclude(false)
        .filter_entry(|e| {
            if e.file_type().is_some_and(|ft| ft.is_dir()) {
                if let Some(name) = e.file_name().to_str() {
                    return !EXCLUDED.contains(name);
                }
            }
            true
        })
        .build_parallel()
        .run(|| {
            // Each walker thread gets its own handle to the shared buffer.
            let collected = Arc::clone(&collected);
            let root = root.to_path_buf();
            Box::new(move |result| {
                // Best effort: unreadable entries are skipped, not fatal.
                let entry = match result {
                    Ok(e) => e,
                    Err(_) => return ignore::WalkState::Continue,
                };
                // Depth 0 is the root itself, which has no containing directory to report.
                if entry.depth() == 0 {
                    return ignore::WalkState::Continue;
                }

                let path = entry.path().to_path_buf();
                let is_dir = entry.file_type().is_some_and(|ft| ft.is_dir());
                let name = entry.file_name().to_string_lossy().into_owned();
                let rel = path.strip_prefix(&root).unwrap_or(&path);
                // Top-level entries have an empty parent; report it as ".".
                let dir = rel
                    .parent()
                    .map(|p| {
                        if p.as_os_str().is_empty() {
                            PathBuf::from(".")
                        } else {
                            p.to_path_buf()
                        }
                    })
                    .unwrap_or_else(|| PathBuf::from("."));

                // A poisoned lock only means another walker panicked; the
                // entries pushed so far are still usable.
                collected
                    .lock()
                    .unwrap_or_else(|p| p.into_inner())
                    .push((
                        dir,
                        DirEntry {
                            path: rel.to_path_buf(),
                            name,
                            is_dir,
                        },
                    ));
                ignore::WalkState::Continue
            })
        });

    // `run` has returned, so every per-thread clone of the `Arc` is gone and
    // this is the only remaining handle.
    #[allow(clippy::expect_used)]
    let mut entries = Arc::try_unwrap(collected)
        .expect("all walker threads finished")
        .into_inner()
        .unwrap_or_else(|p| p.into_inner());

    // Walker threads push in arbitrary order; sort so signals see a stable,
    // directory-by-directory sequence.
    entries.sort_unstable_by(|(dir_a, a), (dir_b, b)| {
        dir_a.cmp(dir_b).then_with(|| a.path.cmp(&b.path))
    });

    for (dir, entry) in &entries {
        for signal in signals.iter_mut() {
            signal.observe(dir, entry);
        }
    }

    Ok(())
}
/// Discovery pipeline that uses [`walk_local`] for the traversal.
///
/// The directory walk goes straight to the local disk; `fs` is only handed to
/// the signals when they generate their output.
///
/// # Errors
///
/// Propagates any [`LaunchError`] returned by [`walk_local`].
pub fn discover_local_impl(
    root: &Path,
    mut signals: Vec<Box<dyn Signal>>,
    fs: &dyn FileSystem,
) -> Result<Discovery, LaunchError> {
    walk_local(root, signals.as_mut_slice())?;
    let discovery = assemble(generate(signals.as_mut_slice(), fs), root);
    Ok(discovery)
}
/// Asks every signal for its output and returns the ones that succeeded.
///
/// A failing signal does not abort the run: its error is written to stderr
/// with the signal's name and the signal is left out of the result. Output
/// order follows the order of `signals`.
pub fn generate(signals: &mut [Box<dyn Signal>], fs: &dyn FileSystem) -> Vec<SignalOutput> {
    let mut collected = Vec::with_capacity(signals.len());
    collected.extend(
        signals
            .iter_mut()
            .filter_map(|signal| match signal.generate(fs) {
                Ok(output) => Some(output),
                Err(e) => {
                    eprintln!("signal {} failed: {e}", signal.name());
                    None
                }
            }),
    );
    collected
}
fn assemble(outputs: Vec<SignalOutput>, root: &Path) -> Discovery {
let services = dedup_services(&outputs, root);
let contexts = merge_contexts(&outputs);
let mut services = layer_contexts(services, &contexts);
let claimed: HashSet<String> = services.iter().map(|s| s.dir.clone()).collect();
let promoted = promote_unclaimed(&contexts, &claimed, root);
let mut promoted = layer_contexts(promoted, &contexts);
services.append(&mut promoted);
let monorepo = outputs.into_iter().find_map(|o| o.monorepo);
if let Some(ref mono) = monorepo {
for service in &mut services {
if let Some(pkg) = mono.packages.values().find(|p| p.dir == service.dir) {
service.detected_by.push(format!("monorepo:{}", pkg.name));
}
}
}
Discovery { services, monorepo }
}
/// Derives a service name from a directory string.
///
/// The root directory (`"."` or the empty string) is named `"app"`. Any other
/// directory is named after its final path component; if the path has no
/// final component (for example `".."`), the directory string itself is used.
/// `_root` is accepted but not consulted.
fn dir_name(dir: &str, _root: &Path) -> String {
    if dir.is_empty() || dir == "." {
        return String::from("app");
    }

    match Path::new(dir).file_name() {
        Some(leaf) => leaf.to_string_lossy().into_owned(),
        None => dir.to_owned(),
    }
}
/// Folds `other` into `base`, with `base` taking precedence.
///
/// Optional fields are copied from `other` only when `base` has no value. In
/// debug builds, a line is written to stderr each time a value present in
/// `other` is discarded because `base` already had one. Commands, env vars
/// and system deps go through their own merge helpers, volumes from `other`
/// are appended, and `detected_by` gains the entries of `other` that `base`
/// did not list before the merge.
fn merge_service(base: &mut Service, other: &Service) {
    macro_rules! fill_field {
        // Debug-only report of a value from `other` that lost to `base`.
        (@report $field:ident) => {
            #[cfg(debug_assertions)]
            if other.$field.is_some() {
                eprintln!(
                    "[merge] {}/{}: dropping {} from {:?}, keeping {:?}",
                    base.dir,
                    base.name,
                    stringify!($field),
                    other.detected_by,
                    base.detected_by,
                );
            }
        };
        // Field whose type is `Copy`: plain assignment.
        (copy $field:ident) => {
            if base.$field.is_some() {
                fill_field!(@report $field);
            } else {
                base.$field = other.$field;
            }
        };
        // Field that must be cloned.
        ($field:ident) => {
            if base.$field.is_some() {
                fill_field!(@report $field);
            } else {
                base.$field.clone_from(&other.$field);
            }
        };
    }

    fill_field!(copy language);
    fill_field!(runtime);
    fill_field!(framework);
    fill_field!(package_manager);
    fill_field!(language_config);
    fill_field!(copy network);
    fill_field!(copy exec_mode);
    base.commands.fill_from(&other.commands);
    fill_field!(dockerfile);
    fill_field!(output_dir);
    merge_env_vars(&mut base.env, &other.env);
    merge_string_vecs(&mut base.system_deps, &other.system_deps);
    base.volumes.extend(other.volumes.iter().cloned());
    fill_field!(resources);
    fill_field!(copy replicas);
    fill_field!(copy restart);
    fill_field!(healthcheck);
    fill_field!(schedule);

    // Snapshot of what `base` listed before the merge; only entries missing
    // from that snapshot are appended.
    let already_listed: HashSet<String> = base.detected_by.iter().cloned().collect();
    base.detected_by.extend(
        other
            .detected_by
            .iter()
            .filter(|tag| !already_listed.contains(*tag))
            .cloned(),
    );
}
fn service_to_context(s: &Service) -> DirContext {
DirContext {
dir: s.dir.clone(),
language: s.language,
runtime: s.runtime.clone(),
framework: s.framework.clone(),
package_manager: s.package_manager.clone(),
language_config: s.language_config.clone(),
output_dir: s.output_dir.clone(),
commands: s.commands.clone(),
env: s.env.clone(),
system_deps: s.system_deps.clone(),
}
}
/// Collapses the services reported by all signals into one list, handling
/// each normalized directory independently and in sorted directory order.
///
/// Within a directory, a service is "derived" when its name equals the name
/// [`dir_name`] would give the directory, and "explicit" otherwise:
///
/// * only derived services: they are merged into a single service, the first
///   one reported taking precedence;
/// * at least one explicit service: explicit services are merged per name
///   (kept in first-seen order), and the derived services are not emitted
///   themselves but are combined into a context that is layered onto every
///   explicit service.
fn dedup_services(outputs: &[SignalOutput], root: &Path) -> Vec<Service> {
    let mut grouped: HashMap<String, Vec<Service>> = HashMap::new();
    for service in outputs.iter().flat_map(|output| output.services.iter()) {
        let key = normalize_dir(&service.dir).into_owned();
        grouped.entry(key).or_default().push(service.clone());
    }

    // Directory keys are unique, so ordering by key alone is total.
    let mut groups: Vec<(String, Vec<Service>)> = grouped.into_iter().collect();
    groups.sort_by(|a, b| a.0.cmp(&b.0));

    let mut deduped = Vec::new();
    for (dir, candidates) in &groups {
        let fallback_name = dir_name(dir, root);
        let (derived, explicit): (Vec<&Service>, Vec<&Service>) = candidates
            .iter()
            .partition(|s| s.name == fallback_name);

        if explicit.is_empty() {
            // Every group holds at least one service, so with no explicit
            // ones `derived` cannot be empty.
            let merged = derived
                .iter()
                .skip(1)
                .fold(derived[0].clone(), |mut acc, extra| {
                    merge_service(&mut acc, extra);
                    acc
                });
            deduped.push(merged);
            continue;
        }

        let mut named: Vec<(String, Service)> = Vec::new();
        for candidate in explicit.iter().copied() {
            match named.iter().position(|(name, _)| *name == candidate.name) {
                Some(idx) => merge_service(&mut named[idx].1, candidate),
                None => named.push((candidate.name.clone(), candidate.clone())),
            }
        }

        if let Some((first, rest)) = derived.split_first() {
            let mut shared = service_to_context(first);
            for extra in rest {
                let extra_ctx = service_to_context(extra);
                shared.merge(&extra_ctx);
            }
            for (_, service) in &mut named {
                service.layer_context(&shared);
            }
        }

        deduped.extend(named.into_iter().map(|(_, service)| service));
    }

    deduped
}
/// Builds one [`DirContext`] per normalized directory from all signal outputs.
///
/// The first context seen for a directory is stored as is; every later
/// context for the same directory is merged into it.
fn merge_contexts(outputs: &[SignalOutput]) -> HashMap<String, DirContext> {
    let mut merged: HashMap<String, DirContext> = HashMap::new();

    for ctx in outputs.iter().flat_map(|output| output.context.iter()) {
        let key = normalize_dir(&ctx.dir).into_owned();
        if let Some(existing) = merged.get_mut(&key) {
            existing.merge(ctx);
        } else {
            merged.insert(key, ctx.clone());
        }
    }

    merged
}
/// Layers onto each service the combined context of its directory and that
/// directory's ancestors, when any such context exists. Services without a
/// matching context are returned untouched.
fn layer_contexts(
    mut services: Vec<Service>,
    contexts: &HashMap<String, DirContext>,
) -> Vec<Service> {
    services.iter_mut().for_each(|service| {
        if let Some(inherited) = build_ancestor_chain(&service.dir, contexts) {
            service.layer_context(&inherited);
        }
    });
    services
}
/// Builds the effective context for `dir` by combining the contexts found on
/// the path from the root directory (`"."`) down to `dir` itself.
///
/// Contexts are combined so that a nearer directory takes precedence: each
/// deeper context is cloned and the accumulated result of its ancestors is
/// merged into it. The root context is included even when the normalized
/// path never reaches the empty path (for example an absolute path).
///
/// Returns `None` when no context exists for `dir` or any of its ancestors.
fn build_ancestor_chain(dir: &str, contexts: &HashMap<String, DirContext>) -> Option<DirContext> {
    let normalized = normalize_dir(dir);
    let path = Path::new(normalized.as_ref());

    // Collected nearest-first; the root, if present, ends up last.
    let mut chain: Vec<&DirContext> = Vec::new();
    let mut root_checked = false;

    for ancestor in path.ancestors() {
        let text = ancestor.to_string_lossy();
        let is_root = text.is_empty() || text == ".";
        if is_root {
            // "." has the empty path as its parent and both name the root,
            // so the walk can reach the root twice; look it up only once.
            if root_checked {
                continue;
            }
            root_checked = true;
        }

        let key: &str = if is_root { "." } else { &*text };
        if let Some(ctx) = contexts.get(key) {
            chain.push(ctx);
        }
    }

    // Paths that never shrink to the empty path still inherit from the root.
    if !root_checked {
        if let Some(ctx) = contexts.get(".") {
            chain.push(ctx);
        }
    }

    // Fold from the root downwards so each deeper context wins over what it inherits.
    let mut layers = chain.into_iter().rev();
    let mut merged = layers.next()?.clone();
    for layer in layers {
        let mut child = layer.clone();
        child.merge(&merged);
        merged = child;
    }
    Some(merged)
}
fn promote_unclaimed(
contexts: &HashMap<String, DirContext>,
claimed: &HashSet<String>,
root: &Path,
) -> Vec<Service> {
let mut promoted = Vec::new();
for (dir, ctx) in contexts {
if claimed.contains(dir) {
continue;
}
let is_spa = matches!(&ctx.language_config, Some(LanguageConfig::Node(nc)) if nc.is_spa);
if ctx.commands.start.is_none() && ctx.language != Some(Language::Html) && !is_spa {
continue;
}
let name = dir_name(dir, root);
let mut service = Service {
name,
dir: dir.clone(),
language: ctx.language,
runtime: ctx.runtime.clone(),
framework: ctx.framework.clone(),
package_manager: ctx.package_manager.clone(),
language_config: ctx.language_config.clone(),
output_dir: ctx.output_dir.clone(),
commands: ctx.commands.clone(),
env: ctx.env.clone(),
system_deps: ctx.system_deps.clone(),
..Service::default()
};
service.detected_by.push(format!("context:{dir}"));
promoted.push(service);
}
promoted.sort_by(|a, b| a.dir.cmp(&b.dir));
promoted
}
// Unit tests for the assembly stage (de-duplication, context layering,
// promotion) and for the directory-exclusion rule used by the walkers.
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
use super::*;
use crate::fs::{DirEntry, MemoryFs};
use crate::signal::SignalOutput;
use crate::types::*;
use std::path::PathBuf;
use std::sync::Mutex;
// Minimal service with only a name and a directory set.
fn svc(name: &str, dir: &str) -> Service {
Service {
name: name.into(),
dir: dir.into(),
..Service::default()
}
}
// Same as `svc`, with the language filled in as well.
fn svc_with_lang(name: &str, dir: &str, lang: Language) -> Service {
Service {
name: name.into(),
dir: dir.into(),
language: Some(lang),
..Service::default()
}
}
// Empty context for the given directory.
fn ctx(dir: &str) -> DirContext {
DirContext {
dir: dir.into(),
..DirContext::default()
}
}
// Signal output that carries only services.
fn output_with_services(services: Vec<Service>) -> SignalOutput {
SignalOutput {
services,
..SignalOutput::default()
}
}
// Signal output that carries only directory contexts.
fn output_with_context(context: Vec<DirContext>) -> SignalOutput {
SignalOutput {
context,
..SignalOutput::default()
}
}
// Env var with no default value.
fn env(key: &str) -> EnvVar {
EnvVar {
key: key.into(),
default: None,
detected_by: Vec::new(),
}
}
// Env var with a default value.
fn env_with_default(key: &str, default: &str) -> EnvVar {
EnvVar {
key: key.into(),
default: Some(default.into()),
detected_by: Vec::new(),
}
}
// One service from one signal passes through unchanged.
#[test]
fn single_service_single_signal() {
let outputs = vec![output_with_services(vec![svc("app", ".")])];
let disc = assemble(outputs, Path::new("/project"));
assert_eq!(disc.services.len(), 1);
assert_eq!(disc.services[0].name, "app");
}
// Two signals report the same directory under its derived name: the result
// is one service carrying the fields of both.
#[test]
fn dedup_same_dir_derived_names() {
let mut s1 = svc("api", "apps/api");
s1.language = Some(Language::TypeScript);
let mut s2 = svc("api", "apps/api");
s2.framework = Some("express".into());
let outputs = vec![
output_with_services(vec![s1]),
output_with_services(vec![s2]),
];
let disc = assemble(outputs, Path::new("/project"));
assert_eq!(disc.services.len(), 1);
assert_eq!(disc.services[0].language, Some(Language::TypeScript)); assert_eq!(disc.services[0].framework.as_deref(), Some("express")); }
// An explicitly named service replaces the derived one for the same
// directory but still inherits the derived service's fields.
#[test]
fn dedup_explicit_wins_over_derived() {
let mut explicit = svc("web", "apps/api");
explicit.commands.start = Some("node server.js".into());
let mut derived = svc("api", "apps/api"); derived.language = Some(Language::TypeScript);
let outputs = vec![
output_with_services(vec![explicit]),
output_with_services(vec![derived]),
];
let disc = assemble(outputs, Path::new("/project"));
assert_eq!(disc.services.len(), 1);
assert_eq!(disc.services[0].name, "web"); assert_eq!(disc.services[0].language, Some(Language::TypeScript)); }
// Explicit services sharing a name in the same directory are merged.
#[test]
fn dedup_explicit_same_name_merged() {
let mut s1 = svc("web", ".");
s1.commands.start = Some("node index.js".into());
let mut s2 = svc("web", ".");
s2.language = Some(Language::JavaScript);
let outputs = vec![
output_with_services(vec![s1]),
output_with_services(vec![s2]),
];
let disc = assemble(outputs, Path::new("/project"));
assert_eq!(disc.services.len(), 1);
assert_eq!(
disc.services[0].commands.start.as_deref(),
Some("node index.js")
);
assert_eq!(disc.services[0].language, Some(Language::JavaScript));
}
// A context for the service's own directory fills the service's unset fields.
#[test]
fn context_layering() {
let s = svc("api", "apps/api");
let mut c = ctx("apps/api");
c.language = Some(Language::Go);
c.commands.build = Some("go build".into());
let outputs = vec![output_with_services(vec![s]), output_with_context(vec![c])];
let disc = assemble(outputs, Path::new("/project"));
assert_eq!(disc.services[0].language, Some(Language::Go));
assert_eq!(disc.services[0].commands.build.as_deref(), Some("go build"));
}
// A value already set on the service is not overwritten by the context.
#[test]
fn context_service_wins() {
let s = svc_with_lang("api", "apps/api", Language::TypeScript);
let mut c = ctx("apps/api");
c.language = Some(Language::JavaScript);
let outputs = vec![output_with_services(vec![s]), output_with_context(vec![c])];
let disc = assemble(outputs, Path::new("/project"));
assert_eq!(disc.services[0].language, Some(Language::TypeScript));
}
// A nested service inherits the runtime declared by the root context.
#[test]
fn ancestor_inheritance() {
let s = svc("api", "apps/api");
let mut root_ctx = ctx(".");
root_ctx.runtime = Some(RuntimeInfo {
name: "node".into(),
version: Some("20.11.1".into()),
source: Some(".node-version".into()),
});
let outputs = vec![
output_with_services(vec![s]),
output_with_context(vec![root_ctx]),
];
let disc = assemble(outputs, Path::new("/project"));
let rt = disc.services[0].runtime.as_ref().unwrap();
assert_eq!(rt.version.as_deref(), Some("20.11.1"));
}
// The nearer context overrides the root for fields both set (runtime),
// while fields only the root sets (language) are still inherited.
#[test]
fn ancestor_child_wins() {
let s = svc("api", "apps/api");
let mut root_ctx = ctx(".");
root_ctx.runtime = Some(RuntimeInfo {
name: "node".into(),
version: Some("18".into()),
source: None,
});
root_ctx.language = Some(Language::JavaScript);
let mut child_ctx = ctx("apps/api");
child_ctx.runtime = Some(RuntimeInfo {
name: "node".into(),
version: Some("20".into()),
source: None,
});
let outputs = vec![
output_with_services(vec![s]),
output_with_context(vec![root_ctx, child_ctx]),
];
let disc = assemble(outputs, Path::new("/project"));
let rt = disc.services[0].runtime.as_ref().unwrap();
assert_eq!(rt.version.as_deref(), Some("20")); assert_eq!(disc.services[0].language, Some(Language::JavaScript)); }
// An unclaimed root context with a start command becomes a service named "app".
#[test]
fn promotion_with_start_command() {
let mut c = ctx(".");
c.commands.start = Some("node server.js".into());
c.language = Some(Language::JavaScript);
let outputs = vec![output_with_context(vec![c])];
let disc = assemble(outputs, Path::new("/project"));
assert_eq!(disc.services.len(), 1);
assert_eq!(disc.services[0].name, "app"); assert_eq!(
disc.services[0].commands.start.as_deref(),
Some("node server.js")
);
}
// A context with only a build command is not promoted.
#[test]
fn no_promotion_without_start() {
let mut c = ctx("apps/lib");
c.language = Some(Language::TypeScript);
c.commands.build = Some("tsc".into());
let outputs = vec![output_with_context(vec![c])];
let disc = assemble(outputs, Path::new("/project"));
assert_eq!(disc.services.len(), 0);
}
// A context whose directory already has a service is not promoted again.
#[test]
fn no_promotion_when_claimed() {
let s = svc("api", "apps/api");
let mut c = ctx("apps/api");
c.commands.start = Some("node index.js".into());
let outputs = vec![output_with_services(vec![s]), output_with_context(vec![c])];
let disc = assemble(outputs, Path::new("/project"));
assert_eq!(disc.services.len(), 1);
assert_eq!(disc.services[0].name, "api"); }
// For an env key present on both, the service's default is kept; keys only
// the context has are added.
#[test]
fn env_dedup_service_wins() {
let mut s = svc("api", "apps/api");
s.env.push(env_with_default("PORT", "8080"));
let mut c = ctx("apps/api");
c.env.push(env_with_default("PORT", "3000"));
c.env.push(env("DATABASE_URL"));
let outputs = vec![output_with_services(vec![s]), output_with_context(vec![c])];
let disc = assemble(outputs, Path::new("/project"));
let port = disc.services[0]
.env
.iter()
.find(|e| e.key == "PORT")
.unwrap();
assert_eq!(port.default.as_deref(), Some("8080")); assert!(disc.services[0].env.iter().any(|e| e.key == "DATABASE_URL")); }
// Across ancestor contexts the nearer default is kept for a shared key, and
// keys unique to either level are all present.
#[test]
fn env_ancestor_dedup() {
let s = svc("api", "apps/api");
let mut root_ctx = ctx(".");
root_ctx.env.push(env_with_default("PORT", "3000"));
root_ctx.env.push(env("GLOBAL_VAR"));
let mut child_ctx = ctx("apps/api");
child_ctx.env.push(env_with_default("PORT", "8080"));
child_ctx.env.push(env("LOCAL_VAR"));
let outputs = vec![
output_with_services(vec![s]),
output_with_context(vec![root_ctx, child_ctx]),
];
let disc = assemble(outputs, Path::new("/project"));
let port = disc.services[0]
.env
.iter()
.find(|e| e.key == "PORT")
.unwrap();
assert_eq!(port.default.as_deref(), Some("8080")); assert!(disc.services[0].env.iter().any(|e| e.key == "GLOBAL_VAR"));
assert!(disc.services[0].env.iter().any(|e| e.key == "LOCAL_VAR"));
}
// Test signal that records each distinct directory it is shown.
struct RecordingSignal {
observed_dirs: Mutex<Vec<String>>,
}
impl RecordingSignal {
fn new() -> Self {
Self {
observed_dirs: Mutex::new(Vec::new()),
}
}
// Snapshot of the directories recorded so far.
fn dirs(&self) -> Vec<String> {
self.observed_dirs.lock().unwrap().clone()
}
}
impl Signal for RecordingSignal {
fn name(&self) -> &'static str {
"recording"
}
// Records the containing directory once; the entry itself is ignored.
fn observe(&mut self, dir: &Path, _entry: &DirEntry) {
let d = dir.to_string_lossy().into_owned();
let mut dirs = self.observed_dirs.lock().unwrap();
if !dirs.contains(&d) {
dirs.push(d);
}
}
fn generate(&mut self, _fs: &dyn FileSystem) -> Result<SignalOutput, LaunchError> {
Ok(SignalOutput::default())
}
}
// Checks the exclusion rule against an in-memory tree. The traversal loop is
// written out inline here rather than calling `walk`.
#[test]
fn walk_skips_excluded() {
let fs = MemoryFs::new(&[
("package.json", "{}"),
("src/main.ts", ""),
("node_modules/express/package.json", "{}"),
(".git/config", ""),
]);
let mut signal = RecordingSignal::new();
let mut stack = vec![PathBuf::from(".")];
while let Some(dir) = stack.pop() {
let entries = fs.read_dir(&dir).unwrap();
for entry in &entries {
if entry.is_dir && EXCLUDED.contains(entry.name.as_str()) {
continue;
}
signal.observe(&dir, entry);
if entry.is_dir {
stack.push(entry.path.clone());
}
}
}
let dirs = signal.dirs();
assert!(dirs.contains(&".".to_string()));
assert!(dirs.iter().any(|d| d.contains("src")));
assert!(!dirs.iter().any(|d| d.contains("node_modules")));
assert!(!dirs.iter().any(|d| d.contains(".git")));
}
// No signal outputs at all yields an empty discovery.
#[test]
fn empty_repo() {
let outputs: Vec<SignalOutput> = vec![];
let disc = assemble(outputs, Path::new("/project"));
assert!(disc.services.is_empty());
assert!(disc.monorepo.is_none());
}
// An HTML context is promoted even though it has no start command.
#[test]
fn promotion_html_no_start() {
let mut c = ctx(".");
c.language = Some(Language::Html);
c.output_dir = Some(".".into());
let outputs = vec![output_with_context(vec![c])];
let disc = assemble(outputs, Path::new("/project"));
assert_eq!(disc.services.len(), 1);
assert_eq!(disc.services[0].language, Some(Language::Html));
assert_eq!(disc.services[0].output_dir.as_deref(), Some("."));
assert!(disc.services[0].commands.start.is_none());
}
}