use std::collections::BTreeSet;
use std::path::{Path, PathBuf};
use std::process::Command;
use crate::pack;
/// A git repository found on disk that the workspace's pack manifests do not declare.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct UndeclaredRepo {
    /// Location of the repository as recorded by the scanner (relative to the workspace root
    /// whenever the prefix can be stripped).
    pub path: PathBuf,
    /// Origin remote URL read from the repository's git configuration, if one was obtained.
    pub inferred_url: Option<String>,
}
/// Errors produced while scanning a workspace for undeclared repositories.
#[derive(Debug, thiserror::Error)]
pub enum ScanError {
    /// A directory could not be listed.
    ///
    /// The message says "workspace root", but the walker in this file builds this variant for
    /// whichever directory failed to open; `path` names that directory.
    #[error("workspace root unreadable: {path}: {source}")]
    WorkspaceUnreadable {
        /// Directory whose listing failed.
        path: PathBuf,
        /// Underlying I/O error from the failed directory read.
        #[source]
        source: std::io::Error,
    },
}
/// Scans `workspace` for git repositories that no pack manifest declares.
///
/// The set of registered pack paths is collected from the `.grex/pack.yaml` manifests first.
/// The directory tree is then walked, and every directory (other than the workspace root)
/// that contains a `.git` entry and is neither a registered pack nor nested inside one is
/// reported.
///
/// `depth` bounds the walk: directories more than `depth` levels below `workspace` are not
/// visited. `None` means unbounded.
///
/// The returned list is sorted by path so the output is deterministic regardless of the
/// order in which the filesystem yields directory entries.
///
/// # Errors
///
/// Returns [`ScanError::WorkspaceUnreadable`] when a directory cannot be listed during the
/// walk.
pub fn scan_undeclared(
    workspace: &Path,
    depth: Option<usize>,
) -> Result<Vec<UndeclaredRepo>, ScanError> {
    let registered = collect_registered_packs(workspace);
    let mut found: Vec<UndeclaredRepo> = Vec::new();
    // The registered set is only read during the walk, so it is passed by shared reference.
    walk(workspace, workspace, 0, depth, &registered, &mut found)?;
    found.sort_by(|a, b| a.path.cmp(&b.path));
    Ok(found)
}
/// Visits `dir` and, unless it turns out to be a repository or the depth cap is reached,
/// recurses into its subdirectories.
///
/// * `workspace` - root of the scan; never reported itself, and used to relativise paths.
/// * `dir` - directory currently being inspected.
/// * `depth` - distance of `dir` from `workspace` (the root is `0`).
/// * `depth_cap` - when `Some(cap)`, no recursion happens from a directory at `depth >= cap`.
/// * `registered` - workspace-relative paths of declared packs.
/// * `out` - accumulator for undeclared repositories.
///
/// A directory with a `.git` entry (file or directory) is treated as a repository: it is
/// reported if undeclared, and its interior is never descended into. `.grex` entries are
/// ignored, as are entries whose type cannot be read or whose name is not valid UTF-8.
///
/// # Errors
///
/// Returns [`ScanError::WorkspaceUnreadable`] if `dir` (at any depth) cannot be listed.
fn walk(
    workspace: &Path,
    dir: &Path,
    depth: usize,
    depth_cap: Option<usize>,
    registered: &BTreeSet<PathBuf>,
    out: &mut Vec<UndeclaredRepo>,
) -> Result<(), ScanError> {
    let listing = match std::fs::read_dir(dir) {
        Ok(listing) => listing,
        Err(source) => {
            return Err(ScanError::WorkspaceUnreadable { path: dir.to_path_buf(), source });
        }
    };

    // Single pass over the directory: note a `.git` marker and gather candidate subdirectories.
    let mut has_git_marker = false;
    let mut children: Vec<PathBuf> = Vec::new();
    for item in listing {
        // Entries that fail to read are skipped rather than aborting the scan.
        let Ok(item) = item else { continue };
        let Ok(kind) = item.file_type() else { continue };
        let raw_name = item.file_name();
        match raw_name.to_str() {
            // `.git` may be a directory or a gitlink file; either marks a repository.
            Some(".git") => has_git_marker = true,
            Some(".grex") | None => {}
            Some(_) if kind.is_dir() => children.push(item.path()),
            Some(_) => {}
        }
    }

    if has_git_marker && dir != workspace {
        let rel = dir.strip_prefix(workspace).unwrap_or(dir).to_path_buf();
        let declared = registered.contains(&rel) || is_inside_registered(&rel, registered);
        if !declared {
            out.push(UndeclaredRepo { path: rel, inferred_url: probe_origin_url(dir) });
        }
        // A repository's interior is never scanned, declared or not.
        return Ok(());
    }

    if matches!(depth_cap, Some(cap) if depth >= cap) {
        return Ok(());
    }

    children
        .into_iter()
        .try_for_each(|child| walk(workspace, &child, depth + 1, depth_cap, registered, &mut *out))
}
/// Reports whether `rel` lies strictly beneath one of the `registered` paths.
///
/// A path equal to a registered entry does not count as being inside it; the comparison is
/// component-wise (`Path::starts_with`), so `alphabet` is not inside `alpha`.
fn is_inside_registered(rel: &Path, registered: &BTreeSet<PathBuf>) -> bool {
    registered.iter().any(|reg| rel != reg && rel.starts_with(reg))
}
/// Gathers the workspace-relative paths of every pack declared by the manifest tree rooted
/// at `workspace`.
///
/// Returns an empty set when no manifest can be read at the root.
fn collect_registered_packs(workspace: &Path) -> BTreeSet<PathBuf> {
    let mut registered = BTreeSet::new();
    // The workspace root is both the prefix to strip and the first manifest directory.
    walk_manifest(workspace, workspace, &mut registered);
    registered
}
/// Reads `<meta_dir>/.grex/pack.yaml` and records each declared child in `out`, recursing
/// into children that carry a manifest of their own.
///
/// A manifest that cannot be read or parsed is silently skipped. Each child's directory is
/// `meta_dir` joined with the child's explicit `path`, falling back to `effective_path()`.
/// Only children located under `workspace` are recorded (as workspace-relative paths);
/// recursion into a child depends solely on whether its `.grex/pack.yaml` is a file.
fn walk_manifest(workspace: &Path, meta_dir: &Path, out: &mut BTreeSet<PathBuf>) {
    let manifest_path = meta_dir.join(".grex").join("pack.yaml");
    let Ok(raw) = std::fs::read_to_string(&manifest_path) else { return };
    let Ok(manifest) = pack::parse(&raw) else { return };

    for child in &manifest.children {
        let segment = child.path.clone().unwrap_or_else(|| child.effective_path());
        let child_dir = meta_dir.join(&segment);

        if let Ok(rel) = child_dir.strip_prefix(workspace) {
            out.insert(rel.to_path_buf());
        }

        let nested_manifest = child_dir.join(".grex").join("pack.yaml");
        if nested_manifest.is_file() {
            walk_manifest(workspace, &child_dir, out);
        }
    }
}
/// Asks `git` for the `remote.origin.url` configured in `repo`.
///
/// Runs `git -C <repo> config --get remote.origin.url` with stderr discarded. Returns `None`
/// when the command cannot be spawned, exits unsuccessfully, prints non-UTF-8 output, or
/// prints only whitespace; otherwise returns the trimmed output.
///
/// NOTE(review): `git -C` presumably performs ordinary repository discovery, so a directory
/// whose `.git` entry is not a usable git dir might yield an enclosing repository's URL —
/// confirm whether that matters for callers.
fn probe_origin_url(repo: &Path) -> Option<String> {
    let mut git = Command::new("git");
    git.arg("-C").arg(repo);
    git.args(["config", "--get", "remote.origin.url"]);
    git.stderr(std::process::Stdio::null());

    let finished = git.output().ok()?;
    if !finished.status.success() {
        return None;
    }

    let text = String::from_utf8(finished.stdout).ok()?;
    match text.trim() {
        "" => None,
        url => Some(url.to_string()),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Creates a minimal `.git/` directory (with a `HEAD` file) inside `dir` so the scanner
    /// sees it as a repository.
    fn fake_repo(dir: &Path) {
        fs::create_dir_all(dir.join(".git")).unwrap();
        fs::write(dir.join(".git/HEAD"), b"ref: refs/heads/main\n").unwrap();
    }

    /// Writes `<meta_dir>/.grex/pack.yaml` describing a meta pack called `name` whose
    /// children are the given `(path segment, url)` pairs.
    fn write_meta_yaml(meta_dir: &Path, name: &str, children: &[(&str, &str)]) {
        let grex_dir = meta_dir.join(".grex");
        fs::create_dir_all(&grex_dir).unwrap();
        let mut yaml = format!("schema_version: \"1\"\nname: {name}\ntype: meta\n");
        if !children.is_empty() {
            yaml.push_str("children:\n");
            for (segment, url) in children {
                // `path` must line up with `url` (two columns past the dash) so both keys
                // belong to the same list item; a shallower indent is not valid YAML.
                yaml.push_str(&format!("  - url: {url}\n    path: {segment}\n"));
            }
        }
        fs::write(grex_dir.join("pack.yaml"), yaml).unwrap();
    }

    /// A workspace with no `.git` entries anywhere yields no findings.
    #[test]
    fn empty_workspace_returns_nothing() {
        let d = tempdir().unwrap();
        let found = scan_undeclared(d.path(), None).unwrap();
        assert!(found.is_empty(), "no .git/ → no findings; got: {found:?}");
    }

    /// A repository that the root manifest declares is not reported.
    #[test]
    fn registered_pack_with_git_is_filtered_out() {
        let d = tempdir().unwrap();
        write_meta_yaml(d.path(), "root", &[("alpha", "https://example/alpha.git")]);
        fake_repo(&d.path().join("alpha"));
        let found = scan_undeclared(d.path(), None).unwrap();
        assert!(found.is_empty(), "registered pack must not be reported; got: {found:?}");
    }

    /// A repository absent from the manifest is reported with its workspace-relative path.
    #[test]
    fn untracked_repo_is_reported() {
        let d = tempdir().unwrap();
        write_meta_yaml(d.path(), "root", &[("alpha", "https://example/alpha.git")]);
        fake_repo(&d.path().join("alpha"));
        fake_repo(&d.path().join("vendor").join("legacy"));
        let found = scan_undeclared(d.path(), None).unwrap();
        assert_eq!(found.len(), 1, "exactly one undeclared repo; got: {found:?}");
        assert_eq!(found[0].path, PathBuf::from("vendor").join("legacy"));
        assert!(found[0].inferred_url.is_none());
    }

    /// With a depth cap of one, only top-level directories are inspected.
    #[test]
    fn nested_tree_with_depth_one_only_walks_top_level() {
        let d = tempdir().unwrap();
        write_meta_yaml(d.path(), "root", &[("alpha", "https://example/alpha.git")]);
        fake_repo(&d.path().join("alpha"));
        fake_repo(&d.path().join("vendor").join("legacy"));
        let depth1 = scan_undeclared(d.path(), Some(1)).unwrap();
        assert!(depth1.is_empty(), "depth=1 must not find vendor/legacy/.git; got: {depth1:?}");
        let unbounded = scan_undeclared(d.path(), None).unwrap();
        assert_eq!(unbounded.len(), 1);
        assert_eq!(unbounded[0].path, PathBuf::from("vendor").join("legacy"));
    }

    /// Repositories nested inside a registered pack are not reported.
    #[test]
    fn registered_pack_subtree_is_skipped() {
        let d = tempdir().unwrap();
        write_meta_yaml(d.path(), "root", &[("alpha", "https://example/alpha.git")]);
        fake_repo(&d.path().join("alpha"));
        fake_repo(&d.path().join("alpha").join("inner"));
        let found = scan_undeclared(d.path(), None).unwrap();
        assert!(found.is_empty(), "interior of registered pack must be skipped; got: {found:?}");
    }

    /// A `.git` directory at the workspace root itself is not a finding.
    #[test]
    fn workspace_root_git_is_not_reported() {
        let d = tempdir().unwrap();
        fake_repo(d.path());
        let found = scan_undeclared(d.path(), None).unwrap();
        assert!(found.is_empty(), "workspace root must not be reported; got: {found:?}");
    }

    /// Nothing beneath a `.grex/` directory is scanned.
    #[test]
    fn dotgrex_dir_is_skipped() {
        let d = tempdir().unwrap();
        fs::create_dir_all(d.path().join(".grex")).unwrap();
        fake_repo(&d.path().join(".grex").join("foreign"));
        let found = scan_undeclared(d.path(), None).unwrap();
        assert!(found.is_empty(), ".grex/ tree must be skipped; got: {found:?}");
    }

    /// A `.git` *file* (gitlink, as used by worktrees) marks a repository just like a
    /// `.git` directory does.
    #[test]
    fn gitlink_file_variant_is_detected() {
        let d = tempdir().unwrap();
        let repo = d.path().join("worktree").join("alpha");
        fs::create_dir_all(&repo).unwrap();
        fs::write(repo.join(".git"), b"gitdir: /some/elsewhere/.git/worktrees/alpha\n").unwrap();
        let found = scan_undeclared(d.path(), None).unwrap();
        assert_eq!(found.len(), 1, ".git file (gitlink) must be detected; got: {found:?}");
        assert_eq!(found[0].path, PathBuf::from("worktree").join("alpha"));
    }

    /// Findings come back sorted by path regardless of creation order.
    #[test]
    fn results_are_sorted_for_determinism() {
        let d = tempdir().unwrap();
        fake_repo(&d.path().join("zebra"));
        fake_repo(&d.path().join("alpha"));
        fake_repo(&d.path().join("mango"));
        let found = scan_undeclared(d.path(), None).unwrap();
        let paths: Vec<&Path> = found.iter().map(|r| r.path.as_path()).collect();
        assert_eq!(paths, vec![Path::new("alpha"), Path::new("mango"), Path::new("zebra")],);
    }
}