use serde::Serialize;
use std::collections::{BTreeMap, BTreeSet};
use std::path::Path;
/// Reduces a git remote URL to a lowercase `host/path` identity string.
///
/// Different spellings of the same remote are meant to compare equal, so that
/// `git@github.com:Acme/Foo.git`, `https://github.com/acme/foo` and
/// `ssh://git@github.com/acme/foo.git` all yield `github.com/acme/foo`.
///
/// Steps, applied to the trimmed, ASCII-lowercased input:
///
/// * With an `ssh://`, `https://`, `http://` or `git://` prefix, the text up to
///   the first `/` is treated as `[userinfo@]host[:port]`. Any userinfo is
///   dropped, and so is a port equal to the scheme's default. Any other
///   numeric port is kept as `host:port`. A non-numeric "port" (the hybrid
///   `ssh://git@host:org/repo` spelling) is treated as the first path segment.
/// * Without a scheme, the input is treated as scp-like `[user@]host:path`.
///   A leading `user@` is dropped and the first `:` becomes `/`.
/// * Finally, trailing `/` characters and then trailing `.git` suffixes are
///   removed.
///
/// The function never fails; input it does not recognize is passed through
/// the scheme-less branch and returned in lowercase.
pub fn normalize_url(url: &str) -> String {
    // (scheme prefix, default port for that scheme)
    const SCHEMES: [(&str, &str); 4] = [
        ("ssh://", "22"),
        ("https://", "443"),
        ("http://", "80"),
        ("git://", "9418"),
    ];

    // Lowercase first so that scheme, user and `.git` matching are all
    // case-insensitive.
    let lowered = url.trim().to_ascii_lowercase();
    let s = lowered.as_str();

    let scheme_match = SCHEMES
        .iter()
        .find_map(|&(prefix, port)| s.strip_prefix(prefix).map(|rest| (rest, port)));

    let joined = match scheme_match {
        Some((rest, default_port)) => {
            let (authority, path) = match rest.split_once('/') {
                Some((a, p)) => (a, Some(p)),
                None => (rest, None),
            };
            // Userinfo (`git@`, `alice@`, `user:token@`) is not part of the
            // repository identity.
            let host = authority.rsplit_once('@').map_or(authority, |(_, h)| h);
            let host = if host.ends_with(']') {
                // Bracketed IPv6 literal without a port; its colons are not
                // port separators.
                host.to_string()
            } else {
                match host.rsplit_once(':') {
                    Some((h, port)) if port.is_empty() || port == default_port => h.to_string(),
                    Some((h, seg)) if !seg.bytes().all(|b| b.is_ascii_digit()) => {
                        format!("{h}/{seg}")
                    }
                    _ => host.to_string(),
                }
            };
            match path {
                Some(p) => format!("{host}/{p}"),
                None => host,
            }
        }
        None => {
            // scp-like form: a user name cannot contain `/` or `:`.
            let rest = match s.split_once('@') {
                Some((user, rest)) if !user.contains(|c: char| c == '/' || c == ':') => rest,
                _ => s,
            };
            rest.replacen(':', "/", 1)
        }
    };

    joined
        .trim_end_matches('/')
        .trim_end_matches(".git")
        .to_string()
}
/// One `[submodule "…"]` section read from a `.gitmodules` file by [`parse`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Entry {
/// Section name: the header text after `[submodule ` with surrounding `"` and `]` trimmed.
pub name: String,
/// Value of the section's `path` key; empty when the section does not set one.
pub path: String,
/// Value of the section's `url` key exactly as written (not normalized); empty when absent.
pub url: String,
/// Value of the section's `branch` key, or `None` when the section does not set one.
pub branch: Option<String>,
}
/// Extracts the submodule sections from the text of a `.gitmodules` file.
///
/// Returns one [`Entry`] per `[submodule "name"]` header, in file order. Within
/// a section the `path`, `url` and `branch` keys are recognized; every other
/// line (comments, unknown keys, blank lines) is ignored. A key that appears
/// more than once keeps its last non-empty value.
///
/// Keys are matched case-insensitively and with any amount of whitespace
/// around `=`, so `path=foo`, `path = foo` and `Path\t=\tfoo` are equivalent.
/// A section header that is not a submodule header ends the current entry, so
/// keys of an unrelated section are never attributed to the preceding
/// submodule.
///
/// Entries are returned even when `path` or `url` is missing (the fields are
/// left empty); validating them is the caller's job.
pub fn parse(text: &str) -> Vec<Entry> {
    let mut out = Vec::new();
    let mut cur: Option<Entry> = None;

    for line in text.lines() {
        let l = line.trim();

        if l.starts_with('[') {
            // Any header closes the entry under construction.
            out.extend(cur.take());
            if let Some(rest) = l.strip_prefix("[submodule ") {
                cur = Some(Entry {
                    name: rest
                        .trim()
                        .trim_matches(|c| c == '"' || c == ']')
                        .to_string(),
                    path: String::new(),
                    url: String::new(),
                    branch: None,
                });
            }
            continue;
        }

        // Key/value lines only matter inside a submodule section.
        let Some(entry) = cur.as_mut() else {
            continue;
        };
        let Some((key, value)) = l.split_once('=') else {
            continue;
        };
        let key = key.trim();
        let value = value.trim();
        if value.is_empty() {
            // An empty assignment carries no information; keep what we have.
            continue;
        }

        if key.eq_ignore_ascii_case("path") {
            entry.path = value.to_string();
        } else if key.eq_ignore_ascii_case("url") {
            entry.url = value.to_string();
        } else if key.eq_ignore_ascii_case("branch") {
            entry.branch = Some(value.to_string());
        }
    }

    out.extend(cur);
    out
}
/// One repository in the submodule graph, as reported by [`scan`].
#[derive(Debug, Serialize)]
pub struct Node {
/// Repository identity: the normalized submodule URL, or the caller-supplied id for the root.
pub id: String,
/// Number of distinct repositories that list this one as a submodule.
pub fan_in: usize,
/// `true` when at least one submodule was recorded for this repository.
pub has_submodules: bool,
/// Paths, relative to the scanned root, at which this repository was found (sorted, de-duplicated).
pub paths: Vec<String>,
/// Branch names requested for this repository; filled only when more than one distinct name was seen.
pub branch_conflicts: Vec<String>,
/// One of `"root"`, `"lift-shared"`, `"convert"` or `"leaf-convert"`.
pub recommendation: &'static str,
}
/// Result of [`scan`]: every repository found, in processing order.
#[derive(Debug, Serialize)]
pub struct Plan {
/// Nodes ordered so that a repository comes after all of its submodules;
/// when the graph contains a cycle the remaining nodes are appended in id order.
pub order: Vec<Node>,
}
/// Walks the `.gitmodules` files below `root` and produces a bottom-up [`Plan`].
///
/// `root` is the checkout directory of the top-level repository and `root_id`
/// is the id reported for it (used verbatim, not normalized). Every submodule
/// is identified by [`normalize_url`] of its URL, so the same remote reached
/// through several parents becomes a single node.
///
/// For each node the plan records:
///
/// * `fan_in` – how many distinct repositories reference it;
/// * `paths` – every root-relative path it was found at;
/// * `branch_conflicts` – the requested branches, if more than one differs;
/// * `recommendation` – `"root"` for `root_id`, `"lift-shared"` when
///   `fan_in >= 2`, `"convert"` when it has submodules of its own, otherwise
///   `"leaf-convert"`.
///
/// Nodes are ordered so that a repository appears after all of its
/// submodules. If the graph has a cycle, the nodes that cannot be ordered are
/// appended in id order.
///
/// A missing or unreadable `.gitmodules` is treated as "no submodules". A
/// repository that occurs at several paths is descended into only once (the
/// first checkout that has a `.gitmodules` file). Entries lacking a `path`, or
/// whose URL normalizes to the empty string, are skipped: without a path the
/// walk would re-read the current directory and make the entry its own
/// submodule, and without a URL the entry has no identity.
pub fn scan(root: &Path, root_id: &str) -> Plan {
    // Edge and attribute tables filled in by the walk, keyed by repository id.
    #[derive(Default)]
    struct Graph {
        children: BTreeMap<String, BTreeSet<String>>,
        parents: BTreeMap<String, BTreeSet<String>>,
        branches: BTreeMap<String, BTreeSet<String>>,
        paths: BTreeMap<String, BTreeSet<String>>,
        // Ids already descended into; guards against cycles and repeated work.
        seen: BTreeSet<String>,
    }

    impl Graph {
        // Records the submodules declared in `dir/.gitmodules` for repository
        // `id`, then recurses into each checked-out submodule not yet visited.
        // `prefix` is the path of `dir` relative to the scan root.
        fn walk(&mut self, dir: &Path, prefix: &str, id: &str) {
            self.children.entry(id.to_string()).or_default();
            let Ok(text) = std::fs::read_to_string(dir.join(".gitmodules")) else {
                return;
            };
            for e in parse(&text) {
                if e.path.is_empty() {
                    continue;
                }
                let cid = normalize_url(&e.url);
                if cid.is_empty() {
                    continue;
                }
                let full = if prefix.is_empty() {
                    e.path.clone()
                } else {
                    format!("{prefix}/{}", e.path)
                };

                self.children
                    .entry(id.to_string())
                    .or_default()
                    .insert(cid.clone());
                self.parents
                    .entry(cid.clone())
                    .or_default()
                    .insert(id.to_string());
                self.paths
                    .entry(cid.clone())
                    .or_default()
                    .insert(full.clone());
                if let Some(b) = &e.branch {
                    self.branches
                        .entry(cid.clone())
                        .or_default()
                        .insert(b.clone());
                }
                // Every node, including leaves, needs a key in `children`.
                self.children.entry(cid.clone()).or_default();

                // NOTE(review): `path` is joined unvalidated, so an absolute or
                // `..` path makes the walk read outside `root` — confirm that
                // the scanned repositories are trusted.
                let sub = dir.join(&e.path);
                if sub.join(".gitmodules").is_file() && self.seen.insert(cid.clone()) {
                    self.walk(&sub, &full, &cid);
                }
            }
        }
    }

    let mut graph = Graph::default();
    graph.seen.insert(root_id.to_string());
    graph.walk(root, "", root_id);
    let Graph {
        children,
        parents,
        branches,
        paths,
        ..
    } = graph;

    // Repeatedly emit every node whose submodules have all been emitted.
    let all: Vec<String> = children.keys().cloned().collect();
    let mut order_ids: Vec<String> = Vec::with_capacity(all.len());
    let mut placed: BTreeSet<String> = BTreeSet::new();
    while order_ids.len() < all.len() {
        let before = order_ids.len();
        for id in &all {
            if placed.contains(id) {
                continue;
            }
            if children[id].iter().all(|c| placed.contains(c)) {
                placed.insert(id.clone());
                order_ids.push(id.clone());
            }
        }
        if order_ids.len() == before {
            // No progress means the rest is tangled in a cycle: flush it in
            // id order so the loop terminates with every node present.
            for id in &all {
                if placed.insert(id.clone()) {
                    order_ids.push(id.clone());
                }
            }
        }
    }

    let order = order_ids
        .into_iter()
        .map(|id| {
            let fan_in = parents.get(&id).map_or(0, BTreeSet::len);
            let has_sub = !children[&id].is_empty();
            let conflicts: Vec<String> = branches
                .get(&id)
                .filter(|b| b.len() > 1)
                .map(|b| b.iter().cloned().collect())
                .unwrap_or_default();
            let recommendation = if id == root_id {
                "root"
            } else if fan_in >= 2 {
                "lift-shared"
            } else if has_sub {
                "convert"
            } else {
                "leaf-convert"
            };
            let node_paths: Vec<String> = paths
                .get(&id)
                .map(|s| s.iter().cloned().collect())
                .unwrap_or_default();
            Node {
                id,
                fan_in,
                has_submodules: has_sub,
                paths: node_paths,
                branch_conflicts: conflicts,
                recommendation,
            }
        })
        .collect();
    Plan { order }
}
// Unit tests for URL normalization, `.gitmodules` parsing and plan generation.
#[cfg(test)]
mod tests {
use super::*;
// scp-like, https and ssh spellings of one remote must normalize to the same id.
#[test]
fn url_identity_unifies_schemes() {
let a = normalize_url("git@github.com:Acme/Foo.git");
let b = normalize_url("https://github.com/acme/foo");
let c = normalize_url("ssh://git@github.com/acme/foo.git");
assert_eq!(a, "github.com/acme/foo");
assert_eq!(a, b);
assert_eq!(b, c);
}
// A single tab-indented section with path, url and branch yields one entry.
#[test]
fn parses_gitmodules_with_branch() {
let e = parse(
"[submodule \"monty\"]\n\tpath = monty\n\turl = git@github.com:x/monty.git\n\tbranch = dev\n",
);
assert_eq!(e.len(), 1);
assert_eq!(e[0].path, "monty");
assert_eq!(e[0].branch.as_deref(), Some("dev"));
}
// Fixture: root -> {s, b}, and both s and b -> h. The shared `h` must get
// fan_in 2 and "lift-shared", and must be ordered before its parents.
#[test]
fn fan_in_drives_lift_recommendation() {
let d = tempfile::tempdir().unwrap();
let root = d.path();
std::fs::write(
root.join(".gitmodules"),
"[submodule \"s\"]\npath = s\nurl = git@h:o/samtools-rs.git\n\
[submodule \"b\"]\npath = b\nurl = git@h:o/bcftools-rs.git\n",
)
.unwrap();
// Both first-level submodules declare the same nested remote (via https).
for sub in ["s", "b"] {
let p = root.join(sub);
std::fs::create_dir_all(&p).unwrap();
std::fs::write(
p.join(".gitmodules"),
"[submodule \"h\"]\npath = h\nurl = https://h/o/htslib-rs.git\n",
)
.unwrap();
}
let plan = scan(root, "h/o/root");
let htslib = plan.order.iter().find(|n| n.id == "h/o/htslib-rs").unwrap();
assert_eq!(htslib.fan_in, 2);
assert_eq!(htslib.recommendation, "lift-shared");
// Index of a node id within the plan's order.
let pos = |s: &str| plan.order.iter().position(|n| n.id == s).unwrap();
assert!(pos("h/o/htslib-rs") < pos("h/o/samtools-rs"));
assert!(pos("h/o/samtools-rs") < pos("h/o/root"));
}
}