repoverse 0.1.6

Multi-repo workspace tool: keep many git repos in sync and roll changes up across dependency boundaries
//! Recursive `.gitmodules` discovery, URL-identity dedup, and leaf-up
//! conversion classification for `rv adopt --plan`.

use serde::Serialize;
use std::collections::{BTreeMap, BTreeSet};
use std::path::Path;

/// Canonical repo identity: `host/owner/repo`, lowercased, no `.git`.
/// Unifies `git@h:o/r.git`, `https://h/o/r`, `ssh://git@h/o/r`.
///
/// The input is trimmed and ASCII-lowercased up front, so the scheme and the
/// `.git` suffix are recognised regardless of case. Any `user@` /
/// `user:password@` prefix in the host part is dropped (not just the literal
/// `git@`), so the same repo reached with different credentials maps to one
/// identity. An `@` that appears later in the path is left untouched.
///
/// Only the first `:` is rewritten to `/`; an explicit port therefore stays
/// part of the identity (`h:8080/o/r` becomes `h/8080/o/r`).
pub fn normalize_url(url: &str) -> String {
    let lowered = url.trim().to_ascii_lowercase();

    // At most one of these can be a prefix, so the probing order is irrelevant.
    let after_scheme = ["ssh://", "https://", "http://", "git://"]
        .iter()
        .find_map(|scheme| lowered.strip_prefix(*scheme));
    let (rest, has_scheme) = match after_scheme {
        Some(r) => (r, true),
        None => (lowered.as_str(), false),
    };

    // Userinfo can only live in the authority: up to the first `/` for
    // scheme URLs, up to the first `:` or `/` for scp-style `user@host:path`.
    let boundary = if has_scheme {
        rest.find('/')
    } else {
        rest.find(|c: char| c == '/' || c == ':')
    };
    let authority_end = boundary.unwrap_or(rest.len());
    let rest = match rest[..authority_end].rfind('@') {
        Some(at) => &rest[at + 1..],
        None => rest,
    };

    // host:owner/repo -> host/owner/repo (first colon only)
    let joined = rest.replacen(':', "/", 1);
    joined
        .trim_end_matches('/')
        .trim_end_matches(".git")
        .to_string()
}

/// One `[submodule "<name>"]` section of a `.gitmodules` file, as returned by
/// [`parse`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Entry {
    /// Section name: the header text after `[submodule ` with surrounding
    /// `"` and `]` characters removed.
    pub name: String,
    /// Value of the section's `path` key; empty if the key was not seen.
    pub path: String,
    /// Value of the section's `url` key, exactly as written (not normalized);
    /// empty if the key was not seen.
    pub url: String,
    /// Value of the section's `branch` key, or `None` if the key was not seen.
    pub branch: Option<String>,
}

/// Parse a `.gitmodules` file body into one [`Entry`] per
/// `[submodule "<name>"]` section, in file order.
///
/// Within a submodule section the `path`, `url` and `branch` keys are read.
/// Each line is split on its first `=`; whitespace around the key and the
/// value is ignored and the key is compared case-insensitively, so `path=x`,
/// `path = x` and `Path\t=\tx` are all accepted. A later occurrence of a key
/// overrides an earlier one. A `branch` key with an empty value is treated as
/// absent.
///
/// Any other `[section]` header closes the current submodule, so keys that
/// belong to unrelated sections are never attributed to it. Lines outside a
/// submodule section, unknown keys, and lines without `=` (comments, blanks)
/// are ignored. Keys that never appear leave `path` / `url` empty and
/// `branch` as `None`.
pub fn parse(text: &str) -> Vec<Entry> {
    let mut out = Vec::new();
    let mut cur: Option<Entry> = None;
    for line in text.lines() {
        let l = line.trim();

        if let Some(rest) = l.strip_prefix("[submodule ") {
            // A new submodule header finishes the previous entry.
            out.extend(cur.take());
            cur = Some(Entry {
                name: rest.trim_matches(|c| c == '"' || c == ']').to_string(),
                path: String::new(),
                url: String::new(),
                branch: None,
            });
            continue;
        }
        if l.starts_with('[') {
            // Some other section: its keys must not leak into the submodule.
            out.extend(cur.take());
            continue;
        }

        let Some(entry) = cur.as_mut() else {
            continue;
        };
        let Some((key, value)) = l.split_once('=') else {
            continue;
        };
        let value = value.trim();
        match key.trim().to_ascii_lowercase().as_str() {
            "path" => entry.path = value.to_string(),
            "url" => entry.url = value.to_string(),
            "branch" => {
                entry.branch = if value.is_empty() {
                    None
                } else {
                    Some(value.to_string())
                };
            }
            _ => {}
        }
    }
    out.extend(cur);
    out
}

/// One repo in a [`Plan`], keyed by identity rather than by checkout path, so
/// a repo vendored in several places appears once.
#[derive(Debug, Serialize)]
pub struct Node {
    /// repo identity: the normalized submodule URL, or the `root_id` given to
    /// `scan` for the top repo
    pub id: String,
    /// distinct parent identities referencing this repo
    pub fan_in: usize,
    /// true when at least one submodule was recorded for this repo
    pub has_submodules: bool,
    /// every workspace-relative path this repo is vendored at
    pub paths: Vec<String>,
    /// conflicting branch pins seen across parents; empty unless more than
    /// one distinct pin was observed
    pub branch_conflicts: Vec<String>,
    /// one of `"root"` (the top repo), `"lift-shared"` (`fan_in >= 2`),
    /// `"convert"` (has submodules) or `"leaf-convert"` (none of the above),
    /// checked in that order
    pub recommendation: &'static str,
}

/// Result of `scan`: every repo discovered in the workspace, in the order
/// they should be converted.
#[derive(Debug, Serialize)]
pub struct Plan {
    /// conversion order: leaves first, root last
    pub order: Vec<Node>,
}

/// Recursively walk `.gitmodules` starting at workspace `root` and build a
/// leaf-up conversion [`Plan`].
///
/// `root_id` is the identity of the top repo. Submodule identities come from
/// [`normalize_url`], so pass `root_id` in that same normalized form if a
/// submodule URL pointing back at the root should be recognised as the root.
///
/// Repos are keyed by identity: the same repo vendored at several paths
/// becomes a single node whose `paths` lists every location. The nested
/// `.gitmodules` of an identity is read at most once, from the first checkout
/// that has one. A directory whose `.gitmodules` is missing or unreadable
/// contributes no submodules.
///
/// Entries without a `path`, or whose `url` normalizes to an empty identity,
/// are skipped. They cannot be located or identified, and following an empty
/// path would re-read the parent's own `.gitmodules` under the child's name.
///
/// In the returned order every repo comes after all of its submodules. If a
/// dependency cycle makes that impossible, the repos that cannot be placed
/// are appended in identity order.
pub fn scan(root: &Path, root_id: &str) -> Plan {
    // Graph accumulated during the walk; every map is keyed by repo identity.
    #[derive(Default)]
    struct Graph {
        // identity -> set of child identities
        children: BTreeMap<String, BTreeSet<String>>,
        // identity -> distinct parent identities
        parents: BTreeMap<String, BTreeSet<String>>,
        // identity -> branch pins observed
        branches: BTreeMap<String, BTreeSet<String>>,
        // identity -> every workspace-relative path it is vendored at
        paths: BTreeMap<String, BTreeSet<String>>,
        // identities whose own `.gitmodules` has already been walked
        seen: BTreeSet<String>,
    }

    impl Graph {
        // Record the submodules declared in `dir/.gitmodules` as children of
        // `id`, then descend into each checked-out child not walked before.
        // `prefix` is `dir` relative to the workspace root ("" at the root).
        fn walk(&mut self, dir: &Path, prefix: &str, id: &str) {
            // Register the node even if it turns out to have no submodules.
            self.children.entry(id.to_string()).or_default();
            let Ok(text) = std::fs::read_to_string(dir.join(".gitmodules")) else {
                return;
            };
            for e in parse(&text) {
                let cid = normalize_url(&e.url);
                if cid.is_empty() || e.path.is_empty() {
                    // Malformed entry: no usable identity or location.
                    continue;
                }
                let full = if prefix.is_empty() {
                    e.path.clone()
                } else {
                    format!("{prefix}/{}", e.path)
                };
                if let Some(kids) = self.children.get_mut(id) {
                    kids.insert(cid.clone());
                }
                self.parents
                    .entry(cid.clone())
                    .or_default()
                    .insert(id.to_string());
                self.paths.entry(cid.clone()).or_default().insert(full.clone());
                if let Some(b) = &e.branch {
                    self.branches.entry(cid.clone()).or_default().insert(b.clone());
                }
                self.children.entry(cid.clone()).or_default();
                // recurse into the checked-out submodule if present; the
                // identity is only marked seen once a checkout is found
                let sub = dir.join(&e.path);
                if sub.join(".gitmodules").is_file() && self.seen.insert(cid.clone()) {
                    self.walk(&sub, &full, &cid);
                }
            }
        }
    }

    let mut graph = Graph::default();
    graph.seen.insert(root_id.to_string());
    graph.walk(root, "", root_id);
    let Graph {
        children,
        parents,
        branches,
        paths,
        ..
    } = graph;

    // leaf-up order: a node after all its children (Kahn on children edges).
    // Each pass visits identities in sorted order, so the result is stable.
    let mut order_ids: Vec<&String> = Vec::with_capacity(children.len());
    let mut placed: BTreeSet<&String> = BTreeSet::new();
    while order_ids.len() < children.len() {
        let mut progressed = false;
        for (id, kids) in &children {
            if placed.contains(id) {
                continue;
            }
            if kids.iter().all(|c| placed.contains(c)) {
                order_ids.push(id);
                placed.insert(id);
                progressed = true;
            }
        }
        if !progressed {
            // cycle / unresolved: append remainder deterministically
            for id in children.keys() {
                if placed.insert(id) {
                    order_ids.push(id);
                }
            }
        }
    }

    let order = order_ids
        .into_iter()
        .map(|id| {
            let fan_in = parents.get(id).map_or(0, |p| p.len());
            let has_submodules = !children[id].is_empty();
            // A single pin is not a conflict; report only when parents disagree.
            let branch_conflicts: Vec<String> = branches
                .get(id)
                .filter(|pins| pins.len() > 1)
                .map(|pins| pins.iter().cloned().collect())
                .unwrap_or_default();
            let recommendation = if id.as_str() == root_id {
                "root"
            } else if fan_in >= 2 {
                "lift-shared"
            } else if has_submodules {
                "convert"
            } else {
                "leaf-convert"
            };
            let node_paths: Vec<String> = paths
                .get(id)
                .map(|p| p.iter().cloned().collect())
                .unwrap_or_default();
            Node {
                id: id.clone(),
                fan_in,
                has_submodules,
                paths: node_paths,
                branch_conflicts,
                recommendation,
            }
        })
        .collect();

    Plan { order }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// scp-style, https and ssh spellings of one repo must yield the same
    /// identity, regardless of case or a `.git` suffix.
    #[test]
    fn url_identity_unifies_schemes() {
        let a = normalize_url("git@github.com:Acme/Foo.git");
        let b = normalize_url("https://github.com/acme/foo");
        let c = normalize_url("ssh://git@github.com/acme/foo.git");
        assert_eq!(a, "github.com/acme/foo");
        assert_eq!(a, b);
        assert_eq!(b, c);
    }

    /// A single tab-indented section with `path`, `url` and `branch` keys
    /// parses into one entry carrying the path and the branch pin.
    #[test]
    fn parses_gitmodules_with_branch() {
        let e = parse(
            "[submodule \"monty\"]\n\tpath = monty\n\turl = git@github.com:x/monty.git\n\tbranch = dev\n",
        );
        assert_eq!(e.len(), 1);
        assert_eq!(e[0].path, "monty");
        assert_eq!(e[0].branch.as_deref(), Some("dev"));
    }

    /// A repo vendored by two different parents gets `fan_in == 2` and the
    /// `lift-shared` recommendation, and is ordered before its parents.
    #[test]
    fn fan_in_drives_lift_recommendation() {
        let d = tempfile::tempdir().unwrap();
        let root = d.path();
        // root has samtools-rs and bcftools-rs; both vendor htslib-rs
        std::fs::write(
            root.join(".gitmodules"),
            "[submodule \"s\"]\npath = s\nurl = git@h:o/samtools-rs.git\n\
             [submodule \"b\"]\npath = b\nurl = git@h:o/bcftools-rs.git\n",
        )
        .unwrap();
        // Each child checkout carries its own `.gitmodules`; the htslib URL is
        // https here while the parents use scp-style, so the identities only
        // line up through normalization.
        for sub in ["s", "b"] {
            let p = root.join(sub);
            std::fs::create_dir_all(&p).unwrap();
            std::fs::write(
                p.join(".gitmodules"),
                "[submodule \"h\"]\npath = h\nurl = https://h/o/htslib-rs.git\n",
            )
            .unwrap();
        }
        let plan = scan(root, "h/o/root");
        let htslib = plan.order.iter().find(|n| n.id == "h/o/htslib-rs").unwrap();
        assert_eq!(htslib.fan_in, 2);
        assert_eq!(htslib.recommendation, "lift-shared");
        // leaf-up: htslib-rs precedes samtools-rs precedes root
        let pos = |s: &str| plan.order.iter().position(|n| n.id == s).unwrap();
        assert!(pos("h/o/htslib-rs") < pos("h/o/samtools-rs"));
        assert!(pos("h/o/samtools-rs") < pos("h/o/root"));
    }
}