Skip to main content

contributor_graphs/
repo.rs

1use crate::model::Commit;
2use anyhow::{bail, Context, Result};
3use std::path::{Path, PathBuf};
4use std::process::Command;
5
/// A repository resolved to a local git directory that `git` can be run in.
///
/// Produced by [`prepare`] for both local checkouts and cached clones of
/// remote repositories.
#[derive(Debug, Clone)]
pub struct PreparedRepo {
    /// Directory passed to `git -C`: the canonicalised local path, or the
    /// cache directory holding the bare clone of a remote repository.
    pub git_dir: PathBuf,
    /// `owner/repo` when the repository lives on GitHub.
    pub slug: Option<String>,
    /// Web URL for the repository, when known.
    pub url: Option<String>,
    /// Name shown to the user: the slug when there is one, otherwise the
    /// directory name (local) or the last segment of the clone URL (remote).
    pub display_name: String,
    /// Branch to read history from: the one the caller asked for, otherwise
    /// whatever `HEAD` points at (literally `"HEAD"` if that cannot be resolved).
    pub branch: String,
}
15
16/// Resolve user input (local path, `owner/repo` slug, or git URL) into a local
17/// git directory we can run `git log` against. Remote repos are cloned as
18/// bare, treeless partial clones into a cache directory.
19pub fn prepare(input: &str, branch: Option<&str>) -> Result<PreparedRepo> {
20    let path = Path::new(input);
21    if path.exists() {
22        return prepare_local(path, branch);
23    }
24
25    if let Some(slug) = parse_github_url(input) {
26        let url = format!("https://github.com/{slug}");
27        return prepare_remote(&format!("{url}.git"), Some(slug), branch);
28    }
29    if looks_like_slug(input) {
30        let url = format!("https://github.com/{input}.git");
31        return prepare_remote(&url, Some(input.to_string()), branch);
32    }
33    if input.contains("://") || input.starts_with("git@") {
34        return prepare_remote(input, None, branch);
35    }
36
37    bail!(
38        "'{input}' is not a local path, an owner/repo GitHub slug, or a git URL.\n\
39         Examples: contributor-graphs .  |  contributor-graphs nf-core/rnaseq  |  \
40         contributor-graphs https://github.com/MultiQC/MultiQC"
41    )
42}
43
44fn prepare_local(path: &Path, branch: Option<&str>) -> Result<PreparedRepo> {
45    let canonical = path
46        .canonicalize()
47        .with_context(|| format!("cannot resolve path {}", path.display()))?;
48    let ok = git(&canonical, &["rev-parse", "--git-dir"]).is_ok();
49    if !ok {
50        bail!("{} is not a git repository", canonical.display());
51    }
52    let remote = git(&canonical, &["remote", "get-url", "origin"]).ok();
53    let slug = remote.as_deref().and_then(parse_github_url);
54    let display_name = slug.clone().unwrap_or_else(|| {
55        canonical
56            .file_name()
57            .map(|n| n.to_string_lossy().to_string())
58            .unwrap_or_else(|| "repository".into())
59    });
60    let url = slug.as_ref().map(|s| format!("https://github.com/{s}"));
61    let branch = resolve_branch(&canonical, branch);
62    Ok(PreparedRepo {
63        git_dir: canonical,
64        slug,
65        url,
66        display_name,
67        branch,
68    })
69}
70
71fn prepare_remote(
72    clone_url: &str,
73    slug: Option<String>,
74    branch: Option<&str>,
75) -> Result<PreparedRepo> {
76    let cache_key = sanitize(slug.as_deref().unwrap_or(clone_url));
77    let cache_dir = std::env::temp_dir()
78        .join("contributor-graphs")
79        .join(cache_key);
80
81    if cache_dir.join("HEAD").exists() {
82        eprintln!("  updating cached clone {}", cache_dir.display());
83        let fetched = git(
84            &cache_dir,
85            &[
86                "fetch",
87                "--quiet",
88                "--prune",
89                "origin",
90                "+refs/heads/*:refs/heads/*",
91            ],
92        )
93        .is_ok();
94        if !fetched {
95            eprintln!("  fetch failed, re-cloning");
96            let _ = std::fs::remove_dir_all(&cache_dir);
97        }
98    }
99    if !cache_dir.join("HEAD").exists() {
100        std::fs::create_dir_all(cache_dir.parent().unwrap()).ok();
101        eprintln!("  cloning {clone_url} (commit history only)");
102        let status = Command::new("git")
103            .args(["clone", "--bare", "--filter=tree:0", "--quiet", clone_url])
104            .arg(&cache_dir)
105            .status()
106            .context("failed to run git clone")?;
107        if !status.success() {
108            bail!("git clone of {clone_url} failed");
109        }
110        // Keep the remote HEAD up to date on later fetches.
111        let _ = git(&cache_dir, &["remote", "set-head", "origin", "--auto"]);
112    }
113
114    let display_name = slug.clone().unwrap_or_else(|| {
115        clone_url
116            .trim_end_matches(".git")
117            .rsplit('/')
118            .next()
119            .unwrap_or("repository")
120            .to_string()
121    });
122    let url = slug.as_ref().map(|s| format!("https://github.com/{s}"));
123    let branch = resolve_branch(&cache_dir, branch);
124    Ok(PreparedRepo {
125        git_dir: cache_dir,
126        slug,
127        url,
128        display_name,
129        branch,
130    })
131}
132
133fn resolve_branch(dir: &Path, requested: Option<&str>) -> String {
134    if let Some(b) = requested {
135        return b.to_string();
136    }
137    git(dir, &["symbolic-ref", "--short", "HEAD"]).unwrap_or_else(|_| "HEAD".into())
138}
139
140/// Run `git log` and parse one commit per line.
141pub fn read_commits(
142    repo: &PreparedRepo,
143    branch: Option<&str>,
144    since: Option<&str>,
145    until: Option<&str>,
146    no_merges: bool,
147) -> Result<Vec<Commit>> {
148    let mut cmd = Command::new("git");
149    cmd.arg("-C").arg(&repo.git_dir).args([
150        "log",
151        "--use-mailmap",
152        "--pretty=format:%H%x09%at%x09%aN%x09%aE",
153    ]);
154    if no_merges {
155        cmd.arg("--no-merges");
156    }
157    if let Some(s) = since {
158        cmd.arg(format!("--since={s}"));
159    }
160    if let Some(u) = until {
161        cmd.arg(format!("--until={u}"));
162    }
163    cmd.arg(branch.unwrap_or("HEAD")).arg("--");
164
165    let out = cmd.output().context("failed to run git log")?;
166    if !out.status.success() {
167        bail!(
168            "git log failed: {}",
169            String::from_utf8_lossy(&out.stderr).trim()
170        );
171    }
172    let text = String::from_utf8_lossy(&out.stdout);
173    let mut commits = Vec::new();
174    for line in text.lines() {
175        let mut parts = line.splitn(4, '\t');
176        let (Some(sha), Some(ts), Some(name), Some(email)) =
177            (parts.next(), parts.next(), parts.next(), parts.next())
178        else {
179            continue;
180        };
181        let Ok(ts) = ts.parse::<i64>() else { continue };
182        commits.push(Commit {
183            sha: sha.to_string(),
184            ts,
185            name: name.trim().to_string(),
186            email: email.trim().to_lowercase(),
187        });
188    }
189    Ok(commits)
190}
191
192fn git(dir: &Path, args: &[&str]) -> Result<String> {
193    let out = Command::new("git").arg("-C").arg(dir).args(args).output()?;
194    if !out.status.success() {
195        bail!("git {:?} failed", args);
196    }
197    Ok(String::from_utf8_lossy(&out.stdout).trim().to_string())
198}
199
/// Extract `owner/repo` from common GitHub URL shapes.
///
/// Recognised prefixes are `git@github.com:`, `https://github.com/`,
/// `http://github.com/`, `ssh://git@github.com/` and bare `github.com/`.
/// Surrounding whitespace, a query string or fragment, trailing slashes, a
/// `.git` suffix on the repository name and any path beyond `owner/repo`
/// (for example `/tree/main`) are ignored.
///
/// Returns `None` when the input is not a GitHub URL or lacks an owner or a
/// repository name.
pub fn parse_github_url(url: &str) -> Option<String> {
    const PREFIXES: [&str; 5] = [
        "git@github.com:",
        "https://github.com/",
        "http://github.com/",
        "ssh://git@github.com/",
        "github.com/",
    ];

    // Browser URLs often carry `?tab=...` or `#readme`; neither belongs to
    // the repository name.
    let u = url.trim();
    let u = u.split(|c| c == '?' || c == '#').next().unwrap_or(u);
    let u = u.trim_end_matches('/');

    let rest = PREFIXES.iter().find_map(|prefix| u.strip_prefix(*prefix))?;
    let mut parts = rest.splitn(3, '/');
    let owner = parts.next()?;
    // Strip `.git` from the repository component itself so it is removed even
    // when more path segments follow it.
    let repo = parts.next()?.trim_end_matches(".git");
    if owner.is_empty() || repo.is_empty() {
        return None;
    }
    Some(format!("{owner}/{repo}"))
}
217
/// Report whether `s` has the shape of a GitHub `owner/repo` slug.
///
/// A slug is exactly two non-empty components separated by one `/`, each made
/// only of ASCII letters, digits, `-`, `_` and `.`. The components `.` and
/// `..` are rejected so that a mistyped relative path such as `../foo` is not
/// mistaken for a repository.
fn looks_like_slug(s: &str) -> bool {
    let Some((owner, repo)) = s.split_once('/') else {
        return false;
    };
    // A further `/` in `repo` fails the character check below, which keeps
    // the "exactly two components" rule.
    [owner, repo].iter().all(|part| {
        !part.is_empty()
            && !matches!(*part, "." | "..")
            && part
                .chars()
                .all(|c| c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.'))
    })
}
227
/// Replace every character other than ASCII letters, digits, `-` and `.`
/// with `-`.
///
/// The mapping is lossy: distinct inputs can produce the same output.
pub fn sanitize(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        let keep = matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '.');
        out.push(if keep { ch } else { '-' });
    }
    out
}