Skip to main content

contributor_graphs/
repo.rs

1use crate::model::{Commit, CommitFilter, Release};
2use anyhow::{bail, Context, Result};
3use std::path::{Path, PathBuf};
4use std::process::Command;
5
/// A repository that has been resolved to a local git directory and is ready
/// to be queried with git commands.
#[derive(Debug, Clone)]
pub struct PreparedRepo {
    /// Directory that git commands are run against (passed to `git -C`).
    pub git_dir: PathBuf,
    /// `owner/repo` when the repository lives on GitHub.
    pub slug: Option<String>,
    /// Web URL for the repository, when known.
    pub url: Option<String>,
    /// Human-readable name used when presenting the repository.
    pub display_name: String,
    /// Branch (or `HEAD`) whose history is inspected.
    pub branch: String,
    /// True for a cloned remote (has an `origin` to fetch from); false for a
    /// local checkout. Drives whether freshness is checked via `git ls-remote`.
    pub is_remote: bool,
}
18
19/// Resolve user input (local path, `owner/repo` slug, or git URL) into a local
20/// git directory we can run `git log` against. Remote repos are cloned as
21/// bare, treeless partial clones into a cache directory. With `quiet`, the
22/// per-repo "cloning …" line is suppressed (the caller is showing a bar).
23pub fn prepare(input: &str, branch: Option<&str>, quiet: bool) -> Result<PreparedRepo> {
24    let path = Path::new(input);
25    if path.exists() {
26        return prepare_local(path, branch);
27    }
28
29    if let Some(slug) = parse_github_url(input) {
30        let url = format!("https://github.com/{slug}");
31        return prepare_remote(&format!("{url}.git"), Some(slug), branch, quiet);
32    }
33    if looks_like_slug(input) {
34        let url = format!("https://github.com/{input}.git");
35        return prepare_remote(&url, Some(input.to_string()), branch, quiet);
36    }
37    if input.contains("://") || input.starts_with("git@") {
38        return prepare_remote(input, None, branch, quiet);
39    }
40
41    bail!(
42        "'{input}' is not a local path, an owner/repo GitHub slug, or a git URL.\n\
43         Examples: contributor-graphs .  |  contributor-graphs nf-core/rnaseq  |  \
44         contributor-graphs https://github.com/MultiQC/MultiQC"
45    )
46}
47
48fn prepare_local(path: &Path, branch: Option<&str>) -> Result<PreparedRepo> {
49    let canonical = path
50        .canonicalize()
51        .with_context(|| format!("cannot resolve path {}", path.display()))?;
52    let ok = git(&canonical, &["rev-parse", "--git-dir"]).is_ok();
53    if !ok {
54        bail!("{} is not a git repository", canonical.display());
55    }
56    let remote = git(&canonical, &["remote", "get-url", "origin"]).ok();
57    let slug = remote.as_deref().and_then(parse_github_url);
58    let display_name = slug.clone().unwrap_or_else(|| {
59        canonical
60            .file_name()
61            .map(|n| n.to_string_lossy().to_string())
62            .unwrap_or_else(|| "repository".into())
63    });
64    let url = slug.as_ref().map(|s| format!("https://github.com/{s}"));
65    let branch = resolve_branch(&canonical, branch);
66    Ok(PreparedRepo {
67        git_dir: canonical,
68        slug,
69        url,
70        display_name,
71        branch,
72        is_remote: false,
73    })
74}
75
76/// Where cached clones live: under the tool's XDG cache dir, falling back to a
77/// temp directory if no home/XDG dir is available.
78fn clones_dir() -> PathBuf {
79    crate::cache::root()
80        .unwrap_or_else(std::env::temp_dir)
81        .join("clones")
82}
83
84fn prepare_remote(
85    clone_url: &str,
86    slug: Option<String>,
87    branch: Option<&str>,
88    quiet: bool,
89) -> Result<PreparedRepo> {
90    let cache_key = sanitize(slug.as_deref().unwrap_or(clone_url));
91    let cache_dir = clones_dir().join(cache_key);
92
93    // Clone on first sight only. Updating an existing clone is deferred to
94    // fetch(), which the caller calls just for repos whose history changed.
95    if !cache_dir.join("HEAD").exists() {
96        std::fs::create_dir_all(cache_dir.parent().unwrap()).ok();
97        if !quiet {
98            eprintln!("  cloning {clone_url} (commit history only)");
99        }
100        let status = Command::new("git")
101            .args(["clone", "--bare", "--filter=tree:0", "--quiet", clone_url])
102            .arg(&cache_dir)
103            .status()
104            .context("failed to run git clone")?;
105        if !status.success() {
106            bail!("git clone of {clone_url} failed");
107        }
108        // Keep the remote HEAD up to date on later fetches.
109        let _ = git(&cache_dir, &["remote", "set-head", "origin", "--auto"]);
110    }
111
112    let display_name = slug.clone().unwrap_or_else(|| {
113        clone_url
114            .trim_end_matches(".git")
115            .rsplit('/')
116            .next()
117            .unwrap_or("repository")
118            .to_string()
119    });
120    let url = slug.as_ref().map(|s| format!("https://github.com/{s}"));
121    let branch = resolve_branch(&cache_dir, branch);
122    Ok(PreparedRepo {
123        git_dir: cache_dir,
124        slug,
125        url,
126        display_name,
127        branch,
128        is_remote: true,
129    })
130}
131
132/// The current tip SHA of `branch` on the remote, read with `git ls-remote`
133/// (refs only, no object transfer). Used as a cheap freshness token without
134/// fetching. `None` if the repo is local or the remote can't be reached.
135pub fn remote_tip(repo: &PreparedRepo) -> Option<String> {
136    if !repo.is_remote {
137        return None;
138    }
139    let out = git(&repo.git_dir, &["ls-remote", "origin", &repo.branch]).ok()?;
140    out.split_whitespace().next().map(str::to_string)
141}
142
143/// The tip SHA of `branch` in the local clone/checkout.
144pub fn local_tip(repo: &PreparedRepo) -> Option<String> {
145    git(&repo.git_dir, &["rev-parse", &repo.branch]).ok()
146}
147
148/// Update a cached clone from its origin. Called only when the history is known
149/// to have changed; returns whether the fetch succeeded.
150pub fn fetch(repo: &PreparedRepo) -> bool {
151    eprintln!("  updating cached clone {}", repo.git_dir.display());
152    git(
153        &repo.git_dir,
154        &[
155            "fetch",
156            "--quiet",
157            "--prune",
158            "origin",
159            "+refs/heads/*:refs/heads/*",
160            // Keep tags current too, so release markers reflect new releases.
161            "+refs/tags/*:refs/tags/*",
162        ],
163    )
164    .is_ok()
165}
166
167/// Read every git tag with its date (tag date for annotated tags, commit date
168/// for lightweight ones), sorted oldest-first. Used to mark releases on the
169/// timeline. A local-only operation — no network — so it's cheap to call.
170pub fn read_tags(repo: &PreparedRepo) -> Vec<Release> {
171    let out = match git(
172        &repo.git_dir,
173        &[
174            "for-each-ref",
175            "--sort=creatordate",
176            "--format=%(refname:short)%09%(creatordate:unix)",
177            "refs/tags",
178        ],
179    ) {
180        Ok(s) => s,
181        Err(_) => return Vec::new(),
182    };
183    out.lines()
184        .filter_map(|line| {
185            let (name, ts) = line.split_once('\t')?;
186            let ts: i64 = ts.trim().parse().ok()?;
187            let name = name.trim();
188            (!name.is_empty() && ts > 0).then(|| Release {
189                name: name.to_string(),
190                ts,
191            })
192        })
193        .collect()
194}
195
196fn resolve_branch(dir: &Path, requested: Option<&str>) -> String {
197    if let Some(b) = requested {
198        return b.to_string();
199    }
200    git(dir, &["symbolic-ref", "--short", "HEAD"]).unwrap_or_else(|_| "HEAD".into())
201}
202
203/// Run `git log` and parse one commit per record. Records are separated by a
204/// record-separator byte (\x1e) so a commit's multi-value `Co-authored-by`
205/// trailers (joined with \x1f) can't be confused with the next commit.
206pub fn read_commits(
207    repo: &PreparedRepo,
208    branch: Option<&str>,
209    filter: &CommitFilter,
210) -> Result<Vec<Commit>> {
211    let mut cmd = Command::new("git");
212    cmd.arg("-C").arg(&repo.git_dir).args([
213        "log",
214        "--use-mailmap",
215        "--pretty=format:%x1e%H%x09%at%x09%aN%x09%aE%x09\
216         %(trailers:key=Co-authored-by,valueonly,separator=%x1f)",
217    ]);
218    if filter.no_merges {
219        cmd.arg("--no-merges");
220    }
221    if let Some(s) = &filter.since {
222        cmd.arg(format!("--since={s}"));
223    }
224    if let Some(u) = &filter.until {
225        cmd.arg(format!("--until={u}"));
226    }
227    cmd.arg(branch.unwrap_or("HEAD")).arg("--");
228
229    let out = cmd.output().context("failed to run git log")?;
230    if !out.status.success() {
231        bail!(
232            "git log failed: {}",
233            String::from_utf8_lossy(&out.stderr).trim()
234        );
235    }
236    let text = String::from_utf8_lossy(&out.stdout);
237    let mut commits = Vec::new();
238    for rec in text.split('\u{1e}') {
239        let mut parts = rec.splitn(5, '\t');
240        let (Some(sha), Some(ts), Some(name), Some(email)) =
241            (parts.next(), parts.next(), parts.next(), parts.next())
242        else {
243            continue;
244        };
245        let Ok(ts) = ts.parse::<i64>() else { continue };
246        let email = email.trim().to_lowercase();
247        let coauthors = parts
248            .next()
249            .into_iter()
250            .flat_map(|block| block.split('\u{1f}'))
251            .filter_map(parse_coauthor)
252            .filter(|(_, e)| e != &email)
253            .collect();
254        commits.push(Commit {
255            sha: sha.to_string(),
256            ts,
257            name: name.trim().to_string(),
258            email,
259            coauthors,
260            src: 0,
261        });
262    }
263    Ok(commits)
264}
265
/// Split a `Co-authored-by` trailer value of the form `Name <email>` into
/// `(name, email)`, with the email lower-cased.
///
/// * Blank input yields `None`.
/// * When no well-ordered `<…>` pair is present, the whole trimmed value is
///   returned as the name with an empty email.
/// * A value whose name and email are both empty (e.g. `<>`) yields `None`.
fn parse_coauthor(raw: &str) -> Option<(String, String)> {
    let value = raw.trim();
    if value.is_empty() {
        return None;
    }

    // The email spans from the first `<` to the last `>`.
    if let (Some(open), Some(close)) = (value.find('<'), value.rfind('>')) {
        if close > open {
            let name = value[..open].trim();
            let email = value[open + 1..close].trim().to_lowercase();
            if name.is_empty() && email.is_empty() {
                return None;
            }
            return Some((name.to_string(), email));
        }
    }

    Some((value.to_string(), String::new()))
}
281
282fn git(dir: &Path, args: &[&str]) -> Result<String> {
283    let out = Command::new("git").arg("-C").arg(dir).args(args).output()?;
284    if !out.status.success() {
285        bail!("git {:?} failed", args);
286    }
287    Ok(String::from_utf8_lossy(&out.stdout).trim().to_string())
288}
289
/// Extract `owner/repo` from common GitHub URL shapes.
///
/// Recognised prefixes are `git@github.com:`, `https://github.com/`,
/// `http://github.com/`, `ssh://git@github.com/` and a bare `github.com/`.
/// Surrounding whitespace, a query string (`?…`), a fragment (`#…`), trailing
/// slashes and a trailing `.git` are ignored, and any path beyond the
/// repository (such as `/tree/main`) is discarded.
///
/// Returns `None` when the input is not a GitHub URL or lacks an owner or a
/// repository segment.
pub fn parse_github_url(url: &str) -> Option<String> {
    const PREFIXES: [&str; 5] = [
        "git@github.com:",
        "https://github.com/",
        "http://github.com/",
        "ssh://git@github.com/",
        "github.com/",
    ];

    let trimmed = url.trim();
    // URLs copied from a browser often carry `?tab=…` or `#readme`; neither is
    // part of the repository path, so cut the input at the first of them.
    let end = trimmed
        .find(|c: char| c == '?' || c == '#')
        .unwrap_or(trimmed.len());
    let u = trimmed[..end]
        .trim_end_matches('/')
        .trim_end_matches(".git");

    let rest = PREFIXES.iter().find_map(|prefix| u.strip_prefix(*prefix))?;

    // Only the first two path segments matter; the remainder is ignored.
    let mut parts = rest.splitn(3, '/');
    let owner = parts.next()?;
    let repo = parts.next()?;
    if owner.is_empty() || repo.is_empty() {
        return None;
    }
    Some(format!("{owner}/{repo}"))
}
307
/// Whether `input` looks like a bare GitHub owner name (org or user): a single
/// token that is not a local path, not a slug and not a URL. Used to decide
/// whether to expand the input into all of that owner's repositories.
///
/// After trimming, the input must be non-empty, contain none of `/`, `:` or
/// `.`, not name an existing filesystem entry, not start with `-`, and
/// consist solely of ASCII alphanumerics and `-`.
pub fn looks_like_owner(input: &str) -> bool {
    let candidate = input.trim();

    let has_path_or_url_char = candidate
        .chars()
        .any(|c| matches!(c, '/' | ':' | '.'));
    if candidate.is_empty() || has_path_or_url_char {
        return false;
    }

    // An existing file or directory of that name takes precedence.
    if Path::new(candidate).exists() {
        return false;
    }

    if candidate.starts_with('-') {
        return false;
    }
    candidate
        .chars()
        .all(|c| c == '-' || c.is_ascii_alphanumeric())
}
321
/// Whether `s` has the shape of a GitHub `owner/repo` slug: exactly two
/// `/`-separated components, each non-empty and made only of ASCII
/// alphanumerics, `-`, `_` or `.`.
///
/// The path components `.` and `..` are rejected so that a mistyped relative
/// path such as `../project` is not mistaken for a slug.
fn looks_like_slug(s: &str) -> bool {
    fn is_valid_component(part: &str) -> bool {
        !part.is_empty()
            && part != "."
            && part != ".."
            && part
                .chars()
                .all(|c| c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.'))
    }

    // Splitting at the first `/` is enough: any further `/` lands in the repo
    // component, where the character check rejects it.
    matches!(
        s.split_once('/'),
        Some((owner, repo)) if is_valid_component(owner) && is_valid_component(repo)
    )
}
331
/// Make a string safe to use as a single directory name by replacing every
/// character other than ASCII alphanumerics, `-` and `.` with `-`.
pub fn sanitize(s: &str) -> String {
    let mut cleaned = String::with_capacity(s.len());
    for ch in s.chars() {
        let keep = matches!(ch, '-' | '.') || ch.is_ascii_alphanumeric();
        cleaned.push(if keep { ch } else { '-' });
    }
    cleaned
}
342}