Skip to main content

contributor_graphs/
repo.rs

1use crate::model::{Commit, CommitFilter};
2use anyhow::{bail, Context, Result};
3use std::path::{Path, PathBuf};
4use std::process::Command;
5
/// A repository that has been resolved to a local git directory and is ready
/// to be queried with `git -C <git_dir> …`.
///
/// Built by [`prepare`], either from a local checkout or from a cached clone
/// of a remote.
#[derive(Debug, Clone)]
pub struct PreparedRepo {
    /// Directory that git commands are run against (passed to `git -C`).
    pub git_dir: PathBuf,
    /// `owner/repo` when the repository lives on GitHub.
    pub slug: Option<String>,
    /// Web URL for the repository, when known.
    pub url: Option<String>,
    /// Human-readable name: the slug when there is one, otherwise a name
    /// derived from the directory or clone URL.
    pub display_name: String,
    /// Branch (or other revision name) this repository is analysed at.
    pub branch: String,
    /// True for a cloned remote (has an `origin` to fetch from); false for a
    /// local checkout. Drives whether freshness is checked via `git ls-remote`.
    pub is_remote: bool,
}
18
19/// Resolve user input (local path, `owner/repo` slug, or git URL) into a local
20/// git directory we can run `git log` against. Remote repos are cloned as
21/// bare, treeless partial clones into a cache directory.
22pub fn prepare(input: &str, branch: Option<&str>) -> Result<PreparedRepo> {
23    let path = Path::new(input);
24    if path.exists() {
25        return prepare_local(path, branch);
26    }
27
28    if let Some(slug) = parse_github_url(input) {
29        let url = format!("https://github.com/{slug}");
30        return prepare_remote(&format!("{url}.git"), Some(slug), branch);
31    }
32    if looks_like_slug(input) {
33        let url = format!("https://github.com/{input}.git");
34        return prepare_remote(&url, Some(input.to_string()), branch);
35    }
36    if input.contains("://") || input.starts_with("git@") {
37        return prepare_remote(input, None, branch);
38    }
39
40    bail!(
41        "'{input}' is not a local path, an owner/repo GitHub slug, or a git URL.\n\
42         Examples: contributor-graphs .  |  contributor-graphs nf-core/rnaseq  |  \
43         contributor-graphs https://github.com/MultiQC/MultiQC"
44    )
45}
46
47fn prepare_local(path: &Path, branch: Option<&str>) -> Result<PreparedRepo> {
48    let canonical = path
49        .canonicalize()
50        .with_context(|| format!("cannot resolve path {}", path.display()))?;
51    let ok = git(&canonical, &["rev-parse", "--git-dir"]).is_ok();
52    if !ok {
53        bail!("{} is not a git repository", canonical.display());
54    }
55    let remote = git(&canonical, &["remote", "get-url", "origin"]).ok();
56    let slug = remote.as_deref().and_then(parse_github_url);
57    let display_name = slug.clone().unwrap_or_else(|| {
58        canonical
59            .file_name()
60            .map(|n| n.to_string_lossy().to_string())
61            .unwrap_or_else(|| "repository".into())
62    });
63    let url = slug.as_ref().map(|s| format!("https://github.com/{s}"));
64    let branch = resolve_branch(&canonical, branch);
65    Ok(PreparedRepo {
66        git_dir: canonical,
67        slug,
68        url,
69        display_name,
70        branch,
71        is_remote: false,
72    })
73}
74
75/// Where cached clones live: under the tool's XDG cache dir, falling back to a
76/// temp directory if no home/XDG dir is available.
77fn clones_dir() -> PathBuf {
78    crate::cache::root()
79        .unwrap_or_else(std::env::temp_dir)
80        .join("clones")
81}
82
83fn prepare_remote(
84    clone_url: &str,
85    slug: Option<String>,
86    branch: Option<&str>,
87) -> Result<PreparedRepo> {
88    let cache_key = sanitize(slug.as_deref().unwrap_or(clone_url));
89    let cache_dir = clones_dir().join(cache_key);
90
91    // Clone on first sight only. Updating an existing clone is deferred to
92    // fetch(), which the caller calls just for repos whose history changed.
93    if !cache_dir.join("HEAD").exists() {
94        std::fs::create_dir_all(cache_dir.parent().unwrap()).ok();
95        eprintln!("  cloning {clone_url} (commit history only)");
96        let status = Command::new("git")
97            .args(["clone", "--bare", "--filter=tree:0", "--quiet", clone_url])
98            .arg(&cache_dir)
99            .status()
100            .context("failed to run git clone")?;
101        if !status.success() {
102            bail!("git clone of {clone_url} failed");
103        }
104        // Keep the remote HEAD up to date on later fetches.
105        let _ = git(&cache_dir, &["remote", "set-head", "origin", "--auto"]);
106    }
107
108    let display_name = slug.clone().unwrap_or_else(|| {
109        clone_url
110            .trim_end_matches(".git")
111            .rsplit('/')
112            .next()
113            .unwrap_or("repository")
114            .to_string()
115    });
116    let url = slug.as_ref().map(|s| format!("https://github.com/{s}"));
117    let branch = resolve_branch(&cache_dir, branch);
118    Ok(PreparedRepo {
119        git_dir: cache_dir,
120        slug,
121        url,
122        display_name,
123        branch,
124        is_remote: true,
125    })
126}
127
128/// The current tip SHA of `branch` on the remote, read with `git ls-remote`
129/// (refs only, no object transfer). Used as a cheap freshness token without
130/// fetching. `None` if the repo is local or the remote can't be reached.
131pub fn remote_tip(repo: &PreparedRepo) -> Option<String> {
132    if !repo.is_remote {
133        return None;
134    }
135    let out = git(&repo.git_dir, &["ls-remote", "origin", &repo.branch]).ok()?;
136    out.split_whitespace().next().map(str::to_string)
137}
138
139/// The tip SHA of `branch` in the local clone/checkout.
140pub fn local_tip(repo: &PreparedRepo) -> Option<String> {
141    git(&repo.git_dir, &["rev-parse", &repo.branch]).ok()
142}
143
144/// Update a cached clone from its origin. Called only when the history is known
145/// to have changed; returns whether the fetch succeeded.
146pub fn fetch(repo: &PreparedRepo) -> bool {
147    eprintln!("  updating cached clone {}", repo.git_dir.display());
148    git(
149        &repo.git_dir,
150        &[
151            "fetch",
152            "--quiet",
153            "--prune",
154            "origin",
155            "+refs/heads/*:refs/heads/*",
156        ],
157    )
158    .is_ok()
159}
160
161fn resolve_branch(dir: &Path, requested: Option<&str>) -> String {
162    if let Some(b) = requested {
163        return b.to_string();
164    }
165    git(dir, &["symbolic-ref", "--short", "HEAD"]).unwrap_or_else(|_| "HEAD".into())
166}
167
168/// Run `git log` and parse one commit per record. Records are separated by a
169/// record-separator byte (\x1e) so a commit's multi-value `Co-authored-by`
170/// trailers (joined with \x1f) can't be confused with the next commit.
171pub fn read_commits(
172    repo: &PreparedRepo,
173    branch: Option<&str>,
174    filter: &CommitFilter,
175) -> Result<Vec<Commit>> {
176    let mut cmd = Command::new("git");
177    cmd.arg("-C").arg(&repo.git_dir).args([
178        "log",
179        "--use-mailmap",
180        "--pretty=format:%x1e%H%x09%at%x09%aN%x09%aE%x09\
181         %(trailers:key=Co-authored-by,valueonly,separator=%x1f)",
182    ]);
183    if filter.no_merges {
184        cmd.arg("--no-merges");
185    }
186    if let Some(s) = &filter.since {
187        cmd.arg(format!("--since={s}"));
188    }
189    if let Some(u) = &filter.until {
190        cmd.arg(format!("--until={u}"));
191    }
192    cmd.arg(branch.unwrap_or("HEAD")).arg("--");
193
194    let out = cmd.output().context("failed to run git log")?;
195    if !out.status.success() {
196        bail!(
197            "git log failed: {}",
198            String::from_utf8_lossy(&out.stderr).trim()
199        );
200    }
201    let text = String::from_utf8_lossy(&out.stdout);
202    let mut commits = Vec::new();
203    for rec in text.split('\u{1e}') {
204        let mut parts = rec.splitn(5, '\t');
205        let (Some(sha), Some(ts), Some(name), Some(email)) =
206            (parts.next(), parts.next(), parts.next(), parts.next())
207        else {
208            continue;
209        };
210        let Ok(ts) = ts.parse::<i64>() else { continue };
211        let email = email.trim().to_lowercase();
212        let coauthors = parts
213            .next()
214            .into_iter()
215            .flat_map(|block| block.split('\u{1f}'))
216            .filter_map(parse_coauthor)
217            .filter(|(_, e)| e != &email)
218            .collect();
219        commits.push(Commit {
220            sha: sha.to_string(),
221            ts,
222            name: name.trim().to_string(),
223            email,
224            coauthors,
225            src: 0,
226        });
227    }
228    Ok(commits)
229}
230
/// Split a `Co-authored-by` value into `(name, email)`.
///
/// * `Name <email>` yields the trimmed name and the trimmed, lower-cased
///   email (taken between the first `<` and the last `>`).
/// * A value without a well-ordered `<…>` pair is returned whole as the name
///   with an empty email.
/// * Blank input, or a value whose name and email are both empty (e.g. `<>`),
///   yields `None`.
fn parse_coauthor(raw: &str) -> Option<(String, String)> {
    let trimmed = raw.trim();
    if trimmed.is_empty() {
        return None;
    }

    if let (Some(open), Some(close)) = (trimmed.find('<'), trimmed.rfind('>')) {
        if open < close {
            let name = trimmed[..open].trim();
            let email = trimmed[open + 1..close].trim().to_lowercase();
            if name.is_empty() && email.is_empty() {
                return None;
            }
            return Some((name.to_string(), email));
        }
    }

    // No usable angle brackets: treat the whole value as a name.
    Some((trimmed.to_string(), String::new()))
}
246
247fn git(dir: &Path, args: &[&str]) -> Result<String> {
248    let out = Command::new("git").arg("-C").arg(dir).args(args).output()?;
249    if !out.status.success() {
250        bail!("git {:?} failed", args);
251    }
252    Ok(String::from_utf8_lossy(&out.stdout).trim().to_string())
253}
254
/// Extract `owner/repo` from common GitHub URL shapes.
///
/// Recognised prefixes are `git@github.com:`, `https://github.com/`,
/// `http://github.com/`, `ssh://git@github.com/` and a bare `github.com/`.
/// Surrounding whitespace, a `?query` or `#fragment` (as found on URLs copied
/// from a browser), trailing slashes and a trailing `.git` are ignored, and
/// any path beyond `owner/repo` (e.g. `/tree/main`) is dropped.
///
/// Returns `None` for non-GitHub input or when owner or repo is missing.
pub fn parse_github_url(url: &str) -> Option<String> {
    const PREFIXES: [&str; 5] = [
        "git@github.com:",
        "https://github.com/",
        "http://github.com/",
        "ssh://git@github.com/",
        "github.com/",
    ];

    let trimmed = url.trim();
    // Neither `?` nor `#` can be part of an owner or repository name, so
    // everything from the first one onwards is query/fragment noise.
    let base = trimmed
        .split(|c: char| matches!(c, '?' | '#'))
        .next()
        .unwrap_or(trimmed);
    let u = base.trim_end_matches('/').trim_end_matches(".git");

    let rest = PREFIXES.iter().find_map(|prefix| u.strip_prefix(*prefix))?;
    let mut parts = rest.splitn(3, '/');
    let owner = parts.next()?;
    let repo = parts.next()?;
    if owner.is_empty() || repo.is_empty() {
        return None;
    }
    Some(format!("{owner}/{repo}"))
}
272
/// Whether `input` looks like a bare GitHub owner name (org or user).
///
/// After trimming, the input must be a single non-empty token made only of
/// ASCII letters, digits and `-`, must not start with `-`, must not contain
/// `/`, `:` or `.`, and must not name an existing local path. Used to decide
/// whether to expand the input into all of that owner's repositories.
pub fn looks_like_owner(input: &str) -> bool {
    let candidate = input.trim();
    let has_separator = candidate
        .chars()
        .any(|c| matches!(c, '/' | ':' | '.'));
    if candidate.is_empty() || has_separator {
        return false;
    }
    // A local path of the same name takes precedence over an owner lookup.
    if Path::new(candidate).exists() {
        return false;
    }
    if candidate.starts_with('-') {
        return false;
    }
    candidate
        .chars()
        .all(|c| c == '-' || c.is_ascii_alphanumeric())
}
286
/// Whether `s` has the shape of a GitHub `owner/repo` slug.
///
/// The input must consist of exactly two `/`-separated segments, each
/// non-empty and made only of ASCII letters, digits, `-`, `_` and `.`.
/// The segments `.` and `..` are rejected so that a mistyped relative path
/// such as `./typo` or `../project` is not mistaken for a slug.
fn looks_like_slug(s: &str) -> bool {
    fn is_segment(seg: &str) -> bool {
        !seg.is_empty()
            && seg != "."
            && seg != ".."
            && seg
                .chars()
                .all(|c| c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.'))
    }

    let mut segments = s.split('/');
    matches!(
        (segments.next(), segments.next(), segments.next()),
        (Some(owner), Some(repo), None) if is_segment(owner) && is_segment(repo)
    )
}
296
297pub fn sanitize(s: &str) -> String {
298    s.chars()
299        .map(|c| {
300            if c.is_ascii_alphanumeric() || c == '-' || c == '.' {
301                c
302            } else {
303                '-'
304            }
305        })
306        .collect()
307}