Skip to main content

sloc_git/
ops.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright (C) 2026 Nima Shafie <nimzshafie@gmail.com>
3
4use std::path::Path;
5
6use anyhow::{bail, Context, Result};
7
8use crate::{GitCommit, GitRef, GitRefKind, RepoRefs};
9
10// ── low-level git runner ───────────────────────────────────────────────────────
11
12fn run_git(repo: &Path, args: &[&str]) -> Result<String> {
13    let mut cmd = std::process::Command::new("git");
14    // Opt-in SSL bypass for corporate/internal repos with self-signed certificates.
15    // Set SLOC_GIT_SSL_NO_VERIFY=1 in the environment to enable.
16    if std::env::var_os("SLOC_GIT_SSL_NO_VERIFY").is_some() {
17        cmd.args(["-c", "http.sslVerify=false"]);
18    }
19    let out = cmd
20        .args(args)
21        .current_dir(repo)
22        .output()
23        .context("failed to spawn git process")?;
24    if !out.status.success() {
25        let stderr = String::from_utf8_lossy(&out.stderr);
26        bail!("git {}: {}", args.first().unwrap_or(&""), stderr.trim());
27    }
28    Ok(String::from_utf8_lossy(&out.stdout).trim().to_owned())
29}
30
31// ── URL normalization ─────────────────────────────────────────────────────────
32
/// Convert a repository browse URL into a clonable git URL.
///
/// Handles Bitbucket Server/Data Center (`/projects/{PROJ}/repos/{REPO}/...`),
/// GitLab (`/path/repo/-/tree/...`), GitHub (`github.com/{owner}/{repo}/tree/...`),
/// and Bitbucket Cloud (`bitbucket.org/{ws}/{repo}/src/...`). SSH URLs and URLs
/// that already look like clone targets are returned unchanged.
///
/// Leading and trailing whitespace in `raw` is trimmed first. Inputs that do not
/// start with `https://` or `http://` are returned (trimmed) without further
/// processing. The function never fails: anything it does not recognise is
/// passed through as-is.
pub fn normalize_git_url(raw: &str) -> String {
    let url = raw.trim();

    // SSH URLs are already clonable — git handles `git@host:path` natively.
    if url.starts_with("git@") || url.starts_with("ssh://") {
        return url.to_owned();
    }

    let scheme = if url.starts_with("https://") {
        "https"
    } else if url.starts_with("http://") {
        "http"
    } else {
        return url.to_owned();
    };

    let authority_and_path = &url[scheme.len() + 3..]; // strip "scheme://"
    let (host, path) = match authority_and_path.find('/') {
        Some(i) => (&authority_and_path[..i], &authority_and_path[i..]),
        None => (authority_and_path, "/"),
    };
    let path = path.trim_end_matches('/');

    // ── Bitbucket Server / Data Center ────────────────────────────────────────
    // Browse URL: /{context}/projects/{PROJECT}/repos/{REPO}[/...]
    // Clone URL:  /{context}/scm/{project_lower}/{repo}.git
    // The Bitbucket context path defaults to "" (root) but some deployments use
    // a prefix (e.g. /bitbucket). We preserve whatever prefix precedes /projects/.
    {
        // ASCII-only lowercasing keeps every byte offset identical to `path`.
        // Full Unicode lowercasing can change a character's encoded length
        // (e.g. 'İ' grows from 2 to 3 bytes), which would make the offset found
        // below point at the wrong place — or inside a character — in `path`.
        let path_lower = path.to_ascii_lowercase();
        if let Some(proj_pos) = path_lower.find("/projects/") {
            let after = &path[proj_pos + "/projects/".len()..];
            let parts: Vec<&str> = after.splitn(4, '/').collect();
            // parts[0] = PROJECT_KEY, parts[1] = "repos", parts[2] = REPO, parts[3] = rest
            if parts.len() >= 3 && parts[1].eq_ignore_ascii_case("repos") {
                let context = &path[..proj_pos]; // e.g. "" or "/bitbucket"
                let project = parts[0].to_lowercase();
                let repo = parts[2].trim_end_matches(".git");
                return format!("{scheme}://{host}{context}/scm/{project}/{repo}.git");
            }
        }
    }

    // ── GitLab (any host) ─────────────────────────────────────────────────────
    // Browse URL: /path/to/repo/-/tree/branch
    // Clone URL:  /path/to/repo.git
    if let Some(idx) = path.find("/-/") {
        let repo_path = &path[..idx];
        let repo_path = repo_path.trim_end_matches(".git");
        return format!("{scheme}://{host}{repo_path}.git");
    }

    // ── GitHub ────────────────────────────────────────────────────────────────
    // Browse URL: github.com/{owner}/{repo}/{tree|blob|...}/...
    // Clone URL:  github.com/{owner}/{repo}.git
    if host == "github.com" || host.ends_with(".github.com") {
        let p = path.trim_start_matches('/');
        let parts: Vec<&str> = p.splitn(4, '/').collect();
        if parts.len() >= 3
            && matches!(
                parts[2],
                "tree" | "blob" | "commits" | "commit" | "releases" | "tags" | "branches"
            )
        {
            let owner = parts[0];
            let repo = parts[1].trim_end_matches(".git");
            return format!("{scheme}://{host}/{owner}/{repo}.git");
        }
    }

    // ── Bitbucket Cloud ───────────────────────────────────────────────────────
    // Browse URL: bitbucket.org/{workspace}/{repo}/src/...
    // Clone URL:  bitbucket.org/{workspace}/{repo}.git
    if host == "bitbucket.org" {
        let p = path.trim_start_matches('/');
        let parts: Vec<&str> = p.splitn(4, '/').collect();
        if parts.len() >= 3 && parts[2] == "src" {
            let ws = parts[0];
            let repo = parts[1].trim_end_matches(".git");
            return format!("{scheme}://{host}/{ws}/{repo}.git");
        }
    }

    // Nothing matched: assume the URL is already a clone target.
    url.to_owned()
}
124
125// ── clone / fetch ─────────────────────────────────────────────────────────────
126
127fn validate_clone_url(url: &str) -> Result<()> {
128    let lower = url.to_lowercase();
129    let allowed = ["https://", "http://", "git://", "ssh://", "git@"];
130    if !allowed.iter().any(|p| lower.starts_with(p)) {
131        bail!(
132            "git URL rejected: only https://, http://, git://, ssh://, and git@ URLs are \
133             permitted (got {url:?})"
134        );
135    }
136    Ok(())
137}
138
139/// Clone `url` into `dest`, or fetch all refs if the repo already exists.
140///
141/// Browse URLs (GitHub, GitLab, Bitbucket web pages) are automatically converted
142/// to their corresponding git clone URLs before cloning.
143///
144/// # Errors
145/// Returns an error if the URL is rejected, the clone directory cannot be created,
146/// or the underlying `git clone` / `git fetch` command fails.
147pub fn clone_or_fetch(url: &str, dest: &Path) -> Result<()> {
148    let normalized = normalize_git_url(url);
149    let url = normalized.as_str();
150    validate_clone_url(url)?;
151    if dest.join(".git").exists() {
152        run_git(dest, &["fetch", "--all", "--tags", "--prune"])?;
153    } else {
154        std::fs::create_dir_all(dest).context("failed to create clone directory")?;
155        let dest_str = dest.to_str().unwrap_or(".");
156        let parent = dest.parent().unwrap_or(dest);
157        run_git(
158            parent,
159            &["clone", "--no-single-branch", "--depth=50", url, dest_str],
160        )?;
161    }
162    Ok(())
163}
164
165/// Resolve `ref_name` to its full SHA in `repo`.
166///
167/// # Errors
168/// Returns an error if `git rev-parse` fails (e.g. the ref does not exist).
169pub fn get_sha(repo: &Path, ref_name: &str) -> Result<String> {
170    run_git(repo, &["rev-parse", ref_name])
171}
172
173// ── worktree helpers ──────────────────────────────────────────────────────────
174
175/// Create a detached worktree at `worktree_path` pointing at `ref_name`.
176///
177/// # Errors
178/// Returns an error if `git worktree add` fails.
179pub fn create_worktree(repo: &Path, ref_name: &str, worktree_path: &Path) -> Result<()> {
180    let wt = worktree_path.to_str().unwrap_or(".");
181    run_git(repo, &["worktree", "add", "--detach", wt, ref_name])?;
182    Ok(())
183}
184
185/// Remove a worktree previously created with [`create_worktree`].
186///
187/// # Errors
188/// This function always succeeds; the underlying git command failure is intentionally ignored.
189pub fn destroy_worktree(repo: &Path, worktree_path: &Path) -> Result<()> {
190    let wt = worktree_path.to_str().unwrap_or(".");
191    let _ = run_git(repo, &["worktree", "remove", "--force", wt]);
192    Ok(())
193}
194
195// ── ref listing ───────────────────────────────────────────────────────────────
196
197/// Return all branches, tags, and recent commits for `repo`.
198///
199/// # Errors
200/// Returns an error if any underlying git command fails.
201pub fn list_refs(repo: &Path) -> Result<RepoRefs> {
202    Ok(RepoRefs {
203        branches: list_branches(repo)?,
204        tags: list_tags(repo)?,
205        recent_commits: list_commits(repo, "HEAD", 40)?,
206    })
207}
208
209fn list_branches(repo: &Path) -> Result<Vec<GitRef>> {
210    let fmt = "%(refname:short)|%(objectname:short)|%(creatordate:iso-strict)|%(subject)";
211    // Use -r (remote-tracking only) to avoid local/remote duplicates.
212    // Strip the leading remote name (e.g. "origin/") from each ref so the
213    // displayed name matches what the upstream repository calls the branch.
214    let out = run_git(repo, &["branch", "-r", &format!("--format={fmt}")])?;
215    let refs = out
216        .lines()
217        .filter(|l| !l.trim().is_empty())
218        .map(|l| parse_ref_line(l, GitRefKind::Branch))
219        // Drop symbolic HEAD pointers (e.g. origin/HEAD).
220        .filter(|r| r.name != "HEAD" && !r.name.ends_with("/HEAD"))
221        .map(|mut r| {
222            // Strip the remote prefix ("origin/", "upstream/", etc.).
223            if let Some(slash) = r.name.find('/') {
224                r.name = r.name[slash + 1..].to_owned();
225            }
226            r
227        })
228        .collect::<Vec<_>>();
229    Ok(refs)
230}
231
232fn list_tags(repo: &Path) -> Result<Vec<GitRef>> {
233    let fmt = "%(refname:short)|%(objectname:short)|%(creatordate:iso-strict)|%(subject)";
234    let out = run_git(
235        repo,
236        &["tag", "--sort=-creatordate", &format!("--format={fmt}")],
237    )?;
238    Ok(out
239        .lines()
240        .filter(|l| !l.trim().is_empty())
241        .map(|l| parse_ref_line(l, GitRefKind::Tag))
242        .collect())
243}
244
245fn parse_ref_line(line: &str, kind: GitRefKind) -> GitRef {
246    let parts: Vec<&str> = line.splitn(4, '|').collect();
247    let name = parts.first().copied().unwrap_or("").to_owned();
248    let sha = parts.get(1).copied().unwrap_or("").to_owned();
249    let date = parts.get(2).copied().and_then(parse_git_date);
250    let message = parts.get(3).map(|s| (*s).to_owned());
251    GitRef {
252        kind,
253        name,
254        sha,
255        date,
256        message,
257    }
258}
259
260// ── commit listing ────────────────────────────────────────────────────────────
261
262/// Return up to `limit` commits reachable from `ref_name`.
263///
264/// # Errors
265/// Returns an error if `git log` fails.
266pub fn list_commits(repo: &Path, ref_name: &str, limit: usize) -> Result<Vec<GitCommit>> {
267    let fmt = "%H|%h|%an|%aI|%s";
268    let n = format!("-{limit}");
269    let out = run_git(repo, &["log", ref_name, &format!("--format={fmt}"), &n])?;
270    Ok(out
271        .lines()
272        .filter(|l| !l.trim().is_empty())
273        .map(parse_commit_line)
274        .collect())
275}
276
277fn parse_commit_line(line: &str) -> GitCommit {
278    let p: Vec<&str> = line.splitn(5, '|').collect();
279    let sha = p.first().copied().unwrap_or("").to_owned();
280    let short_sha = p.get(1).copied().unwrap_or("").to_owned();
281    let author = p.get(2).copied().unwrap_or("").to_owned();
282    let date = p
283        .get(3)
284        .copied()
285        .and_then(parse_git_date)
286        .unwrap_or_default();
287    let subject = p.get(4).copied().unwrap_or("").to_owned();
288    GitCommit {
289        sha,
290        short_sha,
291        author,
292        date,
293        subject,
294    }
295}
296
297fn parse_git_date(s: &str) -> Option<chrono::DateTime<chrono::Utc>> {
298    chrono::DateTime::parse_from_rfc3339(s)
299        .ok()
300        .map(|d| d.with_timezone(&chrono::Utc))
301}