Skip to main content

sloc_git/
ops.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright (C) 2026 Nima Shafie <nimzshafie@gmail.com>
3
4use std::path::Path;
5
6use anyhow::{bail, Context, Result};
7
8use crate::{GitCommit, GitRef, GitRefKind, RepoRefs};
9
10// ── low-level git runner ───────────────────────────────────────────────────────
11
12fn run_git(repo: &Path, args: &[&str]) -> Result<String> {
13    let mut cmd = std::process::Command::new("git");
14    // Opt-in SSL bypass for corporate/internal repos with self-signed certificates.
15    // Set SLOC_GIT_SSL_NO_VERIFY=1 in the environment to enable.
16    if std::env::var_os("SLOC_GIT_SSL_NO_VERIFY").is_some() {
17        cmd.args(["-c", "http.sslVerify=false"]);
18    }
19    let out = cmd
20        .args(args)
21        .current_dir(repo)
22        .output()
23        .context("failed to spawn git process")?;
24    if !out.status.success() {
25        let stderr = String::from_utf8_lossy(&out.stderr);
26        bail!("git {}: {}", args.first().unwrap_or(&""), stderr.trim());
27    }
28    Ok(String::from_utf8_lossy(&out.stdout).trim().to_owned())
29}
30
31// ── URL normalization ─────────────────────────────────────────────────────────
32
33/// Convert a repository browse URL into a clonable git URL.
34///
35/// Handles Bitbucket Server/Data Center (`/projects/{PROJ}/repos/{REPO}/...`),
36/// GitLab (`/path/repo/-/tree/...`), GitHub (`github.com/{owner}/{repo}/tree/...`),
37/// and Bitbucket Cloud (`bitbucket.org/{ws}/{repo}/src/...`). SSH URLs and URLs
38/// that already look like clone targets are returned unchanged.
39#[must_use]
40pub fn normalize_git_url(raw: &str) -> String {
41    let url = raw.trim();
42    if url.starts_with("git@") || url.starts_with("ssh://") {
43        return url.to_owned();
44    }
45    let scheme = if url.starts_with("https://") {
46        "https"
47    } else if url.starts_with("http://") {
48        "http"
49    } else {
50        return url.to_owned();
51    };
52    let authority_and_path = &url[scheme.len() + 3..];
53    let (host, path) = authority_and_path
54        .find('/')
55        .map_or((authority_and_path, "/"), |i| {
56            (&authority_and_path[..i], &authority_and_path[i..])
57        });
58    let path = path.trim_end_matches('/');
59
60    try_normalize_bitbucket_server(scheme, host, path)
61        .or_else(|| try_normalize_gitlab(scheme, host, path))
62        .or_else(|| try_normalize_github(scheme, host, path))
63        .or_else(|| try_normalize_bitbucket_cloud(scheme, host, path))
64        .unwrap_or_else(|| url.to_owned())
65}
66
67// ── Bitbucket Server / Data Center ────────────────────────────────────────────
/// Rewrite a Bitbucket Server / Data Center browse path into a clone URL.
///
/// Browse URL: `/{context}/projects/{PROJECT}/repos/{REPO}[/...]`
/// Clone URL:  `/{context}/scm/{project_lower}/{repo}.git`
///
/// The `/projects/` and `repos` markers are matched ASCII case-insensitively.
/// Returns `None` when `path` does not have this shape or when the project or
/// repository segment is empty.
fn try_normalize_bitbucket_server(scheme: &str, host: &str, path: &str) -> Option<String> {
    const MARKER: &str = "/projects/";

    // ASCII-only lowercasing keeps every byte offset identical to `path`.
    // Full Unicode lowercasing can change byte lengths, which would make the
    // offset found here point at the wrong place (or inside a character) when
    // it is used to slice the original `path` below.
    let proj_pos = path.to_ascii_lowercase().find(MARKER)?;
    let after = &path[proj_pos + MARKER.len()..];

    let mut segments = after.splitn(4, '/');
    let project = segments.next()?;
    let repos_marker = segments.next()?;
    let repo = segments.next()?.trim_end_matches(".git");
    if !repos_marker.eq_ignore_ascii_case("repos") || project.is_empty() || repo.is_empty() {
        return None;
    }

    // Everything before "/projects/" is the servlet context path, kept verbatim.
    let context = &path[..proj_pos];
    let project = project.to_lowercase();
    Some(format!(
        "{scheme}://{host}{context}/scm/{project}/{repo}.git"
    ))
}
85
86// ── GitLab (any host) ─────────────────────────────────────────────────────────
/// Rewrite a GitLab-style browse path (any host) into a clone URL.
///
/// Browse URL: `/path/to/repo/-/tree/branch`  →  Clone URL: `/path/to/repo.git`
///
/// Everything before the first `/-/` is taken as the repository path. Returns
/// `None` when `path` has no `/-/` marker or when nothing precedes it, since a
/// clone URL without a repository path would be meaningless.
fn try_normalize_gitlab(scheme: &str, host: &str, path: &str) -> Option<String> {
    let (repo_path, _view) = path.split_once("/-/")?;
    let repo_path = repo_path.trim_end_matches(".git");
    // Guard against URLs such as `/-/profile`, which have no repository part.
    if repo_path.trim_matches('/').is_empty() {
        return None;
    }
    Some(format!("{scheme}://{host}{repo_path}.git"))
}
93
94// ── GitHub ────────────────────────────────────────────────────────────────────
/// Rewrite a GitHub browse path into a clone URL.
///
/// Browse URL: `github.com/{owner}/{repo}/{tree|blob|...}/...`
/// Clone URL:  `github.com/{owner}/{repo}.git`
///
/// Applies only when `host` is `github.com` or a subdomain of it, compared
/// ASCII case-insensitively because host names are not case-sensitive. The
/// host is emitted exactly as it was given. Returns `None` for other hosts,
/// for paths whose third segment is not a known browse view, and for empty
/// owner or repository segments.
fn try_normalize_github(scheme: &str, host: &str, path: &str) -> Option<String> {
    let host_lower = host.to_ascii_lowercase();
    if host_lower != "github.com" && !host_lower.ends_with(".github.com") {
        return None;
    }

    let mut segments = path.trim_start_matches('/').splitn(4, '/');
    let owner = segments.next()?;
    let repo = segments.next()?.trim_end_matches(".git");
    let view = segments.next()?;
    if owner.is_empty() || repo.is_empty() {
        return None;
    }
    if !matches!(
        view,
        "tree" | "blob" | "commits" | "commit" | "releases" | "tags" | "branches"
    ) {
        return None;
    }
    Some(format!("{scheme}://{host}/{owner}/{repo}.git"))
}
114
115// ── Bitbucket Cloud ───────────────────────────────────────────────────────────
/// Rewrite a Bitbucket Cloud browse path into a clone URL.
///
/// Browse URL: `bitbucket.org/{workspace}/{repo}/src/...`
/// Clone URL:  `bitbucket.org/{workspace}/{repo}.git`
///
/// Applies only when `host` is `bitbucket.org`, compared ASCII
/// case-insensitively because host names are not case-sensitive. The host is
/// emitted exactly as it was given. Returns `None` for other hosts, when the
/// third path segment is not `src`, and for empty workspace or repository
/// segments.
fn try_normalize_bitbucket_cloud(scheme: &str, host: &str, path: &str) -> Option<String> {
    if !host.eq_ignore_ascii_case("bitbucket.org") {
        return None;
    }

    let mut segments = path.trim_start_matches('/').splitn(4, '/');
    let ws = segments.next()?;
    let repo = segments.next()?.trim_end_matches(".git");
    let view = segments.next()?;
    if view != "src" || ws.is_empty() || repo.is_empty() {
        return None;
    }
    Some(format!("{scheme}://{host}/{ws}/{repo}.git"))
}
130
131// ── clone / fetch ─────────────────────────────────────────────────────────────
132
133fn validate_clone_url(url: &str) -> Result<()> {
134    let lower = url.to_lowercase();
135    let allowed = ["https://", "http://", "git://", "ssh://", "git@"];
136    if !allowed.iter().any(|p| lower.starts_with(p)) {
137        bail!(
138            "git URL rejected: only https://, http://, git://, ssh://, and git@ URLs are \
139             permitted (got {url:?})"
140        );
141    }
142    Ok(())
143}
144
145/// Clone `url` into `dest`, or fetch all refs if the repo already exists.
146///
147/// Browse URLs (GitHub, GitLab, Bitbucket web pages) are automatically converted
148/// to their corresponding git clone URLs before cloning.
149///
150/// # Errors
151/// Returns an error if the URL is rejected, the clone directory cannot be created,
152/// or the underlying `git clone` / `git fetch` command fails.
153pub fn clone_or_fetch(url: &str, dest: &Path) -> Result<()> {
154    let normalized = normalize_git_url(url);
155    let url = normalized.as_str();
156    validate_clone_url(url)?;
157    if dest.join(".git").exists() {
158        run_git(dest, &["fetch", "--all", "--tags", "--prune"])?;
159    } else {
160        std::fs::create_dir_all(dest).context("failed to create clone directory")?;
161        let dest_str = dest.to_str().unwrap_or(".");
162        let parent = dest.parent().unwrap_or(dest);
163        run_git(
164            parent,
165            &["clone", "--no-single-branch", "--depth=50", url, dest_str],
166        )?;
167    }
168    Ok(())
169}
170
171/// Resolve `ref_name` to its full SHA in `repo`.
172///
173/// # Errors
174/// Returns an error if `git rev-parse` fails (e.g. the ref does not exist).
175pub fn get_sha(repo: &Path, ref_name: &str) -> Result<String> {
176    run_git(repo, &["rev-parse", ref_name])
177}
178
179// ── worktree helpers ──────────────────────────────────────────────────────────
180
181/// Create a detached worktree at `worktree_path` pointing at `ref_name`.
182///
183/// # Errors
184/// Returns an error if `git worktree add` fails.
185pub fn create_worktree(repo: &Path, ref_name: &str, worktree_path: &Path) -> Result<()> {
186    let wt = worktree_path.to_str().unwrap_or(".");
187    run_git(repo, &["worktree", "add", "--detach", wt, ref_name])?;
188    Ok(())
189}
190
191/// Remove a worktree previously created with [`create_worktree`].
192///
193/// # Errors
194/// This function always succeeds; the underlying git command failure is intentionally ignored.
195pub fn destroy_worktree(repo: &Path, worktree_path: &Path) -> Result<()> {
196    let wt = worktree_path.to_str().unwrap_or(".");
197    let _ = run_git(repo, &["worktree", "remove", "--force", wt]);
198    Ok(())
199}
200
201// ── ref listing ───────────────────────────────────────────────────────────────
202
203/// Return all branches, tags, and recent commits for `repo`.
204///
205/// # Errors
206/// Returns an error if any underlying git command fails.
207pub fn list_refs(repo: &Path) -> Result<RepoRefs> {
208    Ok(RepoRefs {
209        branches: list_branches(repo)?,
210        tags: list_tags(repo)?,
211        recent_commits: list_commits(repo, "HEAD", 40)?,
212    })
213}
214
215fn list_branches(repo: &Path) -> Result<Vec<GitRef>> {
216    let fmt = "%(refname:short)|%(objectname:short)|%(creatordate:iso-strict)|%(subject)";
217    // Use -r (remote-tracking only) to avoid local/remote duplicates.
218    // Strip the leading remote name (e.g. "origin/") from each ref so the
219    // displayed name matches what the upstream repository calls the branch.
220    let out = run_git(repo, &["branch", "-r", &format!("--format={fmt}")])?;
221    let refs = out
222        .lines()
223        .filter(|l| !l.trim().is_empty())
224        .map(|l| parse_ref_line(l, GitRefKind::Branch))
225        // Drop symbolic HEAD pointers (e.g. origin/HEAD).
226        .filter(|r| r.name != "HEAD" && !r.name.ends_with("/HEAD"))
227        .map(|mut r| {
228            // Strip the remote prefix ("origin/", "upstream/", etc.).
229            if let Some(slash) = r.name.find('/') {
230                r.name = r.name[slash + 1..].to_owned();
231            }
232            r
233        })
234        .collect::<Vec<_>>();
235    Ok(refs)
236}
237
238fn list_tags(repo: &Path) -> Result<Vec<GitRef>> {
239    let fmt = "%(refname:short)|%(objectname:short)|%(creatordate:iso-strict)|%(subject)";
240    let out = run_git(
241        repo,
242        &["tag", "--sort=-creatordate", &format!("--format={fmt}")],
243    )?;
244    Ok(out
245        .lines()
246        .filter(|l| !l.trim().is_empty())
247        .map(|l| parse_ref_line(l, GitRefKind::Tag))
248        .collect())
249}
250
251fn parse_ref_line(line: &str, kind: GitRefKind) -> GitRef {
252    let parts: Vec<&str> = line.splitn(4, '|').collect();
253    let name = parts.first().copied().unwrap_or("").to_owned();
254    let sha = parts.get(1).copied().unwrap_or("").to_owned();
255    let date = parts.get(2).copied().and_then(parse_git_date);
256    let message = parts.get(3).map(|s| (*s).to_owned());
257    GitRef {
258        kind,
259        name,
260        sha,
261        date,
262        message,
263    }
264}
265
266// ── commit listing ────────────────────────────────────────────────────────────
267
268/// Return up to `limit` commits reachable from `ref_name`.
269///
270/// # Errors
271/// Returns an error if `git log` fails.
272pub fn list_commits(repo: &Path, ref_name: &str, limit: usize) -> Result<Vec<GitCommit>> {
273    let fmt = "%H|%h|%an|%aI|%s";
274    let n = format!("-{limit}");
275    let out = run_git(repo, &["log", ref_name, &format!("--format={fmt}"), &n])?;
276    Ok(out
277        .lines()
278        .filter(|l| !l.trim().is_empty())
279        .map(parse_commit_line)
280        .collect())
281}
282
283fn parse_commit_line(line: &str) -> GitCommit {
284    let p: Vec<&str> = line.splitn(5, '|').collect();
285    let sha = p.first().copied().unwrap_or("").to_owned();
286    let short_sha = p.get(1).copied().unwrap_or("").to_owned();
287    let author = p.get(2).copied().unwrap_or("").to_owned();
288    let date = p
289        .get(3)
290        .copied()
291        .and_then(parse_git_date)
292        .unwrap_or_default();
293    let subject = p.get(4).copied().unwrap_or("").to_owned();
294    GitCommit {
295        sha,
296        short_sha,
297        author,
298        date,
299        subject,
300    }
301}
302
303fn parse_git_date(s: &str) -> Option<chrono::DateTime<chrono::Utc>> {
304    chrono::DateTime::parse_from_rfc3339(s)
305        .ok()
306        .map(|d| d.with_timezone(&chrono::Utc))
307}