Skip to main content

sloc_git/
ops.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright (C) 2026 Nima Shafie <nimzshafie@gmail.com>
3
4use std::path::Path;
5
6use anyhow::{bail, Context, Result};
7
8use crate::{GitCommit, GitRef, GitRefKind, RepoRefs};
9
10// ── low-level git runner ───────────────────────────────────────────────────────
11
12fn run_git(repo: &Path, args: &[&str]) -> Result<String> {
13    let mut cmd = std::process::Command::new("git");
14    // Opt-in SSL bypass for corporate/internal repos with self-signed certificates.
15    // Set SLOC_GIT_SSL_NO_VERIFY=1 in the environment to enable.
16    if std::env::var_os("SLOC_GIT_SSL_NO_VERIFY").is_some() {
17        cmd.args(["-c", "http.sslVerify=false"]);
18    }
19    let out = cmd
20        .args(args)
21        .current_dir(repo)
22        .output()
23        .context("failed to spawn git process")?;
24    if !out.status.success() {
25        let stderr = String::from_utf8_lossy(&out.stderr);
26        bail!("git {}: {}", args.first().unwrap_or(&""), stderr.trim());
27    }
28    Ok(String::from_utf8_lossy(&out.stdout).trim().to_owned())
29}
30
31// ── URL normalization ─────────────────────────────────────────────────────────
32
33/// Convert a repository browse URL into a clonable git URL.
34///
35/// Handles Bitbucket Server/Data Center (`/projects/{PROJ}/repos/{REPO}/...`),
36/// GitLab (`/path/repo/-/tree/...`), GitHub (`github.com/{owner}/{repo}/tree/...`),
37/// and Bitbucket Cloud (`bitbucket.org/{ws}/{repo}/src/...`). SSH URLs and URLs
38/// that already look like clone targets are returned unchanged.
39#[must_use]
40pub fn normalize_git_url(raw: &str) -> String {
41    let url = raw.trim();
42    if url.starts_with("git@") || url.starts_with("ssh://") {
43        return url.to_owned();
44    }
45    let scheme = if url.starts_with("https://") {
46        "https"
47    } else if url.starts_with("http://") {
48        "http"
49    } else {
50        return url.to_owned();
51    };
52    let authority_and_path = &url[scheme.len() + 3..];
53    let (host, path) = authority_and_path
54        .find('/')
55        .map_or((authority_and_path, "/"), |i| {
56            (&authority_and_path[..i], &authority_and_path[i..])
57        });
58    let path = path.trim_end_matches('/');
59
60    try_normalize_bitbucket_server(scheme, host, path)
61        .or_else(|| try_normalize_gitlab(scheme, host, path))
62        .or_else(|| try_normalize_github(scheme, host, path))
63        .or_else(|| try_normalize_bitbucket_cloud(scheme, host, path))
64        .unwrap_or_else(|| url.to_owned())
65}
66
// ── Bitbucket Server / Data Center ────────────────────────────────────────────
// Browse URL: /{context}/projects/{PROJECT}/repos/{REPO}[/...]
// Clone URL:  /{context}/scm/{project_lower}/{repo}.git
//
// Returns `None` when `path` does not contain a `/projects/{P}/repos/{R}`
// sequence (keywords matched ASCII-case-insensitively) or when the project or
// repository segment is empty.
fn try_normalize_bitbucket_server(scheme: &str, host: &str, path: &str) -> Option<String> {
    const MARKER: &str = "/projects/";

    // ASCII-only lowercasing keeps byte offsets identical to `path`, so the
    // position found here can safely be used to slice the original string.
    // (Full Unicode lowercasing may change byte lengths and misalign offsets.)
    let path_lower = path.to_ascii_lowercase();
    let proj_pos = path_lower.find(MARKER)?;

    let context = &path[..proj_pos];
    let mut segments = path[proj_pos + MARKER.len()..].splitn(4, '/');
    let project = segments.next()?;
    let repos_keyword = segments.next()?;
    let repo = segments.next()?;
    if !repos_keyword.eq_ignore_ascii_case("repos") {
        return None;
    }

    let project = project.to_lowercase();
    let repo = repo.trim_end_matches(".git");
    // An empty segment cannot name a repository; leave such URLs untouched.
    if project.is_empty() || repo.is_empty() {
        return None;
    }
    Some(format!(
        "{scheme}://{host}{context}/scm/{project}/{repo}.git"
    ))
}
85
// ── GitLab (any host) ─────────────────────────────────────────────────────────
// Browse URL: /path/to/repo/-/tree/branch  →  Clone URL: /path/to/repo.git
//
// Returns `None` when `path` has no `/-/` separator, or when nothing precedes
// it (e.g. instance-level pages such as `/-/profile`), since that would
// otherwise yield a clone URL without any repository path.
fn try_normalize_gitlab(scheme: &str, host: &str, path: &str) -> Option<String> {
    let idx = path.find("/-/")?;
    let repo_path = path[..idx].trim_end_matches(".git");
    // No repository segment before the separator: not a project page.
    if repo_path.trim_matches('/').is_empty() {
        return None;
    }
    Some(format!("{scheme}://{host}{repo_path}.git"))
}
93
// ── GitHub ────────────────────────────────────────────────────────────────────
// Browse URL: github.com/{owner}/{repo}/{tree|blob|...}/...
//
// Applies only to `github.com` and its subdomains, and only when the third
// path segment is one of the known repository sub-pages listed below.
fn try_normalize_github(scheme: &str, host: &str, path: &str) -> Option<String> {
    const BROWSE_SECTIONS: [&str; 7] = [
        "tree", "blob", "commits", "commit", "releases", "tags", "branches",
    ];

    let is_github = host == "github.com" || host.ends_with(".github.com");
    if !is_github {
        return None;
    }

    // Need at least {owner}/{repo}/{section}; anything shorter is not a browse page.
    let mut segments = path.trim_start_matches('/').split('/');
    let owner = segments.next()?;
    let repo = segments.next()?;
    let section = segments.next()?;
    if !BROWSE_SECTIONS.contains(&section) {
        return None;
    }

    let repo = repo.trim_end_matches(".git");
    Some(format!("{scheme}://{host}/{owner}/{repo}.git"))
}
114
// ── Bitbucket Cloud ───────────────────────────────────────────────────────────
// Browse URL: bitbucket.org/{workspace}/{repo}/src/...
//
// Applies only to the exact host `bitbucket.org` and only when the third path
// segment is `src`.
fn try_normalize_bitbucket_cloud(scheme: &str, host: &str, path: &str) -> Option<String> {
    if host != "bitbucket.org" {
        return None;
    }
    let mut segments = path.trim_start_matches('/').split('/');
    match (segments.next(), segments.next(), segments.next()) {
        (Some(workspace), Some(repo), Some("src")) => {
            let repo = repo.trim_end_matches(".git");
            Some(format!("{scheme}://{host}/{workspace}/{repo}.git"))
        }
        _ => None,
    }
}
130
131// ── clone / fetch ─────────────────────────────────────────────────────────────
132
133fn validate_clone_url(url: &str) -> Result<()> {
134    let lower = url.to_lowercase();
135    // http:// is excluded to prevent SSRF against plaintext internal HTTP services.
136    let allowed = ["https://", "git://", "ssh://", "git@"];
137    if !allowed.iter().any(|p| lower.starts_with(p)) {
138        bail!(
139            "git URL rejected: only https://, git://, ssh://, and git@ URLs are \
140             permitted (got {url:?})"
141        );
142    }
143    // Block cloud instance metadata endpoints and link-local addresses.
144    let blocked = [
145        "169.254.",
146        "metadata.google.internal",
147        "100.100.100.",
148        "[fd",
149        "[fe80",
150    ];
151    if blocked.iter().any(|b| lower.contains(b)) {
152        bail!("git URL rejected: link-local and metadata service addresses are not permitted");
153    }
154    Ok(())
155}
156
157/// Clone `url` into `dest`, or fetch all refs if the repo already exists.
158///
159/// Browse URLs (GitHub, GitLab, Bitbucket web pages) are automatically converted
160/// to their corresponding git clone URLs before cloning.
161///
162/// # Errors
163/// Returns an error if the URL is rejected, the clone directory cannot be created,
164/// or the underlying `git clone` / `git fetch` command fails.
165pub fn clone_or_fetch(url: &str, dest: &Path) -> Result<()> {
166    let normalized = normalize_git_url(url);
167    let url = normalized.as_str();
168    validate_clone_url(url)?;
169    if dest.join(".git").exists() {
170        run_git(dest, &["fetch", "--all", "--tags", "--prune"])?;
171    } else {
172        std::fs::create_dir_all(dest).context("failed to create clone directory")?;
173        let dest_str = dest.to_str().unwrap_or(".");
174        let parent = dest.parent().unwrap_or(dest);
175        run_git(
176            parent,
177            &["clone", "--no-single-branch", "--depth=50", url, dest_str],
178        )?;
179    }
180    Ok(())
181}
182
183/// Resolve `ref_name` to its full SHA in `repo`.
184///
185/// # Errors
186/// Returns an error if `git rev-parse` fails (e.g. the ref does not exist).
187pub fn get_sha(repo: &Path, ref_name: &str) -> Result<String> {
188    run_git(repo, &["rev-parse", ref_name])
189}
190
191// ── worktree helpers ──────────────────────────────────────────────────────────
192
193/// Create a detached worktree at `worktree_path` pointing at `ref_name`.
194///
195/// # Errors
196/// Returns an error if `git worktree add` fails.
197pub fn create_worktree(repo: &Path, ref_name: &str, worktree_path: &Path) -> Result<()> {
198    let wt = worktree_path.to_str().unwrap_or(".");
199    run_git(repo, &["worktree", "add", "--detach", wt, ref_name])?;
200    Ok(())
201}
202
203/// Remove a worktree previously created with [`create_worktree`].
204///
205/// # Errors
206/// This function always succeeds; the underlying git command failure is intentionally ignored.
207pub fn destroy_worktree(repo: &Path, worktree_path: &Path) -> Result<()> {
208    let wt = worktree_path.to_str().unwrap_or(".");
209    let _ = run_git(repo, &["worktree", "remove", "--force", wt]);
210    Ok(())
211}
212
213// ── ref listing ───────────────────────────────────────────────────────────────
214
215/// Return all branches, tags, and recent commits for `repo`.
216///
217/// # Errors
218/// Returns an error if any underlying git command fails.
219pub fn list_refs(repo: &Path) -> Result<RepoRefs> {
220    Ok(RepoRefs {
221        branches: list_branches(repo)?,
222        tags: list_tags(repo)?,
223        recent_commits: list_commits(repo, "HEAD", 40)?,
224    })
225}
226
227fn list_branches(repo: &Path) -> Result<Vec<GitRef>> {
228    let fmt = "%(refname:short)|%(objectname:short)|%(creatordate:iso-strict)|%(subject)";
229    // Use -r (remote-tracking only) to avoid local/remote duplicates.
230    // Strip the leading remote name (e.g. "origin/") from each ref so the
231    // displayed name matches what the upstream repository calls the branch.
232    let out = run_git(repo, &["branch", "-r", &format!("--format={fmt}")])?;
233    let refs = out
234        .lines()
235        .filter(|l| !l.trim().is_empty())
236        .map(|l| parse_ref_line(l, GitRefKind::Branch))
237        // Drop symbolic HEAD pointers (e.g. origin/HEAD).
238        .filter(|r| r.name != "HEAD" && !r.name.ends_with("/HEAD"))
239        .map(|mut r| {
240            // Strip the remote prefix ("origin/", "upstream/", etc.).
241            if let Some(slash) = r.name.find('/') {
242                r.name = r.name[slash + 1..].to_owned();
243            }
244            r
245        })
246        .collect::<Vec<_>>();
247    Ok(refs)
248}
249
250fn list_tags(repo: &Path) -> Result<Vec<GitRef>> {
251    let fmt = "%(refname:short)|%(objectname:short)|%(creatordate:iso-strict)|%(subject)";
252    let out = run_git(
253        repo,
254        &["tag", "--sort=-creatordate", &format!("--format={fmt}")],
255    )?;
256    Ok(out
257        .lines()
258        .filter(|l| !l.trim().is_empty())
259        .map(|l| parse_ref_line(l, GitRefKind::Tag))
260        .collect())
261}
262
263fn parse_ref_line(line: &str, kind: GitRefKind) -> GitRef {
264    let parts: Vec<&str> = line.splitn(4, '|').collect();
265    let name = parts.first().copied().unwrap_or("").to_owned();
266    let sha = parts.get(1).copied().unwrap_or("").to_owned();
267    let date = parts.get(2).copied().and_then(parse_git_date);
268    let message = parts.get(3).map(|s| (*s).to_owned());
269    GitRef {
270        kind,
271        name,
272        sha,
273        date,
274        message,
275    }
276}
277
278// ── commit listing ────────────────────────────────────────────────────────────
279
280/// Return up to `limit` commits reachable from `ref_name`.
281///
282/// # Errors
283/// Returns an error if `git log` fails.
284pub fn list_commits(repo: &Path, ref_name: &str, limit: usize) -> Result<Vec<GitCommit>> {
285    let fmt = "%H|%h|%an|%aI|%s";
286    let n = format!("-{limit}");
287    let out = run_git(repo, &["log", ref_name, &format!("--format={fmt}"), &n])?;
288    Ok(out
289        .lines()
290        .filter(|l| !l.trim().is_empty())
291        .map(parse_commit_line)
292        .collect())
293}
294
295fn parse_commit_line(line: &str) -> GitCommit {
296    let p: Vec<&str> = line.splitn(5, '|').collect();
297    let sha = p.first().copied().unwrap_or("").to_owned();
298    let short_sha = p.get(1).copied().unwrap_or("").to_owned();
299    let author = p.get(2).copied().unwrap_or("").to_owned();
300    let date = p
301        .get(3)
302        .copied()
303        .and_then(parse_git_date)
304        .unwrap_or_default();
305    let subject = p.get(4).copied().unwrap_or("").to_owned();
306    GitCommit {
307        sha,
308        short_sha,
309        author,
310        date,
311        subject,
312    }
313}
314
315fn parse_git_date(s: &str) -> Option<chrono::DateTime<chrono::Utc>> {
316    chrono::DateTime::parse_from_rfc3339(s)
317        .ok()
318        .map(|d| d.with_timezone(&chrono::Utc))
319}