Skip to main content

hypha/git/
mod.rs

1//! Git operations using system `git` command.
2//!
3//! All functions shell out to `git` via `std::process::Command`.
4//! This eliminates the heavy `gix` dependency and works with any
5//! git transport (including dumb HTTP).
6
7use std::ffi::OsStr;
8use std::io::{Read, Seek, SeekFrom};
9use std::path::Path;
10use std::process::{Command, Output, Stdio};
11use std::time::{Duration, Instant};
12
/// Default wall-clock limit for every system git invocation.
pub const GIT_COMMAND_TIMEOUT: Duration = Duration::from_secs(300);

/// Upper bound on the sleep between two exit-status polls of a running child.
const GIT_POLL_INTERVAL: Duration = Duration::from_millis(50);
/// Partial-clone filter spec handed to `--filter` and stored as a remote's
/// `partialclonefilter` setting.
const BLOBLESS_FILTER: &str = "blob:none";
/// Name of a promisor remote.
///
/// NOTE(review): not referenced inside this module; presumably callers pass it
/// as `remote_name` to `configure_blobless_promisor_remote` — confirm.
pub const CMN_PROMISOR_REMOTE: &str = "cmn-promisor";
19
/// Error type for git operations.
///
/// Because `Exec` carries `#[from] std::io::Error`, any I/O error propagated
/// with `?` in this module (temp-file handling in `run_program_raw`, the
/// directory walk in `enforce_size_budget`) is reported as `Exec` too, not
/// only failures to spawn the process.
#[derive(Debug, thiserror::Error)]
pub enum GitError {
    /// Failed to spawn or execute the git process.
    #[error("failed to run git: {0}")]
    Exec(#[from] std::io::Error),
    /// Git command exited with non-zero status (stderr captured).
    #[error("{0}")]
    Command(String),
    /// Git command exceeded the wall-clock timeout.
    ///
    /// `command` is the space-joined program and arguments; `timeout_secs` is
    /// the limit in whole seconds (sub-second limits are reported as 0).
    #[error("git command timed out after {timeout_secs}s: {command}")]
    Timeout { command: String, timeout_secs: u64 },
    /// Git content exceeded the configured local size budget.
    #[error("git size budget exceeded: {0}")]
    SizeLimit(String),
    /// URL rejected by security validation.
    #[error("rejected git URL: {0}")]
    InvalidUrl(String),
    /// Argument rejected by the option-injection guard.
    #[error("rejected git argument: {0}")]
    InvalidArg(String),
}
42
/// Disk budget for cloned/fetched git repositories.
///
/// Checked by `enforce_size_budget`, which fails as soon as either limit is
/// exceeded.
#[derive(Debug, Clone, Copy)]
pub struct GitSizeLimits {
    /// Maximum cumulative size, in bytes, of all entries under the scanned path.
    pub max_bytes: u64,
    /// Maximum number of directory entries (files, directories and symlinks
    /// are each counted once) under the scanned path.
    pub max_files: u64,
}

impl GitSizeLimits {
    /// Build a budget from a byte limit and an entry-count limit.
    ///
    /// `const` so that callers can declare default budgets as constants.
    pub const fn new(max_bytes: u64, max_files: u64) -> Self {
        Self {
            max_bytes,
            max_files,
        }
    }
}
58
/// Totals accumulated by `enforce_size_budget` while walking a directory tree.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct GitSizeStats {
    /// Sum of the metadata length of every entry visited (symlinks are not
    /// followed, so a symlink contributes its own length).
    pub bytes: u64,
    /// Number of directory entries visited; the root path itself is not counted.
    pub files: u64,
}
64
65/// Reject a value that could be misinterpreted by git as an option flag.
66///
67/// Refs, remote names, and URLs are passed positionally; a leading `-` would
68/// otherwise be parsed as a flag (e.g. a ref named `--upload-pack=...`).
69fn reject_option_like(value: &str, what: &str) -> Result<(), GitError> {
70    if value.starts_with('-') {
71        return Err(GitError::InvalidArg(format!(
72            "{} must not start with '-': {}",
73            what, value
74        )));
75    }
76    Ok(())
77}
78
79/// Validate that a git URL is safe for remote operations.
80///
81/// Delegates to `substrate::normalize_and_validate_url()` for SSRF protection (loopback,
82/// private IPs, link-local, CGNAT, userinfo), then additionally rejects
83/// non-HTTPS schemes that substrate allows for .onion/.i2p (git must be HTTPS-only).
84fn validate_remote_url(url: &str) -> Result<(), GitError> {
85    // substrate::normalize_and_validate_url covers: SSRF
86    // (private/reserved IPs, localhost, link-local, CGNAT), userinfo, bare
87    // hostnames, scheme validation, and trailing-slash normalization.
88    let normalized = substrate::normalize_and_validate_url(url)
89        .map_err(|e| GitError::InvalidUrl(e.to_string()))?;
90
91    // substrate allows HTTP for .onion/.i2p — git requires strict HTTPS
92    let parsed = reqwest::Url::parse(&normalized)
93        .map_err(|e| GitError::InvalidUrl(format!("invalid URL syntax ({})", e)))?;
94    if parsed.scheme() != "https" {
95        return Err(GitError::InvalidUrl(format!(
96            "only https:// URLs are allowed (got: {})",
97            url
98        )));
99    }
100    Ok(())
101}
102
/// Render a program and its arguments as one space-separated string for use
/// in error messages.
///
/// Arguments are converted lossily (invalid UTF-8 becomes U+FFFD) and are not
/// shell-quoted, so the result is for display only.
fn display_command(program: &str, args: &[impl AsRef<OsStr>]) -> String {
    let mut rendered = String::from(program);
    for arg in args {
        rendered.push(' ');
        rendered.push_str(&arg.as_ref().to_string_lossy());
    }
    rendered
}
113
/// Spawn `program` with `args` (optionally in `dir`), wait for it to exit or
/// for `timeout` to elapse, and return its exit status with captured
/// stdout/stderr.
///
/// A non-zero exit status is NOT an error here; callers inspect
/// `Output::status` themselves.
///
/// # Errors
/// - `GitError::Exec` for any I/O failure (temp-file creation, spawn, polling,
///   reading the captured output).
/// - `GitError::Timeout` when the child is still running after `timeout`; the
///   child is killed and reaped before returning.
///
/// NOTE(review): stdin is left at the `Command` default, so the child inherits
/// the parent's stdin — confirm git can never block on an interactive prompt.
fn run_program_raw<S: AsRef<OsStr>>(
    program: &str,
    dir: Option<&Path>,
    args: &[S],
    timeout: Duration,
) -> Result<Output, GitError> {
    let command_display = display_command(program, args);
    // Output is captured in temp files rather than pipes.
    // NOTE(review): presumably so a chatty child cannot stall on a full pipe
    // while this function only polls for exit — confirm intent.
    let mut stdout_file = tempfile::tempfile()?;
    let mut stderr_file = tempfile::tempfile()?;

    let mut cmd = Command::new(program);
    cmd.args(args.iter().map(|arg| arg.as_ref()));
    if let Some(d) = dir {
        cmd.current_dir(d);
    }
    // The child writes through cloned handles; the originals are kept for
    // reading the output back once the child has exited.
    cmd.stdout(Stdio::from(stdout_file.try_clone()?));
    cmd.stderr(Stdio::from(stderr_file.try_clone()?));

    let mut child = cmd.spawn()?;
    let started = Instant::now();
    // Poll for exit instead of blocking so the wall-clock limit can be enforced.
    let status = loop {
        if let Some(status) = child.try_wait()? {
            break status;
        }
        if started.elapsed() >= timeout {
            // Best effort: kill the direct child and reap it so no zombie is
            // left behind; failures of either call are ignored.
            let _ = child.kill();
            let _ = child.wait();
            return Err(GitError::Timeout {
                command: command_display,
                // Whole seconds only: a sub-second timeout is reported as 0.
                timeout_secs: timeout.as_secs(),
            });
        }
        // Never sleep past the deadline: cap the poll interval at the time left.
        std::thread::sleep(GIT_POLL_INTERVAL.min(timeout.saturating_sub(started.elapsed())));
    };

    // Cloned file handles share one cursor, so rewind before reading back what
    // the child wrote.
    let mut stdout = Vec::new();
    stdout_file.seek(SeekFrom::Start(0))?;
    stdout_file.read_to_end(&mut stdout)?;
    let mut stderr = Vec::new();
    stderr_file.seek(SeekFrom::Start(0))?;
    stderr_file.read_to_end(&mut stderr)?;

    Ok(Output {
        status,
        stdout,
        stderr,
    })
}
162
163/// Run a git command (optionally in `dir`) and return the raw output without
164/// treating non-zero exit status as an error.
165fn run_git_raw_unchecked<S: AsRef<OsStr>>(
166    dir: Option<&Path>,
167    args: &[S],
168) -> Result<Output, GitError> {
169    run_program_raw("git", dir, args, GIT_COMMAND_TIMEOUT)
170}
171
172/// Run a git command (optionally in `dir`), erroring with captured stderr on
173/// non-zero exit. Returns the raw `Output` so callers can read stdout.
174fn run_git_raw<S: AsRef<OsStr>>(dir: Option<&Path>, args: &[S]) -> Result<Output, GitError> {
175    let output = run_git_raw_unchecked(dir, args)?;
176    if !output.status.success() {
177        let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
178        let message = if stderr.is_empty() {
179            format!(
180                "{} exited with {}",
181                display_command("git", args),
182                output.status
183            )
184        } else {
185            stderr
186        };
187        return Err(GitError::Command(message));
188    }
189    Ok(output)
190}
191
192/// Run a git command and return Ok(()) on success, or the stderr message on failure.
193fn run_git<S: AsRef<OsStr>>(args: &[S]) -> Result<(), GitError> {
194    run_git_raw(None, args).map(|_| ())
195}
196
197/// Run a git command in a specific directory.
198fn run_git_in<S: AsRef<OsStr>>(dir: &Path, args: &[S]) -> Result<(), GitError> {
199    run_git_raw(Some(dir), args).map(|_| ())
200}
201
202/// Run a git command in a specific directory and return stdout.
203fn run_git_output<S: AsRef<OsStr>>(dir: &Path, args: &[S]) -> Result<String, GitError> {
204    let output = run_git_raw(Some(dir), args)?;
205    Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
206}
207
208/// Check if system git is available.
209pub fn is_available() -> bool {
210    run_git_raw_unchecked(None, &["--version"])
211        .map(|output| output.status.success())
212        .unwrap_or(false)
213}
214
215fn clone_repo_args(url: &str, dest: &str, shallow: bool) -> Vec<String> {
216    let mut args = vec![
217        "clone".to_string(),
218        "--filter".to_string(),
219        BLOBLESS_FILTER.to_string(),
220    ];
221    if shallow {
222        args.extend(["--depth".to_string(), "1".to_string()]);
223    }
224    args.extend(["--".to_string(), url.to_string(), dest.to_string()]);
225    args
226}
227
228fn clone_bare_repo_args(url: &str, dest: &str) -> Vec<String> {
229    vec![
230        "clone".to_string(),
231        "--bare".to_string(),
232        "--filter".to_string(),
233        BLOBLESS_FILTER.to_string(),
234        "--".to_string(),
235        url.to_string(),
236        dest.to_string(),
237    ]
238}
239
240fn clone_from_local_args(local_bare_path: &Path, dest: &Path, no_checkout: bool) -> Vec<String> {
241    let src = format!("file://{}", local_bare_path.display());
242    let dest_str = dest.display().to_string();
243    let mut args = vec![
244        "clone".to_string(),
245        "--filter".to_string(),
246        BLOBLESS_FILTER.to_string(),
247    ];
248    if no_checkout {
249        args.push("--no-checkout".to_string());
250    }
251    args.extend(["--".to_string(), src, dest_str]);
252    args
253}
254
255fn fetch_to_bare_args(remote_url: &str) -> Vec<String> {
256    vec![
257        "fetch".to_string(),
258        "--filter".to_string(),
259        BLOBLESS_FILTER.to_string(),
260        "--force".to_string(),
261        remote_url.to_string(),
262        "+refs/heads/*:refs/heads/*".to_string(),
263    ]
264}
265
266/// Clone a git repository to the specified destination.
267///
268/// - `url`: Git repository URL
269/// - `dest`: Destination directory (must not exist)
270/// - `shallow`: If true, performs a shallow clone (depth 1)
271pub fn clone_repo(url: &str, dest: &Path, shallow: bool) -> Result<(), GitError> {
272    validate_remote_url(url)?;
273    let dest_str = dest.display().to_string();
274    run_git(&clone_repo_args(url, &dest_str, shallow))
275}
276
277/// Clone a git repository as a bare repository.
278pub fn clone_bare_repo(url: &str, dest: &Path) -> Result<(), GitError> {
279    validate_remote_url(url)?;
280    let dest_str = dest.display().to_string();
281    run_git(&clone_bare_repo_args(url, &dest_str))
282}
283
284/// Clone from a local bare repository to a working directory.
285pub fn clone_from_local(local_bare_path: &Path, dest: &Path) -> Result<(), GitError> {
286    run_git(&clone_from_local_args(local_bare_path, dest, false))
287}
288
289/// Clone from a local bare repository without checking out files.
290pub fn clone_from_local_no_checkout(local_bare_path: &Path, dest: &Path) -> Result<(), GitError> {
291    run_git(&clone_from_local_args(local_bare_path, dest, true))
292}
293
294/// Checkout a specific ref (commit SHA, tag, or branch).
295pub fn checkout_ref(repo_path: &Path, ref_spec: &str) -> Result<(), GitError> {
296    // `ref_spec` is untrusted (from spore manifests). Reject option-like values
297    // and pin it as a tree-ish with a trailing `--` so it can never be parsed
298    // as a flag or a pathspec.
299    reject_option_like(ref_spec, "git ref")?;
300    run_git_in(repo_path, &["checkout", ref_spec, "--"])
301}
302
303/// Initialize a new git repository at the given path.
304pub fn init_repo(path: &Path) -> Result<(), GitError> {
305    run_git_in(path, &["init"])
306}
307
308/// Configure a blobless promisor remote for lazy blob fetching.
309pub fn configure_blobless_promisor_remote(
310    repo_path: &Path,
311    remote_name: &str,
312    remote_url: &str,
313) -> Result<(), GitError> {
314    reject_option_like(remote_name, "remote name")?;
315    validate_remote_url(remote_url)?;
316    if get_remote_url(repo_path, remote_name)?.is_some() {
317        run_git_in(repo_path, &["remote", "set-url", remote_name, remote_url])?;
318    } else {
319        run_git_in(repo_path, &["remote", "add", remote_name, remote_url])?;
320    }
321    let promisor_key = format!("remote.{remote_name}.promisor");
322    run_git_in(repo_path, &["config", promisor_key.as_str(), "true"])?;
323    let filter_key = format!("remote.{remote_name}.partialclonefilter");
324    run_git_in(repo_path, &["config", filter_key.as_str(), BLOBLESS_FILTER])?;
325    run_git_in(
326        repo_path,
327        &["config", "extensions.partialClone", remote_name],
328    )
329}
330
331/// Configure origin as a blobless promisor remote for lazy blob fetching.
332pub fn configure_blobless_origin(repo_path: &Path, remote_url: &str) -> Result<(), GitError> {
333    configure_blobless_promisor_remote(repo_path, "origin", remote_url)
334}
335
336/// Add all files and create a commit. Returns the commit SHA.
337pub fn add_all_and_commit(repo_path: &Path, message: &str) -> Result<String, GitError> {
338    run_git_in(repo_path, &["add", "."])?;
339    run_git_in(
340        repo_path,
341        &[
342            "-c",
343            "user.name=CMN Hypha",
344            "-c",
345            "user.email=hypha@cmn.dev",
346            "commit",
347            "-m",
348            message,
349        ],
350    )?;
351    run_git_output(repo_path, &["rev-parse", "HEAD"])
352}
353
354/// Get the current HEAD commit ID as a string.
355pub fn get_head_commit(repo_path: &Path) -> Result<String, GitError> {
356    run_git_output(repo_path, &["rev-parse", "HEAD"])
357}
358
359/// Check if a commit exists in the repository.
360pub fn commit_exists(repo_path: &Path, commit_sha: &str) -> Result<bool, GitError> {
361    reject_option_like(commit_sha, "commit sha")?;
362    let output = run_git_raw_unchecked(Some(repo_path), &["cat-file", "-t", commit_sha])?;
363    Ok(output.status.success())
364}
365
366/// Fetch from a remote URL into a bare repository.
367pub fn fetch_to_bare(bare_repo_path: &Path, remote_url: &str) -> Result<(), GitError> {
368    validate_remote_url(remote_url)?;
369    run_git_in(bare_repo_path, &fetch_to_bare_args(remote_url))
370}
371
372/// Fetch from a named remote in the repository.
373pub fn fetch_from_remote(repo_path: &Path, remote_name: &str) -> Result<(), GitError> {
374    reject_option_like(remote_name, "remote name")?;
375    run_git_in(
376        repo_path,
377        &["fetch", "--filter", BLOBLESS_FILTER, remote_name],
378    )
379}
380
381/// Add a remote to the repository.
382pub fn add_remote(repo_path: &Path, remote_name: &str, remote_url: &str) -> Result<(), GitError> {
383    reject_option_like(remote_name, "remote name")?;
384    reject_option_like(remote_url, "remote url")?;
385    run_git_in(repo_path, &["remote", "add", remote_name, remote_url])
386}
387
388/// Set the URL for an existing remote.
389pub fn set_remote_url(repo_path: &Path, remote_name: &str, new_url: &str) -> Result<(), GitError> {
390    reject_option_like(remote_name, "remote name")?;
391    reject_option_like(new_url, "remote url")?;
392    run_git_in(repo_path, &["remote", "set-url", remote_name, new_url])
393}
394
395/// Check if the working directory has uncommitted changes.
396///
397/// Returns true if clean (no changes), false if dirty.
398pub fn is_working_dir_clean(repo_path: &Path) -> Result<bool, GitError> {
399    let output = run_git_output(repo_path, &["status", "--porcelain"])?;
400    Ok(output.is_empty())
401}
402
403/// Get the root commit from a bare repository.
404pub fn get_root_commit_bare(bare_repo_path: &Path) -> Result<String, GitError> {
405    run_git_output(bare_repo_path, &["rev-list", "--max-parents=0", "HEAD"])
406}
407
408/// Get the root commit SHA (first commit in history) from a working directory.
409pub fn get_root_commit(repo_path: &Path) -> Result<String, GitError> {
410    run_git_output(repo_path, &["rev-list", "--max-parents=0", "HEAD"])
411}
412
413/// Get the URL of a named remote, or None if the remote doesn't exist.
414pub fn get_remote_url(repo_path: &Path, remote: &str) -> Result<Option<String>, GitError> {
415    match run_git_output(repo_path, &["remote", "get-url", remote]) {
416        Ok(url) if url.is_empty() => Ok(None),
417        Ok(url) => Ok(Some(url)),
418        Err(_) => Ok(None),
419    }
420}
421
422/// Get the latest git commit timestamp for a path, if the path is in a repo.
423pub fn last_commit_epoch_ms(repo_path: &Path) -> Option<u64> {
424    let output = run_git_output(repo_path, &["log", "-1", "--format=%ct", "--", "."]).ok()?;
425    let epoch_s: u64 = output.parse().ok()?;
426    Some(epoch_s * 1000)
427}
428
429/// Walk a git checkout/cache path without following symlinks and enforce disk limits.
430pub fn enforce_size_budget(path: &Path, limits: GitSizeLimits) -> Result<GitSizeStats, GitError> {
431    let mut stats = GitSizeStats { bytes: 0, files: 0 };
432    let mut stack = vec![path.to_path_buf()];
433
434    while let Some(dir) = stack.pop() {
435        for entry in std::fs::read_dir(&dir)? {
436            let entry = entry?;
437            let path = entry.path();
438            let meta = std::fs::symlink_metadata(&path)?;
439
440            stats.files = stats.files.saturating_add(1);
441            stats.bytes = stats.bytes.saturating_add(meta.len());
442            if stats.files > limits.max_files {
443                return Err(GitError::SizeLimit(format!(
444                    "{} contains more than {} entries",
445                    path.display(),
446                    limits.max_files
447                )));
448            }
449            if stats.bytes > limits.max_bytes {
450                return Err(GitError::SizeLimit(format!(
451                    "{} exceeds {} bytes",
452                    path.display(),
453                    limits.max_bytes
454                )));
455            }
456
457            if meta.is_dir() {
458                stack.push(path);
459            }
460        }
461    }
462
463    Ok(stats)
464}
465
// Unit tests for the argument builders, the size-budget walk and the timeout
// path. None of them contacts a remote; the timeout test spawns `sh`.
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
mod tests {
    use super::*;
    use std::fs;

    /// Both shallow and full clones carry `--filter blob:none` and put `--`
    /// before the positional URL and destination.
    #[test]
    fn clone_args_use_blobless_filter_for_shallow_and_full() {
        assert_eq!(
            clone_repo_args("https://example.com/repo.git", "/tmp/repo", true),
            [
                "clone",
                "--filter",
                "blob:none",
                "--depth",
                "1",
                "--",
                "https://example.com/repo.git",
                "/tmp/repo",
            ]
            .map(String::from)
            .to_vec()
        );
        assert_eq!(
            clone_repo_args("https://example.com/repo.git", "/tmp/repo", false),
            [
                "clone",
                "--filter",
                "blob:none",
                "--",
                "https://example.com/repo.git",
                "/tmp/repo",
            ]
            .map(String::from)
            .to_vec()
        );
    }

    /// Bare clone and fetch argument lists are pinned exactly, including the
    /// blobless filter and the forced heads refspec.
    #[test]
    fn bare_clone_and_fetch_args_use_blobless_filter() {
        assert_eq!(
            clone_bare_repo_args("https://example.com/repo.git", "/tmp/repo.git"),
            [
                "clone",
                "--bare",
                "--filter",
                "blob:none",
                "--",
                "https://example.com/repo.git",
                "/tmp/repo.git",
            ]
            .map(String::from)
            .to_vec()
        );
        assert_eq!(
            fetch_to_bare_args("https://example.com/repo.git"),
            [
                "fetch",
                "--filter",
                "blob:none",
                "--force",
                "https://example.com/repo.git",
                "+refs/heads/*:refs/heads/*",
            ]
            .map(String::from)
            .to_vec()
        );
    }

    /// A single 16-byte file exceeds an 8-byte budget.
    #[test]
    fn enforce_size_budget_rejects_too_many_bytes() {
        let dir = tempfile::tempdir().expect("tempdir");
        fs::write(dir.path().join("large.bin"), [0u8; 16]).expect("write");

        let err = enforce_size_budget(dir.path(), GitSizeLimits::new(8, 10)).unwrap_err();
        assert!(matches!(err, GitError::SizeLimit(_)));
    }

    /// Two entries exceed a one-entry budget even though the byte budget is ample.
    #[test]
    fn enforce_size_budget_rejects_too_many_entries() {
        let dir = tempfile::tempdir().expect("tempdir");
        fs::write(dir.path().join("one.txt"), b"1").expect("write one");
        fs::write(dir.path().join("two.txt"), b"2").expect("write two");

        let err = enforce_size_budget(dir.path(), GitSizeLimits::new(1024, 1)).unwrap_err();
        assert!(matches!(err, GitError::SizeLimit(_)));
    }

    /// A child that outlives its timeout is reported as `GitError::Timeout`.
    /// Unix-only because it relies on `sh` and `sleep`.
    #[cfg(unix)]
    #[test]
    fn run_program_raw_times_out() {
        let err =
            run_program_raw("sh", None, &["-c", "sleep 2"], Duration::from_millis(20)).unwrap_err();
        assert!(matches!(err, GitError::Timeout { .. }));
    }
}