use std::ffi::OsStr;
use std::io::{Read, Seek, SeekFrom};
use std::path::Path;
use std::process::{Command, Output, Stdio};
use std::time::{Duration, Instant};
/// Wall-clock limit (300 seconds) applied to every git invocation made through
/// `run_git_raw_unchecked`.
pub const GIT_COMMAND_TIMEOUT: Duration = Duration::from_secs(300);
/// Upper bound on how long `run_program_raw` sleeps between checks of the child's exit status.
const GIT_POLL_INTERVAL: Duration = Duration::from_millis(50);
/// Filter spec passed to `--filter` and written to `remote.<name>.partialclonefilter`.
const BLOBLESS_FILTER: &str = "blob:none";
/// Remote name constant; it is not referenced in the visible part of this file.
// NOTE(review): presumably the promisor remote name used by callers — confirm against usages.
pub const CMN_PROMISOR_REMOTE: &str = "cmn-promisor";
/// Errors produced by the git helpers in this module.
#[derive(Debug, thiserror::Error)]
pub enum GitError {
/// An I/O failure (converted from `std::io::Error` via `?`), e.g. while spawning the
/// process, creating capture files, or reading a directory.
#[error("failed to run git: {0}")]
Exec(#[from] std::io::Error),
/// git exited unsuccessfully; carries its trimmed stderr, or a
/// "`<command>` exited with `<status>`" fallback when stderr was empty.
#[error("{0}")]
Command(String),
/// The child did not exit within the allowed time and was killed.
/// `timeout_secs` holds whole seconds only (`Duration::as_secs`).
#[error("git command timed out after {timeout_secs}s: {command}")]
Timeout { command: String, timeout_secs: u64 },
/// A directory tree exceeded the byte or entry budget in `enforce_size_budget`.
#[error("git size budget exceeded: {0}")]
SizeLimit(String),
/// A remote URL failed validation in `validate_remote_url`.
#[error("rejected git URL: {0}")]
InvalidUrl(String),
/// A caller-supplied argument started with `-` and was rejected by `reject_option_like`.
#[error("rejected git argument: {0}")]
InvalidArg(String),
}
/// Upper bounds checked by `enforce_size_budget` while it walks a directory tree.
#[derive(Debug, Clone, Copy)]
pub struct GitSizeLimits {
    /// Maximum cumulative size, in bytes, summed over every entry's metadata length.
    pub max_bytes: u64,
    /// Maximum number of directory entries (files, directories, symlinks, ...).
    pub max_files: u64,
}

impl GitSizeLimits {
    /// Builds a limit pair from a byte cap and an entry-count cap.
    pub fn new(max_bytes: u64, max_files: u64) -> Self {
        Self { max_bytes, max_files }
    }
}
/// Totals accumulated by `enforce_size_budget` for a directory tree that stayed within budget.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct GitSizeStats {
/// Sum of the metadata length of every entry visited (saturating).
pub bytes: u64,
/// Number of directory entries visited (saturating); directories count as entries too.
pub files: u64,
}
/// Refuses values that git could interpret as a command-line option.
///
/// `what` is a human-readable label for the value and is only used in the error text.
///
/// # Errors
/// Returns [`GitError::InvalidArg`] when `value` begins with `-`.
fn reject_option_like(value: &str, what: &str) -> Result<(), GitError> {
    if !value.starts_with('-') {
        return Ok(());
    }
    let reason = format!("{} must not start with '-': {}", what, value);
    Err(GitError::InvalidArg(reason))
}
/// Checks that `url` is acceptable as a git remote.
///
/// The URL must pass `substrate::normalize_and_validate_url`, the normalized form must
/// parse as a URL, and its scheme must be exactly `https`.
///
/// # Errors
/// Returns [`GitError::InvalidUrl`] describing whichever check failed.
fn validate_remote_url(url: &str) -> Result<(), GitError> {
    let normalized = match substrate::normalize_and_validate_url(url) {
        Ok(value) => value,
        Err(e) => return Err(GitError::InvalidUrl(e.to_string())),
    };
    let parsed = match reqwest::Url::parse(&normalized) {
        Ok(value) => value,
        Err(e) => {
            return Err(GitError::InvalidUrl(format!(
                "invalid URL syntax ({})",
                e
            )))
        }
    };
    match parsed.scheme() {
        "https" => Ok(()),
        // The message echoes the caller's original input, not the normalized form.
        _ => Err(GitError::InvalidUrl(format!(
            "only https:// URLs are allowed (got: {})",
            url
        ))),
    }
}
/// Renders a program and its arguments as one space-separated string for messages.
///
/// Arguments are converted lossily and are not quoted or escaped, so the result is for
/// display only and cannot be pasted back into a shell reliably.
fn display_command(program: &str, args: &[impl AsRef<OsStr>]) -> String {
    let mut rendered = String::from(program);
    for arg in args {
        rendered.push(' ');
        rendered.push_str(&arg.as_ref().to_string_lossy());
    }
    rendered
}
/// Runs `program` with `args`, capturing stdout and stderr, and enforces a deadline.
///
/// * `dir` - working directory for the child; the current one is inherited when `None`.
/// * `timeout` - how long the child may run before it is killed.
///
/// The exit status is returned as-is; a non-zero exit is not treated as an error here.
///
/// # Errors
/// * [`GitError::Exec`] for any I/O failure (capture files, spawn, wait, reading output).
/// * [`GitError::Timeout`] when the child is still running once `timeout` has elapsed;
///   the child is killed and reaped before returning.
fn run_program_raw<S: AsRef<OsStr>>(
    program: &str,
    dir: Option<&Path>,
    args: &[S],
    timeout: Duration,
) -> Result<Output, GitError> {
    // Rewinds a capture file and returns everything the child wrote to it.
    fn drain_capture(file: &mut std::fs::File) -> std::io::Result<Vec<u8>> {
        let mut bytes = Vec::new();
        file.seek(SeekFrom::Start(0))?;
        file.read_to_end(&mut bytes)?;
        Ok(bytes)
    }

    let rendered = display_command(program, args);

    // Output goes to anonymous temp files, read back only after the child has exited.
    // NOTE(review): presumably chosen over pipes so a chatty child cannot stall on a full
    // pipe while this thread is polling — confirm intent.
    let mut out_capture = tempfile::tempfile()?;
    let mut err_capture = tempfile::tempfile()?;

    let mut command = Command::new(program);
    command.args(args);
    if let Some(cwd) = dir {
        command.current_dir(cwd);
    }
    command.stdout(Stdio::from(out_capture.try_clone()?));
    command.stderr(Stdio::from(err_capture.try_clone()?));

    let mut child = command.spawn()?;
    let started = Instant::now();
    let status = loop {
        match child.try_wait()? {
            Some(finished) => break finished,
            None if started.elapsed() < timeout => {
                // Never sleep past the deadline.
                let remaining = timeout.saturating_sub(started.elapsed());
                std::thread::sleep(GIT_POLL_INTERVAL.min(remaining));
            }
            None => {
                // Best effort: failures to kill or reap are deliberately ignored.
                let _ = child.kill();
                let _ = child.wait();
                return Err(GitError::Timeout {
                    command: rendered,
                    timeout_secs: timeout.as_secs(),
                });
            }
        }
    };

    let stdout = drain_capture(&mut out_capture)?;
    let stderr = drain_capture(&mut err_capture)?;
    Ok(Output {
        status,
        stdout,
        stderr,
    })
}
/// Runs `git` with `args` under [`GIT_COMMAND_TIMEOUT`] without inspecting the exit status.
///
/// Callers decide what a non-zero exit means; only spawn/I/O failures and timeouts are
/// reported as errors.
fn run_git_raw_unchecked<S>(dir: Option<&Path>, args: &[S]) -> Result<Output, GitError>
where
    S: AsRef<OsStr>,
{
    const GIT_PROGRAM: &str = "git";
    run_program_raw(GIT_PROGRAM, dir, args, GIT_COMMAND_TIMEOUT)
}
/// Runs `git` with `args` and turns an unsuccessful exit into an error.
///
/// # Errors
/// Besides the errors of `run_git_raw_unchecked`, returns [`GitError::Command`] when git
/// exits unsuccessfully. The message is git's trimmed stderr, or
/// "`<command>` exited with `<status>`" when stderr is empty.
fn run_git_raw<S: AsRef<OsStr>>(dir: Option<&Path>, args: &[S]) -> Result<Output, GitError> {
    let output = run_git_raw_unchecked(dir, args)?;
    if output.status.success() {
        return Ok(output);
    }

    let diagnostics = String::from_utf8_lossy(&output.stderr);
    let message = match diagnostics.trim() {
        "" => format!(
            "{} exited with {}",
            display_command("git", args),
            output.status
        ),
        text => text.to_string(),
    };
    Err(GitError::Command(message))
}
/// Runs `git` in the inherited working directory and discards its output.
fn run_git<S>(args: &[S]) -> Result<(), GitError>
where
    S: AsRef<OsStr>,
{
    run_git_raw(None, args)?;
    Ok(())
}
/// Runs `git` with `dir` as the working directory and discards its output.
fn run_git_in<S>(dir: &Path, args: &[S]) -> Result<(), GitError>
where
    S: AsRef<OsStr>,
{
    run_git_raw(Some(dir), args)?;
    Ok(())
}
/// Runs `git` in `dir` and returns its stdout, lossily decoded as UTF-8 and trimmed.
fn run_git_output<S>(dir: &Path, args: &[S]) -> Result<String, GitError>
where
    S: AsRef<OsStr>,
{
    let captured = run_git_raw(Some(dir), args)?.stdout;
    let text = String::from_utf8_lossy(&captured);
    Ok(text.trim().to_string())
}
/// Reports whether `git --version` can be run and exits successfully.
///
/// Any failure to run it (including a timeout) is reported as `false`.
pub fn is_available() -> bool {
    match run_git_raw_unchecked(None, &["--version"]) {
        Ok(output) => output.status.success(),
        Err(_) => false,
    }
}
/// Builds the argument list for a blobless `git clone` of `url` into `dest`.
///
/// `shallow` adds `--depth 1`. A `--` separator precedes the positional arguments so that
/// neither `url` nor `dest` can be read as an option.
fn clone_repo_args(url: &str, dest: &str, shallow: bool) -> Vec<String> {
    let depth: &[&str] = if shallow { &["--depth", "1"] } else { &[] };
    let head = ["clone", "--filter", BLOBLESS_FILTER];
    let tail = ["--", url, dest];
    head.iter()
        .chain(depth)
        .chain(&tail)
        .map(|part| String::from(*part))
        .collect()
}
/// Builds the argument list for a blobless `git clone --bare` of `url` into `dest`.
///
/// A `--` separator precedes the positional arguments.
fn clone_bare_repo_args(url: &str, dest: &str) -> Vec<String> {
    let parts = [
        "clone",
        "--bare",
        "--filter",
        BLOBLESS_FILTER,
        "--",
        url,
        dest,
    ];
    Vec::from(parts.map(String::from))
}
/// Builds the argument list for a blobless clone from a local repository.
///
/// The source is addressed as a `file://` URL built from the path's display form.
/// `no_checkout` adds `--no-checkout`.
fn clone_from_local_args(local_bare_path: &Path, dest: &Path, no_checkout: bool) -> Vec<String> {
    let source_url = format!("file://{}", local_bare_path.display());
    let target = dest.display().to_string();

    let mut args: Vec<String> = ["clone", "--filter", BLOBLESS_FILTER]
        .iter()
        .map(|part| part.to_string())
        .collect();
    if no_checkout {
        args.push(String::from("--no-checkout"));
    }
    args.push(String::from("--"));
    args.push(source_url);
    args.push(target);
    args
}
/// Builds the argument list for a forced, blobless fetch of every branch head from
/// `remote_url` onto the same-named local heads (`+refs/heads/*:refs/heads/*`).
fn fetch_to_bare_args(remote_url: &str) -> Vec<String> {
    const ALL_HEADS_REFSPEC: &str = "+refs/heads/*:refs/heads/*";
    let parts = [
        "fetch",
        "--filter",
        BLOBLESS_FILTER,
        "--force",
        remote_url,
        ALL_HEADS_REFSPEC,
    ];
    parts.iter().map(|part| part.to_string()).collect()
}
/// Clones `url` into `dest` as a blobless partial clone, optionally shallow (`--depth 1`).
///
/// # Errors
/// [`GitError::InvalidUrl`] if `url` fails validation; otherwise any error from running git.
pub fn clone_repo(url: &str, dest: &Path, shallow: bool) -> Result<(), GitError> {
    validate_remote_url(url)?;
    let args = clone_repo_args(url, &dest.display().to_string(), shallow);
    run_git(&args)
}
/// Clones `url` into `dest` as a bare, blobless partial clone.
///
/// # Errors
/// [`GitError::InvalidUrl`] if `url` fails validation; otherwise any error from running git.
pub fn clone_bare_repo(url: &str, dest: &Path) -> Result<(), GitError> {
    validate_remote_url(url)?;
    let args = clone_bare_repo_args(url, &dest.display().to_string());
    run_git(&args)
}
/// Clones the local repository at `local_bare_path` into `dest` (blobless, with checkout).
pub fn clone_from_local(local_bare_path: &Path, dest: &Path) -> Result<(), GitError> {
    let args = clone_from_local_args(local_bare_path, dest, false);
    run_git(&args)
}
/// Clones the local repository at `local_bare_path` into `dest` (blobless) without
/// checking out a working tree.
pub fn clone_from_local_no_checkout(local_bare_path: &Path, dest: &Path) -> Result<(), GitError> {
    let args = clone_from_local_args(local_bare_path, dest, true);
    run_git(&args)
}
/// Checks out `ref_spec` in the repository at `repo_path`.
///
/// The trailing `--` marks the end of revisions so `ref_spec` is not read as a path.
///
/// # Errors
/// [`GitError::InvalidArg`] if `ref_spec` starts with `-`; otherwise any error from git.
pub fn checkout_ref(repo_path: &Path, ref_spec: &str) -> Result<(), GitError> {
    reject_option_like(ref_spec, "git ref")?;
    let args = ["checkout", ref_spec, "--"];
    run_git_in(repo_path, &args)
}
/// Runs `git init` with `path` as the working directory.
pub fn init_repo(path: &Path) -> Result<(), GitError> {
    const INIT_ARGS: [&str; 1] = ["init"];
    run_git_in(path, &INIT_ARGS)
}
/// Points `remote_name` at `remote_url` and marks it as the repository's blobless
/// promisor remote.
///
/// The remote is added, or its URL replaced when it already exists. Three config keys are
/// then written, in this order: `remote.<name>.promisor=true`,
/// `remote.<name>.partialclonefilter=blob:none`, and `extensions.partialClone=<name>`.
///
/// # Errors
/// [`GitError::InvalidArg`] for an option-like remote name, [`GitError::InvalidUrl`] for a
/// rejected URL, or the first error from a git invocation (later steps are then skipped).
pub fn configure_blobless_promisor_remote(
    repo_path: &Path,
    remote_name: &str,
    remote_url: &str,
) -> Result<(), GitError> {
    reject_option_like(remote_name, "remote name")?;
    validate_remote_url(remote_url)?;

    let remote_verb = match get_remote_url(repo_path, remote_name)? {
        Some(_) => "set-url",
        None => "add",
    };
    run_git_in(repo_path, &["remote", remote_verb, remote_name, remote_url])?;

    let settings = [
        (format!("remote.{remote_name}.promisor"), "true"),
        (
            format!("remote.{remote_name}.partialclonefilter"),
            BLOBLESS_FILTER,
        ),
        (String::from("extensions.partialClone"), remote_name),
    ];
    for (key, value) in &settings {
        run_git_in(repo_path, &["config", key.as_str(), *value])?;
    }
    Ok(())
}
/// Configures the `origin` remote as the blobless promisor remote for `repo_path`.
pub fn configure_blobless_origin(repo_path: &Path, remote_url: &str) -> Result<(), GitError> {
    const ORIGIN: &str = "origin";
    configure_blobless_promisor_remote(repo_path, ORIGIN, remote_url)
}
/// Stages everything under `repo_path` (`git add .`), commits it with `message`, and
/// returns the output of `git rev-parse HEAD` for the new commit.
///
/// The commit identity is fixed via `-c` overrides rather than taken from git config.
pub fn add_all_and_commit(repo_path: &Path, message: &str) -> Result<String, GitError> {
    let stage_args = ["add", "."];
    run_git_in(repo_path, &stage_args)?;

    let commit_args = [
        "-c",
        "user.name=CMN Hypha",
        "-c",
        "user.email=hypha@cmn.dev",
        "commit",
        "-m",
        message,
    ];
    run_git_in(repo_path, &commit_args)?;

    let head_args = ["rev-parse", "HEAD"];
    run_git_output(repo_path, &head_args)
}
/// Returns the trimmed output of `git rev-parse HEAD` for the repository at `repo_path`.
pub fn get_head_commit(repo_path: &Path) -> Result<String, GitError> {
    let args = ["rev-parse", "HEAD"];
    run_git_output(repo_path, &args)
}
/// Reports whether `commit_sha` names a commit present in the repository at `repo_path`.
///
/// The check runs `git cat-file -e <sha>^{commit}`: the `^{commit}` suffix makes git peel
/// the object to a commit, so a blob or tree with that id yields `false`, while an
/// annotated tag pointing at a commit yields `true`. (The previous `cat-file -t` probe
/// succeeded for any object type.)
///
/// # Errors
/// [`GitError::InvalidArg`] if `commit_sha` starts with `-`; [`GitError::Exec`] or
/// [`GitError::Timeout`] if git could not be run to completion. An unsuccessful exit is
/// not an error: it is reported as `Ok(false)`.
pub fn commit_exists(repo_path: &Path, commit_sha: &str) -> Result<bool, GitError> {
    reject_option_like(commit_sha, "commit sha")?;
    let commit_rev = format!("{commit_sha}^{{commit}}");
    let output = run_git_raw_unchecked(
        Some(repo_path),
        &["cat-file", "-e", commit_rev.as_str()],
    )?;
    Ok(output.status.success())
}
/// Force-fetches all branch heads from `remote_url` into the repository at
/// `bare_repo_path`, using the blobless filter.
///
/// # Errors
/// [`GitError::InvalidUrl`] if `remote_url` fails validation; otherwise any error from git.
pub fn fetch_to_bare(bare_repo_path: &Path, remote_url: &str) -> Result<(), GitError> {
    validate_remote_url(remote_url)?;
    let args = fetch_to_bare_args(remote_url);
    run_git_in(bare_repo_path, &args)
}
/// Runs a blobless `git fetch` from the configured remote `remote_name`.
///
/// # Errors
/// [`GitError::InvalidArg`] if `remote_name` starts with `-`; otherwise any error from git.
pub fn fetch_from_remote(repo_path: &Path, remote_name: &str) -> Result<(), GitError> {
    reject_option_like(remote_name, "remote name")?;
    let args = ["fetch", "--filter", BLOBLESS_FILTER, remote_name];
    run_git_in(repo_path, &args)
}
/// Adds a remote called `remote_name` pointing at `remote_url`.
///
/// The URL is only checked for a leading `-`; it does not go through `validate_remote_url`.
///
/// # Errors
/// [`GitError::InvalidArg`] if either value starts with `-`; otherwise any error from git.
pub fn add_remote(repo_path: &Path, remote_name: &str, remote_url: &str) -> Result<(), GitError> {
    for (value, what) in [(remote_name, "remote name"), (remote_url, "remote url")] {
        reject_option_like(value, what)?;
    }
    let args = ["remote", "add", remote_name, remote_url];
    run_git_in(repo_path, &args)
}
/// Replaces the URL of the existing remote `remote_name` with `new_url`.
///
/// The URL is only checked for a leading `-`; it does not go through `validate_remote_url`.
///
/// # Errors
/// [`GitError::InvalidArg`] if either value starts with `-`; otherwise any error from git.
pub fn set_remote_url(repo_path: &Path, remote_name: &str, new_url: &str) -> Result<(), GitError> {
    for (value, what) in [(remote_name, "remote name"), (new_url, "remote url")] {
        reject_option_like(value, what)?;
    }
    let args = ["remote", "set-url", remote_name, new_url];
    run_git_in(repo_path, &args)
}
/// Returns `true` when `git status --porcelain` prints nothing for `repo_path`.
pub fn is_working_dir_clean(repo_path: &Path) -> Result<bool, GitError> {
    let args = ["status", "--porcelain"];
    run_git_output(repo_path, &args).map(|listing| listing.is_empty())
}
/// Returns the trimmed output of `git rev-list --max-parents=0 HEAD` run in
/// `bare_repo_path`.
// NOTE(review): a history with several parentless commits would presumably yield several
// newline-separated ids here — confirm callers expect a single id.
pub fn get_root_commit_bare(bare_repo_path: &Path) -> Result<String, GitError> {
    let args = ["rev-list", "--max-parents=0", "HEAD"];
    run_git_output(bare_repo_path, &args)
}
/// Returns the trimmed output of `git rev-list --max-parents=0 HEAD` run in `repo_path`.
// NOTE(review): a history with several parentless commits would presumably yield several
// newline-separated ids here — confirm callers expect a single id.
pub fn get_root_commit(repo_path: &Path) -> Result<String, GitError> {
    let args = ["rev-list", "--max-parents=0", "HEAD"];
    run_git_output(repo_path, &args)
}
pub fn get_remote_url(repo_path: &Path, remote: &str) -> Result<Option<String>, GitError> {
match run_git_output(repo_path, &["remote", "get-url", remote]) {
Ok(url) if url.is_empty() => Ok(None),
Ok(url) => Ok(Some(url)),
Err(_) => Ok(None),
}
}
/// Returns the committer timestamp, in milliseconds since the Unix epoch, of the most
/// recent commit touching the directory `repo_path` (`git log -1 --format=%ct -- .`).
///
/// Returns `None` when git fails, when the output is not an unsigned integer (for example
/// empty output), or when converting seconds to milliseconds would overflow `u64`.
pub fn last_commit_epoch_ms(repo_path: &Path) -> Option<u64> {
    let output = run_git_output(repo_path, &["log", "-1", "--format=%ct", "--", "."]).ok()?;
    let epoch_s: u64 = output.parse().ok()?;
    // Checked multiply: an absurd timestamp becomes `None` instead of a panic or a wrap.
    epoch_s.checked_mul(1000)
}
/// Walks the directory tree rooted at `path` and fails as soon as it exceeds `limits`.
///
/// Every directory entry (file, directory, symlink, ...) counts as one entry and adds its
/// own metadata length to the byte total. Symlinks are not followed, because metadata is
/// read with `symlink_metadata`. The root directory itself is not counted.
///
/// # Errors
/// * [`GitError::SizeLimit`] when the entry count exceeds `limits.max_files` or the byte
///   total exceeds `limits.max_bytes`. The message names the root `path` being budgeted.
/// * [`GitError::Exec`] when a directory or an entry's metadata cannot be read.
pub fn enforce_size_budget(path: &Path, limits: GitSizeLimits) -> Result<GitSizeStats, GitError> {
    let mut stats = GitSizeStats { bytes: 0, files: 0 };
    // Explicit stack instead of recursion; traversal order does not affect the totals.
    let mut pending = vec![path.to_path_buf()];
    while let Some(dir) = pending.pop() {
        for entry in std::fs::read_dir(&dir)? {
            // Named distinctly so it does not shadow the root `path` used in the messages.
            let entry_path = entry?.path();
            let meta = std::fs::symlink_metadata(&entry_path)?;
            stats.files = stats.files.saturating_add(1);
            stats.bytes = stats.bytes.saturating_add(meta.len());
            if stats.files > limits.max_files {
                return Err(GitError::SizeLimit(format!(
                    "{} contains more than {} entries",
                    path.display(),
                    limits.max_files
                )));
            }
            if stats.bytes > limits.max_bytes {
                return Err(GitError::SizeLimit(format!(
                    "{} exceeds {} bytes",
                    path.display(),
                    limits.max_bytes
                )));
            }
            if meta.is_dir() {
                pending.push(entry_path);
            }
        }
    }
    Ok(stats)
}
// Unit tests for the argument builders, the size budget walker, and the timeout path.
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
mod tests {
use super::*;
use std::fs;
// `clone_repo_args` always passes the blobless filter; `--depth 1` appears only when shallow.
#[test]
fn clone_args_use_blobless_filter_for_shallow_and_full() {
assert_eq!(
clone_repo_args("https://example.com/repo.git", "/tmp/repo", true),
[
"clone",
"--filter",
"blob:none",
"--depth",
"1",
"--",
"https://example.com/repo.git",
"/tmp/repo",
]
.map(String::from)
.to_vec()
);
assert_eq!(
clone_repo_args("https://example.com/repo.git", "/tmp/repo", false),
[
"clone",
"--filter",
"blob:none",
"--",
"https://example.com/repo.git",
"/tmp/repo",
]
.map(String::from)
.to_vec()
);
}
// Pins the exact argument order produced for bare clones and for fetches into a bare repo.
#[test]
fn bare_clone_and_fetch_args_use_blobless_filter() {
assert_eq!(
clone_bare_repo_args("https://example.com/repo.git", "/tmp/repo.git"),
[
"clone",
"--bare",
"--filter",
"blob:none",
"--",
"https://example.com/repo.git",
"/tmp/repo.git",
]
.map(String::from)
.to_vec()
);
assert_eq!(
fetch_to_bare_args("https://example.com/repo.git"),
[
"fetch",
"--filter",
"blob:none",
"--force",
"https://example.com/repo.git",
"+refs/heads/*:refs/heads/*",
]
.map(String::from)
.to_vec()
);
}
// A single 16-byte file must trip an 8-byte budget.
#[test]
fn enforce_size_budget_rejects_too_many_bytes() {
let dir = tempfile::tempdir().expect("tempdir");
fs::write(dir.path().join("large.bin"), [0u8; 16]).expect("write");
let err = enforce_size_budget(dir.path(), GitSizeLimits::new(8, 10)).unwrap_err();
assert!(matches!(err, GitError::SizeLimit(_)));
}
// Two files must trip a one-entry budget even though the byte budget is ample.
#[test]
fn enforce_size_budget_rejects_too_many_entries() {
let dir = tempfile::tempdir().expect("tempdir");
fs::write(dir.path().join("one.txt"), b"1").expect("write one");
fs::write(dir.path().join("two.txt"), b"2").expect("write two");
let err = enforce_size_budget(dir.path(), GitSizeLimits::new(1024, 1)).unwrap_err();
assert!(matches!(err, GitError::SizeLimit(_)));
}
// A 2-second sleep under a 20 ms deadline must surface as `GitError::Timeout`.
#[cfg(unix)]
#[test]
fn run_program_raw_times_out() {
let err =
run_program_raw("sh", None, &["-c", "sleep 2"], Duration::from_millis(20)).unwrap_err();
assert!(matches!(err, GitError::Timeout { .. }));
}
}