1use std::ffi::OsStr;
8use std::io::{Read, Seek, SeekFrom};
9use std::path::Path;
10use std::process::{Command, Output, Stdio};
11use std::time::{Duration, Instant};
12
/// Wall-clock limit applied to every git invocation started through
/// `run_git_raw_unchecked`; a child still running after this long is killed.
pub const GIT_COMMAND_TIMEOUT: Duration = Duration::from_secs(300);

/// Upper bound on how long `run_program_raw` sleeps between checks for child exit.
const GIT_POLL_INTERVAL: Duration = Duration::from_millis(50);
/// Filter spec passed to `--filter` on clone/fetch and written to
/// `remote.<name>.partialclonefilter`.
const BLOBLESS_FILTER: &str = "blob:none";
/// Remote name `cmn-promisor`. Not referenced in the visible part of this file;
/// presumably the name callers hand to `configure_blobless_promisor_remote`
/// (TODO confirm against callers).
pub const CMN_PROMISOR_REMOTE: &str = "cmn-promisor";
19
/// Errors returned by the git helpers in this module.
#[derive(Debug, thiserror::Error)]
pub enum GitError {
    /// An I/O error. Because of `#[from]`, any `std::io::Error` propagated with
    /// `?` ends up here, including temp-file creation, process spawning and the
    /// directory walk in `enforce_size_budget`, not only failures to start git.
    #[error("failed to run git: {0}")]
    Exec(#[from] std::io::Error),
    /// A git command exited unsuccessfully. The payload is git's trimmed stderr,
    /// or a "<command> exited with <status>" message when stderr was empty.
    #[error("{0}")]
    Command(String),
    /// The child process was killed because it ran past its timeout.
    /// `timeout_secs` is the whole-second part of the timeout.
    #[error("git command timed out after {timeout_secs}s: {command}")]
    Timeout { command: String, timeout_secs: u64 },
    /// A directory tree exceeded the byte or entry budget given to
    /// `enforce_size_budget`.
    #[error("git size budget exceeded: {0}")]
    SizeLimit(String),
    /// A remote URL failed validation (normalization, parsing, or non-https scheme).
    #[error("rejected git URL: {0}")]
    InvalidUrl(String),
    /// An argument was rejected because it starts with `-`.
    #[error("rejected git argument: {0}")]
    InvalidArg(String),
}
42
/// Ceilings used when measuring a directory tree with `enforce_size_budget`.
#[derive(Debug, Clone, Copy)]
pub struct GitSizeLimits {
    /// Largest accepted running total of entry sizes, in bytes.
    pub max_bytes: u64,
    /// Largest accepted number of directory entries (files and directories alike).
    pub max_files: u64,
}

impl GitSizeLimits {
    /// Builds a limit pair from a byte ceiling and an entry-count ceiling.
    pub fn new(max_bytes: u64, max_files: u64) -> Self {
        GitSizeLimits {
            max_bytes,
            max_files,
        }
    }
}
58
/// Totals accumulated by `enforce_size_budget` while walking a directory tree.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct GitSizeStats {
    /// Saturating sum of the metadata length of every entry visited.
    pub bytes: u64,
    /// Number of entries visited (directories are counted as entries too).
    pub files: u64,
}
64
65fn reject_option_like(value: &str, what: &str) -> Result<(), GitError> {
70 if value.starts_with('-') {
71 return Err(GitError::InvalidArg(format!(
72 "{} must not start with '-': {}",
73 what, value
74 )));
75 }
76 Ok(())
77}
78
79fn validate_remote_url(url: &str) -> Result<(), GitError> {
85 let normalized = substrate::normalize_and_validate_url(url)
89 .map_err(|e| GitError::InvalidUrl(e.to_string()))?;
90
91 let parsed = reqwest::Url::parse(&normalized)
93 .map_err(|e| GitError::InvalidUrl(format!("invalid URL syntax ({})", e)))?;
94 if parsed.scheme() != "https" {
95 return Err(GitError::InvalidUrl(format!(
96 "only https:// URLs are allowed (got: {})",
97 url
98 )));
99 }
100 Ok(())
101}
102
/// Renders a program and its arguments as one space-separated line.
///
/// Arguments are converted lossily to UTF-8 and are not quoted or escaped, so
/// the result is meant for messages, not for re-execution.
fn display_command(program: &str, args: &[impl AsRef<OsStr>]) -> String {
    let mut rendered = String::from(program);
    for arg in args {
        rendered.push(' ');
        rendered.push_str(&arg.as_ref().to_string_lossy());
    }
    rendered
}
113
114fn run_program_raw<S: AsRef<OsStr>>(
115 program: &str,
116 dir: Option<&Path>,
117 args: &[S],
118 timeout: Duration,
119) -> Result<Output, GitError> {
120 let command_display = display_command(program, args);
121 let mut stdout_file = tempfile::tempfile()?;
122 let mut stderr_file = tempfile::tempfile()?;
123
124 let mut cmd = Command::new(program);
125 cmd.args(args.iter().map(|arg| arg.as_ref()));
126 if let Some(d) = dir {
127 cmd.current_dir(d);
128 }
129 cmd.stdout(Stdio::from(stdout_file.try_clone()?));
130 cmd.stderr(Stdio::from(stderr_file.try_clone()?));
131
132 let mut child = cmd.spawn()?;
133 let started = Instant::now();
134 let status = loop {
135 if let Some(status) = child.try_wait()? {
136 break status;
137 }
138 if started.elapsed() >= timeout {
139 let _ = child.kill();
140 let _ = child.wait();
141 return Err(GitError::Timeout {
142 command: command_display,
143 timeout_secs: timeout.as_secs(),
144 });
145 }
146 std::thread::sleep(GIT_POLL_INTERVAL.min(timeout.saturating_sub(started.elapsed())));
147 };
148
149 let mut stdout = Vec::new();
150 stdout_file.seek(SeekFrom::Start(0))?;
151 stdout_file.read_to_end(&mut stdout)?;
152 let mut stderr = Vec::new();
153 stderr_file.seek(SeekFrom::Start(0))?;
154 stderr_file.read_to_end(&mut stderr)?;
155
156 Ok(Output {
157 status,
158 stdout,
159 stderr,
160 })
161}
162
163fn run_git_raw_unchecked<S: AsRef<OsStr>>(
166 dir: Option<&Path>,
167 args: &[S],
168) -> Result<Output, GitError> {
169 run_program_raw("git", dir, args, GIT_COMMAND_TIMEOUT)
170}
171
172fn run_git_raw<S: AsRef<OsStr>>(dir: Option<&Path>, args: &[S]) -> Result<Output, GitError> {
175 let output = run_git_raw_unchecked(dir, args)?;
176 if !output.status.success() {
177 let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
178 let message = if stderr.is_empty() {
179 format!(
180 "{} exited with {}",
181 display_command("git", args),
182 output.status
183 )
184 } else {
185 stderr
186 };
187 return Err(GitError::Command(message));
188 }
189 Ok(output)
190}
191
192fn run_git<S: AsRef<OsStr>>(args: &[S]) -> Result<(), GitError> {
194 run_git_raw(None, args).map(|_| ())
195}
196
197fn run_git_in<S: AsRef<OsStr>>(dir: &Path, args: &[S]) -> Result<(), GitError> {
199 run_git_raw(Some(dir), args).map(|_| ())
200}
201
202fn run_git_output<S: AsRef<OsStr>>(dir: &Path, args: &[S]) -> Result<String, GitError> {
204 let output = run_git_raw(Some(dir), args)?;
205 Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
206}
207
208pub fn is_available() -> bool {
210 run_git_raw_unchecked(None, &["--version"])
211 .map(|output| output.status.success())
212 .unwrap_or(false)
213}
214
215fn clone_repo_args(url: &str, dest: &str, shallow: bool) -> Vec<String> {
216 let mut args = vec![
217 "clone".to_string(),
218 "--filter".to_string(),
219 BLOBLESS_FILTER.to_string(),
220 ];
221 if shallow {
222 args.extend(["--depth".to_string(), "1".to_string()]);
223 }
224 args.extend(["--".to_string(), url.to_string(), dest.to_string()]);
225 args
226}
227
228fn clone_bare_repo_args(url: &str, dest: &str) -> Vec<String> {
229 vec![
230 "clone".to_string(),
231 "--bare".to_string(),
232 "--filter".to_string(),
233 BLOBLESS_FILTER.to_string(),
234 "--".to_string(),
235 url.to_string(),
236 dest.to_string(),
237 ]
238}
239
240fn clone_from_local_args(local_bare_path: &Path, dest: &Path, no_checkout: bool) -> Vec<String> {
241 let src = format!("file://{}", local_bare_path.display());
242 let dest_str = dest.display().to_string();
243 let mut args = vec![
244 "clone".to_string(),
245 "--filter".to_string(),
246 BLOBLESS_FILTER.to_string(),
247 ];
248 if no_checkout {
249 args.push("--no-checkout".to_string());
250 }
251 args.extend(["--".to_string(), src, dest_str]);
252 args
253}
254
255fn fetch_to_bare_args(remote_url: &str) -> Vec<String> {
256 vec![
257 "fetch".to_string(),
258 "--filter".to_string(),
259 BLOBLESS_FILTER.to_string(),
260 "--force".to_string(),
261 remote_url.to_string(),
262 "+refs/heads/*:refs/heads/*".to_string(),
263 ]
264}
265
266pub fn clone_repo(url: &str, dest: &Path, shallow: bool) -> Result<(), GitError> {
272 validate_remote_url(url)?;
273 let dest_str = dest.display().to_string();
274 run_git(&clone_repo_args(url, &dest_str, shallow))
275}
276
277pub fn clone_bare_repo(url: &str, dest: &Path) -> Result<(), GitError> {
279 validate_remote_url(url)?;
280 let dest_str = dest.display().to_string();
281 run_git(&clone_bare_repo_args(url, &dest_str))
282}
283
284pub fn clone_from_local(local_bare_path: &Path, dest: &Path) -> Result<(), GitError> {
286 run_git(&clone_from_local_args(local_bare_path, dest, false))
287}
288
289pub fn clone_from_local_no_checkout(local_bare_path: &Path, dest: &Path) -> Result<(), GitError> {
291 run_git(&clone_from_local_args(local_bare_path, dest, true))
292}
293
294pub fn checkout_ref(repo_path: &Path, ref_spec: &str) -> Result<(), GitError> {
296 reject_option_like(ref_spec, "git ref")?;
300 run_git_in(repo_path, &["checkout", ref_spec, "--"])
301}
302
303pub fn init_repo(path: &Path) -> Result<(), GitError> {
305 run_git_in(path, &["init"])
306}
307
308pub fn configure_blobless_promisor_remote(
310 repo_path: &Path,
311 remote_name: &str,
312 remote_url: &str,
313) -> Result<(), GitError> {
314 reject_option_like(remote_name, "remote name")?;
315 validate_remote_url(remote_url)?;
316 if get_remote_url(repo_path, remote_name)?.is_some() {
317 run_git_in(repo_path, &["remote", "set-url", remote_name, remote_url])?;
318 } else {
319 run_git_in(repo_path, &["remote", "add", remote_name, remote_url])?;
320 }
321 let promisor_key = format!("remote.{remote_name}.promisor");
322 run_git_in(repo_path, &["config", promisor_key.as_str(), "true"])?;
323 let filter_key = format!("remote.{remote_name}.partialclonefilter");
324 run_git_in(repo_path, &["config", filter_key.as_str(), BLOBLESS_FILTER])?;
325 run_git_in(
326 repo_path,
327 &["config", "extensions.partialClone", remote_name],
328 )
329}
330
331pub fn configure_blobless_origin(repo_path: &Path, remote_url: &str) -> Result<(), GitError> {
333 configure_blobless_promisor_remote(repo_path, "origin", remote_url)
334}
335
336pub fn add_all_and_commit(repo_path: &Path, message: &str) -> Result<String, GitError> {
338 run_git_in(repo_path, &["add", "."])?;
339 run_git_in(
340 repo_path,
341 &[
342 "-c",
343 "user.name=CMN Hypha",
344 "-c",
345 "user.email=hypha@cmn.dev",
346 "commit",
347 "-m",
348 message,
349 ],
350 )?;
351 run_git_output(repo_path, &["rev-parse", "HEAD"])
352}
353
354pub fn get_head_commit(repo_path: &Path) -> Result<String, GitError> {
356 run_git_output(repo_path, &["rev-parse", "HEAD"])
357}
358
359pub fn commit_exists(repo_path: &Path, commit_sha: &str) -> Result<bool, GitError> {
361 reject_option_like(commit_sha, "commit sha")?;
362 let output = run_git_raw_unchecked(Some(repo_path), &["cat-file", "-t", commit_sha])?;
363 Ok(output.status.success())
364}
365
366pub fn fetch_to_bare(bare_repo_path: &Path, remote_url: &str) -> Result<(), GitError> {
368 validate_remote_url(remote_url)?;
369 run_git_in(bare_repo_path, &fetch_to_bare_args(remote_url))
370}
371
372pub fn fetch_from_remote(repo_path: &Path, remote_name: &str) -> Result<(), GitError> {
374 reject_option_like(remote_name, "remote name")?;
375 run_git_in(
376 repo_path,
377 &["fetch", "--filter", BLOBLESS_FILTER, remote_name],
378 )
379}
380
381pub fn add_remote(repo_path: &Path, remote_name: &str, remote_url: &str) -> Result<(), GitError> {
383 reject_option_like(remote_name, "remote name")?;
384 reject_option_like(remote_url, "remote url")?;
385 run_git_in(repo_path, &["remote", "add", remote_name, remote_url])
386}
387
388pub fn set_remote_url(repo_path: &Path, remote_name: &str, new_url: &str) -> Result<(), GitError> {
390 reject_option_like(remote_name, "remote name")?;
391 reject_option_like(new_url, "remote url")?;
392 run_git_in(repo_path, &["remote", "set-url", remote_name, new_url])
393}
394
395pub fn is_working_dir_clean(repo_path: &Path) -> Result<bool, GitError> {
399 let output = run_git_output(repo_path, &["status", "--porcelain"])?;
400 Ok(output.is_empty())
401}
402
403pub fn get_root_commit_bare(bare_repo_path: &Path) -> Result<String, GitError> {
405 run_git_output(bare_repo_path, &["rev-list", "--max-parents=0", "HEAD"])
406}
407
408pub fn get_root_commit(repo_path: &Path) -> Result<String, GitError> {
410 run_git_output(repo_path, &["rev-list", "--max-parents=0", "HEAD"])
411}
412
413pub fn get_remote_url(repo_path: &Path, remote: &str) -> Result<Option<String>, GitError> {
415 match run_git_output(repo_path, &["remote", "get-url", remote]) {
416 Ok(url) if url.is_empty() => Ok(None),
417 Ok(url) => Ok(Some(url)),
418 Err(_) => Ok(None),
419 }
420}
421
422pub fn last_commit_epoch_ms(repo_path: &Path) -> Option<u64> {
424 let output = run_git_output(repo_path, &["log", "-1", "--format=%ct", "--", "."]).ok()?;
425 let epoch_s: u64 = output.parse().ok()?;
426 Some(epoch_s * 1000)
427}
428
429pub fn enforce_size_budget(path: &Path, limits: GitSizeLimits) -> Result<GitSizeStats, GitError> {
431 let mut stats = GitSizeStats { bytes: 0, files: 0 };
432 let mut stack = vec![path.to_path_buf()];
433
434 while let Some(dir) = stack.pop() {
435 for entry in std::fs::read_dir(&dir)? {
436 let entry = entry?;
437 let path = entry.path();
438 let meta = std::fs::symlink_metadata(&path)?;
439
440 stats.files = stats.files.saturating_add(1);
441 stats.bytes = stats.bytes.saturating_add(meta.len());
442 if stats.files > limits.max_files {
443 return Err(GitError::SizeLimit(format!(
444 "{} contains more than {} entries",
445 path.display(),
446 limits.max_files
447 )));
448 }
449 if stats.bytes > limits.max_bytes {
450 return Err(GitError::SizeLimit(format!(
451 "{} exceeds {} bytes",
452 path.display(),
453 limits.max_bytes
454 )));
455 }
456
457 if meta.is_dir() {
458 stack.push(path);
459 }
460 }
461 }
462
463 Ok(stats)
464}
465
// Test code may unwrap/expect/panic freely; a failure there is the test failing.
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
mod tests {
    use super::*;
    use std::fs;

    /// Remote URL shared by the argument-builder tests.
    const REPO_URL: &str = "https://example.com/repo.git";

    /// Converts a list of string literals into the owned form the builders return.
    fn owned(parts: &[&str]) -> Vec<String> {
        parts.iter().map(|part| part.to_string()).collect()
    }

    #[test]
    fn clone_args_use_blobless_filter_for_shallow_and_full() {
        let shallow = clone_repo_args(REPO_URL, "/tmp/repo", true);
        assert_eq!(
            shallow,
            owned(&[
                "clone",
                "--filter",
                "blob:none",
                "--depth",
                "1",
                "--",
                "https://example.com/repo.git",
                "/tmp/repo",
            ])
        );

        let full = clone_repo_args(REPO_URL, "/tmp/repo", false);
        assert_eq!(
            full,
            owned(&[
                "clone",
                "--filter",
                "blob:none",
                "--",
                "https://example.com/repo.git",
                "/tmp/repo",
            ])
        );
    }

    #[test]
    fn bare_clone_and_fetch_args_use_blobless_filter() {
        let bare = clone_bare_repo_args(REPO_URL, "/tmp/repo.git");
        assert_eq!(
            bare,
            owned(&[
                "clone",
                "--bare",
                "--filter",
                "blob:none",
                "--",
                "https://example.com/repo.git",
                "/tmp/repo.git",
            ])
        );

        let fetch = fetch_to_bare_args(REPO_URL);
        assert_eq!(
            fetch,
            owned(&[
                "fetch",
                "--filter",
                "blob:none",
                "--force",
                "https://example.com/repo.git",
                "+refs/heads/*:refs/heads/*",
            ])
        );
    }

    #[test]
    fn enforce_size_budget_rejects_too_many_bytes() {
        let dir = tempfile::tempdir().expect("tempdir");
        fs::write(dir.path().join("large.bin"), [0u8; 16]).expect("write");

        // 16 bytes on disk against an 8-byte budget.
        let result = enforce_size_budget(dir.path(), GitSizeLimits::new(8, 10));
        assert!(matches!(result, Err(GitError::SizeLimit(_))));
    }

    #[test]
    fn enforce_size_budget_rejects_too_many_entries() {
        let dir = tempfile::tempdir().expect("tempdir");
        fs::write(dir.path().join("one.txt"), b"1").expect("write one");
        fs::write(dir.path().join("two.txt"), b"2").expect("write two");

        // Two entries against a one-entry budget.
        let result = enforce_size_budget(dir.path(), GitSizeLimits::new(1024, 1));
        assert!(matches!(result, Err(GitError::SizeLimit(_))));
    }

    #[cfg(unix)]
    #[test]
    fn run_program_raw_times_out() {
        let timeout = Duration::from_millis(20);
        let result = run_program_raw("sh", None, &["-c", "sleep 2"], timeout);
        assert!(matches!(result, Err(GitError::Timeout { .. })));
    }
}