use super::error::RemoteError;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::sync::LazyLock;
use std::time::Duration;
use tempfile::{NamedTempFile, TempDir};
/// Matches `https://`, then one or more characters that are neither `@` nor
/// whitespace, then `@` — i.e. an HTTPS URL prefix carrying embedded credentials.
/// Compiled once, on first use.
static TOKEN_URL_PATTERN: LazyLock<regex::Regex> = LazyLock::new(|| {
    let compiled = regex::Regex::new(r"https://[^@\s]+@");
    compiled.expect("TOKEN_URL_PATTERN is a valid regex literal")
});
/// Matches the word `Bearer`, at least one whitespace character, and the run of
/// non-whitespace characters that follows (the credential itself).
/// Compiled once, on first use.
static BEARER_PATTERN: LazyLock<regex::Regex> = LazyLock::new(|| {
    let compiled = regex::Regex::new(r"Bearer\s+\S+");
    compiled.expect("BEARER_PATTERN is a valid regex literal")
});
/// A repository produced by `GitCloner::clone`, checked out inside a temporary directory.
pub struct ClonedRepo {
/// Filesystem location of the checkout (the temporary directory itself).
pub path: PathBuf,
/// The URL that was passed to the clone call.
pub url: String,
/// The ref that was passed to the clone call.
pub git_ref: String,
/// Output of `git rev-parse HEAD` in the checkout, or `None` if that lookup failed.
pub commit_sha: Option<String>,
// Owns the temporary directory behind `path`. `tempfile::TempDir` removes the
// directory when dropped, so the guard is held for as long as this value lives.
_temp_dir: TempDir,
}
impl ClonedRepo {
    /// Borrows the location of the checkout stored in the `path` field.
    pub fn path(&self) -> &Path {
        self.path.as_path()
    }
}
/// Configuration for cloning a remote repository by running the `git` command-line tool.
pub struct GitCloner {
// Optional credential. When set, it is handed to git through a generated askpass
// script and scrubbed from error messages.
auth_token: Option<String>,
// Upper bound, in seconds, on how long a clone may run before the git process is killed.
timeout_secs: u64,
// Size limit in megabytes. It is stored by `with_max_size` but not read by any code
// visible in this file. NOTE(review): presumably 0 means "no limit" — confirm.
max_size_mb: u64,
}
impl Default for GitCloner {
fn default() -> Self {
Self::new()
}
}
impl GitCloner {
pub fn new() -> Self {
Self {
auth_token: None,
timeout_secs: 300, max_size_mb: 0, }
}
pub fn with_auth_token(mut self, token: Option<String>) -> Self {
self.auth_token = token;
self
}
pub fn with_timeout(mut self, secs: u64) -> Self {
self.timeout_secs = secs;
self
}
pub fn with_max_size(mut self, mb: u64) -> Self {
self.max_size_mb = mb;
self
}
pub fn clone(&self, url: &str, git_ref: &str) -> Result<ClonedRepo, RemoteError> {
self.validate_url(url)?;
self.check_git_available()?;
let temp_dir = TempDir::new().map_err(|e| RemoteError::TempDir(e.to_string()))?;
let repo_path = temp_dir.path().to_path_buf();
self.execute_clone(url, &repo_path, git_ref)?;
let commit_sha = self.get_commit_sha(&repo_path).ok();
Ok(ClonedRepo {
path: repo_path,
url: url.to_string(),
git_ref: git_ref.to_string(),
commit_sha,
_temp_dir: temp_dir,
})
}
fn validate_url(&self, url: &str) -> Result<(), RemoteError> {
if !url.starts_with("https://") && !url.starts_with("git@") {
return Err(RemoteError::InvalidUrl(format!(
"URL must start with https:// or git@: {}",
url
)));
}
if url.starts_with("https://github.com/") || url.starts_with("git@github.com:") {
return Ok(());
}
if url.starts_with("https://") {
return Ok(());
}
Err(RemoteError::InvalidUrl(format!(
"Unsupported URL format: {}",
url
)))
}
fn check_git_available(&self) -> Result<(), RemoteError> {
Command::new("git")
.arg("--version")
.output()
.map_err(|_| RemoteError::GitNotFound)?;
Ok(())
}
fn create_askpass_script(&self) -> Result<Option<NamedTempFile>, RemoteError> {
let Some(ref token) = self.auth_token else {
return Ok(None);
};
let mut script = NamedTempFile::new().map_err(|e| RemoteError::TempDir(e.to_string()))?;
writeln!(script, "#!/bin/sh").map_err(|e| RemoteError::TempDir(e.to_string()))?;
writeln!(script, "echo '{}'", token.replace('\'', "'\"'\"'"))
.map_err(|e| RemoteError::TempDir(e.to_string()))?;
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
let path = script.path();
std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o700))
.map_err(|e| RemoteError::TempDir(e.to_string()))?;
}
Ok(Some(script))
}
fn sanitize_error_message(&self, message: &str) -> String {
let mut sanitized = message.to_string();
if let Some(ref token) = self.auth_token {
sanitized = sanitized.replace(token, "[REDACTED]");
}
sanitized = TOKEN_URL_PATTERN
.replace_all(&sanitized, "https://[REDACTED]@")
.to_string();
sanitized = BEARER_PATTERN
.replace_all(&sanitized, "Bearer [REDACTED]")
.to_string();
sanitized
}
fn execute_clone(&self, url: &str, path: &Path, git_ref: &str) -> Result<(), RemoteError> {
let askpass_script = self.create_askpass_script()?;
let mut cmd = Command::new("git");
cmd.env("GIT_TEMPLATE_DIR", "");
if let Some(ref script) = askpass_script {
cmd.env("GIT_ASKPASS", script.path());
cmd.env("GIT_TERMINAL_PROMPT", "0");
}
cmd.args([
"clone",
"--depth",
"1",
"--single-branch",
"--no-tags",
"-c",
"core.hooksPath=/dev/null",
"-c",
"advice.detachedHead=false",
]);
if git_ref != "HEAD" && !git_ref.is_empty() {
cmd.args(["--branch", git_ref]);
}
cmd.arg(url);
cmd.arg(path);
cmd.stdout(Stdio::piped());
cmd.stderr(Stdio::piped());
let mut child = cmd.spawn().map_err(|e| RemoteError::CloneFailed {
url: url.to_string(),
message: self.sanitize_error_message(&e.to_string()),
})?;
let timeout = Duration::from_secs(self.timeout_secs);
let start = std::time::Instant::now();
loop {
match child.try_wait() {
Ok(Some(status)) => {
let output =
child
.wait_with_output()
.map_err(|e| RemoteError::CloneFailed {
url: url.to_string(),
message: self.sanitize_error_message(&e.to_string()),
})?;
if !status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
let sanitized_stderr = self.sanitize_error_message(&stderr);
if stderr.contains("Repository not found") || stderr.contains("404") {
return Err(RemoteError::NotFound(url.to_string()));
}
if stderr.contains("Authentication failed")
|| stderr.contains("could not read Username")
{
return Err(RemoteError::AuthRequired(url.to_string()));
}
return Err(RemoteError::CloneFailed {
url: url.to_string(),
message: sanitized_stderr,
});
}
return Ok(());
}
Ok(None) => {
if start.elapsed() > timeout {
let _ = child.kill();
return Err(RemoteError::CloneFailed {
url: url.to_string(),
message: format!("Clone timed out after {} seconds", self.timeout_secs),
});
}
std::thread::sleep(Duration::from_millis(100));
}
Err(e) => {
return Err(RemoteError::CloneFailed {
url: url.to_string(),
message: self.sanitize_error_message(&e.to_string()),
});
}
}
}
}
fn get_commit_sha(&self, path: &Path) -> Result<String, RemoteError> {
let output = Command::new("git")
.args(["rev-parse", "HEAD"])
.current_dir(path)
.output()
.map_err(|e| RemoteError::CloneFailed {
url: "".to_string(),
message: e.to_string(),
})?;
if output.status.success() {
Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
} else {
Err(RemoteError::CloneFailed {
url: "".to_string(),
message: "Failed to get commit SHA".to_string(),
})
}
}
}
/// Extracts `(owner, repo)` from a GitHub HTTPS or SSH URL.
///
/// Recognizes `https://github.com/<owner>/<repo>` and `git@github.com:<owner>/<repo>`.
/// A single trailing `.git` on the repository segment is removed, and any path
/// segments after the repository are ignored.
///
/// Returns `None` when the URL has a different prefix, has fewer than two path
/// segments, or when the owner or repository name is empty.
pub fn parse_github_url(url: &str) -> Option<(String, String)> {
    // `strip_prefix` removes the prefix exactly once; `trim_start_matches` would also
    // swallow a repeated prefix.
    let path = url
        .strip_prefix("https://github.com/")
        .or_else(|| url.strip_prefix("git@github.com:"))?;

    let mut segments = path.split('/');
    let owner = segments.next()?;
    let repo = segments.next()?;
    // Strip `.git` from the repository segment itself, so a trailing slash or extra
    // path components do not prevent it from being removed.
    let repo = repo.strip_suffix(".git").unwrap_or(repo);

    if owner.is_empty() || repo.is_empty() {
        return None;
    }
    Some((owner.to_string(), repo.to_string()))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the `(owner, repo)` value the URL parser is expected to return.
    fn owner_repo(owner: &str, repo: &str) -> Option<(String, String)> {
        Some((owner.to_owned(), repo.to_owned()))
    }

    // --- parse_github_url -------------------------------------------------

    #[test]
    fn test_parse_github_url_https() {
        let inputs = [
            "https://github.com/owner/repo",
            "https://github.com/owner/repo.git",
        ];
        for input in inputs {
            assert_eq!(parse_github_url(input), owner_repo("owner", "repo"));
        }
    }

    #[test]
    fn test_parse_github_url_ssh() {
        assert_eq!(
            parse_github_url("git@github.com:owner/repo.git"),
            owner_repo("owner", "repo")
        );
    }

    #[test]
    fn test_parse_github_url_invalid() {
        for input in ["https://gitlab.com/owner/repo", "not-a-url"] {
            assert!(parse_github_url(input).is_none());
        }
    }

    // --- validate_url -----------------------------------------------------

    #[test]
    fn test_validate_url_https() {
        let git = GitCloner::new();
        for accepted in ["https://github.com/owner/repo", "https://example.com/repo"] {
            assert!(git.validate_url(accepted).is_ok());
        }
    }

    #[test]
    fn test_validate_url_invalid() {
        let git = GitCloner::new();
        for rejected in ["http://github.com/owner/repo", "ftp://github.com/owner/repo"] {
            assert!(git.validate_url(rejected).is_err());
        }
    }

    // --- sanitize_error_message -------------------------------------------

    #[test]
    fn test_sanitize_error_message() {
        let git = GitCloner::new().with_auth_token(Some("ghp_secret123".to_string()));

        // The configured token itself is replaced.
        let cleaned = git.sanitize_error_message("failed with ghp_secret123 in message");
        assert_eq!(cleaned, "failed with [REDACTED] in message");

        // Credentials embedded in a URL are replaced even if they are not the token.
        let with_url = "failed: https://token123@github.com/repo";
        assert!(git.sanitize_error_message(with_url).contains("[REDACTED]"));
        assert!(!git.sanitize_error_message(with_url).contains("token123"));
    }

    #[test]
    fn test_sanitize_error_message_no_token() {
        let git = GitCloner::new();
        let cleaned = git.sanitize_error_message("failed: https://sometoken@github.com/repo");
        assert!(cleaned.contains("[REDACTED]"));
    }

    #[test]
    fn test_sanitize_bearer_token() {
        let git = GitCloner::new();
        let cleaned = git.sanitize_error_message("Authorization: Bearer ghp_secret123456");
        assert!(!cleaned.contains("ghp_secret123456"));
        assert!(cleaned.contains("[REDACTED]"));
    }

    // --- create_askpass_script --------------------------------------------

    #[cfg(unix)]
    #[test]
    fn test_create_askpass_script() {
        use std::os::unix::fs::PermissionsExt;

        let git = GitCloner::new().with_auth_token(Some("test_token".to_string()));
        let maybe_script = git.create_askpass_script().unwrap();
        assert!(maybe_script.is_some());

        let handle = maybe_script.unwrap();
        let location = handle.path();
        assert!(location.exists());

        // The owner must be able to read, write and execute the script.
        let mode = std::fs::metadata(location).unwrap().permissions().mode();
        assert_eq!(mode & 0o700, 0o700);
    }

    #[test]
    fn test_create_askpass_script_no_token() {
        let maybe_script = GitCloner::new().create_askpass_script().unwrap();
        assert!(maybe_script.is_none());
    }

    // --- builder setters --------------------------------------------------

    #[test]
    fn test_cloner_with_timeout() {
        let git = GitCloner::new().with_timeout(60);
        assert_eq!(git.timeout_secs, 60);
    }

    #[test]
    fn test_cloner_with_max_size() {
        let git = GitCloner::new().with_max_size(100);
        assert_eq!(git.max_size_mb, 100);
    }
}