1use super::error::RemoteError;
2use std::io::Write;
3use std::path::{Path, PathBuf};
4use std::process::{Command, Stdio};
5use std::sync::LazyLock;
6use std::time::Duration;
7use tempfile::{NamedTempFile, TempDir};
8
9static TOKEN_URL_PATTERN: LazyLock<regex::Regex> = LazyLock::new(|| {
10 regex::Regex::new(r"https://[^@\s]+@").expect("TOKEN_URL_PATTERN is a valid regex literal")
11});
12
13static BEARER_PATTERN: LazyLock<regex::Regex> = LazyLock::new(|| {
14 regex::Regex::new(r"Bearer\s+\S+").expect("BEARER_PATTERN is a valid regex literal")
15});
16
/// A repository checkout bundled with the temporary directory that contains it.
pub struct ClonedRepo {
    /// Filesystem location of the checkout.
    pub path: PathBuf,
    /// URL the repository was cloned from.
    pub url: String,
    /// Ref that was requested for the clone.
    pub git_ref: String,
    /// Commit hash of the checkout, or `None` when it could not be determined.
    pub commit_sha: Option<String>,
    /// Never read; held only so the temporary directory lives as long as this
    /// value (`tempfile::TempDir` removes its directory when dropped).
    _temp_dir: TempDir,
}
30
31impl ClonedRepo {
32 pub fn path(&self) -> &Path {
34 &self.path
35 }
36}
37
/// Settings used when cloning a remote repository through the `git` command.
pub struct GitCloner {
    /// Optional credential. When present it is handed to git through an
    /// askpass script and scrubbed from error messages.
    auth_token: Option<String>,
    /// Upper bound, in seconds, on how long a clone may run.
    timeout_secs: u64,
    /// Size limit in megabytes.
    ///
    /// NOTE(review): the visible code only ever writes this field; nothing
    /// reads or enforces it. Confirm whether enforcement lives elsewhere.
    max_size_mb: u64,
}
47
48impl Default for GitCloner {
49 fn default() -> Self {
50 Self::new()
51 }
52}
53
54impl GitCloner {
55 pub fn new() -> Self {
57 Self {
58 auth_token: None,
59 timeout_secs: 300, max_size_mb: 0, }
62 }
63
64 pub fn with_auth_token(mut self, token: Option<String>) -> Self {
66 self.auth_token = token;
67 self
68 }
69
70 pub fn with_timeout(mut self, secs: u64) -> Self {
72 self.timeout_secs = secs;
73 self
74 }
75
76 pub fn with_max_size(mut self, mb: u64) -> Self {
78 self.max_size_mb = mb;
79 self
80 }
81
82 pub fn clone(&self, url: &str, git_ref: &str) -> Result<ClonedRepo, RemoteError> {
91 self.validate_url(url)?;
93
94 self.check_git_available()?;
96
97 let temp_dir = TempDir::new().map_err(|e| RemoteError::TempDir(e.to_string()))?;
99 let repo_path = temp_dir.path().to_path_buf();
100
101 self.execute_clone(url, &repo_path, git_ref)?;
103
104 let commit_sha = self.get_commit_sha(&repo_path).ok();
106
107 Ok(ClonedRepo {
108 path: repo_path,
109 url: url.to_string(),
110 git_ref: git_ref.to_string(),
111 commit_sha,
112 _temp_dir: temp_dir,
113 })
114 }
115
116 fn validate_url(&self, url: &str) -> Result<(), RemoteError> {
118 if !url.starts_with("https://") && !url.starts_with("git@") {
120 return Err(RemoteError::InvalidUrl(format!(
121 "URL must start with https:// or git@: {}",
122 url
123 )));
124 }
125
126 if url.starts_with("https://github.com/") || url.starts_with("git@github.com:") {
128 return Ok(());
130 }
131
132 if url.starts_with("https://") {
134 return Ok(());
135 }
136
137 Err(RemoteError::InvalidUrl(format!(
138 "Unsupported URL format: {}",
139 url
140 )))
141 }
142
143 fn check_git_available(&self) -> Result<(), RemoteError> {
145 Command::new("git")
146 .arg("--version")
147 .output()
148 .map_err(|_| RemoteError::GitNotFound)?;
149 Ok(())
150 }
151
152 fn create_askpass_script(&self) -> Result<Option<NamedTempFile>, RemoteError> {
158 let Some(ref token) = self.auth_token else {
159 return Ok(None);
160 };
161
162 let mut script = NamedTempFile::new().map_err(|e| RemoteError::TempDir(e.to_string()))?;
163
164 writeln!(script, "#!/bin/sh").map_err(|e| RemoteError::TempDir(e.to_string()))?;
167 writeln!(script, "echo '{}'", token.replace('\'', "'\"'\"'"))
168 .map_err(|e| RemoteError::TempDir(e.to_string()))?;
169
170 #[cfg(unix)]
172 {
173 use std::os::unix::fs::PermissionsExt;
174 let path = script.path();
175 std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o700))
176 .map_err(|e| RemoteError::TempDir(e.to_string()))?;
177 }
178
179 Ok(Some(script))
180 }
181
182 fn sanitize_error_message(&self, message: &str) -> String {
184 let mut sanitized = message.to_string();
185
186 if let Some(ref token) = self.auth_token {
188 sanitized = sanitized.replace(token, "[REDACTED]");
189 }
190
191 sanitized = TOKEN_URL_PATTERN
194 .replace_all(&sanitized, "https://[REDACTED]@")
195 .to_string();
196
197 sanitized = BEARER_PATTERN
199 .replace_all(&sanitized, "Bearer [REDACTED]")
200 .to_string();
201
202 sanitized
203 }
204
205 fn execute_clone(&self, url: &str, path: &Path, git_ref: &str) -> Result<(), RemoteError> {
207 let askpass_script = self.create_askpass_script()?;
209
210 let mut cmd = Command::new("git");
212
213 cmd.env("GIT_TEMPLATE_DIR", "");
215
216 if let Some(ref script) = askpass_script {
218 cmd.env("GIT_ASKPASS", script.path());
219 cmd.env("GIT_TERMINAL_PROMPT", "0");
221 }
222
223 cmd.args([
225 "clone",
226 "--depth",
227 "1",
228 "--single-branch",
229 "--no-tags",
230 "-c",
231 "core.hooksPath=/dev/null",
232 "-c",
233 "advice.detachedHead=false",
234 ]);
235
236 if git_ref != "HEAD" && !git_ref.is_empty() {
238 cmd.args(["--branch", git_ref]);
239 }
240
241 cmd.arg(url);
242 cmd.arg(path);
243
244 cmd.stdout(Stdio::piped());
246 cmd.stderr(Stdio::piped());
247
248 let mut child = cmd.spawn().map_err(|e| RemoteError::CloneFailed {
249 url: url.to_string(),
250 message: self.sanitize_error_message(&e.to_string()),
251 })?;
252
253 let timeout = Duration::from_secs(self.timeout_secs);
255 let start = std::time::Instant::now();
256
257 loop {
258 match child.try_wait() {
259 Ok(Some(status)) => {
260 let output =
262 child
263 .wait_with_output()
264 .map_err(|e| RemoteError::CloneFailed {
265 url: url.to_string(),
266 message: self.sanitize_error_message(&e.to_string()),
267 })?;
268
269 if !status.success() {
270 let stderr = String::from_utf8_lossy(&output.stderr);
271 let sanitized_stderr = self.sanitize_error_message(&stderr);
272
273 if stderr.contains("Repository not found") || stderr.contains("404") {
275 return Err(RemoteError::NotFound(url.to_string()));
276 }
277
278 if stderr.contains("Authentication failed")
279 || stderr.contains("could not read Username")
280 {
281 return Err(RemoteError::AuthRequired(url.to_string()));
282 }
283
284 return Err(RemoteError::CloneFailed {
285 url: url.to_string(),
286 message: sanitized_stderr,
287 });
288 }
289
290 return Ok(());
291 }
292 Ok(None) => {
293 if start.elapsed() > timeout {
295 let _ = child.kill();
297 return Err(RemoteError::CloneFailed {
298 url: url.to_string(),
299 message: format!("Clone timed out after {} seconds", self.timeout_secs),
300 });
301 }
302 std::thread::sleep(Duration::from_millis(100));
304 }
305 Err(e) => {
306 return Err(RemoteError::CloneFailed {
307 url: url.to_string(),
308 message: self.sanitize_error_message(&e.to_string()),
309 });
310 }
311 }
312 }
313 }
314
315 fn get_commit_sha(&self, path: &Path) -> Result<String, RemoteError> {
317 let output = Command::new("git")
318 .args(["rev-parse", "HEAD"])
319 .current_dir(path)
320 .output()
321 .map_err(|e| RemoteError::CloneFailed {
322 url: "".to_string(),
323 message: e.to_string(),
324 })?;
325
326 if output.status.success() {
327 Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
328 } else {
329 Err(RemoteError::CloneFailed {
330 url: "".to_string(),
331 message: "Failed to get commit SHA".to_string(),
332 })
333 }
334 }
335}
336
/// Extracts `(owner, repo)` from a GitHub repository URL.
///
/// Both `https://github.com/owner/repo` and `git@github.com:owner/repo` forms
/// are recognised. A single `.git` suffix on the repository segment is
/// dropped, and any further path segments (a trailing slash, `/tree/main`, …)
/// are ignored.
///
/// Returns `None` when the URL does not point at `github.com` or when either
/// the owner or the repository name would be empty.
pub fn parse_github_url(url: &str) -> Option<(String, String)> {
    const PREFIXES: [&str; 2] = ["https://github.com/", "git@github.com:"];

    // `strip_prefix` removes the prefix exactly once, unlike
    // `trim_start_matches`, which would also eat repeated occurrences.
    let path = PREFIXES
        .iter()
        .find_map(|prefix| url.strip_prefix(*prefix))?;

    let mut segments = path.split('/');
    let owner = segments.next()?;
    let repo = segments.next()?;
    let repo = repo.strip_suffix(".git").unwrap_or(repo);

    if owner.is_empty() || repo.is_empty() {
        return None;
    }

    Some((owner.to_string(), repo.to_string()))
}
361
#[cfg(test)]
mod tests {
    use super::*;

    /// The `(owner, repo)` pair every successful parse below is expected to yield.
    fn owner_repo() -> Option<(String, String)> {
        Some((String::from("owner"), String::from("repo")))
    }

    #[test]
    fn test_parse_github_url_https() {
        let inputs = [
            "https://github.com/owner/repo",
            "https://github.com/owner/repo.git",
        ];
        for input in inputs {
            assert_eq!(parse_github_url(input), owner_repo());
        }
    }

    #[test]
    fn test_parse_github_url_ssh() {
        assert_eq!(
            parse_github_url("git@github.com:owner/repo.git"),
            owner_repo()
        );
    }

    #[test]
    fn test_parse_github_url_invalid() {
        for input in ["https://gitlab.com/owner/repo", "not-a-url"] {
            assert!(parse_github_url(input).is_none());
        }
    }

    #[test]
    fn test_validate_url_https() {
        let cloner = GitCloner::new();
        for accepted in ["https://github.com/owner/repo", "https://example.com/repo"] {
            assert!(cloner.validate_url(accepted).is_ok());
        }
    }

    #[test]
    fn test_validate_url_invalid() {
        let cloner = GitCloner::new();
        for rejected in ["http://github.com/owner/repo", "ftp://github.com/owner/repo"] {
            assert!(cloner.validate_url(rejected).is_err());
        }
    }

    #[test]
    fn test_sanitize_error_message() {
        let cloner = GitCloner::new().with_auth_token(Some("ghp_secret123".to_string()));

        // The configured token itself is replaced wherever it appears.
        let scrubbed = cloner.sanitize_error_message("failed with ghp_secret123 in message");
        assert_eq!(scrubbed, "failed with [REDACTED] in message");

        // Credentials embedded in a URL are replaced even when they differ
        // from the configured token.
        let scrubbed = cloner.sanitize_error_message("failed: https://token123@github.com/repo");
        assert!(scrubbed.contains("[REDACTED]"));
        assert!(!scrubbed.contains("token123"));
    }

    #[test]
    fn test_sanitize_error_message_no_token() {
        let scrubbed =
            GitCloner::new().sanitize_error_message("failed: https://sometoken@github.com/repo");
        assert!(scrubbed.contains("[REDACTED]"));
    }

    #[test]
    fn test_sanitize_bearer_token() {
        let scrubbed =
            GitCloner::new().sanitize_error_message("Authorization: Bearer ghp_secret123456");
        assert!(!scrubbed.contains("ghp_secret123456"));
        assert!(scrubbed.contains("[REDACTED]"));
    }

    #[cfg(unix)]
    #[test]
    fn test_create_askpass_script() {
        use std::os::unix::fs::PermissionsExt;

        let cloner = GitCloner::new().with_auth_token(Some("test_token".to_string()));
        let maybe_script = cloner.create_askpass_script().unwrap();
        assert!(maybe_script.is_some());

        let script = maybe_script.unwrap();
        let script_path = script.path();
        assert!(script_path.exists());

        // The owner must be able to read, write and execute the script.
        let mode = std::fs::metadata(script_path).unwrap().permissions().mode();
        assert_eq!(mode & 0o700, 0o700);
    }

    #[test]
    fn test_create_askpass_script_no_token() {
        let maybe_script = GitCloner::new().create_askpass_script().unwrap();
        assert!(maybe_script.is_none());
    }

    #[test]
    fn test_cloner_with_timeout() {
        assert_eq!(GitCloner::new().with_timeout(60).timeout_secs, 60);
    }

    #[test]
    fn test_cloner_with_max_size() {
        assert_eq!(GitCloner::new().with_max_size(100).max_size_mb, 100);
    }
}