Skip to main content

cc_audit/remote/
clone.rs

1use super::error::RemoteError;
2use std::io::Write;
3use std::path::{Path, PathBuf};
4use std::process::{Command, Stdio};
5use std::time::Duration;
6use tempfile::{NamedTempFile, TempDir};
7
8/// Result of a successful clone operation
9pub struct ClonedRepo {
10    /// Path to the cloned repository
11    pub path: PathBuf,
12    /// Original repository URL
13    pub url: String,
14    /// Git ref that was checked out
15    pub git_ref: String,
16    /// Commit SHA of the checked out ref
17    pub commit_sha: Option<String>,
18    /// Temporary directory handle (dropped when ClonedRepo is dropped)
19    _temp_dir: TempDir,
20}
21
22impl ClonedRepo {
23    /// Get the path to the cloned repository
24    pub fn path(&self) -> &Path {
25        &self.path
26    }
27}
28
/// Clones remote Git repositories while applying a set of safety measures.
///
/// Configure an instance with the `with_*` builder methods, then call `clone`.
pub struct GitCloner {
    /// Token used to authenticate against private repositories, if any.
    auth_token: Option<String>,
    /// How long a clone may run, in seconds, before it is aborted.
    timeout_secs: u64,
    /// Upper bound on the repository size in MB; `0` means unlimited.
    max_size_mb: u64,
}
38
39impl Default for GitCloner {
40    fn default() -> Self {
41        Self::new()
42    }
43}
44
45impl GitCloner {
46    /// Create a new GitCloner with default settings
47    pub fn new() -> Self {
48        Self {
49            auth_token: None,
50            timeout_secs: 300, // 5 minutes
51            max_size_mb: 0,    // unlimited
52        }
53    }
54
55    /// Set authentication token for private repositories
56    pub fn with_auth_token(mut self, token: Option<String>) -> Self {
57        self.auth_token = token;
58        self
59    }
60
61    /// Set clone timeout in seconds
62    pub fn with_timeout(mut self, secs: u64) -> Self {
63        self.timeout_secs = secs;
64        self
65    }
66
67    /// Set maximum repository size in MB
68    pub fn with_max_size(mut self, mb: u64) -> Self {
69        self.max_size_mb = mb;
70        self
71    }
72
73    /// Clone a repository with security measures
74    ///
75    /// Security measures:
76    /// - Uses shallow clone (depth=1)
77    /// - Disables git hooks (template and local)
78    /// - Uses temporary directory that is automatically cleaned up
79    /// - Token is passed via GIT_ASKPASS (not embedded in URL)
80    /// - Clone has configurable timeout
81    pub fn clone(&self, url: &str, git_ref: &str) -> Result<ClonedRepo, RemoteError> {
82        // Validate URL format
83        self.validate_url(url)?;
84
85        // Check if git is available
86        self.check_git_available()?;
87
88        // Create temporary directory
89        let temp_dir = TempDir::new().map_err(|e| RemoteError::TempDir(e.to_string()))?;
90        let repo_path = temp_dir.path().to_path_buf();
91
92        // Execute git clone with security measures (token via env, not URL)
93        self.execute_clone(url, &repo_path, git_ref)?;
94
95        // Get commit SHA
96        let commit_sha = self.get_commit_sha(&repo_path).ok();
97
98        Ok(ClonedRepo {
99            path: repo_path,
100            url: url.to_string(),
101            git_ref: git_ref.to_string(),
102            commit_sha,
103            _temp_dir: temp_dir,
104        })
105    }
106
107    /// Validate the repository URL format
108    fn validate_url(&self, url: &str) -> Result<(), RemoteError> {
109        // Check for basic URL structure
110        if !url.starts_with("https://") && !url.starts_with("git@") {
111            return Err(RemoteError::InvalidUrl(format!(
112                "URL must start with https:// or git@: {}",
113                url
114            )));
115        }
116
117        // Check for GitHub URL format
118        if url.starts_with("https://github.com/") || url.starts_with("git@github.com:") {
119            // Valid GitHub URL
120            return Ok(());
121        }
122
123        // Allow other HTTPS URLs but warn about non-GitHub sources
124        if url.starts_with("https://") {
125            return Ok(());
126        }
127
128        Err(RemoteError::InvalidUrl(format!(
129            "Unsupported URL format: {}",
130            url
131        )))
132    }
133
134    /// Check if git command is available
135    fn check_git_available(&self) -> Result<(), RemoteError> {
136        Command::new("git")
137            .arg("--version")
138            .output()
139            .map_err(|_| RemoteError::GitNotFound)?;
140        Ok(())
141    }
142
143    /// Create a temporary GIT_ASKPASS script that returns the token.
144    /// This is more secure than embedding the token in the URL because:
145    /// - Token is not visible in process list (ps aux)
146    /// - Token is not logged in git error messages
147    /// - Script is automatically cleaned up
148    fn create_askpass_script(&self) -> Result<Option<NamedTempFile>, RemoteError> {
149        let Some(ref token) = self.auth_token else {
150            return Ok(None);
151        };
152
153        let mut script = NamedTempFile::new().map_err(|e| RemoteError::TempDir(e.to_string()))?;
154
155        // Write a shell script that outputs the token
156        // The script receives the prompt as an argument but we ignore it
157        writeln!(script, "#!/bin/sh").map_err(|e| RemoteError::TempDir(e.to_string()))?;
158        writeln!(script, "echo '{}'", token.replace('\'', "'\"'\"'"))
159            .map_err(|e| RemoteError::TempDir(e.to_string()))?;
160
161        // Make the script executable (Unix only)
162        #[cfg(unix)]
163        {
164            use std::os::unix::fs::PermissionsExt;
165            let path = script.path();
166            std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o700))
167                .map_err(|e| RemoteError::TempDir(e.to_string()))?;
168        }
169
170        Ok(Some(script))
171    }
172
173    /// Sanitize error messages to remove any potential token leakage.
174    fn sanitize_error_message(&self, message: &str) -> String {
175        let mut sanitized = message.to_string();
176
177        // Remove any token-like patterns from error messages
178        if let Some(ref token) = self.auth_token {
179            sanitized = sanitized.replace(token, "[REDACTED]");
180        }
181
182        // Remove patterns that look like tokens embedded in URLs
183        // Pattern: https://TOKEN@github.com or similar
184        let token_pattern = regex::Regex::new(r"https://[^@\s]+@")
185            .unwrap_or_else(|_| regex::Regex::new("^$").unwrap());
186        sanitized = token_pattern
187            .replace_all(&sanitized, "https://[REDACTED]@")
188            .to_string();
189
190        // Also redact Bearer tokens
191        let bearer_pattern =
192            regex::Regex::new(r"Bearer\s+\S+").unwrap_or_else(|_| regex::Regex::new("^$").unwrap());
193        sanitized = bearer_pattern
194            .replace_all(&sanitized, "Bearer [REDACTED]")
195            .to_string();
196
197        sanitized
198    }
199
200    /// Execute git clone command with security measures and timeout.
201    fn execute_clone(&self, url: &str, path: &Path, git_ref: &str) -> Result<(), RemoteError> {
202        // Create askpass script for secure token handling
203        let askpass_script = self.create_askpass_script()?;
204
205        // Build the git clone command with security measures
206        let mut cmd = Command::new("git");
207
208        // Disable hooks for security
209        cmd.env("GIT_TEMPLATE_DIR", "");
210
211        // Set up authentication via GIT_ASKPASS if we have a token
212        if let Some(ref script) = askpass_script {
213            cmd.env("GIT_ASKPASS", script.path());
214            // Disable terminal prompts to force use of ASKPASS
215            cmd.env("GIT_TERMINAL_PROMPT", "0");
216        }
217
218        // Clone with shallow depth
219        cmd.args([
220            "clone",
221            "--depth",
222            "1",
223            "--single-branch",
224            "--no-tags",
225            "-c",
226            "core.hooksPath=/dev/null",
227            "-c",
228            "advice.detachedHead=false",
229        ]);
230
231        // Add branch/ref if not HEAD
232        if git_ref != "HEAD" && !git_ref.is_empty() {
233            cmd.args(["--branch", git_ref]);
234        }
235
236        cmd.arg(url);
237        cmd.arg(path);
238
239        // Execute with timeout using a child process
240        cmd.stdout(Stdio::piped());
241        cmd.stderr(Stdio::piped());
242
243        let mut child = cmd.spawn().map_err(|e| RemoteError::CloneFailed {
244            url: url.to_string(),
245            message: self.sanitize_error_message(&e.to_string()),
246        })?;
247
248        // Wait with timeout
249        let timeout = Duration::from_secs(self.timeout_secs);
250        let start = std::time::Instant::now();
251
252        loop {
253            match child.try_wait() {
254                Ok(Some(status)) => {
255                    // Process finished
256                    let output =
257                        child
258                            .wait_with_output()
259                            .map_err(|e| RemoteError::CloneFailed {
260                                url: url.to_string(),
261                                message: self.sanitize_error_message(&e.to_string()),
262                            })?;
263
264                    if !status.success() {
265                        let stderr = String::from_utf8_lossy(&output.stderr);
266                        let sanitized_stderr = self.sanitize_error_message(&stderr);
267
268                        // Check for common error patterns
269                        if stderr.contains("Repository not found") || stderr.contains("404") {
270                            return Err(RemoteError::NotFound(url.to_string()));
271                        }
272
273                        if stderr.contains("Authentication failed")
274                            || stderr.contains("could not read Username")
275                        {
276                            return Err(RemoteError::AuthRequired(url.to_string()));
277                        }
278
279                        return Err(RemoteError::CloneFailed {
280                            url: url.to_string(),
281                            message: sanitized_stderr,
282                        });
283                    }
284
285                    return Ok(());
286                }
287                Ok(None) => {
288                    // Process still running, check timeout
289                    if start.elapsed() > timeout {
290                        // Kill the process
291                        let _ = child.kill();
292                        return Err(RemoteError::CloneFailed {
293                            url: url.to_string(),
294                            message: format!("Clone timed out after {} seconds", self.timeout_secs),
295                        });
296                    }
297                    // Sleep briefly before checking again
298                    std::thread::sleep(Duration::from_millis(100));
299                }
300                Err(e) => {
301                    return Err(RemoteError::CloneFailed {
302                        url: url.to_string(),
303                        message: self.sanitize_error_message(&e.to_string()),
304                    });
305                }
306            }
307        }
308    }
309
310    /// Get the commit SHA of HEAD
311    fn get_commit_sha(&self, path: &Path) -> Result<String, RemoteError> {
312        let output = Command::new("git")
313            .args(["rev-parse", "HEAD"])
314            .current_dir(path)
315            .output()
316            .map_err(|e| RemoteError::CloneFailed {
317                url: "".to_string(),
318                message: e.to_string(),
319            })?;
320
321        if output.status.success() {
322            Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
323        } else {
324            Err(RemoteError::CloneFailed {
325                url: "".to_string(),
326                message: "Failed to get commit SHA".to_string(),
327            })
328        }
329    }
330}
331
/// Parse a GitHub URL to extract the owner and repository name.
///
/// Supported forms:
/// - HTTPS: `https://github.com/owner/repo` or `https://github.com/owner/repo.git`
/// - SSH: `git@github.com:owner/repo.git`
///
/// Extra path segments after the repository (for example `/tree/main` or a
/// trailing `/`) are ignored, and a single `.git` suffix on the repository
/// segment is removed.
///
/// Returns `None` when the URL is not a GitHub URL or when the owner or
/// repository segment is missing or empty.
pub fn parse_github_url(url: &str) -> Option<(String, String)> {
    let path = url
        .strip_prefix("https://github.com/")
        .or_else(|| url.strip_prefix("git@github.com:"))?;

    let mut segments = path.split('/');
    let owner = segments.next()?;
    let repo = segments.next()?;

    // Strip the suffix from the repository segment only, so that trailing
    // path components cannot hide it.
    let repo = repo.strip_suffix(".git").unwrap_or(repo);

    if owner.is_empty() || repo.is_empty() {
        return None;
    }

    Some((owner.to_string(), repo.to_string()))
}
356
#[cfg(test)]
mod tests {
    use super::*;

    /// Build the `Some((owner, repo))` value the parser is expected to return.
    fn owner_repo(owner: &str, repo: &str) -> Option<(String, String)> {
        Some((owner.to_string(), repo.to_string()))
    }

    /// HTTPS URLs parse with and without the `.git` suffix.
    #[test]
    fn test_parse_github_url_https() {
        for url in [
            "https://github.com/owner/repo",
            "https://github.com/owner/repo.git",
        ] {
            assert_eq!(parse_github_url(url), owner_repo("owner", "repo"));
        }
    }

    /// SSH-style GitHub URLs parse to the same owner/repo pair.
    #[test]
    fn test_parse_github_url_ssh() {
        let parsed = parse_github_url("git@github.com:owner/repo.git");
        assert_eq!(parsed, owner_repo("owner", "repo"));
    }

    /// Non-GitHub hosts and non-URLs yield `None`.
    #[test]
    fn test_parse_github_url_invalid() {
        for url in ["https://gitlab.com/owner/repo", "not-a-url"] {
            assert!(parse_github_url(url).is_none());
        }
    }

    /// Any HTTPS URL is accepted, GitHub or not.
    #[test]
    fn test_validate_url_https() {
        let cloner = GitCloner::new();
        for url in ["https://github.com/owner/repo", "https://example.com/repo"] {
            assert!(cloner.validate_url(url).is_ok());
        }
    }

    /// Plain HTTP and FTP URLs are rejected.
    #[test]
    fn test_validate_url_invalid() {
        let cloner = GitCloner::new();
        for url in [
            "http://github.com/owner/repo",
            "ftp://github.com/owner/repo",
        ] {
            assert!(cloner.validate_url(url).is_err());
        }
    }

    /// The configured token and URL-embedded credentials are both redacted.
    #[test]
    fn test_sanitize_error_message() {
        let cloner = GitCloner::new().with_auth_token(Some("ghp_secret123".to_string()));

        // Test direct token replacement
        let with_token = cloner.sanitize_error_message("failed with ghp_secret123 in message");
        assert_eq!(with_token, "failed with [REDACTED] in message");

        // Test URL token pattern
        let msg = "failed: https://token123@github.com/repo";
        assert!(cloner.sanitize_error_message(msg).contains("[REDACTED]"));
        assert!(!cloner.sanitize_error_message(msg).contains("token123"));
    }

    /// URL credentials are redacted even when no token is configured.
    #[test]
    fn test_sanitize_error_message_no_token() {
        let cloner = GitCloner::new();

        let sanitized = cloner.sanitize_error_message("failed: https://sometoken@github.com/repo");
        assert!(sanitized.contains("[REDACTED]"));
    }

    /// `Bearer` tokens are redacted.
    #[test]
    fn test_sanitize_bearer_token() {
        let cloner = GitCloner::new();

        let sanitized = cloner.sanitize_error_message("Authorization: Bearer ghp_secret123456");
        assert!(!sanitized.contains("ghp_secret123456"));
        assert!(sanitized.contains("[REDACTED]"));
    }

    /// With a token, an askpass script is created and is owner-executable.
    #[cfg(unix)]
    #[test]
    fn test_create_askpass_script() {
        use std::os::unix::fs::PermissionsExt;

        let cloner = GitCloner::new().with_auth_token(Some("test_token".to_string()));
        let maybe_script = cloner.create_askpass_script().unwrap();

        assert!(maybe_script.is_some());
        let script = maybe_script.unwrap();

        // Verify script exists and is executable
        let script_path = script.path();
        assert!(script_path.exists());

        let mode = std::fs::metadata(script_path).unwrap().permissions().mode();
        assert_eq!(mode & 0o700, 0o700);
    }

    /// Without a token, no askpass script is created.
    #[test]
    fn test_create_askpass_script_no_token() {
        let maybe_script = GitCloner::new().create_askpass_script().unwrap();
        assert!(maybe_script.is_none());
    }

    /// `with_timeout` stores the given number of seconds.
    #[test]
    fn test_cloner_with_timeout() {
        assert_eq!(GitCloner::new().with_timeout(60).timeout_secs, 60);
    }

    /// `with_max_size` stores the given number of megabytes.
    #[test]
    fn test_cloner_with_max_size() {
        assert_eq!(GitCloner::new().with_max_size(100).max_size_mb, 100);
    }
}