Skip to main content

cc_audit/remote/
clone.rs

1use super::error::RemoteError;
2use std::path::{Path, PathBuf};
3use std::process::Command;
4use tempfile::TempDir;
5
/// Result of a successful clone operation.
///
/// Bundles the on-disk checkout location with the metadata describing what
/// was cloned. The value also owns the temporary directory that holds the
/// checkout.
pub struct ClonedRepo {
    /// Path to the cloned repository (the root of the temporary directory).
    pub path: PathBuf,
    /// Original repository URL as supplied by the caller, i.e. without any
    /// authentication token that was added for the clone itself.
    pub url: String,
    /// Git ref that was requested for checkout.
    pub git_ref: String,
    /// Commit SHA of the checked out ref; `None` when it could not be
    /// determined after the clone.
    pub commit_sha: Option<String>,
    /// Temporary directory handle (dropped when ClonedRepo is dropped).
    // NOTE(review): presumably dropping the `tempfile::TempDir` deletes the
    // directory, so `path` must not be used after this value is dropped —
    // confirm against the tempfile crate documentation.
    _temp_dir: TempDir,
}
19
20impl ClonedRepo {
21    /// Get the path to the cloned repository
22    pub fn path(&self) -> &Path {
23        &self.path
24    }
25}
26
/// Git repository cloner with security measures.
///
/// Configured through the `with_*` builder methods and driven via `clone`.
pub struct GitCloner {
    /// Optional authentication token for private repositories.
    /// It is only applied to `https://github.com/` URLs.
    auth_token: Option<String>,
    /// Clone timeout in seconds (defaults to 300, i.e. 5 minutes).
    timeout_secs: u64,
    /// Maximum repository size in MB (0 = unlimited, which is the default).
    // NOTE(review): no method in this file reads this field, so the limit
    // does not appear to be enforced here — confirm whether another module
    // applies it.
    max_size_mb: u64,
}
36
37impl Default for GitCloner {
38    fn default() -> Self {
39        Self::new()
40    }
41}
42
43impl GitCloner {
44    /// Create a new GitCloner with default settings
45    pub fn new() -> Self {
46        Self {
47            auth_token: None,
48            timeout_secs: 300, // 5 minutes
49            max_size_mb: 0,    // unlimited
50        }
51    }
52
53    /// Set authentication token for private repositories
54    pub fn with_auth_token(mut self, token: Option<String>) -> Self {
55        self.auth_token = token;
56        self
57    }
58
59    /// Set clone timeout in seconds
60    pub fn with_timeout(mut self, secs: u64) -> Self {
61        self.timeout_secs = secs;
62        self
63    }
64
65    /// Set maximum repository size in MB
66    pub fn with_max_size(mut self, mb: u64) -> Self {
67        self.max_size_mb = mb;
68        self
69    }
70
71    /// Clone a repository with security measures
72    ///
73    /// Security measures:
74    /// - Uses shallow clone (depth=1)
75    /// - Disables git hooks (template and local)
76    /// - Uses temporary directory that is automatically cleaned up
77    pub fn clone(&self, url: &str, git_ref: &str) -> Result<ClonedRepo, RemoteError> {
78        // Validate URL format
79        self.validate_url(url)?;
80
81        // Check if git is available
82        self.check_git_available()?;
83
84        // Create temporary directory
85        let temp_dir = TempDir::new().map_err(|e| RemoteError::TempDir(e.to_string()))?;
86        let repo_path = temp_dir.path().to_path_buf();
87
88        // Build clone URL with auth token if provided
89        let clone_url = self.build_clone_url(url)?;
90
91        // Execute git clone with security measures
92        self.execute_clone(&clone_url, &repo_path, git_ref)?;
93
94        // Get commit SHA
95        let commit_sha = self.get_commit_sha(&repo_path).ok();
96
97        Ok(ClonedRepo {
98            path: repo_path,
99            url: url.to_string(),
100            git_ref: git_ref.to_string(),
101            commit_sha,
102            _temp_dir: temp_dir,
103        })
104    }
105
106    /// Validate the repository URL format
107    fn validate_url(&self, url: &str) -> Result<(), RemoteError> {
108        // Check for basic URL structure
109        if !url.starts_with("https://") && !url.starts_with("git@") {
110            return Err(RemoteError::InvalidUrl(format!(
111                "URL must start with https:// or git@: {}",
112                url
113            )));
114        }
115
116        // Check for GitHub URL format
117        if url.starts_with("https://github.com/") || url.starts_with("git@github.com:") {
118            // Valid GitHub URL
119            return Ok(());
120        }
121
122        // Allow other HTTPS URLs but warn about non-GitHub sources
123        if url.starts_with("https://") {
124            return Ok(());
125        }
126
127        Err(RemoteError::InvalidUrl(format!(
128            "Unsupported URL format: {}",
129            url
130        )))
131    }
132
133    /// Check if git command is available
134    fn check_git_available(&self) -> Result<(), RemoteError> {
135        Command::new("git")
136            .arg("--version")
137            .output()
138            .map_err(|_| RemoteError::GitNotFound)?;
139        Ok(())
140    }
141
142    /// Build clone URL with authentication if needed
143    fn build_clone_url(&self, url: &str) -> Result<String, RemoteError> {
144        if let Some(ref token) = self.auth_token {
145            // Insert token into HTTPS URL
146            if url.starts_with("https://github.com/") {
147                return Ok(url.replace(
148                    "https://github.com/",
149                    &format!("https://{}@github.com/", token),
150                ));
151            }
152        }
153        Ok(url.to_string())
154    }
155
156    /// Execute git clone command with security measures
157    fn execute_clone(&self, url: &str, path: &Path, git_ref: &str) -> Result<(), RemoteError> {
158        // Build the git clone command with security measures
159        let mut cmd = Command::new("git");
160
161        // Disable hooks for security
162        cmd.env("GIT_TEMPLATE_DIR", "");
163
164        // Clone with shallow depth
165        cmd.args([
166            "clone",
167            "--depth",
168            "1",
169            "--single-branch",
170            "--no-tags",
171            "-c",
172            "core.hooksPath=/dev/null",
173            "-c",
174            "advice.detachedHead=false",
175        ]);
176
177        // Add branch/ref if not HEAD
178        if git_ref != "HEAD" && !git_ref.is_empty() {
179            cmd.args(["--branch", git_ref]);
180        }
181
182        cmd.arg(url);
183        cmd.arg(path);
184
185        // Execute with timeout
186        let output = cmd.output().map_err(|e| RemoteError::CloneFailed {
187            url: url.to_string(),
188            message: e.to_string(),
189        })?;
190
191        if !output.status.success() {
192            let stderr = String::from_utf8_lossy(&output.stderr);
193
194            // Check for common error patterns
195            if stderr.contains("Repository not found") || stderr.contains("404") {
196                return Err(RemoteError::NotFound(url.to_string()));
197            }
198
199            if stderr.contains("Authentication failed")
200                || stderr.contains("could not read Username")
201            {
202                return Err(RemoteError::AuthRequired(url.to_string()));
203            }
204
205            return Err(RemoteError::CloneFailed {
206                url: url.to_string(),
207                message: stderr.to_string(),
208            });
209        }
210
211        Ok(())
212    }
213
214    /// Get the commit SHA of HEAD
215    fn get_commit_sha(&self, path: &Path) -> Result<String, RemoteError> {
216        let output = Command::new("git")
217            .args(["rev-parse", "HEAD"])
218            .current_dir(path)
219            .output()
220            .map_err(|e| RemoteError::CloneFailed {
221                url: "".to_string(),
222                message: e.to_string(),
223            })?;
224
225        if output.status.success() {
226            Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
227        } else {
228            Err(RemoteError::CloneFailed {
229                url: "".to_string(),
230                message: "Failed to get commit SHA".to_string(),
231            })
232        }
233    }
234}
235
/// Parse GitHub URL to extract owner and repo name
///
/// Supported forms:
/// - HTTPS: `https://github.com/owner/repo` or `https://github.com/owner/repo.git`
/// - SSH: `git@github.com:owner/repo.git`
///
/// Extra path segments after the repository (for example `/tree/main`), a
/// query string and a fragment are ignored. A single trailing `.git` is
/// removed from the repository segment itself, wherever that segment sits
/// in the URL.
///
/// Returns `None` when the URL is not a GitHub URL in one of the forms
/// above, or when the owner or repository name is missing or empty.
pub fn parse_github_url(url: &str) -> Option<(String, String)> {
    // Both forms share the same `owner/repo[...]` tail once the host prefix
    // has been removed (exactly once, unlike `trim_start_matches`).
    let tail = url
        .strip_prefix("https://github.com/")
        .or_else(|| url.strip_prefix("git@github.com:"))?;

    // Neither `?` nor `#` can be part of an owner or repository name.
    let tail = tail
        .split(|c: char| c == '?' || c == '#')
        .next()
        .unwrap_or(tail);

    let mut segments = tail.split('/');
    let owner = segments.next()?;
    let repo = segments.next()?;
    let repo = repo.strip_suffix(".git").unwrap_or(repo);

    if owner.is_empty() || repo.is_empty() {
        return None;
    }

    Some((owner.to_string(), repo.to_string()))
}
260
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the `(owner, repo)` pair expected from `parse_github_url`.
    fn owner_repo(owner: &str, repo: &str) -> Option<(String, String)> {
        Some((owner.to_owned(), repo.to_owned()))
    }

    /// HTTPS URLs parse with and without the `.git` suffix.
    #[test]
    fn test_parse_github_url_https() {
        let inputs = [
            "https://github.com/owner/repo",
            "https://github.com/owner/repo.git",
        ];
        for input in inputs {
            assert_eq!(parse_github_url(input), owner_repo("owner", "repo"));
        }
    }

    /// The SSH form parses to the same owner/repo pair.
    #[test]
    fn test_parse_github_url_ssh() {
        let parsed = parse_github_url("git@github.com:owner/repo.git");
        assert_eq!(parsed, owner_repo("owner", "repo"));
    }

    /// Non-GitHub hosts and non-URLs yield `None`.
    #[test]
    fn test_parse_github_url_invalid() {
        for input in ["https://gitlab.com/owner/repo", "not-a-url"] {
            assert!(parse_github_url(input).is_none());
        }
    }

    /// Any HTTPS URL passes validation, GitHub or not.
    #[test]
    fn test_validate_url_https() {
        let cloner = GitCloner::new();
        for input in ["https://github.com/owner/repo", "https://example.com/repo"] {
            assert!(cloner.validate_url(input).is_ok());
        }
    }

    /// Plain HTTP and FTP are rejected.
    #[test]
    fn test_validate_url_invalid() {
        let cloner = GitCloner::new();
        for input in ["http://github.com/owner/repo", "ftp://github.com/owner/repo"] {
            assert!(cloner.validate_url(input).is_err());
        }
    }

    /// A configured token is embedded as the userinfo part of the URL.
    #[test]
    fn test_build_clone_url_with_token() {
        let token = Some(String::from("ghp_token123"));
        let built = GitCloner::new()
            .with_auth_token(token)
            .build_clone_url("https://github.com/owner/repo")
            .unwrap();
        assert_eq!(built, "https://ghp_token123@github.com/owner/repo");
    }

    /// Without a token the URL is passed through untouched.
    #[test]
    fn test_build_clone_url_without_token() {
        let built = GitCloner::new()
            .build_clone_url("https://github.com/owner/repo")
            .unwrap();
        assert_eq!(built, "https://github.com/owner/repo");
    }
}
317}