infiniloom_engine/
remote.rs

1//! Remote repository support
2//!
3//! Supports cloning and fetching from remote Git repositories (GitHub, GitLab, Bitbucket, etc.)
4
5use std::path::{Path, PathBuf};
6use std::process::Command;
7use tempfile::TempDir;
8use thiserror::Error;
9use url::Url;
10
/// Git hosting services that a remote URL can be classified as.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum GitProvider {
    /// Repository hosted on github.com.
    GitHub,
    /// Repository hosted on gitlab.com.
    GitLab,
    /// Repository hosted on bitbucket.org.
    Bitbucket,
    /// Any other host (for example a self-hosted Git server).
    Generic,
}
19
20/// Parsed remote repository URL
21#[derive(Debug, Clone)]
22pub struct RemoteRepo {
23    /// Original URL
24    pub url: String,
25    /// Git provider
26    pub provider: GitProvider,
27    /// Repository owner/organization
28    pub owner: Option<String>,
29    /// Repository name
30    pub name: String,
31    /// Branch to clone (None = default branch)
32    pub branch: Option<String>,
33    /// Specific commit/tag to checkout
34    pub reference: Option<String>,
35    /// Subdirectory to extract (sparse checkout)
36    pub subdir: Option<String>,
37}
38
39impl RemoteRepo {
40    /// Parse a remote URL into a RemoteRepo
41    /// Supports formats:
42    /// - https://github.com/owner/repo
43    /// - https://github.com/owner/repo/tree/branch
44    /// - https://github.com/owner/repo/tree/branch/subdir
45    /// - github:owner/repo
46    /// - owner/repo (assumes GitHub)
47    /// - git@github.com:owner/repo.git
48    pub fn parse(input: &str) -> Result<Self, RemoteError> {
49        let input = input.trim();
50
51        // Handle shorthand formats
52        if let Some(rest) = input.strip_prefix("github:") {
53            return Self::parse_shorthand(rest, GitProvider::GitHub);
54        }
55        if let Some(rest) = input.strip_prefix("gitlab:") {
56            return Self::parse_shorthand(rest, GitProvider::GitLab);
57        }
58        if let Some(rest) = input.strip_prefix("bitbucket:") {
59            return Self::parse_shorthand(rest, GitProvider::Bitbucket);
60        }
61
62        // Handle owner/repo shorthand (assumes GitHub)
63        if !input.contains("://") && !input.contains('@') && input.contains('/') {
64            return Self::parse_shorthand(input, GitProvider::GitHub);
65        }
66
67        // Handle SSH URLs (git@github.com:owner/repo.git)
68        if input.starts_with("git@") {
69            return Self::parse_ssh_url(input);
70        }
71
72        // Handle HTTPS URLs
73        Self::parse_https_url(input)
74    }
75
76    fn parse_shorthand(input: &str, provider: GitProvider) -> Result<Self, RemoteError> {
77        let parts: Vec<&str> = input.split('/').collect();
78        if parts.len() < 2 {
79            return Err(RemoteError::InvalidUrl(format!("Invalid shorthand: {}", input)));
80        }
81
82        let owner = parts[0].to_owned();
83        let name = parts[1].trim_end_matches(".git").to_owned();
84
85        let (branch, subdir) = if parts.len() > 2 {
86            // Check if "tree" or "blob" is in path (GitHub URL format)
87            if parts.get(2) == Some(&"tree") || parts.get(2) == Some(&"blob") {
88                let branch = parts.get(3).map(|s| s.to_string());
89                let subdir = if parts.len() > 4 {
90                    Some(parts[4..].join("/"))
91                } else {
92                    None
93                };
94                (branch, subdir)
95            } else {
96                // Assume rest is subdir
97                (None, Some(parts[2..].join("/")))
98            }
99        } else {
100            (None, None)
101        };
102
103        Ok(Self {
104            url: Self::build_clone_url(provider, &owner, &name),
105            provider,
106            owner: Some(owner),
107            name,
108            branch,
109            reference: None,
110            subdir,
111        })
112    }
113
114    fn parse_ssh_url(input: &str) -> Result<Self, RemoteError> {
115        // git@github.com:owner/repo.git
116        let provider = if input.contains("github.com") {
117            GitProvider::GitHub
118        } else if input.contains("gitlab.com") {
119            GitProvider::GitLab
120        } else if input.contains("bitbucket.org") {
121            GitProvider::Bitbucket
122        } else {
123            GitProvider::Generic
124        };
125
126        // Extract owner/repo from path
127        let path_start = input
128            .find(':')
129            .ok_or_else(|| RemoteError::InvalidUrl("Invalid SSH URL format".to_owned()))?
130            + 1;
131        let path = &input[path_start..];
132
133        // For Generic providers, preserve the original SSH URL
134        // This ensures self-hosted Git servers (Gitea, self-hosted GitLab, etc.) work correctly
135        if provider == GitProvider::Generic {
136            let parts: Vec<&str> = path.split('/').collect();
137            if parts.len() < 2 {
138                return Err(RemoteError::InvalidUrl(format!(
139                    "Cannot parse owner/repo from SSH URL: {}",
140                    input
141                )));
142            }
143            let owner = parts[0].to_owned();
144            let name = parts[1].trim_end_matches(".git").to_owned();
145
146            return Ok(Self {
147                url: input.to_owned(), // Keep original SSH URL for generic providers
148                provider,
149                owner: Some(owner),
150                name,
151                branch: None,
152                reference: None,
153                subdir: None,
154            });
155        }
156
157        Self::parse_shorthand(path, provider)
158    }
159
160    fn parse_https_url(input: &str) -> Result<Self, RemoteError> {
161        let url = Url::parse(input).map_err(|e| RemoteError::InvalidUrl(e.to_string()))?;
162
163        let host = url.host_str().unwrap_or("");
164        let provider = if host.contains("github.com") {
165            GitProvider::GitHub
166        } else if host.contains("gitlab.com") {
167            GitProvider::GitLab
168        } else if host.contains("bitbucket.org") {
169            GitProvider::Bitbucket
170        } else {
171            GitProvider::Generic
172        };
173
174        let path = url.path().trim_start_matches('/');
175
176        // For Generic providers, preserve the original URL instead of rebuilding
177        // This ensures custom Git servers (self-hosted GitLab, Gitea, etc.) work correctly
178        if provider == GitProvider::Generic {
179            let parts: Vec<&str> = path.split('/').collect();
180            if parts.len() < 2 {
181                return Err(RemoteError::InvalidUrl(format!(
182                    "Cannot parse repository path from URL: {}",
183                    input
184                )));
185            }
186            let owner = parts[0].to_owned();
187            let name = parts[1].trim_end_matches(".git").to_owned();
188
189            return Ok(Self {
190                url: input.to_owned(), // Keep original URL for generic providers
191                provider,
192                owner: Some(owner),
193                name,
194                branch: None,
195                reference: None,
196                subdir: None,
197            });
198        }
199
200        Self::parse_shorthand(path, provider)
201    }
202
203    fn build_clone_url(provider: GitProvider, owner: &str, name: &str) -> String {
204        match provider {
205            GitProvider::GitHub => format!("https://github.com/{}/{}.git", owner, name),
206            GitProvider::GitLab => format!("https://gitlab.com/{}/{}.git", owner, name),
207            GitProvider::Bitbucket => format!("https://bitbucket.org/{}/{}.git", owner, name),
208            GitProvider::Generic => format!("https://example.com/{}/{}.git", owner, name),
209        }
210    }
211
212    /// Clone the repository to a temporary directory with RAII cleanup
213    /// Returns (path_to_repo, temp_dir_handle) - keep the TempDir alive to prevent cleanup
214    pub fn clone_with_cleanup(&self) -> Result<(PathBuf, TempDir), RemoteError> {
215        let temp_dir = TempDir::with_prefix("infiniloom-")
216            .map_err(|e| RemoteError::IoError(format!("Failed to create temp dir: {}", e)))?;
217
218        let target = temp_dir.path().to_path_buf();
219        let repo_path = self.clone_to_path(&target)?;
220
221        Ok((repo_path, temp_dir))
222    }
223
224    /// Clone the repository to a temporary directory (legacy method without RAII cleanup)
225    ///
226    /// # Warning
227    /// This method does not clean up the temp directory automatically.
228    /// Consider using [`clone_with_cleanup()`](Self::clone_with_cleanup) instead for automatic cleanup.
229    ///
230    /// # Public API Note
231    /// This method is part of the public library API for users who need manual control
232    /// over the cloned directory lifecycle. The CLI uses `clone_with_cleanup()` internally.
233    #[allow(dead_code)]
234    pub fn clone(&self, target_dir: Option<&Path>) -> Result<PathBuf, RemoteError> {
235        let target = target_dir.map(PathBuf::from).unwrap_or_else(|| {
236            std::env::temp_dir().join(format!(
237                "infiniloom-{}-{}",
238                self.owner.as_deref().unwrap_or("repo"),
239                self.name
240            ))
241        });
242
243        self.clone_to_path(&target)
244    }
245
246    /// Internal method to clone to a specific path
247    ///
248    /// SAFETY: Will only delete existing directories if:
249    /// - The directory is inside system temp directory, OR
250    /// - The directory contains an `.infiniloom-clone` marker file, OR
251    /// - The directory is empty
252    fn clone_to_path(&self, target: &Path) -> Result<PathBuf, RemoteError> {
253        // Clean up existing directory if it exists (with safety checks)
254        if target.exists() {
255            if !Self::is_safe_to_delete(target) {
256                return Err(RemoteError::IoError(format!(
257                    "Refusing to delete existing directory '{}'. \
258                     Path is not empty, not in temp dir, and has no .infiniloom-clone marker. \
259                     Please remove manually or use a different target path.",
260                    target.display()
261                )));
262            }
263            std::fs::remove_dir_all(target).map_err(|e| RemoteError::IoError(e.to_string()))?;
264        }
265
266        // Build git clone command
267        let mut cmd = Command::new("git");
268        cmd.arg("clone");
269
270        // Shallow clone for faster download
271        cmd.arg("--depth").arg("1");
272
273        // Branch if specified
274        if let Some(ref branch) = self.branch {
275            cmd.arg("--branch").arg(branch);
276        }
277
278        // Single branch for speed
279        cmd.arg("--single-branch");
280
281        cmd.arg(&self.url);
282        cmd.arg(target);
283
284        let output = cmd
285            .output()
286            .map_err(|e| RemoteError::GitError(format!("Failed to run git: {}", e)))?;
287
288        if !output.status.success() {
289            let stderr = String::from_utf8_lossy(&output.stderr);
290            return Err(RemoteError::GitError(format!("git clone failed: {}", stderr)));
291        }
292
293        // Checkout specific reference if provided
294        if let Some(ref reference) = self.reference {
295            let mut checkout = Command::new("git");
296            checkout.current_dir(target);
297            checkout.args(["checkout", reference]);
298
299            let output = checkout
300                .output()
301                .map_err(|e| RemoteError::GitError(format!("Failed to checkout: {}", e)))?;
302
303            if !output.status.success() {
304                let stderr = String::from_utf8_lossy(&output.stderr);
305                return Err(RemoteError::GitError(format!("git checkout failed: {}", stderr)));
306            }
307        }
308
309        // Create marker file so we know this is a directory we created
310        // This allows safe cleanup on subsequent runs
311        let marker_path = target.join(".infiniloom-clone");
312        drop(std::fs::write(&marker_path, format!("cloned from: {}\n", self.url)));
313
314        // If subdir specified, return path to subdir
315        if let Some(ref subdir) = self.subdir {
316            let subdir_path = target.join(subdir);
317            if subdir_path.exists() {
318                return Ok(subdir_path);
319            }
320        }
321
322        Ok(target.to_path_buf())
323    }
324
325    /// Check if a directory is safe to delete
326    ///
327    /// Returns true if:
328    /// - The path is inside system temp directory, OR
329    /// - The path contains an `.infiniloom-clone` marker file, OR
330    /// - The path is an empty directory
331    fn is_safe_to_delete(path: &Path) -> bool {
332        // Check if path is in temp directory
333        if let Ok(temp_dir) = std::env::temp_dir().canonicalize() {
334            if let Ok(canonical_path) = path.canonicalize() {
335                if canonical_path.starts_with(&temp_dir) {
336                    return true;
337                }
338            }
339        }
340
341        // Check for our marker file
342        if path.join(".infiniloom-clone").exists() {
343            return true;
344        }
345
346        // Check if directory is empty
347        if let Ok(mut entries) = std::fs::read_dir(path) {
348            if entries.next().is_none() {
349                return true;
350            }
351        }
352
353        false
354    }
355
356    /// Clone with sparse checkout (only fetch specified paths)
357    ///
358    /// This is useful for very large repositories where you only need a subset
359    /// of files. Uses Git's sparse checkout feature to minimize download size.
360    ///
361    /// # Safety
362    /// Will only delete existing directories if:
363    /// - The directory is inside system temp directory, OR
364    /// - The directory contains an `.infiniloom-clone` marker file, OR
365    /// - The directory is empty
366    ///
367    /// # Public API Note
368    /// This method is part of the public library API. The CLI does not currently
369    /// use sparse checkout, but it's available for library users who need it.
370    #[allow(dead_code)]
371    pub fn sparse_clone(
372        &self,
373        paths: &[&str],
374        target_dir: Option<&Path>,
375    ) -> Result<PathBuf, RemoteError> {
376        let target = target_dir.map(PathBuf::from).unwrap_or_else(|| {
377            std::env::temp_dir().join(format!("infiniloom-sparse-{}", self.name))
378        });
379
380        // Clean up (with safety checks)
381        if target.exists() {
382            if !Self::is_safe_to_delete(&target) {
383                return Err(RemoteError::IoError(format!(
384                    "Refusing to delete existing directory '{}'. \
385                     Path is not empty, not in temp dir, and has no .infiniloom-clone marker. \
386                     Please remove manually or use a different target path.",
387                    target.display()
388                )));
389            }
390            std::fs::remove_dir_all(&target).map_err(|e| RemoteError::IoError(e.to_string()))?;
391        }
392
393        // Initialize empty repo
394        let mut init = Command::new("git");
395        init.args(["init", &target.to_string_lossy()]);
396        init.output()
397            .map_err(|e| RemoteError::GitError(e.to_string()))?;
398
399        // Configure sparse checkout
400        let mut config = Command::new("git");
401        config.current_dir(&target);
402        config.args(["config", "core.sparseCheckout", "true"]);
403        config
404            .output()
405            .map_err(|e| RemoteError::GitError(e.to_string()))?;
406
407        // Add remote
408        let mut remote = Command::new("git");
409        remote.current_dir(&target);
410        remote.args(["remote", "add", "origin", &self.url]);
411        remote
412            .output()
413            .map_err(|e| RemoteError::GitError(e.to_string()))?;
414
415        // Write sparse checkout config
416        let sparse_dir = target.join(".git/info");
417        std::fs::create_dir_all(&sparse_dir).map_err(|e| RemoteError::IoError(e.to_string()))?;
418
419        let sparse_file = sparse_dir.join("sparse-checkout");
420        let sparse_content = paths.join("\n");
421        std::fs::write(&sparse_file, sparse_content)
422            .map_err(|e| RemoteError::IoError(e.to_string()))?;
423
424        // Fetch and checkout
425        let branch = self.branch.as_deref().unwrap_or("HEAD");
426        let mut fetch = Command::new("git");
427        fetch.current_dir(&target);
428        fetch.args(["fetch", "--depth", "1", "origin", branch]);
429        let output = fetch
430            .output()
431            .map_err(|e| RemoteError::GitError(e.to_string()))?;
432
433        if !output.status.success() {
434            let stderr = String::from_utf8_lossy(&output.stderr);
435            return Err(RemoteError::GitError(format!("git fetch failed: {}", stderr)));
436        }
437
438        let mut checkout = Command::new("git");
439        checkout.current_dir(&target);
440        checkout.args(["checkout", "FETCH_HEAD"]);
441        checkout
442            .output()
443            .map_err(|e| RemoteError::GitError(e.to_string()))?;
444
445        // Create marker file so we know this is a directory we created
446        let marker_path = target.join(".infiniloom-clone");
447        drop(std::fs::write(&marker_path, format!("sparse clone from: {}\n", self.url)));
448
449        Ok(target)
450    }
451
452    /// Check if a URL is a remote repository URL
453    pub fn is_remote_url(input: &str) -> bool {
454        input.contains("://") ||
455        input.starts_with("git@") ||
456        input.starts_with("github:") ||
457        input.starts_with("gitlab:") ||
458        input.starts_with("bitbucket:") ||
459        // Simple owner/repo format (not starting with / or .)
460        (input.contains('/') && !input.starts_with('/') && !input.starts_with('.') && input.matches('/').count() == 1)
461    }
462}
463
464/// Remote repository errors
465#[derive(Debug, Error)]
466pub enum RemoteError {
467    #[error("Invalid URL: {0}")]
468    InvalidUrl(String),
469    #[error("Git error: {0}")]
470    GitError(String),
471    #[error("I/O error: {0}")]
472    IoError(String),
473    #[error("Not found: {0}")]
474    NotFound(String),
475}
476
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_github_url() {
        let parsed = RemoteRepo::parse("https://github.com/rust-lang/rust").unwrap();
        assert_eq!(parsed.provider, GitProvider::GitHub);
        assert_eq!(parsed.owner.as_deref(), Some("rust-lang"));
        assert_eq!(parsed.name, "rust");
    }

    #[test]
    fn test_parse_shorthand() {
        // Bare owner/repo defaults to GitHub.
        let bare = RemoteRepo::parse("rust-lang/rust").unwrap();
        assert_eq!(bare.provider, GitProvider::GitHub);
        assert_eq!(bare.name, "rust");

        // Explicit provider prefix.
        let prefixed = RemoteRepo::parse("github:rust-lang/rust").unwrap();
        assert_eq!(prefixed.provider, GitProvider::GitHub);
    }

    #[test]
    fn test_parse_ssh_url() {
        let parsed = RemoteRepo::parse("git@github.com:rust-lang/rust.git").unwrap();
        assert_eq!(parsed.provider, GitProvider::GitHub);
        assert_eq!(parsed.owner.as_deref(), Some("rust-lang"));
        assert_eq!(parsed.name, "rust");
    }

    #[test]
    fn test_parse_with_branch() {
        let parsed = RemoteRepo::parse("https://github.com/rust-lang/rust/tree/master").unwrap();
        assert_eq!(parsed.branch.as_deref(), Some("master"));
    }

    #[test]
    fn test_is_remote_url() {
        let remote_inputs = [
            "https://github.com/foo/bar",
            "git@github.com:foo/bar.git",
            "github:foo/bar",
        ];
        for input in remote_inputs.iter() {
            assert!(RemoteRepo::is_remote_url(input));
        }
        assert!(!RemoteRepo::is_remote_url("/path/to/local/repo"));
    }

    #[test]
    fn test_parse_ssh_url_generic_provider() {
        // Self-hosted Git servers should preserve the original SSH URL
        let original = "git@git.mycompany.com:team/project.git";
        let parsed = RemoteRepo::parse(original).unwrap();
        assert_eq!(parsed.provider, GitProvider::Generic);
        assert_eq!(parsed.owner.as_deref(), Some("team"));
        assert_eq!(parsed.name, "project");
        // Original SSH URL should be preserved (not converted to https://example.com/...)
        assert_eq!(parsed.url, original);
    }

    #[test]
    fn test_parse_https_url_generic_provider() {
        // Self-hosted Git servers via HTTPS should preserve the original URL
        let original = "https://git.mycompany.com/team/project.git";
        let parsed = RemoteRepo::parse(original).unwrap();
        assert_eq!(parsed.provider, GitProvider::Generic);
        assert_eq!(parsed.owner.as_deref(), Some("team"));
        assert_eq!(parsed.name, "project");
        // Original HTTPS URL should be preserved
        assert_eq!(parsed.url, original);
    }
}
543}