1use std::path::{Path, PathBuf};
6use std::process::Command;
7use tempfile::TempDir;
8use thiserror::Error;
9use url::Url;
10
/// Hosting service a remote repository lives on.
///
/// The provider decides which host is used when a canonical HTTPS clone URL
/// is rebuilt from an `owner/name` pair.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum GitProvider {
    /// Repository hosted on github.com.
    GitHub,
    /// Repository hosted on gitlab.com.
    GitLab,
    /// Repository hosted on bitbucket.org.
    Bitbucket,
    /// Any other host; the URL supplied by the user is used as-is.
    Generic,
}
19
20#[derive(Debug, Clone)]
22pub struct RemoteRepo {
23 pub url: String,
25 pub provider: GitProvider,
27 pub owner: Option<String>,
29 pub name: String,
31 pub branch: Option<String>,
33 pub reference: Option<String>,
35 pub subdir: Option<String>,
37}
38
39impl RemoteRepo {
40 pub fn parse(input: &str) -> Result<Self, RemoteError> {
49 let input = input.trim();
50
51 if let Some(rest) = input.strip_prefix("github:") {
53 return Self::parse_shorthand(rest, GitProvider::GitHub);
54 }
55 if let Some(rest) = input.strip_prefix("gitlab:") {
56 return Self::parse_shorthand(rest, GitProvider::GitLab);
57 }
58 if let Some(rest) = input.strip_prefix("bitbucket:") {
59 return Self::parse_shorthand(rest, GitProvider::Bitbucket);
60 }
61
62 if !input.contains("://") && !input.contains('@') && input.contains('/') {
64 return Self::parse_shorthand(input, GitProvider::GitHub);
65 }
66
67 if input.starts_with("git@") {
69 return Self::parse_ssh_url(input);
70 }
71
72 Self::parse_https_url(input)
74 }
75
76 fn parse_shorthand(input: &str, provider: GitProvider) -> Result<Self, RemoteError> {
77 let parts: Vec<&str> = input.split('/').collect();
78 if parts.len() < 2 {
79 return Err(RemoteError::InvalidUrl(format!("Invalid shorthand: {}", input)));
80 }
81
82 let owner = parts[0].to_owned();
83 let name = parts[1].trim_end_matches(".git").to_owned();
84
85 let (branch, subdir) = if parts.len() > 2 {
86 if parts.get(2) == Some(&"tree") || parts.get(2) == Some(&"blob") {
88 let branch = parts.get(3).map(|s| s.to_string());
89 let subdir = if parts.len() > 4 {
90 Some(parts[4..].join("/"))
91 } else {
92 None
93 };
94 (branch, subdir)
95 } else {
96 (None, Some(parts[2..].join("/")))
98 }
99 } else {
100 (None, None)
101 };
102
103 Ok(Self {
104 url: Self::build_clone_url(provider, &owner, &name),
105 provider,
106 owner: Some(owner),
107 name,
108 branch,
109 reference: None,
110 subdir,
111 })
112 }
113
114 fn parse_ssh_url(input: &str) -> Result<Self, RemoteError> {
115 let provider = if input.contains("github.com") {
117 GitProvider::GitHub
118 } else if input.contains("gitlab.com") {
119 GitProvider::GitLab
120 } else if input.contains("bitbucket.org") {
121 GitProvider::Bitbucket
122 } else {
123 GitProvider::Generic
124 };
125
126 let path_start = input
128 .find(':')
129 .ok_or_else(|| RemoteError::InvalidUrl("Invalid SSH URL format".to_owned()))?
130 + 1;
131 let path = &input[path_start..];
132
133 if provider == GitProvider::Generic {
136 let parts: Vec<&str> = path.split('/').collect();
137 if parts.len() < 2 {
138 return Err(RemoteError::InvalidUrl(format!(
139 "Cannot parse owner/repo from SSH URL: {}",
140 input
141 )));
142 }
143 let owner = parts[0].to_owned();
144 let name = parts[1].trim_end_matches(".git").to_owned();
145
146 return Ok(Self {
147 url: input.to_owned(), provider,
149 owner: Some(owner),
150 name,
151 branch: None,
152 reference: None,
153 subdir: None,
154 });
155 }
156
157 Self::parse_shorthand(path, provider)
158 }
159
160 fn parse_https_url(input: &str) -> Result<Self, RemoteError> {
161 let url = Url::parse(input).map_err(|e| RemoteError::InvalidUrl(e.to_string()))?;
162
163 let host = url.host_str().unwrap_or("");
164 let provider = if host.contains("github.com") {
165 GitProvider::GitHub
166 } else if host.contains("gitlab.com") {
167 GitProvider::GitLab
168 } else if host.contains("bitbucket.org") {
169 GitProvider::Bitbucket
170 } else {
171 GitProvider::Generic
172 };
173
174 let path = url.path().trim_start_matches('/');
175
176 if provider == GitProvider::Generic {
179 let parts: Vec<&str> = path.split('/').collect();
180 if parts.len() < 2 {
181 return Err(RemoteError::InvalidUrl(format!(
182 "Cannot parse repository path from URL: {}",
183 input
184 )));
185 }
186 let owner = parts[0].to_owned();
187 let name = parts[1].trim_end_matches(".git").to_owned();
188
189 return Ok(Self {
190 url: input.to_owned(), provider,
192 owner: Some(owner),
193 name,
194 branch: None,
195 reference: None,
196 subdir: None,
197 });
198 }
199
200 Self::parse_shorthand(path, provider)
201 }
202
203 fn build_clone_url(provider: GitProvider, owner: &str, name: &str) -> String {
204 match provider {
205 GitProvider::GitHub => format!("https://github.com/{}/{}.git", owner, name),
206 GitProvider::GitLab => format!("https://gitlab.com/{}/{}.git", owner, name),
207 GitProvider::Bitbucket => format!("https://bitbucket.org/{}/{}.git", owner, name),
208 GitProvider::Generic => format!("https://example.com/{}/{}.git", owner, name),
209 }
210 }
211
212 pub fn clone_with_cleanup(&self) -> Result<(PathBuf, TempDir), RemoteError> {
215 let temp_dir = TempDir::with_prefix("infiniloom-")
216 .map_err(|e| RemoteError::IoError(format!("Failed to create temp dir: {}", e)))?;
217
218 let target = temp_dir.path().to_path_buf();
219 let repo_path = self.clone_to_path(&target)?;
220
221 Ok((repo_path, temp_dir))
222 }
223
224 #[allow(dead_code)]
234 pub fn clone(&self, target_dir: Option<&Path>) -> Result<PathBuf, RemoteError> {
235 let target = target_dir.map(PathBuf::from).unwrap_or_else(|| {
236 std::env::temp_dir().join(format!(
237 "infiniloom-{}-{}",
238 self.owner.as_deref().unwrap_or("repo"),
239 self.name
240 ))
241 });
242
243 self.clone_to_path(&target)
244 }
245
246 fn clone_to_path(&self, target: &Path) -> Result<PathBuf, RemoteError> {
253 if target.exists() {
255 if !Self::is_safe_to_delete(target) {
256 return Err(RemoteError::IoError(format!(
257 "Refusing to delete existing directory '{}'. \
258 Path is not empty, not in temp dir, and has no .infiniloom-clone marker. \
259 Please remove manually or use a different target path.",
260 target.display()
261 )));
262 }
263 std::fs::remove_dir_all(target).map_err(|e| RemoteError::IoError(e.to_string()))?;
264 }
265
266 let mut cmd = Command::new("git");
268 cmd.arg("clone");
269
270 cmd.arg("--depth").arg("1");
272
273 if let Some(ref branch) = self.branch {
275 cmd.arg("--branch").arg(branch);
276 }
277
278 cmd.arg("--single-branch");
280
281 cmd.arg(&self.url);
282 cmd.arg(target);
283
284 let output = cmd
285 .output()
286 .map_err(|e| RemoteError::GitError(format!("Failed to run git: {}", e)))?;
287
288 if !output.status.success() {
289 let stderr = String::from_utf8_lossy(&output.stderr);
290 return Err(RemoteError::GitError(format!("git clone failed: {}", stderr)));
291 }
292
293 if let Some(ref reference) = self.reference {
295 let mut checkout = Command::new("git");
296 checkout.current_dir(target);
297 checkout.args(["checkout", reference]);
298
299 let output = checkout
300 .output()
301 .map_err(|e| RemoteError::GitError(format!("Failed to checkout: {}", e)))?;
302
303 if !output.status.success() {
304 let stderr = String::from_utf8_lossy(&output.stderr);
305 return Err(RemoteError::GitError(format!("git checkout failed: {}", stderr)));
306 }
307 }
308
309 let marker_path = target.join(".infiniloom-clone");
312 drop(std::fs::write(&marker_path, format!("cloned from: {}\n", self.url)));
313
314 if let Some(ref subdir) = self.subdir {
316 let subdir_path = target.join(subdir);
317 if subdir_path.exists() {
318 return Ok(subdir_path);
319 }
320 }
321
322 Ok(target.to_path_buf())
323 }
324
/// Decides whether an existing `path` may be removed before cloning into it.
///
/// A path is considered safe when any of the following holds:
///
/// * it resolves to a location strictly inside the system temp directory,
/// * it contains a `.infiniloom-clone` marker file,
/// * it is an empty directory.
///
/// The system temp directory itself is never safe. `Path::starts_with` is
/// true for equal paths, so without this guard a target equal to the temp
/// root would let the caller wipe every temporary file on the machine.
fn is_safe_to_delete(path: &Path) -> bool {
    let temp_root = std::env::temp_dir().canonicalize();
    let resolved = path.canonicalize();

    if let (Ok(temp_root), Ok(resolved)) = (temp_root, resolved) {
        if resolved == temp_root {
            return false;
        }
        if resolved.starts_with(&temp_root) {
            return true;
        }
    }

    if path.join(".infiniloom-clone").exists() {
        return true;
    }

    // An empty directory holds nothing that could be lost.
    match std::fs::read_dir(path) {
        Ok(mut entries) => entries.next().is_none(),
        Err(_) => false,
    }
}
355
356 #[allow(dead_code)]
371 pub fn sparse_clone(
372 &self,
373 paths: &[&str],
374 target_dir: Option<&Path>,
375 ) -> Result<PathBuf, RemoteError> {
376 let target = target_dir.map(PathBuf::from).unwrap_or_else(|| {
377 std::env::temp_dir().join(format!("infiniloom-sparse-{}", self.name))
378 });
379
380 if target.exists() {
382 if !Self::is_safe_to_delete(&target) {
383 return Err(RemoteError::IoError(format!(
384 "Refusing to delete existing directory '{}'. \
385 Path is not empty, not in temp dir, and has no .infiniloom-clone marker. \
386 Please remove manually or use a different target path.",
387 target.display()
388 )));
389 }
390 std::fs::remove_dir_all(&target).map_err(|e| RemoteError::IoError(e.to_string()))?;
391 }
392
393 let mut init = Command::new("git");
395 init.args(["init", &target.to_string_lossy()]);
396 init.output()
397 .map_err(|e| RemoteError::GitError(e.to_string()))?;
398
399 let mut config = Command::new("git");
401 config.current_dir(&target);
402 config.args(["config", "core.sparseCheckout", "true"]);
403 config
404 .output()
405 .map_err(|e| RemoteError::GitError(e.to_string()))?;
406
407 let mut remote = Command::new("git");
409 remote.current_dir(&target);
410 remote.args(["remote", "add", "origin", &self.url]);
411 remote
412 .output()
413 .map_err(|e| RemoteError::GitError(e.to_string()))?;
414
415 let sparse_dir = target.join(".git/info");
417 std::fs::create_dir_all(&sparse_dir).map_err(|e| RemoteError::IoError(e.to_string()))?;
418
419 let sparse_file = sparse_dir.join("sparse-checkout");
420 let sparse_content = paths.join("\n");
421 std::fs::write(&sparse_file, sparse_content)
422 .map_err(|e| RemoteError::IoError(e.to_string()))?;
423
424 let branch = self.branch.as_deref().unwrap_or("HEAD");
426 let mut fetch = Command::new("git");
427 fetch.current_dir(&target);
428 fetch.args(["fetch", "--depth", "1", "origin", branch]);
429 let output = fetch
430 .output()
431 .map_err(|e| RemoteError::GitError(e.to_string()))?;
432
433 if !output.status.success() {
434 let stderr = String::from_utf8_lossy(&output.stderr);
435 return Err(RemoteError::GitError(format!("git fetch failed: {}", stderr)));
436 }
437
438 let mut checkout = Command::new("git");
439 checkout.current_dir(&target);
440 checkout.args(["checkout", "FETCH_HEAD"]);
441 checkout
442 .output()
443 .map_err(|e| RemoteError::GitError(e.to_string()))?;
444
445 let marker_path = target.join(".infiniloom-clone");
447 drop(std::fs::write(&marker_path, format!("sparse clone from: {}\n", self.url)));
448
449 Ok(target)
450 }
451
/// Heuristically decides whether `input` names a remote repository rather
/// than a local path.
///
/// Surrounding whitespace is ignored, matching `parse`. The input counts as
/// remote when it
///
/// * contains a scheme separator (`://`),
/// * starts with `git@`, `github:`, `gitlab:` or `bitbucket:`, or
/// * is a bare `owner/repo` pair: exactly one `/` with non-empty text on
///   both sides, and not starting with `/`, `.` or `~`.
///
/// The last rule keeps absolute, relative (`./x`, `../x`), home-relative
/// (`~/x`) and trailing-slash (`dir/`) local paths from being treated as
/// GitHub shorthand.
pub fn is_remote_url(input: &str) -> bool {
    let input = input.trim();

    let has_remote_prefix = ["git@", "github:", "gitlab:", "bitbucket:"]
        .iter()
        .any(|prefix| input.starts_with(prefix));
    if has_remote_prefix || input.contains("://") {
        return true;
    }

    if input.starts_with(|c: char| matches!(c, '/' | '.' | '~')) {
        return false;
    }

    match input.split_once('/') {
        Some((owner, name)) => !owner.is_empty() && !name.is_empty() && !name.contains('/'),
        None => false,
    }
}
462}
463
464#[derive(Debug, Error)]
466pub enum RemoteError {
467 #[error("Invalid URL: {0}")]
468 InvalidUrl(String),
469 #[error("Git error: {0}")]
470 GitError(String),
471 #[error("I/O error: {0}")]
472 IoError(String),
473 #[error("Not found: {0}")]
474 NotFound(String),
475}
476
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input`, panicking (and thereby failing the test) on error.
    fn parsed(input: &str) -> RemoteRepo {
        RemoteRepo::parse(input).unwrap()
    }

    #[test]
    fn test_parse_github_url() {
        let repo = parsed("https://github.com/rust-lang/rust");

        assert_eq!(repo.provider, GitProvider::GitHub);
        assert_eq!(repo.owner.as_deref(), Some("rust-lang"));
        assert_eq!(repo.name, "rust");
    }

    #[test]
    fn test_parse_shorthand() {
        // Bare `owner/repo` defaults to GitHub.
        let bare = parsed("rust-lang/rust");
        assert_eq!(bare.provider, GitProvider::GitHub);
        assert_eq!(bare.name, "rust");

        // An explicit provider prefix is honoured.
        let prefixed = parsed("github:rust-lang/rust");
        assert_eq!(prefixed.provider, GitProvider::GitHub);
    }

    #[test]
    fn test_parse_ssh_url() {
        let repo = parsed("git@github.com:rust-lang/rust.git");

        assert_eq!(repo.provider, GitProvider::GitHub);
        assert_eq!(repo.owner.as_deref(), Some("rust-lang"));
        assert_eq!(repo.name, "rust");
    }

    #[test]
    fn test_parse_with_branch() {
        let repo = parsed("https://github.com/rust-lang/rust/tree/master");

        assert_eq!(repo.branch.as_deref(), Some("master"));
    }

    #[test]
    fn test_is_remote_url() {
        let remote_inputs = [
            "https://github.com/foo/bar",
            "git@github.com:foo/bar.git",
            "github:foo/bar",
        ];
        for input in remote_inputs.iter() {
            assert!(RemoteRepo::is_remote_url(input));
        }

        assert!(!RemoteRepo::is_remote_url("/path/to/local/repo"));
    }

    #[test]
    fn test_parse_ssh_url_generic_provider() {
        let input = "git@git.mycompany.com:team/project.git";
        let repo = parsed(input);

        assert_eq!(repo.provider, GitProvider::Generic);
        assert_eq!(repo.owner.as_deref(), Some("team"));
        assert_eq!(repo.name, "project");
        // Unknown hosts keep the URL exactly as given.
        assert_eq!(repo.url, input);
    }

    #[test]
    fn test_parse_https_url_generic_provider() {
        let input = "https://git.mycompany.com/team/project.git";
        let repo = parsed(input);

        assert_eq!(repo.provider, GitProvider::Generic);
        assert_eq!(repo.owner.as_deref(), Some("team"));
        assert_eq!(repo.name, "project");
        // Unknown hosts keep the URL exactly as given.
        assert_eq!(repo.url, input);
    }
}