Skip to main content

mars_agents/source/
git.rs

1//! Git source adapter — strategy and public API.
2//!
3//! Delegates to `git_cli` for git CLI operations and `archive` for
4//! GitHub archive download/extraction.
5
6use crate::diagnostic::DiagnosticCollector;
7use crate::error::MarsError;
8use crate::source::parse::extract_hostname;
9use crate::source::{AvailableVersion, GlobalCache, ResolvedRef};
10use crate::types::CommitHash;
11
12use super::archive;
13use super::git_cli;
14
15/// Options controlling git fetch behavior.
16#[derive(Debug, Clone, Default)]
17pub struct FetchOptions {
18    /// Preferred commit SHA to checkout before resolving tags/versions.
19    /// Used for lock replay to guarantee reproducible content.
20    pub preferred_commit: Option<CommitHash>,
21}
22
23/// Normalize a git URL to a filesystem-safe directory name.
24///
25/// Delegates to [`super::canonical::canonicalize_git_url`] for uniform URL
26/// normalization, then replaces `/` with `_` to produce a single path component.
27///
28/// Examples:
29/// - `https://github.com/foo/bar` -> `github.com_foo_bar`
30/// - `github.com/foo/bar` -> `github.com_foo_bar`
31/// - `git@github.com:foo/bar.git` -> `github.com_foo_bar`
32/// - `ssh://git@github.com/foo/bar` -> `github.com_foo_bar`
33pub fn url_to_dirname(url: &str) -> String {
34    super::canonical::canonicalize_git_url(url).replace('/', "_")
35}
36
37/// Parse a tag name as a semver version tag.
38///
39/// Accepts: `v1.0.0`, `v0.5.2`, `1.0.0`
40/// Rejects: `latest`, `nightly-2024`, or any non-semver tag.
41pub(crate) fn parse_semver_tag(tag: &str) -> Option<semver::Version> {
42    let version_str = tag.strip_prefix('v').unwrap_or(tag);
43    semver::Version::parse(version_str).ok()
44}
45
/// Produce a remote URL the git CLI can actually fetch from.
///
/// Source identities canonicalize GitHub/GitLab HTTPS URLs down to the bare
/// `host/owner/repo` shorthand so they compare equal. Git itself needs a real
/// locator, so that shorthand is expanded to `https://host/owner/repo` here,
/// right before `git ls-remote`, `clone`, or `fetch` run.
///
/// Surrounding whitespace is always trimmed. The host prefix is matched
/// ASCII-case-insensitively and the original casing is kept in the result.
/// Every other input is returned as-is (after trimming).
pub(crate) fn normalize_git_remote_url(url: &str) -> String {
    const SHORTHAND_PREFIXES: [&str; 2] = ["github.com/", "gitlab.com/"];

    let locator = url.trim();
    // `get` returns `None` when the input is too short or the cut would land
    // inside a multi-byte character; neither case can match an ASCII prefix.
    let is_shorthand = SHORTHAND_PREFIXES.iter().any(|prefix| {
        locator
            .get(..prefix.len())
            .map_or(false, |head| head.eq_ignore_ascii_case(prefix))
    });

    if is_shorthand {
        format!("https://{locator}")
    } else {
        locator.to_string()
    }
}
61
62#[derive(Debug, Clone)]
63pub(crate) struct ResolvedVersion {
64    pub tag: Option<String>,
65    pub version: Option<semver::Version>,
66    pub sha: String,
67}
68
69fn resolve_version(
70    url: &str,
71    version_req: Option<&str>,
72    diag: &mut DiagnosticCollector,
73) -> Result<ResolvedVersion, MarsError> {
74    if let Some(version_req) = version_req {
75        if let Some(requested_version) = parse_semver_tag(version_req) {
76            let tags = ls_remote_tags(url)?;
77            let selected = tags
78                .into_iter()
79                .find(|tag| tag.tag == version_req || tag.version == requested_version)
80                .ok_or_else(|| MarsError::Source {
81                    source_name: url.to_string(),
82                    message: format!("version tag `{version_req}` not found"),
83                })?;
84
85            return Ok(ResolvedVersion {
86                tag: Some(selected.tag),
87                version: Some(selected.version),
88                sha: selected.commit_id,
89            });
90        }
91
92        let sha = ls_remote_ref(url, version_req)?;
93        return Ok(ResolvedVersion {
94            tag: None,
95            version: None,
96            sha,
97        });
98    }
99
100    let tags = ls_remote_tags(url)?;
101    if let Some(selected) = tags.last() {
102        return Ok(ResolvedVersion {
103            tag: Some(selected.tag.clone()),
104            version: Some(selected.version.clone()),
105            sha: selected.commit_id.clone(),
106        });
107    }
108
109    diag.warn(
110        "no-releases",
111        format!("no releases found for {url}, using latest commit from default branch"),
112    );
113    let sha = ls_remote_head(url)?;
114    Ok(ResolvedVersion {
115        tag: None,
116        version: None,
117        sha,
118    })
119}
120
121/// Return true when the URL host resolves to github.com.
122pub fn is_github_host(url: &str) -> bool {
123    extract_hostname(url)
124        .map(|host| host.eq_ignore_ascii_case("github.com"))
125        .unwrap_or(false)
126}
127
128fn should_use_github_archive(url: &str) -> bool {
129    let trimmed = url.trim();
130    if trimmed.starts_with("git@") || trimmed.starts_with("ssh://") {
131        return false;
132    }
133
134    trimmed.starts_with("https://") && is_github_host(trimmed)
135}
136
137pub fn list_versions(url: &str, _cache: &GlobalCache) -> Result<Vec<AvailableVersion>, MarsError> {
138    ls_remote_tags(url)
139}
140
141fn ls_remote_ref(url: &str, reference: &str) -> Result<String, MarsError> {
142    let remote_url = normalize_git_remote_url(url);
143    git_cli::ls_remote_ref(&remote_url, reference)
144}
145
146pub(crate) fn ls_remote_head(url: &str) -> Result<String, MarsError> {
147    let remote_url = normalize_git_remote_url(url);
148    git_cli::ls_remote_head(&remote_url)
149}
150
151pub fn ls_remote_tags(url: &str) -> Result<Vec<AvailableVersion>, MarsError> {
152    let remote_url = normalize_git_remote_url(url);
153    git_cli::ls_remote_tags(&remote_url)
154}
155
156pub fn fetch(
157    url: &str,
158    version_req: Option<&str>,
159    source_name: &str,
160    cache: &GlobalCache,
161    options: &FetchOptions,
162    diag: &mut DiagnosticCollector,
163) -> Result<ResolvedRef, MarsError> {
164    let remote_url = normalize_git_remote_url(url);
165    let mut resolved = resolve_version(&remote_url, version_req, diag)?;
166    if let Some(preferred_commit) = options.preferred_commit.as_ref() {
167        resolved.sha = preferred_commit.to_string();
168    }
169
170    let tree_path = if should_use_github_archive(&remote_url) {
171        match archive::fetch_archive(&remote_url, &resolved.sha, cache) {
172            Ok(path) => path,
173            Err(MarsError::Http { status: 404, .. }) if options.preferred_commit.is_some() => {
174                return Err(MarsError::LockedCommitUnreachable {
175                    commit: resolved.sha.clone(),
176                    url: remote_url,
177                });
178            }
179            Err(err) => return Err(err),
180        }
181    } else {
182        // For git clone path, prefer exact SHA checkout when replaying a locked commit,
183        // or when resolving branch/default-HEAD refs (non-tag fetches).
184        let checkout_sha = if options.preferred_commit.is_some() || resolved.tag.is_none() {
185            Some(resolved.sha.as_str())
186        } else {
187            None
188        };
189
190        match git_cli::fetch_git_clone(&remote_url, resolved.tag.as_deref(), checkout_sha, cache) {
191            Ok(path) => path,
192            Err(MarsError::GitCli { .. }) if options.preferred_commit.is_some() => {
193                return Err(MarsError::LockedCommitUnreachable {
194                    commit: resolved.sha.clone(),
195                    url: remote_url,
196                });
197            }
198            Err(err) => return Err(err),
199        }
200    };
201
202    Ok(ResolvedRef {
203        source_name: source_name.into(),
204        version: resolved.version,
205        version_tag: resolved.tag,
206        commit: Some(CommitHash::from(resolved.sha)),
207        tree_path,
208    })
209}
210
211/// Fetch a git source at an exact locked commit without resolving a live ref first.
212pub fn fetch_commit(
213    url: &str,
214    commit: &str,
215    source_name: &str,
216    cache: &GlobalCache,
217    _diag: &mut DiagnosticCollector,
218) -> Result<ResolvedRef, MarsError> {
219    let remote_url = normalize_git_remote_url(url);
220    let tree_path = if should_use_github_archive(&remote_url) {
221        match archive::fetch_archive(&remote_url, commit, cache) {
222            Ok(path) => path,
223            Err(MarsError::Http { status: 404, .. }) => {
224                return Err(MarsError::LockedCommitUnreachable {
225                    commit: commit.to_string(),
226                    url: remote_url,
227                });
228            }
229            Err(err) => return Err(err),
230        }
231    } else {
232        match git_cli::fetch_git_clone(&remote_url, None, Some(commit), cache) {
233            Ok(path) => path,
234            Err(MarsError::GitCli { .. }) => {
235                return Err(MarsError::LockedCommitUnreachable {
236                    commit: commit.to_string(),
237                    url: remote_url,
238                });
239            }
240            Err(err) => return Err(err),
241        }
242    };
243
244    Ok(ResolvedRef {
245        source_name: source_name.into(),
246        version: None,
247        version_tag: None,
248        commit: Some(CommitHash::from(commit)),
249        tree_path,
250    })
251}
252
#[cfg(test)]
mod tests {
    use super::*;
    use semver::Version;
    use std::ffi::OsStr;
    use std::fs;
    use std::path::Path;
    use std::process::Command;
    use tempfile::TempDir;

    // ==================== test helpers ====================

    /// Run `git` with `args` inside `cwd` and return its trimmed stdout.
    ///
    /// Author/committer identity is supplied through the environment so that
    /// commits do not depend on the machine's git configuration. Panics with
    /// the exit status and captured output when the command fails.
    fn run_git<I, S>(cwd: &Path, args: I) -> String
    where
        I: IntoIterator<Item = S>,
        S: AsRef<OsStr>,
    {
        let mut command = Command::new("git");
        // NOTE(review): presumably clears inherited repository-local git
        // environment variables so the command targets `cwd` — confirm.
        crate::platform::process::remove_git_local_env(&mut command);
        command.env("GIT_AUTHOR_NAME", "Mars Test");
        command.env("GIT_AUTHOR_EMAIL", "mars@example.com");
        command.env("GIT_COMMITTER_NAME", "Mars Test");
        command.env("GIT_COMMITTER_EMAIL", "mars@example.com");
        let output = command.current_dir(cwd).args(args).output().unwrap();
        if !output.status.success() {
            panic!(
                "git command failed: {}\nstdout:\n{}\nstderr:\n{}",
                output.status,
                String::from_utf8_lossy(&output.stdout),
                String::from_utf8_lossy(&output.stderr)
            );
        }
        String::from_utf8_lossy(&output.stdout).trim().to_string()
    }

    /// Create a temporary git repository containing one initial commit.
    fn init_repo() -> TempDir {
        let repo = TempDir::new().unwrap();
        run_git(repo.path(), ["init", "."]);
        run_git(repo.path(), ["config", "user.name", "Mars Test"]);
        run_git(repo.path(), ["config", "user.email", "mars@example.com"]);

        fs::write(repo.path().join("README.md"), "initial\n").unwrap();
        run_git(repo.path(), ["add", "."]);
        run_git(repo.path(), ["commit", "-m", "initial commit"]);

        repo
    }

    /// Write `contents` to `filename`, commit it, and return the new HEAD SHA.
    fn commit_file(repo: &Path, filename: &str, contents: &str, message: &str) -> String {
        fs::write(repo.join(filename), contents).unwrap();
        run_git(repo, ["add", filename]);
        run_git(repo, ["commit", "-m", message]);
        run_git(repo, ["rev-parse", "HEAD"])
    }

    /// Create an isolated cache with its archive and git directories present.
    ///
    /// The returned `TempDir` owns the backing directory and must be kept
    /// alive for as long as the cache is in use.
    fn init_cache() -> (TempDir, GlobalCache) {
        let cache_root = TempDir::new().unwrap();
        let cache = GlobalCache {
            root: cache_root.path().join("cache"),
        };
        fs::create_dir_all(cache.archives_dir()).unwrap();
        fs::create_dir_all(cache.git_dir()).unwrap();
        (cache_root, cache)
    }

    /// Build the `file://` URL used to address a local test repository.
    fn file_url(repo: &Path) -> String {
        format!("file://{}", repo.display())
    }

    // ==================== url_to_dirname tests ====================

    #[test]
    fn url_to_dirname_https() {
        assert_eq!(
            url_to_dirname("https://github.com/foo/bar"),
            "github.com_foo_bar"
        );
    }

    #[test]
    fn url_to_dirname_bare_domain() {
        assert_eq!(
            url_to_dirname("github.com/meridian-flow/meridian-base"),
            "github.com_meridian-flow_meridian-base"
        );
    }

    #[test]
    fn url_to_dirname_ssh() {
        assert_eq!(
            url_to_dirname("git@github.com:foo/bar.git"),
            "github.com_foo_bar"
        );
    }

    #[test]
    fn url_to_dirname_https_with_git_suffix() {
        assert_eq!(
            url_to_dirname("https://github.com/foo/bar.git"),
            "github.com_foo_bar"
        );
    }

    #[test]
    fn url_to_dirname_ssh_protocol() {
        assert_eq!(
            url_to_dirname("ssh://git@github.com/foo/bar"),
            "github.com_foo_bar"
        );
    }

    #[test]
    fn url_to_dirname_http() {
        assert_eq!(
            url_to_dirname("http://gitlab.com/org/repo"),
            "gitlab.com_org_repo"
        );
    }

    #[test]
    fn url_to_dirname_trailing_slash() {
        assert_eq!(
            url_to_dirname("https://github.com/foo/bar/"),
            "github.com_foo_bar"
        );
    }

    // ==================== parse_semver_tag tests ====================

    #[test]
    fn parse_semver_v_prefixed() {
        let v = parse_semver_tag("v1.2.3").unwrap();
        assert_eq!(v, Version::new(1, 2, 3));
    }

    #[test]
    fn parse_semver_no_prefix() {
        let v = parse_semver_tag("0.5.2").unwrap();
        assert_eq!(v, Version::new(0, 5, 2));
    }

    #[test]
    fn parse_semver_rejects_non_semver_tags() {
        assert!(parse_semver_tag("latest").is_none());
        assert!(parse_semver_tag("nightly-2024").is_none());
    }

    // ==================== ls_remote_tags tests ====================

    #[test]
    fn ls_remote_tags_filters_sorts_and_skips_peeled_refs() {
        let repo = init_repo();
        run_git(repo.path(), ["tag", "v1.0.0"]);

        commit_file(repo.path(), "README.md", "second\n", "second commit");
        // Annotated tag: the remote also advertises a peeled `^{}` entry.
        run_git(repo.path(), ["tag", "-a", "v1.2.0", "-m", "v1.2.0"]);
        run_git(repo.path(), ["tag", "not-a-version"]);

        commit_file(repo.path(), "README.md", "third\n", "third commit");
        run_git(repo.path(), ["tag", "v1.10.0"]);

        let versions = ls_remote_tags(repo.path().to_str().unwrap()).unwrap();
        let tags: Vec<String> = versions.iter().map(|v| v.tag.clone()).collect();
        // Semver order, not lexicographic: 1.10.0 sorts after 1.2.0.
        assert_eq!(tags, vec!["v1.0.0", "v1.2.0", "v1.10.0"]);

        for version in versions {
            assert_eq!(version.commit_id.len(), 40);
            assert!(version.commit_id.chars().all(|c| c.is_ascii_hexdigit()));
        }
    }

    // ==================== fetch / fetch_commit tests ====================

    #[test]
    fn fetch_local_git_repo_uses_latest_semver_tag() {
        let remote = init_repo();
        run_git(remote.path(), ["tag", "v0.1.0"]);

        let v020_commit = commit_file(remote.path(), "README.md", "v0.2.0\n", "release v0.2.0");
        run_git(remote.path(), ["tag", "v0.2.0"]);

        let (_cache_root, cache) = init_cache();
        let url = file_url(remote.path());
        let mut diag = DiagnosticCollector::new();
        let resolved = fetch(
            &url,
            None,
            "local-source",
            &cache,
            &FetchOptions::default(),
            &mut diag,
        )
        .unwrap();

        assert_eq!(resolved.source_name.as_ref(), "local-source");
        assert_eq!(resolved.version, Some(Version::new(0, 2, 0)));
        assert_eq!(resolved.version_tag.as_deref(), Some("v0.2.0"));
        assert_eq!(resolved.commit.as_deref(), Some(v020_commit.as_str()));
        assert!(resolved.tree_path.join("README.md").exists());

        let checked_out = run_git(&resolved.tree_path, ["rev-parse", "HEAD"]);
        assert_eq!(checked_out, v020_commit);
    }

    #[test]
    fn fetch_commit_checks_out_exact_commit_without_resolving_head() {
        let remote = init_repo();
        let locked_commit = commit_file(remote.path(), "README.md", "locked\n", "locked commit");
        let head_commit = commit_file(remote.path(), "README.md", "head\n", "head commit");
        assert_ne!(locked_commit, head_commit);

        let (_cache_root, cache) = init_cache();
        let url = file_url(remote.path());
        let mut diag = DiagnosticCollector::new();
        let resolved =
            fetch_commit(&url, &locked_commit, "local-source", &cache, &mut diag).unwrap();

        assert_eq!(resolved.source_name.as_ref(), "local-source");
        assert_eq!(resolved.version, None);
        assert_eq!(resolved.version_tag, None);
        assert_eq!(resolved.commit.as_deref(), Some(locked_commit.as_str()));
        let checked_out = run_git(&resolved.tree_path, ["rev-parse", "HEAD"]);
        assert_eq!(checked_out, locked_commit);
    }

    #[test]
    fn fetch_commit_on_cached_repo_fetches_missing_sha_before_checkout() {
        let remote = init_repo();
        run_git(remote.path(), ["tag", "v1.0.0"]);

        let (_cache_root, cache) = init_cache();
        let url = file_url(remote.path());

        // Seed cache as a shallow tag checkout that does not include future commits.
        let mut first_diag = DiagnosticCollector::new();
        let first = fetch(
            &url,
            Some("v1.0.0"),
            "local-source",
            &cache,
            &FetchOptions::default(),
            &mut first_diag,
        )
        .unwrap();
        assert_eq!(first.version_tag.as_deref(), Some("v1.0.0"));

        let locked_commit = commit_file(
            remote.path(),
            "README.md",
            "post-tag\n",
            "commit only reachable by SHA",
        );

        let mut diag = DiagnosticCollector::new();
        let resolved =
            fetch_commit(&url, &locked_commit, "local-source", &cache, &mut diag).unwrap();
        assert_eq!(resolved.commit.as_deref(), Some(locked_commit.as_str()));
        let checked_out = run_git(&resolved.tree_path, ["rev-parse", "HEAD"]);
        assert_eq!(checked_out, locked_commit);
    }

    #[test]
    fn fetch_existing_cached_git_repo_updates_tags_before_checkout() {
        let remote = init_repo();
        run_git(remote.path(), ["tag", "v1.0.0"]);

        let (_cache_root, cache) = init_cache();
        let url = file_url(remote.path());

        let mut first_diag = DiagnosticCollector::new();
        let first = fetch(
            &url,
            None,
            "local-source",
            &cache,
            &FetchOptions::default(),
            &mut first_diag,
        )
        .unwrap();
        assert_eq!(first.version, Some(Version::new(1, 0, 0)));
        assert_eq!(first.version_tag.as_deref(), Some("v1.0.0"));

        // Publish a newer release after the cache has already been populated.
        let v200_commit = commit_file(remote.path(), "README.md", "v2.0.0\n", "release v2.0.0");
        run_git(remote.path(), ["tag", "v2.0.0"]);

        let mut second_diag = DiagnosticCollector::new();
        let second = fetch(
            &url,
            None,
            "local-source",
            &cache,
            &FetchOptions::default(),
            &mut second_diag,
        )
        .unwrap();

        assert_eq!(second.version, Some(Version::new(2, 0, 0)));
        assert_eq!(second.version_tag.as_deref(), Some("v2.0.0"));
        assert_eq!(second.commit.as_deref(), Some(v200_commit.as_str()));

        let checked_out = run_git(&second.tree_path, ["rev-parse", "HEAD"]);
        assert_eq!(checked_out, v200_commit);
    }

    // ==================== is_github_host tests ====================

    #[test]
    fn is_github_host_accepts_supported_formats() {
        assert!(is_github_host("https://github.com/org/repo"));
        assert!(is_github_host("github.com/org/repo"));
        assert!(is_github_host("git@github.com:org/repo.git"));
        assert!(is_github_host("https://git@github.com:8443/org/repo"));
    }

    #[test]
    fn is_github_host_rejects_other_hosts() {
        assert!(!is_github_host("https://gitlab.com/org/repo"));
        assert!(!is_github_host("git@source.example.com:org/repo.git"));
    }

    // ==================== normalize_git_remote_url tests ====================

    #[test]
    fn normalize_git_remote_url_makes_known_host_identity_fetchable() {
        assert_eq!(
            normalize_git_remote_url("github.com/org/repo"),
            "https://github.com/org/repo"
        );
        assert_eq!(
            normalize_git_remote_url("gitlab.com/group/repo"),
            "https://gitlab.com/group/repo"
        );
    }

    #[test]
    fn normalize_git_remote_url_preserves_explicit_locators() {
        assert_eq!(
            normalize_git_remote_url("https://github.com/org/repo"),
            "https://github.com/org/repo"
        );
        assert_eq!(
            normalize_git_remote_url("git@github.com:org/repo.git"),
            "git@github.com:org/repo.git"
        );
        assert_eq!(
            normalize_git_remote_url("git.example.com/org/repo"),
            "git.example.com/org/repo"
        );
    }

    // ==================== should_use_github_archive tests ====================

    #[test]
    fn github_archive_only_for_https_github_urls() {
        assert!(should_use_github_archive("https://github.com/org/repo"));
        assert!(!should_use_github_archive("http://github.com/org/repo"));
        assert!(!should_use_github_archive("github.com/org/repo"));
        assert!(!should_use_github_archive("git@github.com:org/repo.git"));
        assert!(!should_use_github_archive("ssh://git@github.com/org/repo"));
    }
}