Skip to main content

mars_agents/source/
git.rs

1//! Git source adapter — strategy and public API.
2//!
3//! Delegates to `git_cli` for git CLI operations and `archive` for
4//! GitHub archive download/extraction.
5
6use crate::diagnostic::DiagnosticCollector;
7use crate::error::MarsError;
8use crate::source::parse::extract_hostname;
9use crate::source::{AvailableVersion, GlobalCache, ResolvedRef};
10use crate::types::CommitHash;
11
12use super::archive;
13use super::git_cli;
14
15// Re-export for backward compatibility
16pub use git_cli::{ls_remote_head, ls_remote_tags};
17
18/// Options controlling git fetch behavior.
19#[derive(Debug, Clone, Default)]
20pub struct FetchOptions {
21    /// Preferred commit SHA to checkout before resolving tags/versions.
22    /// Used for lock replay to guarantee reproducible content.
23    pub preferred_commit: Option<CommitHash>,
24}
25
26/// Normalize a git URL to a filesystem-safe directory name.
27///
28/// Delegates to [`super::canonical::canonicalize_git_url`] for uniform URL
29/// normalization, then replaces `/` with `_` to produce a single path component.
30///
31/// Examples:
32/// - `https://github.com/foo/bar` -> `github.com_foo_bar`
33/// - `github.com/foo/bar` -> `github.com_foo_bar`
34/// - `git@github.com:foo/bar.git` -> `github.com_foo_bar`
35/// - `ssh://git@github.com/foo/bar` -> `github.com_foo_bar`
36pub fn url_to_dirname(url: &str) -> String {
37    super::canonical::canonicalize_git_url(url).replace('/', "_")
38}
39
40/// Parse a tag name as a semver version tag.
41///
42/// Accepts: `v1.0.0`, `v0.5.2`, `1.0.0`
43/// Rejects: `latest`, `nightly-2024`, or any non-semver tag.
44pub(crate) fn parse_semver_tag(tag: &str) -> Option<semver::Version> {
45    let version_str = tag.strip_prefix('v').unwrap_or(tag);
46    semver::Version::parse(version_str).ok()
47}
48
49#[derive(Debug, Clone)]
50pub(crate) struct ResolvedVersion {
51    pub tag: Option<String>,
52    pub version: Option<semver::Version>,
53    pub sha: String,
54}
55
56fn resolve_version(
57    url: &str,
58    version_req: Option<&str>,
59    diag: &mut DiagnosticCollector,
60) -> Result<ResolvedVersion, MarsError> {
61    if let Some(version_req) = version_req {
62        if let Some(requested_version) = parse_semver_tag(version_req) {
63            let tags = git_cli::ls_remote_tags(url)?;
64            let selected = tags
65                .into_iter()
66                .find(|tag| tag.tag == version_req || tag.version == requested_version)
67                .ok_or_else(|| MarsError::Source {
68                    source_name: url.to_string(),
69                    message: format!("version tag `{version_req}` not found"),
70                })?;
71
72            return Ok(ResolvedVersion {
73                tag: Some(selected.tag),
74                version: Some(selected.version),
75                sha: selected.commit_id,
76            });
77        }
78
79        let sha = git_cli::ls_remote_ref(url, version_req)?;
80        return Ok(ResolvedVersion {
81            tag: None,
82            version: None,
83            sha,
84        });
85    }
86
87    let tags = git_cli::ls_remote_tags(url)?;
88    if let Some(selected) = tags.last() {
89        return Ok(ResolvedVersion {
90            tag: Some(selected.tag.clone()),
91            version: Some(selected.version.clone()),
92            sha: selected.commit_id.clone(),
93        });
94    }
95
96    diag.warn(
97        "no-releases",
98        format!("no releases found for {url}, using latest commit from default branch"),
99    );
100    let sha = git_cli::ls_remote_head(url)?;
101    Ok(ResolvedVersion {
102        tag: None,
103        version: None,
104        sha,
105    })
106}
107
108/// Return true when the URL host resolves to github.com.
109pub fn is_github_host(url: &str) -> bool {
110    extract_hostname(url)
111        .map(|host| host.eq_ignore_ascii_case("github.com"))
112        .unwrap_or(false)
113}
114
115fn should_use_github_archive(url: &str) -> bool {
116    let trimmed = url.trim();
117    if trimmed.starts_with("git@") || trimmed.starts_with("ssh://") {
118        return false;
119    }
120
121    trimmed.starts_with("https://") && is_github_host(trimmed)
122}
123
124pub fn list_versions(url: &str, _cache: &GlobalCache) -> Result<Vec<AvailableVersion>, MarsError> {
125    git_cli::ls_remote_tags(url)
126}
127
128pub fn fetch(
129    url: &str,
130    version_req: Option<&str>,
131    source_name: &str,
132    cache: &GlobalCache,
133    options: &FetchOptions,
134    diag: &mut DiagnosticCollector,
135) -> Result<ResolvedRef, MarsError> {
136    let mut resolved = resolve_version(url, version_req, diag)?;
137    if let Some(preferred_commit) = options.preferred_commit.as_ref() {
138        resolved.sha = preferred_commit.to_string();
139    }
140
141    let tree_path = if should_use_github_archive(url) {
142        match archive::fetch_archive(url, &resolved.sha, cache) {
143            Ok(path) => path,
144            Err(MarsError::Http { status: 404, .. }) if options.preferred_commit.is_some() => {
145                return Err(MarsError::LockedCommitUnreachable {
146                    commit: resolved.sha.clone(),
147                    url: url.to_string(),
148                });
149            }
150            Err(err) => return Err(err),
151        }
152    } else {
153        // For git clone path, prefer exact SHA checkout when replaying a locked commit,
154        // or when resolving branch/default-HEAD refs (non-tag fetches).
155        let checkout_sha = if options.preferred_commit.is_some() || resolved.tag.is_none() {
156            Some(resolved.sha.as_str())
157        } else {
158            None
159        };
160
161        match git_cli::fetch_git_clone(url, resolved.tag.as_deref(), checkout_sha, cache) {
162            Ok(path) => path,
163            Err(MarsError::GitCli { .. }) if options.preferred_commit.is_some() => {
164                return Err(MarsError::LockedCommitUnreachable {
165                    commit: resolved.sha.clone(),
166                    url: url.to_string(),
167                });
168            }
169            Err(err) => return Err(err),
170        }
171    };
172
173    Ok(ResolvedRef {
174        source_name: source_name.into(),
175        version: resolved.version,
176        version_tag: resolved.tag,
177        commit: Some(CommitHash::from(resolved.sha)),
178        tree_path,
179    })
180}
181
#[cfg(test)]
mod tests {
    use super::*;
    use semver::Version;
    use std::ffi::OsStr;
    use std::fs;
    use std::path::Path;
    use std::process::Command;
    use tempfile::TempDir;

    /// Run `git` with `args` inside `cwd` and return its trimmed stdout.
    ///
    /// Panics (failing the test) if `git` cannot be spawned or exits with a
    /// non-zero status; the panic message includes stdout and stderr.
    fn run_git<I, S>(cwd: &Path, args: I) -> String
    where
        I: IntoIterator<Item = S>,
        S: AsRef<OsStr>,
    {
        let output = Command::new("git")
            .current_dir(cwd)
            .args(args)
            .output()
            .unwrap();
        assert!(
            output.status.success(),
            "git command failed: {}\nstdout:\n{}\nstderr:\n{}",
            output.status,
            String::from_utf8_lossy(&output.stdout),
            String::from_utf8_lossy(&output.stderr)
        );
        String::from_utf8_lossy(&output.stdout).trim().to_string()
    }

    /// Create a temporary git repository containing a single initial commit.
    fn init_repo() -> TempDir {
        let repo = TempDir::new().unwrap();
        run_git(repo.path(), ["init", "."]);
        run_git(repo.path(), ["config", "user.name", "Mars Test"]);
        run_git(repo.path(), ["config", "user.email", "mars@example.com"]);
        // Keep the fixture independent of the developer's global git config:
        // with signing enabled globally, `git commit` / `git tag` would need a
        // key (and `git tag <name>` would demand a message) and fail here.
        run_git(repo.path(), ["config", "commit.gpgsign", "false"]);
        run_git(repo.path(), ["config", "tag.gpgsign", "false"]);

        fs::write(repo.path().join("README.md"), "initial\n").unwrap();
        run_git(repo.path(), ["add", "."]);
        run_git(repo.path(), ["commit", "-m", "initial commit"]);

        repo
    }

    /// Write `contents` to `filename`, commit it, and return the new HEAD SHA.
    fn commit_file(repo: &Path, filename: &str, contents: &str, message: &str) -> String {
        fs::write(repo.join(filename), contents).unwrap();
        run_git(repo, ["add", filename]);
        run_git(repo, ["commit", "-m", message]);
        run_git(repo, ["rev-parse", "HEAD"])
    }

    /// Build a `GlobalCache` rooted under `cache_root` with its archive and
    /// git directories already created.
    fn new_cache(cache_root: &TempDir) -> GlobalCache {
        let cache = GlobalCache {
            root: cache_root.path().join("cache"),
        };
        fs::create_dir_all(cache.archives_dir()).unwrap();
        fs::create_dir_all(cache.git_dir()).unwrap();
        cache
    }

    /// `file://` URL pointing at a local fixture repository.
    fn file_url(remote: &TempDir) -> String {
        format!("file://{}", remote.path().display())
    }

    /// Fetch `url` with no version request and default options.
    fn fetch_latest(url: &str, cache: &GlobalCache) -> ResolvedRef {
        let mut diag = DiagnosticCollector::new();
        fetch(
            url,
            None,
            "local-source",
            cache,
            &FetchOptions::default(),
            &mut diag,
        )
        .unwrap()
    }

    // ==================== url_to_dirname tests ====================

    #[test]
    fn url_to_dirname_https() {
        let dirname = url_to_dirname("https://github.com/foo/bar");
        assert_eq!(dirname, "github.com_foo_bar");
    }

    #[test]
    fn url_to_dirname_bare_domain() {
        let dirname = url_to_dirname("github.com/meridian-flow/meridian-base");
        assert_eq!(dirname, "github.com_meridian-flow_meridian-base");
    }

    #[test]
    fn url_to_dirname_ssh() {
        let dirname = url_to_dirname("git@github.com:foo/bar.git");
        assert_eq!(dirname, "github.com_foo_bar");
    }

    #[test]
    fn url_to_dirname_https_with_git_suffix() {
        let dirname = url_to_dirname("https://github.com/foo/bar.git");
        assert_eq!(dirname, "github.com_foo_bar");
    }

    #[test]
    fn url_to_dirname_ssh_protocol() {
        let dirname = url_to_dirname("ssh://git@github.com/foo/bar");
        assert_eq!(dirname, "github.com_foo_bar");
    }

    #[test]
    fn url_to_dirname_http() {
        let dirname = url_to_dirname("http://gitlab.com/org/repo");
        assert_eq!(dirname, "gitlab.com_org_repo");
    }

    #[test]
    fn url_to_dirname_trailing_slash() {
        let dirname = url_to_dirname("https://github.com/foo/bar/");
        assert_eq!(dirname, "github.com_foo_bar");
    }

    // ==================== parse_semver_tag tests ====================

    #[test]
    fn parse_semver_v_prefixed() {
        assert_eq!(parse_semver_tag("v1.2.3"), Some(Version::new(1, 2, 3)));
    }

    #[test]
    fn parse_semver_no_prefix() {
        assert_eq!(parse_semver_tag("0.5.2"), Some(Version::new(0, 5, 2)));
    }

    // ==================== git integration tests ====================

    #[test]
    fn ls_remote_tags_filters_sorts_and_skips_peeled_refs() {
        let repo = init_repo();
        run_git(repo.path(), ["tag", "v1.0.0"]);

        commit_file(repo.path(), "README.md", "second\n", "second commit");
        // Annotated tag: ls-remote also reports a peeled `^{}` entry for it.
        run_git(repo.path(), ["tag", "-a", "v1.2.0", "-m", "v1.2.0"]);
        run_git(repo.path(), ["tag", "not-a-version"]);

        commit_file(repo.path(), "README.md", "third\n", "third commit");
        run_git(repo.path(), ["tag", "v1.10.0"]);

        let versions = ls_remote_tags(repo.path().to_str().unwrap()).unwrap();

        // Semver ordering (1.10.0 after 1.2.0), non-version tag dropped.
        let tags: Vec<&str> = versions.iter().map(|v| v.tag.as_str()).collect();
        assert_eq!(tags, vec!["v1.0.0", "v1.2.0", "v1.10.0"]);

        for version in &versions {
            assert_eq!(version.commit_id.len(), 40);
            assert!(version.commit_id.chars().all(|c| c.is_ascii_hexdigit()));
        }
    }

    #[test]
    fn fetch_local_git_repo_uses_latest_semver_tag() {
        let remote = init_repo();
        run_git(remote.path(), ["tag", "v0.1.0"]);

        let v020_commit = commit_file(remote.path(), "README.md", "v0.2.0\n", "release v0.2.0");
        run_git(remote.path(), ["tag", "v0.2.0"]);

        let cache_root = TempDir::new().unwrap();
        let cache = new_cache(&cache_root);

        let resolved = fetch_latest(&file_url(&remote), &cache);

        assert_eq!(resolved.source_name.as_ref(), "local-source");
        assert_eq!(resolved.version, Some(Version::new(0, 2, 0)));
        assert_eq!(resolved.version_tag.as_deref(), Some("v0.2.0"));
        assert_eq!(resolved.commit.as_deref(), Some(v020_commit.as_str()));
        assert!(resolved.tree_path.join("README.md").exists());

        let checked_out = run_git(&resolved.tree_path, ["rev-parse", "HEAD"]);
        assert_eq!(checked_out, v020_commit);
    }

    #[test]
    fn fetch_existing_cached_git_repo_updates_tags_before_checkout() {
        let remote = init_repo();
        run_git(remote.path(), ["tag", "v1.0.0"]);

        let cache_root = TempDir::new().unwrap();
        let cache = new_cache(&cache_root);
        let url = file_url(&remote);

        // First fetch populates the cache at v1.0.0.
        let first = fetch_latest(&url, &cache);
        assert_eq!(first.version, Some(Version::new(1, 0, 0)));
        assert_eq!(first.version_tag.as_deref(), Some("v1.0.0"));

        // A new release appears upstream after the cache already exists.
        let v200_commit = commit_file(remote.path(), "README.md", "v2.0.0\n", "release v2.0.0");
        run_git(remote.path(), ["tag", "v2.0.0"]);

        let second = fetch_latest(&url, &cache);

        assert_eq!(second.version, Some(Version::new(2, 0, 0)));
        assert_eq!(second.version_tag.as_deref(), Some("v2.0.0"));
        assert_eq!(second.commit.as_deref(), Some(v200_commit.as_str()));

        let checked_out = run_git(&second.tree_path, ["rev-parse", "HEAD"]);
        assert_eq!(checked_out, v200_commit);
    }

    // ==================== is_github_host tests ====================

    #[test]
    fn is_github_host_accepts_supported_formats() {
        assert!(is_github_host("https://github.com/org/repo"));
        assert!(is_github_host("github.com/org/repo"));
        assert!(is_github_host("git@github.com:org/repo.git"));
        assert!(is_github_host("https://git@github.com:8443/org/repo"));
    }

    #[test]
    fn is_github_host_rejects_other_hosts() {
        assert!(!is_github_host("https://gitlab.com/org/repo"));
        assert!(!is_github_host("git@source.example.com:org/repo.git"));
    }

    #[test]
    fn github_archive_only_for_https_github_urls() {
        assert!(should_use_github_archive("https://github.com/org/repo"));
        assert!(!should_use_github_archive("http://github.com/org/repo"));
        assert!(!should_use_github_archive("github.com/org/repo"));
        assert!(!should_use_github_archive("git@github.com:org/repo.git"));
        assert!(!should_use_github_archive("ssh://git@github.com/org/repo"));
    }
}