Skip to main content

mars_agents/source/
git.rs

1//! Git source adapter — strategy and public API.
2//!
3//! Delegates to `git_cli` for git CLI operations and `archive` for
4//! GitHub archive download/extraction.
5
6use crate::error::MarsError;
7use crate::source::parse::extract_hostname;
8use crate::source::{AvailableVersion, GlobalCache, ResolvedRef};
9use crate::types::CommitHash;
10
11use super::archive;
12use super::git_cli;
13
14// Re-export for backward compatibility
15pub use git_cli::{ls_remote_head, ls_remote_tags};
16
17/// Options controlling git fetch behavior.
18#[derive(Debug, Clone, Default)]
19pub struct FetchOptions {
20    /// Preferred commit SHA to checkout before resolving tags/versions.
21    /// Used for lock replay to guarantee reproducible content.
22    pub preferred_commit: Option<CommitHash>,
23}
24
/// Normalize a git URL to a filesystem-safe directory name.
///
/// Steps, in order:
/// 1. Strip one leading protocol prefix (`https://`, `http://`, `ssh://`, `git://`).
/// 2. Strip a leading `git@` user; for SSH shorthand (`host:path`) the first
///    `:` is treated as a path separator.
/// 3. Strip trailing slashes and a trailing `.git` suffix, so `repo`,
///    `repo/`, `repo.git` and `repo.git/` all normalize to the same name.
/// 4. Replace every remaining `/` and `:` with `_`.
///
/// Examples:
/// - `https://github.com/foo/bar` -> `github.com_foo_bar`
/// - `github.com/foo/bar` -> `github.com_foo_bar`
/// - `git@github.com:foo/bar.git` -> `github.com_foo_bar`
/// - `ssh://git@github.com/foo/bar` -> `github.com_foo_bar`
/// - `https://github.com/foo/bar.git/` -> `github.com_foo_bar`
/// - `https://github.com:8443/foo/bar` -> `github.com_8443_foo_bar`
pub fn url_to_dirname(url: &str) -> String {
    const SCHEMES: [&str; 4] = ["https://", "http://", "ssh://", "git://"];

    // At most one scheme is removed: the first one in SCHEMES that matches.
    let without_scheme = SCHEMES
        .iter()
        .find_map(|scheme| url.strip_prefix(*scheme))
        .unwrap_or(url);

    // SSH shorthand: git@github.com:foo/bar -> github.com/foo/bar.
    // A colon directly followed by `//` is left alone here and handled by
    // the final separator replacement instead.
    let normalized = match without_scheme.strip_prefix("git@") {
        Some(rest) => match rest.split_once(':') {
            Some((host, path)) if !path.starts_with("//") => format!("{host}/{path}"),
            _ => rest.to_string(),
        },
        None => without_scheme.to_string(),
    };

    // Trailing slashes must go before `.git`, otherwise `repo.git/` keeps
    // its suffix. The second trim covers inputs such as `repo/.git`.
    let trimmed = normalized.trim_end_matches('/');
    let trimmed = trimmed.strip_suffix(".git").unwrap_or(trimmed);
    let trimmed = trimmed.trim_end_matches('/');

    // `:` (e.g. a port separator) is not a safe filename character on every
    // platform, so it is flattened together with `/`.
    trimmed.replace(|c: char| c == '/' || c == ':', "_")
}
70
71/// Parse a tag name as a semver version tag.
72///
73/// Accepts: `v1.0.0`, `v0.5.2`, `1.0.0`
74/// Rejects: `latest`, `nightly-2024`, or any non-semver tag.
75pub(crate) fn parse_semver_tag(tag: &str) -> Option<semver::Version> {
76    let version_str = tag.strip_prefix('v').unwrap_or(tag);
77    semver::Version::parse(version_str).ok()
78}
79
80#[derive(Debug, Clone)]
81pub(crate) struct ResolvedVersion {
82    pub tag: Option<String>,
83    pub version: Option<semver::Version>,
84    pub sha: String,
85}
86
87fn resolve_version(url: &str, version_req: Option<&str>) -> Result<ResolvedVersion, MarsError> {
88    if let Some(version_req) = version_req {
89        if let Some(requested_version) = parse_semver_tag(version_req) {
90            let tags = git_cli::ls_remote_tags(url)?;
91            let selected = tags
92                .into_iter()
93                .find(|tag| tag.tag == version_req || tag.version == requested_version)
94                .ok_or_else(|| MarsError::Source {
95                    source_name: url.to_string(),
96                    message: format!("version tag `{version_req}` not found"),
97                })?;
98
99            return Ok(ResolvedVersion {
100                tag: Some(selected.tag),
101                version: Some(selected.version),
102                sha: selected.commit_id,
103            });
104        }
105
106        let sha = git_cli::ls_remote_ref(url, version_req)?;
107        return Ok(ResolvedVersion {
108            tag: None,
109            version: None,
110            sha,
111        });
112    }
113
114    let tags = git_cli::ls_remote_tags(url)?;
115    if let Some(selected) = tags.last() {
116        return Ok(ResolvedVersion {
117            tag: Some(selected.tag.clone()),
118            version: Some(selected.version.clone()),
119            sha: selected.commit_id.clone(),
120        });
121    }
122
123    eprintln!("warning: no releases found for {url}, using latest commit from default branch");
124    let sha = git_cli::ls_remote_head(url)?;
125    Ok(ResolvedVersion {
126        tag: None,
127        version: None,
128        sha,
129    })
130}
131
132/// Return true when the URL host resolves to github.com.
133pub fn is_github_host(url: &str) -> bool {
134    extract_hostname(url)
135        .map(|host| host.eq_ignore_ascii_case("github.com"))
136        .unwrap_or(false)
137}
138
139fn should_use_github_archive(url: &str) -> bool {
140    let trimmed = url.trim();
141    if trimmed.starts_with("git@") || trimmed.starts_with("ssh://") {
142        return false;
143    }
144
145    trimmed.starts_with("https://") && is_github_host(trimmed)
146}
147
148pub fn list_versions(url: &str, _cache: &GlobalCache) -> Result<Vec<AvailableVersion>, MarsError> {
149    git_cli::ls_remote_tags(url)
150}
151
152pub fn fetch(
153    url: &str,
154    version_req: Option<&str>,
155    source_name: &str,
156    cache: &GlobalCache,
157    options: &FetchOptions,
158) -> Result<ResolvedRef, MarsError> {
159    let mut resolved = resolve_version(url, version_req)?;
160    if let Some(preferred_commit) = options.preferred_commit.as_ref() {
161        resolved.sha = preferred_commit.to_string();
162    }
163
164    let tree_path = if should_use_github_archive(url) {
165        match archive::fetch_archive(url, &resolved.sha, cache) {
166            Ok(path) => path,
167            Err(MarsError::Http { status: 404, .. }) if options.preferred_commit.is_some() => {
168                return Err(MarsError::LockedCommitUnreachable {
169                    commit: resolved.sha.clone(),
170                    url: url.to_string(),
171                });
172            }
173            Err(err) => return Err(err),
174        }
175    } else {
176        // For git clone path, prefer exact SHA checkout when replaying a locked commit,
177        // or when resolving branch/default-HEAD refs (non-tag fetches).
178        let checkout_sha = if options.preferred_commit.is_some() || resolved.tag.is_none() {
179            Some(resolved.sha.as_str())
180        } else {
181            None
182        };
183
184        match git_cli::fetch_git_clone(url, resolved.tag.as_deref(), checkout_sha, cache) {
185            Ok(path) => path,
186            Err(MarsError::GitCli { .. }) if options.preferred_commit.is_some() => {
187                return Err(MarsError::LockedCommitUnreachable {
188                    commit: resolved.sha.clone(),
189                    url: url.to_string(),
190                });
191            }
192            Err(err) => return Err(err),
193        }
194    };
195
196    Ok(ResolvedRef {
197        source_name: source_name.into(),
198        version: resolved.version,
199        version_tag: resolved.tag,
200        commit: Some(CommitHash::from(resolved.sha)),
201        tree_path,
202    })
203}
204
#[cfg(test)]
mod tests {
    use super::*;
    use semver::Version;
    use std::ffi::OsStr;
    use std::fs;
    use std::path::Path;
    use std::process::Command;
    use tempfile::TempDir;

    /// Run `git <args>` inside `cwd` and return its trimmed stdout.
    ///
    /// Panics with the captured stdout/stderr when git exits unsuccessfully.
    fn run_git<I, S>(cwd: &Path, args: I) -> String
    where
        I: IntoIterator<Item = S>,
        S: AsRef<OsStr>,
    {
        let result = Command::new("git")
            .current_dir(cwd)
            .args(args)
            .output()
            .unwrap();

        assert!(
            result.status.success(),
            "git command failed: {}\nstdout:\n{}\nstderr:\n{}",
            result.status,
            String::from_utf8_lossy(&result.stdout),
            String::from_utf8_lossy(&result.stderr)
        );

        String::from_utf8_lossy(&result.stdout).trim().to_string()
    }

    /// Create a temporary repository containing one commit with a README.
    fn init_repo() -> TempDir {
        let repo = TempDir::new().unwrap();
        let root = repo.path();

        run_git(root, ["init", "."]);
        for (key, value) in [
            ("user.name", "Mars Test"),
            ("user.email", "mars@example.com"),
        ] {
            run_git(root, ["config", key, value]);
        }

        fs::write(root.join("README.md"), "initial\n").unwrap();
        run_git(root, ["add", "."]);
        run_git(root, ["commit", "-m", "initial commit"]);

        repo
    }

    /// Write `contents` to `filename`, commit it, and return the new HEAD SHA.
    fn commit_file(repo: &Path, filename: &str, contents: &str, message: &str) -> String {
        let target = repo.join(filename);
        fs::write(target, contents).unwrap();
        run_git(repo, ["add", filename]);
        run_git(repo, ["commit", "-m", message]);
        run_git(repo, ["rev-parse", "HEAD"])
    }

    // ==================== url_to_dirname tests ====================

    #[test]
    fn url_to_dirname_https() {
        let dirname = url_to_dirname("https://github.com/foo/bar");
        assert_eq!(dirname, "github.com_foo_bar");
    }

    #[test]
    fn url_to_dirname_bare_domain() {
        let dirname = url_to_dirname("github.com/haowjy/meridian-base");
        assert_eq!(dirname, "github.com_haowjy_meridian-base");
    }

    #[test]
    fn url_to_dirname_ssh() {
        let dirname = url_to_dirname("git@github.com:foo/bar.git");
        assert_eq!(dirname, "github.com_foo_bar");
    }

    #[test]
    fn url_to_dirname_https_with_git_suffix() {
        let dirname = url_to_dirname("https://github.com/foo/bar.git");
        assert_eq!(dirname, "github.com_foo_bar");
    }

    #[test]
    fn url_to_dirname_ssh_protocol() {
        let dirname = url_to_dirname("ssh://git@github.com/foo/bar");
        assert_eq!(dirname, "github.com_foo_bar");
    }

    #[test]
    fn url_to_dirname_http() {
        let dirname = url_to_dirname("http://gitlab.com/org/repo");
        assert_eq!(dirname, "gitlab.com_org_repo");
    }

    #[test]
    fn url_to_dirname_trailing_slash() {
        let dirname = url_to_dirname("https://github.com/foo/bar/");
        assert_eq!(dirname, "github.com_foo_bar");
    }

    // ==================== parse_semver_tag tests ====================

    #[test]
    fn parse_semver_v_prefixed() {
        let parsed = parse_semver_tag("v1.2.3");
        assert_eq!(parsed.unwrap(), Version::new(1, 2, 3));
    }

    #[test]
    fn parse_semver_no_prefix() {
        let parsed = parse_semver_tag("0.5.2");
        assert_eq!(parsed.unwrap(), Version::new(0, 5, 2));
    }

    // ==================== git integration tests ====================

    #[test]
    fn ls_remote_tags_filters_sorts_and_skips_peeled_refs() {
        let repo = init_repo();
        let root = repo.path();

        // Lightweight tag on the initial commit.
        run_git(root, ["tag", "v1.0.0"]);

        // Annotated tag plus a non-version tag on the second commit.
        commit_file(root, "README.md", "second\n", "second commit");
        run_git(root, ["tag", "-a", "v1.2.0", "-m", "v1.2.0"]);
        run_git(root, ["tag", "not-a-version"]);

        // A two-digit minor version, to tell semver order from string order.
        commit_file(root, "README.md", "third\n", "third commit");
        run_git(root, ["tag", "v1.10.0"]);

        let versions = ls_remote_tags(root.to_str().unwrap()).unwrap();

        let tag_names: Vec<&str> = versions.iter().map(|entry| entry.tag.as_str()).collect();
        assert_eq!(tag_names, ["v1.0.0", "v1.2.0", "v1.10.0"]);

        for entry in &versions {
            let sha = &entry.commit_id;
            assert_eq!(sha.len(), 40);
            assert!(sha.chars().all(|c| c.is_ascii_hexdigit()));
        }
    }

    #[test]
    fn fetch_local_git_repo_uses_latest_semver_tag() {
        let remote = init_repo();
        let remote_root = remote.path();
        run_git(remote_root, ["tag", "v0.1.0"]);

        let expected_sha = commit_file(remote_root, "README.md", "v0.2.0\n", "release v0.2.0");
        run_git(remote_root, ["tag", "v0.2.0"]);

        let cache_parent = TempDir::new().unwrap();
        let cache = GlobalCache {
            root: cache_parent.path().join("cache"),
        };
        fs::create_dir_all(cache.archives_dir()).unwrap();
        fs::create_dir_all(cache.git_dir()).unwrap();

        let url = format!("file://{}", remote_root.display());
        let options = FetchOptions::default();
        let resolved = fetch(&url, None, "local-source", &cache, &options).unwrap();

        assert_eq!(resolved.source_name.as_ref(), "local-source");
        assert_eq!(resolved.version, Some(Version::new(0, 2, 0)));
        assert_eq!(resolved.version_tag.as_deref(), Some("v0.2.0"));
        assert_eq!(resolved.commit.as_deref(), Some(expected_sha.as_str()));
        assert!(resolved.tree_path.join("README.md").exists());

        let head = run_git(&resolved.tree_path, ["rev-parse", "HEAD"]);
        assert_eq!(head, expected_sha);
    }

    // ==================== is_github_host tests ====================

    #[test]
    fn is_github_host_accepts_supported_formats() {
        for url in [
            "https://github.com/org/repo",
            "github.com/org/repo",
            "git@github.com:org/repo.git",
            "https://git@github.com:8443/org/repo",
        ] {
            assert!(is_github_host(url), "{url}");
        }
    }

    #[test]
    fn is_github_host_rejects_other_hosts() {
        for url in [
            "https://gitlab.com/org/repo",
            "git@source.example.com:org/repo.git",
        ] {
            assert!(!is_github_host(url), "{url}");
        }
    }

    #[test]
    fn github_archive_only_for_https_github_urls() {
        assert!(should_use_github_archive("https://github.com/org/repo"));

        for url in [
            "http://github.com/org/repo",
            "github.com/org/repo",
            "git@github.com:org/repo.git",
            "ssh://git@github.com/org/repo",
        ] {
            assert!(!should_use_github_archive(url), "{url}");
        }
    }
}