Skip to main content

mars_agents/source/
git.rs

1//! Git source adapter — strategy and public API.
2//!
3//! Delegates to `git_cli` for git CLI operations and `archive` for
4//! GitHub archive download/extraction.
5
6use crate::diagnostic::DiagnosticCollector;
7use crate::error::MarsError;
8use crate::source::parse::extract_hostname;
9use crate::source::{AvailableVersion, GlobalCache, ResolvedRef};
10use crate::types::CommitHash;
11
12use super::archive;
13use super::git_cli;
14
15// Re-export for backward compatibility
16pub use git_cli::{ls_remote_head, ls_remote_tags};
17
18/// Options controlling git fetch behavior.
19#[derive(Debug, Clone, Default)]
20pub struct FetchOptions {
21    /// Preferred commit SHA to checkout before resolving tags/versions.
22    /// Used for lock replay to guarantee reproducible content.
23    pub preferred_commit: Option<CommitHash>,
24}
25
/// Normalize a git URL to a filesystem-safe directory name.
///
/// Steps, in order:
/// 1. Strip one leading protocol prefix (`https://`, `http://`, `ssh://`, `git://`).
/// 2. Strip a leading `git@` user.
/// 3. Strip one trailing `/`, then a trailing `.git`, then one more trailing `/`
///    (so both `foo/bar.git/` and `foo/bar/.git` normalize to `foo/bar`).
/// 4. Replace every `/` and `:` with `_`.
///
/// Replacing `:` covers the SSH shorthand separator (`host:path`), explicit
/// ports (`host:8443`) and unrecognized schemes (`file://`), none of which
/// may leak a `:` into a directory name.
///
/// Examples:
/// - `https://github.com/foo/bar` -> `github.com_foo_bar`
/// - `github.com/foo/bar` -> `github.com_foo_bar`
/// - `git@github.com:foo/bar.git` -> `github.com_foo_bar`
/// - `ssh://git@github.com/foo/bar` -> `github.com_foo_bar`
/// - `https://github.com:8443/foo/bar` -> `github.com_8443_foo_bar`
/// - `https://github.com/foo/bar.git/` -> `github.com_foo_bar`
pub fn url_to_dirname(url: &str) -> String {
    // Strip at most one protocol prefix.
    let mut rest = url;
    for scheme in &["https://", "http://", "ssh://", "git://"] {
        if let Some(stripped) = rest.strip_prefix(*scheme) {
            rest = stripped;
            break;
        }
    }

    // Drop the conventional SSH user; the `host:path` colon is handled by the
    // final character replacement below.
    let rest = rest.strip_prefix("git@").unwrap_or(rest);

    // A trailing slash must go first, otherwise `foo/bar.git/` keeps its
    // `.git` suffix. The second slash strip preserves `foo/bar/.git` -> `foo/bar`.
    let rest = rest.strip_suffix('/').unwrap_or(rest);
    let rest = rest.strip_suffix(".git").unwrap_or(rest);
    let rest = rest.strip_suffix('/').unwrap_or(rest);

    rest.chars()
        .map(|c| if matches!(c, '/' | ':') { '_' } else { c })
        .collect()
}
71
72/// Parse a tag name as a semver version tag.
73///
74/// Accepts: `v1.0.0`, `v0.5.2`, `1.0.0`
75/// Rejects: `latest`, `nightly-2024`, or any non-semver tag.
76pub(crate) fn parse_semver_tag(tag: &str) -> Option<semver::Version> {
77    let version_str = tag.strip_prefix('v').unwrap_or(tag);
78    semver::Version::parse(version_str).ok()
79}
80
81#[derive(Debug, Clone)]
82pub(crate) struct ResolvedVersion {
83    pub tag: Option<String>,
84    pub version: Option<semver::Version>,
85    pub sha: String,
86}
87
88fn resolve_version(
89    url: &str,
90    version_req: Option<&str>,
91    diag: &mut DiagnosticCollector,
92) -> Result<ResolvedVersion, MarsError> {
93    if let Some(version_req) = version_req {
94        if let Some(requested_version) = parse_semver_tag(version_req) {
95            let tags = git_cli::ls_remote_tags(url)?;
96            let selected = tags
97                .into_iter()
98                .find(|tag| tag.tag == version_req || tag.version == requested_version)
99                .ok_or_else(|| MarsError::Source {
100                    source_name: url.to_string(),
101                    message: format!("version tag `{version_req}` not found"),
102                })?;
103
104            return Ok(ResolvedVersion {
105                tag: Some(selected.tag),
106                version: Some(selected.version),
107                sha: selected.commit_id,
108            });
109        }
110
111        let sha = git_cli::ls_remote_ref(url, version_req)?;
112        return Ok(ResolvedVersion {
113            tag: None,
114            version: None,
115            sha,
116        });
117    }
118
119    let tags = git_cli::ls_remote_tags(url)?;
120    if let Some(selected) = tags.last() {
121        return Ok(ResolvedVersion {
122            tag: Some(selected.tag.clone()),
123            version: Some(selected.version.clone()),
124            sha: selected.commit_id.clone(),
125        });
126    }
127
128    diag.warn(
129        "no-releases",
130        format!("no releases found for {url}, using latest commit from default branch"),
131    );
132    let sha = git_cli::ls_remote_head(url)?;
133    Ok(ResolvedVersion {
134        tag: None,
135        version: None,
136        sha,
137    })
138}
139
140/// Return true when the URL host resolves to github.com.
141pub fn is_github_host(url: &str) -> bool {
142    extract_hostname(url)
143        .map(|host| host.eq_ignore_ascii_case("github.com"))
144        .unwrap_or(false)
145}
146
147fn should_use_github_archive(url: &str) -> bool {
148    let trimmed = url.trim();
149    if trimmed.starts_with("git@") || trimmed.starts_with("ssh://") {
150        return false;
151    }
152
153    trimmed.starts_with("https://") && is_github_host(trimmed)
154}
155
156pub fn list_versions(url: &str, _cache: &GlobalCache) -> Result<Vec<AvailableVersion>, MarsError> {
157    git_cli::ls_remote_tags(url)
158}
159
160pub fn fetch(
161    url: &str,
162    version_req: Option<&str>,
163    source_name: &str,
164    cache: &GlobalCache,
165    options: &FetchOptions,
166    diag: &mut DiagnosticCollector,
167) -> Result<ResolvedRef, MarsError> {
168    let mut resolved = resolve_version(url, version_req, diag)?;
169    if let Some(preferred_commit) = options.preferred_commit.as_ref() {
170        resolved.sha = preferred_commit.to_string();
171    }
172
173    let tree_path = if should_use_github_archive(url) {
174        match archive::fetch_archive(url, &resolved.sha, cache) {
175            Ok(path) => path,
176            Err(MarsError::Http { status: 404, .. }) if options.preferred_commit.is_some() => {
177                return Err(MarsError::LockedCommitUnreachable {
178                    commit: resolved.sha.clone(),
179                    url: url.to_string(),
180                });
181            }
182            Err(err) => return Err(err),
183        }
184    } else {
185        // For git clone path, prefer exact SHA checkout when replaying a locked commit,
186        // or when resolving branch/default-HEAD refs (non-tag fetches).
187        let checkout_sha = if options.preferred_commit.is_some() || resolved.tag.is_none() {
188            Some(resolved.sha.as_str())
189        } else {
190            None
191        };
192
193        match git_cli::fetch_git_clone(url, resolved.tag.as_deref(), checkout_sha, cache) {
194            Ok(path) => path,
195            Err(MarsError::GitCli { .. }) if options.preferred_commit.is_some() => {
196                return Err(MarsError::LockedCommitUnreachable {
197                    commit: resolved.sha.clone(),
198                    url: url.to_string(),
199                });
200            }
201            Err(err) => return Err(err),
202        }
203    };
204
205    Ok(ResolvedRef {
206        source_name: source_name.into(),
207        version: resolved.version,
208        version_tag: resolved.tag,
209        commit: Some(CommitHash::from(resolved.sha)),
210        tree_path,
211    })
212}
213
#[cfg(test)]
mod tests {
    use super::*;
    use semver::Version;
    use std::ffi::OsStr;
    use std::fs;
    use std::path::Path;
    use std::process::Command;
    use tempfile::TempDir;

    /// Run `git <args>` inside `cwd`, panicking with the captured output on
    /// failure, and return trimmed stdout.
    fn run_git<I, S>(cwd: &Path, args: I) -> String
    where
        I: IntoIterator<Item = S>,
        S: AsRef<OsStr>,
    {
        let result = Command::new("git")
            .current_dir(cwd)
            .args(args)
            .output()
            .unwrap();
        assert!(
            result.status.success(),
            "git command failed: {}\nstdout:\n{}\nstderr:\n{}",
            result.status,
            String::from_utf8_lossy(&result.stdout),
            String::from_utf8_lossy(&result.stderr)
        );
        String::from_utf8_lossy(&result.stdout).trim().to_owned()
    }

    /// Create a temporary repository containing a single committed README.
    fn init_repo() -> TempDir {
        let repo = TempDir::new().unwrap();
        let root = repo.path();

        run_git(root, ["init", "."]);
        run_git(root, ["config", "user.name", "Mars Test"]);
        run_git(root, ["config", "user.email", "mars@example.com"]);

        fs::write(root.join("README.md"), "initial\n").unwrap();
        run_git(root, ["add", "."]);
        run_git(root, ["commit", "-m", "initial commit"]);

        repo
    }

    /// Write `contents` to `filename`, commit it, and return the new HEAD SHA.
    fn commit_file(repo: &Path, filename: &str, contents: &str, message: &str) -> String {
        let target = repo.join(filename);
        fs::write(target, contents).unwrap();
        run_git(repo, ["add", filename]);
        run_git(repo, ["commit", "-m", message]);
        run_git(repo, ["rev-parse", "HEAD"])
    }

    // ==================== url_to_dirname tests ====================

    #[test]
    fn url_to_dirname_https() {
        let dirname = url_to_dirname("https://github.com/foo/bar");
        assert_eq!(dirname, "github.com_foo_bar");
    }

    #[test]
    fn url_to_dirname_bare_domain() {
        let dirname = url_to_dirname("github.com/haowjy/meridian-base");
        assert_eq!(dirname, "github.com_haowjy_meridian-base");
    }

    #[test]
    fn url_to_dirname_ssh() {
        let dirname = url_to_dirname("git@github.com:foo/bar.git");
        assert_eq!(dirname, "github.com_foo_bar");
    }

    #[test]
    fn url_to_dirname_https_with_git_suffix() {
        let dirname = url_to_dirname("https://github.com/foo/bar.git");
        assert_eq!(dirname, "github.com_foo_bar");
    }

    #[test]
    fn url_to_dirname_ssh_protocol() {
        let dirname = url_to_dirname("ssh://git@github.com/foo/bar");
        assert_eq!(dirname, "github.com_foo_bar");
    }

    #[test]
    fn url_to_dirname_http() {
        let dirname = url_to_dirname("http://gitlab.com/org/repo");
        assert_eq!(dirname, "gitlab.com_org_repo");
    }

    #[test]
    fn url_to_dirname_trailing_slash() {
        let dirname = url_to_dirname("https://github.com/foo/bar/");
        assert_eq!(dirname, "github.com_foo_bar");
    }

    // ==================== parse_semver_tag tests ====================

    #[test]
    fn parse_semver_v_prefixed() {
        let parsed = parse_semver_tag("v1.2.3").unwrap();
        assert_eq!(parsed, semver::Version::new(1, 2, 3));
    }

    #[test]
    fn parse_semver_no_prefix() {
        let parsed = parse_semver_tag("0.5.2").unwrap();
        assert_eq!(parsed, semver::Version::new(0, 5, 2));
    }

    // ==================== git integration tests ====================

    #[test]
    fn ls_remote_tags_filters_sorts_and_skips_peeled_refs() {
        let repo = init_repo();
        let root = repo.path();

        // Lightweight tag on the first commit.
        run_git(root, ["tag", "v1.0.0"]);

        // Annotated tag plus a non-version tag on the second commit.
        commit_file(root, "README.md", "second\n", "second commit");
        run_git(root, ["tag", "-a", "v1.2.0", "-m", "v1.2.0"]);
        run_git(root, ["tag", "not-a-version"]);

        // v1.10.0 must sort after v1.2.0 (semver order, not lexical order).
        commit_file(root, "README.md", "third\n", "third commit");
        run_git(root, ["tag", "v1.10.0"]);

        let versions = ls_remote_tags(root.to_str().unwrap()).unwrap();
        let tag_names: Vec<&str> = versions.iter().map(|v| v.tag.as_str()).collect();
        assert_eq!(tag_names, ["v1.0.0", "v1.2.0", "v1.10.0"]);

        for version in &versions {
            assert_eq!(version.commit_id.len(), 40);
            assert!(version.commit_id.chars().all(|c| c.is_ascii_hexdigit()));
        }
    }

    #[test]
    fn fetch_local_git_repo_uses_latest_semver_tag() {
        let remote = init_repo();
        run_git(remote.path(), ["tag", "v0.1.0"]);

        let v020_commit = commit_file(remote.path(), "README.md", "v0.2.0\n", "release v0.2.0");
        run_git(remote.path(), ["tag", "v0.2.0"]);

        let cache_root = TempDir::new().unwrap();
        let cache = GlobalCache {
            root: cache_root.path().join("cache"),
        };
        fs::create_dir_all(cache.archives_dir()).unwrap();
        fs::create_dir_all(cache.git_dir()).unwrap();

        let url = format!("file://{}", remote.path().display());
        let mut diag = DiagnosticCollector::new();
        let options = FetchOptions::default();
        let resolved = fetch(&url, None, "local-source", &cache, &options, &mut diag).unwrap();

        assert_eq!(resolved.source_name.as_ref(), "local-source");
        assert_eq!(resolved.version, Some(Version::new(0, 2, 0)));
        assert_eq!(resolved.version_tag.as_deref(), Some("v0.2.0"));
        assert_eq!(resolved.commit.as_deref(), Some(v020_commit.as_str()));
        assert!(resolved.tree_path.join("README.md").exists());

        let checked_out = run_git(&resolved.tree_path, ["rev-parse", "HEAD"]);
        assert_eq!(checked_out, v020_commit);
    }

    // ==================== is_github_host tests ====================

    #[test]
    fn is_github_host_accepts_supported_formats() {
        assert!(is_github_host("https://github.com/org/repo"));
        assert!(is_github_host("github.com/org/repo"));
        assert!(is_github_host("git@github.com:org/repo.git"));
        assert!(is_github_host("https://git@github.com:8443/org/repo"));
    }

    #[test]
    fn is_github_host_rejects_other_hosts() {
        assert!(!is_github_host("https://gitlab.com/org/repo"));
        assert!(!is_github_host("git@source.example.com:org/repo.git"));
    }

    #[test]
    fn github_archive_only_for_https_github_urls() {
        assert!(should_use_github_archive("https://github.com/org/repo"));
        assert!(!should_use_github_archive("http://github.com/org/repo"));
        assert!(!should_use_github_archive("github.com/org/repo"));
        assert!(!should_use_github_archive("git@github.com:org/repo.git"));
        assert!(!should_use_github_archive("ssh://git@github.com/org/repo"));
    }
}