Skip to main content

ito_common/
git_url.rs

//! Git remote URL parsing utilities.
//!
//! Pure parsing logic with no domain knowledge and no I/O. Handles the common
//! git remote URL formats used by GitHub, GitLab, and similar hosts.
5
6/// Parse an `<org>/<repo>` pair from a git remote URL.
7///
8/// Handles the four common formats:
9///
10/// | Format | Example |
11/// |---|---|
12/// | SCP-style SSH | `git@github.com:withakay/ito.git` |
13/// | HTTPS with `.git` | `https://github.com/withakay/ito.git` |
14/// | HTTPS without `.git` | `https://github.com/withakay/ito` |
15/// | SSH with explicit port | `ssh://git@github.com:22/withakay/ito.git` |
16///
17/// Returns `Some((org, repo))` when the URL contains a recognisable two-component
18/// path, or `None` when the URL is empty, malformed, or has fewer than two path
19/// components.
20///
21/// The `.git` suffix is stripped from the repository name when present.
22///
23/// # Examples
24///
25/// ```
26/// use ito_common::git_url::parse_remote_url_org_repo;
27///
28/// assert_eq!(
29///     parse_remote_url_org_repo("git@github.com:withakay/ito.git"),
30///     Some(("withakay".to_string(), "ito".to_string())),
31/// );
32/// assert_eq!(
33///     parse_remote_url_org_repo("https://github.com/withakay/ito.git"),
34///     Some(("withakay".to_string(), "ito".to_string())),
35/// );
36/// assert_eq!(
37///     parse_remote_url_org_repo("https://github.com/withakay/ito"),
38///     Some(("withakay".to_string(), "ito".to_string())),
39/// );
40/// assert_eq!(
41///     parse_remote_url_org_repo("ssh://git@github.com:22/withakay/ito.git"),
42///     Some(("withakay".to_string(), "ito".to_string())),
43/// );
44/// assert_eq!(parse_remote_url_org_repo(""), None);
45/// ```
46pub fn parse_remote_url_org_repo(url: &str) -> Option<(String, String)> {
47    let url = url.trim();
48    if url.is_empty() {
49        return None;
50    }
51
52    // Extract the path portion depending on URL format.
53    let path = if let Some(rest) = url
54        .strip_prefix("ssh://")
55        .or_else(|| url.strip_prefix("git://"))
56    {
57        // ssh://[user@]host[:port]/path  or  git://host/path
58        // Drop everything up to and including the first '/' after the authority.
59        rest.split_once('/')?.1
60    } else if url.contains("://") {
61        // HTTPS (or any other scheme): https://host/path
62        // Strip scheme + authority, keep the path.
63        let after_scheme = url.split_once("://")?.1;
64        after_scheme.split_once('/')?.1
65    } else if let Some(colon_pos) = url.find(':') {
66        // SCP-style SSH: git@github.com:org/repo.git
67        // The colon separates host from path; there must be no '/' before the colon.
68        let before_colon = &url[..colon_pos];
69        if before_colon.contains('/') {
70            // Looks like a Windows absolute path or something unexpected — bail.
71            return None;
72        }
73        &url[colon_pos + 1..]
74    } else {
75        return None;
76    };
77
78    extract_org_repo_from_path(path)
79}
80
/// Extract `(org, repo)` from the last two path components.
///
/// Splits `path` on `/` and ignores empty segments, so leading, trailing, and
/// doubled slashes do not affect the result. The final non-empty segment is
/// the repository and the one before it is the organisation. A single
/// trailing `.git` is removed from the repository name.
///
/// Returns `None` when there are fewer than two non-empty segments, or when
/// the repository name is empty after stripping `.git`.
fn extract_org_repo_from_path(path: &str) -> Option<(String, String)> {
    // Walk from the end: only the last two non-empty segments matter, so
    // there is no need to collect the whole path into a Vec.
    let mut segments = path.rsplit('/').filter(|segment| !segment.is_empty());
    let repo_raw = segments.next()?;
    let org = segments.next()?;

    // `org` is non-empty thanks to the filter; `repo` can still become empty
    // when the last segment is exactly ".git".
    let repo = repo_raw.strip_suffix(".git").unwrap_or(repo_raw);
    if repo.is_empty() {
        return None;
    }

    Some((org.to_string(), repo.to_string()))
}
103
#[cfg(test)]
mod tests {
    use super::*;

    /// Build the expected `Some((org, repo))` value without repeating the
    /// `to_string()` boilerplate in every assertion.
    fn pair(org: &str, repo: &str) -> Option<(String, String)> {
        Some((org.to_string(), repo.to_string()))
    }

    // ── Happy-path: all four canonical formats ────────────────────────────────

    #[test]
    fn parses_scp_ssh_url() {
        assert_eq!(
            parse_remote_url_org_repo("git@github.com:withakay/ito.git"),
            pair("withakay", "ito")
        );
    }

    #[test]
    fn parses_https_url_with_git_suffix() {
        assert_eq!(
            parse_remote_url_org_repo("https://github.com/withakay/ito.git"),
            pair("withakay", "ito")
        );
    }

    #[test]
    fn parses_https_url_without_git_suffix() {
        assert_eq!(
            parse_remote_url_org_repo("https://github.com/withakay/ito"),
            pair("withakay", "ito")
        );
    }

    #[test]
    fn parses_ssh_with_explicit_port() {
        assert_eq!(
            parse_remote_url_org_repo("ssh://git@github.com:22/withakay/ito.git"),
            pair("withakay", "ito")
        );
    }

    // ── Variations ────────────────────────────────────────────────────────────

    #[test]
    fn parses_gitlab_style_subgroup_takes_last_two_segments() {
        // GitLab allows nested groups; we take the last two path components.
        assert_eq!(
            parse_remote_url_org_repo("https://gitlab.com/group/subgroup/repo.git"),
            pair("subgroup", "repo")
        );
    }

    #[test]
    fn parses_http_scheme() {
        assert_eq!(
            parse_remote_url_org_repo("http://github.com/acme/widget.git"),
            pair("acme", "widget")
        );
    }

    #[test]
    fn parses_git_protocol_url() {
        assert_eq!(
            parse_remote_url_org_repo("git://github.com/acme/widget.git"),
            pair("acme", "widget")
        );
    }

    #[test]
    fn strips_git_suffix_only_once() {
        // repo name that ends with ".git.git" — only the trailing ".git" is stripped.
        assert_eq!(
            parse_remote_url_org_repo("https://github.com/org/repo.git.git"),
            pair("org", "repo.git")
        );
    }

    #[test]
    fn handles_trailing_slash_in_https_url() {
        assert_eq!(
            parse_remote_url_org_repo("https://github.com/withakay/ito/"),
            pair("withakay", "ito")
        );
    }

    #[test]
    fn handles_ssh_url_without_user() {
        assert_eq!(
            parse_remote_url_org_repo("ssh://github.com/withakay/ito.git"),
            pair("withakay", "ito")
        );
    }

    #[test]
    fn ignores_surrounding_whitespace() {
        assert_eq!(
            parse_remote_url_org_repo("  https://github.com/withakay/ito.git\n"),
            pair("withakay", "ito")
        );
    }

    // ── Edge cases / error paths ──────────────────────────────────────────────

    #[test]
    fn returns_none_for_empty_string() {
        assert_eq!(parse_remote_url_org_repo(""), None);
    }

    #[test]
    fn returns_none_for_whitespace_only() {
        assert_eq!(parse_remote_url_org_repo("   "), None);
    }

    #[test]
    fn returns_none_for_single_path_component() {
        assert_eq!(
            parse_remote_url_org_repo("https://github.com/onlyone"),
            None
        );
    }

    #[test]
    fn returns_none_for_no_path_after_host() {
        assert_eq!(parse_remote_url_org_repo("https://github.com"), None);
        assert_eq!(parse_remote_url_org_repo("https://github.com/"), None);
    }

    #[test]
    fn returns_none_for_scp_url_with_single_component() {
        assert_eq!(
            parse_remote_url_org_repo("git@github.com:onlyone.git"),
            None
        );
    }

    #[test]
    fn returns_none_for_bare_string_without_separator() {
        assert_eq!(parse_remote_url_org_repo("notaurl"), None);
    }

    #[test]
    fn returns_none_when_slash_precedes_colon() {
        // A '/' before the first ':' means this is a path, not `host:path`.
        assert_eq!(parse_remote_url_org_repo("/srv/git:mirror/org/repo"), None);
    }

    #[test]
    fn returns_none_when_repo_is_only_git_suffix() {
        // Stripping ".git" leaves an empty repository name.
        assert_eq!(parse_remote_url_org_repo("https://github.com/org/.git"), None);
    }
}