use anyhow::Result;
use anyhow::bail;
/// Depth limit applied to the path portion of a repository URL.
///
/// `RepoIdentity::parse` rejects any URL whose path has more than
/// `MAX_SUBGROUP_DEPTH + 1` non-empty segments in total.
const MAX_SUBGROUP_DEPTH: usize = 20;
/// Host, organization path, and repository name extracted from a remote URL
/// by `RepoIdentity::parse`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepoIdentity {
/// Host name of the remote; `parse` stores it lowercased.
pub host: String,
/// Path segments that precede the repository name, joined with `/`.
/// Empty when the URL path consists of a single segment. Case is preserved.
pub org_path: String,
/// Repository name with a trailing `.git` removed. Case is preserved.
pub repo: String,
}
/// Case-normalized counterpart of `RepoIdentity`, produced by
/// `RepoIdentity::canonical_key`, which lowercases every field.
///
/// Unlike `RepoIdentity`, this type also derives `Hash`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct RepoIdentityKey {
/// Lowercased host name.
pub host: String,
/// Lowercased organization path.
pub org_path: String,
/// Lowercased repository name.
pub repo: String,
}
impl RepoIdentity {
    /// Parses a remote repository URL into host, organization path, and
    /// repository name.
    ///
    /// The URL form is selected by prefix after trimming surrounding whitespace:
    /// - `git@host:path` (scp-like syntax)
    /// - `ssh://[user@]host[:port]/path`
    /// - `https://…` or `http://…`
    ///
    /// Trailing slashes and at most one trailing `.git` are removed from the
    /// path. If the path contains a `_git` segment (Azure DevOps layout), the
    /// segments before it form the organization path and the segment right
    /// after it is the repository. Otherwise the last segment is the repository
    /// and everything before it is the organization path.
    ///
    /// The host is lowercased; `org_path` and `repo` keep their original case.
    ///
    /// # Errors
    ///
    /// Returns an error when the URL form is unsupported, the host or path is
    /// missing, a segment is `.` or `..`, the path has more than
    /// `MAX_SUBGROUP_DEPTH + 1` segments, or `_git` is the final segment.
    pub fn parse(url: &str) -> Result<Self> {
        let url = url.trim();
        let (host, path) = if url.starts_with("git@") {
            parse_scp_url(url)?
        } else if url.starts_with("ssh://") {
            parse_ssh_scheme_url(url)?
        } else if url.starts_with("https://") || url.starts_with("http://") {
            parse_https_url(url)?
        } else {
            bail!("Unsupported URL format: {url}");
        };

        // Strip at most ONE `.git` suffix: `trim_end_matches(".git")` would
        // remove every repetition and turn `repo.git.git` into `repo`, which
        // names a different repository than `repo.git`.
        let path = path.trim_end_matches('/');
        let path = path
            .strip_suffix(".git")
            .unwrap_or(path)
            .trim_end_matches('/');

        // Empty segments (from `//` or a leading `/`) carry no information.
        let segments: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
        if segments.is_empty() {
            bail!("URL has no path segments: {url}");
        }
        // Reject relative-path components so the identity cannot escape its
        // own namespace when later used to build paths.
        if let Some(seg) = segments.iter().find(|s| matches!(**s, "." | "..")) {
            bail!("Invalid path segment '{seg}' in URL: {url}");
        }
        if segments.len() > MAX_SUBGROUP_DEPTH + 1 {
            bail!(
                "Path has too many segments ({}, max {}): {}",
                segments.len(),
                MAX_SUBGROUP_DEPTH + 1,
                url
            );
        }

        let (org_segments, repo) = match segments.iter().position(|s| *s == "_git") {
            // Azure DevOps: `<org>/<project>/_git/<repo>`.
            Some(git_idx) => match segments.get(git_idx + 1) {
                Some(repo) => (&segments[..git_idx], *repo),
                None => bail!("Azure DevOps URL missing repo after _git: {url}"),
            },
            // Generic layout: the last segment is the repository. Joining an
            // empty slice yields "", which covers the single-segment case.
            None => {
                let last = segments.len() - 1;
                (&segments[..last], segments[last])
            }
        };

        Ok(Self {
            host: host.to_lowercase(),
            org_path: org_segments.join("/"),
            repo: repo.to_string(),
        })
    }

    /// Returns a key with every field lowercased, so identities that differ
    /// only by letter case compare (and hash) as equal.
    pub fn canonical_key(&self) -> RepoIdentityKey {
        RepoIdentityKey {
            host: self.host.to_lowercase(),
            org_path: self.org_path.to_lowercase(),
            repo: self.repo.to_lowercase(),
        }
    }
}
/// Splits an scp-like remote (`user@host:path`) into `(host, path)`.
///
/// Everything up to and including the first `@` is discarded; the host is the
/// text before the first following `:` and the path is everything after it.
///
/// # Errors
///
/// Fails when no `:` follows the user part or when the host is empty.
fn parse_scp_url(url: &str) -> Result<(String, String)> {
    let after_user = match url.find('@') {
        Some(at) => &url[at + 1..],
        None => url,
    };

    let (host, path) = match after_user.split_once(':') {
        Some(parts) => parts,
        None => {
            return Err(anyhow::anyhow!(
                "Invalid scp-like URL (missing colon): {url}"
            ))
        }
    };

    if host.is_empty() {
        bail!("Empty host in URL: {url}");
    }
    Ok((host.to_owned(), path.to_owned()))
}
/// Splits an `ssh://[user@]host[:port]/path` URL into `(host, path)`.
///
/// Userinfo and port are discarded. The returned path does not include the
/// slash that separates it from the authority. A bracketed IPv6 literal
/// (`[::1]`) is returned with its brackets.
///
/// # Errors
///
/// Fails when the `ssh://` prefix is absent, no `/` follows the authority,
/// an IPv6 literal is missing its closing `]`, or the host is empty.
fn parse_ssh_scheme_url(url: &str) -> Result<(String, String)> {
    let rest = url
        .strip_prefix("ssh://")
        .ok_or_else(|| anyhow::anyhow!("Not an SSH URL: {url}"))?;

    // Separate authority from path BEFORE looking for '@', so an '@' inside
    // the path is never mistaken for the userinfo delimiter.
    let (authority, path) = rest
        .split_once('/')
        .ok_or_else(|| anyhow::anyhow!("SSH URL missing path: {url}"))?;

    // The host follows the LAST '@' of the authority (userinfo may itself
    // contain an unescaped '@').
    let host_port = authority
        .rsplit_once('@')
        .map_or(authority, |(_, host_port)| host_port);

    let host = if host_port.starts_with('[') {
        // IPv6 literal: the colons inside the brackets are not port separators.
        let close = host_port
            .find(']')
            .ok_or_else(|| anyhow::anyhow!("Unterminated IPv6 host in URL: {url}"))?;
        &host_port[..=close]
    } else {
        // `split` always yields at least one item; the fallback is never used.
        host_port.split(':').next().unwrap_or(host_port)
    };

    if host.is_empty() {
        bail!("Empty host in URL: {url}");
    }
    Ok((host.to_string(), path.to_string()))
}
/// Splits a `scheme://[user[:password]@]host[:port]/path` URL into
/// `(host, path)`.
///
/// Userinfo and port are discarded. The returned path does not include the
/// slash that separates it from the authority. A bracketed IPv6 literal
/// (`[::1]`) is returned with its brackets.
///
/// # Errors
///
/// Fails when `://` is absent, no `/` follows the authority, an IPv6 literal
/// is missing its closing `]`, or the host is empty.
fn parse_https_url(url: &str) -> Result<(String, String)> {
    let (_scheme, rest) = url
        .split_once("://")
        .ok_or_else(|| anyhow::anyhow!("Invalid URL (missing ://): {url}"))?;

    // Separate authority from path BEFORE looking for '@', so an '@' inside
    // the path (e.g. `/org/repo@v1`) is never mistaken for the userinfo
    // delimiter.
    let (authority, path) = rest
        .split_once('/')
        .ok_or_else(|| anyhow::anyhow!("URL missing path: {url}"))?;

    // The host follows the LAST '@' of the authority (a password may contain
    // an unescaped '@').
    let host_port = authority
        .rsplit_once('@')
        .map_or(authority, |(_, host_port)| host_port);

    let host = if host_port.starts_with('[') {
        // IPv6 literal: the colons inside the brackets are not port separators.
        let close = host_port
            .find(']')
            .ok_or_else(|| anyhow::anyhow!("Unterminated IPv6 host in URL: {url}"))?;
        &host_port[..=close]
    } else {
        // `split` always yields at least one item; the fallback is never used.
        host_port.split(':').next().unwrap_or(host_port)
    };

    if host.is_empty() {
        bail!("Empty host in URL: {url}");
    }
    Ok((host.to_string(), path.to_string()))
}
/// Separates an optional `:subpath` suffix from a repository URL.
///
/// The input is trimmed and split at its last `:`. The part after the colon
/// is reported as a subpath only when all of the following hold:
/// - it is non-empty and not made up solely of ASCII digits (which would look
///   like a port number),
/// - the part before the colon is non-empty and does not end in `//`,
/// - the part before the colon is accepted by `RepoIdentity::parse`.
///
/// In every other case the whole trimmed input is returned with `None`.
pub fn parse_url_and_subpath(url: &str) -> (String, Option<String>) {
    let trimmed = url.trim();

    match trimmed.rsplit_once(':') {
        Some((base, subpath))
            if !subpath.is_empty()
                && !subpath.chars().all(|c| c.is_ascii_digit())
                && !base.is_empty()
                && !base.ends_with("//")
                && RepoIdentity::parse(base).is_ok() =>
        {
            (base.to_string(), Some(subpath.to_string()))
        }
        _ => (trimmed.to_string(), None),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `url` (which must succeed) and checks all three identity fields.
    fn assert_identity(url: &str, host: &str, org_path: &str, repo: &str) {
        let id = RepoIdentity::parse(url).unwrap();
        assert_eq!(id.host, host);
        assert_eq!(id.org_path, org_path);
        assert_eq!(id.repo, repo);
    }

    /// Runs `parse_url_and_subpath` on `input` and checks both returned parts.
    fn assert_split(input: &str, expected_url: &str, expected_sub: Option<&str>) {
        let (url, sub) = parse_url_and_subpath(input);
        assert_eq!(url, expected_url);
        assert_eq!(sub, expected_sub.map(str::to_string));
    }

    // ---- RepoIdentity::parse -------------------------------------------

    #[test]
    fn test_parse_ssh_scp_basic() {
        assert_identity("git@github.com:org/repo.git", "github.com", "org", "repo");
    }

    #[test]
    fn test_parse_ssh_scp_no_git_suffix() {
        assert_identity("git@github.com:org/repo", "github.com", "org", "repo");
    }

    #[test]
    fn test_parse_https_basic() {
        assert_identity("https://github.com/org/repo", "github.com", "org", "repo");
    }

    #[test]
    fn test_parse_https_with_git_suffix() {
        assert_identity(
            "https://github.com/org/repo.git",
            "github.com",
            "org",
            "repo",
        );
    }

    #[test]
    fn test_parse_https_trailing_slash() {
        assert_identity("https://github.com/org/repo/", "github.com", "org", "repo");
    }

    #[test]
    fn test_parse_ssh_with_port() {
        assert_identity(
            "ssh://git@host.example.com:2222/org/repo.git",
            "host.example.com",
            "org",
            "repo",
        );
    }

    #[test]
    fn test_parse_gitlab_subgroups() {
        assert_identity(
            "https://gitlab.com/group/subgroup/team/repo.git",
            "gitlab.com",
            "group/subgroup/team",
            "repo",
        );
    }

    #[test]
    fn test_parse_gitlab_deep_subgroups() {
        assert_identity(
            "https://gitlab.com/a/b/c/d/e/repo.git",
            "gitlab.com",
            "a/b/c/d/e",
            "repo",
        );
    }

    #[test]
    fn test_parse_azure_devops() {
        assert_identity(
            "https://dev.azure.com/myorg/myproj/_git/myrepo",
            "dev.azure.com",
            "myorg/myproj",
            "myrepo",
        );
    }

    #[test]
    fn test_parse_host_case_normalized() {
        // Only the host is lowercased; org and repo keep their case.
        assert_identity("https://GitHub.COM/Org/Repo", "github.com", "Org", "Repo");
    }

    #[test]
    fn test_parse_http_scheme() {
        assert_identity("http://github.com/org/repo", "github.com", "org", "repo");
    }

    #[test]
    fn test_parse_rejects_invalid_segments() {
        for url in ["https://github.com/../repo", "https://github.com/./repo"] {
            assert!(RepoIdentity::parse(url).is_err());
        }
    }

    #[test]
    fn test_parse_rejects_unsupported_scheme() {
        for url in ["ftp://github.com/org/repo", "org/repo"] {
            assert!(RepoIdentity::parse(url).is_err());
        }
    }

    // ---- RepoIdentity::canonical_key -----------------------------------

    #[test]
    fn test_canonical_key_equality_across_schemes() {
        let ssh_key = RepoIdentity::parse("git@github.com:User/Repo.git")
            .unwrap()
            .canonical_key();
        let https_key = RepoIdentity::parse("https://github.com/user/repo")
            .unwrap()
            .canonical_key();
        assert_eq!(ssh_key, https_key);
    }

    #[test]
    fn test_canonical_key_different_repos() {
        let first = RepoIdentity::parse("git@github.com:org/repo-a.git").unwrap();
        let second = RepoIdentity::parse("git@github.com:org/repo-b.git").unwrap();
        assert_ne!(first.canonical_key(), second.canonical_key());
    }

    #[test]
    fn test_canonical_key_different_orgs() {
        let first = RepoIdentity::parse("git@github.com:alice/utils.git").unwrap();
        let second = RepoIdentity::parse("git@github.com:bob/utils.git").unwrap();
        assert_ne!(first.canonical_key(), second.canonical_key());
    }

    // ---- parse_url_and_subpath -----------------------------------------

    #[test]
    fn test_subpath_none_basic() {
        assert_split(
            "git@github.com:user/repo.git",
            "git@github.com:user/repo.git",
            None,
        );
    }

    #[test]
    fn test_subpath_present() {
        assert_split(
            "git@github.com:user/repo.git:docs/api",
            "git@github.com:user/repo.git",
            Some("docs/api"),
        );
    }

    #[test]
    fn test_subpath_https_none() {
        assert_split(
            "https://github.com/user/repo",
            "https://github.com/user/repo",
            None,
        );
    }

    #[test]
    fn test_subpath_ssh_port_not_confused() {
        assert_split(
            "ssh://git@host:2222/org/repo.git",
            "ssh://git@host:2222/org/repo.git",
            None,
        );
    }

    #[test]
    fn test_subpath_ssh_port_with_actual_subpath() {
        assert_split(
            "ssh://git@host:2222/org/repo.git:docs/api",
            "ssh://git@host:2222/org/repo.git",
            Some("docs/api"),
        );
    }

    #[test]
    fn test_subpath_empty_subpath_ignored() {
        assert_split(
            "git@github.com:user/repo.git:",
            "git@github.com:user/repo.git:",
            None,
        );
    }
}