1use anyhow::Result;
7use anyhow::bail;
8
/// Maximum number of organisation/subgroup segments accepted in front of the
/// repository name. `RepoIdentity::parse` rejects any URL whose path has more
/// than `MAX_SUBGROUP_DEPTH + 1` non-empty segments.
const MAX_SUBGROUP_DEPTH: usize = 20;
11
/// The components of a git remote URL that identify a repository.
///
/// Values built by [`RepoIdentity::parse`] have a lower-cased `host`, while
/// `org_path` and `repo` keep the casing found in the URL. Use
/// [`RepoIdentity::canonical_key`] for case-insensitive comparison.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepoIdentity {
    /// Host name of the remote (no user, no port).
    pub host: String,
    /// Path segments preceding the repository name, joined with `/`.
    /// Empty when the URL path consists of the repository name only.
    pub org_path: String,
    /// Repository name.
    pub repo: String,
}
25
/// Hashable comparison key for a repository.
///
/// [`RepoIdentity::canonical_key`] fills every field with the lower-cased
/// value of the corresponding `RepoIdentity` field, so two URLs that differ
/// only in letter case produce equal keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct RepoIdentityKey {
    /// Lower-cased host name.
    pub host: String,
    /// Lower-cased organisation / subgroup path.
    pub org_path: String,
    /// Lower-cased repository name.
    pub repo: String,
}
35
36impl RepoIdentity {
37 pub fn parse(url: &str) -> Result<Self> {
49 let url = url.trim();
50
51 let (host, path) = if url.starts_with("git@") {
53 parse_scp_url(url)?
55 } else if url.starts_with("ssh://") {
56 parse_ssh_scheme_url(url)?
58 } else if url.starts_with("https://") || url.starts_with("http://") {
59 parse_https_url(url)?
61 } else {
62 bail!("Unsupported URL format: {url}");
63 };
64
65 let path = path
67 .trim_end_matches('/')
68 .trim_end_matches(".git")
69 .trim_end_matches('/');
70
71 let segments: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
73
74 if segments.is_empty() {
75 bail!("URL has no path segments: {url}");
76 }
77
78 for seg in &segments {
80 if *seg == "." || *seg == ".." {
81 bail!("Invalid path segment '{seg}' in URL: {url}");
82 }
83 }
84
85 if segments.len() > MAX_SUBGROUP_DEPTH + 1 {
86 bail!(
87 "Path has too many segments ({}, max {}): {}",
88 segments.len(),
89 MAX_SUBGROUP_DEPTH + 1,
90 url
91 );
92 }
93
94 let (org_path, repo) = if let Some(git_idx) = segments.iter().position(|s| *s == "_git") {
96 if git_idx + 1 >= segments.len() {
97 bail!("Azure DevOps URL missing repo after _git: {url}");
98 }
99 let org_segments = &segments[..git_idx];
100 let repo = segments[git_idx + 1];
101 (org_segments.join("/"), repo.to_string())
102 } else if segments.len() == 1 {
103 (String::new(), segments[0].to_string())
105 } else {
106 let org_segments = &segments[..segments.len() - 1];
108 let repo = segments[segments.len() - 1];
109 (org_segments.join("/"), repo.to_string())
110 };
111
112 Ok(Self {
113 host: host.to_lowercase(),
114 org_path,
115 repo,
116 })
117 }
118
119 pub fn canonical_key(&self) -> RepoIdentityKey {
123 RepoIdentityKey {
124 host: self.host.to_lowercase(),
125 org_path: self.org_path.to_lowercase(),
126 repo: self.repo.to_lowercase(),
127 }
128 }
129}
130
131fn parse_scp_url(url: &str) -> Result<(String, String)> {
133 let without_user = url.find('@').map_or(url, |i| &url[i + 1..]);
135
136 let colon_pos = without_user
137 .find(':')
138 .ok_or_else(|| anyhow::anyhow!("Invalid scp-like URL (missing colon): {url}"))?;
139
140 let host = &without_user[..colon_pos];
141 let path = &without_user[colon_pos + 1..];
142
143 if host.is_empty() {
144 bail!("Empty host in URL: {url}");
145 }
146
147 Ok((host.to_string(), path.to_string()))
148}
149
150fn parse_ssh_scheme_url(url: &str) -> Result<(String, String)> {
152 let without_scheme = url
153 .strip_prefix("ssh://")
154 .ok_or_else(|| anyhow::anyhow!("Not an SSH URL: {url}"))?;
155
156 let without_user = without_scheme
158 .find('@')
159 .map_or(without_scheme, |i| &without_scheme[i + 1..]);
160
161 let slash_pos = without_user
163 .find('/')
164 .ok_or_else(|| anyhow::anyhow!("SSH URL missing path: {url}"))?;
165
166 let host_port = &without_user[..slash_pos];
167 let path = &without_user[slash_pos + 1..];
168
169 let host = host_port
171 .split(':')
172 .next()
173 .ok_or_else(|| anyhow::anyhow!("Empty host in URL: {url}"))?;
174
175 if host.is_empty() {
176 bail!("Empty host in URL: {url}");
177 }
178
179 Ok((host.to_string(), path.to_string()))
180}
181
182fn parse_https_url(url: &str) -> Result<(String, String)> {
184 let scheme_end = url
185 .find("://")
186 .ok_or_else(|| anyhow::anyhow!("Invalid URL (missing ://): {url}"))?;
187
188 let without_scheme = &url[scheme_end + 3..];
189
190 let without_user = without_scheme
192 .find('@')
193 .map_or(without_scheme, |i| &without_scheme[i + 1..]);
194
195 let slash_pos = without_user
197 .find('/')
198 .ok_or_else(|| anyhow::anyhow!("URL missing path: {url}"))?;
199
200 let host_port = &without_user[..slash_pos];
201 let path = &without_user[slash_pos + 1..];
202
203 let host = host_port
205 .split(':')
206 .next()
207 .ok_or_else(|| anyhow::anyhow!("Empty host in URL: {url}"))?;
208
209 if host.is_empty() {
210 bail!("Empty host in URL: {url}");
211 }
212
213 Ok((host.to_string(), path.to_string()))
214}
215
216pub fn parse_url_and_subpath(url: &str) -> (String, Option<String>) {
240 let url = url.trim();
247
248 if let Some(colon_pos) = url.rfind(':') {
250 let potential_base = &url[..colon_pos];
251 let potential_subpath = &url[colon_pos + 1..];
252
253 if potential_subpath.is_empty() {
255 return (url.to_string(), None);
256 }
257
258 if potential_subpath.chars().all(|c| c.is_ascii_digit()) {
260 return (url.to_string(), None);
261 }
262
263 if potential_base.is_empty() || potential_base.ends_with("//") {
265 return (url.to_string(), None);
266 }
267
268 if RepoIdentity::parse(potential_base).is_ok() {
270 return (
271 potential_base.to_string(),
272 Some(potential_subpath.to_string()),
273 );
274 }
275 }
276
277 (url.to_string(), None)
278}
279
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `url` and checks host, organisation path and repository name.
    #[track_caller]
    fn assert_identity(url: &str, host: &str, org_path: &str, repo: &str) {
        let id = RepoIdentity::parse(url).unwrap();
        assert_eq!(id.host, host);
        assert_eq!(id.org_path, org_path);
        assert_eq!(id.repo, repo);
    }

    /// Parses `url` and returns its canonical key.
    #[track_caller]
    fn key_of(url: &str) -> RepoIdentityKey {
        RepoIdentity::parse(url).unwrap().canonical_key()
    }

    /// Runs `parse_url_and_subpath` on `input` and checks both outputs.
    #[track_caller]
    fn assert_split(input: &str, expected_url: &str, expected_subpath: Option<&str>) {
        let (url, sub) = parse_url_and_subpath(input);
        assert_eq!(url, expected_url);
        assert_eq!(sub.as_deref(), expected_subpath);
    }

    // --- RepoIdentity::parse -------------------------------------------------

    #[test]
    fn test_parse_ssh_scp_basic() {
        assert_identity("git@github.com:org/repo.git", "github.com", "org", "repo");
    }

    #[test]
    fn test_parse_ssh_scp_no_git_suffix() {
        assert_identity("git@github.com:org/repo", "github.com", "org", "repo");
    }

    #[test]
    fn test_parse_https_basic() {
        assert_identity("https://github.com/org/repo", "github.com", "org", "repo");
    }

    #[test]
    fn test_parse_https_with_git_suffix() {
        assert_identity("https://github.com/org/repo.git", "github.com", "org", "repo");
    }

    #[test]
    fn test_parse_https_trailing_slash() {
        assert_identity("https://github.com/org/repo/", "github.com", "org", "repo");
    }

    #[test]
    fn test_parse_ssh_with_port() {
        assert_identity(
            "ssh://git@host.example.com:2222/org/repo.git",
            "host.example.com",
            "org",
            "repo",
        );
    }

    #[test]
    fn test_parse_gitlab_subgroups() {
        assert_identity(
            "https://gitlab.com/group/subgroup/team/repo.git",
            "gitlab.com",
            "group/subgroup/team",
            "repo",
        );
    }

    #[test]
    fn test_parse_gitlab_deep_subgroups() {
        assert_identity(
            "https://gitlab.com/a/b/c/d/e/repo.git",
            "gitlab.com",
            "a/b/c/d/e",
            "repo",
        );
    }

    #[test]
    fn test_parse_azure_devops() {
        assert_identity(
            "https://dev.azure.com/myorg/myproj/_git/myrepo",
            "dev.azure.com",
            "myorg/myproj",
            "myrepo",
        );
    }

    #[test]
    fn test_parse_host_case_normalized() {
        // Only the host is lower-cased; org and repo keep their casing.
        assert_identity("https://GitHub.COM/Org/Repo", "github.com", "Org", "Repo");
    }

    #[test]
    fn test_parse_http_scheme() {
        assert_identity("http://github.com/org/repo", "github.com", "org", "repo");
    }

    #[test]
    fn test_parse_rejects_invalid_segments() {
        for url in ["https://github.com/../repo", "https://github.com/./repo"] {
            assert!(RepoIdentity::parse(url).is_err());
        }
    }

    #[test]
    fn test_parse_rejects_unsupported_scheme() {
        for url in ["ftp://github.com/org/repo", "org/repo"] {
            assert!(RepoIdentity::parse(url).is_err());
        }
    }

    // --- RepoIdentity::canonical_key -----------------------------------------

    #[test]
    fn test_canonical_key_equality_across_schemes() {
        assert_eq!(
            key_of("git@github.com:User/Repo.git"),
            key_of("https://github.com/user/repo"),
        );
    }

    #[test]
    fn test_canonical_key_different_repos() {
        assert_ne!(
            key_of("git@github.com:org/repo-a.git"),
            key_of("git@github.com:org/repo-b.git"),
        );
    }

    #[test]
    fn test_canonical_key_different_orgs() {
        assert_ne!(
            key_of("git@github.com:alice/utils.git"),
            key_of("git@github.com:bob/utils.git"),
        );
    }

    // --- parse_url_and_subpath -----------------------------------------------

    #[test]
    fn test_subpath_none_basic() {
        assert_split(
            "git@github.com:user/repo.git",
            "git@github.com:user/repo.git",
            None,
        );
    }

    #[test]
    fn test_subpath_present() {
        assert_split(
            "git@github.com:user/repo.git:docs/api",
            "git@github.com:user/repo.git",
            Some("docs/api"),
        );
    }

    #[test]
    fn test_subpath_https_none() {
        assert_split(
            "https://github.com/user/repo",
            "https://github.com/user/repo",
            None,
        );
    }

    #[test]
    fn test_subpath_ssh_port_not_confused() {
        assert_split(
            "ssh://git@host:2222/org/repo.git",
            "ssh://git@host:2222/org/repo.git",
            None,
        );
    }

    #[test]
    fn test_subpath_ssh_port_with_actual_subpath() {
        assert_split(
            "ssh://git@host:2222/org/repo.git:docs/api",
            "ssh://git@host:2222/org/repo.git",
            Some("docs/api"),
        );
    }

    #[test]
    fn test_subpath_empty_subpath_ignored() {
        assert_split(
            "git@github.com:user/repo.git:",
            "git@github.com:user/repo.git:",
            None,
        );
    }
}