1use anyhow::Result;
7use anyhow::bail;
8
/// Upper bound on how deeply a repository may be nested under groups/subgroups.
///
/// `RepoIdentity::parse` rejects any URL whose path has more than
/// `MAX_SUBGROUP_DEPTH + 1` non-empty segments (the `+ 1` accounts for the
/// repository name itself).
const MAX_SUBGROUP_DEPTH: usize = 20;
11
/// The identifying components of a git remote URL.
///
/// Values built by `RepoIdentity::parse` have a lowercased `host`, while
/// `org_path` and `repo` keep the case used in the URL.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepoIdentity {
    /// Host name of the remote, without user info or port.
    pub host: String,
    /// `/`-joined path segments that precede the repository name. Empty when
    /// the URL path consists of a single segment.
    pub org_path: String,
    /// Repository name, without a trailing `.git`.
    pub repo: String,
}
25
/// Hashable, case-insensitive form of a [`RepoIdentity`].
///
/// `RepoIdentity::canonical_key` produces this with all three fields
/// lowercased, so identities that differ only by letter case compare equal.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct RepoIdentityKey {
    /// Lowercased host name.
    pub host: String,
    /// Lowercased organisation / group path.
    pub org_path: String,
    /// Lowercased repository name.
    pub repo: String,
}
35
36impl RepoIdentity {
37 pub fn parse(url: &str) -> Result<Self> {
49 let url = url.trim();
50
51 let (host, path) = if url.starts_with("git@") {
53 parse_scp_url(url)?
55 } else if url.starts_with("ssh://") {
56 parse_ssh_scheme_url(url)?
58 } else if url.starts_with("https://") || url.starts_with("http://") {
59 parse_https_url(url)?
61 } else {
62 bail!("Unsupported URL format: {}", url);
63 };
64
65 let path = path
67 .trim_end_matches('/')
68 .trim_end_matches(".git")
69 .trim_end_matches('/');
70
71 let segments: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
73
74 if segments.is_empty() {
75 bail!("URL has no path segments: {}", url);
76 }
77
78 for seg in &segments {
80 if *seg == "." || *seg == ".." {
81 bail!("Invalid path segment '{}' in URL: {}", seg, url);
82 }
83 }
84
85 if segments.len() > MAX_SUBGROUP_DEPTH + 1 {
86 bail!(
87 "Path has too many segments ({}, max {}): {}",
88 segments.len(),
89 MAX_SUBGROUP_DEPTH + 1,
90 url
91 );
92 }
93
94 let (org_path, repo) = if let Some(git_idx) = segments.iter().position(|s| *s == "_git") {
96 if git_idx + 1 >= segments.len() {
97 bail!("Azure DevOps URL missing repo after _git: {}", url);
98 }
99 let org_segments = &segments[..git_idx];
100 let repo = segments[git_idx + 1];
101 (org_segments.join("/"), repo.to_string())
102 } else if segments.len() == 1 {
103 (String::new(), segments[0].to_string())
105 } else {
106 let org_segments = &segments[..segments.len() - 1];
108 let repo = segments[segments.len() - 1];
109 (org_segments.join("/"), repo.to_string())
110 };
111
112 Ok(Self {
113 host: host.to_lowercase(),
114 org_path,
115 repo,
116 })
117 }
118
119 pub fn canonical_key(&self) -> RepoIdentityKey {
123 RepoIdentityKey {
124 host: self.host.to_lowercase(),
125 org_path: self.org_path.to_lowercase(),
126 repo: self.repo.to_lowercase(),
127 }
128 }
129}
130
131fn parse_scp_url(url: &str) -> Result<(String, String)> {
133 let without_user = url.find('@').map(|i| &url[i + 1..]).unwrap_or(url);
135
136 let colon_pos = without_user
137 .find(':')
138 .ok_or_else(|| anyhow::anyhow!("Invalid scp-like URL (missing colon): {}", url))?;
139
140 let host = &without_user[..colon_pos];
141 let path = &without_user[colon_pos + 1..];
142
143 if host.is_empty() {
144 bail!("Empty host in URL: {}", url);
145 }
146
147 Ok((host.to_string(), path.to_string()))
148}
149
150fn parse_ssh_scheme_url(url: &str) -> Result<(String, String)> {
152 let without_scheme = url
153 .strip_prefix("ssh://")
154 .ok_or_else(|| anyhow::anyhow!("Not an SSH URL: {}", url))?;
155
156 let without_user = without_scheme
158 .find('@')
159 .map(|i| &without_scheme[i + 1..])
160 .unwrap_or(without_scheme);
161
162 let slash_pos = without_user
164 .find('/')
165 .ok_or_else(|| anyhow::anyhow!("SSH URL missing path: {}", url))?;
166
167 let host_port = &without_user[..slash_pos];
168 let path = &without_user[slash_pos + 1..];
169
170 let host = host_port
172 .split(':')
173 .next()
174 .ok_or_else(|| anyhow::anyhow!("Empty host in URL: {}", url))?;
175
176 if host.is_empty() {
177 bail!("Empty host in URL: {}", url);
178 }
179
180 Ok((host.to_string(), path.to_string()))
181}
182
183fn parse_https_url(url: &str) -> Result<(String, String)> {
185 let scheme_end = url
186 .find("://")
187 .ok_or_else(|| anyhow::anyhow!("Invalid URL (missing ://): {}", url))?;
188
189 let without_scheme = &url[scheme_end + 3..];
190
191 let without_user = without_scheme
193 .find('@')
194 .map(|i| &without_scheme[i + 1..])
195 .unwrap_or(without_scheme);
196
197 let slash_pos = without_user
199 .find('/')
200 .ok_or_else(|| anyhow::anyhow!("URL missing path: {}", url))?;
201
202 let host_port = &without_user[..slash_pos];
203 let path = &without_user[slash_pos + 1..];
204
205 let host = host_port
207 .split(':')
208 .next()
209 .ok_or_else(|| anyhow::anyhow!("Empty host in URL: {}", url))?;
210
211 if host.is_empty() {
212 bail!("Empty host in URL: {}", url);
213 }
214
215 Ok((host.to_string(), path.to_string()))
216}
217
218pub fn parse_url_and_subpath(url: &str) -> (String, Option<String>) {
242 let url = url.trim();
249
250 if let Some(colon_pos) = url.rfind(':') {
252 let potential_base = &url[..colon_pos];
253 let potential_subpath = &url[colon_pos + 1..];
254
255 if potential_subpath.is_empty() {
257 return (url.to_string(), None);
258 }
259
260 if potential_subpath.chars().all(|c| c.is_ascii_digit()) {
262 return (url.to_string(), None);
263 }
264
265 if potential_base.is_empty() || potential_base.ends_with("//") {
267 return (url.to_string(), None);
268 }
269
270 if RepoIdentity::parse(potential_base).is_ok() {
272 return (
273 potential_base.to_string(),
274 Some(potential_subpath.to_string()),
275 );
276 }
277 }
278
279 (url.to_string(), None)
280}
281
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `url` and checks each component of the resulting identity.
    #[track_caller]
    fn assert_identity(url: &str, host: &str, org_path: &str, repo: &str) {
        let parsed = RepoIdentity::parse(url).unwrap();
        assert_eq!(parsed.host, host);
        assert_eq!(parsed.org_path, org_path);
        assert_eq!(parsed.repo, repo);
    }

    /// Parses `url` and returns its canonical key.
    #[track_caller]
    fn key_of(url: &str) -> RepoIdentityKey {
        RepoIdentity::parse(url).unwrap().canonical_key()
    }

    /// Runs `parse_url_and_subpath` on `input` and checks both halves of the result.
    #[track_caller]
    fn assert_split(input: &str, expected_url: &str, expected_subpath: Option<&str>) {
        let (url, sub) = parse_url_and_subpath(input);
        assert_eq!(url, expected_url);
        assert_eq!(sub, expected_subpath.map(str::to_string));
    }

    // ---- RepoIdentity::parse ----

    #[test]
    fn test_parse_ssh_scp_basic() {
        assert_identity("git@github.com:org/repo.git", "github.com", "org", "repo");
    }

    #[test]
    fn test_parse_ssh_scp_no_git_suffix() {
        assert_identity("git@github.com:org/repo", "github.com", "org", "repo");
    }

    #[test]
    fn test_parse_https_basic() {
        assert_identity("https://github.com/org/repo", "github.com", "org", "repo");
    }

    #[test]
    fn test_parse_https_with_git_suffix() {
        assert_identity("https://github.com/org/repo.git", "github.com", "org", "repo");
    }

    #[test]
    fn test_parse_https_trailing_slash() {
        assert_identity("https://github.com/org/repo/", "github.com", "org", "repo");
    }

    #[test]
    fn test_parse_ssh_with_port() {
        assert_identity(
            "ssh://git@host.example.com:2222/org/repo.git",
            "host.example.com",
            "org",
            "repo",
        );
    }

    #[test]
    fn test_parse_gitlab_subgroups() {
        assert_identity(
            "https://gitlab.com/group/subgroup/team/repo.git",
            "gitlab.com",
            "group/subgroup/team",
            "repo",
        );
    }

    #[test]
    fn test_parse_gitlab_deep_subgroups() {
        assert_identity(
            "https://gitlab.com/a/b/c/d/e/repo.git",
            "gitlab.com",
            "a/b/c/d/e",
            "repo",
        );
    }

    #[test]
    fn test_parse_azure_devops() {
        assert_identity(
            "https://dev.azure.com/myorg/myproj/_git/myrepo",
            "dev.azure.com",
            "myorg/myproj",
            "myrepo",
        );
    }

    #[test]
    fn test_parse_host_case_normalized() {
        // Only the host is lowercased; org and repo keep their case.
        assert_identity("https://GitHub.COM/Org/Repo", "github.com", "Org", "Repo");
    }

    #[test]
    fn test_parse_http_scheme() {
        assert_identity("http://github.com/org/repo", "github.com", "org", "repo");
    }

    #[test]
    fn test_parse_rejects_invalid_segments() {
        let rejected = ["https://github.com/../repo", "https://github.com/./repo"];
        for url in rejected.iter() {
            assert!(RepoIdentity::parse(url).is_err(), "expected rejection of {}", url);
        }
    }

    #[test]
    fn test_parse_rejects_unsupported_scheme() {
        let rejected = ["ftp://github.com/org/repo", "org/repo"];
        for url in rejected.iter() {
            assert!(RepoIdentity::parse(url).is_err(), "expected rejection of {}", url);
        }
    }

    // ---- RepoIdentity::canonical_key ----

    #[test]
    fn test_canonical_key_equality_across_schemes() {
        assert_eq!(
            key_of("git@github.com:User/Repo.git"),
            key_of("https://github.com/user/repo")
        );
    }

    #[test]
    fn test_canonical_key_different_repos() {
        assert_ne!(
            key_of("git@github.com:org/repo-a.git"),
            key_of("git@github.com:org/repo-b.git")
        );
    }

    #[test]
    fn test_canonical_key_different_orgs() {
        assert_ne!(
            key_of("git@github.com:alice/utils.git"),
            key_of("git@github.com:bob/utils.git")
        );
    }

    // ---- parse_url_and_subpath ----

    #[test]
    fn test_subpath_none_basic() {
        assert_split(
            "git@github.com:user/repo.git",
            "git@github.com:user/repo.git",
            None,
        );
    }

    #[test]
    fn test_subpath_present() {
        assert_split(
            "git@github.com:user/repo.git:docs/api",
            "git@github.com:user/repo.git",
            Some("docs/api"),
        );
    }

    #[test]
    fn test_subpath_https_none() {
        assert_split(
            "https://github.com/user/repo",
            "https://github.com/user/repo",
            None,
        );
    }

    #[test]
    fn test_subpath_ssh_port_not_confused() {
        assert_split(
            "ssh://git@host:2222/org/repo.git",
            "ssh://git@host:2222/org/repo.git",
            None,
        );
    }

    #[test]
    fn test_subpath_ssh_port_with_actual_subpath() {
        assert_split(
            "ssh://git@host:2222/org/repo.git:docs/api",
            "ssh://git@host:2222/org/repo.git",
            Some("docs/api"),
        );
    }

    #[test]
    fn test_subpath_empty_subpath_ignored() {
        assert_split(
            "git@github.com:user/repo.git:",
            "git@github.com:user/repo.git:",
            None,
        );
    }
}