1use anyhow::{Result, bail};
7
/// Maximum number of path segments allowed in front of the final one.
///
/// `RepoIdentity::parse` rejects any URL whose path has more than
/// `MAX_SUBGROUP_DEPTH + 1` non-empty segments.
const MAX_SUBGROUP_DEPTH: usize = 20;
10
/// Host, organisation path and repository name extracted from a git remote URL.
///
/// Values built by `RepoIdentity::parse` have a lowercased `host`, while
/// `org_path` and `repo` keep the case that was used in the URL.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepoIdentity {
    /// Host part of the URL (lowercased by `parse`).
    pub host: String,
    /// Path segments in front of the repository name, joined with `/`.
    /// Empty when the URL path consists of a single segment.
    pub org_path: String,
    /// Repository name as taken from the URL path.
    pub repo: String,
}
24
/// Hashable, case-insensitive form of a `RepoIdentity`.
///
/// `RepoIdentity::canonical_key` builds it by lowercasing all three parts, so
/// two URLs that differ only in letter case yield equal keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct RepoIdentityKey {
    /// Lowercased host.
    pub host: String,
    /// Lowercased organisation path.
    pub org_path: String,
    /// Lowercased repository name.
    pub repo: String,
}
34
35impl RepoIdentity {
36 pub fn parse(url: &str) -> Result<Self> {
48 let url = url.trim();
49
50 let (host, path) = if url.starts_with("git@") {
52 parse_scp_url(url)?
54 } else if url.starts_with("ssh://") {
55 parse_ssh_scheme_url(url)?
57 } else if url.starts_with("https://") || url.starts_with("http://") {
58 parse_https_url(url)?
60 } else {
61 bail!("Unsupported URL format: {}", url);
62 };
63
64 let path = path
66 .trim_end_matches('/')
67 .trim_end_matches(".git")
68 .trim_end_matches('/');
69
70 let segments: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
72
73 if segments.is_empty() {
74 bail!("URL has no path segments: {}", url);
75 }
76
77 for seg in &segments {
79 if *seg == "." || *seg == ".." {
80 bail!("Invalid path segment '{}' in URL: {}", seg, url);
81 }
82 }
83
84 if segments.len() > MAX_SUBGROUP_DEPTH + 1 {
85 bail!(
86 "Path has too many segments ({}, max {}): {}",
87 segments.len(),
88 MAX_SUBGROUP_DEPTH + 1,
89 url
90 );
91 }
92
93 let (org_path, repo) = if let Some(git_idx) = segments.iter().position(|s| *s == "_git") {
95 if git_idx + 1 >= segments.len() {
96 bail!("Azure DevOps URL missing repo after _git: {}", url);
97 }
98 let org_segments = &segments[..git_idx];
99 let repo = segments[git_idx + 1];
100 (org_segments.join("/"), repo.to_string())
101 } else if segments.len() == 1 {
102 (String::new(), segments[0].to_string())
104 } else {
105 let org_segments = &segments[..segments.len() - 1];
107 let repo = segments[segments.len() - 1];
108 (org_segments.join("/"), repo.to_string())
109 };
110
111 Ok(Self {
112 host: host.to_lowercase(),
113 org_path,
114 repo,
115 })
116 }
117
118 pub fn canonical_key(&self) -> RepoIdentityKey {
122 RepoIdentityKey {
123 host: self.host.to_lowercase(),
124 org_path: self.org_path.to_lowercase(),
125 repo: self.repo.to_lowercase(),
126 }
127 }
128}
129
130fn parse_scp_url(url: &str) -> Result<(String, String)> {
132 let without_user = url.find('@').map(|i| &url[i + 1..]).unwrap_or(url);
134
135 let colon_pos = without_user
136 .find(':')
137 .ok_or_else(|| anyhow::anyhow!("Invalid scp-like URL (missing colon): {}", url))?;
138
139 let host = &without_user[..colon_pos];
140 let path = &without_user[colon_pos + 1..];
141
142 if host.is_empty() {
143 bail!("Empty host in URL: {}", url);
144 }
145
146 Ok((host.to_string(), path.to_string()))
147}
148
149fn parse_ssh_scheme_url(url: &str) -> Result<(String, String)> {
151 let without_scheme = url
152 .strip_prefix("ssh://")
153 .ok_or_else(|| anyhow::anyhow!("Not an SSH URL: {}", url))?;
154
155 let without_user = without_scheme
157 .find('@')
158 .map(|i| &without_scheme[i + 1..])
159 .unwrap_or(without_scheme);
160
161 let slash_pos = without_user
163 .find('/')
164 .ok_or_else(|| anyhow::anyhow!("SSH URL missing path: {}", url))?;
165
166 let host_port = &without_user[..slash_pos];
167 let path = &without_user[slash_pos + 1..];
168
169 let host = host_port
171 .split(':')
172 .next()
173 .ok_or_else(|| anyhow::anyhow!("Empty host in URL: {}", url))?;
174
175 if host.is_empty() {
176 bail!("Empty host in URL: {}", url);
177 }
178
179 Ok((host.to_string(), path.to_string()))
180}
181
182fn parse_https_url(url: &str) -> Result<(String, String)> {
184 let scheme_end = url
185 .find("://")
186 .ok_or_else(|| anyhow::anyhow!("Invalid URL (missing ://): {}", url))?;
187
188 let without_scheme = &url[scheme_end + 3..];
189
190 let without_user = without_scheme
192 .find('@')
193 .map(|i| &without_scheme[i + 1..])
194 .unwrap_or(without_scheme);
195
196 let slash_pos = without_user
198 .find('/')
199 .ok_or_else(|| anyhow::anyhow!("URL missing path: {}", url))?;
200
201 let host_port = &without_user[..slash_pos];
202 let path = &without_user[slash_pos + 1..];
203
204 let host = host_port
206 .split(':')
207 .next()
208 .ok_or_else(|| anyhow::anyhow!("Empty host in URL: {}", url))?;
209
210 if host.is_empty() {
211 bail!("Empty host in URL: {}", url);
212 }
213
214 Ok((host.to_string(), path.to_string()))
215}
216
217pub fn parse_url_and_subpath(url: &str) -> (String, Option<String>) {
241 let url = url.trim();
248
249 if let Some(colon_pos) = url.rfind(':') {
251 let potential_base = &url[..colon_pos];
252 let potential_subpath = &url[colon_pos + 1..];
253
254 if potential_subpath.is_empty() {
256 return (url.to_string(), None);
257 }
258
259 if potential_subpath.chars().all(|c| c.is_ascii_digit()) {
261 return (url.to_string(), None);
262 }
263
264 if potential_base.is_empty() || potential_base.ends_with("//") {
266 return (url.to_string(), None);
267 }
268
269 if RepoIdentity::parse(potential_base).is_ok() {
271 return (
272 potential_base.to_string(),
273 Some(potential_subpath.to_string()),
274 );
275 }
276 }
277
278 (url.to_string(), None)
279}
280
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `url` and checks each field of the resulting identity.
    fn assert_identity(url: &str, host: &str, org_path: &str, repo: &str) {
        let parsed = RepoIdentity::parse(url).unwrap();
        assert_eq!(parsed.host, host);
        assert_eq!(parsed.org_path, org_path);
        assert_eq!(parsed.repo, repo);
    }

    /// Parses `url` and returns its canonical key.
    fn key_of(url: &str) -> RepoIdentityKey {
        RepoIdentity::parse(url).unwrap().canonical_key()
    }

    /// Runs `parse_url_and_subpath` on `input` and checks both halves of the result.
    fn assert_split(input: &str, expected_url: &str, expected_subpath: Option<&str>) {
        let (url, sub) = parse_url_and_subpath(input);
        assert_eq!(url, expected_url);
        assert_eq!(sub, expected_subpath.map(String::from));
    }

    // --- RepoIdentity::parse ---

    #[test]
    fn test_parse_ssh_scp_basic() {
        assert_identity("git@github.com:org/repo.git", "github.com", "org", "repo");
    }

    #[test]
    fn test_parse_ssh_scp_no_git_suffix() {
        assert_identity("git@github.com:org/repo", "github.com", "org", "repo");
    }

    #[test]
    fn test_parse_https_basic() {
        assert_identity("https://github.com/org/repo", "github.com", "org", "repo");
    }

    #[test]
    fn test_parse_https_with_git_suffix() {
        assert_identity("https://github.com/org/repo.git", "github.com", "org", "repo");
    }

    #[test]
    fn test_parse_https_trailing_slash() {
        assert_identity("https://github.com/org/repo/", "github.com", "org", "repo");
    }

    #[test]
    fn test_parse_ssh_with_port() {
        assert_identity(
            "ssh://git@host.example.com:2222/org/repo.git",
            "host.example.com",
            "org",
            "repo",
        );
    }

    #[test]
    fn test_parse_gitlab_subgroups() {
        assert_identity(
            "https://gitlab.com/group/subgroup/team/repo.git",
            "gitlab.com",
            "group/subgroup/team",
            "repo",
        );
    }

    #[test]
    fn test_parse_gitlab_deep_subgroups() {
        assert_identity(
            "https://gitlab.com/a/b/c/d/e/repo.git",
            "gitlab.com",
            "a/b/c/d/e",
            "repo",
        );
    }

    #[test]
    fn test_parse_azure_devops() {
        assert_identity(
            "https://dev.azure.com/myorg/myproj/_git/myrepo",
            "dev.azure.com",
            "myorg/myproj",
            "myrepo",
        );
    }

    #[test]
    fn test_parse_host_case_normalized() {
        // Only the host is lowercased; org and repo keep their case.
        assert_identity("https://GitHub.COM/Org/Repo", "github.com", "Org", "Repo");
    }

    #[test]
    fn test_parse_http_scheme() {
        assert_identity("http://github.com/org/repo", "github.com", "org", "repo");
    }

    #[test]
    fn test_parse_rejects_invalid_segments() {
        for url in ["https://github.com/../repo", "https://github.com/./repo"] {
            assert!(RepoIdentity::parse(url).is_err());
        }
    }

    #[test]
    fn test_parse_rejects_unsupported_scheme() {
        for url in ["ftp://github.com/org/repo", "org/repo"] {
            assert!(RepoIdentity::parse(url).is_err());
        }
    }

    // --- RepoIdentity::canonical_key ---

    #[test]
    fn test_canonical_key_equality_across_schemes() {
        assert_eq!(
            key_of("git@github.com:User/Repo.git"),
            key_of("https://github.com/user/repo")
        );
    }

    #[test]
    fn test_canonical_key_different_repos() {
        assert_ne!(
            key_of("git@github.com:org/repo-a.git"),
            key_of("git@github.com:org/repo-b.git")
        );
    }

    #[test]
    fn test_canonical_key_different_orgs() {
        assert_ne!(
            key_of("git@github.com:alice/utils.git"),
            key_of("git@github.com:bob/utils.git")
        );
    }

    // --- parse_url_and_subpath ---

    #[test]
    fn test_subpath_none_basic() {
        assert_split(
            "git@github.com:user/repo.git",
            "git@github.com:user/repo.git",
            None,
        );
    }

    #[test]
    fn test_subpath_present() {
        assert_split(
            "git@github.com:user/repo.git:docs/api",
            "git@github.com:user/repo.git",
            Some("docs/api"),
        );
    }

    #[test]
    fn test_subpath_https_none() {
        assert_split(
            "https://github.com/user/repo",
            "https://github.com/user/repo",
            None,
        );
    }

    #[test]
    fn test_subpath_ssh_port_not_confused() {
        assert_split(
            "ssh://git@host:2222/org/repo.git",
            "ssh://git@host:2222/org/repo.git",
            None,
        );
    }

    #[test]
    fn test_subpath_ssh_port_with_actual_subpath() {
        assert_split(
            "ssh://git@host:2222/org/repo.git:docs/api",
            "ssh://git@host:2222/org/repo.git",
            Some("docs/api"),
        );
    }

    #[test]
    fn test_subpath_empty_subpath_ignored() {
        assert_split(
            "git@github.com:user/repo.git:",
            "git@github.com:user/repo.git:",
            None,
        );
    }
}