1use std::borrow::Cow;
2use std::fmt::{Debug, Formatter};
3use std::hash::{Hash, Hasher};
4use std::ops::Deref;
5
6use url::Url;
7use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError};
8
9use crate::cache_key::{CacheKey, CacheKeyHasher};
10
11#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
20pub struct CanonicalUrl(DisplaySafeUrl);
21
22impl CanonicalUrl {
23 pub fn new(url: &DisplaySafeUrl) -> Self {
24 let mut url = url.clone();
25
26 if url.cannot_be_a_base() {
28 return Self(url);
29 }
30
31 let _ = url.set_password(None);
33 let _ = url.set_username("");
34
35 if url.path().ends_with('/') {
37 url.path_segments_mut().unwrap().pop_if_empty();
38 }
39
40 if url.host_str() == Some("github.com") {
46 let scheme = url.scheme().to_lowercase();
47 url.set_scheme(&scheme).unwrap();
48 let path = url.path().to_lowercase();
49 url.set_path(&path);
50 }
51
52 if let Some((prefix, suffix)) = url.path().rsplit_once('@') {
54 let needs_chopping = std::path::Path::new(prefix)
56 .extension()
57 .is_some_and(|ext| ext.eq_ignore_ascii_case("git"));
58 if needs_chopping {
59 let prefix = &prefix[..prefix.len() - 4];
60 let path = format!("{prefix}@{suffix}");
61 url.set_path(&path);
62 }
63 } else {
64 let needs_chopping = std::path::Path::new(url.path())
66 .extension()
67 .is_some_and(|ext| ext.eq_ignore_ascii_case("git"));
68 if needs_chopping {
69 let last = {
70 let last = url.path_segments().unwrap().next_back().unwrap();
73 last[..last.len() - 4].to_owned()
74 };
75 url.path_segments_mut().unwrap().pop().push(&last);
76 }
77 }
78
79 if memchr::memchr(b'%', url.path().as_bytes()).is_some() {
81 let decoded = url
83 .path_segments()
84 .unwrap()
85 .map(|segment| {
86 percent_encoding::percent_decode_str(segment)
87 .decode_utf8()
88 .unwrap_or(Cow::Borrowed(segment))
89 .into_owned()
90 })
91 .collect::<Vec<_>>();
92
93 let mut path_segments = url.path_segments_mut().unwrap();
94 path_segments.clear();
95 path_segments.extend(decoded);
96 }
97
98 Self(url)
99 }
100
101 pub fn parse(url: &str) -> Result<Self, DisplaySafeUrlError> {
102 Ok(Self::new(&DisplaySafeUrl::parse(url)?))
103 }
104}
105
106impl CacheKey for CanonicalUrl {
107 fn cache_key(&self, state: &mut CacheKeyHasher) {
108 self.0.as_str().cache_key(state);
111 }
112}
113
114impl Hash for CanonicalUrl {
115 fn hash<H: Hasher>(&self, state: &mut H) {
116 self.0.as_str().hash(state);
119 }
120}
121
122impl From<CanonicalUrl> for DisplaySafeUrl {
123 fn from(value: CanonicalUrl) -> Self {
124 value.0
125 }
126}
127
128impl std::fmt::Display for CanonicalUrl {
129 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
130 std::fmt::Display::fmt(&self.0, f)
131 }
132}
133
134#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
150pub struct RepositoryUrl {
151 repo_url: DisplaySafeUrl,
152 with_lfs: Option<bool>,
153}
154
155impl RepositoryUrl {
156 pub fn new(url: &DisplaySafeUrl) -> Self {
157 let mut url = CanonicalUrl::new(url).0;
158
159 if url.scheme().starts_with("git+") {
161 if let Some(prefix) = url
162 .path()
163 .rsplit_once('@')
164 .map(|(prefix, _suffix)| prefix.to_string())
165 {
166 url.set_path(&prefix);
167 }
168 }
169
170 url.set_fragment(None);
172 url.set_query(None);
173
174 Self {
175 repo_url: url,
176 with_lfs: None,
177 }
178 }
179
180 pub fn parse(url: &str) -> Result<Self, DisplaySafeUrlError> {
181 Ok(Self::new(&DisplaySafeUrl::parse(url)?))
182 }
183
184 #[must_use]
185 pub fn with_lfs(mut self, lfs: Option<bool>) -> Self {
186 self.with_lfs = lfs;
187 self
188 }
189}
190
191impl CacheKey for RepositoryUrl {
192 fn cache_key(&self, state: &mut CacheKeyHasher) {
193 self.repo_url.as_str().cache_key(state);
196 if let Some(true) = self.with_lfs {
197 1u8.cache_key(state);
198 }
199 }
200}
201
202impl Hash for RepositoryUrl {
203 fn hash<H: Hasher>(&self, state: &mut H) {
204 self.repo_url.as_str().hash(state);
207 if let Some(true) = self.with_lfs {
208 1u8.hash(state);
209 }
210 }
211}
212
213impl Deref for RepositoryUrl {
214 type Target = Url;
215
216 fn deref(&self) -> &Self::Target {
217 &self.repo_url
218 }
219}
220
221impl std::fmt::Display for RepositoryUrl {
222 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
223 std::fmt::Display::fmt(&self.repo_url, f)
224 }
225}
226
#[cfg(test)]
mod tests {
    use super::*;

    /// Credentials are stripped during canonicalization, so they must never influence the
    /// cache key.
    #[test]
    fn user_credential_does_not_affect_cache_key() -> Result<(), DisplaySafeUrlError> {
        // Baseline: no credentials at all.
        let mut hasher = CacheKeyHasher::new();
        CanonicalUrl::parse("https://example.com/pypa/sample-namespace-packages.git@2.0.0")?
            .cache_key(&mut hasher);
        let hash_without_creds = hasher.finish();

        // Username and password.
        let mut hasher = CacheKeyHasher::new();
        CanonicalUrl::parse(
            "https://user:foo@example.com/pypa/sample-namespace-packages.git@2.0.0",
        )?
        .cache_key(&mut hasher);
        let hash_with_creds = hasher.finish();
        assert_eq!(
            hash_without_creds, hash_with_creds,
            "URLs with no user credentials should hash the same as URLs with different user credentials",
        );

        // Same username, different password.
        let mut hasher = CacheKeyHasher::new();
        CanonicalUrl::parse(
            "https://user:bar@example.com/pypa/sample-namespace-packages.git@2.0.0",
        )?
        .cache_key(&mut hasher);
        let hash_with_creds = hasher.finish();
        assert_eq!(
            hash_without_creds, hash_with_creds,
            "URLs with different user credentials should hash the same",
        );

        // Password only.
        let mut hasher = CacheKeyHasher::new();
        CanonicalUrl::parse("https://:bar@example.com/pypa/sample-namespace-packages.git@2.0.0")?
            .cache_key(&mut hasher);
        let hash_with_creds = hasher.finish();
        assert_eq!(
            hash_without_creds, hash_with_creds,
            "URLs with no username, though with a password, should hash the same as URLs with different user credentials",
        );

        // Username only.
        let mut hasher = CacheKeyHasher::new();
        CanonicalUrl::parse("https://user:@example.com/pypa/sample-namespace-packages.git@2.0.0")?
            .cache_key(&mut hasher);
        let hash_with_creds = hasher.finish();
        assert_eq!(
            hash_without_creds, hash_with_creds,
            "URLs with no password, though with a username, should hash the same as URLs with different user credentials",
        );

        Ok(())
    }

    #[test]
    fn canonical_url() -> Result<(), DisplaySafeUrlError> {
        // Two URLs that should be considered equal regardless of the `.git` suffix.
        assert_eq!(
            CanonicalUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git")?,
            CanonicalUrl::parse("git+https://github.com/pypa/sample-namespace-packages")?,
        );

        // Two URLs that should be considered equal regardless of the `.git` suffix, even with a
        // trailing Git reference.
        assert_eq!(
            CanonicalUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git@2.0.0")?,
            CanonicalUrl::parse("git+https://github.com/pypa/sample-namespace-packages@2.0.0")?,
        );

        // Two URLs that point to different repositories should not be considered equal.
        assert_ne!(
            CanonicalUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git")?,
            CanonicalUrl::parse("git+https://github.com/pypa/sample-packages.git")?,
        );

        // Two URLs that differ only in their fragment should not be considered equal.
        assert_ne!(
            CanonicalUrl::parse(
                "git+https://github.com/pypa/sample-namespace-packages.git#subdirectory=pkg_resources/pkg_a"
            )?,
            CanonicalUrl::parse(
                "git+https://github.com/pypa/sample-namespace-packages.git#subdirectory=pkg_resources/pkg_b"
            )?,
        );

        // A URL with an `lfs` fragment should not be considered equal to one without it.
        assert_ne!(
            CanonicalUrl::parse(
                "git+https://github.com/pypa/sample-namespace-packages.git#lfs=true"
            )?,
            CanonicalUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git")?,
        );

        // Two URLs that differ in their Git reference should not be considered equal.
        assert_ne!(
            CanonicalUrl::parse(
                "git+https://github.com/pypa/sample-namespace-packages.git@v1.0.0"
            )?,
            CanonicalUrl::parse(
                "git+https://github.com/pypa/sample-namespace-packages.git@v2.0.0"
            )?,
        );

        // A malformed URL (note the doubled colon after the scheme) parses as a URL that cannot
        // be a base. Such URLs are returned without any normalization, so the `.git` suffix is
        // _not_ stripped and the two spellings stay distinct.
        assert_ne!(
            CanonicalUrl::parse("git+https:://github.com/pypa/sample-namespace-packages.git")?,
            CanonicalUrl::parse("git+https:://github.com/pypa/sample-namespace-packages")?,
        );

        // A percent-encoded slash is not a path separator, so these should not be considered
        // equal.
        assert_ne!(
            CanonicalUrl::parse("https://github.com/pypa/sample%2Fnamespace%2Fpackages")?,
            CanonicalUrl::parse("https://github.com/pypa/sample/namespace/packages")?,
        );

        // Two URLs that differ only in the percent-encoding of `+` should be considered equal.
        assert_eq!(
            CanonicalUrl::parse("https://github.com/pypa/sample%2Bnamespace%2Bpackages")?,
            CanonicalUrl::parse("https://github.com/pypa/sample+namespace+packages")?,
        );

        // The same holds for file URLs: an encoded slash is not a path separator.
        assert_ne!(
            CanonicalUrl::parse(
                "file:///home/ferris/my_project%2Fmy_project-0.1.0-py3-none-any.whl"
            )?,
            CanonicalUrl::parse(
                "file:///home/ferris/my_project/my_project-0.1.0-py3-none-any.whl"
            )?,
        );

        // File URLs that differ only in the percent-encoding of `+` should be considered equal.
        assert_eq!(
            CanonicalUrl::parse(
                "file:///home/ferris/my_project/my_project-0.1.0+foo-py3-none-any.whl"
            )?,
            CanonicalUrl::parse(
                "file:///home/ferris/my_project/my_project-0.1.0%2Bfoo-py3-none-any.whl"
            )?,
        );

        Ok(())
    }

    #[test]
    fn repository_url() -> Result<(), DisplaySafeUrlError> {
        // Two URLs that should be considered equal regardless of the `.git` suffix.
        assert_eq!(
            RepositoryUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git")?,
            RepositoryUrl::parse("git+https://github.com/pypa/sample-namespace-packages")?,
        );

        // Two URLs that should be considered equal regardless of the `.git` suffix, even with a
        // trailing Git reference.
        assert_eq!(
            RepositoryUrl::parse(
                "git+https://github.com/pypa/sample-namespace-packages.git@2.0.0"
            )?,
            RepositoryUrl::parse("git+https://github.com/pypa/sample-namespace-packages@2.0.0")?,
        );

        // Two URLs that point to different repositories should not be considered equal.
        assert_ne!(
            RepositoryUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git")?,
            RepositoryUrl::parse("git+https://github.com/pypa/sample-packages.git")?,
        );

        // Fragments are dropped, so URLs that differ only in their fragment are the same
        // repository.
        assert_eq!(
            RepositoryUrl::parse(
                "git+https://github.com/pypa/sample-namespace-packages.git#subdirectory=pkg_resources/pkg_a"
            )?,
            RepositoryUrl::parse(
                "git+https://github.com/pypa/sample-namespace-packages.git#subdirectory=pkg_resources/pkg_b"
            )?,
        );

        // Git references are dropped, so URLs that differ only in their reference are the same
        // repository.
        assert_eq!(
            RepositoryUrl::parse(
                "git+https://github.com/pypa/sample-namespace-packages.git@v1.0.0"
            )?,
            RepositoryUrl::parse(
                "git+https://github.com/pypa/sample-namespace-packages.git@v2.0.0"
            )?,
        );

        // The `lfs` fragment is dropped like any other fragment.
        assert_eq!(
            RepositoryUrl::parse(
                "git+https://github.com/pypa/sample-namespace-packages.git#lfs=true"
            )?,
            RepositoryUrl::parse("git+https://github.com/pypa/sample-namespace-packages.git")?,
        );

        Ok(())
    }

    #[test]
    fn repository_url_with_lfs() -> Result<(), DisplaySafeUrlError> {
        // Baseline: no credentials, no fragment, LFS flag left at its default.
        let mut hasher = CacheKeyHasher::new();
        RepositoryUrl::parse("https://example.com/pypa/sample-namespace-packages.git@2.0.0")?
            .cache_key(&mut hasher);
        let repo_url_basic = hasher.finish();

        // Credentials and fragment are both removed.
        let mut hasher = CacheKeyHasher::new();
        RepositoryUrl::parse(
            "https://user:foo@example.com/pypa/sample-namespace-packages.git@2.0.0#foo=bar",
        )?
        .cache_key(&mut hasher);
        let repo_url_with_fragments = hasher.finish();

        assert_eq!(
            repo_url_basic, repo_url_with_fragments,
            "repository urls should have the exact cache keys as fragments are removed",
        );

        // Explicitly setting the LFS flag to `None` matches the default.
        let mut hasher = CacheKeyHasher::new();
        RepositoryUrl::parse(
            "https://user:foo@example.com/pypa/sample-namespace-packages.git@2.0.0#foo=bar",
        )?
        .with_lfs(None)
        .cache_key(&mut hasher);
        let git_url_with_fragments = hasher.finish();

        assert_eq!(
            repo_url_with_fragments, git_url_with_fragments,
            "both structs should have the exact cache keys as fragments are still removed",
        );

        // `Some(false)` contributes nothing to the cache key.
        let mut hasher = CacheKeyHasher::new();
        RepositoryUrl::parse(
            "https://user:foo@example.com/pypa/sample-namespace-packages.git@2.0.0#foo=bar",
        )?
        .with_lfs(Some(false))
        .cache_key(&mut hasher);
        let git_url_with_fragments_and_lfs_false = hasher.finish();

        assert_eq!(
            git_url_with_fragments, git_url_with_fragments_and_lfs_false,
            "both structs should have the exact cache keys as lfs false should not influence them",
        );

        // `Some(true)` must produce a distinct cache key.
        let mut hasher = CacheKeyHasher::new();
        RepositoryUrl::parse(
            "https://user:foo@example.com/pypa/sample-namespace-packages.git@2.0.0#foo=bar",
        )?
        .with_lfs(Some(true))
        .cache_key(&mut hasher);
        let git_url_with_fragments_and_lfs_true = hasher.finish();

        assert_ne!(
            git_url_with_fragments, git_url_with_fragments_and_lfs_true,
            "both structs should have different cache keys as one has Git LFS enabled",
        );

        Ok(())
    }
}