uv_cache_key/
canonical_url.rs1use std::borrow::Cow;
2use std::fmt::{Debug, Formatter};
3use std::hash::{Hash, Hasher};
4use std::ops::Deref;
5
6use url::Url;
7use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError};
8
9use crate::cache_key::{CacheKey, CacheKeyHasher};
10
11#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
20pub struct CanonicalUrl(DisplaySafeUrl);
21
22impl CanonicalUrl {
23 pub fn new(url: &DisplaySafeUrl) -> Self {
24 let mut url = url.clone();
25
26 if url.cannot_be_a_base() {
28 return Self(url);
29 }
30
31 let _ = url.set_password(None);
33 let _ = url.set_username("");
34
35 if url.path().ends_with('/') {
37 url.path_segments_mut().unwrap().pop_if_empty();
38 }
39
40 if url.host_str() == Some("github.com") {
46 let scheme = url.scheme().to_lowercase();
47 url.set_scheme(&scheme).unwrap();
48 let path = url.path().to_lowercase();
49 url.set_path(&path);
50 }
51
52 if let Some((prefix, suffix)) = url.path().rsplit_once('@') {
54 let needs_chopping = std::path::Path::new(prefix)
56 .extension()
57 .is_some_and(|ext| ext.eq_ignore_ascii_case("git"));
58 if needs_chopping {
59 let prefix = &prefix[..prefix.len() - 4];
60 let path = format!("{prefix}@{suffix}");
61 url.set_path(&path);
62 }
63 } else {
64 let needs_chopping = std::path::Path::new(url.path())
66 .extension()
67 .is_some_and(|ext| ext.eq_ignore_ascii_case("git"));
68 if needs_chopping {
69 let last = {
70 let last = url.path_segments().unwrap().next_back().unwrap();
73 last[..last.len() - 4].to_owned()
74 };
75 url.path_segments_mut().unwrap().pop().push(&last);
76 }
77 }
78
79 if memchr::memchr(b'%', url.path().as_bytes()).is_some() {
81 let decoded = url
83 .path_segments()
84 .unwrap()
85 .map(|segment| {
86 percent_encoding::percent_decode_str(segment)
87 .decode_utf8()
88 .unwrap_or(Cow::Borrowed(segment))
89 .into_owned()
90 })
91 .collect::<Vec<_>>();
92
93 let mut path_segments = url.path_segments_mut().unwrap();
94 path_segments.clear();
95 path_segments.extend(decoded);
96 }
97
98 Self(url)
99 }
100
101 pub fn parse(url: &str) -> Result<Self, DisplaySafeUrlError> {
102 Ok(Self::new(&DisplaySafeUrl::parse(url)?))
103 }
104}
105
106impl CacheKey for CanonicalUrl {
107 fn cache_key(&self, state: &mut CacheKeyHasher) {
108 self.0.as_str().cache_key(state);
111 }
112}
113
114impl Hash for CanonicalUrl {
115 fn hash<H: Hasher>(&self, state: &mut H) {
116 self.0.as_str().hash(state);
119 }
120}
121
122impl From<CanonicalUrl> for DisplaySafeUrl {
123 fn from(value: CanonicalUrl) -> Self {
124 value.0
125 }
126}
127
128impl std::fmt::Display for CanonicalUrl {
129 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
130 std::fmt::Display::fmt(&self.0, f)
131 }
132}
133
134#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
143pub struct RepositoryUrl(DisplaySafeUrl);
144
145impl RepositoryUrl {
146 pub fn new(url: &DisplaySafeUrl) -> Self {
147 let mut url = CanonicalUrl::new(url).0;
148
149 if url.scheme().starts_with("git+") {
151 if let Some(prefix) = url
152 .path()
153 .rsplit_once('@')
154 .map(|(prefix, _suffix)| prefix.to_string())
155 {
156 url.set_path(&prefix);
157 }
158 }
159
160 url.set_fragment(None);
162 url.set_query(None);
163
164 Self(url)
165 }
166
167 pub fn parse(url: &str) -> Result<Self, DisplaySafeUrlError> {
168 Ok(Self::new(&DisplaySafeUrl::parse(url)?))
169 }
170}
171
172impl CacheKey for RepositoryUrl {
173 fn cache_key(&self, state: &mut CacheKeyHasher) {
174 self.0.as_str().cache_key(state);
177 }
178}
179
180impl Hash for RepositoryUrl {
181 fn hash<H: Hasher>(&self, state: &mut H) {
182 self.0.as_str().hash(state);
185 }
186}
187
188impl Deref for RepositoryUrl {
189 type Target = Url;
190
191 fn deref(&self) -> &Self::Target {
192 &self.0
193 }
194}
195
196impl std::fmt::Display for RepositoryUrl {
197 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
198 std::fmt::Display::fmt(&self.0, f)
199 }
200}
201
#[cfg(test)]
mod tests {
    use super::*;

    /// Credentials embedded in a URL must not influence the cache key of its canonical form.
    #[test]
    fn user_credential_does_not_affect_cache_key() -> Result<(), DisplaySafeUrlError> {
        // Cache-key digest of the canonical form of `url`, computed with a fresh hasher.
        let digest = |url: &str| {
            let mut hasher = CacheKeyHasher::new();
            CanonicalUrl::parse(url)?.cache_key(&mut hasher);
            Ok::<_, DisplaySafeUrlError>(hasher.finish())
        };

        let hash_without_creds =
            digest("https://example.com/pypa/sample-namespace-packages.git@2.0.0")?;

        let hash_with_creds =
            digest("https://user:foo@example.com/pypa/sample-namespace-packages.git@2.0.0")?;
        assert_eq!(
            hash_without_creds, hash_with_creds,
            "URLs with no user credentials should hash the same as URLs with different user credentials",
        );

        let hash_with_creds =
            digest("https://user:bar@example.com/pypa/sample-namespace-packages.git@2.0.0")?;
        assert_eq!(
            hash_without_creds, hash_with_creds,
            "URLs with different user credentials should hash the same",
        );

        let hash_with_creds =
            digest("https://:bar@example.com/pypa/sample-namespace-packages.git@2.0.0")?;
        assert_eq!(
            hash_without_creds, hash_with_creds,
            "URLs with no username, though with a password, should hash the same as URLs with different user credentials",
        );

        let hash_with_creds =
            digest("https://user:@example.com/pypa/sample-namespace-packages.git@2.0.0")?;
        assert_eq!(
            hash_without_creds, hash_with_creds,
            "URLs with no password, though with a username, should hash the same as URLs with different user credentials",
        );

        Ok(())
    }

    /// Equality rules for [`CanonicalUrl`].
    #[test]
    fn canonical_url() -> Result<(), DisplaySafeUrlError> {
        let canonical = |url: &str| CanonicalUrl::parse(url);

        // Equal regardless of the `.git` suffix.
        assert_eq!(
            canonical("git+https://github.com/pypa/sample-namespace-packages.git")?,
            canonical("git+https://github.com/pypa/sample-namespace-packages")?,
        );

        // Equal regardless of the `.git` suffix, also when a reference follows.
        assert_eq!(
            canonical("git+https://github.com/pypa/sample-namespace-packages.git@2.0.0")?,
            canonical("git+https://github.com/pypa/sample-namespace-packages@2.0.0")?,
        );

        // Different repositories are _not_ equal.
        assert_ne!(
            canonical("git+https://github.com/pypa/sample-namespace-packages.git")?,
            canonical("git+https://github.com/pypa/sample-packages.git")?,
        );

        // Different subdirectories are _not_ equal.
        assert_ne!(
            canonical(
                "git+https://github.com/pypa/sample-namespace-packages.git#subdirectory=pkg_resources/pkg_a"
            )?,
            canonical(
                "git+https://github.com/pypa/sample-namespace-packages.git#subdirectory=pkg_resources/pkg_b"
            )?,
        );

        // Different references are _not_ equal.
        assert_ne!(
            canonical("git+https://github.com/pypa/sample-namespace-packages.git@v1.0.0")?,
            canonical("git+https://github.com/pypa/sample-namespace-packages.git@v2.0.0")?,
        );

        // Two identical URLs that cannot be a base are equal.
        assert_eq!(
            canonical("git+https:://github.com/pypa/sample-namespace-packages.git")?,
            canonical("git+https:://github.com/pypa/sample-namespace-packages.git")?,
        );

        // A percent-encoded slash is _not_ the same as a path separator.
        assert_ne!(
            canonical("https://github.com/pypa/sample%2Fnamespace%2Fpackages")?,
            canonical("https://github.com/pypa/sample/namespace/packages")?,
        );

        // Equal regardless of percent-encoding.
        assert_eq!(
            canonical("https://github.com/pypa/sample%2Bnamespace%2Bpackages")?,
            canonical("https://github.com/pypa/sample+namespace+packages")?,
        );

        // A percent-encoded slash is _not_ the same as a path separator (file URL).
        assert_ne!(
            canonical("file:///home/ferris/my_project%2Fmy_project-0.1.0-py3-none-any.whl")?,
            canonical("file:///home/ferris/my_project/my_project-0.1.0-py3-none-any.whl")?,
        );

        // Equal regardless of percent-encoding (file URL).
        assert_eq!(
            canonical("file:///home/ferris/my_project/my_project-0.1.0+foo-py3-none-any.whl")?,
            canonical("file:///home/ferris/my_project/my_project-0.1.0%2Bfoo-py3-none-any.whl")?,
        );

        Ok(())
    }

    /// Equality rules for [`RepositoryUrl`].
    #[test]
    fn repository_url() -> Result<(), DisplaySafeUrlError> {
        let repository = |url: &str| RepositoryUrl::parse(url);

        // Equal regardless of the `.git` suffix.
        assert_eq!(
            repository("git+https://github.com/pypa/sample-namespace-packages.git")?,
            repository("git+https://github.com/pypa/sample-namespace-packages")?,
        );

        // Equal regardless of the `.git` suffix, also when a reference follows.
        assert_eq!(
            repository("git+https://github.com/pypa/sample-namespace-packages.git@2.0.0")?,
            repository("git+https://github.com/pypa/sample-namespace-packages@2.0.0")?,
        );

        // Different repositories are _not_ equal.
        assert_ne!(
            repository("git+https://github.com/pypa/sample-namespace-packages.git")?,
            repository("git+https://github.com/pypa/sample-packages.git")?,
        );

        // Same repository: equal regardless of the subdirectory.
        assert_eq!(
            repository(
                "git+https://github.com/pypa/sample-namespace-packages.git#subdirectory=pkg_resources/pkg_a"
            )?,
            repository(
                "git+https://github.com/pypa/sample-namespace-packages.git#subdirectory=pkg_resources/pkg_b"
            )?,
        );

        // Same repository: equal regardless of the reference.
        assert_eq!(
            repository("git+https://github.com/pypa/sample-namespace-packages.git@v1.0.0")?,
            repository("git+https://github.com/pypa/sample-namespace-packages.git@v2.0.0")?,
        );

        Ok(())
    }
}