uv_pypi_types/
parsed_url.rs

1use std::fmt::{Display, Formatter};
2use std::path::{Path, PathBuf};
3
4use thiserror::Error;
5use url::Url;
6use uv_cache_key::{CacheKey, CacheKeyHasher};
7
8use uv_distribution_filename::{DistExtension, ExtensionError};
9use uv_git_types::{GitUrl, GitUrlParseError};
10use uv_pep508::{
11    Pep508Url, UnnamedRequirementUrl, VerbatimUrl, VerbatimUrlError, looks_like_git_repository,
12};
13use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError};
14
15use crate::{ArchiveInfo, DirInfo, DirectUrl, VcsInfo, VcsKind};
16
/// Errors produced while interpreting a URL or a local path as a [`ParsedUrl`].
#[derive(Debug, Error)]
pub enum ParsedUrlError {
    /// The URL scheme carries a `<prefix>+` component that is not `git` (e.g., `bzr+`, `hg+`,
    /// `svn+`, or an unrecognized prefix).
    #[error("Unsupported URL prefix `{prefix}` in URL: `{url}` ({message})")]
    UnsupportedUrlPrefix {
        /// The part of the scheme that precedes the `+`.
        prefix: String,
        /// The offending URL, rendered as a string.
        url: String,
        /// A short, static explanation of why the prefix was rejected.
        message: &'static str,
    },
    /// A `file` URL could not be converted into a filesystem path.
    #[error("Invalid path in file URL: `{0}`")]
    InvalidFileUrl(String),
    /// The URL could not be converted into a Git URL.
    #[error(transparent)]
    GitUrlParse(#[from] GitUrlParseError),
    /// The string (first field) could not be parsed as a URL; the second field is the cause.
    #[error("Not a valid URL: `{0}`")]
    UrlParse(String, #[source] DisplaySafeUrlError),
    /// An error bubbled up from constructing or reading a [`VerbatimUrl`].
    #[error(transparent)]
    VerbatimUrl(#[from] VerbatimUrlError),
    /// The URL has no recognized distribution extension, but looks like a Git repository that
    /// was written without the `git+` prefix.
    #[error(
        "Direct URL (`{0}`) references a Git repository, but is missing the `git+` prefix (e.g., `git+{0}`)"
    )]
    MissingGitPrefix(String),
    /// A remote URL does not end in a supported distribution file extension.
    #[error("Expected direct URL (`{0}`) to end in a supported file extension: {1}")]
    MissingExtensionUrl(String, ExtensionError),
    /// A local file path does not end in a supported distribution file extension.
    #[error("Expected path (`{0}`) to end in a supported file extension: {1}")]
    MissingExtensionPath(PathBuf, ExtensionError),
}
42
/// A [`ParsedUrl`] paired with the [`VerbatimUrl`] it was derived from.
#[derive(Debug, Clone, Hash, PartialEq, PartialOrd, Eq, Ord)]
pub struct VerbatimParsedUrl {
    /// The structured interpretation of the URL (path, directory, Git, or archive).
    pub parsed_url: ParsedUrl,
    /// The verbatim URL; this is what `Display` and `CacheKey` delegate to for this type.
    pub verbatim: VerbatimUrl,
}
48
49impl CacheKey for VerbatimParsedUrl {
50    fn cache_key(&self, state: &mut CacheKeyHasher) {
51        self.verbatim.cache_key(state);
52    }
53}
54
55impl VerbatimParsedUrl {
56    /// Returns `true` if the URL is editable.
57    pub fn is_editable(&self) -> bool {
58        self.parsed_url.is_editable()
59    }
60}
61
62impl Pep508Url for VerbatimParsedUrl {
63    type Err = ParsedUrlError;
64
65    fn parse_url(url: &str, working_dir: Option<&Path>) -> Result<Self, Self::Err> {
66        let verbatim = <VerbatimUrl as Pep508Url>::parse_url(url, working_dir)?;
67        Ok(Self {
68            parsed_url: ParsedUrl::try_from(verbatim.to_url())?,
69            verbatim,
70        })
71    }
72
73    fn displayable_with_credentials(&self) -> impl Display {
74        self.verbatim.displayable_with_credentials()
75    }
76}
77
78impl UnnamedRequirementUrl for VerbatimParsedUrl {
79    fn parse_path(
80        path: impl AsRef<Path>,
81        working_dir: impl AsRef<Path>,
82    ) -> Result<Self, Self::Err> {
83        let verbatim = VerbatimUrl::from_path(&path, &working_dir)?;
84        let verbatim_path = verbatim.as_path()?;
85        let is_dir = if let Ok(metadata) = verbatim_path.metadata() {
86            metadata.is_dir()
87        } else {
88            verbatim_path.extension().is_none()
89        };
90        let url = verbatim.to_url();
91        let install_path = verbatim.as_path()?.into_boxed_path();
92        let parsed_url = if is_dir {
93            ParsedUrl::Directory(ParsedDirectoryUrl {
94                url,
95                install_path,
96                editable: None,
97                r#virtual: None,
98            })
99        } else {
100            ParsedUrl::Path(ParsedPathUrl {
101                url,
102                install_path,
103                ext: DistExtension::from_path(&path).map_err(|err| {
104                    ParsedUrlError::MissingExtensionPath(path.as_ref().to_path_buf(), err)
105                })?,
106            })
107        };
108        Ok(Self {
109            parsed_url,
110            verbatim,
111        })
112    }
113
114    fn parse_absolute_path(path: impl AsRef<Path>) -> Result<Self, Self::Err> {
115        let verbatim = VerbatimUrl::from_absolute_path(&path)?;
116        let verbatim_path = verbatim.as_path()?;
117        let is_dir = if let Ok(metadata) = verbatim_path.metadata() {
118            metadata.is_dir()
119        } else {
120            verbatim_path.extension().is_none()
121        };
122        let url = verbatim.to_url();
123        let install_path = verbatim.as_path()?.into_boxed_path();
124        let parsed_url = if is_dir {
125            ParsedUrl::Directory(ParsedDirectoryUrl {
126                url,
127                install_path,
128                editable: None,
129                r#virtual: None,
130            })
131        } else {
132            ParsedUrl::Path(ParsedPathUrl {
133                url,
134                install_path,
135                ext: DistExtension::from_path(&path).map_err(|err| {
136                    ParsedUrlError::MissingExtensionPath(path.as_ref().to_path_buf(), err)
137                })?,
138            })
139        };
140        Ok(Self {
141            parsed_url,
142            verbatim,
143        })
144    }
145
146    fn parse_unnamed_url(url: impl AsRef<str>) -> Result<Self, Self::Err> {
147        let verbatim = <VerbatimUrl as UnnamedRequirementUrl>::parse_unnamed_url(&url)?;
148        Ok(Self {
149            parsed_url: ParsedUrl::try_from(verbatim.to_url())?,
150            verbatim,
151        })
152    }
153
154    fn with_given(self, given: impl AsRef<str>) -> Self {
155        Self {
156            verbatim: self.verbatim.with_given(given),
157            ..self
158        }
159    }
160
161    fn given(&self) -> Option<&str> {
162        self.verbatim.given()
163    }
164}
165
166impl Display for VerbatimParsedUrl {
167    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
168        Display::fmt(&self.verbatim, f)
169    }
170}
171
/// We support three types of URLs for distributions:
/// * The path to a local file or directory (`file://`).
/// * A Git repository (`git+https://` or `git+ssh://`), optionally with a subdirectory and/or
///   a revision to check out.
/// * A remote archive (`https://`), optionally with a subdirectory (source dist only).
///
/// A URL in a requirement `foo @ <url>` must be one of the above.
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash, Ord)]
pub enum ParsedUrl {
    /// The direct URL is a path to a local file.
    Path(ParsedPathUrl),
    /// The direct URL is a path to a local directory.
    Directory(ParsedDirectoryUrl),
    /// The direct URL is a URL to a Git repository.
    Git(ParsedGitUrl),
    /// The direct URL is a URL to a source archive (e.g., a `.tar.gz` file) or built archive
    /// (i.e., a `.whl` file).
    Archive(ParsedArchiveUrl),
}
191
192impl ParsedUrl {
193    /// Returns `true` if the URL is editable.
194    pub fn is_editable(&self) -> bool {
195        matches!(
196            self,
197            Self::Directory(ParsedDirectoryUrl {
198                editable: Some(true),
199                ..
200            })
201        )
202    }
203}
204
/// A local path URL for a file (i.e., a built or source distribution).
///
/// Examples:
/// * `file:///home/ferris/my_project/my_project-0.1.0.tar.gz`
/// * `file:///home/ferris/my_project/my_project-0.1.0-py3-none-any.whl`
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash, Ord)]
pub struct ParsedPathUrl {
    /// The URL of the distribution file.
    pub url: DisplaySafeUrl,
    /// The absolute path to the distribution which we use for installing.
    pub install_path: Box<Path>,
    /// The file extension, e.g. `tar.gz`, `zip`, etc.
    pub ext: DistExtension,
}
218
219impl ParsedPathUrl {
220    /// Construct a [`ParsedPathUrl`] from a path requirement source.
221    pub fn from_source(install_path: Box<Path>, ext: DistExtension, url: DisplaySafeUrl) -> Self {
222        Self {
223            url,
224            install_path,
225            ext,
226        }
227    }
228}
229
/// A local path URL for a source directory.
///
/// Examples:
/// * `file:///home/ferris/my_project`
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash, Ord)]
pub struct ParsedDirectoryUrl {
    /// The URL of the source directory.
    pub url: DisplaySafeUrl,
    /// The absolute path to the distribution which we use for installing.
    pub install_path: Box<Path>,
    /// Whether the project at the given URL should be installed in editable mode.
    ///
    /// `None` means no preference was specified.
    pub editable: Option<bool>,
    /// Whether the project at the given URL should be treated as a virtual package.
    ///
    /// `None` means no preference was specified.
    pub r#virtual: Option<bool>,
}
244
245impl ParsedDirectoryUrl {
246    /// Construct a [`ParsedDirectoryUrl`] from a path requirement source.
247    pub fn from_source(
248        install_path: Box<Path>,
249        editable: Option<bool>,
250        r#virtual: Option<bool>,
251        url: DisplaySafeUrl,
252    ) -> Self {
253        Self {
254            url,
255            install_path,
256            editable,
257            r#virtual,
258        }
259    }
260}
261
/// A Git repository URL.
///
/// Examples:
/// * `git+https://git.example.com/MyProject.git`
/// * `git+https://git.example.com/MyProject.git@v1.0#egg=pkg&subdirectory=pkg_dir`
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash, Ord)]
pub struct ParsedGitUrl {
    /// The Git URL (stored without the `git+` prefix when parsed via `TryFrom`).
    pub url: GitUrl,
    /// The subdirectory taken from the `subdirectory=` fragment component, if any.
    pub subdirectory: Option<Box<Path>>,
}
272
273impl ParsedGitUrl {
274    /// Construct a [`ParsedGitUrl`] from a Git requirement source.
275    pub fn from_source(url: GitUrl, subdirectory: Option<Box<Path>>) -> Self {
276        Self { url, subdirectory }
277    }
278}
279
280impl TryFrom<DisplaySafeUrl> for ParsedGitUrl {
281    type Error = ParsedUrlError;
282
283    /// Supports URLs with and without the `git+` prefix.
284    ///
285    /// When the URL includes a prefix, it's presumed to come from a PEP 508 requirement; when it's
286    /// excluded, it's presumed to come from `tool.uv.sources`.
287    fn try_from(url_in: DisplaySafeUrl) -> Result<Self, Self::Error> {
288        let subdirectory = get_subdirectory(&url_in).map(PathBuf::into_boxed_path);
289
290        let url = url_in
291            .as_str()
292            .strip_prefix("git+")
293            .unwrap_or(url_in.as_str());
294        let url = DisplaySafeUrl::parse(url)
295            .map_err(|err| ParsedUrlError::UrlParse(url.to_string(), err))?;
296        let url = GitUrl::try_from(url)?;
297        Ok(Self { url, subdirectory })
298    }
299}
300
/// A URL to a source or built archive.
///
/// Examples:
/// * A built distribution: `https://files.pythonhosted.org/packages/62/06/d5604a70d160f6a6ca5fd2ba25597c24abd5c5ca5f437263d177ac242308/tqdm-4.66.1-py2.py3-none-any.whl`
/// * A source distribution with a valid name: `https://files.pythonhosted.org/packages/62/06/d5604a70d160f6a6ca5fd2ba25597c24abd5c5ca5f437263d177ac242308/tqdm-4.66.1.tar.gz`
/// * A source dist with a recognizable extension but invalid name: `https://github.com/foo-labs/foo/archive/master.zip#egg=pkg&subdirectory=packages/bar`
#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
pub struct ParsedArchiveUrl {
    /// The archive URL (with the fragment removed when parsed via `TryFrom`).
    pub url: DisplaySafeUrl,
    /// The subdirectory taken from the `subdirectory=` fragment component, if any.
    pub subdirectory: Option<Box<Path>>,
    /// The distribution extension inferred from the URL path.
    pub ext: DistExtension,
}
313
314impl ParsedArchiveUrl {
315    /// Construct a [`ParsedArchiveUrl`] from a URL requirement source.
316    pub fn from_source(
317        location: DisplaySafeUrl,
318        subdirectory: Option<Box<Path>>,
319        ext: DistExtension,
320    ) -> Self {
321        Self {
322            url: location,
323            subdirectory,
324            ext,
325        }
326    }
327}
328
329impl TryFrom<DisplaySafeUrl> for ParsedArchiveUrl {
330    type Error = ParsedUrlError;
331
332    fn try_from(mut url: DisplaySafeUrl) -> Result<Self, Self::Error> {
333        // Extract the `#subdirectory` fragment, if present.
334        let subdirectory = get_subdirectory(&url).map(PathBuf::into_boxed_path);
335        url.set_fragment(None);
336
337        // Infer the extension from the path.
338        let ext = match DistExtension::from_path(url.path()) {
339            Ok(ext) => ext,
340            Err(..) if looks_like_git_repository(&url) => {
341                return Err(ParsedUrlError::MissingGitPrefix(url.to_string()));
342            }
343            Err(err) => return Err(ParsedUrlError::MissingExtensionUrl(url.to_string(), err)),
344        };
345
346        Ok(Self {
347            url,
348            subdirectory,
349            ext,
350        })
351    }
352}
353
354/// If the URL points to a subdirectory, extract it, as in (git):
355///   `git+https://git.example.com/MyProject.git@v1.0#subdirectory=pkg_dir`
356///   `git+https://git.example.com/MyProject.git@v1.0#egg=pkg&subdirectory=pkg_dir`
357/// or (direct archive url):
358///   `https://github.com/foo-labs/foo/archive/master.zip#subdirectory=packages/bar`
359///   `https://github.com/foo-labs/foo/archive/master.zip#egg=pkg&subdirectory=packages/bar`
360fn get_subdirectory(url: &Url) -> Option<PathBuf> {
361    let fragment = url.fragment()?;
362    let subdirectory = fragment
363        .split('&')
364        .find_map(|fragment| fragment.strip_prefix("subdirectory="))?;
365    Some(PathBuf::from(subdirectory))
366}
367
368impl TryFrom<DisplaySafeUrl> for ParsedUrl {
369    type Error = ParsedUrlError;
370
371    fn try_from(url: DisplaySafeUrl) -> Result<Self, Self::Error> {
372        if let Some((prefix, ..)) = url.scheme().split_once('+') {
373            match prefix {
374                "git" => Ok(Self::Git(ParsedGitUrl::try_from(url)?)),
375                "bzr" => Err(ParsedUrlError::UnsupportedUrlPrefix {
376                    prefix: prefix.to_string(),
377                    url: url.to_string(),
378                    message: "Bazaar is not supported",
379                }),
380                "hg" => Err(ParsedUrlError::UnsupportedUrlPrefix {
381                    prefix: prefix.to_string(),
382                    url: url.to_string(),
383                    message: "Mercurial is not supported",
384                }),
385                "svn" => Err(ParsedUrlError::UnsupportedUrlPrefix {
386                    prefix: prefix.to_string(),
387                    url: url.to_string(),
388                    message: "Subversion is not supported",
389                }),
390                _ => Err(ParsedUrlError::UnsupportedUrlPrefix {
391                    prefix: prefix.to_string(),
392                    url: url.to_string(),
393                    message: "Unknown scheme",
394                }),
395            }
396        } else if Path::new(url.path())
397            .extension()
398            .is_some_and(|ext| ext.eq_ignore_ascii_case("git"))
399        {
400            Ok(Self::Git(ParsedGitUrl::try_from(url)?))
401        } else if url.scheme().eq_ignore_ascii_case("file") {
402            let path = url
403                .to_file_path()
404                .map_err(|()| ParsedUrlError::InvalidFileUrl(url.to_string()))?;
405            let is_dir = if let Ok(metadata) = path.metadata() {
406                metadata.is_dir()
407            } else {
408                path.extension().is_none()
409            };
410            if is_dir {
411                Ok(Self::Directory(ParsedDirectoryUrl {
412                    url,
413                    install_path: path.into_boxed_path(),
414                    editable: None,
415                    r#virtual: None,
416                }))
417            } else {
418                Ok(Self::Path(ParsedPathUrl {
419                    url,
420                    ext: DistExtension::from_path(&path)
421                        .map_err(|err| ParsedUrlError::MissingExtensionPath(path.clone(), err))?,
422                    install_path: path.into_boxed_path(),
423                }))
424            }
425        } else {
426            Ok(Self::Archive(ParsedArchiveUrl::try_from(url)?))
427        }
428    }
429}
430
431impl From<&ParsedUrl> for DirectUrl {
432    fn from(value: &ParsedUrl) -> Self {
433        match value {
434            ParsedUrl::Path(value) => Self::from(value),
435            ParsedUrl::Directory(value) => Self::from(value),
436            ParsedUrl::Git(value) => Self::from(value),
437            ParsedUrl::Archive(value) => Self::from(value),
438        }
439    }
440}
441
442impl From<&ParsedPathUrl> for DirectUrl {
443    fn from(value: &ParsedPathUrl) -> Self {
444        Self::ArchiveUrl {
445            url: value.url.to_string(),
446            archive_info: ArchiveInfo {
447                hash: None,
448                hashes: None,
449            },
450            subdirectory: None,
451        }
452    }
453}
454
455impl From<&ParsedDirectoryUrl> for DirectUrl {
456    fn from(value: &ParsedDirectoryUrl) -> Self {
457        Self::LocalDirectory {
458            url: value.url.to_string(),
459            dir_info: DirInfo {
460                editable: value.editable,
461            },
462            subdirectory: None,
463        }
464    }
465}
466
467impl From<&ParsedArchiveUrl> for DirectUrl {
468    fn from(value: &ParsedArchiveUrl) -> Self {
469        Self::ArchiveUrl {
470            url: value.url.to_string(),
471            archive_info: ArchiveInfo {
472                hash: None,
473                hashes: None,
474            },
475            subdirectory: value.subdirectory.clone(),
476        }
477    }
478}
479
480impl From<&ParsedGitUrl> for DirectUrl {
481    fn from(value: &ParsedGitUrl) -> Self {
482        Self::VcsUrl {
483            url: value.url.repository().to_string(),
484            vcs_info: VcsInfo {
485                vcs: VcsKind::Git,
486                commit_id: value.url.precise().as_ref().map(ToString::to_string),
487                requested_revision: value.url.reference().as_str().map(ToString::to_string),
488            },
489            subdirectory: value.subdirectory.clone(),
490        }
491    }
492}
493
494impl From<ParsedUrl> for DisplaySafeUrl {
495    fn from(value: ParsedUrl) -> Self {
496        match value {
497            ParsedUrl::Path(value) => value.into(),
498            ParsedUrl::Directory(value) => value.into(),
499            ParsedUrl::Git(value) => value.into(),
500            ParsedUrl::Archive(value) => value.into(),
501        }
502    }
503}
504
505impl From<ParsedPathUrl> for DisplaySafeUrl {
506    fn from(value: ParsedPathUrl) -> Self {
507        value.url
508    }
509}
510
511impl From<ParsedDirectoryUrl> for DisplaySafeUrl {
512    fn from(value: ParsedDirectoryUrl) -> Self {
513        value.url
514    }
515}
516
517impl From<ParsedArchiveUrl> for DisplaySafeUrl {
518    fn from(value: ParsedArchiveUrl) -> Self {
519        let mut url = value.url;
520        if let Some(subdirectory) = value.subdirectory {
521            url.set_fragment(Some(&format!("subdirectory={}", subdirectory.display())));
522        }
523        url
524    }
525}
526
527impl From<ParsedGitUrl> for DisplaySafeUrl {
528    fn from(value: ParsedGitUrl) -> Self {
529        let mut url = Self::parse(&format!("{}{}", "git+", Self::from(value.url).as_str()))
530            .expect("Git URL is invalid");
531        if let Some(subdirectory) = value.subdirectory {
532            url.set_fragment(Some(&format!("subdirectory={}", subdirectory.display())));
533        }
534        url
535    }
536}
537
#[cfg(test)]
mod tests {
    use anyhow::Result;

    use crate::parsed_url::ParsedUrl;
    use uv_redacted::DisplaySafeUrl;

    /// Round-trip Git URLs through `ParsedUrl` and back to `DisplaySafeUrl`.
    #[test]
    fn direct_url_from_url() -> Result<()> {
        // A plain repository URL.
        let expected = DisplaySafeUrl::parse("git+https://github.com/pallets/flask.git")?;
        let actual = DisplaySafeUrl::from(ParsedUrl::try_from(expected.clone())?);
        assert_eq!(expected, actual);

        // A repository URL with a subdirectory fragment.
        let expected =
            DisplaySafeUrl::parse("git+https://github.com/pallets/flask.git#subdirectory=pkg_dir")?;
        let actual = DisplaySafeUrl::from(ParsedUrl::try_from(expected.clone())?);
        assert_eq!(expected, actual);

        // A repository URL with a revision.
        let expected = DisplaySafeUrl::parse("git+https://github.com/pallets/flask.git@2.0.0")?;
        let actual = DisplaySafeUrl::from(ParsedUrl::try_from(expected.clone())?);
        assert_eq!(expected, actual);

        // A repository URL with both a revision and a subdirectory.
        let expected = DisplaySafeUrl::parse(
            "git+https://github.com/pallets/flask.git@2.0.0#subdirectory=pkg_dir",
        )?;
        let actual = DisplaySafeUrl::from(ParsedUrl::try_from(expected.clone())?);
        assert_eq!(expected, actual);

        // Fragment components other than `subdirectory` (here, `egg`) are not preserved, so the
        // round-trip is expected to differ.
        // TODO(charlie): Preserve other fragments.
        let expected = DisplaySafeUrl::parse(
            "git+https://github.com/pallets/flask.git#egg=flask&subdirectory=pkg_dir",
        )?;
        let actual = DisplaySafeUrl::from(ParsedUrl::try_from(expected.clone())?);
        assert_ne!(expected, actual);

        Ok(())
    }

    /// Round-trip an absolute `file://` URL (Unix-only, since the path is Unix-style).
    #[test]
    #[cfg(unix)]
    fn direct_url_from_url_absolute() -> Result<()> {
        let expected = DisplaySafeUrl::parse("file:///path/to/directory")?;
        let actual = DisplaySafeUrl::from(ParsedUrl::try_from(expected.clone())?);
        assert_eq!(expected, actual);
        Ok(())
    }
}