uv_pypi_types/
parsed_url.rs

1use std::fmt::{Display, Formatter};
2use std::path::{Path, PathBuf};
3
4use thiserror::Error;
5use url::Url;
6use uv_cache_key::{CacheKey, CacheKeyHasher};
7
8use uv_distribution_filename::{DistExtension, ExtensionError};
9use uv_git_types::{GitUrl, GitUrlParseError};
10use uv_pep508::{
11    Pep508Url, UnnamedRequirementUrl, VerbatimUrl, VerbatimUrlError, looks_like_git_repository,
12};
13use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError};
14
15use crate::{ArchiveInfo, DirInfo, DirectUrl, VcsInfo, VcsKind};
16
/// Errors produced while turning a URL or filesystem path into a [`ParsedUrl`].
#[derive(Debug, Error)]
pub enum ParsedUrlError {
    /// The URL scheme has a `<prefix>+` component other than `git+` (e.g., `hg+`, `svn+`).
    #[error("Unsupported URL prefix `{prefix}` in URL: `{url}` ({message})")]
    UnsupportedUrlPrefix {
        /// The part of the scheme preceding the `+`.
        prefix: String,
        /// The rejected URL, rendered as a string.
        url: String,
        /// A short, static explanation of why the prefix was rejected.
        message: &'static str,
    },
    /// A `file` URL could not be converted into a filesystem path.
    #[error("Invalid path in file URL: `{0}`")]
    InvalidFileUrl(String),
    /// The URL could not be interpreted as a Git URL.
    #[error(transparent)]
    GitUrlParse(#[from] GitUrlParseError),
    /// The string left after stripping a `git+` prefix failed to parse as a URL.
    #[error("Not a valid URL: `{0}`")]
    UrlParse(String, #[source] DisplaySafeUrlError),
    /// The underlying [`VerbatimUrl`] could not be constructed or converted.
    #[error(transparent)]
    VerbatimUrl(#[from] VerbatimUrlError),
    /// An archive URL has no recognized distribution extension, but looks like a Git repository.
    #[error(
        "Direct URL (`{0}`) references a Git repository, but is missing the `git+` prefix (e.g., `git+{0}`)"
    )]
    MissingGitPrefix(String),
    /// An archive URL does not end in a recognized distribution extension.
    #[error("Expected direct URL (`{0}`) to end in a supported file extension: {1}")]
    MissingExtensionUrl(String, ExtensionError),
    /// A local file path does not end in a recognized distribution extension.
    #[error("Expected path (`{0}`) to end in a supported file extension: {1}")]
    MissingExtensionPath(PathBuf, ExtensionError),
}
42
/// A URL together with its structured interpretation.
///
/// The `Display` and `CacheKey` implementations in this file use only `verbatim`; the derived
/// comparison and hashing traits consider both fields, `parsed_url` first.
#[derive(Debug, Clone, Hash, PartialEq, PartialOrd, Eq, Ord)]
pub struct VerbatimParsedUrl {
    /// The URL classified as a path, directory, Git repository, or archive.
    pub parsed_url: ParsedUrl,
    /// The URL as a [`VerbatimUrl`], from which `parsed_url` is derived by the parsers in this
    /// file.
    pub verbatim: VerbatimUrl,
}
48
49impl CacheKey for VerbatimParsedUrl {
50    fn cache_key(&self, state: &mut CacheKeyHasher) {
51        self.verbatim.cache_key(state);
52    }
53}
54
55impl VerbatimParsedUrl {
56    /// Returns `true` if the URL is editable.
57    pub fn is_editable(&self) -> bool {
58        self.parsed_url.is_editable()
59    }
60}
61
62impl Pep508Url for VerbatimParsedUrl {
63    type Err = ParsedUrlError;
64
65    fn parse_url(url: &str, working_dir: Option<&Path>) -> Result<Self, Self::Err> {
66        let verbatim = <VerbatimUrl as Pep508Url>::parse_url(url, working_dir)?;
67        Ok(Self {
68            parsed_url: ParsedUrl::try_from(verbatim.to_url())?,
69            verbatim,
70        })
71    }
72
73    fn displayable_with_credentials(&self) -> impl Display {
74        self.verbatim.displayable_with_credentials()
75    }
76}
77
78impl UnnamedRequirementUrl for VerbatimParsedUrl {
79    fn parse_path(
80        path: impl AsRef<Path>,
81        working_dir: impl AsRef<Path>,
82    ) -> Result<Self, Self::Err> {
83        let verbatim = VerbatimUrl::from_path(&path, &working_dir)?;
84        let verbatim_path = verbatim.as_path()?;
85        let is_dir = if let Ok(metadata) = verbatim_path.metadata() {
86            metadata.is_dir()
87        } else {
88            verbatim_path.extension().is_none()
89        };
90        let url = verbatim.to_url();
91        let install_path = verbatim.as_path()?.into_boxed_path();
92        let parsed_url = if is_dir {
93            ParsedUrl::Directory(ParsedDirectoryUrl {
94                url,
95                install_path,
96                editable: None,
97                r#virtual: None,
98            })
99        } else {
100            ParsedUrl::Path(ParsedPathUrl {
101                url,
102                install_path,
103                ext: DistExtension::from_path(&path).map_err(|err| {
104                    ParsedUrlError::MissingExtensionPath(path.as_ref().to_path_buf(), err)
105                })?,
106            })
107        };
108        Ok(Self {
109            parsed_url,
110            verbatim,
111        })
112    }
113
114    fn parse_absolute_path(path: impl AsRef<Path>) -> Result<Self, Self::Err> {
115        let verbatim = VerbatimUrl::from_absolute_path(&path)?;
116        let verbatim_path = verbatim.as_path()?;
117        let is_dir = if let Ok(metadata) = verbatim_path.metadata() {
118            metadata.is_dir()
119        } else {
120            verbatim_path.extension().is_none()
121        };
122        let url = verbatim.to_url();
123        let install_path = verbatim.as_path()?.into_boxed_path();
124        let parsed_url = if is_dir {
125            ParsedUrl::Directory(ParsedDirectoryUrl {
126                url,
127                install_path,
128                editable: None,
129                r#virtual: None,
130            })
131        } else {
132            ParsedUrl::Path(ParsedPathUrl {
133                url,
134                install_path,
135                ext: DistExtension::from_path(&path).map_err(|err| {
136                    ParsedUrlError::MissingExtensionPath(path.as_ref().to_path_buf(), err)
137                })?,
138            })
139        };
140        Ok(Self {
141            parsed_url,
142            verbatim,
143        })
144    }
145
146    fn parse_unnamed_url(url: impl AsRef<str>) -> Result<Self, Self::Err> {
147        let verbatim = <VerbatimUrl as UnnamedRequirementUrl>::parse_unnamed_url(&url)?;
148        Ok(Self {
149            parsed_url: ParsedUrl::try_from(verbatim.to_url())?,
150            verbatim,
151        })
152    }
153
154    fn with_given(self, given: impl AsRef<str>) -> Self {
155        Self {
156            verbatim: self.verbatim.with_given(given),
157            ..self
158        }
159    }
160
161    fn given(&self) -> Option<&str> {
162        self.verbatim.given()
163    }
164}
165
166impl Display for VerbatimParsedUrl {
167    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
168        Display::fmt(&self.verbatim, f)
169    }
170}
171
/// We support three types of URLs for distributions:
/// * The path to a file or directory (`file://`), represented by the [`ParsedUrl::Path`] and
///   [`ParsedUrl::Directory`] variants respectively.
/// * A Git repository (`git+https://` or `git+ssh://`), optionally with a subdirectory and/or
///   a revision to check out.
/// * A remote archive (`https://`), optionally with a subdirectory (source dist only).
///
/// A URL in a requirement `foo @ <url>` must be one of the above.
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash, Ord)]
pub enum ParsedUrl {
    /// The direct URL is a path to a local file.
    Path(ParsedPathUrl),
    /// The direct URL is a path to a local directory.
    Directory(ParsedDirectoryUrl),
    /// The direct URL is a path to a Git repository.
    Git(ParsedGitUrl),
    /// The direct URL is a URL to a source archive (e.g., a `.tar.gz` file) or built archive
    /// (i.e., a `.whl` file).
    Archive(ParsedArchiveUrl),
}
191
192impl ParsedUrl {
193    /// Returns `true` if the URL is editable.
194    pub fn is_editable(&self) -> bool {
195        matches!(
196            self,
197            Self::Directory(ParsedDirectoryUrl {
198                editable: Some(true),
199                ..
200            })
201        )
202    }
203}
204
/// A local path URL for a file (i.e., a built or source distribution).
///
/// Examples:
/// * `file:///home/ferris/my_project/my_project-0.1.0.tar.gz`
/// * `file:///home/ferris/my_project/my_project-0.1.0-py3-none-any.whl`
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash, Ord)]
pub struct ParsedPathUrl {
    /// The URL of the distribution file.
    pub url: DisplaySafeUrl,
    /// The absolute path to the distribution which we use for installing.
    pub install_path: Box<Path>,
    /// The file extension, e.g. `tar.gz`, `zip`, etc.
    pub ext: DistExtension,
}
218
219impl ParsedPathUrl {
220    /// Construct a [`ParsedPathUrl`] from a path requirement source.
221    pub fn from_source(install_path: Box<Path>, ext: DistExtension, url: DisplaySafeUrl) -> Self {
222        Self {
223            url,
224            install_path,
225            ext,
226        }
227    }
228}
229
/// A local path URL for a source directory.
///
/// Examples:
/// * `file:///home/ferris/my_project`
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash, Ord)]
pub struct ParsedDirectoryUrl {
    /// The URL of the source directory.
    pub url: DisplaySafeUrl,
    /// The absolute path to the distribution which we use for installing.
    pub install_path: Box<Path>,
    /// Whether the project at the given URL should be installed in editable mode.
    ///
    /// `None` when unspecified, as is the case for URLs parsed in this file.
    pub editable: Option<bool>,
    /// Whether the project at the given URL should be treated as a virtual package.
    ///
    /// `None` when unspecified, as is the case for URLs parsed in this file.
    pub r#virtual: Option<bool>,
}
244
245impl ParsedDirectoryUrl {
246    /// Construct a [`ParsedDirectoryUrl`] from a path requirement source.
247    pub fn from_source(
248        install_path: Box<Path>,
249        editable: Option<bool>,
250        r#virtual: Option<bool>,
251        url: DisplaySafeUrl,
252    ) -> Self {
253        Self {
254            url,
255            install_path,
256            editable,
257            r#virtual,
258        }
259    }
260}
261
/// A Git repository URL.
///
/// Explicit `lfs = true` or `--lfs` should be used to enable Git LFS support as
/// we do not support implicit parsing of the `lfs=true` url fragments for now.
///
/// Examples:
/// * `git+https://git.example.com/MyProject.git`
/// * `git+https://git.example.com/MyProject.git@v1.0#egg=pkg&subdirectory=pkg_dir`
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash, Ord)]
pub struct ParsedGitUrl {
    /// The Git URL, from which the repository, requested reference, precise commit, and LFS
    /// setting are read.
    pub url: GitUrl,
    /// The subdirectory within the repository, taken from the `subdirectory=` URL fragment when
    /// parsed from a URL.
    pub subdirectory: Option<Box<Path>>,
}
275
276impl ParsedGitUrl {
277    /// Construct a [`ParsedGitUrl`] from a Git requirement source.
278    pub fn from_source(url: GitUrl, subdirectory: Option<Box<Path>>) -> Self {
279        Self { url, subdirectory }
280    }
281}
282
283impl TryFrom<DisplaySafeUrl> for ParsedGitUrl {
284    type Error = ParsedUrlError;
285
286    /// Supports URLs with and without the `git+` prefix.
287    ///
288    /// When the URL includes a prefix, it's presumed to come from a PEP 508 requirement; when it's
289    /// excluded, it's presumed to come from `tool.uv.sources`.
290    fn try_from(url_in: DisplaySafeUrl) -> Result<Self, Self::Error> {
291        let subdirectory = get_subdirectory(&url_in).map(PathBuf::into_boxed_path);
292
293        let url = url_in
294            .as_str()
295            .strip_prefix("git+")
296            .unwrap_or(url_in.as_str());
297        let url = DisplaySafeUrl::parse(url)
298            .map_err(|err| ParsedUrlError::UrlParse(url.to_string(), err))?;
299        let url = GitUrl::try_from(url)?;
300        Ok(Self { url, subdirectory })
301    }
302}
303
/// A URL to a source or built archive.
///
/// Examples:
/// * A built distribution: `https://files.pythonhosted.org/packages/62/06/d5604a70d160f6a6ca5fd2ba25597c24abd5c5ca5f437263d177ac242308/tqdm-4.66.1-py2.py3-none-any.whl`
/// * A source distribution with a valid name: `https://files.pythonhosted.org/packages/62/06/d5604a70d160f6a6ca5fd2ba25597c24abd5c5ca5f437263d177ac242308/tqdm-4.66.1.tar.gz`
/// * A source dist with a recognizable extension but invalid name: `https://github.com/foo-labs/foo/archive/master.zip#egg=pkg&subdirectory=packages/bar`
#[derive(Debug, Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
pub struct ParsedArchiveUrl {
    /// The URL of the archive. When built via `TryFrom<DisplaySafeUrl>`, the fragment has been
    /// removed.
    pub url: DisplaySafeUrl,
    /// The subdirectory within the archive, taken from the `subdirectory=` URL fragment when
    /// parsed from a URL.
    pub subdirectory: Option<Box<Path>>,
    /// The archive's file extension, inferred from the URL path when parsed from a URL.
    pub ext: DistExtension,
}
316
317impl ParsedArchiveUrl {
318    /// Construct a [`ParsedArchiveUrl`] from a URL requirement source.
319    pub fn from_source(
320        location: DisplaySafeUrl,
321        subdirectory: Option<Box<Path>>,
322        ext: DistExtension,
323    ) -> Self {
324        Self {
325            url: location,
326            subdirectory,
327            ext,
328        }
329    }
330}
331
332impl TryFrom<DisplaySafeUrl> for ParsedArchiveUrl {
333    type Error = ParsedUrlError;
334
335    fn try_from(mut url: DisplaySafeUrl) -> Result<Self, Self::Error> {
336        // Extract the `#subdirectory` fragment, if present.
337        let subdirectory = get_subdirectory(&url).map(PathBuf::into_boxed_path);
338        url.set_fragment(None);
339
340        // Infer the extension from the path.
341        let ext = match DistExtension::from_path(url.path()) {
342            Ok(ext) => ext,
343            Err(..) if looks_like_git_repository(&url) => {
344                return Err(ParsedUrlError::MissingGitPrefix(url.to_string()));
345            }
346            Err(err) => return Err(ParsedUrlError::MissingExtensionUrl(url.to_string(), err)),
347        };
348
349        Ok(Self {
350            url,
351            subdirectory,
352            ext,
353        })
354    }
355}
356
357/// If the URL points to a subdirectory, extract it, as in (git):
358///   `git+https://git.example.com/MyProject.git@v1.0#subdirectory=pkg_dir`
359///   `git+https://git.example.com/MyProject.git@v1.0#egg=pkg&subdirectory=pkg_dir`
360/// or (direct archive url):
361///   `https://github.com/foo-labs/foo/archive/master.zip#subdirectory=packages/bar`
362///   `https://github.com/foo-labs/foo/archive/master.zip#egg=pkg&subdirectory=packages/bar`
363fn get_subdirectory(url: &Url) -> Option<PathBuf> {
364    let fragment = url.fragment()?;
365    let subdirectory = fragment
366        .split('&')
367        .find_map(|fragment| fragment.strip_prefix("subdirectory="))?;
368    Some(PathBuf::from(subdirectory))
369}
370
371impl TryFrom<DisplaySafeUrl> for ParsedUrl {
372    type Error = ParsedUrlError;
373
374    fn try_from(url: DisplaySafeUrl) -> Result<Self, Self::Error> {
375        if let Some((prefix, ..)) = url.scheme().split_once('+') {
376            match prefix {
377                "git" => Ok(Self::Git(ParsedGitUrl::try_from(url)?)),
378                "bzr" => Err(ParsedUrlError::UnsupportedUrlPrefix {
379                    prefix: prefix.to_string(),
380                    url: url.to_string(),
381                    message: "Bazaar is not supported",
382                }),
383                "hg" => Err(ParsedUrlError::UnsupportedUrlPrefix {
384                    prefix: prefix.to_string(),
385                    url: url.to_string(),
386                    message: "Mercurial is not supported",
387                }),
388                "svn" => Err(ParsedUrlError::UnsupportedUrlPrefix {
389                    prefix: prefix.to_string(),
390                    url: url.to_string(),
391                    message: "Subversion is not supported",
392                }),
393                _ => Err(ParsedUrlError::UnsupportedUrlPrefix {
394                    prefix: prefix.to_string(),
395                    url: url.to_string(),
396                    message: "Unknown scheme",
397                }),
398            }
399        } else if Path::new(url.path())
400            .extension()
401            .is_some_and(|ext| ext.eq_ignore_ascii_case("git"))
402        {
403            Ok(Self::Git(ParsedGitUrl::try_from(url)?))
404        } else if url.scheme().eq_ignore_ascii_case("file") {
405            let path = url
406                .to_file_path()
407                .map_err(|()| ParsedUrlError::InvalidFileUrl(url.to_string()))?;
408            let is_dir = if let Ok(metadata) = path.metadata() {
409                metadata.is_dir()
410            } else {
411                path.extension().is_none()
412            };
413            if is_dir {
414                Ok(Self::Directory(ParsedDirectoryUrl {
415                    url,
416                    install_path: path.into_boxed_path(),
417                    editable: None,
418                    r#virtual: None,
419                }))
420            } else {
421                Ok(Self::Path(ParsedPathUrl {
422                    url,
423                    ext: DistExtension::from_path(&path)
424                        .map_err(|err| ParsedUrlError::MissingExtensionPath(path.clone(), err))?,
425                    install_path: path.into_boxed_path(),
426                }))
427            }
428        } else {
429            Ok(Self::Archive(ParsedArchiveUrl::try_from(url)?))
430        }
431    }
432}
433
434impl From<&ParsedUrl> for DirectUrl {
435    fn from(value: &ParsedUrl) -> Self {
436        match value {
437            ParsedUrl::Path(value) => Self::from(value),
438            ParsedUrl::Directory(value) => Self::from(value),
439            ParsedUrl::Git(value) => Self::from(value),
440            ParsedUrl::Archive(value) => Self::from(value),
441        }
442    }
443}
444
445impl From<&ParsedPathUrl> for DirectUrl {
446    fn from(value: &ParsedPathUrl) -> Self {
447        Self::ArchiveUrl {
448            url: value.url.to_string(),
449            archive_info: ArchiveInfo {
450                hash: None,
451                hashes: None,
452            },
453            subdirectory: None,
454        }
455    }
456}
457
458impl From<&ParsedDirectoryUrl> for DirectUrl {
459    fn from(value: &ParsedDirectoryUrl) -> Self {
460        Self::LocalDirectory {
461            url: value.url.to_string(),
462            dir_info: DirInfo {
463                editable: value.editable,
464            },
465            subdirectory: None,
466        }
467    }
468}
469
470impl From<&ParsedArchiveUrl> for DirectUrl {
471    fn from(value: &ParsedArchiveUrl) -> Self {
472        Self::ArchiveUrl {
473            url: value.url.to_string(),
474            archive_info: ArchiveInfo {
475                hash: None,
476                hashes: None,
477            },
478            subdirectory: value.subdirectory.clone(),
479        }
480    }
481}
482
483impl From<&ParsedGitUrl> for DirectUrl {
484    fn from(value: &ParsedGitUrl) -> Self {
485        Self::VcsUrl {
486            url: value.url.repository().to_string(),
487            vcs_info: VcsInfo {
488                vcs: VcsKind::Git,
489                commit_id: value.url.precise().as_ref().map(ToString::to_string),
490                requested_revision: value.url.reference().as_str().map(ToString::to_string),
491                git_lfs: value.url.lfs().enabled().then_some(true),
492            },
493            subdirectory: value.subdirectory.clone(),
494        }
495    }
496}
497
498impl From<ParsedUrl> for DisplaySafeUrl {
499    fn from(value: ParsedUrl) -> Self {
500        match value {
501            ParsedUrl::Path(value) => value.into(),
502            ParsedUrl::Directory(value) => value.into(),
503            ParsedUrl::Git(value) => value.into(),
504            ParsedUrl::Archive(value) => value.into(),
505        }
506    }
507}
508
509impl From<ParsedPathUrl> for DisplaySafeUrl {
510    fn from(value: ParsedPathUrl) -> Self {
511        value.url
512    }
513}
514
515impl From<ParsedDirectoryUrl> for DisplaySafeUrl {
516    fn from(value: ParsedDirectoryUrl) -> Self {
517        value.url
518    }
519}
520
521impl From<ParsedArchiveUrl> for DisplaySafeUrl {
522    fn from(value: ParsedArchiveUrl) -> Self {
523        let mut url = value.url;
524        if let Some(subdirectory) = value.subdirectory {
525            url.set_fragment(Some(&format!("subdirectory={}", subdirectory.display())));
526        }
527        url
528    }
529}
530
531impl From<ParsedGitUrl> for DisplaySafeUrl {
532    fn from(value: ParsedGitUrl) -> Self {
533        let lfs = value.url.lfs().enabled();
534        let mut url = Self::parse(&format!("{}{}", "git+", Self::from(value.url).as_str()))
535            .expect("Git URL is invalid");
536        let mut frags: Vec<String> = Vec::new();
537        if let Some(subdirectory) = value.subdirectory {
538            frags.push(format!("subdirectory={}", subdirectory.display()));
539        }
540        // Displays nicely that lfs is used
541        if lfs {
542            frags.push("lfs=true".to_string());
543        }
544        if !frags.is_empty() {
545            url.set_fragment(Some(&frags.join("&")));
546        }
547        url
548    }
549}
550
#[cfg(test)]
mod tests {
    use anyhow::Result;

    use crate::parsed_url::ParsedUrl;
    use uv_redacted::DisplaySafeUrl;

    /// Parse `input`, convert it to a [`ParsedUrl`], and render it back to a URL.
    ///
    /// Returns the `(expected, actual)` pair so that callers can assert on it.
    fn round_trip(input: &str) -> Result<(DisplaySafeUrl, DisplaySafeUrl)> {
        let expected = DisplaySafeUrl::parse(input)?;
        let actual = DisplaySafeUrl::from(ParsedUrl::try_from(expected.clone())?);
        Ok((expected, actual))
    }

    #[test]
    fn direct_url_from_url() -> Result<()> {
        // URLs that survive the round trip unchanged.
        let preserved = [
            "git+https://github.com/pallets/flask.git",
            "git+https://github.com/pallets/flask.git#subdirectory=pkg_dir",
            "git+https://github.com/pallets/flask.git@2.0.0",
            "git+https://github.com/pallets/flask.git@2.0.0#subdirectory=pkg_dir",
        ];
        for input in preserved {
            let (expected, actual) = round_trip(input)?;
            assert_eq!(expected, actual);
        }

        // URLs that are (currently) altered by the round trip.
        let altered = [
            // We do not support implicit parsing of the `lfs=true` url fragments for now
            "git+https://github.com/pallets/flask.git#subdirectory=pkg_dir&lfs=true",
            // TODO(charlie): Preserve other fragments.
            "git+https://github.com/pallets/flask.git#egg=flask&subdirectory=pkg_dir",
        ];
        for input in altered {
            let (expected, actual) = round_trip(input)?;
            assert_ne!(expected, actual);
        }

        Ok(())
    }

    #[test]
    #[cfg(unix)]
    fn direct_url_from_url_absolute() -> Result<()> {
        let (expected, actual) = round_trip("file:///path/to/directory")?;
        assert_eq!(expected, actual);
        Ok(())
    }
}