uv_distribution_types/
file.rs

1use std::borrow::Cow;
2use std::fmt::{self, Display, Formatter};
3use std::str::FromStr;
4
5use jiff::Timestamp;
6use serde::{Deserialize, Serialize};
7
8use uv_pep440::{VersionSpecifiers, VersionSpecifiersParseError};
9use uv_pep508::split_scheme;
10use uv_pypi_types::{CoreMetadata, HashDigests, Yanked};
11use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError};
12use uv_small_str::SmallString;
13
/// Error converting a [`uv_pypi_types::PypiFile`] or [`uv_pypi_types::PyxFile`] into a [`File`].
#[derive(Debug, thiserror::Error)]
pub enum FileConversionError {
    /// The `requires-python` value could not be parsed as version specifiers.
    ///
    /// The `String` is the text reported by the parse error (`err.line()`), kept so that it
    /// can be shown in the error message.
    #[error("Failed to parse `requires-python`: `{0}`")]
    RequiresPython(String, #[source] VersionSpecifiersParseError),
    /// A URL could not be parsed; the `String` is shown in the message as the offending URL.
    #[error("Failed to parse URL: {0}")]
    Url(String, #[source] url::ParseError),
    /// No filename could be extracted from the path of the given URL.
    #[error("Failed to parse filename from URL: {0}")]
    MissingPathSegments(String),
    /// A percent-decoded filename was not valid UTF-8.
    #[error(transparent)]
    Utf8(#[from] std::str::Utf8Error),
}
26
/// Internal analog to [`uv_pypi_types::PypiFile`].
///
/// Values are built from index responses via [`File::try_from_pypi`] and
/// [`File::try_from_pyx`]. The type derives the `rkyv` traits so that it can be archived.
#[derive(Debug, Clone, PartialEq, Eq, Hash, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)]
#[rkyv(derive(Debug))]
pub struct File {
    /// Whether the source reported core metadata as available for this file (the result of
    /// `CoreMetadata::is_available`; `false` when no core metadata entry was present).
    pub dist_info_metadata: bool,
    /// The name of the file. For pyx files without an explicit filename, this is derived from
    /// the last path segment of the URL.
    pub filename: SmallString,
    /// The hash digests reported for the file.
    pub hashes: HashDigests,
    /// The parsed `requires-python` specifiers, if the source provided any.
    pub requires_python: Option<VersionSpecifiers>,
    /// The size reported for the file, if any (presumably in bytes; confirm against the
    /// index API).
    pub size: Option<u64>,
    // N.B. We don't use a Jiff timestamp here because it's a little
    // annoying to do so with rkyv. Since we only use this field for doing
    // comparisons in testing, we just store it as a UTC timestamp in
    // milliseconds.
    /// The upload time as a timestamp in milliseconds, if the source provided one.
    pub upload_time_utc_ms: Option<i64>,
    /// Where the file lives: either an absolute URL, or a URL relative to the page it was
    /// listed on.
    pub url: FileLocation,
    /// The yanked status reported for the file, if any.
    pub yanked: Option<Box<Yanked>>,
    /// Hashes and size of the file's zstd variant, if any. Only [`File::try_from_pyx`]
    /// populates this; [`File::try_from_pypi`] always sets it to `None`.
    pub zstd: Option<Box<Zstd>>,
}
45
46impl File {
47    /// `TryFrom` instead of `From` to filter out files with invalid requires python version specifiers
48    pub fn try_from_pypi(
49        file: uv_pypi_types::PypiFile,
50        base: &SmallString,
51    ) -> Result<Self, FileConversionError> {
52        Ok(Self {
53            dist_info_metadata: file
54                .core_metadata
55                .as_ref()
56                .is_some_and(CoreMetadata::is_available),
57            filename: file.filename,
58            hashes: HashDigests::from(file.hashes),
59            requires_python: file
60                .requires_python
61                .transpose()
62                .map_err(|err| FileConversionError::RequiresPython(err.line().clone(), err))?,
63            size: file.size,
64            upload_time_utc_ms: file.upload_time.map(Timestamp::as_millisecond),
65            url: FileLocation::new(file.url, base),
66            yanked: file.yanked,
67            zstd: None,
68        })
69    }
70
71    pub fn try_from_pyx(
72        file: uv_pypi_types::PyxFile,
73        base: &SmallString,
74    ) -> Result<Self, FileConversionError> {
75        let filename = if let Some(filename) = file.filename {
76            filename
77        } else {
78            // Remove any query parameters or fragments from the URL to get the filename.
79            let base_url = file
80                .url
81                .as_ref()
82                .split_once('?')
83                .or_else(|| file.url.as_ref().split_once('#'))
84                .map(|(path, _)| path)
85                .unwrap_or(file.url.as_ref());
86
87            // Take the last segment, stripping any query or fragment.
88            let last = base_url
89                .split('/')
90                .next_back()
91                .ok_or_else(|| FileConversionError::MissingPathSegments(file.url.to_string()))?;
92
93            // Decode the filename, which may be percent-encoded.
94            let filename = percent_encoding::percent_decode_str(last).decode_utf8()?;
95
96            SmallString::from(filename)
97        };
98        Ok(Self {
99            filename,
100            dist_info_metadata: file
101                .core_metadata
102                .as_ref()
103                .is_some_and(CoreMetadata::is_available),
104            hashes: HashDigests::from(file.hashes),
105            requires_python: file
106                .requires_python
107                .transpose()
108                .map_err(|err| FileConversionError::RequiresPython(err.line().clone(), err))?,
109            size: file.size,
110            upload_time_utc_ms: file.upload_time.map(Timestamp::as_millisecond),
111            url: FileLocation::new(file.url, base),
112            yanked: file.yanked,
113            zstd: file
114                .zstd
115                .map(|zstd| Zstd {
116                    hashes: HashDigests::from(zstd.hashes),
117                    size: zstd.size,
118                })
119                .map(Box::new),
120        })
121    }
122}
123
/// The location of a registry file.
///
/// While a registry file is generally a remote URL, it can also be a local file if it comes
/// from a directory-based flat index.
#[derive(Debug, Clone, PartialEq, Eq, Hash, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)]
#[rkyv(derive(Debug))]
pub enum FileLocation {
    /// URL relative to the base URL.
    ///
    /// The first field is the base URL; the second is the relative URL to join onto it.
    RelativeUrl(SmallString, SmallString),
    /// Absolute URL.
    AbsoluteUrl(UrlString),
}
133
134impl FileLocation {
135    /// Parse a relative or absolute URL on a page with a base URL.
136    ///
137    /// This follows the HTML semantics where a link on a page is resolved relative to the URL of
138    /// that page.
139    pub fn new(url: SmallString, base: &SmallString) -> Self {
140        match split_scheme(&url) {
141            Some(..) => Self::AbsoluteUrl(UrlString::new(url)),
142            None => Self::RelativeUrl(base.clone(), url),
143        }
144    }
145
146    /// Convert this location to a URL.
147    ///
148    /// A relative URL has its base joined to the path. An absolute URL is
149    /// parsed as-is. And a path location is turned into a URL via the `file`
150    /// protocol.
151    ///
152    /// # Errors
153    ///
154    /// This returns an error if any of the URL parsing fails, or if, for
155    /// example, the location is a path and the path isn't valid UTF-8.
156    /// (Because URLs must be valid UTF-8.)
157    pub fn to_url(&self) -> Result<DisplaySafeUrl, ToUrlError> {
158        match self {
159            Self::RelativeUrl(base, path) => {
160                let base_url =
161                    DisplaySafeUrl::parse(base).map_err(|err| ToUrlError::InvalidBase {
162                        base: base.to_string(),
163                        err,
164                    })?;
165                let joined = base_url.join(path).map_err(|err| ToUrlError::InvalidJoin {
166                    base: base.to_string(),
167                    path: path.to_string(),
168                    err,
169                })?;
170                Ok(joined)
171            }
172            Self::AbsoluteUrl(absolute) => absolute.to_url(),
173        }
174    }
175}
176
177impl Display for FileLocation {
178    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
179        match self {
180            Self::RelativeUrl(_base, url) => Display::fmt(&url, f),
181            Self::AbsoluteUrl(url) => Display::fmt(&url.0, f),
182        }
183    }
184}
185
/// A URL represented as a string.
///
/// This type is not guaranteed to be a valid URL, and may error on conversion
/// (see [`UrlString::to_url`]).
///
/// With serde, it is (de)serialized transparently as the inner string.
#[derive(
    Debug,
    Clone,
    PartialEq,
    Eq,
    PartialOrd,
    Ord,
    Hash,
    Serialize,
    Deserialize,
    rkyv::Archive,
    rkyv::Deserialize,
    rkyv::Serialize,
)]
#[serde(transparent)]
#[rkyv(derive(Debug))]
pub struct UrlString(SmallString);
206
207impl UrlString {
208    /// Create a new [`UrlString`] from a [`String`].
209    pub fn new(url: SmallString) -> Self {
210        Self(url)
211    }
212
213    /// Converts a [`UrlString`] to a [`DisplaySafeUrl`].
214    pub fn to_url(&self) -> Result<DisplaySafeUrl, ToUrlError> {
215        DisplaySafeUrl::from_str(&self.0).map_err(|err| ToUrlError::InvalidAbsolute {
216            absolute: self.0.to_string(),
217            err,
218        })
219    }
220
221    /// Return the [`UrlString`] with any query parameters and fragments removed.
222    pub fn base_str(&self) -> &str {
223        self.as_ref()
224            .split_once('?')
225            .or_else(|| self.as_ref().split_once('#'))
226            .map(|(path, _)| path)
227            .unwrap_or(self.as_ref())
228    }
229
230    /// Return the [`UrlString`] (as a [`Cow`]) with any fragments removed.
231    #[must_use]
232    pub fn without_fragment(&self) -> Cow<'_, Self> {
233        self.as_ref()
234            .split_once('#')
235            .map(|(path, _)| Cow::Owned(Self(SmallString::from(path))))
236            .unwrap_or(Cow::Borrowed(self))
237    }
238}
239
240impl AsRef<str> for UrlString {
241    fn as_ref(&self) -> &str {
242        &self.0
243    }
244}
245
246impl From<DisplaySafeUrl> for UrlString {
247    fn from(value: DisplaySafeUrl) -> Self {
248        Self(value.as_str().into())
249    }
250}
251
252impl From<&DisplaySafeUrl> for UrlString {
253    fn from(value: &DisplaySafeUrl) -> Self {
254        Self(value.as_str().into())
255    }
256}
257
258impl Display for UrlString {
259    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
260        fmt::Display::fmt(&self.0, f)
261    }
262}
263
/// An error that occurs when a [`FileLocation`] (or a [`UrlString`]) is not a valid URL.
#[derive(Clone, Debug, Eq, PartialEq, thiserror::Error)]
pub enum ToUrlError {
    /// An error that occurs when the base URL in [`FileLocation::RelativeUrl`]
    /// could not be parsed as a valid URL.
    #[error("Could not parse base URL `{base}` as a valid URL")]
    InvalidBase {
        /// The base URL that could not be parsed as a valid URL.
        base: String,
        /// The underlying URL parse error.
        #[source]
        err: DisplaySafeUrlError,
    },
    /// An error that occurs when the base URL could not be joined with
    /// the relative path in a [`FileLocation::RelativeUrl`].
    #[error("Could not join base URL `{base}` to relative path `{path}`")]
    InvalidJoin {
        /// The base URL (which parsed successfully) that the path was joined onto.
        base: String,
        /// The relative path segment.
        path: String,
        /// The underlying error from the join.
        #[source]
        err: DisplaySafeUrlError,
    },
    /// An error that occurs when the absolute URL in [`FileLocation::AbsoluteUrl`]
    /// (or any other [`UrlString`]) could not be parsed as a valid URL.
    #[error("Could not parse absolute URL `{absolute}` as a valid URL")]
    InvalidAbsolute {
        /// The absolute URL that could not be parsed as a valid URL.
        absolute: String,
        /// The underlying URL parse error.
        #[source]
        err: DisplaySafeUrlError,
    },
}
300
301#[derive(Debug, Clone, PartialEq, Eq, Hash, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)]
302pub struct Zstd {
303    pub hashes: HashDigests,
304    pub size: Option<u64>,
305}
306
#[cfg(test)]
mod tests {
    use super::*;

    /// `base_str` strips the query and/or fragment and leaves a bare URL untouched.
    #[test]
    fn base_str() {
        let inputs = [
            "https://example.com/path?query#fragment",
            "https://example.com/path#fragment",
            "https://example.com/path",
        ];
        for input in inputs {
            let url = UrlString(input.into());
            assert_eq!(url.base_str(), "https://example.com/path");
        }
    }

    /// `without_fragment` borrows when there is nothing to strip and allocates otherwise.
    #[test]
    fn without_fragment() {
        // Borrows a URL without a fragment
        let plain = UrlString("https://example.com/path".into());
        let stripped = plain.without_fragment();
        assert_eq!(&*stripped, &plain);
        assert!(matches!(stripped, Cow::Borrowed(_)));

        // Removes the fragment if present on the URL
        let with_fragment = UrlString("https://example.com/path?query#fragment".into());
        let expected = UrlString("https://example.com/path?query".into());
        let stripped = with_fragment.without_fragment();
        assert_eq!(&*stripped, &expected);
        assert!(matches!(stripped, Cow::Owned(_)));
    }
}