uv-distribution-types 0.0.6

This is an internal component crate of uv
Documentation
use std::borrow::Cow;
use std::fmt::{self, Display, Formatter};
use std::str::FromStr;

use jiff::Timestamp;
use serde::{Deserialize, Serialize};

use uv_pep440::{VersionSpecifiers, VersionSpecifiersParseError};
use uv_pep508::split_scheme;
use uv_pypi_types::{CoreMetadata, HashDigests, Yanked};
use uv_redacted::{DisplaySafeUrl, DisplaySafeUrlError};
use uv_small_str::SmallString;

/// Error converting [`uv_pypi_types::PypiFile`] to [`distribution_type::File`].
#[derive(Debug, thiserror::Error)]
pub enum FileConversionError {
    #[error("Failed to parse `requires-python`: `{0}`")]
    RequiresPython(String, #[source] VersionSpecifiersParseError),
    #[error("Failed to parse URL: {0}")]
    Url(String, #[source] url::ParseError),
    #[error("Failed to parse filename from URL: {0}")]
    MissingPathSegments(String),
    #[error(transparent)]
    Utf8(#[from] std::str::Utf8Error),
}

/// Internal analog to [`uv_pypi_types::PypiFile`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)]
#[rkyv(derive(Debug))]
pub struct File {
    pub dist_info_metadata: bool,
    pub filename: SmallString,
    pub hashes: HashDigests,
    pub requires_python: Option<VersionSpecifiers>,
    pub size: Option<u64>,
    // N.B. We don't use a Jiff timestamp here because it's a little
    // annoying to do so with rkyv. Since we only use this field for doing
    // comparisons in testing, we just store it as a UTC timestamp in
    // milliseconds.
    pub upload_time_utc_ms: Option<i64>,
    pub url: FileLocation,
    pub yanked: Option<Box<Yanked>>,
    pub zstd: Option<Box<Zstd>>,
}

impl File {
    /// `TryFrom` instead of `From` to filter out files with invalid requires python version specifiers
    pub fn try_from_pypi(
        file: uv_pypi_types::PypiFile,
        base: &SmallString,
    ) -> Result<Self, FileConversionError> {
        Ok(Self {
            dist_info_metadata: file
                .core_metadata
                .as_ref()
                .is_some_and(CoreMetadata::is_available),
            filename: file.filename,
            hashes: HashDigests::from(file.hashes),
            requires_python: file
                .requires_python
                .transpose()
                .map_err(|err| FileConversionError::RequiresPython(err.line().clone(), err))?,
            size: file.size,
            upload_time_utc_ms: file.upload_time.map(Timestamp::as_millisecond),
            url: FileLocation::new(file.url, base),
            yanked: file.yanked,
            zstd: None,
        })
    }

    pub fn try_from_pyx(
        file: uv_pypi_types::PyxFile,
        base: &SmallString,
    ) -> Result<Self, FileConversionError> {
        let filename = if let Some(filename) = file.filename {
            filename
        } else {
            // Remove any query parameters or fragments from the URL to get the filename.
            let base_url = file
                .url
                .as_ref()
                .split_once('?')
                .or_else(|| file.url.as_ref().split_once('#'))
                .map(|(path, _)| path)
                .unwrap_or(file.url.as_ref());

            // Take the last segment, stripping any query or fragment.
            let last = base_url
                .split('/')
                .next_back()
                .ok_or_else(|| FileConversionError::MissingPathSegments(file.url.to_string()))?;

            // Decode the filename, which may be percent-encoded.
            let filename = percent_encoding::percent_decode_str(last).decode_utf8()?;

            SmallString::from(filename)
        };
        Ok(Self {
            filename,
            dist_info_metadata: file
                .core_metadata
                .as_ref()
                .is_some_and(CoreMetadata::is_available),
            hashes: HashDigests::from(file.hashes),
            requires_python: file
                .requires_python
                .transpose()
                .map_err(|err| FileConversionError::RequiresPython(err.line().clone(), err))?,
            size: file.size,
            upload_time_utc_ms: file.upload_time.map(Timestamp::as_millisecond),
            url: FileLocation::new(file.url, base),
            yanked: file.yanked,
            zstd: file
                .zstd
                .map(|zstd| Zstd {
                    hashes: HashDigests::from(zstd.hashes),
                    size: zstd.size,
                })
                .map(Box::new),
        })
    }
}

/// While a registry file is generally a remote URL, it can also be a file if it comes from a directory flat indexes.
#[derive(Debug, Clone, PartialEq, Eq, Hash, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)]
#[rkyv(derive(Debug))]
pub enum FileLocation {
    /// URL relative to the base URL.
    RelativeUrl(SmallString, SmallString),
    /// Absolute URL.
    AbsoluteUrl(UrlString),
}

impl FileLocation {
    /// Parse a relative or absolute URL on a page with a base URL.
    ///
    /// This follows the HTML semantics where a link on a page is resolved relative to the URL of
    /// that page.
    pub fn new(url: SmallString, base: &SmallString) -> Self {
        match split_scheme(&url) {
            Some(..) => Self::AbsoluteUrl(UrlString::new(url)),
            None => Self::RelativeUrl(base.clone(), url),
        }
    }

    /// Convert this location to a URL.
    ///
    /// A relative URL has its base joined to the path. An absolute URL is
    /// parsed as-is. And a path location is turned into a URL via the `file`
    /// protocol.
    ///
    /// # Errors
    ///
    /// This returns an error if any of the URL parsing fails, or if, for
    /// example, the location is a path and the path isn't valid UTF-8.
    /// (Because URLs must be valid UTF-8.)
    pub fn to_url(&self) -> Result<DisplaySafeUrl, ToUrlError> {
        match self {
            Self::RelativeUrl(base, path) => {
                let base_url =
                    DisplaySafeUrl::parse(base).map_err(|err| ToUrlError::InvalidBase {
                        base: base.to_string(),
                        err,
                    })?;
                let joined = base_url.join(path).map_err(|err| ToUrlError::InvalidJoin {
                    base: base.to_string(),
                    path: path.to_string(),
                    err,
                })?;
                Ok(joined)
            }
            Self::AbsoluteUrl(absolute) => absolute.to_url(),
        }
    }
}

impl Display for FileLocation {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        match self {
            Self::RelativeUrl(_base, url) => Display::fmt(&url, f),
            Self::AbsoluteUrl(url) => Display::fmt(&url.0, f),
        }
    }
}

/// A [`Url`] represented as a `String`.
///
/// This type is not guaranteed to be a valid URL, and may error on conversion.
#[derive(
    Debug,
    Clone,
    PartialEq,
    Eq,
    PartialOrd,
    Ord,
    Hash,
    Serialize,
    Deserialize,
    rkyv::Archive,
    rkyv::Deserialize,
    rkyv::Serialize,
)]
#[serde(transparent)]
#[rkyv(derive(Debug))]
pub struct UrlString(SmallString);

impl UrlString {
    /// Create a new [`UrlString`] from a [`String`].
    pub fn new(url: SmallString) -> Self {
        Self(url)
    }

    /// Converts a [`UrlString`] to a [`DisplaySafeUrl`].
    pub fn to_url(&self) -> Result<DisplaySafeUrl, ToUrlError> {
        DisplaySafeUrl::from_str(&self.0).map_err(|err| ToUrlError::InvalidAbsolute {
            absolute: self.0.to_string(),
            err,
        })
    }

    /// Return the [`UrlString`] with any query parameters and fragments removed.
    pub fn base_str(&self) -> &str {
        self.as_ref()
            .split_once('?')
            .or_else(|| self.as_ref().split_once('#'))
            .map(|(path, _)| path)
            .unwrap_or(self.as_ref())
    }

    /// Return the [`UrlString`] (as a [`Cow`]) with any fragments removed.
    #[must_use]
    pub fn without_fragment(&self) -> Cow<'_, Self> {
        self.as_ref()
            .split_once('#')
            .map(|(path, _)| Cow::Owned(Self(SmallString::from(path))))
            .unwrap_or(Cow::Borrowed(self))
    }
}

impl AsRef<str> for UrlString {
    fn as_ref(&self) -> &str {
        &self.0
    }
}

impl From<DisplaySafeUrl> for UrlString {
    fn from(value: DisplaySafeUrl) -> Self {
        Self(value.as_str().into())
    }
}

impl From<&DisplaySafeUrl> for UrlString {
    fn from(value: &DisplaySafeUrl) -> Self {
        Self(value.as_str().into())
    }
}

impl Display for UrlString {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(&self.0, f)
    }
}

/// An error that occurs when a [`FileLocation`] is not a valid URL.
#[derive(Clone, Debug, Eq, PartialEq, thiserror::Error)]
pub enum ToUrlError {
    /// An error that occurs when the base URL in [`FileLocation::Relative`]
    /// could not be parsed as a valid URL.
    #[error("Could not parse base URL `{base}` as a valid URL")]
    InvalidBase {
        /// The base URL that could not be parsed as a valid URL.
        base: String,
        /// The underlying URL parse error.
        #[source]
        err: DisplaySafeUrlError,
    },
    /// An error that occurs when the base URL could not be joined with
    /// the relative path in a [`FileLocation::Relative`].
    #[error("Could not join base URL `{base}` to relative path `{path}`")]
    InvalidJoin {
        /// The base URL that could not be parsed as a valid URL.
        base: String,
        /// The relative path segment.
        path: String,
        /// The underlying URL parse error.
        #[source]
        err: DisplaySafeUrlError,
    },
    /// An error that occurs when the absolute URL in [`FileLocation::Absolute`]
    /// could not be parsed as a valid URL.
    #[error("Could not parse absolute URL `{absolute}` as a valid URL")]
    InvalidAbsolute {
        /// The absolute URL that could not be parsed as a valid URL.
        absolute: String,
        /// The underlying URL parse error.
        #[source]
        err: DisplaySafeUrlError,
    },
}

#[derive(Debug, Clone, PartialEq, Eq, Hash, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)]
pub struct Zstd {
    pub hashes: HashDigests,
    pub size: Option<u64>,
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn base_str() {
        let url = UrlString("https://example.com/path?query#fragment".into());
        assert_eq!(url.base_str(), "https://example.com/path");

        let url = UrlString("https://example.com/path#fragment".into());
        assert_eq!(url.base_str(), "https://example.com/path");

        let url = UrlString("https://example.com/path".into());
        assert_eq!(url.base_str(), "https://example.com/path");
    }

    #[test]
    fn without_fragment() {
        // Borrows a URL without a fragment
        let url = UrlString("https://example.com/path".into());
        assert_eq!(&*url.without_fragment(), &url);
        assert!(matches!(url.without_fragment(), Cow::Borrowed(_)));

        // Removes the fragment if present on the URL
        let url = UrlString("https://example.com/path?query#fragment".into());
        assert_eq!(
            &*url.without_fragment(),
            &UrlString("https://example.com/path?query".into())
        );
        assert!(matches!(url.without_fragment(), Cow::Owned(_)));
    }
}