iri-string 0.7.12

IRI as string types
Documentation
//! URI and IRI resolvers.
//!
//! # IRI resolution can fail without WHATWG URL Standard serialization
//!
//! ## Pure RFC 3986 algorithm
//!
//! Though this is not explicitly stated in RFC 3986, IRI resolution can fail.
//! Below are examples:
//!
//! * base=`scheme:`, ref=`.///bar`.
//!     + Resulting IRI should have scheme `scheme` and path `//bar`, but does not have authority.
//! * base=`scheme:foo`, ref=`.///bar`.
//!     + Resulting IRI should have scheme `scheme` and path `//bar`, but does not have authority.
//! * base=`scheme:`, ref=`/..//baz`.
//!     + Resulting IRI should have scheme `scheme` and path `//bar`, but does not have authority.
//! * base=`scheme:foo/bar`, ref=`..//baz`.
//!     + Resulting IRI should have scheme `scheme` and path `//bar`, but does not have authority.
//!
//! IRI without authority (note that this is different from "with empty authority")
//! cannot have a path starting with `//`, since it is ambiguous and can be
//! interpreted as an IRI with authority. For the above examples, `scheme://bar`
//! is not valid output, as `bar` in `scheme://bar` will be interpreted as an
//! authority, not a path.
//!
//! Thus, IRI resolution by pure RFC 3986 algorithm can fail for some abnormal
//! cases.
//!
//! Note that this kind of failure can happen only when the base IRI has no
//! authority and empty path. This would be rare in the wild, since many people
//! would use an IRI with authority part, such as `http://`.
//!
//! If you are handling `scheme://`-style URIs and IRIs, don't worry about the
//! failure. Currently no cases are known to fail when at least one of the base
//! IRI or the relative IRI contains authorities.
//!
//! If you want this kind of abnormal IRI resolution to succeed and to be
//! idempotent, check the resolution result using
//! [`Normalized::ensure_rfc3986_normalizable`] (see the section below).
//!
//! ## WHATWG serialization
//!
//! To handle IRI resolution failure, WHATWG URL Standard defines serialization
//! algorithm for this kind of result, and it makes IRI resolution (and even
//! normalization) infallible and idempotent.
//!
//! IRI resolution and normalization provided by this crate automatically
//! applies this special rule if necessary, so they are infallible. If you want
//! to detect resolution/normalization failure, use
//! [`Normalized::ensure_rfc3986_normalizable`] method.
//!
//! ## Examples
//!
//! ```
//! # #[cfg(feature = "alloc")] {
//! use iri_string::format::ToDedicatedString;
//! use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
//!
//! let base = IriAbsoluteStr::new("scheme:")?;
//! {
//!     let reference = IriReferenceStr::new(".///not-a-host")?;
//!     let result = reference.resolve_against(base);
//!     assert!(result.ensure_rfc3986_normalizable().is_err());
//!     assert_eq!(result.to_dedicated_string(), "scheme:/.//not-a-host");
//! }
//!
//! {
//!     let reference2 = IriReferenceStr::new("/..//not-a-host")?;
//!     // Resulting string will be `scheme://not-a-host`, but `not-a-host`
//!     // should be a path segment, not a host. So, the semantically correct
//!     // target IRI cannot be represented by RFC 3986 IRI resolution.
//!     let result2 = reference2.resolve_against(base);
//!     assert!(result2.ensure_rfc3986_normalizable().is_err());
//!
//!     // Algorithm defined in WHATWG URL Standard addresses this case.
//!     assert_eq!(result2.to_dedicated_string(), "scheme:/.//not-a-host");
//! }
//! # }
//! # Ok::<_, iri_string::validate::Error>(())
//! ```

use crate::components::RiReferenceComponents;
use crate::normalize::{NormalizationInput, Normalized};
use crate::spec::Spec;
use crate::types::{RiAbsoluteStr, RiQueryStr, RiReferenceStr, RiStr};

/// A resolver against the fixed base.
#[derive(Debug, Clone, Copy)]
pub struct FixedBaseResolver<'a, S: Spec> {
    /// Components of the base IRI.
    base_components: RiReferenceComponents<'a, S>,
}

impl<'a, S: Spec> FixedBaseResolver<'a, S> {
    /// Creates a new resolver with the given base.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// # // `ToDedicatedString` is available only when
    /// # // `alloc` feature is enabled.
    /// #[cfg(feature = "alloc")] {
    /// use iri_string::format::ToDedicatedString;
    /// use iri_string::resolve::FixedBaseResolver;
    /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
    ///
    /// let base = IriAbsoluteStr::new("http://example.com/base/")?;
    /// let resolver = FixedBaseResolver::new(base);
    ///
    /// let reference = IriReferenceStr::new("../there")?;
    /// let resolved = resolver.resolve(reference);
    ///
    /// assert_eq!(resolved.to_dedicated_string(), "http://example.com/there");
    /// # }
    /// # Ok::<_, Error>(())
    /// ```
    #[must_use]
    pub fn new(base: &'a RiAbsoluteStr<S>) -> Self {
        Self {
            base_components: RiReferenceComponents::from(base.as_ref()),
        }
    }

    /// Returns the base.
    ///
    /// # Examples
    ///
    /// ```
    /// use iri_string::resolve::FixedBaseResolver;
    /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
    ///
    /// let base = IriAbsoluteStr::new("http://example.com/base/")?;
    /// let resolver = FixedBaseResolver::new(base);
    ///
    /// assert_eq!(resolver.base(), base);
    /// # Ok::<_, iri_string::validate::Error>(())
    /// ```
    #[must_use]
    pub fn base(&self) -> &'a RiAbsoluteStr<S> {
        // SAFETY: `base_components` can only be created from `&RiAbsoluteStr<S>`,
        // and the type of `base_components` does not allow modification of the
        // content after it is created.
        unsafe {
            RiAbsoluteStr::new_unchecked_justified(
                self.base_components.iri().as_str(),
                "already validated on `FixedBaseResolver` creation",
            )
        }
    }
}

/// Components getters.
///
/// These getters are more efficient than calling through the result of `.base()`.
impl<S: Spec> FixedBaseResolver<'_, S> {
    /// Returns the scheme.
    ///
    /// The following colon is truncated.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// use iri_string::resolve::FixedBaseResolver;
    /// use iri_string::types::IriAbsoluteStr;
    ///
    /// let base = IriAbsoluteStr::new("http://example.com/base/?query")?;
    /// let resolver = FixedBaseResolver::new(base);
    ///
    /// assert_eq!(resolver.scheme_str(), "http");
    /// assert_eq!(base.scheme_str(), "http");
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    #[must_use]
    pub fn scheme_str(&self) -> &str {
        self.base_components
            .scheme_str()
            .expect("absolute IRI should have the scheme part")
    }

    /// Returns the authority.
    ///
    /// The leading `//` is truncated.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// use iri_string::resolve::FixedBaseResolver;
    /// use iri_string::types::IriAbsoluteStr;
    ///
    /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
    /// let resolver = FixedBaseResolver::new(base);
    ///
    /// assert_eq!(resolver.authority_str(), Some("user:pass@example.com"));
    /// assert_eq!(base.authority_str(), Some("user:pass@example.com"));
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    #[must_use]
    pub fn authority_str(&self) -> Option<&str> {
        self.base_components.authority_str()
    }

    /// Returns the path.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// use iri_string::resolve::FixedBaseResolver;
    /// use iri_string::types::IriAbsoluteStr;
    ///
    /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
    /// let resolver = FixedBaseResolver::new(base);
    ///
    /// assert_eq!(resolver.path_str(), "/base/");
    /// assert_eq!(base.path_str(), "/base/");
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    #[must_use]
    pub fn path_str(&self) -> &str {
        self.base_components.path_str()
    }

    /// Returns the query.
    ///
    /// The leading question mark (`?`) is truncated.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// use iri_string::resolve::FixedBaseResolver;
    /// use iri_string::types::{IriAbsoluteStr, IriQueryStr};
    ///
    /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
    /// let resolver = FixedBaseResolver::new(base);
    /// let query = IriQueryStr::new("query")?;
    ///
    /// assert_eq!(resolver.query(), Some(query));
    /// assert_eq!(base.query(), Some(query));
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    #[must_use]
    pub fn query(&self) -> Option<&RiQueryStr<S>> {
        let query_raw = self.query_str()?;
        // SAFETY: `RiAbsoluteStr` must have already validated the string
        // including the query, so the query must be valid.
        let query = unsafe {
            RiQueryStr::<S>::new_unchecked_justified(
                query_raw,
                "must be a valid query if present in an absolute-IRI",
            )
        };
        Some(query)
    }

    /// Returns the query in a raw string slice.
    ///
    /// The leading question mark (`?`) is truncated.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// use iri_string::resolve::FixedBaseResolver;
    /// use iri_string::types::IriAbsoluteStr;
    ///
    /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
    /// let resolver = FixedBaseResolver::new(base);
    ///
    /// assert_eq!(resolver.query_str(), Some("query"));
    /// assert_eq!(base.query_str(), Some("query"));
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    #[must_use]
    pub fn query_str(&self) -> Option<&str> {
        self.base_components.query_str()
    }
}

impl<'a, S: Spec> FixedBaseResolver<'a, S> {
    /// Resolves the given reference against the fixed base.
    ///
    /// The task returned by this method does **not** normalize the resolution
    /// result. However, `..` and `.` are recognized even when they are
    /// percent-encoded.
    ///
    /// # Failures
    ///
    /// This function itself does not fail, but resolution algorithm defined by
    /// RFC 3986 can fail. In that case, serialization algorithm defined by
    /// WHATWG URL Standard would be automatically applied.
    ///
    /// See the documentation of [`Normalized`].
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// # // `ToDedicatedString` is available only when
    /// # // `alloc` feature is enabled.
    /// # #[cfg(feature = "alloc")] {
    /// use iri_string::format::ToDedicatedString;
    /// use iri_string::resolve::FixedBaseResolver;
    /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
    ///
    /// let base = IriAbsoluteStr::new("http://example.com/base/")?;
    /// let resolver = FixedBaseResolver::new(base);
    ///
    /// let reference = IriReferenceStr::new("../there")?;
    /// let resolved = resolver.resolve(reference);
    ///
    /// assert_eq!(resolved.to_dedicated_string(), "http://example.com/there");
    /// # }
    /// # Ok::<_, Error>(())
    /// ```
    ///
    /// Note that `..` and `.` path segments are recognized even when they are
    /// percent-encoded.
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// # // `ToDedicatedString` is available only when
    /// # // `alloc` feature is enabled.
    /// # #[cfg(feature = "alloc")] {
    /// use iri_string::format::ToDedicatedString;
    /// use iri_string::resolve::FixedBaseResolver;
    /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
    ///
    /// let base = IriAbsoluteStr::new("HTTP://example.COM/base/base2/")?;
    /// let resolver = FixedBaseResolver::new(base);
    ///
    /// // `%2e%2e` is recognized as `..`.
    /// // However, `dot%2edot` is NOT normalized into `dot.dot`.
    /// let reference = IriReferenceStr::new("%2e%2e/../dot%2edot")?;
    /// let resolved = resolver.resolve(reference);
    ///
    /// // Resolved but not normalized.
    /// assert_eq!(resolved.to_dedicated_string(), "HTTP://example.COM/dot%2edot");
    /// # }
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    #[must_use]
    pub fn resolve(&self, reference: &'a RiReferenceStr<S>) -> Normalized<'a, RiStr<S>> {
        let input = NormalizationInput::with_resolution_params(&self.base_components, reference);
        Normalized::from_input(input)
    }
}