Skip to main content

iri_string/
resolve.rs

1//! URI and IRI resolvers.
2//!
3//! # IRI resolution can fail without WHATWG URL Standard serialization
4//!
5//! ## Pure RFC 3986 algorithm
6//!
7//! Though this is not explicitly stated in RFC 3986, IRI resolution can fail.
8//! Below are examples:
9//!
10//! * base=`scheme:`, ref=`.///bar`.
11//!     + Resulting IRI should have scheme `scheme` and path `//bar`, but does not have authority.
12//! * base=`scheme:foo`, ref=`.///bar`.
13//!     + Resulting IRI should have scheme `scheme` and path `//bar`, but does not have authority.
14//! * base=`scheme:`, ref=`/..//baz`.
15//!     + Resulting IRI should have scheme `scheme` and path `//baz`, but does not have authority.
16//! * base=`scheme:foo/bar`, ref=`..//baz`.
17//!     + Resulting IRI should have scheme `scheme` and path `//baz`, but does not have authority.
18//!
19//! IRI without authority (note that this is different from "with empty authority")
20//! cannot have a path starting with `//`, since it is ambiguous and can be
21//! interpreted as an IRI with authority. For the above examples, `scheme://bar`
22//! is not valid output, as `bar` in `scheme://bar` will be interpreted as an
23//! authority, not a path.
24//!
25//! Thus, IRI resolution by pure RFC 3986 algorithm can fail for some abnormal
26//! cases.
27//!
28//! Note that this kind of failure can happen only when the base IRI has no
29//! authority (as in all the examples above). This would be rare in the wild,
30//! since many people would use an IRI with an authority part, such as `http://`.
31//!
32//! If you are handling `scheme://`-style URIs and IRIs, don't worry about the
33//! failure. Currently no cases are known to fail when at least one of the base
34//! IRI or the relative IRI contains authorities.
35//!
36//! If you want this kind of abnormal IRI resolution to succeed and to be
37//! idempotent, check the resolution result using
38//! [`Normalized::ensure_rfc3986_normalizable`] (see the section below).
39//!
40//! ## WHATWG serialization
41//!
42//! To handle IRI resolution failure, the WHATWG URL Standard defines a
43//! serialization algorithm for this kind of result, and it makes IRI resolution
44//! (and even normalization) infallible and idempotent.
45//!
46//! IRI resolution and normalization provided by this crate automatically
47//! applies this special rule if necessary, so they are infallible. If you want
48//! to detect resolution/normalization failure, use
49//! [`Normalized::ensure_rfc3986_normalizable`] method.
50//!
51//! ## Examples
52//!
53//! ```
54//! # #[cfg(feature = "alloc")] {
55//! use iri_string::format::ToDedicatedString;
56//! use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
57//!
58//! let base = IriAbsoluteStr::new("scheme:")?;
59//! {
60//!     let reference = IriReferenceStr::new(".///not-a-host")?;
61//!     let result = reference.resolve_against(base);
62//!     assert!(result.ensure_rfc3986_normalizable().is_err());
63//!     assert_eq!(result.to_dedicated_string(), "scheme:/.//not-a-host");
64//! }
65//!
66//! {
67//!     let reference2 = IriReferenceStr::new("/..//not-a-host")?;
68//!     // Resulting string will be `scheme://not-a-host`, but `not-a-host`
69//!     // should be a path segment, not a host. So, the semantically correct
70//!     // target IRI cannot be represented by RFC 3986 IRI resolution.
71//!     let result2 = reference2.resolve_against(base);
72//!     assert!(result2.ensure_rfc3986_normalizable().is_err());
73//!
74//!     // Algorithm defined in WHATWG URL Standard addresses this case.
75//!     assert_eq!(result2.to_dedicated_string(), "scheme:/.//not-a-host");
76//! }
77//! # }
78//! # Ok::<_, iri_string::validate::Error>(())
79//! ```
80
81use crate::components::RiReferenceComponents;
82use crate::normalize::{NormalizationInput, Normalized};
83use crate::spec::Spec;
84use crate::types::{RiAbsoluteStr, RiQueryStr, RiReferenceStr, RiStr};
85
86/// A resolver against the fixed base.
87#[derive(Debug, Clone, Copy)]
88pub struct FixedBaseResolver<'a, S: Spec> {
89    /// Components of the base IRI.
90    base_components: RiReferenceComponents<'a, S>,
91}
92
93impl<'a, S: Spec> FixedBaseResolver<'a, S> {
94    /// Creates a new resolver with the given base.
95    ///
96    /// # Examples
97    ///
98    /// ```
99    /// # use iri_string::validate::Error;
100    /// # // `ToDedicatedString` is available only when
101    /// # // `alloc` feature is enabled.
102    /// #[cfg(feature = "alloc")] {
103    /// use iri_string::format::ToDedicatedString;
104    /// use iri_string::resolve::FixedBaseResolver;
105    /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
106    ///
107    /// let base = IriAbsoluteStr::new("http://example.com/base/")?;
108    /// let resolver = FixedBaseResolver::new(base);
109    ///
110    /// let reference = IriReferenceStr::new("../there")?;
111    /// let resolved = resolver.resolve(reference);
112    ///
113    /// assert_eq!(resolved.to_dedicated_string(), "http://example.com/there");
114    /// # }
115    /// # Ok::<_, Error>(())
116    /// ```
117    #[must_use]
118    pub fn new(base: &'a RiAbsoluteStr<S>) -> Self {
119        Self {
120            base_components: RiReferenceComponents::from(base.as_ref()),
121        }
122    }
123
124    /// Returns the base.
125    ///
126    /// # Examples
127    ///
128    /// ```
129    /// use iri_string::resolve::FixedBaseResolver;
130    /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
131    ///
132    /// let base = IriAbsoluteStr::new("http://example.com/base/")?;
133    /// let resolver = FixedBaseResolver::new(base);
134    ///
135    /// assert_eq!(resolver.base(), base);
136    /// # Ok::<_, iri_string::validate::Error>(())
137    /// ```
138    #[must_use]
139    pub fn base(&self) -> &'a RiAbsoluteStr<S> {
140        // SAFETY: `base_components` can only be created from `&RiAbsoluteStr<S>`,
141        // and the type of `base_components` does not allow modification of the
142        // content after it is created.
143        unsafe {
144            RiAbsoluteStr::new_unchecked_justified(
145                self.base_components.iri().as_str(),
146                "already validated on `FixedBaseResolver` creation",
147            )
148        }
149    }
150}
151
152/// Components getters.
153///
154/// These getters are more efficient than calling through the result of `.base()`.
155impl<S: Spec> FixedBaseResolver<'_, S> {
156    /// Returns the scheme.
157    ///
158    /// The following colon is truncated.
159    ///
160    /// # Examples
161    ///
162    /// ```
163    /// # use iri_string::validate::Error;
164    /// use iri_string::resolve::FixedBaseResolver;
165    /// use iri_string::types::IriAbsoluteStr;
166    ///
167    /// let base = IriAbsoluteStr::new("http://example.com/base/?query")?;
168    /// let resolver = FixedBaseResolver::new(base);
169    ///
170    /// assert_eq!(resolver.scheme_str(), "http");
171    /// assert_eq!(base.scheme_str(), "http");
172    /// # Ok::<_, Error>(())
173    /// ```
174    #[inline]
175    #[must_use]
176    pub fn scheme_str(&self) -> &str {
177        self.base_components
178            .scheme_str()
179            .expect("absolute IRI should have the scheme part")
180    }
181
182    /// Returns the authority.
183    ///
184    /// The leading `//` is truncated.
185    ///
186    /// # Examples
187    ///
188    /// ```
189    /// # use iri_string::validate::Error;
190    /// use iri_string::resolve::FixedBaseResolver;
191    /// use iri_string::types::IriAbsoluteStr;
192    ///
193    /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
194    /// let resolver = FixedBaseResolver::new(base);
195    ///
196    /// assert_eq!(resolver.authority_str(), Some("user:pass@example.com"));
197    /// assert_eq!(base.authority_str(), Some("user:pass@example.com"));
198    /// # Ok::<_, Error>(())
199    /// ```
200    #[inline]
201    #[must_use]
202    pub fn authority_str(&self) -> Option<&str> {
203        self.base_components.authority_str()
204    }
205
206    /// Returns the path.
207    ///
208    /// # Examples
209    ///
210    /// ```
211    /// # use iri_string::validate::Error;
212    /// use iri_string::resolve::FixedBaseResolver;
213    /// use iri_string::types::IriAbsoluteStr;
214    ///
215    /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
216    /// let resolver = FixedBaseResolver::new(base);
217    ///
218    /// assert_eq!(resolver.path_str(), "/base/");
219    /// assert_eq!(base.path_str(), "/base/");
220    /// # Ok::<_, Error>(())
221    /// ```
222    #[inline]
223    #[must_use]
224    pub fn path_str(&self) -> &str {
225        self.base_components.path_str()
226    }
227
228    /// Returns the query.
229    ///
230    /// The leading question mark (`?`) is truncated.
231    ///
232    /// # Examples
233    ///
234    /// ```
235    /// # use iri_string::validate::Error;
236    /// use iri_string::resolve::FixedBaseResolver;
237    /// use iri_string::types::{IriAbsoluteStr, IriQueryStr};
238    ///
239    /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
240    /// let resolver = FixedBaseResolver::new(base);
241    /// let query = IriQueryStr::new("query")?;
242    ///
243    /// assert_eq!(resolver.query(), Some(query));
244    /// assert_eq!(base.query(), Some(query));
245    /// # Ok::<_, Error>(())
246    /// ```
247    #[inline]
248    #[must_use]
249    pub fn query(&self) -> Option<&RiQueryStr<S>> {
250        let query_raw = self.query_str()?;
251        // SAFETY: `RiAbsoluteStr` must have already validated the string
252        // including the query, so the query must be valid.
253        let query = unsafe {
254            RiQueryStr::<S>::new_unchecked_justified(
255                query_raw,
256                "must be a valid query if present in an absolute-IRI",
257            )
258        };
259        Some(query)
260    }
261
262    /// Returns the query in a raw string slice.
263    ///
264    /// The leading question mark (`?`) is truncated.
265    ///
266    /// # Examples
267    ///
268    /// ```
269    /// # use iri_string::validate::Error;
270    /// use iri_string::resolve::FixedBaseResolver;
271    /// use iri_string::types::IriAbsoluteStr;
272    ///
273    /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
274    /// let resolver = FixedBaseResolver::new(base);
275    ///
276    /// assert_eq!(resolver.query_str(), Some("query"));
277    /// assert_eq!(base.query_str(), Some("query"));
278    /// # Ok::<_, Error>(())
279    /// ```
280    #[inline]
281    #[must_use]
282    pub fn query_str(&self) -> Option<&str> {
283        self.base_components.query_str()
284    }
285}
286
287impl<'a, S: Spec> FixedBaseResolver<'a, S> {
288    /// Resolves the given reference against the fixed base.
289    ///
290    /// The task returned by this method does **not** normalize the resolution
291    /// result. However, `..` and `.` are recognized even when they are
292    /// percent-encoded.
293    ///
294    /// # Failures
295    ///
296    /// This function itself does not fail, but resolution algorithm defined by
297    /// RFC 3986 can fail. In that case, serialization algorithm defined by
298    /// WHATWG URL Standard would be automatically applied.
299    ///
300    /// See the documentation of [`Normalized`].
301    ///
302    /// # Examples
303    ///
304    /// ```
305    /// # use iri_string::validate::Error;
306    /// # // `ToDedicatedString` is available only when
307    /// # // `alloc` feature is enabled.
308    /// # #[cfg(feature = "alloc")] {
309    /// use iri_string::format::ToDedicatedString;
310    /// use iri_string::resolve::FixedBaseResolver;
311    /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
312    ///
313    /// let base = IriAbsoluteStr::new("http://example.com/base/")?;
314    /// let resolver = FixedBaseResolver::new(base);
315    ///
316    /// let reference = IriReferenceStr::new("../there")?;
317    /// let resolved = resolver.resolve(reference);
318    ///
319    /// assert_eq!(resolved.to_dedicated_string(), "http://example.com/there");
320    /// # }
321    /// # Ok::<_, Error>(())
322    /// ```
323    ///
324    /// Note that `..` and `.` path segments are recognized even when they are
325    /// percent-encoded.
326    ///
327    /// ```
328    /// # use iri_string::validate::Error;
329    /// # // `ToDedicatedString` is available only when
330    /// # // `alloc` feature is enabled.
331    /// # #[cfg(feature = "alloc")] {
332    /// use iri_string::format::ToDedicatedString;
333    /// use iri_string::resolve::FixedBaseResolver;
334    /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
335    ///
336    /// let base = IriAbsoluteStr::new("HTTP://example.COM/base/base2/")?;
337    /// let resolver = FixedBaseResolver::new(base);
338    ///
339    /// // `%2e%2e` is recognized as `..`.
340    /// // However, `dot%2edot` is NOT normalized into `dot.dot`.
341    /// let reference = IriReferenceStr::new("%2e%2e/../dot%2edot")?;
342    /// let resolved = resolver.resolve(reference);
343    ///
344    /// // Resolved but not normalized.
345    /// assert_eq!(resolved.to_dedicated_string(), "HTTP://example.COM/dot%2edot");
346    /// # }
347    /// # Ok::<_, Error>(())
348    /// ```
349    #[inline]
350    #[must_use]
351    pub fn resolve(&self, reference: &'a RiReferenceStr<S>) -> Normalized<'a, RiStr<S>> {
352        let input = NormalizationInput::with_resolution_params(&self.base_components, reference);
353        Normalized::from_input(input)
354    }
355}