iri_string/resolve.rs
1//! URI and IRI resolvers.
2//!
3//! # IRI resolution can fail without WHATWG URL Standard serialization
4//!
5//! ## Pure RFC 3986 algorithm
6//!
7//! Though this is not explicitly stated in RFC 3986, IRI resolution can fail.
8//! Below are examples:
9//!
10//! * base=`scheme:`, ref=`.///bar`.
11//! + Resulting IRI should have scheme `scheme` and path `//bar`, but does not have authority.
12//! * base=`scheme:foo`, ref=`.///bar`.
13//! + Resulting IRI should have scheme `scheme` and path `//bar`, but does not have authority.
14//! * base=`scheme:`, ref=`/..//baz`.
//! + Resulting IRI should have scheme `scheme` and path `//baz`, but does not have authority.
16//! * base=`scheme:foo/bar`, ref=`..//baz`.
//! + Resulting IRI should have scheme `scheme` and path `//baz`, but does not have authority.
18//!
19//! IRI without authority (note that this is different from "with empty authority")
20//! cannot have a path starting with `//`, since it is ambiguous and can be
21//! interpreted as an IRI with authority. For the above examples, `scheme://bar`
22//! is not valid output, as `bar` in `scheme://bar` will be interpreted as an
23//! authority, not a path.
24//!
25//! Thus, IRI resolution by pure RFC 3986 algorithm can fail for some abnormal
26//! cases.
27//!
//! Note that this kind of failure is known to happen only when the base IRI has
//! no authority (its path may be empty or not, as the examples above show).
//! This would be rare in the wild, since many people would use an IRI with
//! authority part, such as `http://`.
31//!
32//! If you are handling `scheme://`-style URIs and IRIs, don't worry about the
33//! failure. Currently no cases are known to fail when at least one of the base
34//! IRI or the relative IRI contains authorities.
35//!
36//! If you want this kind of abnormal IRI resolution to succeed and to be
37//! idempotent, check the resolution result using
38//! [`Normalized::ensure_rfc3986_normalizable`] (see the section below).
39//!
40//! ## WHATWG serialization
41//!
42//! To handle IRI resolution failure, WHATWG URL Standard defines serialization
43//! algorithm for this kind of result, and it makes IRI resolution (and even
44//! normalization) infallible and idempotent.
45//!
46//! IRI resolution and normalization provided by this crate automatically
47//! applies this special rule if necessary, so they are infallible. If you want
48//! to detect resolution/normalization failure, use
49//! [`Normalized::ensure_rfc3986_normalizable`] method.
50//!
51//! ## Examples
52//!
53//! ```
54//! # #[cfg(feature = "alloc")] {
55//! use iri_string::format::ToDedicatedString;
56//! use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
57//!
58//! let base = IriAbsoluteStr::new("scheme:")?;
59//! {
60//! let reference = IriReferenceStr::new(".///not-a-host")?;
61//! let result = reference.resolve_against(base);
62//! assert!(result.ensure_rfc3986_normalizable().is_err());
63//! assert_eq!(result.to_dedicated_string(), "scheme:/.//not-a-host");
64//! }
65//!
66//! {
67//! let reference2 = IriReferenceStr::new("/..//not-a-host")?;
68//! // Resulting string will be `scheme://not-a-host`, but `not-a-host`
69//! // should be a path segment, not a host. So, the semantically correct
70//! // target IRI cannot be represented by RFC 3986 IRI resolution.
71//! let result2 = reference2.resolve_against(base);
72//! assert!(result2.ensure_rfc3986_normalizable().is_err());
73//!
74//! // Algorithm defined in WHATWG URL Standard addresses this case.
75//! assert_eq!(result2.to_dedicated_string(), "scheme:/.//not-a-host");
76//! }
77//! # }
78//! # Ok::<_, iri_string::validate::Error>(())
79//! ```
80
81use crate::components::RiReferenceComponents;
82use crate::normalize::{NormalizationInput, Normalized};
83use crate::spec::Spec;
84use crate::types::{RiAbsoluteStr, RiQueryStr, RiReferenceStr, RiStr};
85
86/// A resolver against the fixed base.
87#[derive(Debug, Clone, Copy)]
88pub struct FixedBaseResolver<'a, S: Spec> {
89 /// Components of the base IRI.
90 base_components: RiReferenceComponents<'a, S>,
91}
92
93impl<'a, S: Spec> FixedBaseResolver<'a, S> {
94 /// Creates a new resolver with the given base.
95 ///
96 /// # Examples
97 ///
98 /// ```
99 /// # use iri_string::validate::Error;
100 /// # // `ToDedicatedString` is available only when
101 /// # // `alloc` feature is enabled.
102 /// #[cfg(feature = "alloc")] {
103 /// use iri_string::format::ToDedicatedString;
104 /// use iri_string::resolve::FixedBaseResolver;
105 /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
106 ///
107 /// let base = IriAbsoluteStr::new("http://example.com/base/")?;
108 /// let resolver = FixedBaseResolver::new(base);
109 ///
110 /// let reference = IriReferenceStr::new("../there")?;
111 /// let resolved = resolver.resolve(reference);
112 ///
113 /// assert_eq!(resolved.to_dedicated_string(), "http://example.com/there");
114 /// # }
115 /// # Ok::<_, Error>(())
116 /// ```
117 #[must_use]
118 pub fn new(base: &'a RiAbsoluteStr<S>) -> Self {
119 Self {
120 base_components: RiReferenceComponents::from(base.as_ref()),
121 }
122 }
123
124 /// Returns the base.
125 ///
126 /// # Examples
127 ///
128 /// ```
129 /// use iri_string::resolve::FixedBaseResolver;
130 /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
131 ///
132 /// let base = IriAbsoluteStr::new("http://example.com/base/")?;
133 /// let resolver = FixedBaseResolver::new(base);
134 ///
135 /// assert_eq!(resolver.base(), base);
136 /// # Ok::<_, iri_string::validate::Error>(())
137 /// ```
138 #[must_use]
139 pub fn base(&self) -> &'a RiAbsoluteStr<S> {
140 // SAFETY: `base_components` can only be created from `&RiAbsoluteStr<S>`,
141 // and the type of `base_components` does not allow modification of the
142 // content after it is created.
143 unsafe {
144 RiAbsoluteStr::new_unchecked_justified(
145 self.base_components.iri().as_str(),
146 "already validated on `FixedBaseResolver` creation",
147 )
148 }
149 }
150}
151
152/// Components getters.
153///
154/// These getters are more efficient than calling through the result of `.base()`.
155impl<S: Spec> FixedBaseResolver<'_, S> {
156 /// Returns the scheme.
157 ///
158 /// The following colon is truncated.
159 ///
160 /// # Examples
161 ///
162 /// ```
163 /// # use iri_string::validate::Error;
164 /// use iri_string::resolve::FixedBaseResolver;
165 /// use iri_string::types::IriAbsoluteStr;
166 ///
167 /// let base = IriAbsoluteStr::new("http://example.com/base/?query")?;
168 /// let resolver = FixedBaseResolver::new(base);
169 ///
170 /// assert_eq!(resolver.scheme_str(), "http");
171 /// assert_eq!(base.scheme_str(), "http");
172 /// # Ok::<_, Error>(())
173 /// ```
174 #[inline]
175 #[must_use]
176 pub fn scheme_str(&self) -> &str {
177 self.base_components
178 .scheme_str()
179 .expect("absolute IRI should have the scheme part")
180 }
181
182 /// Returns the authority.
183 ///
184 /// The leading `//` is truncated.
185 ///
186 /// # Examples
187 ///
188 /// ```
189 /// # use iri_string::validate::Error;
190 /// use iri_string::resolve::FixedBaseResolver;
191 /// use iri_string::types::IriAbsoluteStr;
192 ///
193 /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
194 /// let resolver = FixedBaseResolver::new(base);
195 ///
196 /// assert_eq!(resolver.authority_str(), Some("user:pass@example.com"));
197 /// assert_eq!(base.authority_str(), Some("user:pass@example.com"));
198 /// # Ok::<_, Error>(())
199 /// ```
200 #[inline]
201 #[must_use]
202 pub fn authority_str(&self) -> Option<&str> {
203 self.base_components.authority_str()
204 }
205
206 /// Returns the path.
207 ///
208 /// # Examples
209 ///
210 /// ```
211 /// # use iri_string::validate::Error;
212 /// use iri_string::resolve::FixedBaseResolver;
213 /// use iri_string::types::IriAbsoluteStr;
214 ///
215 /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
216 /// let resolver = FixedBaseResolver::new(base);
217 ///
218 /// assert_eq!(resolver.path_str(), "/base/");
219 /// assert_eq!(base.path_str(), "/base/");
220 /// # Ok::<_, Error>(())
221 /// ```
222 #[inline]
223 #[must_use]
224 pub fn path_str(&self) -> &str {
225 self.base_components.path_str()
226 }
227
228 /// Returns the query.
229 ///
230 /// The leading question mark (`?`) is truncated.
231 ///
232 /// # Examples
233 ///
234 /// ```
235 /// # use iri_string::validate::Error;
236 /// use iri_string::resolve::FixedBaseResolver;
237 /// use iri_string::types::{IriAbsoluteStr, IriQueryStr};
238 ///
239 /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
240 /// let resolver = FixedBaseResolver::new(base);
241 /// let query = IriQueryStr::new("query")?;
242 ///
243 /// assert_eq!(resolver.query(), Some(query));
244 /// assert_eq!(base.query(), Some(query));
245 /// # Ok::<_, Error>(())
246 /// ```
247 #[inline]
248 #[must_use]
249 pub fn query(&self) -> Option<&RiQueryStr<S>> {
250 let query_raw = self.query_str()?;
251 // SAFETY: `RiAbsoluteStr` must have already validated the string
252 // including the query, so the query must be valid.
253 let query = unsafe {
254 RiQueryStr::<S>::new_unchecked_justified(
255 query_raw,
256 "must be a valid query if present in an absolute-IRI",
257 )
258 };
259 Some(query)
260 }
261
262 /// Returns the query in a raw string slice.
263 ///
264 /// The leading question mark (`?`) is truncated.
265 ///
266 /// # Examples
267 ///
268 /// ```
269 /// # use iri_string::validate::Error;
270 /// use iri_string::resolve::FixedBaseResolver;
271 /// use iri_string::types::IriAbsoluteStr;
272 ///
273 /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
274 /// let resolver = FixedBaseResolver::new(base);
275 ///
276 /// assert_eq!(resolver.query_str(), Some("query"));
277 /// assert_eq!(base.query_str(), Some("query"));
278 /// # Ok::<_, Error>(())
279 /// ```
280 #[inline]
281 #[must_use]
282 pub fn query_str(&self) -> Option<&str> {
283 self.base_components.query_str()
284 }
285}
286
287impl<'a, S: Spec> FixedBaseResolver<'a, S> {
288 /// Resolves the given reference against the fixed base.
289 ///
290 /// The task returned by this method does **not** normalize the resolution
291 /// result. However, `..` and `.` are recognized even when they are
292 /// percent-encoded.
293 ///
294 /// # Failures
295 ///
296 /// This function itself does not fail, but resolution algorithm defined by
297 /// RFC 3986 can fail. In that case, serialization algorithm defined by
298 /// WHATWG URL Standard would be automatically applied.
299 ///
300 /// See the documentation of [`Normalized`].
301 ///
302 /// # Examples
303 ///
304 /// ```
305 /// # use iri_string::validate::Error;
306 /// # // `ToDedicatedString` is available only when
307 /// # // `alloc` feature is enabled.
308 /// # #[cfg(feature = "alloc")] {
309 /// use iri_string::format::ToDedicatedString;
310 /// use iri_string::resolve::FixedBaseResolver;
311 /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
312 ///
313 /// let base = IriAbsoluteStr::new("http://example.com/base/")?;
314 /// let resolver = FixedBaseResolver::new(base);
315 ///
316 /// let reference = IriReferenceStr::new("../there")?;
317 /// let resolved = resolver.resolve(reference);
318 ///
319 /// assert_eq!(resolved.to_dedicated_string(), "http://example.com/there");
320 /// # }
321 /// # Ok::<_, Error>(())
322 /// ```
323 ///
324 /// Note that `..` and `.` path segments are recognized even when they are
325 /// percent-encoded.
326 ///
327 /// ```
328 /// # use iri_string::validate::Error;
329 /// # // `ToDedicatedString` is available only when
330 /// # // `alloc` feature is enabled.
331 /// # #[cfg(feature = "alloc")] {
332 /// use iri_string::format::ToDedicatedString;
333 /// use iri_string::resolve::FixedBaseResolver;
334 /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
335 ///
336 /// let base = IriAbsoluteStr::new("HTTP://example.COM/base/base2/")?;
337 /// let resolver = FixedBaseResolver::new(base);
338 ///
339 /// // `%2e%2e` is recognized as `..`.
340 /// // However, `dot%2edot` is NOT normalized into `dot.dot`.
341 /// let reference = IriReferenceStr::new("%2e%2e/../dot%2edot")?;
342 /// let resolved = resolver.resolve(reference);
343 ///
344 /// // Resolved but not normalized.
345 /// assert_eq!(resolved.to_dedicated_string(), "HTTP://example.COM/dot%2edot");
346 /// # }
347 /// # Ok::<_, Error>(())
348 /// ```
349 #[inline]
350 #[must_use]
351 pub fn resolve(&self, reference: &'a RiReferenceStr<S>) -> Normalized<'a, RiStr<S>> {
352 let input = NormalizationInput::with_resolution_params(&self.base_components, reference);
353 Normalized::from_input(input)
354 }
355}