fluent_uri/
resolve.rs

1//! Module for reference resolution.
2
3use crate::imp::{Meta, Ri, RiMaybeRef, RmrRef};
4use alloc::string::String;
5use borrow_or_share::Bos;
6use core::{fmt, num::NonZeroUsize};
7
/// The ways in which resolving a URI/IRI reference against a base can fail.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ResolveError {
    /// The base carries a fragment component.
    BaseWithFragment,
    /// The reference is relative, non-empty and does not begin with `'#'`,
    /// while the base has a rootless path and no authority.
    InvalidReferenceAgainstOpaqueBase,
    /// An underflow occurred during path resolution, i.e. a double-dot
    /// segment was met when no preceding segment was left to remove.
    ///
    /// Reported only when [`Resolver::allow_path_underflow`] is set to `false`.
    PathUnderflow,
}

impl fmt::Display for ResolveError {
    /// Writes the fixed, human-readable description of the error.
    ///
    /// The text is written with `write_str`, so formatter options such as
    /// width or alignment are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match *self {
            Self::BaseWithFragment => "base should not have fragment",
            Self::InvalidReferenceAgainstOpaqueBase => {
                "when base has a rootless path and no authority, reference should either have scheme, be empty or start with '#'"
            }
            Self::PathUnderflow => "underflow occurred in path resolution",
        })
    }
}
34
// Marks `ResolveError` as an implementor of `crate::Error`; this impl is only
// compiled when the `impl-error` feature is enabled. The body is empty, so
// every method comes from the trait's defaults.
// NOTE(review): `crate::Error` is presumably the crate's alias for the
// standard error trait - confirm at its definition.
#[cfg(feature = "impl-error")]
impl crate::Error for ResolveError {}
37
/// Resolves URI/IRI references against one fixed base, with configurable behavior.
///
/// # Examples
///
/// ```
/// use fluent_uri::{resolve::Resolver, Uri, UriRef};
///
/// let base = Uri::parse("http://example.com/foo/bar")?;
/// let resolver = Resolver::with_base(base);
///
/// assert_eq!(resolver.resolve(&UriRef::parse("baz")?).unwrap(), "http://example.com/foo/baz");
/// assert_eq!(resolver.resolve(&UriRef::parse("../baz")?).unwrap(), "http://example.com/baz");
/// assert_eq!(resolver.resolve(&UriRef::parse("?baz")?).unwrap(), "http://example.com/foo/bar?baz");
/// # Ok::<_, fluent_uri::ParseError>(())
/// ```
#[must_use]
#[derive(Debug, Clone, Copy)]
pub struct Resolver<R> {
    /// The base that every reference is resolved against.
    base: R,
    /// Whether an underflow in path resolution is tolerated instead of being
    /// reported as [`ResolveError::PathUnderflow`].
    allow_path_underflow: bool,
}
59
60impl<R: Ri> Resolver<R>
61where
62    R::Val: Bos<str>,
63{
64    /// Creates a new `Resolver` with the given base and default configuration.
65    pub fn with_base(base: R) -> Self {
66        Self {
67            base,
68            allow_path_underflow: true,
69        }
70    }
71
72    /// Sets whether to allow underflow in path resolution.
73    ///
74    /// This defaults to `true`. A value of `false` is a deviation from the
75    /// reference resolution algorithm defined in
76    /// [Section 5 of RFC 3986](https://datatracker.ietf.org/doc/html/rfc3986/#section-5).
77    ///
78    /// # Examples
79    ///
80    /// ```
81    /// use fluent_uri::{resolve::{Resolver, ResolveError}, Uri, UriRef};
82    ///
83    /// let base = Uri::parse("http://example.com/foo/bar")?;
84    /// let resolver = Resolver::with_base(base).allow_path_underflow(false);
85    ///
86    /// assert_eq!(resolver.resolve(&UriRef::parse("../../baz")?).unwrap_err(), ResolveError::PathUnderflow);
87    /// assert_eq!(resolver.resolve(&UriRef::parse("../../../baz")?).unwrap_err(), ResolveError::PathUnderflow);
88    /// assert_eq!(resolver.resolve(&UriRef::parse("/../baz")?).unwrap_err(), ResolveError::PathUnderflow);
89    /// # Ok::<_, fluent_uri::ParseError>(())
90    /// ```
91    pub fn allow_path_underflow(mut self, value: bool) -> Self {
92        self.allow_path_underflow = value;
93        self
94    }
95
96    /// Resolves the given reference against the configured base.
97    ///
98    /// See [`resolve_against`] for the exact behavior of this method.
99    ///
100    /// # Errors
101    ///
102    /// Returns `Err` on the same conditions as [`resolve_against`] or if an underflow
103    /// occurred in path resolution when [`allow_path_underflow`] is set to `false`.
104    ///
105    /// [`resolve_against`]: crate::UriRef::resolve_against
106    /// [`allow_path_underflow`]: Self::allow_path_underflow
107    pub fn resolve<T: Bos<str>>(
108        &self,
109        reference: &R::Ref<T>,
110    ) -> Result<R::WithVal<String>, ResolveError> {
111        resolve(
112            self.base.make_ref(),
113            reference.make_ref(),
114            self.allow_path_underflow,
115        )
116        .map(RiMaybeRef::from_pair)
117    }
118}
119
/// Resolves the reference `r` against `base`.
///
/// On success, returns the text of the resolution target together with the
/// `Meta` that records where its components lie in that text.
///
/// The target components (`t_*`) are first chosen from the reference and the
/// base, following the branch structure of the reference resolution algorithm
/// in Section 5.2.2 of RFC 3986. The output string is then assembled component
/// by component, removing dot segments from an absolute path while it is
/// being written.
///
/// When `allow_path_underflow` is `false`, an underflow reported by
/// [`remove_dot_segments`] is turned into an error instead of being ignored.
///
/// # Errors
///
/// - [`ResolveError::BaseWithFragment`] if `base` has a fragment.
/// - [`ResolveError::InvalidReferenceAgainstOpaqueBase`] if `base` has no
///   authority and a rootless path, while `r` has no scheme, is not empty and
///   does not start with `'#'`.
/// - [`ResolveError::PathUnderflow`] if an underflow occurred in path
///   resolution and `allow_path_underflow` is `false`.
///
/// # Panics
///
/// Panics if `base` has no scheme.
pub(crate) fn resolve(
    base: RmrRef<'_, '_>,
    /* reference */ r: RmrRef<'_, '_>,
    allow_path_underflow: bool,
) -> Result<(String, Meta), ResolveError> {
    // A base without a scheme is treated as a caller bug, not as an error value.
    assert!(base.has_scheme());

    if base.has_fragment() {
        return Err(ResolveError::BaseWithFragment);
    }
    // With no authority and a rootless base path, only a reference that has a
    // scheme, is empty, or starts with '#' is accepted.
    if !base.has_authority()
        && base.path().is_rootless()
        && !r.has_scheme()
        && !matches!(r.as_str().bytes().next(), None | Some(b'#'))
    {
        return Err(ResolveError::InvalidReferenceAgainstOpaqueBase);
    }

    // Components of the target. Each is assigned exactly once in the branches
    // below. `t_path` is a pair: the first part is always present, the second
    // is `Some` only when the reference path is to be appended to the
    // stripped base path.
    let (t_scheme, t_authority, t_path, t_query, t_fragment);

    let r_scheme = r.scheme_opt();
    let r_authority = r.authority();
    let r_path = r.path();
    let r_query = r.query();
    let r_fragment = r.fragment();

    if let Some(r_scheme) = r_scheme {
        // The reference has a scheme: everything but nothing of the base is used.
        t_scheme = r_scheme;
        t_authority = r_authority;
        t_path = (r_path.as_str(), None);
        t_query = r_query;
    } else {
        if r_authority.is_some() {
            // Network-path reference: only the scheme comes from the base.
            t_authority = r_authority;
            t_path = (r_path.as_str(), None);
            t_query = r_query;
        } else {
            if r_path.is_empty() {
                // Empty reference path: keep the base path, and keep the base
                // query too unless the reference brings its own.
                t_path = (base.path().as_str(), None);
                if r_query.is_some() {
                    t_query = r_query;
                } else {
                    t_query = base.query();
                }
            } else {
                if r_path.is_absolute() {
                    t_path = (r_path.as_str(), None);
                } else {
                    // Relative reference path: it is merged with the base path.
                    // An empty base path is treated as "/".
                    let base_path = base.path();
                    let base_path = if base_path.is_empty() {
                        "/"
                    } else {
                        base_path.as_str()
                    };

                    // Make sure that swapping the order of resolution and normalization
                    // does not change the result.
                    // NOTE(review): the `unwrap` relies on the base path containing
                    // a '/' here; presumably guaranteed by the opaque-base check
                    // above - confirm.
                    let last_slash_i = base_path.rfind('/').unwrap();
                    let last_seg = &base_path[last_slash_i + 1..];
                    // A trailing double-dot segment is kept so that it still
                    // takes effect during dot-segment removal; any other last
                    // segment is dropped, leaving the path up to and including
                    // its last '/'.
                    let base_path_stripped = match classify_segment(last_seg) {
                        SegKind::DoubleDot => base_path,
                        _ => &base_path[..=last_slash_i],
                    };

                    // Instead of merging the paths, remove dot segments incrementally.
                    t_path = (base_path_stripped, Some(r_path.as_str()));
                }
                t_query = r_query;
            }
            t_authority = base.authority();
        }
        t_scheme = base.scheme();
    }
    // The fragment always comes from the reference.
    t_fragment = r_fragment;

    // Calculate the output length.
    // The extra bytes counted are the ':' after the scheme, the "//" before
    // the authority, the '?' before the query and the '#' before the fragment.
    // NOTE(review): the "/." that may be inserted further down is not budgeted
    // for; presumably that insertion can only follow the removal of at least
    // one dot segment, which keeps the final length within `len` - confirm.
    let mut len = t_scheme.as_str().len() + 1;
    if let Some(authority) = t_authority {
        len += authority.as_str().len() + 2;
    }
    len += t_path.0.len() + t_path.1.map_or(0, |s| s.len());
    if let Some(query) = t_query {
        len += query.len() + 1;
    }
    if let Some(fragment) = t_fragment {
        len += fragment.len() + 1;
    }

    let mut buf = String::with_capacity(len);
    let mut meta = Meta::default();

    buf.push_str(t_scheme.as_str());
    // The scheme ends where the ':' is about to be written.
    meta.scheme_end = NonZeroUsize::new(buf.len());
    buf.push(':');

    if let Some(authority) = t_authority {
        let mut auth_meta = authority.meta();
        buf.push_str("//");

        // Shift the host bounds by the offset at which the authority starts in
        // `buf`.
        // NOTE(review): presumably the bounds in `authority.meta()` are
        // relative to the start of the authority - confirm.
        auth_meta.host_bounds.0 += buf.len();
        auth_meta.host_bounds.1 += buf.len();

        buf.push_str(authority.as_str());
        meta.auth_meta = Some(auth_meta);
    }

    let path_start = buf.len();
    meta.path_bounds.0 = path_start;

    if t_path.0.starts_with('/') {
        // Hand over one slice element when there is no second part, and two
        // otherwise.
        let path = [t_path.0, t_path.1.unwrap_or("")];
        let path = &path[..t_path.1.is_some() as usize + 1];

        let underflow_occurred = remove_dot_segments(&mut buf, path_start, path);
        if underflow_occurred && !allow_path_underflow {
            return Err(ResolveError::PathUnderflow);
        }
    } else {
        // A path that does not start with '/' is copied verbatim.
        // NOTE(review): the second part is not written on this branch; it
        // appears to always be `None` here - confirm.
        buf.push_str(t_path.0);
    }

    // Close the loophole in the original algorithm.
    // Without an authority, a leading "//" in the path is prefixed with "/."
    // (presumably so that the output is not read back as having an authority).
    if t_authority.is_none() && buf[path_start..].starts_with("//") {
        buf.insert_str(path_start, "/.");
    }

    meta.path_bounds.1 = buf.len();

    if let Some(query) = t_query {
        buf.push('?');
        buf.push_str(query.as_str());
        meta.query_end = NonZeroUsize::new(buf.len());
    }

    if let Some(fragment) = t_fragment {
        buf.push('#');
        buf.push_str(fragment.as_str());
    }

    // The capacity computed up front is expected to be an upper bound.
    debug_assert!(buf.len() <= len);

    Ok((buf, meta))
}
263
/// Appends the pieces of `path` to `buf` while dropping dot segments.
///
/// The pieces are processed in order, as if they were one concatenated path.
/// `start` is the index in `buf` at which the path begins. A single-dot
/// segment is skipped, a double-dot segment removes the segment written last,
/// and any other segment is appended unchanged (with its trailing `'/'`, if
/// it has one).
///
/// Returns `true` if at least one double-dot segment was met while `buf` held
/// no more than one byte of path, in which case that segment is ignored.
pub(crate) fn remove_dot_segments(buf: &mut String, start: usize, path: &[&str]) -> bool {
    let mut underflowed = false;

    for piece in path {
        for raw in piece.split_inclusive('/') {
            // The classification looks at the segment without its separator.
            let name = match raw.strip_suffix('/') {
                Some(stripped) => stripped,
                None => raw,
            };

            match classify_segment(name) {
                SegKind::Normal => buf.push_str(raw),
                SegKind::Dot => {}
                SegKind::DoubleDot if buf.len() > start + 1 => {
                    // Cut back to just after the '/' that precedes the last
                    // written segment; the final byte is skipped in the search.
                    let keep = buf[..buf.len() - 1].rfind('/').unwrap() + 1;
                    buf.truncate(keep);
                }
                SegKind::DoubleDot => underflowed = true,
            }
        }
    }

    underflowed
}

/// The role a path segment plays in dot-segment removal.
enum SegKind {
    /// A single dot, written as `.`, `%2E` or `%2e`.
    Dot,
    /// Two dots, each written as `.`, `%2E` or `%2e`.
    DoubleDot,
    /// Anything else, including the empty segment.
    Normal,
}

/// Classifies `seg`, a path segment without its trailing `'/'`.
///
/// Percent-encoded dots (`%2E`, `%2e`) count as dots, in any combination with
/// literal ones.
fn classify_segment(seg: &str) -> SegKind {
    /// Removes one leading dot, literal or percent-encoded, if there is one.
    fn strip_leading_dot(s: &str) -> Option<&str> {
        [".", "%2E", "%2e"]
            .iter()
            .find_map(|dot| s.strip_prefix(*dot))
    }

    match strip_leading_dot(seg) {
        None => SegKind::Normal,
        Some("") => SegKind::Dot,
        Some(rest) => match strip_leading_dot(rest) {
            Some("") => SegKind::DoubleDot,
            _ => SegKind::Normal,
        },
    }
}