Skip to main content

lychee_lib/types/
base_info.rs

//! Parses and resolves [`RawUri`] into a fully-qualified [`Uri`] by
//! applying base URL and root dir mappings.
3
4use reqwest::Url;
5use serde::Deserialize;
6use std::borrow::Cow;
7use std::path::{Path, PathBuf};
8use url::ParseError;
9
10use crate::ErrorKind;
11use crate::types::uri::parsed::ParsedUri;
12use crate::types::uri::relative::RelativeUri;
13use crate::utils;
14
15/// Information used for resolving relative URLs within a particular
16/// input source. There should be a 1:1 correspondence between each
17/// `BaseInfo` and its originating `InputSource`. The main entry
18/// point for constructing is [`BaseInfo::from_source_url`].
19///
20/// Once constructed, [`BaseInfo::parse_url_text`] can be used to
21/// parse and resolve a (possibly relative) URL obtained from within
22/// the associated `InputSource`.
23///
24/// A `BaseInfo` may be built from input sources which cannot resolve
25/// relative links---for instance, stdin. It may also be built from input
26/// sources which can resolve *locally*-relative links, but not *root*-relative
27/// links.
28#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Default)]
29#[serde(try_from = "String")]
30pub enum BaseInfo {
31    /// No base information is available. This is for sources with no base
32    /// information, such as [`ResolvedInputSource::Stdin`], and for URLs which
33    /// *cannot be a base*, such as `data:` and `tel:`. [`BaseInfo::None`]
34    /// can resolve no relative links; only fully-qualified links will be
35    /// parsed successfully.
36    #[default]
37    None,
38
39    /// A base which cannot resolve root-relative links. This is for
40    /// `file:` URLs where the root directory is not known. As such, you can
41    /// traverse relative to the current URL (by traversing the filesystem),
42    /// but you cannot jump to the "root".
43    NoRoot(Url),
44
45    /// A full base made up of `origin` and `path`. This can resolve
46    /// all kinds of relative links.
47    ///
48    /// All non-`file:` URLs which *can be a base* fall into this case. For these,
49    /// `origin` and `path` are obtained by dividing the source URL into its
50    /// origin and path. When joined, `${origin}/${path}` should be equivalent
51    /// to the source's original URL.
52    ///
53    /// This also represents `file:` URLs with a known root. The `origin` field
54    /// records the `file:` URL which will be used to resolve root-relative links.
55    /// The `path` field is the subpath to a particular input source within the
56    /// root. This is retained to resolve locally-relative links.
57    ///
58    /// In all cases, the fields should satisfy `origin.join(path) == input_source_url`
59    /// where `input_source_url` is the URL of the originating input source.
60    Full {
61        /// A `file:` or *can be a base* URL which acts as the origin. If this is
62        /// a `file:` URL, root-relative links will resolve to subpaths of this URL.
63        /// See the [`BaseInfo::Full`] for more information.
64        origin: Url,
65
66        /// The `path` field is conditionally joined with `origin` to resolve
67        /// links. This is a (possibly-empty) locally- or root-relative link
68        /// and should not be a full URL or a scheme-relative link.
69        path: String,
70    },
71}
72
73impl BaseInfo {
74    /// Constructs [`BaseInfo::None`].
75    #[must_use]
76    pub const fn none() -> Self {
77        Self::None
78    }
79
80    /// Constructs [`BaseInfo::Full`] with the given fields.
81    #[must_use]
82    pub const fn full(origin: Url, path: String) -> Self {
83        Self::Full { origin, path }
84    }
85
86    /// Constructs a [`BaseInfo`], with the variant being determined by the given URL.
87    ///
88    /// - A [`Url::cannot_be_a_base`] URL will yield [`BaseInfo::None`].
89    /// - A `file:` URL will yield [`BaseInfo::NoRoot`].
90    /// - For other URLs, a [`BaseInfo::Full`] will be constructed from the URL's
91    ///   origin and path.
92    ///
93    /// Compared to [`BaseInfo::from_base_url`], this function is more lenient in
94    /// what it accepts because this function should return *a* result for all
95    /// input source URLs.
96    #[must_use]
97    pub fn from_source_url(url: &Url) -> Self {
98        if url.scheme() == "file" {
99            Self::NoRoot(url.clone())
100        } else {
101            match Self::split_url_origin_and_path(url) {
102                Some((origin, path)) => Self::full(origin, path),
103                None => Self::none(),
104            }
105        }
106    }
107
108    /// Split URL into its origin and path, if possible. Will fail and return
109    /// `None` for URLs which *cannot be a base*.
110    fn split_url_origin_and_path(url: &Url) -> Option<(Url, String)> {
111        let origin = url.join("/").ok()?;
112        let subpath = origin.make_relative(url)?;
113        Some((origin, subpath))
114    }
115
116    /// Constructs a [`BaseInfo`] from the given URL, requiring that the given path be acceptable as a
117    /// base URL. That is, it cannot be a special scheme like `data:`.
118    ///
119    /// # Errors
120    ///
121    /// Errors if the given URL cannot be a base.
122    pub fn from_base_url(url: &Url) -> Result<BaseInfo, ErrorKind> {
123        if url.cannot_be_a_base() {
124            return Err(ErrorKind::InvalidBase(
125                url.to_string(),
126                "The given URL cannot be used as a base URL".to_string(),
127            ));
128        }
129
130        Ok(Self::from_source_url(url))
131    }
132
133    /// Constructs a [`BaseInfo`] from the given filesystem path, requiring that
134    /// the given path be absolute. Assumes that the given path represents a directory.
135    ///
136    /// This constructs a [`BaseInfo::Full`] where root-relative links will go to
137    /// the given path.
138    ///
139    /// # Errors
140    ///
141    /// Errors if the given path is not an absolute path.
142    pub fn from_path(path: &Path) -> Result<BaseInfo, ErrorKind> {
143        let Ok(url) = Url::from_directory_path(path) else {
144            return Err(ErrorKind::InvalidBase(
145                path.to_string_lossy().to_string(),
146                "Base must either be a full URL (with scheme) or an absolute local path"
147                    .to_string(),
148            ));
149        };
150
151        Self::from_base_url(&url).map(|x| x.use_fs_path_as_origin().into_owned())
152    }
153
154    /// If this is a [`BaseInfo::NoRoot`], promote it to a [`BaseInfo::Full`]
155    /// by using the filesystem root as the "origin" for root-relative links.
156    /// Root-relative links will go to the filesystem root.
157    ///
158    /// Generally, this function should be avoided in favour of a more explicit
159    /// user-provided root directory. The filesystem root is rarely a good place
160    /// to look for files.
161    ///
162    /// Makes no change to other [`BaseInfo`] variants.
163    ///
164    /// # Panics
165    ///
166    /// If unable to split a [`BaseInfo::NoRoot`] into origin and path.
167    #[must_use]
168    pub fn use_fs_root_as_origin(&self) -> Cow<'_, Self> {
169        let Self::NoRoot(url) = self else {
170            return Cow::Borrowed(self);
171        };
172
173        let (fs_root, subpath) = Self::split_url_origin_and_path(url)
174            .expect("splitting up a NoRoot file:// URL should work");
175
176        Cow::Owned(Self::full(fs_root, subpath))
177    }
178
179    /// If this is a [`BaseInfo::NoRoot`], promote it to a [`BaseInfo::Full`]
180    /// by using the entire filesystem path as the "origin" for root-relative links.
181    /// Root-relative links will go to the URL that was previously within `NoRoot`.
182    ///
183    /// Generally, this function should be avoided in favour of a more explicit
184    /// user-provided root directory.
185    ///
186    /// Makes no change to other [`BaseInfo`] variants.
187    #[must_use]
188    pub fn use_fs_path_as_origin(&self) -> Cow<'_, Self> {
189        let Self::NoRoot(url) = self else {
190            return Cow::Borrowed(self);
191        };
192
193        Cow::Owned(Self::full(url.clone(), String::new()))
194    }
195
196    /// Returns the URL for the current [`BaseInfo`], joining the origin and path
197    /// if needed.
198    #[must_use]
199    pub fn url(&self) -> Option<Url> {
200        match self {
201            Self::None => None,
202            Self::NoRoot(url) => Some(url.clone()),
203            Self::Full { origin, path } => origin.join(path).ok(),
204        }
205    }
206
207    /// Returns the filesystem path for the current [`BaseInfo`] if the underlying
208    /// URL is a `file:` URL.
209    #[must_use]
210    pub fn to_file_path(&self) -> Option<PathBuf> {
211        self.url()
212            .filter(|url| url.scheme() == "file")
213            .and_then(|x| x.to_file_path().ok())
214    }
215
216    /// Returns the scheme of the underlying URL.
217    #[must_use]
218    pub fn scheme(&self) -> Option<&str> {
219        match self {
220            Self::None => None,
221            Self::NoRoot(url) | Self::Full { origin: url, .. } => Some(url.scheme()),
222        }
223    }
224
225    /// Returns whether this value is [`BaseInfo::None`].
226    #[must_use]
227    pub const fn is_none(&self) -> bool {
228        matches!(self, Self::None)
229    }
230
231    /// Returns whether this [`BaseInfo`] variant supports resolving root-relative links.
232    ///
233    /// If true, implies [`BaseInfo::supports_locally_relative`].
234    #[must_use]
235    pub const fn supports_root_relative(&self) -> bool {
236        matches!(self, Self::Full { .. })
237    }
238
239    /// Returns whether this [`BaseInfo`] variant supports resolving locally-relative links.
240    #[must_use]
241    pub const fn supports_locally_relative(&self) -> bool {
242        !self.is_none()
243    }
244
245    /// Returns the [`BaseInfo`] which has _more information_
246    /// between `self` and the given `fallback`.
247    ///
248    /// [`BaseInfo::Full`] is preferred over [`BaseInfo::NoRoot`]
249    /// which is preferred over [`BaseInfo::None`]. If both `self`
250    /// and `fallback` are the same variant, then `self` will be preferred.
251    #[must_use]
252    #[allow(clippy::match_same_arms)]
253    pub const fn or_fallback<'a>(&'a self, fallback: &'a Self) -> &'a Self {
254        match (self, fallback) {
255            (x @ Self::Full { .. }, _) => x,
256            (_, x @ Self::Full { .. }) => x,
257            (x @ Self::NoRoot(_), _) => x,
258            (_, x @ Self::NoRoot(_)) => x,
259            (x @ Self::None, Self::None) => x,
260        }
261    }
262
263    /// Parses the given URL text into a fully-qualified URL, including
264    /// resolving relative links if supported by the current [`BaseInfo`].
265    ///
266    /// To parse and resolve relative links, this uses [`Url::join`] with
267    /// the current [`BaseInfo`]'s URL as a base, as applicable.
268    ///
269    /// # Errors
270    ///
271    /// Returns an error if the text is an invalid URL, or if the text is a
272    /// relative link and this [`BaseInfo`] variant cannot resolve
273    /// the relative link.
274    pub fn parse_url_text(&self, text: &str) -> Result<Url, ErrorKind> {
275        match ParsedUri::try_from(text) {
276            Ok(ParsedUri::Absolute(uri)) => Ok(uri.url),
277            Ok(ParsedUri::Relative(rel)) => self.resolve_relative_link(&rel),
278            Err(e) => Err(e),
279        }
280    }
281
282    /// Resolves the given relative link into a fully-qualified URL, if
283    /// supported by the current [`BaseInfo`].
284    ///
285    /// # Errors
286    ///
287    /// Returns an error if the text is an invalid URL, or if the current
288    /// [`BaseInfo`] is not capable of resolving the given relative link.
289    /// Returned errors include [`ErrorKind::RootRelativeLinkWithoutRoot`]
290    /// and [`ParseError::RelativeUrlWithoutBase`] (within [`ErrorKind::ParseUrl`]).
291    #[expect(clippy::unnested_or_patterns, reason = "more readable here")]
292    #[expect(clippy::match_same_arms, reason = "we need to comment one of the arms")]
293    pub fn resolve_relative_link(&self, rel: &RelativeUri<'_>) -> Result<Url, ErrorKind> {
294        match (self, &rel) {
295            (Self::None, RelativeUri::Root(_)) | (Self::NoRoot(_), RelativeUri::Root(_)) => {
296                return Err(ErrorKind::RootRelativeLinkWithoutRoot(
297                    rel.link_text().to_string(),
298                ));
299            }
300
301            (Self::None, _) => Err(ParseError::RelativeUrlWithoutBase),
302
303            (Self::NoRoot(base), RelativeUri::Local(text)) => base.join(text),
304
305            // `(Self::NoRoot, RelativeUri::Scheme)` happens when a link like `///a` occurs
306            // within a local file without root-dir. note the triple slash because file
307            // URLs typically don't have a hostname. however, file URLs with hostname
308            // are also valid syntax, but they will be rejected by:
309            // https://docs.rs/reqwest/0.12.23/reqwest/struct.Url.html#method.to_file_path
310            (Self::NoRoot(base), RelativeUri::Scheme(text)) => base.join(text),
311
312            (Self::Full { origin, .. }, RelativeUri::Root(root_rel))
313                if origin.scheme() == "file" =>
314            {
315                // `root_rel` starts with `/`, so this prefixing it with `.`
316                // changes it to a locally-relative link like `./something`.
317                origin.join(&format!(".{root_rel}"))
318            }
319
320            (Self::Full { origin, path }, rel) => {
321                origin.join(path).and_then(|x| x.join(rel.link_text()))
322            }
323        }
324        .map_err(|e| ErrorKind::ParseUrl(e, rel.link_text().to_string()))
325    }
326
327    /// Parses the given URL text into a fully-qualified URL, including
328    /// resolving relative links if supported by the current [`BaseInfo`]
329    /// and applying the given root-dir if necessary.
330    ///
331    /// The root-dir is applied if the current `BaseInfo` is [`BaseInfo::None`]
332    /// or has a `file:` URL and if the given text is a root-relative link.
333    /// In these cases, the given `root_dir` will *override* the original
334    /// `BaseInfo`.
335    ///
336    /// # Errors
337    ///
338    /// Propagates errors from [`BaseInfo::parse_url_text`].
339    pub fn parse_url_text_with_root_dir(
340        &self,
341        text: &str,
342        root_dir: Option<&Url>,
343    ) -> Result<Url, ErrorKind> {
344        // HACK: if root-dir is specified, apply it by fudging around with
345        // file:// URLs. eventually, someone up the stack should construct
346        // the BaseInfo::Full for root-dir and this function should be deleted.
347
348        let rel = match ParsedUri::try_from(text) {
349            Ok(ParsedUri::Absolute(uri)) => return Ok(uri.url),
350            Err(e) => return Err(e),
351            Ok(ParsedUri::Relative(rel)) => rel,
352        };
353
354        // NOTE: also applies root-dir for BaseInfo::None :)
355        if let Some(root_dir) = root_dir
356            && let RelativeUri::Root(_) = rel
357            && let None | Some("file") = self.scheme()
358        {
359            let root_dir_base = Self::full(root_dir.clone(), String::new());
360            root_dir_base.resolve_relative_link(&rel)
361        } else {
362            self.resolve_relative_link(&rel)
363        }
364    }
365}
366
367/// Attempts to parse a base from the given string which may be
368/// a URL or a filesystem path. In both cases, the string must
369/// represent a valid base (i.e., not resulting in [`BaseInfo::None`]).
370/// Otherwise, an error will be returned.
371///
372/// Note that this makes a distinction between filesystem paths as paths
373/// and filesystem paths as URLs. When specified as a path, they will
374/// become [`BaseInfo::Full`] but when specified as a URL, they will
375/// become [`BaseInfo::NoRoot`].
376///
377/// Additionally, the empty string is accepted and will be parsed to
378/// [`BaseInfo::None`].
379impl TryFrom<&str> for BaseInfo {
380    type Error = ErrorKind;
381
382    fn try_from(value: &str) -> Result<Self, ErrorKind> {
383        if value.is_empty() {
384            return Ok(BaseInfo::none());
385        }
386        match utils::url::parse_url_or_path(value) {
387            Ok(url) => BaseInfo::from_base_url(&url),
388            Err(path) => BaseInfo::from_path(&PathBuf::from(path)),
389        }
390    }
391}
392
393impl TryFrom<String> for BaseInfo {
394    type Error = ErrorKind;
395    fn try_from(value: String) -> Result<Self, ErrorKind> {
396        BaseInfo::try_from(value.as_ref())
397    }
398}
399
400#[cfg(test)]
401mod tests {
402    use super::BaseInfo;
403    use reqwest::Url;
404    use rstest::rstest;
405    use std::path::PathBuf;
406
407    #[test]
408    fn test_base_info_construction() {
409        assert_eq!(
410            BaseInfo::try_from("https://a.com/b/?q#x").unwrap(),
411            BaseInfo::full(Url::parse("https://a.com").unwrap(), "b/?q#x".to_string())
412        );
413        assert_eq!(
414            BaseInfo::try_from("file:///file-path").unwrap(),
415            BaseInfo::NoRoot(Url::parse("file:///file-path").unwrap())
416        );
417        assert_eq!(
418            BaseInfo::try_from("/file-path").unwrap(),
419            BaseInfo::full(Url::parse("file:///file-path/").unwrap(), String::new())
420        );
421
422        // symbols inside a path are encoded if needed and should *not* be decoded.
423        assert_eq!(
424            BaseInfo::from_path(&PathBuf::from("/file path")).unwrap(),
425            BaseInfo::full(Url::parse("file:///file%20path/").unwrap(), String::new())
426        );
427        assert_eq!(
428            BaseInfo::from_path(&PathBuf::from("/file%20path")).unwrap(),
429            BaseInfo::full(Url::parse("file:///file%2520path/").unwrap(), String::new())
430        );
431        // query parameters are *not* interpreted from paths and are treated as literals
432        assert_eq!(
433            BaseInfo::from_path(&PathBuf::from("/file?q=2")).unwrap(),
434            BaseInfo::full(Url::parse("file:///file%3Fq=2/").unwrap(), String::new())
435        );
436
437        // symbols are encoded inside URLs if needed
438        assert_eq!(
439            BaseInfo::from_source_url(&Url::parse("http://a.com/x y/").unwrap()),
440            BaseInfo::full(Url::parse("http://a.com/").unwrap(), "x%20y/".to_owned())
441        );
442        assert_eq!(
443            BaseInfo::from_source_url(&Url::parse("http://a.com/x?q=x y").unwrap()),
444            BaseInfo::full(Url::parse("http://a.com/").unwrap(), "x?q=x%20y".to_owned())
445        );
446        assert_eq!(
447            BaseInfo::from_source_url(&Url::parse("http://a.com/Ω≈ç√∫˜µ≤≥÷/").unwrap()),
448            BaseInfo::full(
449                Url::parse("http://a.com/").unwrap(),
450                "%CE%A9%E2%89%88%C3%A7%E2%88%9A%E2%88%AB%CB%9C%C2%B5%E2%89%A4%E2%89%A5%C3%B7/"
451                    .to_owned()
452            )
453        );
454        assert_eq!(
455            BaseInfo::from_source_url(&Url::parse("http://みんな.com/x").unwrap()),
456            BaseInfo::full(
457                Url::parse("http://xn--q9jyb4c.com/").unwrap(),
458                "x".to_owned()
459            )
460        );
461        assert_eq!(
462            BaseInfo::from_source_url(&Url::parse("http://München-Ost.com/x").unwrap()),
463            BaseInfo::full(
464                Url::parse("http://xn--mnchen-ost-9db.com/").unwrap(),
465                "x".to_owned()
466            )
467        );
468        assert_eq!(
469            BaseInfo::from_source_url(&Url::parse("http://😉.com/x").unwrap()),
470            BaseInfo::full(Url::parse("http://xn--n28h.com/").unwrap(), "x".to_owned())
471        );
472
473        let urls = [
474            "https://a.com/b/?q#x",
475            "file:///a.com/b/?q#x",
476            "https://a.com/b%20a/?q#x",
477        ];
478        // .url() of base-info should return the original URL with no changes to encoding
479        for url_str in urls {
480            let url = Url::parse(url_str).unwrap();
481            assert_eq!(BaseInfo::try_from(url_str).unwrap().url(), Some(url));
482        }
483    }
484
485    #[test]
486    fn test_base_info_with_http_base() {
487        let base = BaseInfo::try_from("https://a.com/c/u/").unwrap();
488        let root_dir = Url::parse("file:///root/").unwrap();
489
490        // shouldn't trigger the root URL
491        assert_eq!(
492            base.parse_url_text_with_root_dir("/a", Some(&root_dir)),
493            Ok(Url::parse("https://a.com/a").unwrap())
494        );
495
496        assert_eq!(
497            base.parse_url_text_with_root_dir("..", Some(&root_dir)),
498            Ok(Url::parse("https://a.com/c/").unwrap())
499        );
500    }
501
502    #[test]
503    fn test_base_info_parse_with_root_dir() {
504        let base = BaseInfo::try_from("/file-path").unwrap();
505        let root_dir = Url::parse("file:///root/").unwrap();
506
507        // first, links which shouldn't trigger the root URL
508        assert_eq!(
509            base.parse_url_text_with_root_dir("a", Some(&root_dir)),
510            Ok(Url::parse("file:///file-path/a").unwrap())
511        );
512        assert_eq!(
513            base.parse_url_text_with_root_dir("./a", Some(&root_dir)),
514            Ok(Url::parse("file:///file-path/a").unwrap())
515        );
516        assert_eq!(
517            base.parse_url_text_with_root_dir("///scheme-relative", Some(&root_dir)),
518            Ok(Url::parse("file:///scheme-relative").unwrap())
519        );
520        assert_eq!(
521            base.parse_url_text_with_root_dir("https://a.com/b?q", Some(&root_dir)),
522            Ok(Url::parse("https://a.com/b?q").unwrap())
523        );
524        assert_eq!(
525            base.parse_url_text_with_root_dir("file:///a/", Some(&root_dir)),
526            Ok(Url::parse("file:///a/").unwrap())
527        );
528
529        // basic root dir use
530        assert_eq!(
531            base.parse_url_text_with_root_dir("/a", Some(&root_dir)),
532            Ok(Url::parse("file:///root/a").unwrap())
533        );
534
535        // root-dir can be traversed out of
536        assert_eq!(
537            base.parse_url_text_with_root_dir("/../../", Some(&root_dir)),
538            Ok(Url::parse("file:///").unwrap())
539        );
540    }
541
542    #[rstest]
543    // normal HTTP traversal and parsing absolute links
544    #[case("https://a.com/b", "x/", "d", "https://a.com/x/d")]
545    #[case("https://a.com/b/", "x/", "d", "https://a.com/b/x/d")]
546    #[case("https://a.com/b/", "", "https://new.com", "https://new.com/")]
547    // parsing absolute file://
548    #[case("https://a.com/b/", "", "file:///a", "file:///a")]
549    #[case("https://a.com/b/", "", "file:///a/", "file:///a/")]
550    #[case("https://a.com/b/", "", "file:///a/b/", "file:///a/b/")]
551    // file traversal
552    #[case("file:///a/b/", "", "/x/y", "file:///a/b/x/y")]
553    #[case("file:///a/b/", "", "a/", "file:///a/b/a/")]
554    #[case("file:///a/b/", "a/", "../..", "file:///a/")]
555    #[case("file:///a/b/", "a/", "/", "file:///a/b/")]
556    #[case("file:///a/b/", "", "/..", "file:///a/")]
557    #[case("file:///a/b/", "", "/../../", "file:///")]
558    #[case("file:///a/b/", "", "?", "file:///a/b/?")]
559    #[case("file:///a/b/", ".", "?", "file:///a/b/?")]
560    // HTTP relative links
561    #[case("https://a.com/x", "", "#", "https://a.com/x#")]
562    #[case("https://a.com/x", "", "../../..", "https://a.com/")]
563    #[case("https://a.com/x", "?q", "#x", "https://a.com/x?q#x")]
564    #[case("https://a.com/x", ".", "?a", "https://a.com/?a")]
565    #[case("https://a.com/x/", "", "/", "https://a.com/")]
566    #[case("https://a.com/x?q#anchor", "", "?q", "https://a.com/x?q")]
567    #[case("https://a.com/x#anchor", "", "?x", "https://a.com/x?x")]
568    // scheme relative link - can traverse outside of root
569    #[case("file:///root/", "", "///new-root", "file:///new-root")]
570    #[case("file:///root/", "", "//a.com/boop", "file://a.com/boop")]
571    #[case("https://root/", "", "//a.com/boop", "https://a.com/boop")]
572    fn test_parse_url_text(
573        #[case] origin: &str,
574        #[case] path: &str,
575        #[case] text: &str,
576        #[case] expected: &str,
577    ) {
578        assert_eq!(
579            BaseInfo::full(Url::parse(origin).unwrap(), path.to_string())
580                .parse_url_text(text)
581                .unwrap()
582                .to_string(),
583            expected,
584            "origin={origin}, path={path:?}, text={text:?}, expected={expected}"
585        );
586    }
587
588    #[rstest]
589    // file URLs without trailing / are kinda weird.
590    #[case("file:///a/b/c", "", "/../../x", "file:///x")]
591    #[case("file:///a/b/c", "", "/", "file:///a/b/")]
592    #[case("file:///a/b/c", "", ".?qq", "file:///a/b/?qq")]
593    #[case("file:///a/b/c", "", "#x", "file:///a/b/c#x")]
594    #[case("file:///a/b/c", "", "./", "file:///a/b/")]
595    #[case("file:///a/b/c", "", "c", "file:///a/b/c")]
596    // joining with d
597    #[case("file:///a/b/c", "d", "/../../x", "file:///x")]
598    #[case("file:///a/b/c", "d", "/", "file:///a/b/")]
599    #[case("file:///a/b/c", "d", ".", "file:///a/b/")]
600    #[case("file:///a/b/c", "d", "./", "file:///a/b/")]
601    // joining with d/
602    #[case("file:///a/b/c", "d/", "/", "file:///a/b/")]
603    #[case("file:///a/b/c", "d/", ".", "file:///a/b/d/")]
604    #[case("file:///a/b/c", "d/", "./", "file:///a/b/d/")]
605    fn test_parse_url_text_with_trailing_filename(
606        #[case] origin: &str,
607        #[case] path: &str,
608        #[case] text: &str,
609        #[case] expected: &str,
610    ) {
611        assert_eq!(
612            BaseInfo::full(Url::parse(origin).unwrap(), path.to_string())
613                .parse_url_text(text)
614                .unwrap()
615                .to_string(),
616            expected,
617            "origin={origin}, path={path:?}, text={text:?}, expected={expected}"
618        );
619    }
620
621    #[test]
622    fn test_none_rejects_relative_but_accepts_absolute() {
623        // Ensures BaseInfo::None doesn't silently swallow relative links
624        let none = BaseInfo::none();
625        // Absolute URLs still work
626        assert!(none.parse_url_text("https://a.com").is_ok());
627        // Relative links fail
628        assert!(none.parse_url_text("relative").is_err());
629        assert!(none.parse_url_text("/root-relative").is_err());
630    }
631
632    #[test]
633    fn test_no_root_rejects_root_relative() {
634        // A file:// source without --root-dir can resolve siblings but not root-relative links
635        let no_root = BaseInfo::try_from("file:///some/path/").unwrap();
636        assert_eq!(
637            no_root.parse_url_text("sibling.html").unwrap(),
638            Url::parse("file:///some/path/sibling.html").unwrap()
639        );
640        assert!(no_root.parse_url_text("/root-relative").is_err());
641    }
642
643    #[test]
644    fn test_or_fallback_prefers_more_capable_variant() {
645        // Pins the fallback priority that drives base selection in the collector
646        let none = BaseInfo::none();
647        let no_root = BaseInfo::NoRoot(Url::parse("file:///a/").unwrap());
648        let full = BaseInfo::full(Url::parse("https://a.com/").unwrap(), String::new());
649
650        assert_eq!(none.or_fallback(&full), &full);
651        assert_eq!(full.or_fallback(&none), &full);
652        assert_eq!(none.or_fallback(&no_root), &no_root);
653        assert_eq!(no_root.or_fallback(&full), &full);
654        assert_eq!(none.or_fallback(&none), &none);
655    }
656
657    #[test]
658    fn test_try_from_rejects_invalid_bases() {
659        // Prevent data: URLs and relative paths from silently becoming a base in the future
660        assert!(BaseInfo::try_from("data:text/plain,hello").is_err());
661        assert!(BaseInfo::try_from("relative/path").is_err());
662        assert!(BaseInfo::from_path(&PathBuf::from("relative")).is_err());
663    }
664}