1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
use reqwest::Url;
use serde::{Deserialize, Serialize};
use std::{convert::TryFrom, path::PathBuf};

use crate::{ErrorKind, InputSource};

/// When encountering links without a full domain in a document,
/// the base determines where this resource can be found.
/// Both, local and remote targets are supported.
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone)]
#[allow(variant_size_differences)]
#[serde(try_from = "String")]
pub enum Base {
    /// Local file path pointing to root directory
    Local(PathBuf),
    /// Remote URL pointing to a website homepage
    Remote(Url),
}

impl Base {
    /// Join link with base url
    #[must_use]
    pub(crate) fn join(&self, link: &str) -> Option<Url> {
        match self {
            Self::Remote(url) => url.join(link).ok(),
            Self::Local(_) => None,
        }
    }

    /// Return the directory if the base is local
    #[must_use]
    pub(crate) fn dir(&self) -> Option<PathBuf> {
        match self {
            Self::Remote(_) => None,
            Self::Local(d) => Some(d.clone()),
        }
    }

    pub(crate) fn from_source(source: &InputSource) -> Option<Url> {
        match &source {
            InputSource::RemoteUrl(url) => {
                // TODO: This should be refactored.
                // Cases like https://user:pass@example.com are not handled
                // We can probably use the original URL and just replace the
                // path component in the caller of this function
                if let Some(port) = url.port() {
                    Url::parse(&format!("{}://{}:{port}", url.scheme(), url.host_str()?)).ok()
                } else {
                    Url::parse(&format!("{}://{}", url.scheme(), url.host_str()?)).ok()
                }
            }
            // other inputs do not have a URL to extract a base
            _ => None,
        }
    }
}

impl TryFrom<&str> for Base {
    type Error = ErrorKind;

    fn try_from(value: &str) -> Result<Self, Self::Error> {
        if let Ok(url) = Url::parse(value) {
            if url.cannot_be_a_base() {
                return Err(ErrorKind::InvalidBase(
                    value.to_string(),
                    "The given URL cannot be a base".to_string(),
                ));
            }
            return Ok(Self::Remote(url));
        }
        Ok(Self::Local(PathBuf::from(value)))
    }
}

impl TryFrom<String> for Base {
    type Error = ErrorKind;

    fn try_from(value: String) -> Result<Self, Self::Error> {
        Self::try_from(value.as_str())
    }
}

#[cfg(test)]
mod test_base {
    use crate::Result;

    use super::*;

    #[test]
    fn test_valid_remote() -> Result<()> {
        let base = Base::try_from("https://endler.dev")?;
        assert_eq!(
            base,
            Base::Remote(Url::parse("https://endler.dev").unwrap())
        );
        Ok(())
    }

    #[test]
    fn test_invalid_url() {
        assert!(Base::try_from("data:text/plain,Hello?World#").is_err());
    }

    #[test]
    fn test_valid_local() -> Result<()> {
        let dir = tempfile::tempdir().unwrap();
        Base::try_from(dir.as_ref().to_str().unwrap())?;
        Ok(())
    }

    #[test]
    fn test_get_base_from_url() {
        for (url, expected) in [
            ("https://example.com", "https://example.com"),
            ("https://example.com?query=something", "https://example.com"),
            ("https://example.com/#anchor", "https://example.com"),
            ("https://example.com/foo/bar", "https://example.com"),
            (
                "https://example.com:1234/foo/bar",
                "https://example.com:1234",
            ),
        ] {
            let url = Url::parse(url).unwrap();
            let source = InputSource::RemoteUrl(Box::new(url.clone()));
            let base = Base::from_source(&source);
            let expected = Url::parse(expected).unwrap();
            assert_eq!(base, Some(expected));
        }
    }
}