asimov_module/
url.rs

1// This is free and unencumbered software released into the public domain.
2
3use iri_string::types::{IriReferenceStr, IriReferenceString};
4use std::string::{String, ToString};
5
/// Errors that [`normalize_url`] can return.
///
/// Both variants are `transparent`: their `Display` output and error source
/// are those of the wrapped `iri_string` error.
#[derive(Clone, Debug, thiserror::Error)]
pub enum NormalizeError {
    /// The input could not be parsed as an IRI reference, even after the
    /// retry that percent-encodes spaces.
    #[error(transparent)]
    Parse(#[from] iri_string::types::CreationError<String>),
    /// The normalized components were rejected when building the final IRI
    /// reference.
    #[error(transparent)]
    Build(#[from] iri_string::validate::Error),
}
13
14pub fn normalize_url(url: &str) -> Result<String, NormalizeError> {
15    let iri = IriReferenceString::try_from(url)
16        .or_else(|_| IriReferenceString::try_from(url.replace(" ", "%20")))?;
17
18    let mut builder = iri_string::build::Builder::new();
19
20    // default `file:` scheme
21    let scheme = iri.scheme_str().unwrap_or("file");
22    builder.scheme(scheme);
23
24    if let Some(auth) = iri.authority_components() {
25        if let Some(user) = auth.userinfo() {
26            builder.userinfo(user);
27        }
28        builder.host(auth.host());
29        if let Some(port) = auth.port() {
30            builder.port(port);
31        }
32    }
33
34    let path = iri.path_str();
35
36    // TODO: utilize `path.normalize_lexically()` once it stabilizes
37    // https://github.com/rust-lang/rust/issues/134694
38
39    let path = if scheme == "file" && path.starts_with("~/") {
40        let rest = path.strip_prefix("~/").unwrap(); // safe, the prefix was just checked just
41
42        let home_dir = std::env::home_dir().expect("unable to determine home directory");
43
44        let path = home_dir.join(rest);
45        let path = std::path::absolute(&path).unwrap_or(path);
46        let path = path.canonicalize().unwrap_or(path);
47
48        path.display().to_string()
49    } else if scheme == "file" {
50        // `std::path::absolute` also changes relative paths to absolute with the current directory
51        // as base.
52        let path = std::path::absolute(path).unwrap_or_else(|_| std::path::PathBuf::from(path));
53        let path = path.canonicalize().unwrap_or(path);
54
55        path.display().to_string()
56    } else if iri.authority_str().is_some() && path.is_empty() {
57        "/".to_string()
58    } else {
59        path.to_string()
60    };
61    #[cfg(windows)]
62    let path = if scheme == "file" && !path.starts_with("/") {
63        "/".to_string() + &path.replace('\\', "/")
64    } else {
65        path
66    };
67
68    builder.path(&path);
69
70    if let Some(query) = iri.query() {
71        builder.query(query.as_str());
72    }
73
74    if let Some(fraq) = iri.fragment() {
75        builder.fragment(fraq.as_str());
76    }
77
78    builder.normalize();
79
80    builder
81        .build::<IriReferenceStr>()
82        .map(|r| r.to_string())
83        .map_err(Into::into)
84}
85
#[cfg(test)]
mod tests {
    use super::*;
    use std::{format, string::ToString};

    /// Asserts that `input` normalizes successfully to exactly `want`,
    /// reporting the offending input on failure.
    #[track_caller]
    fn assert_normalizes(input: &str, want: &str) {
        assert_eq!(
            normalize_url(input).expect(input),
            want,
            "input: {:?}",
            input
        );
    }

    #[test]
    fn url_normalization() {
        // Platform-independent cases: URLs with an explicit scheme.
        let cases = [
            ("https://example.org", "https://example.org/"),
            ("https://example.org/", "https://example.org/"),
            ("http://example.com/path", "http://example.com/path"),
            ("https://api.example.com", "https://api.example.com/"),
            ("http://localhost:3000", "http://localhost:3000/"),
            ("ftp://fileserver.local", "ftp://fileserver.local/"),
            (
                "https://user:pass@example.org:8080/path?foo=bar&query=hello world#fragment",
                "https://user:pass@example.org:8080/path?foo=bar&query=hello%20world#fragment",
            ),
            ("near://testnet/123456789", "near://testnet/123456789"),
            (
                "ftp://files.example.com/file.txt",
                "ftp://files.example.com/file.txt",
            ),
            ("ws://localhost:3000/socket", "ws://localhost:3000/socket"),
            ("mailto:user@example.com", "mailto:user@example.com"),
            (
                "https://example.org/path with spaces",
                "https://example.org/path%20with%20spaces",
            ),
            (
                "https://example.org/path+with+plus",
                "https://example.org/path+with+plus",
            ),
            (
                "https://example.org/path%20already%20encoded",
                "https://example.org/path%20already%20encoded",
            ),
            (
                "data:text/plain;base64,SGVsbG8=",
                "data:text/plain;base64,SGVsbG8=",
            ),
            ("tel:+1-555-123-4567", "tel:+1-555-123-4567"),
            ("urn:isbn:1234567890", "urn:isbn:1234567890"),
            (
                "ldap://[2001:db8::7]/c=GB?objectClass?one",
                "ldap://[2001:db8::7]/c=GB?objectClass?one",
            ),
            (
                "ldap://foo:bar@[2001:db8::7]:80/c=GB?objectClass?one",
                "ldap://foo:bar@[2001:db8::7]:80/c=GB?objectClass?one",
            ),
            ("telnet://192.0.2.16:80", "telnet://192.0.2.16:80/"),
            // TODO: should this be inferred?
            // ("localhost:8080", "http://localhost:8080"),
        ];

        for (input, want) in cases {
            assert_normalizes(input, want);
        }

        #[cfg(unix)]
        {
            let cwd = std::env::current_dir().unwrap();
            let parent = cwd.parent().unwrap().display().to_string();
            let cwd = cwd.display().to_string();

            let cases = [
                (
                    "/file with spaces.txt",
                    "file:/file%20with%20spaces.txt".to_string(),
                ),
                (
                    "/file+with+pluses.txt",
                    "file:/file+with+pluses.txt".to_string(),
                ),
                // Plain strings get `file:` scheme and current directory prepended
                ("document.txt", format!("file:{cwd}/document.txt")),
                // Domain-like strings without scheme get treated as files
                ("example.org", format!("file:{cwd}/example.org")),
                (
                    "folder name/file.txt",
                    format!("file:{cwd}/folder%20name/file.txt"),
                ),
                // `.` and `..` segments are resolved away
                ("./subfolder/../file.txt", format!("file:{cwd}/file.txt")),
                (
                    "../parent/./file.txt",
                    format!("file:{parent}/parent/file.txt"),
                ),
            ];

            for (input, want) in cases {
                assert_normalizes(input, &want);
            }

            if let Some(home_dir) = std::env::home_dir() {
                let home_dir = home_dir.display().to_string();

                let input = "~/path/to/file.txt";
                let want = "file:".to_string() + &home_dir + "/path/to/file.txt";
                assert_eq!(
                    normalize_url(input).unwrap(),
                    want,
                    "`~/` should be expanded to the home directory, input: {:?}",
                    input
                );
            }
        }

        #[cfg(windows)]
        {
            // Rooted paths without a drive are resolved against the current drive.
            let cwd = std::env::current_dir().unwrap();
            let drive = cwd.to_str().unwrap().chars().next().unwrap();
            let cases = [
                (
                    "/file with spaces.txt",
                    format!("file:/{drive}:/file%20with%20spaces.txt"),
                ),
                (
                    "/file+with+pluses.txt",
                    format!("file:/{drive}:/file+with+pluses.txt"),
                ),
            ];

            for (input, want) in cases {
                assert_normalizes(input, &want);
            }
        }
    }
}