asimov_module/
normalization.rs

1// This is free and unencumbered software released into the public domain.
2
3use iri_string::types::{IriReferenceStr, IriReferenceString};
4use std::string::{String, ToString};
5
/// Produces the canonical form of a module name: every `.` is dropped and the
/// remainder is lowercased.
///
/// This lets callers accept domain-style names such as `near.ai` as well as names
/// stylized with capital letters. The `name` field of [`crate::ModuleManifest`]
/// should equal the normalized form.
///
/// # Examples
///
/// ```
/// # use asimov_module::normalization::normalize_module_name;
/// assert_eq!(normalize_module_name("foo.bar"), "foobar");
/// assert_eq!(normalize_module_name("FOOBAR"), "foobar");
/// ```
pub fn normalize_module_name(module: &str) -> String {
    // Drop the dots first and lowercase the whole string afterwards, so that
    // `str::to_lowercase` operates on the dot-free text.
    let without_dots: String = module.chars().filter(|&ch| ch != '.').collect();
    without_dots.to_lowercase()
}
20
/// Errors returned by [`normalize_url`].
///
/// Both variants are transparent wrappers: their `Display` output and error source
/// are those of the wrapped `iri_string` error.
#[derive(Clone, Debug, thiserror::Error)]
pub enum NormalizeError {
    /// The input could not be turned into an owned IRI reference string.
    #[error(transparent)]
    Parse(#[from] iri_string::types::CreationError<String>),
    /// An `iri_string` validation error; in this file it is produced when the
    /// rebuilt IRI reference fails to build.
    #[error(transparent)]
    Build(#[from] iri_string::validate::Error),
}
28
29/// Normalizes URLs and file paths into valid IRI format with consistent scheme handling.
30///
31/// Adds `file:` scheme to paths, resolves relative paths, handles `~/` expansion,
32/// and properly encodes spaces and special characters.
33///
34/// # Examples
35///
36/// ```
37/// # fn main() -> Result<(), Box<dyn core::error::Error>> {
38/// # use asimov_module::normalization::normalize_url;
39/// assert_eq!(normalize_url("https://example.org")?, "https://example.org/");
40/// assert!(normalize_url("path with spaces.txt")?.starts_with("file:"));
41/// assert!(normalize_url("~/document.txt")?.ends_with("/document.txt"));
42/// # Ok(())
43/// # }
44/// ```
45pub fn normalize_url(url: &str) -> Result<String, NormalizeError> {
46    let iri = IriReferenceString::try_from(url)
47        .or_else(|_| IriReferenceString::try_from(url.replace(" ", "%20")))?;
48
49    let mut builder = iri_string::build::Builder::new();
50
51    // default `file:` scheme
52    let scheme = iri.scheme_str().unwrap_or("file");
53    builder.scheme(scheme);
54
55    if let Some(auth) = iri.authority_components() {
56        if let Some(user) = auth.userinfo() {
57            builder.userinfo(user);
58        }
59        builder.host(auth.host());
60        if let Some(port) = auth.port() {
61            builder.port(port);
62        }
63    }
64
65    let path = iri.path_str();
66
67    // TODO: utilize `path.normalize_lexically()` once it stabilizes
68    // https://github.com/rust-lang/rust/issues/134694
69
70    let path = if scheme == "file" && path.starts_with("~/") {
71        let rest = path.strip_prefix("~/").unwrap(); // safe, the prefix was just checked just
72
73        let home_dir = std::env::home_dir().expect("unable to determine home directory");
74
75        let path = home_dir.join(rest);
76        let path = std::path::absolute(&path).unwrap_or(path);
77        let path = path.canonicalize().unwrap_or(path);
78
79        path.display().to_string()
80    } else if scheme == "file" {
81        // `std::path::absolute` also changes relative paths to absolute with the current directory
82        // as base.
83        let path = std::path::absolute(path).unwrap_or_else(|_| std::path::PathBuf::from(path));
84        let path = path.canonicalize().unwrap_or(path);
85
86        path.display().to_string()
87    } else if iri.authority_str().is_some() && path.is_empty() {
88        "/".to_string()
89    } else {
90        path.to_string()
91    };
92    #[cfg(windows)]
93    let path = if scheme == "file" && !path.starts_with("/") {
94        "/".to_string() + &path.replace('\\', "/")
95    } else {
96        path
97    };
98
99    builder.path(&path);
100
101    if let Some(query) = iri.query() {
102        builder.query(query.as_str());
103    }
104
105    if let Some(fraq) = iri.fragment() {
106        builder.fragment(fraq.as_str());
107    }
108
109    builder.normalize();
110
111    builder
112        .build::<IriReferenceStr>()
113        .map(|r| r.to_string())
114        .map_err(Into::into)
115}
116
#[cfg(test)]
mod tests {
    use super::*;
    use std::{format, string::ToString};

    /// Checks `normalize_url` against scheme-qualified inputs on every platform and
    /// against scheme-less file paths on Unix and Windows separately.
    #[test]
    fn url_normalization() {
        // Inputs that already carry a scheme: the expected output does not depend on
        // the platform or the current directory.
        let cases = [
            ("https://example.org", "https://example.org/"),
            ("https://example.org/", "https://example.org/"),
            ("http://example.com/path", "http://example.com/path"),
            ("https://api.example.com", "https://api.example.com/"),
            ("http://localhost:3000", "http://localhost:3000/"),
            ("ftp://fileserver.local", "ftp://fileserver.local/"),
            (
                "https://user:pass@example.org:8080/path?foo=bar&query=hello world#fragment",
                "https://user:pass@example.org:8080/path?foo=bar&query=hello%20world#fragment",
            ),
            ("near://testnet/123456789", "near://testnet/123456789"),
            (
                "ftp://files.example.com/file.txt",
                "ftp://files.example.com/file.txt",
            ),
            ("ws://localhost:3000/socket", "ws://localhost:3000/socket"),
            ("mailto:user@example.com", "mailto:user@example.com"),
            (
                "https://example.org/path with spaces",
                "https://example.org/path%20with%20spaces",
            ),
            (
                "https://example.org/path+with+plus",
                "https://example.org/path+with+plus",
            ),
            (
                "https://example.org/path%20already%20encoded",
                "https://example.org/path%20already%20encoded",
            ),
            (
                "data:text/plain;base64,SGVsbG8=",
                "data:text/plain;base64,SGVsbG8=",
            ),
            ("tel:+1-555-123-4567", "tel:+1-555-123-4567"),
            ("urn:isbn:1234567890", "urn:isbn:1234567890"),
            (
                "ldap://[2001:db8::7]/c=GB?objectClass?one",
                "ldap://[2001:db8::7]/c=GB?objectClass?one",
            ),
            (
                "ldap://foo:bar@[2001:db8::7]:80/c=GB?objectClass?one",
                "ldap://foo:bar@[2001:db8::7]:80/c=GB?objectClass?one",
            ),
            ("telnet://192.0.2.16:80", "telnet://192.0.2.16:80/"),
            // TODO: should this be inferred?
            // ("localhost:8080", "http://localhost:8080"),
        ];

        for (input, want) in cases {
            assert_eq!(
                normalize_url(input).expect(input),
                want,
                "input: {input:?}"
            );
        }

        #[cfg(unix)]
        {
            // Scheme-less inputs are resolved against the current directory, so the
            // expected values are built from it.
            let cwd = std::env::current_dir().unwrap();
            let cwd_parent = cwd.parent().unwrap().display().to_string();
            let cwd = cwd.display().to_string();

            let cases = [
                ("/file with spaces.txt", "file:/file%20with%20spaces.txt"),
                ("/file+with+pluses.txt", "file:/file+with+pluses.txt"),
                (
                    // Plain strings get `file:` scheme and current directory prepended
                    "document.txt",
                    &format!("file:{cwd}/document.txt"),
                ),
                (
                    // Domain-like strings without scheme get treated as files
                    "example.org",
                    &format!("file:{cwd}/example.org"),
                ),
                (
                    "folder name/file.txt",
                    &format!("file:{cwd}/folder%20name/file.txt"),
                ),
                (
                    "./subfolder/../file.txt",
                    &format!("file:{cwd}/file.txt"),
                ),
                (
                    "../parent/./file.txt",
                    &format!("file:{cwd_parent}/parent/file.txt"),
                ),
            ];

            for (input, want) in cases {
                assert_eq!(normalize_url(input).unwrap(), want, "input: {input:?}");
            }

            // The `~/` expansion can only be checked when a home directory is known.
            if let Some(home_dir) = std::env::home_dir() {
                let home_dir = home_dir.display().to_string();

                let input = "~/path/to/file.txt";
                let want = "file:".to_string() + &home_dir + "/path/to/file.txt";
                assert_eq!(
                    normalize_url(input).unwrap(),
                    want,
                    "`~/` should be expanded to the home directory, input: {input:?}"
                );
            }
        }

        #[cfg(windows)]
        {
            // Rooted paths without a drive are resolved against the current drive.
            let cwd = std::env::current_dir().unwrap();
            let drive = cwd.to_str().unwrap().chars().next().unwrap();
            let cases = [
                (
                    "/file with spaces.txt",
                    format!("file:/{drive}:/file%20with%20spaces.txt"),
                ),
                (
                    "/file+with+pluses.txt",
                    format!("file:/{drive}:/file+with+pluses.txt"),
                ),
            ];

            for (input, want) in cases {
                assert_eq!(normalize_url(input).unwrap(), want, "input: {input:?}");
            }
        }
    }
}