Skip to main content

asimov_module/
normalization.rs

1// This is free and unencumbered software released into the public domain.
2
3use alloc::string::{String, ToString};
4use iri_string::types::{IriReferenceStr, IriReferenceString};
5
/// Returns the canonical form of a module name: every `.` is dropped and the
/// remaining text is lowercased.
///
/// This lets callers pass domain-style names such as `near.ai`, or names
/// stylized with capital letters, and still arrive at a single spelling.
/// The `name` field of [`crate::ModuleManifest`] should equal the normalized form.
///
/// # Examples
///
/// ```
/// # use asimov_module::normalization::normalize_module_name;
/// assert_eq!(normalize_module_name("foo.bar"), "foobar");
/// assert_eq!(normalize_module_name("FOOBAR"), "foobar");
/// ```
pub fn normalize_module_name(module: &str) -> String {
    // Splitting on '.' and gluing the pieces back together drops every dot.
    let without_dots: String = module.split('.').collect();

    // Lowercase the string as a whole (not `char` by `char`) so that
    // context-sensitive mappings follow `str::to_lowercase` exactly.
    without_dots.to_lowercase()
}
20
21#[derive(Clone, Debug, thiserror::Error)]
22pub enum NormalizeError {
23    #[error(transparent)]
24    Parse(#[from] iri_string::types::CreationError<String>),
25    #[error(transparent)]
26    Build(#[from] iri_string::validate::Error),
27}
28
29/// Normalizes URLs and file paths into valid IRI format with consistent scheme handling.
30///
31/// Adds `file:` scheme to paths, resolves relative paths, handles `~/` expansion,
32/// and properly encodes spaces and special characters.
33///
34/// # Examples
35///
36/// ```
37/// # fn main() -> Result<(), Box<dyn core::error::Error>> {
38/// # use asimov_module::normalization::normalize_url;
39/// assert_eq!(normalize_url("https://example.org")?, "https://example.org/");
40/// assert!(normalize_url("path with spaces.txt")?.starts_with("file:"));
41/// assert!(normalize_url("~/document.txt")?.ends_with("/document.txt"));
42/// # Ok(())
43/// # }
44/// ```
45pub fn normalize_url(url: &str) -> Result<String, NormalizeError> {
46    let iri = IriReferenceString::try_from(url)
47        .or_else(|_| IriReferenceString::try_from(url.replace(" ", "%20")))?;
48
49    let mut builder = iri_string::build::Builder::new();
50
51    // default `file:` scheme
52    let scheme = iri.scheme_str().unwrap_or("file");
53    builder.scheme(scheme);
54
55    if let Some(auth) = iri.authority_components() {
56        if let Some(user) = auth.userinfo() {
57            builder.userinfo(user);
58        }
59        builder.host(auth.host());
60        if let Some(port) = auth.port() {
61            builder.port(port);
62        }
63    }
64
65    let path = iri.path_str();
66
67    // TODO: utilize `path.normalize_lexically()` once it stabilizes
68    // https://github.com/rust-lang/rust/issues/134694
69
70    #[cfg(feature = "std")]
71    let path = if scheme == "file" && path.starts_with("~/") {
72        let rest = path.strip_prefix("~/").unwrap(); // safe, the prefix was just checked just
73
74        let home_dir = std::env::home_dir().expect("unable to determine home directory");
75
76        let path = home_dir.join(rest);
77        let path = std::path::absolute(&path).unwrap_or(path);
78        let path = path.canonicalize().unwrap_or(path);
79
80        path.display().to_string()
81    } else if scheme == "file" {
82        // `std::path::absolute` also changes relative paths to absolute with the current directory
83        // as base.
84        let path = std::path::absolute(path).unwrap_or_else(|_| std::path::PathBuf::from(path));
85        let path = path.canonicalize().unwrap_or(path);
86
87        path.display().to_string()
88    } else if iri.authority_str().is_some() && path.is_empty() {
89        "/".to_string()
90    } else {
91        path.to_string()
92    };
93
94    #[cfg(windows)]
95    let path = if scheme == "file" && !path.starts_with("/") {
96        "/".to_string() + &path.replace('\\', "/")
97    } else {
98        path
99    };
100
101    builder.path(&path);
102
103    if let Some(query) = iri.query() {
104        builder.query(query.as_str());
105    }
106
107    if let Some(fraq) = iri.fragment() {
108        builder.fragment(fraq.as_str());
109    }
110
111    builder.normalize();
112
113    builder
114        .build::<IriReferenceStr>()
115        .map(|r| r.to_string())
116        .map_err(Into::into)
117}
118
#[cfg(test)]
mod tests {
    use super::*;
    use std::{format, string::ToString};

    /// Asserts that `input` normalizes to exactly `want`, naming the input
    /// in the failure output.
    fn assert_normalized(input: &str, want: &str) {
        assert_eq!(
            normalize_url(input).expect(input),
            want,
            "input: {:?}",
            input
        );
    }

    #[test]
    fn url_normalization() {
        // Inputs that carry an explicit, non-`file` scheme.
        let cases = [
            ("https://example.org", "https://example.org/"),
            ("https://example.org/", "https://example.org/"),
            ("http://example.com/path", "http://example.com/path"),
            ("https://api.example.com", "https://api.example.com/"),
            ("http://localhost:3000", "http://localhost:3000/"),
            ("ftp://fileserver.local", "ftp://fileserver.local/"),
            (
                "https://user:pass@example.org:8080/path?foo=bar&query=hello world#fragment",
                "https://user:pass@example.org:8080/path?foo=bar&query=hello%20world#fragment",
            ),
            ("near://testnet/123456789", "near://testnet/123456789"),
            (
                "ftp://files.example.com/file.txt",
                "ftp://files.example.com/file.txt",
            ),
            ("ws://localhost:3000/socket", "ws://localhost:3000/socket"),
            ("mailto:user@example.com", "mailto:user@example.com"),
            (
                "https://example.org/path with spaces",
                "https://example.org/path%20with%20spaces",
            ),
            (
                "https://example.org/path+with+plus",
                "https://example.org/path+with+plus",
            ),
            (
                "https://example.org/path%20already%20encoded",
                "https://example.org/path%20already%20encoded",
            ),
            (
                "data:text/plain;base64,SGVsbG8=",
                "data:text/plain;base64,SGVsbG8=",
            ),
            ("tel:+1-555-123-4567", "tel:+1-555-123-4567"),
            ("urn:isbn:1234567890", "urn:isbn:1234567890"),
            (
                "ldap://[2001:db8::7]/c=GB?objectClass?one",
                "ldap://[2001:db8::7]/c=GB?objectClass?one",
            ),
            (
                "ldap://foo:bar@[2001:db8::7]:80/c=GB?objectClass?one",
                "ldap://foo:bar@[2001:db8::7]:80/c=GB?objectClass?one",
            ),
            ("telnet://192.0.2.16:80", "telnet://192.0.2.16:80/"),
            // TODO: should this be inferred?
            // ("localhost:8080", "http://localhost:8080"),
        ];

        for (input, want) in cases {
            assert_normalized(input, want);
        }

        #[cfg(unix)]
        {
            // Expected values are derived from the directory the tests run in.
            let cwd = std::env::current_dir().unwrap();
            let parent = cwd.parent().unwrap().display().to_string();
            let cwd = cwd.display().to_string();

            let cases = [
                (
                    "/file with spaces.txt",
                    "file:/file%20with%20spaces.txt".to_string(),
                ),
                (
                    "/file+with+pluses.txt",
                    "file:/file+with+pluses.txt".to_string(),
                ),
                // Plain strings get `file:` scheme and current directory prepended
                ("document.txt", format!("file:{cwd}/document.txt")),
                // Domain-like strings without scheme get treated as files
                ("example.org", format!("file:{cwd}/example.org")),
                (
                    "folder name/file.txt",
                    format!("file:{cwd}/folder%20name/file.txt"),
                ),
                // `.` and `..` segments are resolved away
                ("./subfolder/../file.txt", format!("file:{cwd}/file.txt")),
                (
                    "../parent/./file.txt",
                    format!("file:{parent}/parent/file.txt"),
                ),
            ];

            for (input, want) in cases {
                assert_normalized(input, &want);
            }

            // Only checked when a home directory can be determined.
            if let Some(home_dir) = std::env::home_dir() {
                let input = "~/path/to/file.txt";
                let want = format!("file:{}/path/to/file.txt", home_dir.display());
                assert_eq!(
                    normalize_url(input).unwrap(),
                    want,
                    "`~/` should be replaced with the home directory, input: {:?}",
                    input
                );
            }
        }

        #[cfg(windows)]
        {
            // Rooted paths without a drive are expected to pick up the drive
            // letter of the current directory.
            let cwd = std::env::current_dir().unwrap();
            let drive = cwd.to_str().unwrap().chars().next().unwrap();
            let cases = [
                (
                    "/file with spaces.txt",
                    format!("file:/{drive}:/file%20with%20spaces.txt"),
                ),
                (
                    "/file+with+pluses.txt",
                    format!("file:/{drive}:/file+with+pluses.txt"),
                ),
            ];

            for (input, want) in cases {
                assert_normalized(input, &want);
            }
        }
    }
}