1use iri_string::types::{IriReferenceStr, IriReferenceString};
4use std::string::{String, ToString};
5
6#[derive(Clone, Debug, thiserror::Error)]
7pub enum NormalizeError {
8 #[error(transparent)]
9 Parse(#[from] iri_string::types::CreationError<String>),
10 #[error(transparent)]
11 Build(#[from] iri_string::validate::Error),
12}
13
14pub fn normalize_url(url: &str) -> Result<String, NormalizeError> {
15 let iri = IriReferenceString::try_from(url)
16 .or_else(|_| IriReferenceString::try_from(url.replace(" ", "%20")))?;
17
18 let mut builder = iri_string::build::Builder::new();
19
20 let scheme = iri.scheme_str().unwrap_or("file");
22 builder.scheme(scheme);
23
24 if let Some(auth) = iri.authority_components() {
25 if let Some(user) = auth.userinfo() {
26 builder.userinfo(user);
27 }
28 builder.host(auth.host());
29 if let Some(port) = auth.port() {
30 builder.port(port);
31 }
32 }
33
34 let path = iri.path_str();
35
36 let path = if scheme == "file" && path.starts_with("~/") {
40 let rest = path.strip_prefix("~/").unwrap(); let home_dir = std::env::home_dir().expect("unable to determine home directory");
43
44 let path = home_dir.join(rest);
45 let path = std::path::absolute(&path).unwrap_or(path);
46 let path = path.canonicalize().unwrap_or(path);
47
48 path.display().to_string()
49 } else if scheme == "file" {
50 let path = std::path::absolute(path).unwrap_or_else(|_| std::path::PathBuf::from(path));
53 let path = path.canonicalize().unwrap_or(path);
54
55 path.display().to_string()
56 } else if iri.authority_str().is_some() && path.is_empty() {
57 "/".to_string()
58 } else {
59 path.to_string()
60 };
61 #[cfg(windows)]
62 let path = if scheme == "file" && !path.starts_with("/") {
63 "/".to_string() + &path.replace('\\', "/")
64 } else {
65 path
66 };
67
68 builder.path(&path);
69
70 if let Some(query) = iri.query() {
71 builder.query(query.as_str());
72 }
73
74 if let Some(fraq) = iri.fragment() {
75 builder.fragment(fraq.as_str());
76 }
77
78 builder.normalize();
79
80 builder
81 .build::<IriReferenceStr>()
82 .map(|r| r.to_string())
83 .map_err(Into::into)
84}
85
#[cfg(test)]
mod tests {
    use super::*;
    use std::{format, string::ToString};

    /// Asserts that `input` normalizes successfully to exactly `expected`.
    #[track_caller]
    fn assert_normalized(input: &str, expected: &str) {
        assert_eq!(
            normalize_url(input).expect(input),
            expected,
            "input: {:?}",
            input
        );
    }

    #[test]
    fn url_normalization() {
        // Platform-independent references: (input, expected output).
        let cases = [
            ("https://example.org", "https://example.org/"),
            ("https://example.org/", "https://example.org/"),
            ("http://example.com/path", "http://example.com/path"),
            ("https://api.example.com", "https://api.example.com/"),
            ("http://localhost:3000", "http://localhost:3000/"),
            ("ftp://fileserver.local", "ftp://fileserver.local/"),
            (
                "https://user:pass@example.org:8080/path?foo=bar&query=hello world#fragment",
                "https://user:pass@example.org:8080/path?foo=bar&query=hello%20world#fragment",
            ),
            ("near://testnet/123456789", "near://testnet/123456789"),
            (
                "ftp://files.example.com/file.txt",
                "ftp://files.example.com/file.txt",
            ),
            ("ws://localhost:3000/socket", "ws://localhost:3000/socket"),
            ("mailto:user@example.com", "mailto:user@example.com"),
            (
                "https://example.org/path with spaces",
                "https://example.org/path%20with%20spaces",
            ),
            (
                "https://example.org/path+with+plus",
                "https://example.org/path+with+plus",
            ),
            (
                "https://example.org/path%20already%20encoded",
                "https://example.org/path%20already%20encoded",
            ),
            (
                "data:text/plain;base64,SGVsbG8=",
                "data:text/plain;base64,SGVsbG8=",
            ),
            ("tel:+1-555-123-4567", "tel:+1-555-123-4567"),
            ("urn:isbn:1234567890", "urn:isbn:1234567890"),
            (
                "ldap://[2001:db8::7]/c=GB?objectClass?one",
                "ldap://[2001:db8::7]/c=GB?objectClass?one",
            ),
            (
                "ldap://foo:bar@[2001:db8::7]:80/c=GB?objectClass?one",
                "ldap://foo:bar@[2001:db8::7]:80/c=GB?objectClass?one",
            ),
            ("telnet://192.0.2.16:80", "telnet://192.0.2.16:80/"),
        ];

        for (input, expected) in cases {
            assert_normalized(input, expected);
        }

        #[cfg(unix)]
        {
            // Scheme-less inputs are treated as file paths and resolved
            // against the current working directory.
            let cwd = std::env::current_dir().unwrap();
            let parent = cwd.parent().unwrap();

            let cases = [
                (
                    "/file with spaces.txt",
                    "file:/file%20with%20spaces.txt".to_string(),
                ),
                (
                    "/file+with+pluses.txt",
                    "file:/file+with+pluses.txt".to_string(),
                ),
                (
                    "document.txt",
                    format!("file:{}/document.txt", cwd.display()),
                ),
                ("example.org", format!("file:{}/example.org", cwd.display())),
                (
                    "folder name/file.txt",
                    format!("file:{}/folder%20name/file.txt", cwd.display()),
                ),
                (
                    "./subfolder/../file.txt",
                    format!("file:{}/file.txt", cwd.display()),
                ),
                (
                    "../parent/./file.txt",
                    format!("file:{}/parent/file.txt", parent.display()),
                ),
            ];

            for (input, expected) in cases {
                assert_eq!(
                    normalize_url(input).unwrap(),
                    expected,
                    "input: {:?}",
                    input
                );
            }

            // Only checked when a home directory can be determined.
            if let Some(home_dir) = std::env::home_dir() {
                let home_dir = home_dir.display().to_string();

                let input = "~/path/to/file.txt";
                let want = "file:".to_string() + &home_dir + "/path/to/file.txt";
                assert_eq!(
                    normalize_url(input).unwrap(),
                    want,
                    "`~/` should expand to the home directory, input: {:?}",
                    input
                );
            }
        }

        #[cfg(windows)]
        {
            // Rooted paths without a drive are resolved onto the drive of the
            // current working directory.
            let cwd = std::env::current_dir().unwrap();
            let drive = cwd.to_str().unwrap().chars().next().unwrap();
            let cases = [
                (
                    "/file with spaces.txt",
                    format!("file:/{drive}:/file%20with%20spaces.txt"),
                ),
                (
                    "/file+with+pluses.txt",
                    format!("file:/{drive}:/file+with+pluses.txt"),
                ),
            ];

            for (input, expected) in cases {
                assert_eq!(
                    normalize_url(input).unwrap(),
                    expected,
                    "input: {:?}",
                    input
                );
            }
        }
    }
}