1use url::Url;
36
37#[cfg(not(target_arch = "wasm32"))]
69pub fn uri_to_fs_path(uri: &str) -> Option<std::path::PathBuf> {
70 let url = Url::parse(uri).ok()?;
72
73 if url.scheme() != "file" {
75 return None;
76 }
77
78 let path = url.to_file_path().ok().or_else(|| windows_rooted_file_uri_to_path(&url))?;
82 Some(repair_path_mojibake(path))
83}
84
85#[cfg(not(target_arch = "wasm32"))]
119pub fn fs_path_to_uri<P: AsRef<std::path::Path>>(path: P) -> Result<String, String> {
120 let path = normalize_filesystem_path(path.as_ref());
121
122 let abs_path = if path.is_absolute() {
124 path.to_path_buf()
125 } else {
126 std::env::current_dir()
127 .map_err(|e| format!("Failed to get current directory: {}", e))?
128 .join(path)
129 };
130
131 Url::from_file_path(&abs_path)
133 .map(|url| url.to_string())
134 .map_err(|_| format!("Failed to convert path to URI: {}", abs_path.display()))
135}
136
#[cfg(not(target_arch = "wasm32"))]
/// Rewrites Windows verbatim (`\\?\`) paths into their conventional spelling.
///
/// On Windows:
/// * `\\?\UNC\server\share\...` becomes `\\server\share\...`;
/// * `\\?\C:\...` (any ASCII drive letter) becomes `C:\...`;
/// * every other path — including verbatim paths that have no conventional
///   equivalent, such as `\\?\Volume{GUID}\...` — is returned unchanged.
///
/// On all other targets the path is returned unchanged. Paths that are not
/// valid UTF-8 are also returned unchanged.
fn normalize_filesystem_path(path: &std::path::Path) -> std::path::PathBuf {
    #[cfg(windows)]
    {
        if let Some(path_str) = path.to_str() {
            if let Some(stripped) = path_str.strip_prefix(r"\\?\UNC\") {
                return std::path::PathBuf::from(format!(r"\\{}", stripped));
            }
            if let Some(stripped) = path_str.strip_prefix(r"\\?\") {
                // Only strip the prefix when what remains is a drive-letter
                // path. Blindly stripping would turn e.g. `\\?\Volume{..}\x`
                // into the *relative* path `Volume{..}\x`, which callers would
                // then resolve against the current directory.
                let is_drive_path = matches!(
                    stripped.as_bytes(),
                    [drive, b':', ..] if drive.is_ascii_alphabetic()
                );
                if is_drive_path {
                    return std::path::PathBuf::from(stripped);
                }
            }
        }
    }

    path.to_path_buf()
}
153
#[cfg(all(not(target_arch = "wasm32"), windows))]
/// Windows fallback that builds a path from a `file:` URL by hand.
///
/// Only URLs with no host or with the host `localhost` are accepted. The URL
/// path is percent-decoded (it must decode to valid UTF-8), a leading `/` in
/// front of a drive specifier such as `/C:/...` is dropped, and every `/` is
/// replaced with `\`. Paths without a drive specifier keep their leading
/// separator, so `/tmp/x` becomes `\tmp\x`.
fn windows_rooted_file_uri_to_path(url: &Url) -> Option<std::path::PathBuf> {
    use percent_encoding::percent_decode_str;

    // A non-local host cannot be mapped here.
    if !matches!(url.host_str(), None | Some("localhost")) {
        return None;
    }

    let decoded = percent_decode_str(url.path()).decode_utf8().ok()?;
    if decoded.is_empty() {
        return None;
    }

    // `/X:` followed by at least one more byte: skip the leading slash so the
    // result starts with the drive letter. Byte 0 is ASCII `/`, so index 1 is
    // guaranteed to be a char boundary.
    let rooted: &str = match decoded.as_bytes() {
        [b'/', drive, b':', _, ..] if drive.is_ascii_alphabetic() => &decoded[1..],
        _ => &decoded[..],
    };

    Some(std::path::PathBuf::from(rooted.replace('/', "\\")))
}
180
181#[cfg(all(not(target_arch = "wasm32"), not(windows)))]
182fn windows_rooted_file_uri_to_path(_url: &Url) -> Option<std::path::PathBuf> {
183 None
184}
185
186#[cfg(not(target_arch = "wasm32"))]
187fn repair_path_mojibake(path: std::path::PathBuf) -> std::path::PathBuf {
188 let Some(path_text) = path.to_str() else {
189 return path;
190 };
191
192 let repaired = repair_mojibake_text(path_text);
193 if repaired == path_text { path } else { std::path::PathBuf::from(repaired) }
194}
195
196#[cfg(not(target_arch = "wasm32"))]
197fn repair_mojibake_text(text: &str) -> String {
198 if !looks_like_mojibake(text) {
199 return text.to_string();
200 }
201
202 let mut bytes = Vec::with_capacity(text.len());
203 for ch in text.chars() {
204 let code = u32::from(ch);
205 let Ok(byte) = u8::try_from(code) else {
206 return text.to_string();
207 };
208 bytes.push(byte);
209 }
210
211 let Ok(candidate) = String::from_utf8(bytes) else {
212 return text.to_string();
213 };
214
215 if mojibake_marker_count(&candidate) < mojibake_marker_count(text) {
216 candidate
217 } else {
218 text.to_string()
219 }
220}
221
222#[cfg(not(target_arch = "wasm32"))]
223fn looks_like_mojibake(text: &str) -> bool {
224 mojibake_marker_count(text) > 0
225}
226
#[cfg(not(target_arch = "wasm32"))]
/// Counts the characters of `text` that belong to the mojibake marker set.
///
/// The set is U+00C3 (`Ã`), U+00C2 (`Â`), U+00E2 (`â`), U+00F0 (`ð`) and
/// U+FFFD (the replacement character). Each occurrence counts once.
fn mojibake_marker_count(text: &str) -> usize {
    const MARKERS: [char; 5] = ['\u{C3}', '\u{C2}', '\u{E2}', '\u{F0}', '\u{FFFD}'];

    text.chars().filter(|ch| MARKERS.contains(ch)).count()
}
231
232#[cfg(not(target_arch = "wasm32"))]
264pub fn normalize_uri(uri: &str) -> String {
265 let path = std::path::Path::new(uri);
266
267 if path.is_absolute()
270 && let Ok(uri_string) = fs_path_to_uri(path)
271 {
272 return uri_string;
273 }
274
275 if let Ok(url) = Url::parse(uri) {
277 if url.scheme() == "file"
281 && url.host_str() == Some("localhost")
282 && let Some(fs_path) = uri_to_fs_path(uri)
283 && let Ok(normalized) = fs_path_to_uri(&fs_path)
284 {
285 return normalized;
286 }
287
288 return url.to_string();
290 }
291
292 if let Ok(uri_string) = fs_path_to_uri(path) {
295 return uri_string;
296 }
297
298 if uri.starts_with("file://")
301 && let Some(fs_path) = uri_to_fs_path(uri)
302 && let Ok(normalized) = fs_path_to_uri(&fs_path)
303 {
304 return normalized;
305 }
306
307 uri.to_string()
309}
310
#[cfg(target_arch = "wasm32")]
/// Normalizes a URI string on `wasm32` targets.
///
/// Returns the parsed URL's string form when `uri` parses as a URL, and `uri`
/// unchanged otherwise. No filesystem conversion is attempted here.
pub fn normalize_uri(uri: &str) -> String {
    match Url::parse(uri) {
        Ok(parsed) => parsed.to_string(),
        Err(_) => uri.to_string(),
    }
}
317
318pub mod classify;
320pub use classify::{is_file_uri, is_special_scheme, uri_extension, uri_key};
321
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;

    // ---- Re-exported classification helpers -------------------------------

    #[test]
    fn test_uri_key_basic() {
        let key = uri_key("file:///tmp/test.pl");
        assert_eq!(key, "file:///tmp/test.pl");
    }

    #[test]
    fn test_uri_key_windows_drive() {
        // The drive letter is expected in lower case; the rest is unchanged.
        let cases = [
            ("file:///C:/Users/test.pl", "file:///c:/Users/test.pl"),
            ("file:///D:/foo/bar.pm", "file:///d:/foo/bar.pm"),
        ];
        for (input, expected) in cases {
            assert_eq!(uri_key(input), expected);
        }
    }

    #[test]
    fn test_uri_key_invalid() {
        let key = uri_key("not-a-uri");
        assert_eq!(key, "not-a-uri");
    }

    #[test]
    fn test_is_file_uri() {
        assert!(is_file_uri("file:///tmp/test.pl"));
        for other in ["https://example.com", "untitled:Untitled-1"] {
            assert!(!is_file_uri(other));
        }
    }

    #[test]
    fn test_is_special_scheme() {
        assert!(is_special_scheme("untitled:Untitled-1"));
        assert!(!is_special_scheme("file:///tmp/test.pl"));
    }

    #[test]
    fn test_uri_extension() {
        let cases = [
            ("file:///tmp/test.pl", Some("pl")),
            ("file:///tmp/Module.pm", Some("pm")),
            ("file:///tmp/script.t", Some("t")),
            ("file:///tmp/no-extension", None),
            ("file:///tmp/file.pl?query=1", Some("pl")),
        ];
        for (input, expected) in cases {
            assert_eq!(uri_extension(input), expected);
        }
    }

    // ---- Filesystem-backed conversions (not available on wasm32) -----------

    #[cfg(not(target_arch = "wasm32"))]
    mod filesystem_tests {
        use super::*;
        use perl_tdd_support::{must, must_some};

        #[test]
        fn test_uri_to_fs_path_basic() {
            let converted = uri_to_fs_path("file:///tmp/test.pl");
            assert!(converted.is_some());
            let fs_path = must_some(converted);
            assert!(fs_path.ends_with("test.pl"));
        }

        #[test]
        fn test_uri_to_fs_path_non_file() {
            for uri in ["https://example.com", "untitled:Untitled-1"] {
                assert!(uri_to_fs_path(uri).is_none());
            }
        }

        #[test]
        fn test_uri_to_fs_path_with_spaces() {
            let converted = uri_to_fs_path("file:///tmp/path%20with%20spaces/test.pl");
            assert!(converted.is_some());
            let fs_path = must_some(converted);
            // `%20` must have been decoded into literal spaces.
            assert!(fs_path.to_string_lossy().contains("path with spaces"));
        }

        #[test]
        fn test_uri_to_fs_path_repairs_common_mojibake() {
            // `%C3%83%C2%A9` decodes to the two characters U+00C3 U+00A9,
            // which the repair step should collapse into U+00E9.
            let path = must_some(uri_to_fs_path("file:///tmp/caf%C3%83%C2%A9.pl"));
            let path_str = path.to_string_lossy();
            assert!(path_str.contains("café.pl"), "expected repaired UTF-8 path, got {path_str}");
        }

        #[test]
        fn test_fs_path_to_uri_basic() {
            let uri = must(fs_path_to_uri("/tmp/test.pl"));
            assert!(uri.starts_with("file:///"));
            assert!(uri.contains("test.pl"));
        }

        #[test]
        fn test_fs_path_to_uri_with_spaces() {
            let uri = must(fs_path_to_uri("/tmp/path with spaces/test.pl"));
            let encoded = uri.contains("%20");
            let literal = uri.contains("path with spaces");
            assert!(encoded || literal);
        }

        #[test]
        fn test_normalize_uri_valid() {
            assert_eq!(normalize_uri("file:///tmp/test.pl"), "file:///tmp/test.pl");
        }

        #[test]
        fn test_normalize_uri_canonicalizes_localhost_authority() {
            let normalized = normalize_uri("file://localhost/tmp/test.pl");
            assert_eq!(normalized, "file:///tmp/test.pl");
        }

        #[test]
        fn test_normalize_uri_special() {
            assert_eq!(normalize_uri("untitled:Untitled-1"), "untitled:Untitled-1");
        }

        #[test]
        fn test_normalize_uri_absolute_path() {
            let absolute = std::env::temp_dir().join("normalize-uri-absolute.pl");
            let expected = must(fs_path_to_uri(&absolute));
            let raw = absolute.to_string_lossy();

            assert_eq!(normalize_uri(&raw), expected);
        }

        #[test]
        fn test_roundtrip() {
            let uri = must(fs_path_to_uri("/tmp/roundtrip-test.pl"));
            let back = must_some(uri_to_fs_path(&uri));
            assert!(back.ends_with("roundtrip-test.pl"));
        }
    }
}