Skip to main content

perl_uri/
lib.rs

1//! URI ↔ filesystem path conversion and normalization utilities.
2//!
3//! This crate provides consistent URI handling for the Perl LSP ecosystem,
4//! including:
5//!
6//! - Converting between `file://` URIs and filesystem paths
7//! - Windows drive-letter normalization
8//! - Percent encoding/decoding
9//! - Special scheme handling (`untitled:`, etc.)
10//!
11//! # Platform Support
12//!
13//! Most functions are not available on `wasm32` targets since they require
14//! filesystem access.
15//!
16//! # Examples
17//!
18//! ```
19//! # #[cfg(not(target_arch = "wasm32"))]
20//! # fn main() {
21//! use perl_uri::{uri_to_fs_path, fs_path_to_uri};
22//!
23//! // Convert a URI to a path
24//! let path = uri_to_fs_path("file:///tmp/test.pl");
25//! assert!(path.is_some());
26//!
27//! // Convert a path to a URI
28//! let uri = fs_path_to_uri("/tmp/test.pl");
29//! assert!(uri.is_ok());
30//! # }
31//! # #[cfg(target_arch = "wasm32")]
32//! # fn main() {}
33//! ```
34
35use url::Url;
36
37/// Convert a `file://` URI to a filesystem path.
38///
39/// Properly handles percent-encoding and works with spaces, Windows paths,
40/// and non-ASCII characters. Returns `None` if the URI is not a valid `file://` URI.
41///
42/// # Examples
43///
44/// ```
45/// # #[cfg(not(target_arch = "wasm32"))]
46/// # fn main() {
47/// use perl_uri::uri_to_fs_path;
48///
49/// // Basic file URI
50/// let path = uri_to_fs_path("file:///tmp/test.pl");
51/// assert!(path.is_some());
52///
53/// // URI with percent-encoded spaces
54/// let path = uri_to_fs_path("file:///tmp/path%20with%20spaces/test.pl");
55/// assert!(path.is_some());
56///
57/// // Non-file URIs return None
58/// let path = uri_to_fs_path("https://example.com");
59/// assert!(path.is_none());
60/// # }
61/// # #[cfg(target_arch = "wasm32")]
62/// # fn main() {}
63/// ```
64///
65/// # Platform Support
66///
67/// This function is not available on `wasm32` targets (no filesystem).
68#[cfg(not(target_arch = "wasm32"))]
69pub fn uri_to_fs_path(uri: &str) -> Option<std::path::PathBuf> {
70    // Parse the URI
71    let url = Url::parse(uri).ok()?;
72
73    // Only handle file:// URIs
74    if url.scheme() != "file" {
75        return None;
76    }
77
78    // Convert to filesystem path using the url crate's built-in method.
79    // On Windows, accept rooted file URIs like file:///tmp/test.pl as \tmp\test.pl
80    // so cross-platform tests and internal helpers stay permissive.
81    let path = url.to_file_path().ok().or_else(|| windows_rooted_file_uri_to_path(&url))?;
82    Some(repair_path_mojibake(path))
83}
84
85/// Convert a filesystem path to a `file://` URI.
86///
87/// Properly handles percent-encoding and works with spaces, Windows paths,
88/// and non-ASCII characters.
89///
90/// # Examples
91///
92/// ```
93/// # #[cfg(not(target_arch = "wasm32"))]
94/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
95/// use perl_uri::fs_path_to_uri;
96///
97/// // Absolute path
98/// let uri = fs_path_to_uri("/tmp/test.pl")?;
99/// assert!(uri.starts_with("file:///"));
100///
101/// // Path with spaces gets percent-encoded
102/// let uri = fs_path_to_uri("/tmp/path with spaces/test.pl")?;
103/// assert!(uri.contains("%20"));
104/// # Ok(())
105/// # }
106/// # #[cfg(target_arch = "wasm32")]
107/// # fn main() {}
108/// ```
109///
110/// # Errors
111///
112/// Returns an error if the path cannot be converted to an absolute path
113/// or if the conversion to a URI fails.
114///
115/// # Platform Support
116///
117/// This function is not available on `wasm32` targets (no filesystem).
118#[cfg(not(target_arch = "wasm32"))]
119pub fn fs_path_to_uri<P: AsRef<std::path::Path>>(path: P) -> Result<String, String> {
120    let path = normalize_filesystem_path(path.as_ref());
121
122    // Convert to absolute path if relative
123    let abs_path = if path.is_absolute() {
124        path.to_path_buf()
125    } else {
126        std::env::current_dir()
127            .map_err(|e| format!("Failed to get current directory: {}", e))?
128            .join(path)
129    };
130
131    // Use the url crate's built-in method to create a proper file:// URI
132    Url::from_file_path(&abs_path)
133        .map(|url| url.to_string())
134        .map_err(|_| format!("Failed to convert path to URI: {}", abs_path.display()))
135}
136
/// Return an owned copy of `path`, with Windows verbatim prefixes removed.
///
/// On Windows, a leading `\\?\UNC\` is rewritten to `\\` and a leading `\\?\`
/// is dropped. Paths that are not valid UTF-8, and all paths on other
/// platforms, are returned unchanged.
#[cfg(not(target_arch = "wasm32"))]
fn normalize_filesystem_path(path: &std::path::Path) -> std::path::PathBuf {
    #[cfg(windows)]
    {
        // The UNC form must be tried first: `\\?\` is a prefix of `\\?\UNC\`.
        let without_verbatim = path.to_str().and_then(|raw| {
            raw.strip_prefix(r"\\?\UNC\")
                .map(|share| format!(r"\\{}", share))
                .or_else(|| raw.strip_prefix(r"\\?\").map(str::to_owned))
        });
        if let Some(plain) = without_verbatim {
            return std::path::PathBuf::from(plain);
        }
    }

    std::path::PathBuf::from(path)
}
153
154#[cfg(all(not(target_arch = "wasm32"), windows))]
155fn windows_rooted_file_uri_to_path(url: &Url) -> Option<std::path::PathBuf> {
156    use percent_encoding::percent_decode_str;
157
158    match url.host_str() {
159        None | Some("localhost") => {}
160        Some(_) => return None,
161    }
162
163    let decoded = percent_decode_str(url.path()).decode_utf8().ok()?;
164    if decoded.is_empty() {
165        return None;
166    }
167
168    let native = if decoded.len() > 3
169        && decoded.starts_with('/')
170        && decoded.as_bytes()[2] == b':'
171        && decoded.as_bytes()[1].is_ascii_alphabetic()
172    {
173        decoded[1..].replace('/', "\\")
174    } else {
175        decoded.replace('/', "\\")
176    };
177
178    Some(std::path::PathBuf::from(native))
179}
180
181#[cfg(all(not(target_arch = "wasm32"), not(windows)))]
182fn windows_rooted_file_uri_to_path(_url: &Url) -> Option<std::path::PathBuf> {
183    None
184}
185
186#[cfg(not(target_arch = "wasm32"))]
187fn repair_path_mojibake(path: std::path::PathBuf) -> std::path::PathBuf {
188    let Some(path_text) = path.to_str() else {
189        return path;
190    };
191
192    let repaired = repair_mojibake_text(path_text);
193    if repaired == path_text { path } else { std::path::PathBuf::from(repaired) }
194}
195
196#[cfg(not(target_arch = "wasm32"))]
197fn repair_mojibake_text(text: &str) -> String {
198    if !looks_like_mojibake(text) {
199        return text.to_string();
200    }
201
202    let mut bytes = Vec::with_capacity(text.len());
203    for ch in text.chars() {
204        let code = u32::from(ch);
205        let Ok(byte) = u8::try_from(code) else {
206            return text.to_string();
207        };
208        bytes.push(byte);
209    }
210
211    let Ok(candidate) = String::from_utf8(bytes) else {
212        return text.to_string();
213    };
214
215    if mojibake_marker_count(&candidate) < mojibake_marker_count(text) {
216        candidate
217    } else {
218        text.to_string()
219    }
220}
221
222#[cfg(not(target_arch = "wasm32"))]
223fn looks_like_mojibake(text: &str) -> bool {
224    mojibake_marker_count(text) > 0
225}
226
/// Count the characters of `text` that are treated as mojibake markers:
/// `Ã`, `Â`, `â`, `ð` and U+FFFD (the replacement character).
#[cfg(not(target_arch = "wasm32"))]
fn mojibake_marker_count(text: &str) -> usize {
    const MARKERS: [char; 5] = ['Ã', 'Â', 'â', 'ð', '\u{FFFD}'];

    text.chars().filter(|ch| MARKERS.contains(ch)).count()
}
231
232/// Normalize a URI to a consistent form.
233///
234/// This function handles various URI formats and normalizes them:
235/// - Valid URIs are parsed and re-serialized
236/// - File paths are converted to `file://` URIs
237/// - Malformed `file://` URIs are reconstructed
238/// - Special URIs (e.g., `untitled:`) are preserved as-is
239///
240/// # Examples
241///
242/// ```
243/// # #[cfg(not(target_arch = "wasm32"))]
244/// # fn main() {
245/// use perl_uri::normalize_uri;
246///
247/// // Already valid URI
248/// let uri = normalize_uri("file:///tmp/test.pl");
249/// assert_eq!(uri, "file:///tmp/test.pl");
250///
251/// // Special schemes preserved
252/// let uri = normalize_uri("untitled:Untitled-1");
253/// assert_eq!(uri, "untitled:Untitled-1");
254/// # }
255/// # #[cfg(target_arch = "wasm32")]
256/// # fn main() {}
257/// ```
258///
259/// # Platform Support
260///
261/// The full implementation is only available on non-`wasm32` targets.
262/// On `wasm32`, only URI parsing is performed without filesystem operations.
263#[cfg(not(target_arch = "wasm32"))]
264pub fn normalize_uri(uri: &str) -> String {
265    let path = std::path::Path::new(uri);
266
267    // Raw absolute filesystem paths should normalize to file:// URIs before
268    // URL parsing, especially on Windows where `C:\foo` can parse as `c:`.
269    if path.is_absolute()
270        && let Ok(uri_string) = fs_path_to_uri(path)
271    {
272        return uri_string;
273    }
274
275    // Try to parse as URL first
276    if let Ok(url) = Url::parse(uri) {
277        // Canonicalize local file URIs through filesystem conversion so legacy
278        // forms like `file://C:/...` normalize to `file:///c:/...` on Windows
279        // and `file:///tmp/...` on Unix while preserving non-local authorities.
280        if url.scheme() == "file"
281            && url.host_str() == Some("localhost")
282            && let Some(fs_path) = uri_to_fs_path(uri)
283            && let Ok(normalized) = fs_path_to_uri(&fs_path)
284        {
285            return normalized;
286        }
287
288        // Already a valid non-file URI, return as-is.
289        return url.to_string();
290    }
291
292    // If not a valid URI, try to treat as a file path
293    // Try to convert path to URI using our helper function
294    if let Ok(uri_string) = fs_path_to_uri(path) {
295        return uri_string;
296    }
297
298    // Last resort: if it looks like a file:// URI but is malformed,
299    // try to extract the path and reconstruct properly
300    if uri.starts_with("file://")
301        && let Some(fs_path) = uri_to_fs_path(uri)
302        && let Ok(normalized) = fs_path_to_uri(&fs_path)
303    {
304        return normalized;
305    }
306
307    // Final fallback: return as-is for special URIs like untitled:
308    uri.to_string()
309}
310
/// Normalize a URI to a consistent form (wasm32 version - no filesystem).
///
/// A string that parses as a URI is re-serialized; anything else is returned
/// unchanged.
#[cfg(target_arch = "wasm32")]
pub fn normalize_uri(uri: &str) -> String {
    Url::parse(uri).map_or_else(|_| uri.to_string(), |url| url.to_string())
}
317
318/// URI classification and key normalization helpers (previously `perl-uri-classify`).
319pub mod classify;
320pub use classify::{is_file_uri, is_special_scheme, uri_extension, uri_key};
321
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;

    // ----- classification helpers re-exported from `classify` -----

    #[test]
    fn test_uri_key_basic() {
        let key = uri_key("file:///tmp/test.pl");
        assert_eq!(key, "file:///tmp/test.pl");
    }

    #[test]
    fn test_uri_key_windows_drive() {
        // Drive letters are expected in lower case in the key.
        let cases = [
            ("file:///C:/Users/test.pl", "file:///c:/Users/test.pl"),
            ("file:///D:/foo/bar.pm", "file:///d:/foo/bar.pm"),
        ];
        for (input, expected) in cases {
            assert_eq!(uri_key(input), expected);
        }
    }

    #[test]
    fn test_uri_key_invalid() {
        let key = uri_key("not-a-uri");
        assert_eq!(key, "not-a-uri");
    }

    #[test]
    fn test_is_file_uri() {
        assert!(is_file_uri("file:///tmp/test.pl"));
        for other in ["https://example.com", "untitled:Untitled-1"] {
            assert!(!is_file_uri(other));
        }
    }

    #[test]
    fn test_is_special_scheme() {
        assert!(is_special_scheme("untitled:Untitled-1"));
        assert!(!is_special_scheme("file:///tmp/test.pl"));
    }

    #[test]
    fn test_uri_extension() {
        let cases = [
            ("file:///tmp/test.pl", Some("pl")),
            ("file:///tmp/Module.pm", Some("pm")),
            ("file:///tmp/script.t", Some("t")),
            ("file:///tmp/no-extension", None),
            ("file:///tmp/file.pl?query=1", Some("pl")),
        ];
        for (input, expected) in cases {
            assert_eq!(uri_extension(input), expected);
        }
    }

    // ----- conversions that need a filesystem-capable target -----

    #[cfg(not(target_arch = "wasm32"))]
    mod filesystem_tests {
        use super::*;
        use perl_tdd_support::{must, must_some};

        #[test]
        fn test_uri_to_fs_path_basic() {
            let converted = uri_to_fs_path("file:///tmp/test.pl");
            assert!(converted.is_some());
            assert!(must_some(converted).ends_with("test.pl"));
        }

        #[test]
        fn test_uri_to_fs_path_non_file() {
            for non_file in ["https://example.com", "untitled:Untitled-1"] {
                assert!(uri_to_fs_path(non_file).is_none());
            }
        }

        #[test]
        fn test_uri_to_fs_path_with_spaces() {
            let converted = uri_to_fs_path("file:///tmp/path%20with%20spaces/test.pl");
            assert!(converted.is_some());
            let fs_path = must_some(converted);
            let rendered = fs_path.to_string_lossy();
            assert!(rendered.contains("path with spaces"));
        }

        #[test]
        fn test_uri_to_fs_path_repairs_common_mojibake() {
            // %C3%83%C2%A9 is the UTF-8 encoding of "Ã©", i.e. a doubly
            // encoded "é".
            let fs_path = must_some(uri_to_fs_path("file:///tmp/caf%C3%83%C2%A9.pl"));
            let path_str = fs_path.to_string_lossy();
            assert!(path_str.contains("café.pl"), "expected repaired UTF-8 path, got {path_str}");
        }

        #[test]
        fn test_fs_path_to_uri_basic() {
            let converted = must(fs_path_to_uri("/tmp/test.pl"));
            assert!(converted.starts_with("file:///"));
            assert!(converted.contains("test.pl"));
        }

        #[test]
        fn test_fs_path_to_uri_with_spaces() {
            let converted = must(fs_path_to_uri("/tmp/path with spaces/test.pl"));
            let encoded = converted.contains("%20");
            let literal = converted.contains("path with spaces");
            assert!(encoded || literal);
        }

        #[test]
        fn test_normalize_uri_valid() {
            assert_eq!(normalize_uri("file:///tmp/test.pl"), "file:///tmp/test.pl");
        }

        #[test]
        fn test_normalize_uri_canonicalizes_localhost_authority() {
            let normalized = normalize_uri("file://localhost/tmp/test.pl");
            assert_eq!(normalized, "file:///tmp/test.pl");
        }

        #[test]
        fn test_normalize_uri_special() {
            assert_eq!(normalize_uri("untitled:Untitled-1"), "untitled:Untitled-1");
        }

        #[test]
        fn test_normalize_uri_absolute_path() {
            // A raw absolute path must normalize to the same URI that
            // `fs_path_to_uri` produces for it.
            let absolute = std::env::temp_dir().join("normalize-uri-absolute.pl");
            let expected = must(fs_path_to_uri(&absolute));
            let raw = absolute.to_string_lossy();

            assert_eq!(normalize_uri(raw.as_ref()), expected);
        }

        #[test]
        fn test_roundtrip() {
            let as_uri = must(fs_path_to_uri("/tmp/roundtrip-test.pl"));
            let back = must_some(uri_to_fs_path(&as_uri));
            assert!(back.ends_with("roundtrip-test.pl"));
        }
    }
}