use url::Url;
/// Converts a `file://` URI into a native filesystem path.
///
/// Returns `None` when `uri` cannot be parsed, when its scheme is not `file`, or when neither
/// the `url` crate's own conversion nor the `windows_rooted_file_uri_to_path` fallback produces
/// a path. A successfully converted path is passed through `repair_path_mojibake` before it is
/// returned.
#[cfg(not(target_arch = "wasm32"))]
pub fn uri_to_fs_path(uri: &str) -> Option<std::path::PathBuf> {
    let parsed = Url::parse(uri).ok().filter(|candidate| candidate.scheme() == "file")?;
    let native = match parsed.to_file_path() {
        Ok(direct) => direct,
        // The fallback is only consulted when the standard conversion fails.
        Err(()) => windows_rooted_file_uri_to_path(&parsed)?,
    };
    Some(repair_path_mojibake(native))
}
/// Converts a filesystem path into a `file://` URI string.
///
/// The path is first passed through `normalize_filesystem_path`. A relative path is resolved
/// against the current working directory before conversion.
///
/// # Errors
///
/// Returns a descriptive message when the current directory cannot be determined (relative
/// input only) or when the `url` crate cannot express the absolute path as a URI.
#[cfg(not(target_arch = "wasm32"))]
pub fn fs_path_to_uri<P: AsRef<std::path::Path>>(path: P) -> Result<String, String> {
    let normalized = normalize_filesystem_path(path.as_ref());
    let absolute = if normalized.is_absolute() {
        normalized
    } else {
        match std::env::current_dir() {
            Ok(cwd) => cwd.join(normalized),
            Err(e) => return Err(format!("Failed to get current directory: {}", e)),
        }
    };
    match Url::from_file_path(&absolute) {
        Ok(url) => Ok(url.to_string()),
        Err(()) => Err(format!("Failed to convert path to URI: {}", absolute.display())),
    }
}
/// Returns an owned copy of `path`, with Windows verbatim prefixes removed.
///
/// On Windows, when the path is valid UTF-8:
/// - a leading `\\?\UNC\` is replaced by `\\`;
/// - otherwise a leading `\\?\` is dropped.
///
/// On every other target, and for paths that are not valid UTF-8, the path is returned
/// unchanged.
#[cfg(not(target_arch = "wasm32"))]
fn normalize_filesystem_path(path: &std::path::Path) -> std::path::PathBuf {
    #[cfg(windows)]
    {
        if let Some(text) = path.to_str() {
            // The UNC form must be tested first: it also starts with the shorter `\\?\` prefix.
            if let Some(share) = text.strip_prefix(r"\\?\UNC\") {
                return std::path::PathBuf::from([r"\\", share].concat());
            }
            if let Some(rest) = text.strip_prefix(r"\\?\") {
                return std::path::PathBuf::from(rest);
            }
        }
    }
    path.to_owned()
}
/// Windows-only fallback that turns the path component of a `file` URL into a native path.
///
/// Only URLs with no host, or with the host `localhost`, are accepted. The URL path is
/// percent-decoded as UTF-8; a decoding failure or an empty result yields `None`.
///
/// When the decoded path has the shape `/X:` followed by at least one more byte (with `X` an
/// ASCII letter), the leading slash is dropped. In every case forward slashes are replaced by
/// backslashes.
#[cfg(all(not(target_arch = "wasm32"), windows))]
fn windows_rooted_file_uri_to_path(url: &Url) -> Option<std::path::PathBuf> {
    use percent_encoding::percent_decode_str;

    if !matches!(url.host_str(), None | Some("localhost")) {
        return None;
    }
    let decoded = percent_decode_str(url.path()).decode_utf8().ok()?;
    if decoded.is_empty() {
        return None;
    }
    // Four or more bytes: '/', an ASCII letter, ':', then anything.
    let has_drive_prefix = matches!(
        decoded.as_bytes(),
        [b'/', drive, b':', _, ..] if drive.is_ascii_alphabetic()
    );
    // Skipping one byte is safe: the first byte is the ASCII '/'.
    let rooted: &str = if has_drive_prefix { &decoded[1..] } else { &decoded[..] };
    Some(std::path::PathBuf::from(rooted.replace('/', "\\")))
}
/// Non-Windows counterpart of the Windows-only rooted-path fallback.
///
/// The URL argument is ignored and the result is always `None`, so callers that chain this
/// after a failed conversion simply keep the failure.
#[cfg(all(not(target_arch = "wasm32"), not(windows)))]
fn windows_rooted_file_uri_to_path(_url: &Url) -> Option<std::path::PathBuf> {
None
}
/// Applies `repair_mojibake_text` to a path.
///
/// A path that is not valid UTF-8 is returned untouched. When the repair leaves the text
/// unchanged, the original `PathBuf` is handed back; otherwise a new one is built from the
/// repaired text.
#[cfg(not(target_arch = "wasm32"))]
fn repair_path_mojibake(path: std::path::PathBuf) -> std::path::PathBuf {
    let repaired = match path.to_str() {
        Some(text) => repair_mojibake_text(text),
        None => return path,
    };
    if path.to_str() == Some(repaired.as_str()) {
        path
    } else {
        std::path::PathBuf::from(repaired)
    }
}
/// Attempts to undo text whose UTF-8 bytes were each decoded as a separate character.
///
/// The repair is attempted only when `looks_like_mojibake` flags the text. Every character is
/// then mapped back to a single byte by its code point, and the byte sequence is decoded as
/// UTF-8. The decoded candidate is accepted only if it contains strictly fewer marker
/// characters (per `mojibake_marker_count`) than the input.
///
/// The input is returned unchanged when any character's code point does not fit in a byte,
/// when the bytes are not valid UTF-8, or when the candidate is no improvement.
#[cfg(not(target_arch = "wasm32"))]
fn repair_mojibake_text(text: &str) -> String {
    if !looks_like_mojibake(text) {
        return text.to_owned();
    }
    // `collect` into `Option` stops at the first character whose code point exceeds one byte.
    let reinterpreted = text
        .chars()
        .map(|ch| u8::try_from(u32::from(ch)).ok())
        .collect::<Option<Vec<u8>>>()
        .and_then(|bytes| String::from_utf8(bytes).ok());
    match reinterpreted {
        Some(candidate) if mojibake_marker_count(&candidate) < mojibake_marker_count(text) => {
            candidate
        }
        _ => text.to_owned(),
    }
}
/// Reports whether `text` contains at least one character counted by `mojibake_marker_count`.
#[cfg(not(target_arch = "wasm32"))]
fn looks_like_mojibake(text: &str) -> bool {
    mojibake_marker_count(text) != 0
}
/// Counts the characters in `text` that are treated as mojibake markers.
///
/// The marker set is `Ã` (U+00C3), `Â` (U+00C2), `â` (U+00E2), `ð` (U+00F0) and the
/// replacement character U+FFFD.
#[cfg(not(target_arch = "wasm32"))]
fn mojibake_marker_count(text: &str) -> usize {
    const MARKERS: [char; 5] = ['\u{00C3}', '\u{00C2}', '\u{00E2}', '\u{00F0}', '\u{FFFD}'];
    text.chars().filter(|ch| MARKERS.contains(ch)).count()
}
/// Normalizes a URI or filesystem path into a canonical URI string.
///
/// The input is interpreted in this order:
/// 1. An absolute filesystem path is converted with `fs_path_to_uri`. This runs before URL
///    parsing, so an input that is an absolute path is never parsed as a URL when the
///    conversion succeeds.
/// 2. A parseable URL is returned in the `url` crate's serialized form. `file` URLs with a
///    `localhost` host are first round-tripped through `uri_to_fs_path` and `fs_path_to_uri`,
///    and that result is used when both steps succeed.
/// 3. Anything else is treated as a relative filesystem path and converted with
///    `fs_path_to_uri`.
///
/// If every interpretation fails, the input is returned unchanged.
#[cfg(not(target_arch = "wasm32"))]
pub fn normalize_uri(uri: &str) -> String {
    let as_path = std::path::Path::new(uri);
    if as_path.is_absolute()
        && let Ok(from_path) = fs_path_to_uri(as_path)
    {
        return from_path;
    }

    let Ok(url) = Url::parse(uri) else {
        // `uri_to_fs_path` starts with the same `Url::parse` call, so retrying it after a parse
        // failure cannot succeed. The only interpretation left is a relative filesystem path.
        return fs_path_to_uri(as_path).unwrap_or_else(|_| uri.to_string());
    };

    if url.scheme() == "file"
        && url.host_str() == Some("localhost")
        && let Some(fs_path) = uri_to_fs_path(uri)
        && let Ok(normalized) = fs_path_to_uri(&fs_path)
    {
        return normalized;
    }
    url.to_string()
}
/// Normalizes a URI on `wasm32` targets.
///
/// A parseable URL is returned in the `url` crate's serialized form; any other input is
/// returned unchanged.
#[cfg(target_arch = "wasm32")]
pub fn normalize_uri(uri: &str) -> String {
    match Url::parse(uri) {
        Ok(parsed) => parsed.to_string(),
        Err(_) => uri.to_owned(),
    }
}
pub mod classify;
pub use classify::{is_file_uri, is_special_scheme, uri_extension, uri_key};
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;

    /// A plain file URI is its own key.
    #[test]
    fn test_uri_key_basic() {
        let key = uri_key("file:///tmp/test.pl");
        assert_eq!(key, "file:///tmp/test.pl");
    }

    /// The drive letter is lowercased in the key; the rest of the path keeps its case.
    #[test]
    fn test_uri_key_windows_drive() {
        let cases = [
            ("file:///C:/Users/test.pl", "file:///c:/Users/test.pl"),
            ("file:///D:/foo/bar.pm", "file:///d:/foo/bar.pm"),
        ];
        for (input, expected) in cases {
            assert_eq!(uri_key(input), expected);
        }
    }

    /// Input that is not a URI comes back unchanged.
    #[test]
    fn test_uri_key_invalid() {
        let key = uri_key("not-a-uri");
        assert_eq!(key, "not-a-uri");
    }

    /// Only the `file` scheme counts as a file URI.
    #[test]
    fn test_is_file_uri() {
        assert!(is_file_uri("file:///tmp/test.pl"));
        for other in ["https://example.com", "untitled:Untitled-1"] {
            assert!(!is_file_uri(other));
        }
    }

    /// `untitled:` is a special scheme, `file:` is not.
    #[test]
    fn test_is_special_scheme() {
        assert!(is_special_scheme("untitled:Untitled-1"));
        assert!(!is_special_scheme("file:///tmp/test.pl"));
    }

    /// Extensions are read from the path; a query string is ignored.
    #[test]
    fn test_uri_extension() {
        assert_eq!(uri_extension("file:///tmp/test.pl"), Some("pl"));
        assert_eq!(uri_extension("file:///tmp/Module.pm"), Some("pm"));
        assert_eq!(uri_extension("file:///tmp/script.t"), Some("t"));
        assert_eq!(uri_extension("file:///tmp/file.pl?query=1"), Some("pl"));
        assert_eq!(uri_extension("file:///tmp/no-extension"), None);
    }

    /// Tests for the helpers that only exist on targets with a filesystem.
    #[cfg(not(target_arch = "wasm32"))]
    mod filesystem_tests {
        use super::*;
        use perl_tdd_support::{must, must_some};

        /// A simple file URI converts to a path that ends with the file name.
        #[test]
        fn test_uri_to_fs_path_basic() {
            let converted = uri_to_fs_path("file:///tmp/test.pl");
            assert!(converted.is_some());
            assert!(must_some(converted).ends_with("test.pl"));
        }

        /// URIs with a scheme other than `file` have no filesystem path.
        #[test]
        fn test_uri_to_fs_path_non_file() {
            for uri in ["https://example.com", "untitled:Untitled-1"] {
                assert!(uri_to_fs_path(uri).is_none());
            }
        }

        /// Percent-encoded spaces are decoded in the resulting path.
        #[test]
        fn test_uri_to_fs_path_with_spaces() {
            let converted = uri_to_fs_path("file:///tmp/path%20with%20spaces/test.pl");
            assert!(converted.is_some());
            let converted = must_some(converted);
            assert!(converted.to_string_lossy().contains("path with spaces"));
        }

        /// A double-encoded `é` in the URI is repaired to the intended character.
        #[test]
        fn test_uri_to_fs_path_repairs_common_mojibake() {
            let repaired = must_some(uri_to_fs_path("file:///tmp/caf%C3%83%C2%A9.pl"));
            let path_str = repaired.to_string_lossy();
            assert!(path_str.contains("café.pl"), "expected repaired UTF-8 path, got {path_str}");
        }

        /// A path converts to a `file:///` URI that still names the file.
        #[test]
        fn test_fs_path_to_uri_basic() {
            let produced = must(fs_path_to_uri("/tmp/test.pl"));
            assert!(produced.starts_with("file:///"));
            assert!(produced.contains("test.pl"));
        }

        /// Spaces in a path are accepted either percent-encoded or literal.
        #[test]
        fn test_fs_path_to_uri_with_spaces() {
            let produced = must(fs_path_to_uri("/tmp/path with spaces/test.pl"));
            let encoded = produced.contains("%20");
            let literal = produced.contains("path with spaces");
            assert!(encoded || literal);
        }

        /// An already-canonical file URI is returned as-is.
        #[test]
        fn test_normalize_uri_valid() {
            assert_eq!(normalize_uri("file:///tmp/test.pl"), "file:///tmp/test.pl");
        }

        /// A `localhost` authority is dropped during normalization.
        #[test]
        fn test_normalize_uri_canonicalizes_localhost_authority() {
            let normalized = normalize_uri("file://localhost/tmp/test.pl");
            assert_eq!(normalized, "file:///tmp/test.pl");
        }

        /// Non-file schemes pass through normalization unchanged.
        #[test]
        fn test_normalize_uri_special() {
            assert_eq!(normalize_uri("untitled:Untitled-1"), "untitled:Untitled-1");
        }

        /// An absolute filesystem path normalizes to the same URI `fs_path_to_uri` produces.
        #[test]
        fn test_normalize_uri_absolute_path() {
            let target = std::env::temp_dir().join("normalize-uri-absolute.pl");
            let expected = must(fs_path_to_uri(&target));
            let actual = normalize_uri(&target.to_string_lossy());
            assert_eq!(actual, expected);
        }

        /// Path → URI → path keeps the file name.
        #[test]
        fn test_roundtrip() {
            let uri = must(fs_path_to_uri("/tmp/roundtrip-test.pl"));
            let restored = must_some(uri_to_fs_path(&uri));
            assert!(restored.ends_with("roundtrip-test.pl"));
        }
    }
}