use std::{fmt::Write, path::PathBuf, str::FromStr};
use itertools::Itertools;
use percent_encoding::{percent_decode, percent_encode, AsciiSet, CONTROLS};
use thiserror::Error;
use typed_path::{
Utf8TypedComponent, Utf8TypedPath, Utf8TypedPathBuf, Utf8UnixComponent, Utf8WindowsComponent,
Utf8WindowsPrefix,
};
use url::{Host, Url};
/// Recognises a URL path segment that consists solely of a Windows drive letter.
///
/// Two spellings are accepted: the plain form `X:` and the form where the colon is
/// percent-escaped, `X%3A` / `X%3a`. In both cases `X` must be an ASCII letter.
///
/// Returns the drive root as a Windows path prefix (`X:\`), preserving the letter's case, or
/// `None` when the segment is anything else.
fn is_windows_drive_letter_segment(segment: &str) -> Option<String> {
    let chars = || segment.chars();

    // `collect_tuple` only succeeds when the iterator yields exactly as many items as the tuple
    // has fields, so each attempt also acts as a length check on the segment.
    let letter = if let Some((letter, ':')) = chars().collect_tuple::<(char, char)>() {
        letter
    } else if let Some((letter, '%', '3', 'a' | 'A')) =
        chars().collect_tuple::<(char, char, char, char)>()
    {
        letter
    } else {
        return None;
    };

    letter
        .is_ascii_alphabetic()
        .then(|| format!("{letter}:\\"))
}
/// Percent-decodes a single URL path segment, returning `None` when the decoded bytes are not
/// valid UTF-8.
fn decode_path_segment(segment: &str) -> Option<String> {
    String::from_utf8(percent_decode(segment.as_bytes()).collect()).ok()
}

/// Converts a `file://` URL into a path string and wraps it in `T`.
///
/// The conversion works purely on the URL text, independent of the platform this code runs on:
///
/// * A host other than `localhost` produces a UNC path (`\\host\...`) using `\` separators.
/// * A first segment that is a drive letter (`C:` or `C%3A`) produces a Windows path
///   (`C:\...`) using `\` separators.
/// * A first segment that percent-decodes to a Windows path containing backslashes
///   (e.g. `C%3A%5Cfoo%5Cbar`) is used as the start of a Windows path; any further URL segments
///   are appended with `\`.
/// * Anything else produces a Unix-style path using `/` separators. Note that the first
///   segment is always followed by a separator, so `file:///foo` becomes `/foo/`.
///
/// Every segment is percent-decoded.
///
/// Returns `None` when the scheme is not `file`, the URL has no path segments, the first
/// segment starts with `.` (relative paths are not supported), or a segment does not decode to
/// valid UTF-8.
fn url_to_path_inner<T: From<String>>(url: &Url) -> Option<T> {
    if url.scheme() != "file" {
        return None;
    }

    let mut segments = url.path_segments()?;
    // `localhost` is treated the same as having no host at all.
    let host = match url.host() {
        None | Some(Host::Domain("localhost")) => None,
        Some(host) => Some(host),
    };

    let (mut path, separator) = if let Some(host) = host {
        (format!("\\\\{host}\\"), "\\")
    } else {
        let first = segments.next()?;
        if first.starts_with('.') {
            return None;
        }

        if let Some(drive_root) = is_windows_drive_letter_segment(first) {
            (drive_root, "\\")
        } else {
            let decoded = decode_path_segment(first)?;
            if starts_with_windows_drive_letter(&decoded) && decoded.contains('\\') {
                // The first segment already is a Windows path once decoded. Keep it verbatim
                // (no trailing separator) and append whatever segments follow instead of
                // silently discarding them.
                let mut windows_path = decoded;
                for segment in segments {
                    windows_path.push('\\');
                    windows_path.push_str(&decode_path_segment(segment)?);
                }
                return Some(T::from(windows_path));
            }
            // Use the decoded text: the first segment may contain escapes (e.g. `%20`) just
            // like every later segment.
            (format!("/{decoded}/"), "/")
        }
    };

    // `path` already ends with a separator, so only later segments need one in front.
    for (idx, segment) in segments.enumerate() {
        if idx > 0 {
            path.push_str(separator);
        }
        path.push_str(&decode_path_segment(segment)?);
    }

    Some(T::from(path))
}
/// Converts a `file://` URL into a [`PathBuf`].
///
/// Returns `None` when the URL does not use the `file` scheme or its path cannot be turned
/// into a path string (see `url_to_path_inner` for the exact rules).
pub fn url_to_path(url: &Url) -> Option<PathBuf> {
    url_to_path_inner::<PathBuf>(url)
}
/// Converts a `file://` URL into a [`Utf8TypedPathBuf`].
///
/// Returns `None` when the URL does not use the `file` scheme or its path cannot be turned
/// into a path string (see `url_to_path_inner` for the exact rules).
pub fn url_to_typed_path(url: &Url) -> Option<Utf8TypedPathBuf> {
    url_to_path_inner::<Utf8TypedPathBuf>(url)
}
// Percent-encode sets, built incrementally: each set extends the one before it.

/// The `CONTROLS` set plus space, `"`, `<`, `>` and `` ` ``.
const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
/// Everything in `FRAGMENT` plus `#`, `?`, `{` and `}`.
const PATH: &AsciiSet = &FRAGMENT.add(b'#').add(b'?').add(b'{').add(b'}');
/// Everything in `PATH` plus `/` and `%`. This is the set passed to `percent_encode` when a
/// single path component is written into a URL, so a component can never introduce an extra
/// path separator or an unintended escape sequence.
pub(crate) const PATH_SEGMENT: &AsciiSet = &PATH.add(b'/').add(b'%');
/// Returns `true` when `segment` is exactly a Windows drive letter: one ASCII letter followed
/// by `:` or `|`, with nothing after it.
///
/// This is the two-byte case of the rule implemented by `starts_with_windows_drive_letter`.
#[inline]
pub fn is_windows_drive_letter(segment: &str) -> bool {
    matches!(
        segment.as_bytes(),
        [letter, b':' | b'|'] if letter.is_ascii_alphabetic()
    )
}
/// Returns `true` when `s` begins with a Windows drive letter.
///
/// That means: an ASCII letter, then `:` or `|`, and then either the end of the string or one
/// of `/`, `\`, `?`, `#`. Strings shorter than two bytes never match.
fn starts_with_windows_drive_letter(s: &str) -> bool {
    match s.as_bytes() {
        [letter, b':' | b'|', rest @ ..] => {
            // The byte after the drive letter (if any) must terminate it.
            letter.is_ascii_alphabetic()
                && matches!(rest.first(), None | Some(b'/' | b'\\' | b'?' | b'#'))
        }
        _ => false,
    }
}
/// Builds the textual `file://` URL for an absolute path.
///
/// The first path component decides the shape of the URL:
///
/// * Unix root directory: `file:///...`
/// * Windows disk prefix (`C:` or the verbatim `\\?\C:` form): `file:///C:...`
/// * Windows UNC prefix (plain or verbatim): `file://server/share...`
///
/// Every following component (root-directory markers are skipped) is appended as `/` plus the
/// component percent-encoded with `PATH_SEGMENT`. A URL that ends up being nothing but a drive
/// letter gets a trailing `/`.
///
/// # Errors
///
/// Returns [`FileURLParseError::NotAnAbsolutePath`] when the path does not start with one of
/// the supported roots/prefixes, or when a UNC server name cannot be parsed as a host.
fn path_to_url<'a>(path: impl Into<Utf8TypedPath<'a>>) -> Result<String, FileURLParseError> {
    let typed_path = path.into();
    let mut parts = typed_path.components();

    let mut url = String::from("file://");
    // Byte offset one past the scheme prefix; for a disk path this is where the drive letter
    // starts (right after `file:///`).
    let after_root = url.len() + 1;

    let leading = parts.next();
    match leading {
        Some(Utf8TypedComponent::Unix(Utf8UnixComponent::RootDir)) => {}
        Some(Utf8TypedComponent::Windows(Utf8WindowsComponent::Prefix(ref prefix))) => {
            match prefix.kind() {
                Utf8WindowsPrefix::UNC(server, share)
                | Utf8WindowsPrefix::VerbatimUNC(server, share) => {
                    let host = Host::parse(server)
                        .map_err(|_err| FileURLParseError::NotAnAbsolutePath)?;
                    write!(url, "{host}").unwrap();
                    url.push('/');
                    url.extend(percent_encode(share.as_bytes(), PATH_SEGMENT));
                }
                Utf8WindowsPrefix::Disk(letter) | Utf8WindowsPrefix::VerbatimDisk(letter) => {
                    url.extend(['/', letter, ':']);
                }
                _ => return Err(FileURLParseError::NotAnAbsolutePath),
            }
        }
        _ => return Err(FileURLParseError::NotAnAbsolutePath),
    }

    // Stays `true` as long as nothing but root-directory markers follow the first component.
    let mut only_prefix = true;
    for part in parts {
        let is_root_dir = matches!(
            part,
            Utf8TypedComponent::Windows(Utf8WindowsComponent::RootDir)
                | Utf8TypedComponent::Unix(Utf8UnixComponent::RootDir)
        );
        if !is_root_dir {
            only_prefix = false;
            url.push('/');
            url.extend(percent_encode(part.as_str().as_bytes(), PATH_SEGMENT));
        }
    }

    // `file:///C:` on its own is turned into `file:///C:/`.
    let is_bare_drive = url.len() > after_root && is_windows_drive_letter(&url[after_root..]);
    if is_bare_drive && only_prefix {
        url.push('/');
    }

    Ok(url)
}
/// Errors produced when converting a file system path into a `file://` URL.
#[derive(Debug, Error)]
pub enum FileURLParseError {
/// The path does not start with a supported root or prefix (Unix root directory, Windows
/// disk prefix, or Windows UNC prefix), or the server name of a UNC path could not be parsed
/// as a host.
#[error("The path is not an absolute path")]
NotAnAbsolutePath,
/// The URL string assembled from the path was rejected by the URL parser; wraps the
/// underlying `url::ParseError`.
#[error("The URL string is invalid")]
InvalidUrl(#[from] url::ParseError),
}
/// Converts an absolute path to a file into a `file://` [`Url`].
///
/// # Errors
///
/// * [`FileURLParseError::NotAnAbsolutePath`] when the path is not an absolute path that
///   `path_to_url` supports.
/// * [`FileURLParseError::InvalidUrl`] when the resulting string is not a valid URL.
pub fn file_path_to_url<'a>(path: impl Into<Utf8TypedPath<'a>>) -> Result<Url, FileURLParseError> {
    path_to_url(path)
        .and_then(|raw| Url::from_str(&raw).map_err(FileURLParseError::InvalidUrl))
}
pub fn directory_path_to_url<'a>(
path: impl Into<Utf8TypedPath<'a>>,
) -> Result<Url, FileURLParseError> {
let mut url = path_to_url(path)?;
if !url.ends_with('/') {
url.push('/');
}
Ok(Url::from_str(&url).expect("url string must be a valid url"))
}
// Unit tests for the URL <-> path conversions. Each `#[case]` is one parameterised run of the
// test function below it.
#[cfg(test)]
mod tests {
use std::path::PathBuf;
use rstest::rstest;
use url::Url;
// URL -> path. `None` means the URL is expected to be rejected.
// Covers Unix paths, Windows drive letters (plain, with backslashes in the URL, and with the
// colon and/or backslashes percent-encoded), percent-decoding, and a non-`file` scheme.
#[rstest]
#[case("file:///home/bob/test-file.txt", Some("/home/bob/test-file.txt"))]
#[case("file:///C:/Test/Foo.txt", Some("C:\\Test\\Foo.txt"))]
#[case("file:///c:/temp/test-file.txt", Some("c:\\temp\\test-file.txt"))]
#[case("file:///c:\\temp\\test-file.txt", Some("c:\\temp\\test-file.txt"))]
#[case("file:///foo/ba%20r", Some("/foo/ba r"))]
#[case("file:///C%3A/Test/Foo.txt", Some("C:\\Test\\Foo.txt"))]
#[case("file:///C%3A%5Cfoo%5Cbar", Some("C:\\foo\\bar"))]
#[case("file:///c%3A%5Ctemp%5Ctest-file.txt", Some("c:\\temp\\test-file.txt"))]
#[case("http://example.com", None)]
fn test_url_to_path(#[case] url: &str, #[case] expected: Option<&str>) {
let url = url.parse::<Url>().unwrap();
let expected = expected.map(PathBuf::from);
assert_eq!(super::url_to_path(&url), expected);
}
// Path -> URL. `None` means the conversion is expected to fail (the error itself is not
// inspected; `.ok()` below discards it).
#[rstest]
#[case::win_drive("C:/", Some("file:///C:/"))]
#[case::unix_path("/root", Some("file:///root"))]
#[case::not_absolute("root", None)]
#[case::win_share("//servername/path", Some("file://servername/path"))]
#[case::dos_device_path("\\\\?\\C:\\Test\\Foo.txt", Some("file:///C:/Test/Foo.txt"))]
#[case::unsupported_guid_volumes(
"\\\\.\\Volume{b75e2c83-0000-0000-0000-602f00000000}\\Test\\Foo.txt",
None
)]
#[case::percent_encoding("//foo/ba r", Some("file://foo/ba%20r"))]
fn test_file_path_to_url(#[case] path: &str, #[case] expected: Option<&str>) {
let expected = expected.map(std::string::ToString::to_string);
assert_eq!(
super::file_path_to_url(path).map(|u| u.to_string()).ok(),
expected
);
}
}