use lazy_static::lazy_static;
use regex::Regex;
use std::collections::HashMap;
use std::os::unix::ffi::OsStrExt;
use std::path::{Path, PathBuf};
lazy_static! {
// Matches a URL that starts with a scheme: at least two characters that are
// neither ':' nor '/', then ':', an optional "//", and the remainder of the
// string captured as `path`. Because of the two-character minimum, a
// one-character prefix such as "C:" does not count as a scheme.
static ref URL_SCHEME_RE: Regex =
Regex::new(r"^(?P<scheme>[^:/]{2,}):(//)?(?P<path>.*)$").unwrap();
// Matches a single percent-escape: '%' followed by exactly two hex digits.
static ref URL_HEX_ESCAPES_RE: Regex = Regex::new(r"(%[0-9a-fA-F]{2})").unwrap();
}
/// Errors returned by the URL helpers in this file.
#[derive(Debug)]
pub enum Error {
/// `joinpath` met a `..` while already at the root. Holds the base path and
/// the arguments joined with `/`.
AboveRoot(String, String),
/// A segment parameter had no `=`. Holds the offending subsegment.
SubsegmentMissesEquals(String),
/// Not constructed in the visible code; presumably reports a character that
/// is not allowed in a URL — confirm against its users.
UnsafeCharacters(char),
/// Wraps an I/O error; `normalize_url` uses it when `local_path_to_url` fails.
IoError(std::io::Error),
/// A segment parameter key contained `=`. Holds the URL and the key.
SegmentParameterKeyContainsEquals(String, String),
/// A raw segment parameter contained `,`. Holds the base and all subsegments.
SegmentParameterContainsComma(String, Vec<String>),
/// The URL does not have a prefix accepted for local (`file:`) URLs. Holds the URL.
NotLocalUrl(String),
/// A Windows UNC `file:` URL was malformed. Holds the URL.
InvalidUNCUrl(String),
/// The URL passed to `unescape` contained non-ASCII characters. Holds the URL.
UrlNotAscii(String),
/// A Windows drive-letter `file:` URL was malformed. Holds the URL.
InvalidWin32LocalUrl(String),
/// The base URL given to `file_relpath` is shorter than
/// `MIN_ABS_FILEURL_LENGTH`. Holds the URL.
UrlTooShort(String),
/// `file_relpath` could not express the path relative to the base. Holds
/// the path and then the base.
PathNotChild(String, String),
}
/// Shorthand for results whose error type is this file's [`Error`].
type Result<K> = std::result::Result<K, Error>;
/// Split `url` into `(head, tail)` at the last `/` of its path.
///
/// * A URL with a scheme but no path slash is returned whole as the head, with
///   an empty tail.
/// * Text without a scheme is split as a plain path.
/// * Otherwise only the part from the first path slash onwards is split; the
///   scheme and host always stay in the head.
///
/// When `exclude_trailing_slash` is set, one trailing `/` is dropped before
/// splitting (for URLs with a scheme, only when the path is longer than `/`).
/// A head that would be empty because the slash was the first character
/// becomes `/`.
pub fn split(url: &str, exclude_trailing_slash: bool) -> (String, String) {
    // Replaces an empty head (the separator was the leading slash) with "/".
    fn rooted<'a>((head, tail): (&'a str, &'a str)) -> (&'a str, &'a str) {
        if head.is_empty() {
            ("/", tail)
        } else {
            (head, tail)
        }
    }

    let (scheme_loc, first_path_slash) = find_scheme_and_separator(url);
    let path_start = match first_path_slash {
        Some(index) => index,
        // Scheme but no path: nothing to split off.
        None if scheme_loc.is_some() => return (url.to_string(), String::new()),
        // No scheme at all: treat the whole input as a path.
        None => {
            let trimmed = if exclude_trailing_slash && url.ends_with('/') {
                &url[..url.len() - 1]
            } else {
                url
            };
            return match trimmed.rsplit_once('/').map(rooted) {
                Some((head, tail)) => (head.to_string(), tail.to_string()),
                None => (String::new(), trimmed.to_string()),
            };
        }
    };

    let url_base = &url[..path_start];
    let mut path = &url[path_start..];
    // NOTE(review): `target_os` is never "win32" (Windows reports "windows"),
    // so this statement is compiled out on every platform. Even if it were
    // enabled, the `let` only rebinds the names inside the `if` body.
    #[cfg(target_os = "win32")]
    if url.starts_with("file:///") {
        let (url_base, path) = _win32_extract_drive_letter(url_base, path);
    }
    // Never strip the slash that is the whole path ("/").
    if exclude_trailing_slash && path.len() > 1 && path.ends_with('/') {
        path = &path[..path.len() - 1];
    }
    match path.rsplit_once('/').map(rooted) {
        Some((head, tail)) => (format!("{}{}", url_base, head), tail.to_string()),
        None => (url_base.to_string(), path.to_string()),
    }
}
/// Locate the scheme and the first path slash of `url`.
///
/// Returns `(scheme_end, first_path_slash)` as byte offsets into `url`:
/// * `(None, None)` when `url` does not match `URL_SCHEME_RE`;
/// * `(Some(len_of_scheme), None)` when there is a scheme but no `/` after the
///   `scheme:` / `scheme://` prefix;
/// * `(Some(len_of_scheme), Some(index))` otherwise, where `index` is the
///   position of the first `/` after that prefix.
pub fn find_scheme_and_separator(url: &str) -> (Option<usize>, Option<usize>) {
    let captures = match URL_SCHEME_RE.captures(url) {
        Some(captures) => captures,
        None => return (None, None),
    };
    let scheme_len = captures.name("scheme").unwrap().as_str().len();
    let path = captures.name("path").unwrap();
    // `find` is relative to the captured path, so shift it back to `url` offsets.
    let first_path_slash = path
        .as_str()
        .find('/')
        .map(|offset| offset + path.start());
    (Some(scheme_len), first_path_slash)
}
/// Returns `true` when `url` matches `URL_SCHEME_RE`, i.e. it begins with a
/// scheme of at least two characters followed by `:`.
pub fn is_url(url: &str) -> bool {
URL_SCHEME_RE.is_match(url)
}
/// Remove one trailing `/` from `url`, unless that slash is the path itself.
///
/// * Input without a trailing slash is returned unchanged.
/// * Input without a scheme simply loses its last character.
/// * A URL whose only path slash is the final character (for example
///   `http://host/`), or that has no path slash at all, is returned unchanged.
///
/// On Windows, `file://` URLs are delegated to
/// `_win32_strip_local_trailing_slash`.
pub fn strip_trailing_slash(url: &str) -> &str {
    let without_slash = match url.strip_suffix('/') {
        Some(rest) => rest,
        None => return url,
    };
    #[cfg(target_os = "windows")]
    if url.starts_with("file://") {
        return _win32_strip_local_trailing_slash(url);
    }
    match find_scheme_and_separator(url) {
        // Scheme present, but the trailing slash is not a removable path slash.
        (Some(_), None) => url,
        (Some(_), Some(slash)) if slash + 1 == url.len() => url,
        // Relative path, or a URL with a longer path.
        _ => without_slash,
    }
}
/// Join `args` onto the path `base`, resolving `.` and `..` components.
///
/// A single trailing `/` on `base` is ignored. An argument that starts with
/// `/` discards everything accumulated so far. `.` components are skipped and
/// `..` removes the previous component.
///
/// # Errors
///
/// Returns [`Error::AboveRoot`] when a `..` is met while the accumulated path
/// is exactly the root.
pub fn joinpath(base: &str, args: &[&str]) -> Result<String> {
    let mut segments: Vec<&str> = base.split('/').collect();
    // Drop the empty component produced by a trailing slash on `base`.
    if segments.len() > 1 && segments.last() == Some(&"") {
        segments.pop();
    }
    for arg in args {
        if arg.starts_with('/') {
            // Absolute argument: start over from the root.
            segments.clear();
        }
        for chunk in arg.split('/') {
            match chunk {
                "." => {}
                ".." => {
                    // A lone empty component is the root itself.
                    if let [""] = segments.as_slice() {
                        return Err(Error::AboveRoot(base.to_string(), args.join("/")));
                    }
                    segments.pop();
                }
                other => segments.push(other),
            }
        }
    }
    Ok(match segments.as_slice() {
        [""] => "/".to_string(),
        parts => parts.join("/"),
    })
}
/// Return the tail that [`split`] produces for `url` (its last path component).
pub fn basename(url: &str, exclude_trailing_slash: bool) -> String {
    let (_head, tail) = split(url, exclude_trailing_slash);
    tail
}
/// Return the head that [`split`] produces for `url` (everything before its
/// last path component).
pub fn dirname(url: &str, exclude_trailing_slash: bool) -> String {
    let (head, _tail) = split(url, exclude_trailing_slash);
    head
}
/// Join each of `args` onto the URL `base`, in order.
///
/// An argument that carries its own scheme replaces the URL built so far.
/// Any other argument is joined onto the current path with [`joinpath`]. With
/// no arguments, `base` is returned unchanged.
///
/// # Errors
///
/// Propagates any error returned by [`joinpath`].
pub fn join<'a>(mut base: &'a str, args: &[&'a str]) -> Result<String> {
    // Returns (has a scheme, byte offset at which the path part starts).
    fn locate_path(url: &str) -> (bool, usize) {
        match find_scheme_and_separator(url) {
            (None, None) => (false, 0),
            // Scheme without a path: the path is the empty string at the end.
            (scheme, None) => (scheme.is_some(), url.len()),
            (scheme, Some(slash)) => (scheme.is_some(), slash),
        }
    }

    if args.is_empty() {
        return Ok(base.to_string());
    }
    let (_, mut path_start) = locate_path(base);
    let mut path = base[path_start..].to_string();
    for &arg in args {
        let (has_scheme, arg_path_start) = locate_path(arg);
        if has_scheme {
            // A full URL restarts the join from that URL.
            base = arg;
            path = arg[arg_path_start..].to_string();
            path_start = arg_path_start;
        } else {
            path = joinpath(&path, &[arg])?;
        }
    }
    Ok(format!("{}{}", &base[..path_start], path))
}
/// Split the comma-separated segment parameters off the last path segment.
///
/// The trailing slash is removed first (via [`strip_trailing_slash`]). Then
/// everything after the first `,` in the last segment is split on `,` and each
/// piece is trimmed. When the last segment has no `,`, the original `url`
/// (trailing slash included) is returned with an empty list.
pub fn split_segment_parameters_raw(url: &str) -> (&str, Vec<&str>) {
    let lurl = strip_trailing_slash(url);
    let segment_start = match lurl.rfind('/') {
        Some(slash) => slash + 1,
        None => 0,
    };
    match lurl[segment_start..].split_once(',') {
        None => (url, Vec::new()),
        Some((segment, parameters)) => (
            &lurl[..segment_start + segment.len()],
            parameters.split(',').map(str::trim).collect(),
        ),
    }
}
pub fn split_segment_parameters(
url: &str,
) -> Result<(&str, std::collections::HashMap<&str, &str>)> {
let (base_url, subsegments) = split_segment_parameters_raw(url);
let parameters = subsegments
.iter()
.map(|subsegment| {
subsegment
.split_once('=')
.ok_or_else(|| Error::SubsegmentMissesEquals(subsegment.to_string()))
.map(|(key, value)| (key.trim(), value.trim()))
})
.collect::<Result<HashMap<&str, &str>>>()?;
Ok((base_url, parameters))
}
/// Return `url` without its segment parameters: the base part reported by
/// [`split_segment_parameters_raw`].
pub fn strip_segment_parameters(url: &str) -> &str {
    let (base, _parameters) = split_segment_parameters_raw(url);
    base
}
pub fn join_segment_parameters_raw(base: &str, subsegments: &[&str]) -> Result<String> {
if subsegments.is_empty() {
return Ok(base.to_string());
}
for subsegment in subsegments {
if subsegment.contains(',') {
return Err(Error::SegmentParameterContainsComma(
base.to_string(),
subsegments.iter().map(|s| s.to_string()).collect(),
));
}
}
Ok(format!("{},{}", base, subsegments.join(",")))
}
/// Merge `parameters` into the segment parameters already present on `url`.
///
/// Parameters already on `url` are kept unless `parameters` supplies the same
/// key. The result lists every `key=value` pair sorted by key, appended to the
/// base URL by [`join_segment_parameters_raw`].
///
/// # Errors
///
/// * Errors from [`split_segment_parameters`] on the existing URL.
/// * [`Error::SegmentParameterKeyContainsEquals`] when a new key contains `=`.
/// * Errors from [`join_segment_parameters_raw`], e.g. a `,` in a key or value.
pub fn join_segment_parameters(url: &str, parameters: &HashMap<&str, &str>) -> Result<String> {
    let (base, mut merged) = split_segment_parameters(url)?;
    for (key, value) in parameters {
        if key.contains('=') {
            return Err(Error::SegmentParameterKeyContainsEquals(
                url.to_string(),
                key.to_string(),
            ));
        }
        merged.insert(*key, *value);
    }
    // Keys are unique, so ordering the pairs orders them by key.
    let mut pairs: Vec<(&str, &str)> = merged.into_iter().collect();
    pairs.sort_unstable();
    let rendered: Vec<String> = pairs
        .iter()
        .map(|(key, value)| format!("{}={}", key, value))
        .collect();
    let subsegments: Vec<&str> = rendered.iter().map(String::as_str).collect();
    join_segment_parameters_raw(base, &subsegments)
}
/// Express `other` relative to `base` where possible.
///
/// `other` is returned unchanged when either URL lacks a path slash, or when
/// the parts before the first path slash (scheme and host) differ. On Windows
/// it is also returned unchanged when two `file://` URLs name different
/// drives. Otherwise the result climbs out of the non-shared part of `base`
/// with `..` components and descends into the non-shared part of `other`. An
/// empty result is reported as `.`.
pub fn relative_url(base: &str, other: &str) -> String {
    // Splits a path on '/', treating the empty path as having no sections.
    fn sections(path: &str) -> Vec<&str> {
        if path.is_empty() {
            Vec::new()
        } else {
            path.split('/').collect()
        }
    }

    let base_first_slash = match find_scheme_and_separator(base).1 {
        Some(index) => index,
        None => return other.to_string(),
    };
    let other_first_slash = match find_scheme_and_separator(other).1 {
        Some(index) => index,
        None => return other.to_string(),
    };
    // Everything before the first path slash: scheme plus host.
    let base_scheme = &base[..base_first_slash];
    let other_scheme = &other[..other_first_slash];
    if base_scheme != other_scheme {
        return other.to_string();
    }
    #[cfg(target_os = "windows")]
    if base_scheme == "file://" {
        let base_drive = &base[base_first_slash + 1..base_first_slash + 3];
        let other_drive = &other[other_first_slash + 1..other_first_slash + 3];
        if base_drive != other_drive {
            return other.to_string();
        }
    }

    let base_path = &base[base_first_slash + 1..];
    let base_path = base_path.strip_suffix('/').unwrap_or(base_path);
    let base_sections = sections(base_path);
    let other_sections = sections(&other[other_first_slash + 1..]);

    // Number of leading sections the two paths share.
    let match_len = base_sections
        .iter()
        .zip(other_sections.iter())
        .take_while(|(b, o)| b == o)
        .count();

    let relative: Vec<&str> = std::iter::repeat("..")
        .take(base_sections.len() - match_len)
        .chain(other_sections[match_len..].iter().copied())
        .collect();
    let joined = relative.join("/");
    if joined.is_empty() {
        ".".to_string()
    } else {
        joined
    }
}
/// Returns `true` for the characters that never need percent-escaping here:
/// ASCII letters and digits plus `-`, `_`, `.` and `~`.
fn char_is_safe(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.' | '~')
}
fn unescape_safe_chars(captures: ®ex::Captures) -> String {
let hex_digits = &captures[0][1..];
let char_code = u8::from_str_radix(hex_digits, 16).unwrap();
let character = char::from(char_code);
if char_is_safe(character) {
character.to_string()
} else {
captures[0].to_uppercase()
}
}
/// Resolve `relpath` against `base_path`, returning a path that starts with `/`.
///
/// Percent-escapes of safe characters in `relpath` are decoded first. A
/// `relpath` that starts with `/` ignores `base_path` altogether. Empty and
/// `.` components are skipped. `..` removes the previous component, but never
/// climbs past the root.
pub fn combine_paths(base_path: &str, relpath: &str) -> String {
    let relpath = URL_HEX_ESCAPES_RE
        .replace_all(relpath, unescape_safe_chars)
        .to_string();

    let mut parts: Vec<&str> = Vec::new();
    if !relpath.starts_with('/') {
        parts.extend(base_path.split('/'));
    }
    // Drop the empty component left by a trailing slash on the base.
    if parts.last().map_or(false, |last| last.is_empty()) {
        parts.pop();
    }

    for component in relpath.split('/') {
        if component == ".." {
            // An empty last component is the root marker; stay there.
            let can_go_up = parts.last().map_or(false, |last| !last.is_empty());
            if can_go_up {
                parts.pop();
            }
        } else if component != "." && !component.is_empty() {
            parts.push(component);
        }
    }

    let joined = parts.join("/");
    if joined.starts_with('/') {
        joined
    } else {
        format!("/{}", joined)
    }
}
pub fn normalize_url(url: &str) -> Result<String> {
let (scheme_end, path_start) = find_scheme_and_separator(url);
if scheme_end.is_none() {
local_path_to_url(url).map_err(Error::IoError)
} else {
let prefix = &url[..path_start.unwrap()];
let path = &url[path_start.unwrap()..];
const URL_SAFE_CHARACTERS: &[u8] = b"_.-!~*'()/;?:@&=+$,%#";
let path = path
.as_bytes()
.iter()
.map(|c| {
if !c.is_ascii_alphanumeric() && !URL_SAFE_CHARACTERS.contains(c) {
format!("%{:02X}", c)
} else {
(*c as char).to_string()
}
})
.collect::<String>();
let path = URL_HEX_ESCAPES_RE.replace_all(path.as_str(), unescape_safe_chars);
Ok(prefix.to_string() + path.as_ref())
}
}
/// Percent-escape the bytes of `relpath`.
///
/// A byte is copied through unchanged when it is a character accepted by
/// `char_is_safe` or when it is one of the bytes of `safe` (default `"/~"`).
/// Every other byte becomes `%XX` with upper-case hex digits.
pub fn escape(relpath: &[u8], safe: Option<&str>) -> String {
    let also_safe = safe.unwrap_or("/~").as_bytes();
    relpath.iter().fold(String::new(), |mut out, &byte| {
        let character = char::from(byte);
        if char_is_safe(character) || also_safe.contains(&byte) {
            out.push(character);
        } else {
            out.push_str(&format!("%{:02X}", byte));
        }
        out
    })
}
/// Decode the percent-escapes in `url`.
///
/// If the decoded bytes are not valid UTF-8, the input is returned unchanged.
///
/// # Errors
///
/// Returns [`Error::UrlNotAscii`] when `url` contains non-ASCII characters.
pub fn unescape(url: &str) -> Result<String> {
    use percent_encoding::percent_decode_str;
    if !url.is_ascii() {
        return Err(Error::UrlNotAscii(url.to_string()));
    }
    let decoded = match percent_decode_str(url).decode_utf8() {
        Ok(text) => text.to_string(),
        // Undecodable escapes: fall back to the text as given.
        Err(_) => url.to_string(),
    };
    Ok(decoded)
}
/// Conversions between Windows filesystem paths and `file:` URLs.
pub mod win32 {
    use std::path::{Path, PathBuf};

    /// Convert a local Windows path to a `file:` URL.
    ///
    /// The path `/` maps to `file:///`. Any other path is made absolute with
    /// `osutils::path::win32::abspath`. A result starting with `//` (a UNC
    /// path) becomes `file://HOST/...`; otherwise the drive letter is
    /// upper-cased and the result has the form `file:///X:/...`.
    ///
    /// # Errors
    ///
    /// Propagates errors from `abspath`, and returns an `InvalidData` I/O
    /// error when the absolute path is not valid UTF-8 (this used to panic).
    pub fn local_path_to_url<P: AsRef<Path>>(path: P) -> std::io::Result<String> {
        if path.as_ref().as_os_str() == "/" {
            return Ok("file:///".to_string());
        }
        let win32_path = osutils::path::win32::abspath(path.as_ref())?;
        let win32_path = win32_path.as_path().to_str().ok_or_else(|| {
            std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                "path is not valid UTF-8",
            )
        })?;
        if win32_path.starts_with("//") {
            Ok(format!(
                "file:{}",
                super::escape(win32_path.as_bytes(), Some("/~"))
            ))
        } else {
            // NOTE(review): assumes `abspath` returns `X:/...` (ASCII drive
            // letter followed by ':'); anything shorter would panic here —
            // confirm against `osutils`.
            let drive = win32_path.chars().next().unwrap().to_ascii_uppercase();
            Ok(format!(
                "file:///{}:{}",
                drive,
                super::escape(win32_path[2..].as_bytes(), Some("/~"))
            ))
        }
    }

    /// Convert a `file:` URL to a local Windows path.
    ///
    /// Accepted forms are `file:///` (returned as `/`), drive-letter URLs
    /// `file:///X:/...` or `file:///X|/...`, and UNC URLs `file://HOST/...`.
    /// Segment parameters are stripped before conversion.
    ///
    /// # Errors
    ///
    /// * [`super::Error::NotLocalUrl`] when `url` does not start with `file://`.
    /// * [`super::Error::InvalidUNCUrl`] for a malformed UNC URL.
    /// * [`super::Error::InvalidWin32LocalUrl`] for a malformed drive-letter URL.
    /// * Errors from [`super::unescape`], e.g. for non-ASCII input.
    pub fn local_path_from_url(url: &str) -> super::Result<PathBuf> {
        if !url.starts_with("file://") {
            return Err(super::Error::NotLocalUrl(url.to_string()));
        }
        let url = super::strip_segment_parameters(url);
        // Drop "file:" and keep the slashes.
        let win32_url = &url[5..];
        if !win32_url.starts_with("///") {
            // UNC form: "//HOST/path". Inspect the two characters after "//"
            // without indexing, so short input cannot panic.
            let mut after_slashes = win32_url.chars().skip(2);
            let host_start = after_slashes.next();
            let following = after_slashes.next();
            let looks_like_drive = match following {
                Some('|') | Some(':') => true,
                _ => false,
            };
            if host_start.map_or(true, |c| c == '/') || looks_like_drive {
                return Err(super::Error::InvalidUNCUrl(url.to_string()));
            }
            return Ok(super::unescape(win32_url)?.into());
        }
        if win32_url == "///" {
            return Ok(PathBuf::from("/"));
        }
        // Drive-letter form: "///X:/..." or "///X|/...". Checking bytes avoids
        // slicing in the middle of a multi-byte character.
        let bytes = win32_url.as_bytes();
        let well_formed = bytes.len() >= 6
            && bytes[3].is_ascii_alphabetic()
            && (bytes[4] == b'|' || bytes[4] == b':')
            && bytes[5] == b'/';
        if !well_formed {
            return Err(super::Error::InvalidWin32LocalUrl(url.to_string()));
        }
        Ok(PathBuf::from(format!(
            "{}:{}",
            char::from(bytes[3]).to_ascii_uppercase(),
            super::unescape(&win32_url[5..])?
        )))
    }
}
/// Conversions between POSIX filesystem paths and `file:` URLs.
pub mod posix {
    use std::os::unix::ffi::OsStrExt;
    use std::path::{Path, PathBuf};

    /// Convert a local POSIX path to a `file://` URL.
    ///
    /// The path is made absolute with `osutils::path::posix::abspath` and its
    /// bytes are percent-escaped, leaving `/` and `~` untouched.
    ///
    /// # Errors
    ///
    /// Propagates errors from `abspath`.
    pub fn local_path_to_url<P: AsRef<Path>>(path: P) -> std::io::Result<String> {
        let abs_path = osutils::path::posix::abspath(path.as_ref())?;
        let escaped_path = super::escape(abs_path.as_path().as_os_str().as_bytes(), Some("/~"));
        Ok(format!("file://{}", escaped_path))
    }

    // Both prefixes end with the slash that is the root of the local path.
    // Requiring that slash after "localhost" stops hosts that merely start
    // with "localhost" (e.g. "localhost.example.com") from being accepted.
    const FILE_LOCALHOST_PREFIX: &str = "file://localhost/";
    const PLAIN_FILE_PREFIX: &str = "file:///";

    /// Convert a `file:///...` or `file://localhost/...` URL to a local path.
    ///
    /// Segment parameters are stripped and percent-escapes decoded.
    ///
    /// # Errors
    ///
    /// * [`super::Error::NotLocalUrl`] when the URL has neither accepted prefix.
    /// * Errors from [`super::unescape`], e.g. for non-ASCII input.
    pub fn local_path_from_url(url: &str) -> std::result::Result<PathBuf, super::Error> {
        let url = super::strip_segment_parameters(url);
        // Keep the final '/' of the matched prefix as the start of the path.
        let path = if url.starts_with(FILE_LOCALHOST_PREFIX) {
            &url[FILE_LOCALHOST_PREFIX.len() - 1..]
        } else if url.starts_with(PLAIN_FILE_PREFIX) {
            &url[PLAIN_FILE_PREFIX.len() - 1..]
        } else {
            return Err(super::Error::NotLocalUrl(url.to_string()));
        };
        Ok(PathBuf::from(super::unescape(path)?))
    }
}
/// Convert a local filesystem path to a `file:` URL using the current
/// platform's rules: [`win32::local_path_to_url`] on Windows and
/// [`posix::local_path_to_url`] on Unix.
///
/// The Windows branch used to be guarded by `target_os = "win32"`, a value
/// that never matches (Windows reports `"windows"`), so it was never compiled.
///
/// # Errors
///
/// Propagates the I/O error from the platform implementation.
pub fn local_path_to_url<P: AsRef<Path>>(path: P) -> std::io::Result<String> {
    #[cfg(windows)]
    return win32::local_path_to_url(path);
    #[cfg(unix)]
    return posix::local_path_to_url(path);
}
/// Convert a `file:` URL to a local filesystem path using the current
/// platform's rules: [`win32::local_path_from_url`] on Windows and
/// [`posix::local_path_from_url`] on Unix.
///
/// The Windows branch used to be guarded by `target_os = "win32"`, a value
/// that never matches (Windows reports `"windows"`), so it was never compiled.
///
/// # Errors
///
/// Propagates the error from the platform implementation.
pub fn local_path_from_url(url: &str) -> Result<PathBuf> {
    #[cfg(windows)]
    return win32::local_path_from_url(url);
    #[cfg(unix)]
    return posix::local_path_from_url(url);
}
/// Derive a destination name from `from_location`.
///
/// Segment parameters are stripped first. Then:
/// * if the location contains a `/`, trailing `/` and `\` characters are
///   removed and the text after the last remaining `/` is returned
///   (`http://host/foo/` gives `foo`);
/// * otherwise, if it contains a `:`, the text after the first `:` is returned;
/// * otherwise the location itself is returned.
///
/// The trailing separators used to be trimmed after the basename was taken,
/// and with the two-character string `"/\\"` as the pattern, so a location
/// ending in `/` produced an empty name.
pub fn derive_to_location(from_location: &str) -> String {
    let from_location = strip_segment_parameters(from_location);
    if from_location.contains('/') {
        // Trim first so the basename is taken from the last real component.
        let trimmed = from_location.trim_end_matches(|c: char| c == '/' || c == '\\');
        let basename = match trimmed.rfind('/') {
            Some(separator_index) => &trimmed[separator_index + 1..],
            None => trimmed,
        };
        basename.to_string()
    } else if let Some(separator_index) = from_location.find(':') {
        from_location[separator_index + 1..].to_string()
    } else {
        from_location.to_string()
    }
}
/// Length of the shortest absolute `file:` URL on this platform:
/// `file:///C:` on Windows, `file:///` elsewhere.
///
/// The guard used to be `cfg(win32)`, which is not a cfg the compiler sets, so
/// the Windows value was never selected.
#[cfg(windows)]
pub const MIN_ABS_FILEURL_LENGTH: usize = "file:///C:".len();
/// Length of the shortest absolute `file:` URL on this platform:
/// `file:///C:` on Windows, `file:///` elsewhere.
#[cfg(not(windows))]
pub const MIN_ABS_FILEURL_LENGTH: usize = "file:///".len();
pub fn file_relpath(base: &str, path: &str) -> Result<String> {
if base.len() < MIN_ABS_FILEURL_LENGTH {
return Err(Error::UrlTooShort(base.to_string()));
}
let base: PathBuf = osutils::path::normpath(local_path_from_url(base)?);
let path: PathBuf = osutils::path::normpath(local_path_from_url(path)?);
let relpath = osutils::path::relpath(base.as_path(), path.as_path());
if relpath.is_none() {
return Err(Error::PathNotChild(
path.display().to_string(),
base.display().to_string(),
));
}
Ok(escape(relpath.unwrap().as_os_str().as_bytes(), None))
}