use crate::{
parse::parse_url,
query::{parse_query, stringify_query, QueryObject},
};
use lazy_static::lazy_static;
use regex::Regex;
lazy_static! {
    // Scheme-like prefix (two or more of: whitespace, word characters, `+`, `.`, `-`),
    // a colon, then one or two `/` or `\` characters.
    static ref PROTOCOL_STRICT_REGEX: Regex =
        Regex::new(r"^[\s\w+.-]{2,}:([/\\]{1,2})").unwrap();
    // Same prefix and colon; the following pair of `/` or `\` characters is optional.
    // (Previously written as `/\\{2}`, i.e. one slash followed by two backslashes.)
    static ref PROTOCOL_REGEX: Regex = Regex::new(r"^[\s\w+.-]{2,}:(?:[/\\]{2})?").unwrap();
    // Two or more leading `/` or `\` (each optionally followed by whitespace), then a
    // character that is neither, e.g. `//example.com`.
    static ref PROTOCOL_RELATIVE_REGEX: Regex = Regex::new(r"^([/\\]\s*){2,}[^/\\]").unwrap();
    // A bare `blob:`, `data:`, `javascript:` or `vbscript:` scheme, optionally preceded by
    // whitespace or NUL characters. NUL is spelled `\x00` because the regex crate rejects
    // `\0` (octal escapes are disabled by default), which made `Regex::new` fail and the
    // `unwrap` panic on first use.
    // NOTE(review): the match is case-sensitive; confirm whether `JavaScript:` should match.
    static ref PROTOCOL_SCRIPT_RE: Regex =
        Regex::new(r"^[\s\x00]*(blob|data|javascript|vbscript):$").unwrap();
    // A `/` at the very end of the input, or a `/` directly followed by `?` or `#`.
    static ref TRAILING_SLASH_RE: Regex = Regex::new(r"/$|/\?|/#").unwrap();
    // A leading `/` or `./`.
    static ref JOIN_LEADING_SLASH_RE: Regex = Regex::new(r"^\.?/").unwrap();
}
/// Returns `true` when `input_string` begins with an explicit relative marker,
/// i.e. `./` or `../`.
pub fn is_relative(input_string: &str) -> bool {
    const RELATIVE_PREFIXES: [&str; 2] = ["./", "../"];
    RELATIVE_PREFIXES
        .iter()
        .any(|prefix| input_string.starts_with(*prefix))
}
/// Options controlling how [`has_protocol`] decides whether an input carries a protocol.
///
/// The default value has both flags set to `false`.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct HasProtocolOptions {
    /// Also accept protocol-relative inputs such as `//example.com`.
    /// Ignored when `strict` is set.
    pub accept_relative: bool,
    /// Require the scheme's colon to be followed by at least one `/` or `\`.
    pub strict: bool,
}
/// Reports whether `input_string` starts with a protocol.
///
/// * With `opts.strict`, only the strict pattern is consulted and
///   `opts.accept_relative` has no effect.
/// * Otherwise the lenient pattern is tried first; if it does not match and
///   `opts.accept_relative` is set, the protocol-relative pattern is tried.
pub fn has_protocol(input_string: &str, opts: HasProtocolOptions) -> bool {
    if opts.strict {
        return PROTOCOL_STRICT_REGEX.is_match(input_string);
    }
    if PROTOCOL_REGEX.is_match(input_string) {
        return true;
    }
    opts.accept_relative && PROTOCOL_RELATIVE_REGEX.is_match(input_string)
}
/// Reports whether `input` has a trailing slash.
///
/// Without `respect_query_fragment` this is simply "ends with `/`".
/// With it, a `/` immediately followed by `?` or `#` anywhere in the input also counts,
/// so `/foo/?a=1` and `/foo/#top` are treated as having a trailing slash.
///
/// The check uses plain string searches; it accepts exactly the inputs the
/// `/$|/\?|/#` pattern accepts, without going through the regex engine.
pub fn has_trailing_slash(input: &str, respect_query_fragment: bool) -> bool {
    if input.ends_with('/') {
        return true;
    }
    respect_query_fragment && (input.contains("/?") || input.contains("/#"))
}
/// Removes one trailing slash from `input`, if present.
///
/// * Without `respect_query_fragment`, the whole input is treated as the path:
///   a final `/` is dropped and nothing else changes.
/// * With `respect_query_fragment`, the path is everything before the first `?` or `#`;
///   a `/` at the end of that path is dropped and the query/fragment are kept verbatim
///   (`/foo/?a=1#x` becomes `/foo?a=1#x`).
///
/// Inputs whose path does not end in `/` are returned unchanged. Note that `/` becomes
/// the empty string.
pub fn without_trailing_slash(input: &str, respect_query_fragment: bool) -> String {
    let path_end = if respect_query_fragment {
        input
            .find(|c: char| c == '?' || c == '#')
            .unwrap_or(input.len())
    } else {
        input.len()
    };
    // `path_end` is either the index of an ASCII delimiter or the input length,
    // so it always falls on a character boundary.
    let (path, suffix) = input.split_at(path_end);
    match path.strip_suffix('/') {
        Some(trimmed) => {
            let mut result = String::with_capacity(trimmed.len() + suffix.len());
            result.push_str(trimmed);
            result.push_str(suffix);
            result
        }
        None => input.to_string(),
    }
}
/// Ensures `input` has a trailing slash.
///
/// * Without `respect_query_fragment`, a `/` is appended unless the input already
///   ends with one.
/// * With `respect_query_fragment`, the slash is inserted at the end of the path,
///   i.e. just before the first `?` or `#` (`/foo?a=1` becomes `/foo/?a=1`).
///   Inputs that `has_trailing_slash(input, true)` already accepts, and inputs that
///   consist only of a fragment (starting with `#`), are returned unchanged.
pub fn with_trailing_slash(input: &str, respect_query_fragment: bool) -> String {
    if respect_query_fragment {
        if has_trailing_slash(input, true) || input.starts_with('#') {
            return input.to_string();
        }
        // The path stops at whichever of `?` / `#` comes first.
        let path_end = input
            .find(|c: char| c == '?' || c == '#')
            .unwrap_or(input.len());
        let (path, suffix) = input.split_at(path_end);
        return format!("{}/{}", path, suffix);
    }

    let mut result = String::with_capacity(input.len() + 1);
    result.push_str(input);
    if !result.ends_with('/') {
        result.push('/');
    }
    result
}
/// Returns `true` when the first character of `input` is `/`.
pub fn has_leading_slash(input: &str) -> bool {
    input.chars().next() == Some('/')
}
/// Removes a single leading `/` from `input`, if present.
///
/// Only one slash is removed: `//foo` becomes `/foo`. Inputs without a leading
/// slash are returned unchanged.
pub fn without_leading_slash(input: &str) -> String {
    // `strip_prefix` performs the test and the slicing in one step, so there is no
    // hand-written index arithmetic to keep in sync with the check.
    input.strip_prefix('/').unwrap_or(input).to_string()
}
/// Ensures `input` starts with `/`, prepending one only when it is missing.
pub fn with_leading_slash(input: &str) -> String {
    if !has_leading_slash(input) {
        let mut prefixed = String::with_capacity(input.len() + 1);
        prefixed.push('/');
        prefixed.push_str(input);
        return prefixed;
    }
    input.to_owned()
}
/// Collapses runs of consecutive `/` characters into a single `/`.
///
/// A run that starts immediately after a `:` (as in `http://`) is kept in full,
/// however long it is. Every other run is reduced to one slash.
pub fn clean_double_slashes(url: &str) -> String {
    let mut cleaned = String::with_capacity(url.len());
    let mut previous: Option<char> = None;
    // Whether the slash run currently being read directly follows a `:`.
    let mut keep_run = false;

    for current in url.chars() {
        if current != '/' {
            cleaned.push(current);
        } else if previous != Some('/') {
            // First slash of a run: always emitted, and it decides the fate of the rest.
            keep_run = previous == Some(':');
            cleaned.push('/');
        } else if keep_run {
            cleaned.push('/');
        }
        previous = Some(current);
    }

    cleaned
}
/// Prefixes `input` with `base` unless it is already located under it.
///
/// * If `base` is empty (`""` or `"/"`) or `input` has a protocol, `input` is returned
///   unchanged.
/// * If `input` already starts with `base` (trailing slash ignored) **on a path
///   boundary** — the base is followed by `/`, `?`, `#` or the end of the input — it is
///   kept as is. A mere string prefix does not count: `/basement` is not under `/base`
///   and becomes `/base/basement`.
/// * Otherwise `base` and `input` are joined.
///
/// In the last two cases duplicate slashes in the result are collapsed.
pub fn with_base(input: &str, base: &str) -> String {
    if is_empty_url(base) || has_protocol(input, HasProtocolOptions::default()) {
        return input.to_string();
    }

    let base = without_trailing_slash(base, false);
    let already_based = input.strip_prefix(base.as_str()).map_or(false, |rest| {
        rest.is_empty() || rest.starts_with(|c: char| matches!(c, '/' | '?' | '#'))
    });

    let result = if already_based {
        input.to_string()
    } else {
        join_url(&base, input)
    };
    clean_double_slashes(&result)
}
/// Strips `base` from the front of `input`, returning the remainder with a leading `/`.
///
/// * If `base` is empty (`""` or `"/"`, the same rule as `is_empty_url`), `input` is
///   returned unchanged.
/// * One trailing `/` on `base` is ignored.
/// * The base is only removed when it ends on a path boundary in `input`, i.e. it is
///   followed by `/`, `?`, `#` or the end of the input. A mere string prefix does not
///   count: `/basement/x` is not under `/base` and is returned unchanged (previously it
///   was mangled into `/ment/x`).
pub fn without_base(input: &str, base: &str) -> String {
    if base.is_empty() || base == "/" {
        return input.to_string();
    }

    let base = base.strip_suffix('/').unwrap_or(base);
    let rest = match input.strip_prefix(base) {
        Some(rest)
            if rest.is_empty() || rest.starts_with(|c: char| matches!(c, '/' | '?' | '#')) =>
        {
            rest
        }
        _ => return input.to_string(),
    };

    if rest.starts_with('/') {
        rest.to_string()
    } else {
        format!("/{}", rest)
    }
}
/// Returns `input` with the entries of `query` merged into its query string.
///
/// The existing query of `input` is parsed first and inserted into a fresh
/// `QueryObject`; the entries of `query` are inserted afterwards, so for a key present
/// in both, the insertion from `query` happens last.
pub fn with_query(input: &str, query: &QueryObject) -> String {
    let mut url = parse_url(input);
    let existing: QueryObject = parse_query(&url.search);

    let mut merged = QueryObject::new();
    // Order matters: existing entries go in first, caller-supplied ones second.
    for source in [&existing, query] {
        for (name, value) in source.iter() {
            merged.insert(name.clone(), value.clone());
        }
    }

    url.search = stringify_query(&merged);
    url.stringify()
}
/// Returns `true` when `url` is the empty string or a lone `/`.
pub fn is_empty_url(url: &str) -> bool {
    matches!(url, "" | "/")
}
/// Joins `input` onto `base` with exactly one `/` between them.
///
/// * An empty `input` (`""` or `"/"`, the same rule as `is_empty_url`) yields `base`.
/// * An empty `base` yields `input`.
/// * Otherwise a single leading `/` or `./` is removed from `input` and the result is
///   appended to `base`, inserting a `/` only when `base` does not already end with one.
///
/// Only that one leading marker is removed: `../x` and `.hidden` are appended verbatim
/// (no `..` resolution is performed). Earlier versions stripped every leading `.` and
/// `/` and always emitted a doubled slash (`a//b`).
pub fn join_url(base: &str, input: &str) -> String {
    if input.is_empty() || input == "/" {
        return base.to_string();
    }
    if base.is_empty() {
        return input.to_string();
    }

    let segment = input
        .strip_prefix("./")
        .or_else(|| input.strip_prefix('/'))
        .unwrap_or(input);

    let mut url = String::with_capacity(base.len() + 1 + segment.len());
    url.push_str(base);
    if !url.ends_with('/') {
        url.push('/');
    }
    url.push_str(segment);
    url
}
pub fn join_relative_url(inputs: &[&str]) -> String {
if inputs.is_empty() {
return String::new();
}
let mut segments: Vec<String> = Vec::new();
let mut segments_depth = 0;
for input in inputs.iter().filter(|&&i| !i.is_empty() && i != "/") {
for (sindex, s) in input.split('/').enumerate() {
if s.is_empty() || s == "." {
continue;
}
if s == ".." {
if segments.len() == 1
&& has_protocol(segments[0].as_str(), HasProtocolOptions::default())
{
continue;
}
if !segments.is_empty() {
segments.pop();
segments_depth -= 1;
} else {
segments_depth -= 1;
}
continue;
}
if sindex == 1 && segments.last().map_or(false, |last| last.ends_with(':')) {
if let Some(last) = segments.last_mut() {
*last = format!("{}//", last);
}
segments.push(s.to_string());
segments_depth += 1;
continue;
}
segments.push(s.to_string());
segments_depth += 1;
}
}
let mut url = segments.join("/");
if segments_depth >= 0 {
if inputs.first().map_or(false, |&i| i.starts_with('/')) && !url.starts_with('/') {
url = format!("/{}", url);
} else if inputs.first().map_or(false, |&i| i.starts_with("./")) && !url.starts_with("./") {
url = format!("./{}", url);
}
} else {
url = format!("{}{}", "../".repeat(-segments_depth as usize), url);
}
if inputs.last().map_or(false, |&i| i.ends_with('/')) && !url.ends_with('/') {
url.push('/');
}
url
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Inputs starting with `./` or `../` are relative; everything else is not.
    #[test]
    fn test_is_relative() {
        for input in &["./foo", "../foo", "./", "../"] {
            assert!(is_relative(input), "{:?} should be relative", input);
        }
        for input in &["/foo", "foo", "http://example.com", "//foo", "https://foo"] {
            assert!(!is_relative(input), "{:?} should not be relative", input);
        }
    }

    /// Covers strict, protocol-relative and default detection modes.
    #[test]
    fn test_has_protocol() {
        let strict_opts = HasProtocolOptions {
            strict: true,
            ..Default::default()
        };
        let relative_opts = HasProtocolOptions {
            accept_relative: true,
            ..Default::default()
        };

        for url in &[
            "http://example.com",
            "https://example.com",
            "ftp://files.example.com",
        ] {
            assert!(has_protocol(url, strict_opts.clone()), "strict: {:?}", url);
        }
        assert!(!has_protocol("//example.com", strict_opts));
        assert!(has_protocol("//example.com", relative_opts));
        assert!(!has_protocol("example.com", HasProtocolOptions::default()));

        for url in &["sftp://example.com", "ws://example.com", "wss://example.com"] {
            assert!(
                has_protocol(url, HasProtocolOptions::default()),
                "default: {:?}",
                url
            );
        }
    }

    /// Adding and removing trailing slashes, with and without query/fragment awareness.
    #[test]
    fn test_trailing_slash() {
        // (input, respect_query_fragment, expected)
        let removals: &[(&str, bool, &str)] = &[
            ("/foo/", false, "/foo"),
            ("/foo/?query=1", true, "/foo?query=1"),
            ("/foo/#hash", true, "/foo#hash"),
            ("/foo/bar/?query=1#hash", true, "/foo/bar?query=1#hash"),
            ("", false, ""),
            ("/", false, ""),
            ("foo/", false, "foo"),
        ];
        for &(input, respect, expected) in removals {
            assert_eq!(
                without_trailing_slash(input, respect),
                expected,
                "without_trailing_slash({:?}, {})",
                input,
                respect
            );
        }

        let additions: &[(&str, bool, &str)] = &[
            ("/foo", false, "/foo/"),
            ("/foo?query=1", true, "/foo/?query=1"),
            ("/foo#hash", true, "/foo/#hash"),
            ("/foo/bar?query=1#hash", true, "/foo/bar/?query=1#hash"),
            ("", false, "/"),
            ("/", false, "/"),
            ("foo", false, "foo/"),
        ];
        for &(input, respect, expected) in additions {
            assert_eq!(
                with_trailing_slash(input, respect),
                expected,
                "with_trailing_slash({:?}, {})",
                input,
                respect
            );
        }
    }

    /// Adding and removing a single leading slash.
    #[test]
    fn test_leading_slash() {
        // (input, expected)
        let removals: &[(&str, &str)] = &[
            ("/foo", "foo"),
            ("/foo/bar", "foo/bar"),
            ("foo", "foo"),
            ("", ""),
            ("/", ""),
            ("//foo", "/foo"),
        ];
        for &(input, expected) in removals {
            assert_eq!(
                without_leading_slash(input),
                expected,
                "without_leading_slash({:?})",
                input
            );
        }

        let additions: &[(&str, &str)] = &[
            ("foo", "/foo"),
            ("foo/bar", "/foo/bar"),
            ("/foo", "/foo"),
            ("", "/"),
            ("/", "/"),
            ("//foo", "//foo"),
        ];
        for &(input, expected) in additions {
            assert_eq!(
                with_leading_slash(input),
                expected,
                "with_leading_slash({:?})",
                input
            );
        }
    }

    /// Duplicate slashes collapse, except the run right after a scheme's colon.
    #[test]
    fn test_clean_double_slashes() {
        let cases: &[(&str, &str)] = &[
            ("http://example.com//foo//bar", "http://example.com/foo/bar"),
            ("https://example.com///foo////bar", "https://example.com/foo/bar"),
            ("//foo//bar", "/foo/bar"),
            ("foo//bar", "foo/bar"),
            ("", ""),
            ("/", "/"),
            ("////", "/"),
            ("ftp://example.com////foo///bar//", "ftp://example.com/foo/bar/"),
        ];
        for &(input, expected) in cases {
            assert_eq!(
                clean_double_slashes(input),
                expected,
                "clean_double_slashes({:?})",
                input
            );
        }
    }

    /// Relative joins resolve `.` / `..` and keep leading and trailing markers.
    #[test]
    fn test_join_relative_url() {
        let cases: &[(&[&str], &str)] = &[
            (&["/a", "../b", "./c"], "/b/c"),
            (&["a", "b", "c"], "a/b/c"),
            (&["a", "../b", "../c"], "c"),
            (&["/", "a", "b", "/"], "/a/b/"),
            (&["./", "a", "../b"], "./b"),
            (&["a", "b", "..", "c"], "a/c"),
            (&[], ""),
            (&["/"], "/"),
            (&[".", "."], ""),
            (&["..", ".."], "../../"),
            (&["a", ".", "b"], "a/b"),
        ];
        for &(inputs, expected) in cases {
            assert_eq!(
                join_relative_url(inputs),
                expected,
                "join_relative_url({:?})",
                inputs
            );
        }
    }

    /// Query entries are merged into the URL after any existing ones.
    #[test]
    fn test_with_query() {
        // Builds a `QueryObject` from key/value pairs, inserting them in order.
        let query_of = |pairs: Vec<(&str, serde_json::Value)>| {
            let mut object = QueryObject::new();
            for (key, value) in pairs {
                object.insert(key.to_string(), value);
            }
            object
        };

        let query = query_of(vec![("foo", serde_json::json!("bar"))]);
        assert_eq!(
            with_query("http://example.com", &query),
            "http://example.com?foo=bar"
        );
        assert_eq!(
            with_query("http://example.com?existing=1", &query),
            "http://example.com?existing=1&foo=bar"
        );

        let complex_query = query_of(vec![("array", serde_json::json!(["1", "2"]))]);
        assert_eq!(
            with_query("http://example.com", &complex_query),
            "http://example.com?array=1&array=2"
        );

        let empty_query = query_of(Vec::new());
        assert_eq!(
            with_query("http://example.com", &empty_query),
            "http://example.com"
        );

        let multiple_query = query_of(vec![
            ("a", serde_json::json!("1")),
            ("b", serde_json::json!("2")),
        ]);
        assert_eq!(
            with_query("http://example.com?c=3", &multiple_query),
            "http://example.com?c=3&a=1&b=2"
        );
    }

    /// `with_base` prefixes the base unless it is empty, present, or the input is absolute.
    #[test]
    fn test_with_base() {
        // (input, base, expected)
        let cases: &[(&str, &str, &str)] = &[
            ("/path", "", "/path"),
            ("/path", "/", "/path"),
            ("/path", "/base", "/base/path"),
            ("http://example.com", "/base", "http://example.com"),
            ("/base/path", "/base", "/base/path"),
            ("path", "/base/", "/base/path"),
        ];
        for &(input, base, expected) in cases {
            assert_eq!(
                with_base(input, base),
                expected,
                "with_base({:?}, {:?})",
                input,
                base
            );
        }
    }

    /// `without_base` strips a matching base and always leaves a leading slash.
    #[test]
    fn test_without_base() {
        // (input, base, expected)
        let cases: &[(&str, &str, &str)] = &[
            ("/path", "", "/path"),
            ("/path", "/", "/path"),
            ("/base/path", "/base", "/path"),
            ("/other/path", "/base", "/other/path"),
            ("/base", "/base", "/"),
            ("/base/", "/base", "/"),
        ];
        for &(input, base, expected) in cases {
            assert_eq!(
                without_base(input, base),
                expected,
                "without_base({:?}, {:?})",
                input,
                base
            );
        }
    }
}