use std::borrow::Cow;
use lazy_static::lazy_static;
use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
use crate::htmltools::encode_html;
/// Parsed form of a URL string, tagged by whether it could be parsed on its own.
#[derive(Debug)]
pub(crate) enum UrlBits {
/// A URL that `url::Url::parse` accepted without needing a base.
AbsoluteUrl(url::Url),
/// A URL that was not parseable on its own and is tracked relative to the
/// pseudo base URL instead.
RelativeUrl {
/// The URL obtained by joining `source` onto the pseudo base (or the bare
/// base itself when every parsing attempt failed).
url: url::Url,
/// The text that was joined onto the base: the input as given, or its
/// percent-encoded form. Empty when parsing fell back to the bare base.
source: String,
},
}
/// Placeholder absolute URL that relative references are joined onto so the
/// `url` crate can parse them (see `UrlBits::make_relative_url`).
const PSEUDO_BASE: &str = "http://example.com";
lazy_static! {
// Parsed form of `PSEUDO_BASE`, initialized lazily by `lazy_static!`.
// The `expect` can only fire if the constant above is not a valid URL.
static ref BASE: url::Url = url::Url::parse(PSEUDO_BASE)
.expect("A valid url");
}
impl UrlBits {
    /// Joins `url` onto the pseudo base and wraps the result as a relative URL.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidData` I/O error carrying the underlying parse error
    /// when the join fails.
    fn make_relative_url(url: &str) -> std::io::Result<Self> {
        match BASE.join(url) {
            Ok(joined) => Ok(Self::RelativeUrl {
                url: joined,
                source: url.to_owned(),
            }),
            Err(parse_err) => Err(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                parse_err,
            )),
        }
    }

    /// Parses `url`, never failing.
    ///
    /// Attempts, in order:
    /// 1. parse it as an absolute URL;
    /// 2. parse it as a reference relative to the pseudo base;
    /// 3. percent-encode every non-alphanumeric character and retry step 2;
    /// 4. fall back to the bare pseudo base with an empty source.
    pub fn parse(url: &str) -> Self {
        if let Ok(absolute) = url::Url::parse(url) {
            return Self::AbsoluteUrl(absolute);
        }
        if let Ok(relative) = Self::make_relative_url(url) {
            return relative;
        }

        // Last resort before giving up: escape everything that could confuse
        // the parser and treat the result as a relative reference.
        let escaped = utf8_percent_encode(url, NON_ALPHANUMERIC).to_string();
        match Self::make_relative_url(&escaped) {
            Ok(relative) => relative,
            Err(_) => Self::RelativeUrl {
                url: BASE.clone(),
                source: String::new(),
            },
        }
    }

    /// Returns the scheme of an absolute URL, or `""` for a relative one.
    pub fn scheme(&self) -> &str {
        match self {
            Self::RelativeUrl { .. } => "",
            Self::AbsoluteUrl(absolute) => absolute.scheme(),
        }
    }

    /// Returns `true` when this value is the `RelativeUrl` variant.
    pub fn is_relative(&self) -> bool {
        matches!(self, Self::RelativeUrl { .. })
    }
}
/// Renders the URL as normalized text; `to_string()` is provided through the
/// standard blanket `ToString` implementation for `Display` types.
///
/// * Absolute URLs are written exactly as the `url` crate serializes them.
/// * Relative URLs keep the path portion of `source` verbatim (so leading
///   `../` segments survive) and take the query and fragment from the parsed
///   URL, which holds them in percent-encoded form.
impl std::fmt::Display for UrlBits {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            UrlBits::AbsoluteUrl(url) => f.write_str(url.as_str()),
            UrlBits::RelativeUrl { url, source } => {
                // The path ends at the first `?` or `#`, whichever comes
                // first. Searching for `?` alone (or for the last `#`) would
                // mistake delimiters that occur inside the fragment for the
                // real ones and leave part of the fragment un-normalized.
                let path_end = source
                    .find(|c: char| matches!(c, '?' | '#'))
                    .unwrap_or(source.len());
                f.write_str(&source[..path_end])?;

                if let Some(query) = url.query() {
                    write!(f, "?{}", query)?;
                }
                if let Some(fragment) = url.fragment() {
                    write!(f, "#{}", fragment)?;
                }
                Ok(())
            }
        }
    }
}
/// URL text that is either already normalized or still needs normalizing
/// when it is rendered.
#[derive(Clone, Debug)]
pub(crate) enum UrlString<'t> {
    /// Text that is emitted verbatim when converted to a string.
    Normalized(Cow<'t, str>),
    /// Text that is parsed and normalized when converted to a string.
    Raw(Cow<'t, str>),
}
impl <'t> From<String> for UrlString<'t> {
fn from(source: String) -> Self {
Self::Raw(Cow::Owned(source))
}
}
impl <'t> From<Cow<'t, str>> for UrlString<'t> {
fn from(source: Cow<'t, str>) -> Self {
Self::Raw(source)
}
}
/// Renders the URL text; `to_string()` is provided through the standard
/// blanket `ToString` implementation for `Display` types.
///
/// * `Normalized` text is written unchanged.
/// * Empty `Raw` text produces an empty string without being parsed.
/// * Any other `Raw` text is parsed with `UrlBits::parse` and written in its
///   normalized string form.
impl<'t> std::fmt::Display for UrlString<'t> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Normalized(url_text) => f.write_str(url_text),
            // Skip parsing entirely so an empty URL stays empty.
            Self::Raw(url_text) if url_text.is_empty() => Ok(()),
            Self::Raw(url_text) => f.write_str(&UrlBits::parse(url_text).to_string()),
        }
    }
}
impl<'t> UrlString<'t> {
    /// Returns the text held by this value, whichever variant it is, without
    /// normalizing it.
    pub(crate) fn source(&self) -> &Cow<'t, str> {
        match self {
            Self::Normalized(text) | Self::Raw(text) => text,
        }
    }

    /// Converts the URL to its string form and passes it through
    /// `encode_html` with both boolean flags set to `true`.
    // NOTE(review): the meaning of the two flags is defined by
    // `crate::htmltools::encode_html`, which is not visible here.
    pub(crate) fn to_html_string(&self) -> String {
        let rendered = self.to_string();
        encode_html(&rendered, true, true)
    }
}
#[cfg(test)]
mod test {
    use crate::urlutils::{UrlBits, UrlString};
    use std::borrow::Cow;

    /// Parses `input` and checks, in this order, whether it is relative, its
    /// scheme, and its normalized string form.
    fn check(input: &str, relative: bool, scheme: &str, normalized: &str) {
        let parsed = UrlBits::parse(input);
        assert_eq!(parsed.is_relative(), relative);
        assert_eq!(parsed.scheme(), scheme);
        assert_eq!(parsed.to_string(), normalized);
    }

    #[test]
    fn test_url_bits() {
        // Absolute URL that is already in normalized form.
        check(
            "http://example.com/&.html",
            false,
            "http",
            "http://example.com/&.html",
        );

        // HTML rendering of a raw URL string.
        let html = UrlString::from(Cow::Borrowed("http://example.com/<&test>.html"));
        assert_eq!(html.to_html_string(), "http://example.com/%3C&test%3E.html");

        // Markup inside the path is percent-encoded.
        let script =
            UrlBits::parse("http://example.com/<script>window.alert(\"Hello World!\");</script>.png");
        assert_eq!(
            script.to_string(),
            "http://example.com/%3Cscript%3Ewindow.alert(%22Hello%20World!%22);%3C/script%3E.png"
        );

        // Relative reference with both a query and a fragment.
        check(
            "some_page.html?q=Some query#Some text",
            true,
            "",
            "some_page.html?q=Some%20query#Some%20text",
        );

        // Leading parent-directory segments are kept as written.
        check(
            "../../some_page.html#Some text",
            true,
            "",
            "../../some_page.html#Some%20text",
        );

        // Absolute URL with a query and a fragment.
        check(
            "https://example.com/some_page.html?q=Some query#Some text",
            false,
            "https",
            "https://example.com/some_page.html?q=Some%20query#Some%20text",
        );

        // Unparseable input falls back to the fully percent-encoded form.
        check(
            "https:::://example.com/some_page.html?q=Some query#Some text",
            true,
            "",
            "https%3A%3A%3A%3A%2F%2Fexample%2Ecom%2Fsome%5Fpage%2Ehtml%3Fq%3DSome%20query%23Some%20text",
        );
    }
}