use crate::{Error, Result};
use log::debug;
use uhttp_uri::HttpUri;
use url::Url;
/// The only URL scheme accepted by `SafeUrlParts::parse`.
const SAFE_URL_SCHEME: &str = "safe";

/// Maximum length allowed for each dot-separated label of the URL's name.
const MAX_LEN_URL_LABELS: usize = 63;

/// Characters that are neither whitespace nor control characters but are
/// still rejected by `validate_url_chars`.
const INVALID_NRS_CHARS: [char; 30] = [
    '\u{200B}', // zero width space
    '\u{200C}', // zero width non-joiner
    '\u{200D}', // zero width joiner
    '\u{2060}', // word joiner
    '\u{FEFF}', // zero width no-break space
    '\u{180E}', // mongolian vowel separator
    '\u{2800}', // braille pattern
    '\u{3164}', // hangul filler
    '\u{115F}', // hangul choseong filler
    '\u{1160}', // hangul jungseong filler
    '\u{FFA0}', // halfwidth hangul filler
    '\u{2422}', // blank symbol
    '\u{034F}', // combining grapheme joiner
    '\u{17B4}', // khmer vowel inherent aq
    '\u{17B5}', // khmer vowel inherent aa
    '\u{2065}', // reserved
    '\u{FFF0}', // reserved
    '\u{FFF1}', // reserved
    '\u{FFF2}', // reserved
    '\u{FFF3}', // reserved
    '\u{FFF4}', // reserved
    '\u{FFF5}', // reserved
    '\u{FFF6}', // reserved
    '\u{FFF7}', // reserved
    '\u{206A}', // inhibit symmetric swapping
    '\u{206B}', // activate symmetric swapping
    '\u{206C}', // inhibit arabic form shaping
    '\u{206D}', // activate arabic form shaping
    '\u{206E}', // national digit shapes
    '\u{206F}', // nominal digit shapes
];
/// The individual components of a parsed `safe://` URL, as produced by
/// `SafeUrlParts::parse`.
#[derive(Debug, Clone)]
pub(crate) struct SafeUrlParts {
    /// URL scheme as reported by the URL parser.
    pub scheme: String,
    /// Full host portion of the URL (all dot-separated labels).
    pub public_name: String,
    /// Last dot-separated label of `public_name`.
    pub top_name: String,
    /// Every label of `public_name` except the last, joined with `.`.
    pub sub_names: String,
    /// The same labels as `sub_names`, one per element.
    pub sub_names_vec: Vec<String>,
    /// Path component of the URL.
    pub path: String,
    /// Query string, or an empty string when the URL has none.
    pub query_string: String,
    /// Fragment, or an empty string when the URL has none.
    pub fragment: String,
}
impl SafeUrlParts {
    /// Parses a `safe://` URL string into its component parts.
    ///
    /// * `url` - the URL to parse; it must use the `safe` scheme and carry a name (host).
    /// * `ignore_labels_size` - when `true`, the per-label length limit
    ///   (`MAX_LEN_URL_LABELS`) is not enforced.
    ///
    /// # Errors
    ///
    /// * `Error::InvalidInput` when `validate_url_chars` rejects the URL, when the
    ///   name is longer than 255, or (unless `ignore_labels_size` is set) when a
    ///   label is longer than `MAX_LEN_URL_LABELS`.
    /// * `Error::InvalidXorUrl` when the URL cannot be parsed, uses a scheme other
    ///   than `safe`, has no name, has a name containing `..`, or has a path
    ///   containing `//`.
    pub fn parse(url: &str, ignore_labels_size: bool) -> Result<Self> {
        validate_url_chars(url)?;

        let parsing_url = Url::parse(url).map_err(|parse_err| {
            let msg = format!("Problem parsing the URL \"{}\": {}", url, parse_err);
            Error::InvalidXorUrl(msg)
        })?;

        // `Url` reports the scheme lower-cased, so `SAFE://` passes this check too.
        let scheme = parsing_url.scheme().to_string();
        if scheme != SAFE_URL_SCHEME {
            let msg = format!(
                "invalid scheme: '{}'. expected: '{}'",
                scheme, SAFE_URL_SCHEME
            );
            return Err(Error::InvalidXorUrl(msg));
        }

        let public_name = match parsing_url.host_str() {
            Some(h) => h.to_string(),
            None => {
                let msg = format!("Problem parsing the URL \"{}\": {}", url, "missing name");
                return Err(Error::InvalidXorUrl(msg));
            }
        };

        // Two consecutive dots mean there is an empty label between them.
        if public_name.contains("..") {
            let msg = "name contains empty subname".to_string();
            return Err(Error::InvalidXorUrl(msg));
        }

        if public_name.len() > 255 {
            let msg = format!(
                "Name is {} chars, must be no more than 255",
                public_name.len()
            );
            return Err(Error::InvalidInput(msg));
        }

        let names_vec: Vec<String> = public_name.split('.').map(String::from).collect();

        if !ignore_labels_size {
            if let Some(name) = names_vec
                .iter()
                .find(|label| label.len() > MAX_LEN_URL_LABELS)
            {
                let msg = format!(
                    "Label is {} chars, must be no more than {}: {}",
                    name.len(),
                    MAX_LEN_URL_LABELS,
                    name
                );
                return Err(Error::InvalidInput(msg));
            }
        }

        // The last label is the top name, everything before it are sub names.
        // `split` always yields at least one item, so the `None` arm is only a
        // panic-free fallback.
        let (top_name, sub_names_vec) = match names_vec.split_last() {
            Some((top, subs)) => (top.clone(), subs.to_vec()),
            None => (String::new(), Vec::new()),
        };
        let sub_names = sub_names_vec.join(".");

        // The path is extracted with `HttpUri`, which is given the URL with its
        // scheme swapped for `http`. The swap is done by position (everything
        // before the first ':') rather than by searching for the literal
        // "safe://", so that a scheme written in a different case, which has
        // already passed the check above, is handled as well.
        let http_url = match url.find(':') {
            Some(scheme_end) => format!("http{}", &url[scheme_end..]),
            None => url.to_string(),
        };
        let uri = HttpUri::new(&http_url).map_err(|parse_err| {
            let msg = format!("Problem parsing the URL \"{}\": {:?}", url, parse_err);
            Error::InvalidXorUrl(msg)
        })?;
        let path = uri.resource.path.to_string();

        let query_string = parsing_url.query().unwrap_or("").to_string();
        let fragment = parsing_url.fragment().unwrap_or("").to_string();

        // Two consecutive slashes mean there is an empty path component.
        if path.contains("//") {
            let msg = "path contains empty component".to_string();
            return Err(Error::InvalidXorUrl(msg));
        }

        debug!(
            "Parsed url: scheme: {}, public_name: {}, top_name: {}, sub_names: {}, sub_names_vec: {:?}, path: {}, query_string: {}, fragment: {:?}",
            scheme,
            public_name,
            top_name,
            sub_names,
            sub_names_vec,
            path,
            query_string,
            fragment,
        );

        Ok(Self {
            scheme,
            public_name,
            top_name,
            sub_names,
            sub_names_vec,
            path,
            query_string,
            fragment,
        })
    }
}
fn validate_url_chars(url: &str) -> Result<()> {
if url.contains(char::is_whitespace) {
let msg = "The URL cannot contain whitespace".to_string();
return Err(Error::InvalidInput(msg));
}
if url.contains(char::is_control) {
let msg = "The URL cannot contain control characters".to_string();
return Err(Error::InvalidInput(msg));
}
if url.contains(&INVALID_NRS_CHARS[..]) {
let msg = "The URL cannot contain invalid characters".to_string();
return Err(Error::InvalidInput(msg));
}
Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use anyhow::{anyhow, Result};

    /// Every URL containing a whitespace character must be rejected with the
    /// whitespace-specific `InvalidInput` message.
    #[test]
    fn test_safeurl_validate_url_chars_with_whitespace() -> Result<()> {
        let urls = [
            "safe://with space",
            "safe://nonbreaking\u{00a0}space",
            "safe://tab\u{0009}char",
            "safe://new\u{000A}line",
            "safe://line\u{000B}tabulation",
            "safe://form\u{000C}feed",
            "safe://carriage\u{000D}return",
            "safe://next\u{0085}line",
            "safe://ogham\u{1680}spacemark",
            "safe://en\u{2000}quad",
            "safe://em\u{2001}quad",
            "safe://en\u{2002}space",
            "safe://en\u{2003}space",
            "safe://threeper\u{2004}emspace",
            "safe://fourper\u{2005}emspace",
            "safe://sixper\u{2006}emspace",
            "safe://figure\u{2007}space",
            "safe://punctuation\u{2008}space",
            "safe://thin\u{2009}space",
            "safe://hair\u{200A}space",
            "safe://line\u{2028}separator",
            "safe://paragraph\u{2029}separator",
            "safe://narrow\u{202F}nobreakspace",
            "safe://medium\u{205F}mathematicalspace",
            "safe://ideographic\u{3000}space",
        ];

        urls.iter()
            .try_for_each(|url| match SafeUrlParts::parse(url, false) {
                Err(Error::InvalidInput(msg)) => {
                    assert_eq!(msg, "The URL cannot contain whitespace".to_string());
                    Ok(())
                }
                Err(err) => Err(anyhow!("Error returned is not the expected one: {}", err)),
                Ok(_) => Err(anyhow!(
                    "Unexpectedly validated url with whitespace {}",
                    url
                )),
            })
    }

    /// Every URL containing a control character must be rejected with the
    /// control-character-specific `InvalidInput` message.
    #[test]
    fn test_safeurl_validate_url_chars_with_control_characters() -> Result<()> {
        let urls = [
            "safe://null\u{0000}character",
            "safe://start\u{0001}heading",
            "safe://start\u{0002}text",
            "safe://end\u{0003}text",
            "safe://end\u{0004}transmission",
            "safe://enquiry\u{0005}character",
            "safe://acknowledge\u{0006}character",
            "safe://bell\u{0007}character",
            "safe://backspace\u{0008}character",
            "safe://shift\u{000E}out",
            "safe://shift\u{000F}in",
            "safe://datalink\u{0010}escape",
            "safe://device\u{0011}controlone",
            "safe://device\u{0012}controltwo",
            "safe://device\u{0013}controlthree",
            "safe://device\u{0014}controlfour",
            "safe://negative\u{0015}acknowledge",
            "safe://synchronous\u{0016}idle",
            "safe://end\u{0017}transmission",
            "safe://cancel\u{0018}character",
            "safe://end\u{0019}ofmedium",
            "safe://substitute\u{001A}character",
            "safe://escape\u{001B}character",
            "safe://file\u{001C}separator",
            "safe://group\u{001D}separator",
            "safe://record\u{001E}separator",
            "safe://unit\u{001F}separator",
            "safe://delete\u{007F}character",
            "safe://padding\u{0080}character",
            "safe://highoctet\u{0081}preset",
            "safe://break\u{0082}permitted",
            "safe://no\u{0083}break",
            "safe://index\u{0084}character",
            "safe://startof\u{0086}selectedarea",
            "safe://endof\u{0087}selectedarea",
            "safe://character\u{0088}tabulationset",
            "safe://character\u{0089}tabulationwithjustification",
            "safe://line\u{008A}tabulationset",
            "safe://partialline\u{008B}forward",
            "safe://partialline\u{008C}backward",
            "safe://reverse\u{008D}feed",
            "safe://single\u{008E}shift2",
            "safe://single\u{008F}shift3",
            "safe://devicecontrol\u{0090}string",
            "safe://private\u{0091}use1",
            "safe://private\u{0092}use2",
            "safe://set\u{0093}transmitstate",
            "safe://cancel\u{0094}character",
            "safe://message\u{0095}waiting",
            "safe://startof\u{0096}protectedarea",
            "safe://endof\u{0097}protectedarea",
            "safe://startof\u{0098}string",
            "safe://singlegraphic\u{0099}characterintroducer",
            "safe://single\u{009A}characterintroducer",
            "safe://controlsequence\u{009B}introducer",
            "safe://string\u{009C}terminator",
            "safe://operatingsystem\u{009D}command",
            "safe://privacy\u{009E}message",
            "safe://application\u{009F}programcommand",
        ];

        urls.iter()
            .try_for_each(|url| match SafeUrlParts::parse(url, false) {
                Err(Error::InvalidInput(msg)) => {
                    assert_eq!(msg, "The URL cannot contain control characters".to_string());
                    Ok(())
                }
                Err(err) => Err(anyhow!("Error returned is not the expected one: {}", err)),
                Ok(_) => Err(anyhow!(
                    "Unexpectedly validated url with control character {}",
                    url
                )),
            })
    }

    /// Every URL containing one of the explicitly listed invalid characters
    /// must be rejected with the invalid-character `InvalidInput` message.
    #[test]
    fn test_safeurl_validate_url_chars_with_invalid_characters() -> Result<()> {
        let urls = [
            "safe://zerowidth\u{200B}space",
            "safe://zerowidth\u{200C}nonjoiner",
            "safe://zerowidth\u{200D}joiner",
            "safe://word\u{2060}joiner",
            "safe://zerowidth\u{FEFF}nbsp",
            "safe://mongolian\u{180E}vowelseparator",
            "safe://braille\u{2800}patter",
            "safe://hangul\u{3164}filler",
            "safe://hangul\u{115F}choseongfiller",
            "safe://hangul\u{1160}jungseongfiller",
            "safe://halfwidth\u{FFA0}hangulfiller",
            "safe://blank\u{2422}symbol",
            "safe://combining\u{034F}graphemejoiner",
            "safe://khmervowel\u{17B4}inherentaq",
            "safe://khmervowel\u{17B5}inherentaa",
            "safe://reserved\u{2065}reserved",
            "safe://reserved\u{FFF0}reserved",
            "safe://reserved\u{FFF1}reserved",
            "safe://reserved\u{FFF2}reserved",
            "safe://reserved\u{FFF3}reserved",
            "safe://reserved\u{FFF4}reserved",
            "safe://reserved\u{FFF5}reserved",
            "safe://reserved\u{FFF6}reserved",
            "safe://reserved\u{FFF7}reserved",
            "safe://inhibit\u{206A}symmetricswapping",
            "safe://activate\u{206B}symmetricswapping",
            "safe://inhibit\u{206C}arabicformshaping",
            "safe://activate\u{206D}arabicformshaping",
            "safe://national\u{206E}digitshapes",
            "safe://nominal\u{206F}digitshapes",
        ];

        urls.iter()
            .try_for_each(|url| match SafeUrlParts::parse(url, false) {
                Err(Error::InvalidInput(msg)) => {
                    assert_eq!(msg, "The URL cannot contain invalid characters".to_string());
                    Ok(())
                }
                Err(err) => Err(anyhow!("Error returned is not the expected one: {}", err)),
                Ok(_) => Err(anyhow!(
                    "Unexpectedly validated url with invalid character {}",
                    url
                )),
            })
    }
}