use regex::Regex;
use std::borrow::Cow;
use std::sync::LazyLock;
#[cfg(feature = "html")]
use crate::tree::LinkLocation;
/// Prefixes that mark a string as an absolute URL.
///
/// Each entry includes its trailing separator (`:` or `://`), so a
/// string is recognised by a plain prefix comparison against these
/// entries; see `is_url`, which performs that comparison.
pub const URL_SCHEMES: [&str; 19] = [
"blob:",
"chrome-extension://",
"chrome://",
"content://",
"dns:",
"feed:",
"file://",
"ftp://",
"git://",
"gopher://",
"http://",
"https://",
"irc6://",
"irc://",
"ircs://",
"mailto:",
"resource://",
"rtmp://",
"sftp://",
];
/// Returns `true` when `url` begins with one of the prefixes listed in
/// [`URL_SCHEMES`].
///
/// This is a literal prefix comparison: it is case-sensitive and performs
/// no further validation of whatever follows the prefix.
pub fn is_url(url: &str) -> bool {
    URL_SCHEMES
        .iter()
        .copied()
        .any(|prefix| url.starts_with(prefix))
}
/// Reports whether `url` appears to use a scheme that should be rejected.
///
/// The "scheme" is whatever precedes the first `:` in `url`.
///
/// Returns `false` when:
/// * `url` starts with `/` (a path, even if it contains a colon), or
/// * `url` contains no `:` at all.
///
/// Returns `true` when:
/// * the scheme is empty or contains anything other than word characters
///   and hyphens (for example whitespace or control characters), or
/// * the scheme equals `javascript` or `data`, ignoring ASCII case.
pub fn dangerous_scheme(url: &str) -> bool {
    static VALID_SCHEME: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"^[\w\-]+$").unwrap());

    // Paths are never treated as having a scheme.
    if url.starts_with('/') {
        return false;
    }

    // Without a colon there is nothing that could be a scheme.
    let Some((scheme, _)) = url.split_once(':') else {
        return false;
    };

    // A malformed scheme is rejected outright; a well-formed one is
    // rejected only if it is on the deny list.
    let well_formed = VALID_SCHEME.is_match(scheme);
    !well_formed
        || scheme.eq_ignore_ascii_case("javascript")
        || scheme.eq_ignore_ascii_case("data")
}
/// Produces the href string for a parsed link location.
///
/// * A `LinkLocation::Url` is passed to [`normalize_href`] with no extra
///   component.
/// * A `LinkLocation::Page` is split into `(site, page, extra)`. When a
///   site is present the URL is produced by `helper.build_url`; otherwise
///   the page and extra are passed to [`normalize_href`].
#[cfg(feature = "html")]
pub fn normalize_link<'a>(
    link: &'a LinkLocation<'a>,
    helper: &dyn BuildSiteUrl,
) -> Cow<'a, str> {
    let page_ref = match link {
        LinkLocation::Url(url) => return normalize_href(url, None),
        LinkLocation::Page(page_ref) => page_ref,
    };

    let (site, page, extra) = page_ref.fields();
    if let Some(site) = site {
        // A named site is delegated to the helper, which returns an owned string.
        return Cow::Owned(helper.build_url(site, page, extra));
    }

    normalize_href(page, extra)
}
/// Normalizes a link target into a string suitable for use as an href.
///
/// The cases are checked in this order:
/// 1. The exact string `javascript:;` is returned unchanged (`extra` is
///    ignored).
/// 2. Anything accepted by [`is_url`], or starting with `/` or `#`, is
///    returned as-is, with `extra` appended when present.
/// 3. Anything flagged by [`dangerous_scheme`] is replaced with
///    `#invalid-url`.
/// 4. Everything else gets a leading `/`, followed by `extra` if present.
///
/// A borrowed value is returned whenever no new string has to be built.
pub fn normalize_href<'a>(url: &'a str, extra: Option<&'a str>) -> Cow<'a, str> {
    if url == "javascript:;" {
        trace!("Leaving no-op link as-is");
        return Cow::Borrowed(url);
    }

    let already_safe = is_url(url) || url.starts_with('/') || url.starts_with('#');
    if already_safe {
        return if let Some(extra) = extra {
            trace!("Leaving safe URL with extra as-is: {url}{extra}");
            Cow::Owned(format!("{url}{extra}"))
        } else {
            trace!("Leaving safe URL as-is: {url}");
            Cow::Borrowed(url)
        };
    }

    if dangerous_scheme(url) {
        warn!("Attempt to pass in dangerous URL: {url}");
        return Cow::Borrowed("#invalid-url");
    }

    // Remaining inputs are bare page names: root them with a leading slash.
    let extra = extra.unwrap_or("");
    trace!("Adding leading slash to URL: {url}{extra}");
    Cow::Owned(format!("/{url}{extra}"))
}
/// Hook for producing URLs to pages that name an explicit site.
///
/// `normalize_link` calls this when a page link's fields include a site,
/// and uses the returned string as the link's href.
pub trait BuildSiteUrl {
/// Builds the URL for `path` on `site`.
///
/// `extra` is the optional third component of the page reference
/// (presumably a trailing fragment or sub-path to append; the exact
/// handling is up to the implementation).
fn build_url(&self, site: &str, path: &str, extra: Option<&str>) -> String;
}
/// Checks `dangerous_scheme` against a table of inputs and expected verdicts.
#[test]
fn detect_dangerous_schemes() {
    let cases: &[(&str, bool)] = &[
        // Ordinary URLs with benign schemes
        ("http://example.com/", false),
        ("https://example.com/", false),
        ("irc://irc.scpwiki.com", false),
        // javascript: in various disguises, plus a malformed scheme
        ("javascript:alert(1)", true),
        ("JAVASCRIPT:alert(1)", true),
        (" javascript:alert(1)", true),
        ("java\nscript:alert(1)", true),
        ("javascript\t:alert(1)", true),
        ("wtf$1:foo", true),
        ("JaVaScRiPt:alert(document.cookie)", true),
        // data: URLs
        ("data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==", true),
        ("data:text/javascript,alert(1)", true),
        ("data:text/html,<script>alert('XSS');</script>", true),
        ("DATA:text/html,<script>alert('XSS');</script>", true),
        // Paths, with and without a colon
        ("/page", false),
        ("/page#target", false),
        ("/page/edit", false),
        ("/page/edit#target", false),
        ("/category:page", false),
        ("/category:page#target", false),
        ("/category:page/edit", false),
        ("/category:page/edit#target", false),
    ];

    for &(input, expected) in cases {
        assert_eq!(
            dangerous_scheme(input),
            expected,
            "For input {:?}, dangerous scheme detection failed",
            input,
        );
    }
}
/// Checks `normalize_href` against a table of `(url, extra, expected)` rows.
#[test]
fn test_normalize_href() {
    let cases: &[(&str, Option<&str>, &str)] = &[
        // Anchors and the no-op link are returned unchanged
        ("#", None, "#"),
        ("#target", None, "#target"),
        ("#edit-area", None, "#edit-area"),
        ("javascript:;", None, "javascript:;"),
        // Recognized URL schemes are returned unchanged
        ("http://example.net", None, "http://example.net"),
        ("https://example.net", None, "https://example.net"),
        ("irc://irc.scpwiki.com", None, "irc://irc.scpwiki.com"),
        (
            "sftp://ftp.example.com/upload",
            None,
            "sftp://ftp.example.com/upload",
        ),
        // Dangerous schemes are replaced
        ("javascript:alert(1)", None, "#invalid-url"),
        (
            "data:text/html,<script>alert('XSS')</script>",
            None,
            "#invalid-url",
        ),
        // Paths, optionally with an extra component appended
        ("/page", None, "/page"),
        ("/page", Some("#target"), "/page#target"),
        ("/page", Some("/edit"), "/page/edit"),
        ("page", Some("/edit#target"), "/page/edit#target"),
        ("/category:page", None, "/category:page"),
        ("/category:page", Some("#target"), "/category:page#target"),
        ("/category:page", Some("/edit"), "/category:page/edit"),
        (
            "/category:page",
            Some("/edit#target"),
            "/category:page/edit#target",
        ),
        // Bare page names gain a leading slash
        ("some-page", None, "/some-page"),
        ("some-page#target", None, "/some-page#target"),
        ("system:some-page", None, "/system:some-page"),
        ("system:some-page#target", None, "/system:some-page#target"),
    ];

    for &(url, extra, expected) in cases {
        let actual = normalize_href(url, extra);
        match extra {
            None => assert_eq!(
                actual.as_ref(),
                expected,
                "For input {:?}, normalize_href() doesn't match expected",
                url,
            ),
            Some(extra) => assert_eq!(
                actual.as_ref(),
                expected,
                "For input {:?} / {:?}, normalize_href() doesn't match expected",
                url,
                extra,
            ),
        }
    }
}