use std::sync::LazyLock;
use regex::Regex;
use crate::types::Link;
static CONTROL_CHAR_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"[\x00-\x1f\x7f]").expect("CONTROL_CHAR_RE is a valid regex"));
static DANGEROUS_SCHEME: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"(?i)^(javascript|data|vbscript|blob)\s*:").expect("DANGEROUS_SCHEME is a valid regex")
});
static SCHEME_MATCH: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"^([a-zA-Z][a-zA-Z0-9+\-.]*)\s*:").expect("SCHEME_MATCH is a valid regex")
});
pub const DEFAULT_SCHEMES: &[&str] = &["http", "https"];
pub const STRICT_SCHEMES: &[&str] = &["http", "https", "mailto"];
#[must_use]
pub fn sanitize_url(url: &str) -> String {
if url.is_empty() {
return url.to_owned();
}
let normalized = CONTROL_CHAR_RE.replace_all(url, "");
let normalized = normalized.trim();
if DANGEROUS_SCHEME.is_match(normalized) {
"about:blank".to_owned()
} else {
url.to_owned()
}
}
#[must_use]
pub fn sanitize_url_strict(url: &str) -> String {
sanitize_url_with_schemes(url, Some(STRICT_SCHEMES))
}
#[must_use]
pub fn sanitize_url_with_schemes(url: &str, allowed_schemes: Option<&[&str]>) -> String {
let base = sanitize_url(url);
if base == "about:blank" || base.is_empty() {
return base;
}
let schemes = allowed_schemes.unwrap_or(DEFAULT_SCHEMES);
let normalized = CONTROL_CHAR_RE.replace_all(&base, "");
let normalized = normalized.trim();
if let Some(caps) = SCHEME_MATCH.captures(normalized) {
let scheme = caps
.get(1)
.expect("capture group 1 guaranteed by regex")
.as_str()
.to_lowercase();
if !schemes.iter().any(|s| *s == scheme) {
return "about:blank".to_owned();
}
}
base
}
pub(crate) fn sanitize_link(link: &Link) -> Link {
let mut out = link.clone();
if !out.url.is_empty() {
let safe = sanitize_url(&out.url);
if safe != out.url {
out.url = safe;
}
}
out
}