use url::Url;
pub fn sanitize_image_url(url: Option<&str>) -> Option<String> {
let raw = url?.trim();
if raw.is_empty() {
return None;
}
let with_scheme = if raw.starts_with("//") {
format!("https:{raw}")
} else {
raw.to_string()
};
let parsed = Url::parse(&with_scheme).ok()?;
match parsed.scheme() {
"http" | "https" => {}
_ => return None,
}
parsed.host_str()?;
let tracking_params = [
"utm_source",
"utm_medium",
"utm_campaign",
"utm_term",
"utm_content",
"fbclid",
"gclid",
"ref",
];
let mut cleaned = parsed.clone();
{
let filtered: Vec<(String, String)> = parsed
.query_pairs()
.into_owned()
.filter(|(key, _)| !tracking_params.contains(&key.as_str()))
.collect();
let mut query = cleaned.query_pairs_mut();
query.clear();
for (key, value) in &filtered {
query.append_pair(key, value);
}
}
if cleaned.query() == Some("") {
cleaned.set_query(None);
}
Some(cleaned.to_string())
}
pub fn sanitize_stream_url(url: &str) -> String {
let trimmed = url.trim();
if trimmed.is_empty() {
return String::new();
}
match Url::parse(trimmed) {
Ok(parsed) => parsed.to_string(),
Err(_) => trimmed.to_string(),
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn valid_https_url_passes() {
let result = sanitize_image_url(Some("https://cdn.example.com/logo.png"));
assert_eq!(result, Some("https://cdn.example.com/logo.png".into()));
}
#[test]
fn valid_http_url_passes() {
let result = sanitize_image_url(Some("http://cdn.example.com/logo.png"));
assert_eq!(result, Some("http://cdn.example.com/logo.png".into()));
}
#[test]
fn none_returns_none() {
assert_eq!(sanitize_image_url(None), None);
}
#[test]
fn empty_returns_none() {
assert_eq!(sanitize_image_url(Some("")), None);
assert_eq!(sanitize_image_url(Some(" ")), None);
}
#[test]
fn invalid_scheme_returns_none() {
assert_eq!(sanitize_image_url(Some("ftp://example.com/logo.png")), None);
assert_eq!(sanitize_image_url(Some("data:image/png;base64,abc")), None);
}
#[test]
fn protocol_relative_gets_https() {
let result = sanitize_image_url(Some("//cdn.example.com/logo.png"));
assert!(result.is_some());
assert!(result.unwrap().starts_with("https://"));
}
#[test]
fn removes_tracking_params() {
let result = sanitize_image_url(Some(
"https://cdn.example.com/logo.png?w=100&utm_source=tv&fbclid=abc",
));
let url = result.unwrap();
assert!(url.contains("w=100"));
assert!(!url.contains("utm_source"));
assert!(!url.contains("fbclid"));
}
#[test]
fn trims_whitespace() {
let result = sanitize_image_url(Some(" https://cdn.example.com/logo.png "));
assert_eq!(result, Some("https://cdn.example.com/logo.png".into()));
}
#[test]
fn trims_stream_url_whitespace() {
assert_eq!(
sanitize_stream_url(" http://example.com/stream "),
"http://example.com/stream",
);
}
#[test]
fn empty_stream_url_returns_empty() {
assert_eq!(sanitize_stream_url(""), "");
assert_eq!(sanitize_stream_url(" "), "");
}
#[test]
fn normalizes_percent_encoding() {
let result = sanitize_stream_url("http://example.com/path%20with%20spaces");
assert!(result.contains("example.com"));
}
#[test]
fn unparseable_url_returns_trimmed() {
let input = "some://[invalid-url";
let result = sanitize_stream_url(input);
assert_eq!(result, input);
}
}