use std::collections::HashSet;
use std::path::Path;
/// Built-in blocklist entries.
///
/// [`load`] seeds its result with these names before adding any entries from
/// the user's file. Each entry is a bare domain name (no scheme, port or
/// path); [`is_blocked`] treats an entry as covering the domain itself and
/// every subdomain beneath it.
const DEFAULT_DOMAINS: &[&str] = &[
"doubleclick.net",
"googlesyndication.com",
"googleadservices.com",
"google-analytics.com",
"googletagmanager.com",
"googletagservices.com",
"adservice.google.com",
"amazon-adsystem.com",
"adnxs.com",
"criteo.com",
"criteo.net",
"taboola.com",
"outbrain.com",
"scorecardresearch.com",
"quantserve.com",
"pubmatic.com",
"rubiconproject.com",
"openx.net",
"casalemedia.com",
"moatads.com",
"zedo.com",
"adcolony.com",
"applovin.com",
"chartbeat.com",
"hotjar.com",
"mixpanel.com",
"segment.com",
"branch.io",
"connect.facebook.net",
"ads.yahoo.com",
"ads.linkedin.com",
"bat.bing.com",
];
/// Builds the blocklist: the built-in [`DEFAULT_DOMAINS`] plus any entries
/// found in `user_file`.
///
/// The user file is read as UTF-8 text with one domain per line:
///
/// * blank lines are skipped;
/// * everything from a `#` to the end of the line is a comment, so both
///   whole-line and trailing comments are ignored;
/// * entries are ASCII lower-cased, because host names are case-insensitive
///   and the blocklist is compared against hosts as plain strings.
///
/// Loading is best-effort: if the file is missing or cannot be read, only the
/// built-in domains are returned.
pub fn load(user_file: &Path) -> HashSet<String> {
    let mut set: HashSet<String> = DEFAULT_DOMAINS.iter().map(|s| s.to_string()).collect();
    // A missing or unreadable user file is not an error; the defaults still apply.
    if let Ok(text) = std::fs::read_to_string(user_file) {
        set.extend(
            text.lines()
                // Keep only the text before the first `#`; a comment-only line becomes empty.
                .map(|line| line.split('#').next().unwrap_or("").trim())
                .filter(|domain| !domain.is_empty())
                .map(str::to_ascii_lowercase),
        );
    }
    set
}
/// Extracts the host component from an absolute URI of the form
/// `scheme://[userinfo@]host[:port][/path][?query][#fragment]`.
///
/// The returned slice borrows from `uri`; no case folding or other
/// normalisation is applied.
///
/// * User-info (`user:pass@`) and the port are stripped.
/// * A bracketed IP literal such as `[::1]` is returned with its brackets, so
///   the colons inside it are not mistaken for a port separator.
///
/// Returns `None` when:
///
/// * `uri` contains no `://` (e.g. `about:blank`);
/// * the first `://` only appears after a `/`, `?` or `#`, i.e. inside the
///   path, query or fragment of a reference that has no authority of its own;
/// * the host is empty, or a `[` literal is never closed.
pub fn host_of(uri: &str) -> Option<&str> {
    let (scheme, rest) = uri.split_once("://")?;
    // A scheme cannot contain these characters; if they precede the `://`, the
    // separator belongs to a path/query/fragment, not to an authority.
    if scheme.contains(['/', '?', '#']) {
        return None;
    }
    let authority = rest.split(['/', '?', '#']).next()?;
    // User-info ends at the last `@` in the authority.
    let host_port = authority.rsplit('@').next()?;
    let host = if host_port.starts_with('[') {
        // IP literal: the host runs up to and including the closing bracket.
        let close = host_port.find(']')?;
        &host_port[..=close]
    } else {
        host_port.split(':').next()?
    };
    (!host.is_empty()).then_some(host)
}
/// Renders `blocklist` as a JSON array of content-filter rules, one per domain.
///
/// Each rule has the shape
/// `{"trigger": {"url-filter": <regex>}, "action": {"type": "block"}}`, where
/// the regex matches `http`/`https` URLs whose host is the domain itself or
/// any subdomain of it, followed by `:` (port) or `/` (path).
///
/// NOTE(review): the rule shape looks like the WebKit content-blocker format;
/// confirm against the consumer of this JSON.
///
/// * The pattern is anchored with `^` so it only matches the URL's own host,
///   not a listed domain that happens to appear later in the URL (for example
///   inside a query-string redirect target).
/// * Every regex metacharacter in the domain is escaped, not only `.`, so an
///   unusual entry from the user's file cannot alter the pattern.
/// * Rules are emitted in sorted domain order, making the output
///   deterministic for a given blocklist.
///
/// Returns `"[]"` if serialization fails.
pub fn content_filter_json(blocklist: &HashSet<String>) -> String {
    // `HashSet` iteration order is unspecified; sort so equal blocklists always
    // serialize to identical JSON.
    let mut domains: Vec<&str> = blocklist.iter().map(String::as_str).collect();
    domains.sort_unstable();

    let rules: Vec<serde_json::Value> = domains
        .into_iter()
        .map(|domain| {
            let mut escaped = String::with_capacity(domain.len() + 4);
            for ch in domain.chars() {
                let is_meta = matches!(
                    ch,
                    '\\' | '.' | '^' | '$' | '*' | '+' | '?' | '(' | ')' | '[' | ']' | '{' | '}' | '|'
                );
                if is_meta {
                    escaped.push('\\');
                }
                escaped.push(ch);
            }
            serde_json::json!({
                "trigger": { "url-filter": format!(r"^https?://([^/]+\.)?{escaped}[:/]") },
                "action": { "type": "block" }
            })
        })
        .collect();
    serde_json::to_string(&rules).unwrap_or_else(|_| "[]".to_string())
}
/// Reports whether `uri` points at a blocklisted domain or one of its
/// subdomains.
///
/// The host is extracted with [`host_of`]; a URI without a host is never
/// blocked. A host matches when it, or any parent domain obtained by dropping
/// leading labels, is an entry of `blocklist`: with `doubleclick.net` listed,
/// both `doubleclick.net` and `ad.doubleclick.net` are blocked while
/// `notdoubleclick.net` is not.
///
/// Lookup cost is one set probe per label of the host rather than a scan of
/// the whole blocklist.
///
/// Host names are case-insensitive and may be written with a trailing root
/// dot, so when the host as written does not match, it is retried with one
/// trailing `.` removed and ASCII letters lower-cased. This means
/// `AD.DoubleClick.NET` and `ad.doubleclick.net.` are also blocked by a
/// lower-case `doubleclick.net` entry.
pub fn is_blocked(uri: &str, blocklist: &HashSet<String>) -> bool {
    // True when `name` or any of its parent domains is in `blocklist`.
    fn listed(name: &str, blocklist: &HashSet<String>) -> bool {
        let mut candidate = name;
        loop {
            if blocklist.contains(candidate) {
                return true;
            }
            // Drop the left-most label and try the parent domain.
            match candidate.split_once('.') {
                Some((_, parent)) => candidate = parent,
                None => return false,
            }
        }
    }

    let Some(host) = host_of(uri) else {
        return false;
    };
    // Fast path: the host exactly as written (no allocation).
    if listed(host, blocklist) {
        return true;
    }
    // Slow path, only taken when normalisation would actually change the host.
    let bare = host.strip_suffix('.').unwrap_or(host);
    let differs = bare.len() != host.len() || bare.bytes().any(|b| b.is_ascii_uppercase());
    differs && listed(&bare.to_ascii_lowercase(), blocklist)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Blocklist fixture containing only `doubleclick.net`.
    fn list() -> HashSet<String> {
        HashSet::from(["doubleclick.net".to_string()])
    }

    /// `host_of` strips scheme, user-info, port and path, and yields `None`
    /// for URIs without an authority.
    #[test]
    fn extracts_host() {
        let cases = [
            ("https://ad.doubleclick.net/foo?x=1", Some("ad.doubleclick.net")),
            ("http://user@example.com:8080/p", Some("example.com")),
            ("about:blank", None),
        ];
        for (uri, expected) in cases {
            assert_eq!(host_of(uri), expected);
        }
    }

    /// The generated filter parses as JSON and holds one `block` rule whose
    /// pattern contains the escaped domain.
    #[test]
    fn content_filter_is_valid_json_rules() {
        let json = content_filter_json(&list());
        let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
        let rules = parsed.as_array().unwrap();
        assert_eq!(rules.len(), 1);

        let rule = &rules[0];
        assert_eq!(rule["action"]["type"], "block");

        let pattern = rule["trigger"]["url-filter"].as_str().unwrap();
        assert!(pattern.contains(r"doubleclick\.net"));
    }

    /// A listed domain and its subdomains are blocked; unrelated domains and
    /// names that merely end with the same characters are not.
    #[test]
    fn blocks_domain_and_subdomains() {
        let blocklist = list();
        for uri in ["https://doubleclick.net/x", "https://ad.doubleclick.net/x"] {
            assert!(is_blocked(uri, &blocklist));
        }
        for uri in ["https://example.com/x", "https://notdoubleclick.net/x"] {
            assert!(!is_blocked(uri, &blocklist));
        }
    }
}