/// Resource-type names that [`Blocklist::defaults`] blocks outright.
///
/// [`Blocklist::should_block`] compares its `resource_type` argument against
/// these entries with exact, case-sensitive string equality. `"Stylesheet"` is
/// deliberately absent here; it is controlled separately through
/// [`Blocklist::with_stylesheets`].
pub const DEFAULT_BLOCKED_RESOURCE_TYPES: &[&str] =
&["Image", "Media", "Font", "Manifest", "WebSocket"];
/// Host fragments that [`Blocklist::defaults`] blocks.
///
/// [`Blocklist::should_block`] lowercases the host of the request URL and
/// blocks the request when that host *contains* any of these entries. Matching
/// is by substring, not by exact host or domain suffix, so an entry also
/// matches subdomains and any other host that embeds the same text.
///
/// NOTE(review): the entries look like analytics, advertising and
/// consent-banner domains; confirm the intended scope before extending.
pub const DEFAULT_BLOCKED_HOSTS: &[&str] = &[
"google-analytics.com",
"googletagmanager.com",
"doubleclick.net",
"googleadservices.com",
"googlesyndication.com",
"hotjar.com",
"segment.io",
"segment.com",
"amplitude.com",
"mixpanel.com",
"clarity.ms",
"onetrust.com",
"cookielaw.org",
"criteo.com",
"criteo.net",
"taboola.com",
"outbrain.com",
"adsystem.com",
"adservice.google.com",
"scorecardresearch.com",
"quantserve.com",
"chartbeat.com",
"nr-data.net",
"newrelic.com",
];
/// Why [`Blocklist::should_block`] decided to block a request.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlockReason {
/// The request's resource type is blocked (this includes `"Stylesheet"`
/// when stylesheet blocking is switched on).
ResourceType,
/// The host of the request URL contains a blocked host fragment.
Host,
}
/// Request filter made of blocked resource types and blocked host fragments.
///
/// Build one with [`Blocklist::defaults`] or [`Blocklist::empty`], optionally
/// adjust it with [`Blocklist::with_stylesheets`], and query it with
/// [`Blocklist::should_block`].
#[derive(Debug, Clone)]
pub struct Blocklist {
/// Resource-type names blocked by exact string equality.
resource_types: Vec<String>,
/// Fragments searched for inside the lowercased request host;
/// `defaults` stores them lowercased.
host_substrings: Vec<String>,
/// When `true`, requests of type `"Stylesheet"` are blocked as well.
block_stylesheets: bool,
}
impl Blocklist {
    /// Builds a blocklist from [`DEFAULT_BLOCKED_RESOURCE_TYPES`] and
    /// [`DEFAULT_BLOCKED_HOSTS`], with stylesheet blocking switched off.
    ///
    /// Host entries are stored lowercased so they can be compared against the
    /// lowercased request host in [`Blocklist::should_block`].
    pub fn defaults() -> Self {
        let resource_types: Vec<String> = DEFAULT_BLOCKED_RESOURCE_TYPES
            .iter()
            .copied()
            .map(String::from)
            .collect();
        let host_substrings: Vec<String> = DEFAULT_BLOCKED_HOSTS
            .iter()
            .map(|entry| entry.to_lowercase())
            .collect();

        Self {
            resource_types,
            host_substrings,
            block_stylesheets: false,
        }
    }

    /// Returns the blocklist with `"Stylesheet"` blocking set to `on`.
    ///
    /// Consumes `self` so it can be chained onto a constructor.
    pub fn with_stylesheets(self, on: bool) -> Self {
        Self {
            block_stylesheets: on,
            ..self
        }
    }

    /// Builds a blocklist that blocks nothing: no resource types, no hosts,
    /// and stylesheet blocking switched off.
    pub fn empty() -> Self {
        Self {
            block_stylesheets: false,
            host_substrings: vec![],
            resource_types: vec![],
        }
    }

    /// Decides whether a request should be blocked.
    ///
    /// * `resource_type` - compared by exact string equality against the
    ///   blocked resource types (plus `"Stylesheet"` when enabled).
    /// * `url` - the request URL; only its host is inspected.
    ///
    /// Returns `Some(BlockReason::ResourceType)` when the type is blocked,
    /// otherwise `Some(BlockReason::Host)` when the lowercased host contains a
    /// blocked fragment, otherwise `None`. A URL that fails to parse or has no
    /// host is never blocked by host.
    pub fn should_block(&self, resource_type: &str, url: &str) -> Option<BlockReason> {
        // The type check comes first, so it wins even when the host would match too.
        let blocked_by_type = self
            .resource_types
            .iter()
            .any(|blocked| blocked == resource_type)
            || (self.block_stylesheets && resource_type == "Stylesheet");
        if blocked_by_type {
            return Some(BlockReason::ResourceType);
        }

        // Nothing to match against: skip URL parsing altogether.
        if self.host_substrings.is_empty() {
            return None;
        }

        // Unparseable URLs and URLs without a host fall through to `None`.
        let parsed = url::Url::parse(url).ok()?;
        let host = parsed.host_str()?.to_lowercase();

        self.host_substrings
            .iter()
            .any(|fragment| host.contains(fragment.as_str()))
            .then_some(BlockReason::Host)
    }
}
/// The default blocklist is the one produced by [`Blocklist::defaults`],
/// not the empty one.
impl Default for Blocklist {
/// Delegates to [`Blocklist::defaults`].
fn default() -> Self {
Self::defaults()
}
}
// Unit tests for `Blocklist::should_block` against the default and empty lists.
#[cfg(test)]
mod tests {
use super::*;
// "Image" is one of the default blocked resource types.
#[test]
fn blocks_image_resource_type() {
let bl = Blocklist::defaults();
assert_eq!(
bl.should_block("Image", "https://example.com/cat.jpg"),
Some(BlockReason::ResourceType),
);
}
// "Font" is one of the default blocked resource types.
#[test]
fn blocks_font_resource_type() {
let bl = Blocklist::defaults();
assert_eq!(
bl.should_block("Font", "https://example.com/x.woff2"),
Some(BlockReason::ResourceType),
);
}
// "Document" is not a blocked type and example.com is not a blocked host.
#[test]
fn allows_document_resource_type() {
let bl = Blocklist::defaults();
assert_eq!(bl.should_block("Document", "https://example.com/"), None);
}
// Stylesheet blocking is off unless `with_stylesheets(true)` is applied.
#[test]
fn allows_stylesheet_by_default() {
let bl = Blocklist::defaults();
assert_eq!(
bl.should_block("Stylesheet", "https://example.com/x.css"),
None
);
}
// Enabling stylesheet blocking reports the hit as a resource-type block.
#[test]
fn blocks_stylesheet_when_enabled() {
let bl = Blocklist::defaults().with_stylesheets(true);
assert_eq!(
bl.should_block("Stylesheet", "https://example.com/x.css"),
Some(BlockReason::ResourceType),
);
}
// Host matching is by substring, so the `www.` subdomain is blocked too.
#[test]
fn blocks_google_analytics_host() {
let bl = Blocklist::defaults();
assert_eq!(
bl.should_block("XHR", "https://www.google-analytics.com/g/collect"),
Some(BlockReason::Host),
);
}
// An upper-case host in the URL still matches the lowercase list entry.
#[test]
fn blocks_doubleclick_host_case_insensitive() {
let bl = Blocklist::defaults();
assert_eq!(
bl.should_block("Script", "https://AD.DOUBLECLICK.NET/track"),
Some(BlockReason::Host),
);
}
// With no types and no hosts configured, nothing is blocked.
#[test]
fn empty_blocklist_allows_everything() {
let bl = Blocklist::empty();
assert_eq!(bl.should_block("Image", "https://example.com/x.jpg"), None);
assert_eq!(
bl.should_block("Script", "https://google-analytics.com/g"),
None,
);
}
// The resource-type check runs before URL parsing, so a blocked type is
// still reported for an unparseable URL; for an allowed type the failed
// parse simply skips the host check and yields `None`.
#[test]
fn malformed_url_does_not_panic() {
let bl = Blocklist::defaults();
assert_eq!(
bl.should_block("Image", "not a url"),
Some(BlockReason::ResourceType)
);
assert_eq!(bl.should_block("Script", "not a url"), None);
}
}