//! Transforms filter rules into the content blocking syntax used on iOS and macOS.
use crate::filters::cosmetic::CosmeticFilter;
use crate::filters::network::{NetworkFilter, NetworkFilterMask};
use crate::lists::ParsedFilter;
use memchr::{memchr as find_char, memmem};
use once_cell::sync::Lazy;
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::convert::{TryFrom, TryInto};
/// By default, ABP rules do not block top-level document requests. There's no way to express that
/// in content blocking format, so instead it's approximated with a rule that applies an exception
/// to any first-party requests that are document types.
///
/// This rule should be added after all other network rules.
pub fn ignore_previous_fp_documents() -> CbRule {
let mut resource_type = HashSet::new();
resource_type.insert(CbResourceType::Document);
CbRule {
trigger: CbTrigger {
url_filter: String::from(".*"),
resource_type: Some(resource_type),
load_type: vec![CbLoadType::FirstParty],
..CbTrigger::default()
},
action: CbAction {
typ: CbType::IgnorePreviousRules,
selector: None,
},
}
}
/// Rust representation of a single content blocking rule.
///
/// Derives both `Serialize` and `Deserialize`, so it can be converted to and from the JSON
/// content blocking format directly with `serde_json`.
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
pub struct CbRule {
/// What to do when the trigger matches.
pub action: CbAction,
/// The conditions under which the action is applied.
pub trigger: CbTrigger,
}
impl CbRule {
    /// Reports whether every string in the rule is pure ASCII: the selector, the URL filter and
    /// all domain / top-URL lists.
    ///
    /// If this returns false, the rule will not compile and should not be used.
    fn is_ascii(&self) -> bool {
        // An absent list trivially passes; a present one must contain only ASCII entries.
        let list_is_ascii = |list: &Option<Vec<String>>| {
            list.iter().flatten().all(|entry| entry.is_ascii())
        };

        let selector_ok = self
            .action
            .selector
            .as_deref()
            .map_or(true, str::is_ascii);
        let trigger = &self.trigger;

        selector_ok
            && trigger.url_filter.is_ascii()
            && list_is_ascii(&trigger.if_domain)
            && list_is_ascii(&trigger.unless_domain)
            && list_is_ascii(&trigger.if_top_url)
            && list_is_ascii(&trigger.unless_top_url)
    }
}
/// Corresponds to the `action` field of a Safari content blocking rule.
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
pub struct CbAction {
/// The kind of action to perform. Named `typ` because `type` is a Rust keyword; it is
/// (de)serialized under the key `type`.
#[serde(rename = "type")]
pub typ: CbType,
/// Specify a string that defines a selector list. This value is required when the action type
/// is css-display-none. If it's not, the selector field is ignored by Safari. Use CSS
/// identifiers as the individual selector values, separated by commas. Safari and WebKit
/// supports all of its CSS selectors for Safari content-blocking rules.
///
/// Omitted from the serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub selector: Option<String>,
}
/// Corresponds to the `action.type` field of a Safari content blocking rule.
///
/// Variants are (de)serialized in kebab-case, e.g. `CssDisplayNone` as `css-display-none`.
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
#[serde(rename_all = "kebab-case")]
pub enum CbType {
/// Stops loading of the resource. If the resource was cached, the cache is ignored.
Block,
/// Strips cookies from the header before sending to the server. Only cookies otherwise
/// acceptable to Safari's privacy policy can be blocked. Combining with ignore-previous-rules
/// doesn't override the browser's privacy settings.
BlockCookies,
/// Hides elements of the page based on a CSS selector. A selector field contains the selector
/// list. Any matching element has its display property set to none, which hides it.
CssDisplayNone,
/// Ignores previously triggered actions.
IgnorePreviousRules,
/// Changes a URL from http to https. URLs with a specified (nondefault) port and links using
/// other protocols are unaffected.
MakeHttps,
}
/// Corresponds to possible entries in the `trigger.load_type` field of a Safari content blocking
/// rule.
///
/// Variants are (de)serialized in kebab-case: `first-party` and `third-party`.
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
#[serde(rename_all = "kebab-case")]
pub enum CbLoadType {
/// Triggered only if the resource has the same scheme, domain, and port as the main page
/// resource.
FirstParty,
/// Triggered if the resource is not from the same domain as the main page resource.
ThirdParty,
}
/// Corresponds to possible entries in the `trigger.resource_type` field of a Safari content
/// blocking rule.
///
/// Variants are (de)serialized in kebab-case, so `StyleSheet` becomes `style-sheet` and
/// `SvgDocument` becomes `svg-document`. The type is hashable so it can be stored in the
/// `HashSet` used by `CbTrigger::resource_type`.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)]
#[serde(rename_all = "kebab-case")]
pub enum CbResourceType {
Document,
Image,
StyleSheet,
Script,
Font,
/// Any untyped load.
Raw,
SvgDocument,
Media,
Popup,
}
/// Corresponds to the `trigger` field of a Safari content blocking rule.
///
/// Field names are (de)serialized in kebab-case (e.g. `url_filter` as `url-filter`). All fields
/// other than `url_filter` are optional: they default when missing from the input and are omitted
/// from the output when unset or empty.
#[derive(Clone, Debug, Default, PartialEq, Deserialize, Serialize)]
#[serde(rename_all = "kebab-case")]
pub struct CbTrigger {
/// Specifies a pattern to match the URL against.
pub url_filter: String,
#[serde(default, skip_serializing_if = "Option::is_none")]
/// A Boolean value controlling whether `url_filter` is matched case-sensitively. The default
/// value is false.
pub url_filter_is_case_sensitive: Option<bool>,
/// An array of strings matched to a URL's domain; limits action to a list of specific domains.
/// Values must be lowercase ASCII, or punycode for non-ASCII. Add * in front to match domain
/// and subdomains. Can't be used with unless-domain.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub if_domain: Option<Vec<String>>,
/// An array of strings matched to a URL's domain; acts on any site except domains in a
/// provided list. Values must be lowercase ASCII, or punycode for non-ASCII. Add * in front to
/// match domain and subdomains. Can't be used with if-domain.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub unless_domain: Option<Vec<String>>,
/// An array of strings representing the resource types (how the browser intends to use the
/// resource) that the rule should match. If not specified, the rule matches all resource
/// types. Valid values: document, image, style-sheet, script, font, raw (Any untyped load),
/// svg-document, media, popup.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub resource_type: Option<HashSet<CbResourceType>>,
/// An array of strings that can include one of two mutually exclusive values. If not
/// specified, the rule matches all load types. first-party is triggered only if the resource
/// has the same scheme, domain, and port as the main page resource. third-party is triggered
/// if the resource is not from the same domain as the main page resource.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub load_type: Vec<CbLoadType>,
/// An array of strings matched to the entire main document URL; limits the action to a
/// specific list of URL patterns. Values must be lowercase ASCII, or punycode for non-ASCII.
/// Can't be used with unless-top-url.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub if_top_url: Option<Vec<String>>,
/// An array of strings matched to the entire main document URL; acts on any site except URL
/// patterns in provided list. Values must be lowercase ASCII, or punycode for non-ASCII. Can't
/// be used with if-top-url.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub unless_top_url: Option<Vec<String>>,
}
/// Possible failure reasons when attempting to convert an adblock rule into content filtering
/// syntax.
#[derive(Debug)]
pub enum CbRuleCreationFailure {
/// Currently, only filter rules parsed in debug mode can be translated into equivalent content
/// blocking syntax. The conversions in this file return this when the filter's `raw_line` is
/// `None`.
NeedsDebugMode,
/// Content blocking rules cannot have if-domain and unless-domain together at the same time.
UnlessAndIfDomainTogetherUnsupported,
/// A network filter rule with only the given content type flags was provided, and none of them
/// are supported. If at least one supported content type is provided, no failure will occur
/// and unsupported types will be silently dropped.
NoSupportedNetworkOptions(NetworkFilterMask),
/// Network rules with redirect options cannot be represented in content blocking syntax.
NetworkRedirectUnsupported,
/// Network rules with generichide options cannot be supported in content blocking syntax.
NetworkGenerichideUnsupported,
/// Network rules with badfilter options cannot be supported in content blocking syntax.
NetworkBadFilterUnsupported,
/// Network rules with csp options cannot be supported in content blocking syntax.
NetworkCspUnsupported,
/// Network rules with removeparam options cannot be supported in content blocking syntax.
NetworkRemoveparamUnsupported,
/// Content blocking syntax only supports a subset of regex features, namely:
/// - Matching any character with “.”.
/// - Matching ranges with the range syntax [a-b].
/// - Quantifying expressions with “?”, “+” and “*”.
/// - Groups with parenthesis.
///
/// It may be possible to correctly convert some full-regex rules, but others use unsupported
/// features (e.g. quantified repetition with {...}) that make conversion to content blocking
/// syntax impossible.
FullRegexUnsupported,
/// `Blocker`-internal `NetworkFilter`s can be represented in optimized form, but these cannot
/// be currently converted into content blocking syntax.
OptimizedRulesUnsupported,
/// Cosmetic rules with entities (e.g. google.*) rather than hostnames cannot be represented in
/// content blocking syntax.
CosmeticEntitiesUnsupported,
/// Cosmetic rules with custom action specification (i.e. `:style(...)`) cannot be represented
/// in content blocking syntax.
CosmeticActionRulesNotSupported,
/// Cosmetic rules with scriptlet injections (i.e. `+js(...)`) cannot be represented in content
/// blocking syntax.
ScriptletInjectionsNotSupported,
/// Valid content blocking rules can only include ASCII characters.
RuleContainsNonASCII,
/// `from` as a `domain` alias is not currently supported in content blocking syntax. Returned
/// when a network filter has domain constraints but no literal `domain=` option can be found
/// in its raw rule text.
FromNotSupported,
}
impl TryFrom<ParsedFilter> for CbRuleEquivalent {
type Error = CbRuleCreationFailure;
fn try_from(v: ParsedFilter) -> Result<Self, Self::Error> {
match v {
ParsedFilter::Network(f) => f.try_into(),
ParsedFilter::Cosmetic(f) => Ok(Self::SingleRule(f.try_into()?)),
}
}
}
/// Returns `Some(v)` when `v` holds at least one element and `None` when it is empty.
///
/// Used so that empty domain lists are represented as an absent field instead of an empty array.
fn non_empty(v: Vec<String>) -> Option<Vec<String>> {
    if v.is_empty() {
        None
    } else {
        Some(v)
    }
}
/// Some adblock rules cannot be directly represented by a single content blocking rule. This enum
/// serves as an intermediate conversion step that provides extra context on why one rule turned
/// into multiple rules.
///
/// The contained rules can be accessed using `IntoIterator`, which yields them in the order they
/// are stored in the variant.
pub enum CbRuleEquivalent {
/// In most successful cases, an ABP rule can be converted into a single content blocking rule.
SingleRule(CbRule),
/// If a network rule has more than one specified resource type, one of those types is
/// `Document`, and no load type is specified, then the rule should be split into two content
/// blocking rules: the first has all original resource types except `Document`, and the second
/// only specifies `Document` with a third-party load type.
SplitDocument(CbRule, CbRule),
}
impl IntoIterator for CbRuleEquivalent {
    type Item = CbRule;
    type IntoIter = CbRuleEquivalentIterator;

    /// Consumes the equivalent and yields its one or two rules in declaration order.
    fn into_iter(self) -> Self::IntoIter {
        // The iterator always has two slots; a single rule leaves the second one vacant.
        let rules = match self {
            Self::SingleRule(only) => [Some(only), None],
            Self::SplitDocument(first, second) => [Some(first), Some(second)],
        };
        CbRuleEquivalentIterator { rules, index: 0 }
    }
}
/// Returned by [`CbRuleEquivalent`]'s `IntoIterator` implementation.
pub struct CbRuleEquivalentIterator {
// Up to two rules to hand out; a slot is `None` if it was never filled or was already yielded.
rules: [Option<CbRule>; 2],
// Position of the next slot to inspect.
index: usize,
}
impl Iterator for CbRuleEquivalentIterator {
    type Item = CbRule;

    /// Moves the rule out of the current slot and advances to the next one.
    ///
    /// Returns `None` once both slots have been visited, or as soon as a vacant slot is reached.
    fn next(&mut self) -> Option<Self::Item> {
        // `get_mut` yields `None` when the index has run past the last slot.
        let slot = self.rules.get_mut(self.index)?;
        self.index += 1;
        slot.take()
    }
}
impl TryFrom<NetworkFilter> for CbRuleEquivalent {
    type Error = CbRuleCreationFailure;

    /// Converts a network filter into its content blocking equivalent.
    ///
    /// The filter must carry its original text (`raw_line`), because the `domain=` option is
    /// re-read from it. The result is normally a single rule. It is split in two when the rule
    /// lists several resource types including `Document` and has no load type (see
    /// [`CbRuleEquivalent::SplitDocument`]).
    ///
    /// # Errors
    ///
    /// Returns a [`CbRuleCreationFailure`] when the filter has no `raw_line`, uses an option
    /// that cannot be expressed (redirect, generichide, badfilter, csp, removeparam, full regex,
    /// optimized filters), combines included and excluded domains, lists only unsupported
    /// resource types, or would produce a rule containing non-ASCII text (including domains that
    /// cannot be punycode-encoded).
    fn try_from(v: NetworkFilter) -> Result<Self, Self::Error> {
        use crate::filters::network::FilterPart;

        static SPECIAL_CHARS: Lazy<Regex> =
            Lazy::new(|| Regex::new(r##"([.+?^${}()|\[\]\\])"##).unwrap());
        static REPLACE_WILDCARDS: Lazy<Regex> = Lazy::new(|| Regex::new(r##"\*"##).unwrap());
        static TRAILING_SEPARATOR: Lazy<Regex> = Lazy::new(|| Regex::new(r##"\^$"##).unwrap());

        /// Turns an ABP pattern into regex text: drops a trailing `^` separator, escapes regex
        /// metacharacters, then expands `*` wildcards into `.*`.
        fn pattern_to_regex(part: &str) -> String {
            let without_trailing_separator = TRAILING_SEPARATOR.replace_all(part, "");
            let escaped_special_chars =
                SPECIAL_CHARS.replace_all(&without_trailing_separator, r##"\$1"##);
            REPLACE_WILDCARDS
                .replace_all(&escaped_special_chars, ".*")
                .into_owned()
        }

        // The original rule text is needed below to recover the `domain=` option.
        let raw_line = match &v.raw_line {
            Some(line) => line,
            None => return Err(CbRuleCreationFailure::NeedsDebugMode),
        };

        // Reject options that have no content blocking counterpart.
        if v.is_redirect() {
            return Err(CbRuleCreationFailure::NetworkRedirectUnsupported);
        }
        if v.mask.contains(NetworkFilterMask::GENERIC_HIDE) {
            return Err(CbRuleCreationFailure::NetworkGenerichideUnsupported);
        }
        if v.mask.contains(NetworkFilterMask::BAD_FILTER) {
            return Err(CbRuleCreationFailure::NetworkBadFilterUnsupported);
        }
        if v.is_csp() {
            return Err(CbRuleCreationFailure::NetworkCspUnsupported);
        }
        if v.mask.contains(NetworkFilterMask::IS_COMPLETE_REGEX) {
            return Err(CbRuleCreationFailure::FullRegexUnsupported);
        }
        if v.is_removeparam() {
            return Err(CbRuleCreationFailure::NetworkRemoveparamUnsupported);
        }

        // A rule that applies to both parties (or to neither flag) needs no load-type constraint.
        let load_type = if v
            .mask
            .contains(NetworkFilterMask::THIRD_PARTY | NetworkFilterMask::FIRST_PARTY)
        {
            vec![]
        } else if v.mask.contains(NetworkFilterMask::THIRD_PARTY) {
            vec![CbLoadType::ThirdParty]
        } else if v.mask.contains(NetworkFilterMask::FIRST_PARTY) {
            vec![CbLoadType::FirstParty]
        } else {
            vec![]
        };

        let url_filter = match (v.filter, v.hostname) {
            (FilterPart::AnyOf(_), _) => {
                return Err(CbRuleCreationFailure::OptimizedRulesUnsupported)
            }
            (FilterPart::Simple(part), Some(hostname)) => {
                // Hostname-anchored: any scheme, optional subdomains, then the literal hostname.
                let mut url_filter = format!(
                    "^[^:]+:(//)?([^/]+\\.)?{}",
                    SPECIAL_CHARS.replace_all(&hostname, r##"\$1"##)
                );
                if v.mask.contains(NetworkFilterMask::IS_HOSTNAME_REGEX) {
                    url_filter += ".*";
                }
                url_filter += &pattern_to_regex(&part);
                if v.mask.contains(NetworkFilterMask::IS_RIGHT_ANCHOR) {
                    url_filter += "$";
                }
                url_filter
            }
            (FilterPart::Simple(part), None) => {
                let pattern = pattern_to_regex(&part);
                let mut url_filter = if v.mask.contains(NetworkFilterMask::IS_LEFT_ANCHOR) {
                    format!("^{}", pattern)
                } else {
                    // Not anchored: constrain the scheme only when the rule is limited to one.
                    let scheme_part = if v
                        .mask
                        .contains(NetworkFilterMask::FROM_HTTP | NetworkFilterMask::FROM_HTTPS)
                    {
                        ""
                    } else if v.mask.contains(NetworkFilterMask::FROM_HTTP) {
                        "^http://.*"
                    } else if v.mask.contains(NetworkFilterMask::FROM_HTTPS) {
                        "^https://.*"
                    } else if v.mask.contains(NetworkFilterMask::FROM_WEBSOCKET) {
                        "^wss?://.*"
                    } else {
                        unreachable!("Invalid scheme information");
                    };
                    format!("{}{}", scheme_part, pattern)
                };
                if v.mask.contains(NetworkFilterMask::IS_RIGHT_ANCHOR) {
                    url_filter += "$";
                }
                url_filter
            }
            (FilterPart::Empty, Some(hostname)) => {
                let escaped_special_chars = SPECIAL_CHARS.replace_all(&hostname, r##"\$1"##);
                format!("^[^:]+:(//)?([^/]+\\.)?{}", escaped_special_chars)
            }
            (FilterPart::Empty, None) => {
                // No pattern and no hostname: match purely on the scheme.
                if v.mask
                    .contains(NetworkFilterMask::FROM_HTTP | NetworkFilterMask::FROM_HTTPS)
                {
                    "^https?://"
                } else if v.mask.contains(NetworkFilterMask::FROM_HTTP) {
                    "^http://"
                } else if v.mask.contains(NetworkFilterMask::FROM_HTTPS) {
                    "^https://"
                } else if v.mask.contains(NetworkFilterMask::FROM_WEBSOCKET) {
                    "^wss?://"
                } else {
                    unreachable!("Invalid scheme information");
                }
                .to_string()
            }
        };

        let (if_domain, unless_domain) = if v.opt_domains.is_some()
            || v.opt_not_domains.is_some()
        {
            let mut if_domain = vec![];
            let mut unless_domain = vec![];

            // Unwrap is okay here - any rules with opt_domains or opt_not_domains must have
            // an options section delimited by a '$' character.
            let opts = &raw_line[find_char(b'$', raw_line.as_bytes()).unwrap() + "$".len()..];
            // The domains may also have been supplied through the `from=` alias, in which case
            // there is no `domain=` text to read back.
            let domain_start_index = match memmem::find(opts.as_bytes(), b"domain=") {
                Some(index) => index,
                None => return Err(CbRuleCreationFailure::FromNotSupported),
            };
            let domains_start = &opts[domain_start_index + "domain=".len()..];
            // The domain list runs until the next option separator, or to the end of the line.
            let domain_list = match find_char(b',', domains_start.as_bytes()) {
                Some(comma) => &domains_start[..comma],
                None => domains_start,
            };

            for domain in domain_list.split('|') {
                // A leading `~` negates the domain.
                let (collection, domain) = match domain.strip_prefix('~') {
                    Some(domain_stripped) => (&mut unless_domain, domain_stripped),
                    None => (&mut if_domain, domain),
                };

                let lowercase = domain.to_lowercase();
                let normalized_domain = if lowercase.is_ascii() {
                    lowercase
                } else {
                    // The domain text is re-read from the raw line here, so report an
                    // un-encodable name as an error instead of panicking.
                    idna::domain_to_ascii(&lowercase)
                        .map_err(|_| CbRuleCreationFailure::RuleContainsNonASCII)?
                };

                // The `*` prefix makes the entry match the domain and its subdomains.
                collection.push(format!("*{}", normalized_domain));
            }

            (non_empty(if_domain), non_empty(unless_domain))
        } else {
            (None, None)
        };

        if if_domain.is_some() && unless_domain.is_some() {
            return Err(CbRuleCreationFailure::UnlessAndIfDomainTogetherUnsupported);
        }

        let blocking_type = if v.mask.contains(NetworkFilterMask::IS_EXCEPTION) {
            CbType::IgnorePreviousRules
        } else {
            CbType::Block
        };

        // A rule covering every network type needs no resource-type constraint at all.
        let resource_type = if v.mask.contains(NetworkFilterMask::FROM_NETWORK_TYPES) {
            None
        } else {
            let mut types = HashSet::new();
            let mut unsupported_flags = NetworkFilterMask::empty();
            // Two-argument form: record the matching content blocking resource type.
            // One-argument form: the flag has no equivalent, so remember it for error reporting.
            macro_rules! push_if_flag {
                ($flag:ident, $target:ident) => {
                    if v.mask.contains(NetworkFilterMask::$flag) {
                        types.insert(CbResourceType::$target);
                    }
                };
                ($flag:ident) => {
                    if v.mask.contains(NetworkFilterMask::$flag) {
                        unsupported_flags |= NetworkFilterMask::$flag;
                    }
                };
            }
            push_if_flag!(FROM_IMAGE, Image);
            push_if_flag!(FROM_MEDIA, Media);
            push_if_flag!(FROM_OBJECT);
            push_if_flag!(FROM_OTHER);
            push_if_flag!(FROM_PING);
            push_if_flag!(FROM_SCRIPT, Script);
            push_if_flag!(FROM_STYLESHEET, StyleSheet);
            push_if_flag!(FROM_SUBDOCUMENT, Document);
            push_if_flag!(FROM_WEBSOCKET);
            push_if_flag!(FROM_XMLHTTPREQUEST, Raw);
            push_if_flag!(FROM_FONT, Font);
            // TODO - Popup, Document when implemented

            // Unsupported types are silently dropped unless nothing supported remains.
            if !unsupported_flags.is_empty() && types.is_empty() {
                return Err(CbRuleCreationFailure::NoSupportedNetworkOptions(
                    unsupported_flags,
                ));
            }

            Some(types)
        };

        let url_filter_is_case_sensitive = if v.mask.contains(NetworkFilterMask::MATCH_CASE) {
            Some(true)
        } else {
            None
        };

        let single_rule = CbRule {
            action: CbAction {
                typ: blocking_type,
                selector: None,
            },
            trigger: CbTrigger {
                url_filter,
                load_type,
                if_domain,
                unless_domain,
                resource_type,
                url_filter_is_case_sensitive,
                ..Default::default()
            },
        };

        if !single_rule.is_ascii() {
            return Err(CbRuleCreationFailure::RuleContainsNonASCII);
        }

        if let Some(resource_types) = &single_rule.trigger.resource_type {
            // Split off `Document` into its own third-party rule when it is mixed with other
            // resource types and no load type was requested.
            if resource_types.len() > 1
                && resource_types.contains(&CbResourceType::Document)
                && single_rule.trigger.load_type.is_empty()
            {
                let mut non_doc_types = resource_types.clone();
                non_doc_types.remove(&CbResourceType::Document);
                let rule_clone = single_rule.clone();
                let non_doc_rule = CbRule {
                    trigger: CbTrigger {
                        resource_type: Some(non_doc_types),
                        ..rule_clone.trigger
                    },
                    ..rule_clone
                };

                let mut doc_type = HashSet::new();
                doc_type.insert(CbResourceType::Document);
                let just_doc_rule = CbRule {
                    trigger: CbTrigger {
                        resource_type: Some(doc_type),
                        load_type: vec![CbLoadType::ThirdParty],
                        ..single_rule.trigger
                    },
                    ..single_rule
                };

                return Ok(Self::SplitDocument(non_doc_rule, just_doc_rule));
            }
        }

        Ok(Self::SingleRule(single_rule))
    }
}
impl TryFrom<CosmeticFilter> for CbRule {
    type Error = CbRuleCreationFailure;

    /// Converts a cosmetic (element hiding) filter into a `css-display-none` rule.
    ///
    /// Hostnames in front of the `#` separator become the `if-domain` / `unless-domain` lists;
    /// for unhide rules the two lists are swapped. Fails for rules with a custom action,
    /// scriptlet injections, entity locations, both included and excluded hostnames, non-ASCII
    /// content, or a missing `raw_line`.
    fn try_from(v: CosmeticFilter) -> Result<Self, Self::Error> {
        use crate::filters::cosmetic::{CosmeticFilterLocationType, CosmeticFilterMask};

        if v.action.is_some() {
            return Err(CbRuleCreationFailure::CosmeticActionRulesNotSupported);
        }
        if v.mask.contains(CosmeticFilterMask::SCRIPT_INJECT) {
            return Err(CbRuleCreationFailure::ScriptletInjectionsNotSupported);
        }

        let raw_line = match v.raw_line {
            Some(line) => line,
            None => return Err(CbRuleCreationFailure::NeedsDebugMode),
        };

        // Unwrap is okay here - cosmetic rules must have a '#' character
        let sharp_index = find_char(b'#', raw_line.as_bytes()).unwrap();

        let mut included_hosts = vec![];
        let mut excluded_hosts = vec![];
        let mut saw_entity = false;

        for (location_type, location) in
            CosmeticFilter::locations_before_sharp(&raw_line, sharp_index)
        {
            match location_type {
                CosmeticFilterLocationType::Entity | CosmeticFilterLocationType::NotEntity => {
                    saw_entity = true;
                }
                // Hostnames that fail punycode encoding are skipped.
                CosmeticFilterLocationType::Hostname => {
                    if let Ok(encoded) = idna::domain_to_ascii(location) {
                        included_hosts.push(encoded);
                    }
                }
                CosmeticFilterLocationType::NotHostname => {
                    if let Ok(encoded) = idna::domain_to_ascii(location) {
                        excluded_hosts.push(encoded);
                    }
                }
            }
        }

        if saw_entity {
            return Err(CbRuleCreationFailure::CosmeticEntitiesUnsupported);
        }

        let included_hosts = non_empty(included_hosts);
        let excluded_hosts = non_empty(excluded_hosts);

        if included_hosts.is_some() && excluded_hosts.is_some() {
            return Err(CbRuleCreationFailure::UnlessAndIfDomainTogetherUnsupported);
        }

        // An unhide rule is expressed as a hide rule with the domain lists swapped.
        let is_unhide = v.mask.contains(CosmeticFilterMask::UNHIDE);
        let (if_domain, unless_domain) = if is_unhide {
            (excluded_hosts, included_hosts)
        } else {
            (included_hosts, excluded_hosts)
        };

        let action = CbAction {
            typ: CbType::CssDisplayNone,
            selector: Some(v.selector),
        };
        let trigger = CbTrigger {
            url_filter: ".*".to_string(),
            if_domain,
            unless_domain,
            ..Default::default()
        };
        let rule = Self { action, trigger };

        if rule.is_ascii() {
            Ok(rule)
        } else {
            Err(CbRuleCreationFailure::RuleContainsNonASCII)
        }
    }
}
#[cfg(test)]
mod ab2cb_tests {
use super::*;
/// Parses `abp_rule`, converts it to content blocking rules, and asserts that the result equals
/// the rule list deserialized from the JSON in `cb`.
///
/// NOTE(review): the `true` argument to `parse_filter` is presumably the debug flag that keeps
/// the raw rule text the conversion needs - confirm against `parse_filter`'s signature.
fn test_from_abp(abp_rule: &str, cb: &str) {
    let filter = crate::lists::parse_filter(abp_rule, true, Default::default())
        .expect("Rule under test could not be parsed");

    let converted: Vec<CbRule> = CbRuleEquivalent::try_from(filter)
        .unwrap()
        .into_iter()
        .collect();
    let expected = serde_json::from_str::<Vec<CbRule>>(cb)
        .expect("content blocking rule under test could not be deserialized");

    assert_eq!(converted, expected);
}
/// Network blocking rules: plain substrings, escaping of regex metacharacters, `*` wildcards,
/// scheme and hostname anchors, and `domain` / resource-type / `third-party` options.
#[test]
fn ad_tests() {
// Plain substring patterns pass through unchanged.
test_from_abp(
"&ad_box_",
r####"[{
"action": {
"type": "block"
},
"trigger": {
"url-filter": "&ad_box_"
}
}]"####,
);
test_from_abp(
"&ad_channel=",
r####"[{
"action": {
"type": "block"
},
"trigger": {
"url-filter": "&ad_channel="
}
}]"####,
);
// Regex metacharacters such as `+` and `.` are escaped.
test_from_abp(
"+advertorial.",
r####"[{
"action": {
"type": "block"
},
"trigger": {
"url-filter": "\\+advertorial\\."
}
}]"####,
);
// `*` wildcards become `.*`.
test_from_abp(
"&prvtof=*&poru=",
r####"[{
"action": {
"type": "block"
},
"trigger": {
"url-filter": "&prvtof=.*&poru="
}
}]"####,
);
test_from_abp(
"-ad-180x150px.",
r####"[{
"action": {
"type": "block"
},
"trigger": {
"url-filter": "-ad-180x150px\\."
}
}]"####,
);
test_from_abp(
"://findnsave.*.*/api/groupon.json?",
r####"[{
"action": {
"type": "block"
},
"trigger": {
"url-filter": "://findnsave\\..*\\..*/api/groupon\\.json\\?"
}
}]"####,
);
// Scheme-only pattern combined with resource type, load type and domain options.
test_from_abp(
"|https://$script,third-party,domain=tamilrockers.ws",
r####"[{
"action": {
"type": "block"
},
"trigger": {
"if-domain": ["*tamilrockers.ws"],
"load-type": ["third-party"],
"resource-type": ["script"],
"url-filter": "^https://"
}
}]"####,
);
// `subdocument` mixed with another type and no load type is split into two rules; `object` is
// unsupported and dropped.
test_from_abp("||com/banners/$image,object,subdocument,domain=~pingdom.com|~thetvdb.com|~tooltrucks.com", r####"[{
"action": {
"type": "block"
},
"trigger": {
"url-filter": "^[^:]+:(//)?([^/]+\\.)?com/banners/",
"unless-domain": [
"*pingdom.com",
"*thetvdb.com",
"*tooltrucks.com"
],
"resource-type": [
"image"
]
}
}, {
"trigger": {
"url-filter": "^[^:]+:(//)?([^/]+\\.)?com/banners/",
"unless-domain": [
"*pingdom.com",
"*thetvdb.com",
"*tooltrucks.com"
],
"resource-type": [
"document"
],
"load-type": [
"third-party"
]
},
"action": {
"type": "block"
}
}]"####);
// A rule with options only (no pattern) matches any http(s) URL.
test_from_abp(
"$image,third-party,xmlhttprequest,domain=rd.com",
r####"[{
"action": {
"type": "block"
},
"trigger": {
"url-filter": "^https?://",
"if-domain": [
"*rd.com"
],
"resource-type": [
"image",
"raw"
],
"load-type": [
"third-party"
]
}
}]"####,
);
// A trailing `^` separator is dropped.
test_from_abp(
"|https://r.i.ua^",
r####"[{
"action": {
"type": "block"
},
"trigger": {
"url-filter": "^https://r\\.i\\.ua"
}
}]"####,
);
// Websocket scheme anchors cover both `ws` and `wss`.
test_from_abp(
"|ws://$domain=4shared.com",
r####"[{
"action": {
"type": "block"
},
"trigger": {
"url-filter": "^wss?://",
"if-domain": [
"*4shared.com"
]
}
}]"####,
);
}
/// Cosmetic rules: generic hides, hostname-specific hides (`if-domain`), and `#@#` unhide rules,
/// which are expressed as `css-display-none` rules with `unless-domain`.
#[test]
fn element_hiding_tests() {
// Generic rule: no domain restriction.
test_from_abp(
"###A9AdsMiddleBoxTop",
r####"[{
"action": {
"type": "css-display-none",
"selector": "#A9AdsMiddleBoxTop"
},
"trigger": {
"url-filter": ".*"
}
}]"####,
);
// Unhide rules move their hostnames into `unless-domain`.
test_from_abp(
"thedailygreen.com#@##AD_banner",
r####"[{
"action": {
"type": "css-display-none",
"selector": "#AD_banner"
},
"trigger": {
"url-filter": ".*",
"unless-domain": [
"thedailygreen.com"
]
}
}]"####,
);
test_from_abp(
"sprouts.com,tbns.com.au#@##AdImage",
r####"[{
"action": {
"type": "css-display-none",
"selector": "#AdImage"
},
"trigger": {
"url-filter": ".*",
"unless-domain": [
"sprouts.com",
"tbns.com.au"
]
}
}]"####,
);
test_from_abp(
r#"santander.co.uk#@#a[href^="http://ad-emea.doubleclick.net/"]"#,
r####"[{
"action": {
"type": "css-display-none",
"selector": "a[href^=\"http://ad-emea.doubleclick.net/\"]"
},
"trigger": {
"url-filter": ".*",
"unless-domain": [
"santander.co.uk"
]
}
}]"####,
);
// Hostname-specific hide rules use `if-domain`.
test_from_abp(
"search.safefinder.com,search.snapdo.com###ABottomD",
r####"[{
"action": {
"type": "css-display-none",
"selector": "#ABottomD"
},
"trigger": {
"url-filter": ".*",
"if-domain": [
"search.safefinder.com",
"search.snapdo.com"
]
}
}]"####,
);
test_from_abp(
r#"tweakguides.com###adbar > br + p[style="text-align: center"] + p[style="text-align: center"]"#,
r####"[{
"action": {
"type": "css-display-none",
"selector": "#adbar > br + p[style=\"text-align: center\"] + p[style=\"text-align: center\"]"
},
"trigger": {
"url-filter": ".*",
"if-domain": [
"tweakguides.com"
]
}
}]"####,
);
}
/* TODO - `$popup` is currently unsupported by NetworkFilter
#[test]
fn popup_tests() {
test_from_abp("||admngronline.com^$popup,third-party", r####"[{
"action": {
"type": "block"
},
"trigger": {
"url-filter": "^https?://admngronline\\.com(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x60\\x7B-\\x7F]|$)",
"load-type": [
"third-party"
],
"resource-type": [
"popup"
]
}
}]"####);
test_from_abp("||bet365.com^*affiliate=$popup", r####"[{
"action": {
"type": "block"
},
"trigger": {
"url-filter": "^https?://bet365\\.com(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x60\\x7B-\\x7F]|$).*affiliate=",
"resource-type": [
"popup"
]
}
}]"####);
}
*/
/// Hostname-anchored (`||`) rules, mostly with the `third-party` option: punycode hostnames,
/// wildcards inside hostnames, and long included / excluded domain lists.
#[test]
fn third_party() {
test_from_abp(
"||007-gateway.com^$third-party",
r####"[{
"action": {
"type": "block"
},
"trigger": {
"url-filter": "^[^:]+:(//)?([^/]+\\.)?007-gateway\\.com",
"load-type": [
"third-party"
]
}
}]"####,
);
// Non-ASCII hostnames appear in punycode form in the URL filter.
test_from_abp(
"||allestörungen.at^$third-party",
r####"[{
"action": {
"type": "block"
},
"trigger": {
"url-filter": "^[^:]+:(//)?([^/]+\\.)?xn--allestrungen-9ib\\.at",
"load-type": [
"third-party"
]
}
}]"####,
);
// A wildcard inside the hostname becomes `.*`.
test_from_abp(
"||anet*.tradedoubler.com^$third-party",
r####"[{
"action": {
"type": "block"
},
"trigger": {
"url-filter": "^[^:]+:(//)?([^/]+\\.)?anet.*\\.tradedoubler\\.com",
"load-type": [
"third-party"
]
}
}]"####,
);
// Every entry of a long `domain=` list is carried over with a `*` prefix.
test_from_abp("||doubleclick.net^$third-party,domain=3news.co.nz|92q.com|abc-7.com|addictinggames.com|allbusiness.com|allthingsd.com|bizjournals.com|bloomberg.com|bnn.ca|boom92houston.com|boom945.com|boomphilly.com|break.com|cbc.ca|cbs19.tv|cbs3springfield.com|cbsatlanta.com|cbslocal.com|complex.com|dailymail.co.uk|darkhorizons.com|doubleviking.com|euronews.com|extratv.com|fandango.com|fox19.com|fox5vegas.com|gorillanation.com|hawaiinewsnow.com|hellobeautiful.com|hiphopnc.com|hot1041stl.com|hothiphopdetroit.com|hotspotatl.com|hulu.com|imdb.com|indiatimes.com|indyhiphop.com|ipowerrichmond.com|joblo.com|kcra.com|kctv5.com|ketv.com|koat.com|koco.com|kolotv.com|kpho.com|kptv.com|ksat.com|ksbw.com|ksfy.com|ksl.com|kypost.com|kysdc.com|live5news.com|livestation.com|livestream.com|metro.us|metronews.ca|miamiherald.com|my9nj.com|myboom1029.com|mycolumbusmagic.com|mycolumbuspower.com|myfoxdetroit.com|myfoxorlando.com|myfoxphilly.com|myfoxphoenix.com|myfoxtampabay.com|nbcrightnow.com|neatorama.com|necn.com|neopets.com|news.com.au|news4jax.com|newsone.com|nintendoeverything.com|oldschoolcincy.com|own3d.tv|pagesuite-professional.co.uk|pandora.com|player.theplatform.com|ps3news.com|radio.com|radionowindy.com|rottentomatoes.com|sbsun.com|shacknews.com|sk-gaming.com|ted.com|thebeatdfw.com|theboxhouston.com|theglobeandmail.com|timesnow.tv|tv2.no|twitch.tv|universalsports.com|ustream.tv|wapt.com|washingtonpost.com|wate.com|wbaltv.com|wcvb.com|wdrb.com|wdsu.com|wflx.com|wfmz.com|wfsb.com|wgal.com|whdh.com|wired.com|wisn.com|wiznation.com|wlky.com|wlns.com|wlwt.com|wmur.com|wnem.com|wowt.com|wral.com|wsj.com|wsmv.com|wsvn.com|wtae.com|wthr.com|wxii12.com|wyff4.com|yahoo.com|youtube.com|zhiphopcleveland.com", r####"[{
"action": {
"type": "block"
},
"trigger": {
"url-filter": "^[^:]+:(//)?([^/]+\\.)?doubleclick\\.net",
"load-type": [
"third-party"
],
"if-domain": [
"*3news.co.nz",
"*92q.com",
"*abc-7.com",
"*addictinggames.com",
"*allbusiness.com",
"*allthingsd.com",
"*bizjournals.com",
"*bloomberg.com",
"*bnn.ca",
"*boom92houston.com",
"*boom945.com",
"*boomphilly.com",
"*break.com",
"*cbc.ca",
"*cbs19.tv",
"*cbs3springfield.com",
"*cbsatlanta.com",
"*cbslocal.com",
"*complex.com",
"*dailymail.co.uk",
"*darkhorizons.com",
"*doubleviking.com",
"*euronews.com",
"*extratv.com",
"*fandango.com",
"*fox19.com",
"*fox5vegas.com",
"*gorillanation.com",
"*hawaiinewsnow.com",
"*hellobeautiful.com",
"*hiphopnc.com",
"*hot1041stl.com",
"*hothiphopdetroit.com",
"*hotspotatl.com",
"*hulu.com",
"*imdb.com",
"*indiatimes.com",
"*indyhiphop.com",
"*ipowerrichmond.com",
"*joblo.com",
"*kcra.com",
"*kctv5.com",
"*ketv.com",
"*koat.com",
"*koco.com",
"*kolotv.com",
"*kpho.com",
"*kptv.com",
"*ksat.com",
"*ksbw.com",
"*ksfy.com",
"*ksl.com",
"*kypost.com",
"*kysdc.com",
"*live5news.com",
"*livestation.com",
"*livestream.com",
"*metro.us",
"*metronews.ca",
"*miamiherald.com",
"*my9nj.com",
"*myboom1029.com",
"*mycolumbusmagic.com",
"*mycolumbuspower.com",
"*myfoxdetroit.com",
"*myfoxorlando.com",
"*myfoxphilly.com",
"*myfoxphoenix.com",
"*myfoxtampabay.com",
"*nbcrightnow.com",
"*neatorama.com",
"*necn.com",
"*neopets.com",
"*news.com.au",
"*news4jax.com",
"*newsone.com",
"*nintendoeverything.com",
"*oldschoolcincy.com",
"*own3d.tv",
"*pagesuite-professional.co.uk",
"*pandora.com",
"*player.theplatform.com",
"*ps3news.com",
"*radio.com",
"*radionowindy.com",
"*rottentomatoes.com",
"*sbsun.com",
"*shacknews.com",
"*sk-gaming.com",
"*ted.com",
"*thebeatdfw.com",
"*theboxhouston.com",
"*theglobeandmail.com",
"*timesnow.tv",
"*tv2.no",
"*twitch.tv",
"*universalsports.com",
"*ustream.tv",
"*wapt.com",
"*washingtonpost.com",
"*wate.com",
"*wbaltv.com",
"*wcvb.com",
"*wdrb.com",
"*wdsu.com",
"*wflx.com",
"*wfmz.com",
"*wfsb.com",
"*wgal.com",
"*whdh.com",
"*wired.com",
"*wisn.com",
"*wiznation.com",
"*wlky.com",
"*wlns.com",
"*wlwt.com",
"*wmur.com",
"*wnem.com",
"*wowt.com",
"*wral.com",
"*wsj.com",
"*wsmv.com",
"*wsvn.com",
"*wtae.com",
"*wthr.com",
"*wxii12.com",
"*wyff4.com",
"*yahoo.com",
"*youtube.com",
"*zhiphopcleveland.com"
]
}
}]"####);
// Negated domains (`~`) go to `unless-domain`.
test_from_abp("||dt00.net^$third-party,domain=~marketgid.com|~marketgid.ru|~marketgid.ua|~mgid.com|~thechive.com", r####"[{
"action": {
"type": "block"
},
"trigger": {
"url-filter": "^[^:]+:(//)?([^/]+\\.)?dt00\\.net",
"load-type": [
"third-party"
],
"unless-domain": [
"*marketgid.com",
"*marketgid.ru",
"*marketgid.ua",
"*mgid.com",
"*thechive.com"
]
}
}]"####);
// Hostname anchor followed by a path containing a wildcard.
test_from_abp("||amazonaws.com/newscloud-production/*/backgrounds/$domain=crescent-news.com|daily-jeff.com|recordpub.com|state-journal.com|the-daily-record.com|the-review.com|times-gazette.com", r####"[{
"action": {
"type": "block"
},
"trigger": {
"url-filter": "^[^:]+:(//)?([^/]+\\.)?amazonaws\\.com/newscloud-production/.*/backgrounds/",
"if-domain": [
"*crescent-news.com",
"*daily-jeff.com",
"*recordpub.com",
"*state-journal.com",
"*the-daily-record.com",
"*the-review.com",
"*times-gazette.com"
]
}
}]"####);
// Hostname-anchored rule without any options.
test_from_abp(
"||d1noellhv8fksc.cloudfront.net^",
r####"[{
"action": {
"type": "block"
},
"trigger": {
"url-filter": "^[^:]+:(//)?([^/]+\\.)?d1noellhv8fksc\\.cloudfront\\.net"
}
}]"####,
);
}
/// Exception (`@@`) rules, which convert to `ignore-previous-rules` actions.
#[test]
fn whitelist() {
test_from_abp(
"@@||google.com/recaptcha/$domain=mediafire.com",
r####"[{
"action": {
"type": "ignore-previous-rules"
},
"trigger": {
"url-filter": "^[^:]+:(//)?([^/]+\\.)?google\\.com/recaptcha/",
"if-domain": [
"*mediafire.com"
]
}
}]"####,
);
// A trailing `|` right anchor becomes `$` in the URL filter.
test_from_abp(
"@@||ad4.liverail.com/?compressed|$domain=majorleaguegaming.com|pbs.org|wikihow.com",
r####"[{
"action": {
"type": "ignore-previous-rules"
},
"trigger": {
"url-filter": "^[^:]+:(//)?([^/]+\\.)?ad4\\.liverail\\.com/\\?compressed$",
"if-domain": [
"*majorleaguegaming.com",
"*pbs.org",
"*wikihow.com"
]
}
}]"####,
);
// Non-ASCII entries in `domain=` are converted to punycode.
test_from_abp(
"@@||googletagservices.com/tag/js/gpt.js$domain=allestoringen.nl|allestörungen.at",
r####"[{
"action": {
"type": "ignore-previous-rules"
},
"trigger": {
"url-filter": "^[^:]+:(//)?([^/]+\\.)?googletagservices\\.com/tag/js/gpt\\.js",
"if-domain": [
"*allestoringen.nl",
"*xn--allestrungen-9ib.at"
]
}
}]"####,
);
// `~third-party` maps to the first-party load type.
test_from_abp(
"@@||advertising.autotrader.co.uk^$~third-party",
r####"[{
"action": {
"type": "ignore-previous-rules"
},
"trigger": {
"load-type": [
"first-party"
],
"url-filter": "^[^:]+:(//)?([^/]+\\.)?advertising\\.autotrader\\.co\\.uk"
}
}]"####,
);
test_from_abp(
"@@||advertising.racingpost.com^$image,script,stylesheet,~third-party,xmlhttprequest",
r####"[{
"action": {
"type": "ignore-previous-rules"
},
"trigger": {
"load-type": [
"first-party"
],
"url-filter": "^[^:]+:(//)?([^/]+\\.)?advertising\\.racingpost\\.com",
"resource-type": [
"image",
"style-sheet",
"script",
"raw"
]
}
}]"####,
);
}
/// The rule built by `ignore_previous_fp_documents()` must equal the rule
/// obtained by deserializing its hand-written JSON form.
#[test]
fn test_ignore_previous_fp_documents() {
    // Built first, matching the order in which the two sides were evaluated before.
    let generated = vec![ignore_previous_fp_documents()];

    let deserialized: Vec<CbRule> = serde_json::from_str(
        r####"[{
"trigger":{
"url-filter":".*",
"resource-type":["document"],
"load-type":["first-party"]
},
"action":{"type":"ignore-previous-rules"}
}]"####,
    )
    .expect("content blocking rule under test could not be deserialized");

    assert_eq!(generated, deserialized);
}
/// A filter containing literal backslashes is expected to yield a `url-filter`
/// in which each of those backslashes is escaped.
#[test]
fn escape_literal_backslashes() {
    // Raw string: the backslashes below are literal characters of the filter.
    let abp_rule = r#"||gamer.no/?module=Tumedia\DFProxy\Modules^"#;

    // In JSON source, `\\\\` decodes to two backslashes, i.e. one escaped
    // literal backslash inside the regular expression.
    let expected_json = r####"[{
"action": {
"type": "block"
},
"trigger": {
"url-filter": "^[^:]+:(//)?([^/]+\\.)?gamer\\.no/\\?module=tumedia\\\\dfproxy\\\\modules"
}
}]"####;

    test_from_abp(abp_rule, expected_json);
}
}
#[cfg(test)]
mod filterset_tests {
    //! Tests for converting a whole `FilterSet` into content blocking rules
    //! through `FilterSet::into_content_blocking`.

    use crate::lists::{FilterSet, ParseOptions, RuleTypes};

    /// Shared fixture: three network filters followed by three cosmetic filters.
    const FILTER_LIST: &[&str] = &[
        "||example.com^$script",
        "||test.net^$image,third-party",
        "/trackme.js^$script",
        "example.com##.ad-banner",
        "##.ad-640x480",
        "##p.sponsored",
    ];

    /// With default parse options every fixture filter is reported as used.
    #[test]
    fn convert_all_rules() -> Result<(), ()> {
        let mut filter_set = FilterSet::new(true);
        filter_set.add_filters(FILTER_LIST, ParseOptions::default());

        let (converted, consumed) = filter_set.into_content_blocking()?;

        assert_eq!(consumed, FILTER_LIST);
        // One rule per fixture filter, plus `ignore_previous_fp_documents()`.
        assert_eq!(converted.len(), 7);
        Ok(())
    }

    /// Restricting parsing to network rules leaves only the first three
    /// fixture filters.
    #[test]
    fn convert_network_only() -> Result<(), ()> {
        let network_only = ParseOptions {
            rule_types: RuleTypes::NetworkOnly,
            ..ParseOptions::default()
        };
        let mut filter_set = FilterSet::new(true);
        filter_set.add_filters(FILTER_LIST, network_only);

        let (converted, consumed) = filter_set.into_content_blocking()?;

        assert_eq!(consumed, &FILTER_LIST[..3]);
        // The three network rules, plus `ignore_previous_fp_documents()`.
        assert_eq!(converted.len(), 4);
        Ok(())
    }

    /// Restricting parsing to cosmetic rules leaves only the last three
    /// fixture filters.
    #[test]
    fn convert_cosmetic_only() -> Result<(), ()> {
        let cosmetic_only = ParseOptions {
            rule_types: RuleTypes::CosmeticOnly,
            ..ParseOptions::default()
        };
        let mut filter_set = FilterSet::new(true);
        filter_set.add_filters(FILTER_LIST, cosmetic_only);

        let (converted, consumed) = filter_set.into_content_blocking()?;

        assert_eq!(consumed, &FILTER_LIST[3..]);
        // Just the three cosmetic rules; no extra document rule is expected here.
        assert_eq!(converted.len(), 3);
        Ok(())
    }

    /// Filters that cannot be represented are dropped, leaving the result
    /// identical to converting the fixture alone.
    #[test]
    fn ignore_unsupported_rules() -> Result<(), ()> {
        let unsupported = [
            // Patterns containing non-ASCII characters
            "||rgmechanics.info/uploads/660х90_",
            "||insaattrendy.com/Upload/bükerbanner*.jpg",
            // Filter using the `from=` domain option
            "/siropu/am/core.min.js$script,important,from=~audi-sport.net|~hifiwigwam.com",
        ];

        let mut filter_set = FilterSet::new(true);
        filter_set.add_filters(FILTER_LIST, ParseOptions::default());
        filter_set.add_filters(unsupported, ParseOptions::default());

        let (converted, consumed) = filter_set.into_content_blocking()?;

        assert_eq!(consumed, FILTER_LIST);
        // Same count as for the fixture alone: six rules plus
        // `ignore_previous_fp_documents()`.
        assert_eq!(converted.len(), 7);
        Ok(())
    }

    /// A non-ASCII hostname in a cosmetic filter's domain list is expected in
    /// its `xn--` form within `if_domain`.
    #[test]
    fn punycode_if_domains() -> Result<(), ()> {
        let filters = [
            "smskaraborg.se,örnsköldsviksgymnasium.se,mojligheternashusab.se##.env-modal-dialog__backdrop",
        ];

        let mut filter_set = FilterSet::new(true);
        filter_set.add_filters(&filters, ParseOptions::default());

        let (converted, consumed) = filter_set.into_content_blocking()?;

        assert_eq!(consumed, filters);
        assert_eq!(converted.len(), 1);

        let if_domain = converted[0].trigger.if_domain.as_ref();
        assert!(if_domain.is_some());
        assert_eq!(
            if_domain.unwrap(),
            &["smskaraborg.se", "xn--rnskldsviksgymnasium-29be.se", "mojligheternashusab.se"]
        );
        Ok(())
    }
}