use memchr::{memchr as find_char, memrchr as find_char_reverse};
use serde::Serialize;
use std::collections::HashSet;
use std::ops::DerefMut;
use std::sync::OnceLock;
use crate::filters::fb_network_builder::NetworkFilterListId;
use crate::filters::filter_data_context::FilterDataContextRef;
use crate::filters::network::NetworkFilterMaskHelper;
use crate::network_filter_list::NetworkFilterList;
use crate::regex_manager::{RegexManager, RegexManagerDiscardPolicy};
use crate::request::Request;
use crate::resources::ResourceStorage;
/// Options used when constructing a [`Blocker`].
pub struct BlockerOptions {
/// Forwarded to `Engine::from_filter_set` by the test-only `Blocker::new` constructor.
pub enable_optimizations: bool,
}
/// Outcome of checking a single request against the blocker's network filters.
///
/// The `Default` value (everything `false`/`None`) is what is returned for
/// requests that are not supported.
#[derive(Debug, Serialize, Default)]
pub struct BlockerResult {
/// `true` when no exception matched and either a filter matched or the
/// caller indicated that a rule had already matched.
pub matched: bool,
/// `true` when the matched filter is an important one.
pub important: bool,
/// Redirect resource obtained from the resource storage for the selected
/// redirect filter, if any was found.
pub redirect: Option<String>,
/// URL with matching query parameters removed, when a removeparam filter
/// actually changed the URL.
pub rewritten_url: Option<String>,
/// String form of the exception filter that matched, if any.
pub exception: Option<String>,
/// String form of the filter that matched, if any.
pub filter: Option<String>,
}
/// Returns a shared, lazily-initialised empty tag set.
///
/// Used for filter lists that are checked without any enabled tags, so that
/// callers do not need to build a new `HashSet` for every check.
fn get_no_tags() -> &'static HashSet<String> {
    static EMPTY_TAGS: OnceLock<HashSet<String>> = OnceLock::new();
    EMPTY_TAGS.get_or_init(HashSet::new)
}
/// Matches requests against the network filter lists held in a shared
/// filter data context.
pub struct Blocker {
/// Tags currently enabled; passed to the tagged, exception and CSP list checks.
pub(crate) tags_enabled: HashSet<String>,
/// Regex manager behind a `RefCell` when the `single-thread` feature is on.
#[cfg(feature = "single-thread")]
pub(crate) regex_manager: std::cell::RefCell<RegexManager>,
/// Regex manager behind a `Mutex` when the `single-thread` feature is off.
#[cfg(not(feature = "single-thread"))]
pub(crate) regex_manager: std::sync::Mutex<RegexManager>,
/// Backing data from which the individual network filter lists are read.
pub(crate) filter_data_context: FilterDataContextRef,
}
/// Guard type returned by `Blocker::borrow_regex_manager`: a `RefMut` when the
/// `single-thread` feature is enabled.
#[cfg(feature = "single-thread")]
pub(crate) type RegexManagerRef<'a> = std::cell::RefMut<'a, RegexManager>;
/// Guard type returned by `Blocker::borrow_regex_manager`: a `MutexGuard` when
/// the `single-thread` feature is disabled.
#[cfg(not(feature = "single-thread"))]
pub(crate) type RegexManagerRef<'a> = std::sync::MutexGuard<'a, RegexManager>;
impl Blocker {
/// Checks `request` against the network filters with default settings.
///
/// Equivalent to [`Blocker::check_parameterised`] with no previously matched
/// rule and without forcing an exception check.
pub fn check(&self, request: &Request, resources: &ResourceStorage) -> BlockerResult {
    let matched_rule = false;
    let force_check_exceptions = false;
    self.check_parameterised(request, resources, matched_rule, force_check_exceptions)
}
/// Builds a [`NetworkFilterList`] view over the list stored at index `id`
/// in the filter data context's network rules.
pub(crate) fn get_list(&self, id: NetworkFilterListId) -> NetworkFilterList<'_> {
    let context = &self.filter_data_context;
    // The list id doubles as the index into the stored network rules.
    let list = context.memory.root().network_rules().get(id as usize);
    NetworkFilterList {
        list,
        filter_data_context: context,
    }
}
/// Returns the filter list stored under `NetworkFilterListId::Csp`.
pub(crate) fn csp(&self) -> NetworkFilterList<'_> {
self.get_list(NetworkFilterListId::Csp)
}
/// Returns the filter list stored under `NetworkFilterListId::Exceptions`.
pub(crate) fn exceptions(&self) -> NetworkFilterList<'_> {
self.get_list(NetworkFilterListId::Exceptions)
}
/// Returns the filter list stored under `NetworkFilterListId::Importants`.
pub(crate) fn importants(&self) -> NetworkFilterList<'_> {
self.get_list(NetworkFilterListId::Importants)
}
/// Returns the filter list stored under `NetworkFilterListId::Redirects`.
pub(crate) fn redirects(&self) -> NetworkFilterList<'_> {
self.get_list(NetworkFilterListId::Redirects)
}
/// Returns the filter list stored under `NetworkFilterListId::RemoveParam`.
pub(crate) fn removeparam(&self) -> NetworkFilterList<'_> {
self.get_list(NetworkFilterListId::RemoveParam)
}
/// Returns the filter list stored under `NetworkFilterListId::Filters`.
pub(crate) fn filters(&self) -> NetworkFilterList<'_> {
self.get_list(NetworkFilterListId::Filters)
}
/// Returns the filter list stored under `NetworkFilterListId::GenericHide`.
pub(crate) fn generic_hide(&self) -> NetworkFilterList<'_> {
self.get_list(NetworkFilterListId::GenericHide)
}
/// Returns the filter list stored under `NetworkFilterListId::TaggedFiltersAll`.
pub(crate) fn tagged_filters_all(&self) -> NetworkFilterList<'_> {
self.get_list(NetworkFilterListId::TaggedFiltersAll)
}
/// Acquires exclusive access to the blocker's [`RegexManager`].
///
/// With the `single-thread` feature the manager is borrowed mutably from its
/// `RefCell`; otherwise its `Mutex` is locked. On targets other than `wasm32`
/// the manager's `update_time()` is called before the guard is returned.
///
/// # Panics
///
/// Panics if the `RefCell` is already borrowed (`single-thread`), or if the
/// `Mutex` is poisoned (otherwise), as the lock result is unwrapped.
pub(crate) fn borrow_regex_manager(&self) -> RegexManagerRef<'_> {
#[cfg(feature = "single-thread")]
// NOTE(review): `mut` is presumably unused on wasm32, where `update_time()`
// below is compiled out — hence the allow. Confirm before removing.
#[allow(unused_mut)]
let mut manager = self.regex_manager.borrow_mut();
#[cfg(not(feature = "single-thread"))]
let mut manager = self.regex_manager.lock().unwrap();
// The time update is skipped entirely when compiling for wasm32.
#[cfg(not(target_arch = "wasm32"))]
manager.update_time();
manager
}
pub fn check_generic_hide(&self, hostname_request: &Request) -> bool {
let mut regex_manager = self.borrow_regex_manager();
self.generic_hide()
.check(hostname_request, &HashSet::new(), &mut regex_manager)
.is_some()
}
/// Test-only helper: returns `true` if any filter in the exceptions list
/// matches `request`.
///
/// The list is checked with no tags enabled, using the shared empty tag set
/// from `get_no_tags()` rather than constructing a new `HashSet` per call.
#[cfg(test)]
pub(crate) fn check_exceptions(&self, request: &Request) -> bool {
    let mut regex_manager = self.borrow_regex_manager();
    self.exceptions()
        .check(request, get_no_tags(), &mut regex_manager)
        .is_some()
}
/// Checks `request` against the network filter lists and assembles a
/// [`BlockerResult`].
///
/// Evaluation order:
/// 1. Important filters are always checked (with no tags).
/// 2. If no important filter matched and `matched_rule` is `false`, the
///    tagged filters (using the enabled tags) and then the regular filters
///    are checked.
/// 3. Exceptions are checked when a non-important filter matched, or when
///    nothing matched but `matched_rule` or `force_check_exceptions` is set.
///    An important match never consults exceptions.
/// 4. Redirect filters are collected; the non-exception redirect with the
///    highest priority that is not cancelled by a redirect exception is
///    looked up in `resources`.
/// 5. Unless the match is important, removeparam filters are applied to
///    produce `rewritten_url`.
///
/// * `matched_rule` - the caller already knows some rule matched; the
///   result then counts as matched unless an exception applies.
/// * `force_check_exceptions` - check exceptions even when no filter matched.
///
/// Unsupported requests yield `BlockerResult::default()` without touching
/// the regex manager.
pub fn check_parameterised(
    &self,
    request: &Request,
    resources: &ResourceStorage,
    matched_rule: bool,
    force_check_exceptions: bool,
) -> BlockerResult {
    /// Splits a redirect option of the form `resource:priority`.
    ///
    /// The text after the last `:` is used as the priority only if it parses
    /// as an `i32`; otherwise the whole option is the resource name and the
    /// priority is 0.
    fn split_redirect_priority(redirect: &str) -> (&str, i32) {
        if let Some(idx) = find_char_reverse(b':', redirect.as_bytes()) {
            if let Ok(priority) = redirect[idx + 1..].parse::<i32>() {
                return (&redirect[..idx], priority);
            }
        }
        (redirect, 0)
    }

    // Bail out before acquiring the regex manager: nothing below runs for
    // unsupported requests, so there is no reason to take the lock.
    if !request.is_supported {
        return BlockerResult::default();
    }
    let mut regex_manager = self.borrow_regex_manager();

    let important_filter = self
        .importants()
        .check(request, get_no_tags(), &mut regex_manager);

    // Only look at the remaining block lists when nothing is known to have
    // matched yet.
    let filter = if important_filter.is_none() && !matched_rule {
        self.tagged_filters_all()
            .check(request, &self.tags_enabled, &mut regex_manager)
            .or_else(|| {
                self.filters()
                    .check(request, get_no_tags(), &mut regex_manager)
            })
    } else {
        important_filter
    };

    let should_check_exceptions = match filter.as_ref() {
        // Nothing matched here: only consult exceptions when asked to.
        None => matched_rule || force_check_exceptions,
        // Important filters cannot be overridden by exceptions.
        Some(f) => !f.is_important(),
    };
    let exception = if should_check_exceptions {
        self.exceptions()
            .check(request, &self.tags_enabled, &mut regex_manager)
    } else {
        None
    };

    let redirect_filters =
        self.redirects()
            .check_all(request, get_no_tags(), regex_manager.deref_mut());

    let redirect_resource = {
        // Redirect options named by exception filters cancel non-exception
        // redirects carrying exactly the same option.
        let excepted: Vec<_> = redirect_filters
            .iter()
            .filter(|f| f.is_exception())
            .filter_map(|f| f.modifier_option.as_ref())
            .collect();

        let candidates = redirect_filters
            .iter()
            .filter(|f| !f.is_exception())
            .filter_map(|f| f.modifier_option.as_ref());

        let mut best: Option<(&str, i32)> = None;
        for redirect in candidates {
            if excepted.contains(&redirect) {
                continue;
            }
            let (resource, priority) = split_redirect_priority(redirect);
            match best {
                // Keep the earlier candidate unless this one is strictly
                // higher priority.
                Some((_, current)) if priority <= current => {}
                _ => best = Some((resource, priority)),
            }
        }
        best.map(|(resource, _)| resource)
    };

    // It is acceptable to return no redirect when the named resource is not
    // available in `resources`.
    let redirect: Option<String> = redirect_resource
        .and_then(|resource_name| resources.get_redirect_resource(resource_name));
    // Only report a missing resource when the lookup actually failed.
    #[cfg(test)]
    {
        if redirect_resource.is_some() && redirect.is_none() {
            eprintln!("Matched rule with redirect option but did not find corresponding resource to send");
        }
    }

    let important = filter.as_ref().is_some_and(|f| f.is_important());

    let rewritten_url = if important {
        None
    } else {
        Self::apply_removeparam(&self.removeparam(), request, regex_manager.deref_mut())
    };

    // A previously matched (but unknown) rule still counts as a match.
    let matched = exception.is_none() && (filter.is_some() || matched_rule);

    BlockerResult {
        matched,
        important,
        redirect,
        rewritten_url,
        exception: exception.as_ref().map(|f| f.to_string()),
        filter: filter.as_ref().map(|f| f.to_string()),
    }
}
/// Applies matching removeparam filters to the query string of
/// `request.original_url`.
///
/// The query string is the text between the first `?` and the first `#`
/// after it (or the end of the URL). A parameter is dropped when it has the
/// form `key=value` with a non-empty value and `key` equals the modifier
/// option of a matching filter. Returns the rewritten URL, or `None` when
/// the URL has no `?` or no parameter was removed. If every parameter is
/// removed, the `?` is dropped as well.
fn apply_removeparam(
    removeparam_filters: &NetworkFilterList,
    request: &Request,
    regex_manager: &mut RegexManager,
) -> Option<String> {
    /// One `&`-separated component of the query string.
    enum QParam<'a> {
        KeyOnly(&'a str),
        KeyValue(&'a str, &'a str),
    }

    impl std::fmt::Display for QParam<'_> {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            match self {
                Self::KeyOnly(key) => f.write_str(key),
                Self::KeyValue(key, value) => write!(f, "{key}={value}"),
            }
        }
    }

    let url = &request.original_url;
    let query_mark = find_char(b'?', url.as_bytes())?;
    let params_start = query_mark + 1;
    // The fragment, if any, starts at the first '#' after the '?'.
    let params_end = find_char(b'#', &url.as_bytes()[params_start..])
        .map_or(url.len(), |offset| params_start + offset);

    // Each parameter carries a flag telling whether it is kept.
    let mut params: Vec<(QParam, bool)> = url[params_start..params_end]
        .split('&')
        .map(|pair| {
            let param = match pair.split_once('=') {
                Some((key, value)) => QParam::KeyValue(key, value),
                None => QParam::KeyOnly(pair),
            };
            (param, true)
        })
        .collect();

    let filters = removeparam_filters.check_all(request, get_no_tags(), regex_manager);
    let mut removed_any = false;
    for removeparam_filter in filters {
        // Filters without a parameter name have nothing to remove.
        let Some(removeparam) = &removeparam_filter.modifier_option else {
            continue;
        };
        for (param, include) in params.iter_mut() {
            if let QParam::KeyValue(k, v) = param {
                if !v.is_empty() && k == removeparam {
                    *include = false;
                    removed_any = true;
                }
            }
        }
    }

    if !removed_any {
        return None;
    }

    let kept = itertools::join(
        params
            .into_iter()
            .filter_map(|(param, include)| include.then(|| param.to_string())),
        "&",
    );
    let query = if kept.is_empty() {
        String::new()
    } else {
        format!("?{kept}")
    };
    Some(format!(
        "{}{}{}",
        &url[..query_mark],
        query,
        &url[params_end..]
    ))
}
/// Collects the CSP directives to inject for `request`.
///
/// Only `Document` and `Subdocument` requests are considered. Directives
/// from matching non-exception CSP filters are enabled; directives from
/// matching exception CSP filters are disabled. A matching exception CSP
/// filter without a directive makes the whole call return `None`. The
/// remaining directives are joined with `,`; their order follows the
/// iteration order of the underlying `HashSet` and is not specified.
///
/// Returns `None` when the request type is not eligible, nothing matched,
/// or no directive remains.
pub fn get_csp_directives(&self, request: &Request) -> Option<String> {
    use crate::request::RequestType;

    let is_document = request.request_type == RequestType::Document
        || request.request_type == RequestType::Subdocument;
    if !is_document {
        return None;
    }

    let mut regex_manager = self.borrow_regex_manager();
    let matches = self
        .csp()
        .check_all(request, &self.tags_enabled, &mut regex_manager);
    if matches.is_empty() {
        return None;
    }

    let mut disabled: HashSet<&str> = HashSet::new();
    let mut enabled: HashSet<&str> = HashSet::new();
    for csp_filter in matches.iter().filter(|f| f.is_csp()) {
        match (&csp_filter.modifier_option, csp_filter.is_exception()) {
            (Some(directive), true) => {
                disabled.insert(directive);
            }
            (Some(directive), false) => {
                enabled.insert(directive);
            }
            // An exception without a directive disables CSP injection entirely.
            (None, true) => return None,
            (None, false) => {}
        }
    }

    let remaining: Vec<&str> = enabled.difference(&disabled).copied().collect();
    if remaining.is_empty() {
        return None;
    }
    Some(remaining.join(","))
}
/// Creates a blocker over `filter_data_context` with no tags enabled and a
/// default-constructed regex manager.
pub(crate) fn from_context(filter_data_context: FilterDataContextRef) -> Self {
    let tags_enabled = HashSet::new();
    let regex_manager = Default::default();
    Self {
        tags_enabled,
        regex_manager,
        filter_data_context,
    }
}
/// Test-only constructor: builds a blocker directly from a list of network
/// filters.
///
/// The filters are placed in a `FilterSet`, an `Engine` is built from it
/// using `options.enable_optimizations`, and the blocker is created from the
/// engine's filter data context.
#[cfg(test)]
pub fn new(
network_filters: Vec<crate::filters::network::NetworkFilter>,
options: &BlockerOptions,
) -> Self {
use crate::engine::Engine;
use crate::FilterSet;
// NOTE(review): the meaning of the `true` argument is defined by
// `FilterSet::new`, which is not visible here — confirm there.
let mut filter_set = FilterSet::new(true);
filter_set.network_filters = network_filters;
let engine = Engine::from_filter_set(filter_set, options.enable_optimizations);
Self::from_context(engine.filter_data_context())
}
/// Replaces the set of enabled tags with exactly `tags`.
pub fn use_tags(&mut self, tags: &[&str]) {
    let replacement: HashSet<String> = tags.iter().map(|tag| (*tag).to_owned()).collect();
    self.tags_with_set(replacement);
}
/// Adds `tags` to the set of enabled tags, keeping those already enabled.
pub fn enable_tags(&mut self, tags: &[&str]) {
    let mut combined: HashSet<String> = self.tags_enabled.clone();
    combined.extend(tags.iter().map(|tag| (*tag).to_owned()));
    self.tags_with_set(combined);
}
/// Removes `tags` from the set of enabled tags; tags that are not currently
/// enabled are ignored.
pub fn disable_tags(&mut self, tags: &[&str]) {
    let to_remove: HashSet<&str> = tags.iter().copied().collect();
    let retained: HashSet<String> = self
        .tags_enabled
        .iter()
        .filter(|tag| !to_remove.contains(tag.as_str()))
        .cloned()
        .collect();
    self.tags_with_set(retained);
}
/// Overwrites the set of enabled tags with `tags_enabled`.
fn tags_with_set(&mut self, tags_enabled: HashSet<String>) {
self.tags_enabled = tags_enabled;
}
/// Returns a copy of the currently enabled tags, in unspecified order.
pub fn tags_enabled(&self) -> Vec<String> {
    let mut tags = Vec::with_capacity(self.tags_enabled.len());
    tags.extend(self.tags_enabled.iter().cloned());
    tags
}
/// Sets the regex manager's discard policy to `new_discard_policy`.
pub fn set_regex_discard_policy(&self, new_discard_policy: RegexManagerDiscardPolicy) {
    self.borrow_regex_manager()
        .set_discard_policy(new_discard_policy);
}
/// Asks the regex manager to discard the regex identified by `regex_id`.
///
/// Only available with the `debug-info` feature.
#[cfg(feature = "debug-info")]
pub fn discard_regex(&self, regex_id: u64) {
    self.borrow_regex_manager().discard_regex(regex_id);
}
/// Returns the regex manager's debug information.
///
/// Only available with the `debug-info` feature.
#[cfg(feature = "debug-info")]
pub fn get_regex_debug_info(&self) -> crate::regex_manager::RegexDebugInfo {
    let manager = self.borrow_regex_manager();
    let debug_info = manager.get_debug_info();
    debug_info
}
}
#[cfg(test)]
#[path = "../tests/unit/blocker.rs"]
mod unit_tests;