// adblock 0.12.2
//
// Native Rust module for Adblock Plus syntax (e.g. EasyList, EasyPrivacy) filter parsing and matching.
// Documentation
//! The adblock [`Engine`] is the primary interface for adblocking.

use crate::blocker::{Blocker, BlockerResult};
use crate::cosmetic_filter_cache::{CosmeticFilterCache, UrlSpecificResources};
use crate::cosmetic_filter_cache_builder::CosmeticFilterCacheBuilder;
use crate::data_format::{deserialize_dat_file, serialize_dat_file, DeserializationError};
use crate::filters::cosmetic::CosmeticFilter;
use crate::filters::fb_builder::EngineFlatBuilder;
use crate::filters::fb_network_builder::NetworkRulesBuilder;
use crate::filters::filter_data_context::{FilterDataContext, FilterDataContextRef};
use crate::filters::network::NetworkFilter;
use crate::flatbuffers::containers::flat_serialize::FlatSerialize;
use crate::flatbuffers::unsafe_tools::VerifiedFlatbufferMemory;
use crate::lists::{FilterSet, ParseOptions};
use crate::regex_manager::RegexManagerDiscardPolicy;
use crate::request::Request;
use crate::resources::{Resource, ResourceStorage, ResourceStorageBackend};

use std::collections::HashSet;

/// Drives high-level blocking logic and is responsible for loading filter lists into an optimized
/// format that can be queried efficiently.
///
/// For performance optimization reasons, the [`Engine`] is not designed to have rules added or
/// removed after its initial creation. Making changes to the rules loaded is accomplished by
/// creating a new engine to replace it.
///
/// ## Usage
///
/// ### Initialization
///
/// You'll first want to combine all of your filter lists in a [`FilterSet`], which will parse list
/// header metadata. Once all lists have been composed together, you can call
/// [`Engine::from_filter_set`] to start using them for blocking.
///
/// You may also want to supply certain assets for `$redirect` filters and `##+js(...)` scriptlet
/// injections. These are known as [`Resource`]s, and can be provided with
/// [`Engine::use_resources`]. See the [`crate::resources`] module for more information.
///
/// ### Network blocking
///
/// Use the [`Engine::check_network_request`] method to determine how to handle a network request.
///
/// If you _only_ need network blocking, consider using a [`Blocker`] directly.
///
/// ### Cosmetic filtering
///
/// Call [`Engine::url_cosmetic_resources`] to determine what actions should be taken to prepare a
/// particular page before it starts loading.
///
/// Once the page has been loaded, any new CSS classes or ids that appear on the page should be
/// passed to [`Engine::hidden_class_id_selectors`] on an ongoing basis to determine additional
/// elements that should be hidden dynamically.
pub struct Engine {
    /// Answers network-level queries (request checks, CSP directives, generic-hide checks) and
    /// owns the enabled-tag state and the regex discard policy.
    blocker: Blocker,
    /// Answers cosmetic queries: per-hostname resources and class/id based hide selectors.
    cosmetic_cache: CosmeticFilterCache,
    /// Resource storage handed to both the blocker and the cosmetic cache on each query; starts
    /// out as `ResourceStorage::default()` until replaced through `use_resources` or
    /// `use_resource_storage`.
    resources: ResourceStorage,
    /// Shared handle to the filter data (the flatbuffer memory) that `blocker` and
    /// `cosmetic_cache` were both built from; its bytes are what `serialize` writes out.
    filter_data_context: FilterDataContextRef,
}

/// Diagnostic snapshot of an [`Engine`], as returned by `Engine::get_debug_info`.
///
/// Only available when the `debug-info` feature is enabled.
#[cfg(feature = "debug-info")]
pub struct EngineDebugInfo {
    /// Regex debug information as reported by the engine's blocker.
    pub regex_debug_info: crate::regex_manager::RegexDebugInfo,
    /// Length, in bytes, of the flatbuffer data backing the engine.
    pub flatbuffer_size: usize,
}

impl Default for Engine {
    /// Creates an engine from a freshly constructed [`FilterSet`] (debug disabled) to which no
    /// filters have been added, with optimization turned off.
    fn default() -> Self {
        let debug = false;
        let optimize = false;
        let no_rules = FilterSet::new(debug);
        Self::from_filter_set(no_rules, optimize)
    }
}

impl Engine {
    /// Loads rules in a single format, enabling optimizations and discarding debug information.
    pub fn from_rules(
        rules: impl IntoIterator<Item = impl AsRef<str>>,
        opts: ParseOptions,
    ) -> Self {
        Self::from_rules_parametrised(rules, opts, false, true)
    }

    /// Loads rules, enabling optimizations and including debug information.
    pub fn from_rules_debug(
        rules: impl IntoIterator<Item = impl AsRef<str>>,
        opts: ParseOptions,
    ) -> Self {
        Self::from_rules_parametrised(rules, opts, true, true)
    }

    /// Loads rules in a single format with explicit control over debug information and
    /// optimization.
    ///
    /// `debug` is passed to [`FilterSet::new`] and `optimize` is passed to
    /// [`Engine::from_filter_set`]; `filter_rules` are parsed with `opts`.
    pub fn from_rules_parametrised(
        filter_rules: impl IntoIterator<Item = impl AsRef<str>>,
        opts: ParseOptions,
        debug: bool,
        optimize: bool,
    ) -> Self {
        let mut filter_set = FilterSet::new(debug);
        filter_set.add_filters(filter_rules, opts);
        Self::from_filter_set(filter_set, optimize)
    }

    /// Test-only: consumes the engine and returns its cosmetic filter cache.
    #[cfg(test)]
    pub(crate) fn cosmetic_cache(self) -> CosmeticFilterCache {
        self.cosmetic_cache
    }

    /// Test-only: consumes the engine and returns its shared filter data context.
    #[cfg(test)]
    pub(crate) fn filter_data_context(self) -> FilterDataContextRef {
        self.filter_data_context
    }

    /// Loads rules from the given `FilterSet`. It is recommended to use a `FilterSet` when adding
    /// rules from multiple sources.
    ///
    /// The resulting engine starts with default (empty) resource storage; see
    /// [`Engine::use_resources`].
    pub fn from_filter_set(set: FilterSet, optimize: bool) -> Self {
        let FilterSet {
            network_filters,
            cosmetic_filters,
            ..
        } = set;

        let memory = make_flatbuffer(network_filters, cosmetic_filters, optimize);

        let filter_data_context = FilterDataContext::new(memory);

        // The blocker and the cosmetic cache each get their own handle to the same context.
        Self {
            blocker: Blocker::from_context(FilterDataContextRef::clone(&filter_data_context)),
            cosmetic_cache: CosmeticFilterCache::from_context(FilterDataContextRef::clone(
                &filter_data_context,
            )),
            resources: ResourceStorage::default(),
            filter_data_context,
        }
    }

    /// Checks whether the network request described by `request` should be blocked.
    ///
    /// The engine's currently configured resources are made available to the blocker for this
    /// check.
    pub fn check_network_request(&self, request: &Request) -> BlockerResult {
        self.blocker.check(request, &self.resources)
    }

    /// Test-only: runs only the blocker's exception check for `request`.
    #[cfg(test)]
    pub(crate) fn check_network_request_exceptions(&self, request: &Request) -> bool {
        self.blocker.check_exceptions(request)
    }

    /// Lower-level variant of [`Engine::check_network_request`] that forwards two extra flags
    /// to the blocker's parameterised check.
    ///
    /// NOTE(review): the exact semantics of `previously_matched_rule` and
    /// `force_check_exceptions` are defined by `Blocker::check_parameterised`; presumably they
    /// let several engines be chained by telling this one whether an earlier check already
    /// matched and whether exception rules must still be evaluated. Confirm against `Blocker`.
    pub fn check_network_request_subset(
        &self,
        request: &Request,
        previously_matched_rule: bool,
        force_check_exceptions: bool,
    ) -> BlockerResult {
        self.blocker.check_parameterised(
            request,
            &self.resources,
            previously_matched_rule,
            force_check_exceptions,
        )
    }

    /// Returns a string containing any additional CSP directives that should be added to this
    /// request's response. Only applies to document and subdocument requests.
    ///
    /// If multiple policies are present from different rules, they will be joined by commas.
    pub fn get_csp_directives(&self, request: &Request) -> Option<String> {
        self.blocker.get_csp_directives(request)
    }

    /// Sets this engine's tags to be _only_ the ones provided in `tags`.
    ///
    /// Tags can be used to cheaply enable or disable network rules with a corresponding `$tag`
    /// option.
    pub fn use_tags(&mut self, tags: &[&str]) {
        self.blocker.use_tags(tags);
    }

    /// Sets this engine's tags to additionally include the ones provided in `tags`.
    ///
    /// Tags can be used to cheaply enable or disable network rules with a corresponding `$tag`
    /// option.
    pub fn enable_tags(&mut self, tags: &[&str]) {
        self.blocker.enable_tags(tags);
    }

    /// Sets this engine's tags to no longer include the ones provided in `tags`.
    ///
    /// Tags can be used to cheaply enable or disable network rules with a corresponding `$tag`
    /// option.
    pub fn disable_tags(&mut self, tags: &[&str]) {
        self.blocker.disable_tags(tags);
    }

    /// Checks if a given tag exists in this engine, i.e. whether it is among the tags the
    /// blocker reports as enabled.
    ///
    /// Tags can be used to cheaply enable or disable network rules with a corresponding `$tag`
    /// option.
    pub fn tag_exists(&self, tag: &str) -> bool {
        // Compare by borrowed `&str` rather than allocating an owned copy of `tag`.
        self.blocker
            .tags_enabled()
            .iter()
            .any(|enabled| enabled.as_str() == tag)
    }

    /// Sets this engine's [Resource]s to be _only_ the ones provided in `resources`.
    ///
    /// The resources will be held in-memory. If you have special caching, management, or sharing
    /// requirements, consider [Engine::use_resource_storage] instead.
    pub fn use_resources(&mut self, resources: impl IntoIterator<Item = Resource>) {
        let storage = crate::resources::InMemoryResourceStorage::from_resources(resources);
        self.use_resource_storage(storage);
    }

    /// Sets this engine's backend for [Resource] storage to a custom implementation of
    /// [ResourceStorageBackend].
    ///
    /// If you're okay with the [Engine] holding these resources in-memory, use
    /// [Engine::use_resources] instead.
    #[cfg(not(feature = "single-thread"))]
    pub fn use_resource_storage<R: ResourceStorageBackend + 'static + Sync + Send>(
        &mut self,
        resources: R,
    ) {
        self.resources = ResourceStorage::from_backend(resources);
    }

    /// Sets this engine's backend for [Resource] storage to a custom implementation of
    /// [ResourceStorageBackend].
    ///
    /// If you're okay with the [Engine] holding these resources in-memory, use
    /// [Engine::use_resources] instead.
    #[cfg(feature = "single-thread")]
    pub fn use_resource_storage<R: ResourceStorageBackend + 'static>(&mut self, resources: R) {
        self.resources = ResourceStorage::from_backend(resources);
    }

    // Cosmetic filter functionality

    /// If any of the provided CSS classes or ids could cause a certain generic CSS hide rule
    /// (i.e. `{ display: none !important; }`) to be required, this method will return a list of
    /// CSS selectors corresponding to rules referencing those classes or ids, provided that the
    /// corresponding rules are not excepted.
    ///
    /// `exceptions` should be passed directly from `UrlSpecificResources`.
    pub fn hidden_class_id_selectors(
        &self,
        classes: impl IntoIterator<Item = impl AsRef<str>>,
        ids: impl IntoIterator<Item = impl AsRef<str>>,
        exceptions: &HashSet<String>,
    ) -> Vec<String> {
        self.cosmetic_cache
            .hidden_class_id_selectors(classes, ids, exceptions)
    }

    /// Returns a set of cosmetic filter resources required for a particular url. Once this has
    /// been called, all CSS ids and classes on a page should be passed to
    /// `hidden_class_id_selectors` to obtain any stylesheets consisting of generic rules (if the
    /// returned `generichide` value is false).
    ///
    /// If `url` cannot be turned into a [`Request`], [`UrlSpecificResources::empty`] is returned.
    pub fn url_cosmetic_resources(&self, url: &str) -> UrlSpecificResources {
        // The page is modelled as a "document" request whose source is itself.
        let request = match Request::new(url, url, "document") {
            Ok(request) => request,
            Err(_) => return UrlSpecificResources::empty(),
        };

        let generichide = self.blocker.check_generic_hide(&request);
        self.cosmetic_cache.hostname_cosmetic_resources(
            &self.resources,
            &request.hostname,
            generichide,
        )
    }

    /// Replaces the discard policy used by the blocker's regex manager.
    pub fn set_regex_discard_policy(&mut self, new_discard_policy: RegexManagerDiscardPolicy) {
        self.blocker.set_regex_discard_policy(new_discard_policy);
    }

    /// Test-only: borrows the blocker's regex manager.
    #[cfg(test)]
    pub fn borrow_regex_manager(&self) -> crate::blocker::RegexManagerRef<'_> {
        self.blocker.borrow_regex_manager()
    }

    /// Asks the blocker to discard the regex identified by `regex_id`.
    ///
    /// Only available when the `debug-info` feature is enabled.
    #[cfg(feature = "debug-info")]
    pub fn discard_regex(&mut self, regex_id: u64) {
        self.blocker.discard_regex(regex_id);
    }

    /// Collects diagnostic information: the blocker's regex debug info and the byte length of
    /// the flatbuffer data backing this engine.
    ///
    /// Only available when the `debug-info` feature is enabled.
    #[cfg(feature = "debug-info")]
    pub fn get_debug_info(&self) -> EngineDebugInfo {
        EngineDebugInfo {
            regex_debug_info: self.blocker.get_regex_debug_info(),
            flatbuffer_size: self.filter_data_context.memory.data().len(),
        }
    }

    /// Serializes the `Engine` into a binary format so that it can be quickly reloaded later.
    ///
    /// Only the filter data is written; the configured resources and the enabled tags are not
    /// part of the output.
    pub fn serialize(&self) -> Vec<u8> {
        let data = self.filter_data_context.memory.data();
        serialize_dat_file(data)
    }

    /// Deserialize the `Engine` from the binary format generated by `Engine::serialize`.
    ///
    /// The engine's filter data, blocker and cosmetic cache are replaced. The tags that were
    /// enabled before the call are re-applied to the new blocker, and the configured resources
    /// are left untouched.
    ///
    /// Note that the binary format has a built-in version number that may be incremented. There is
    /// no guarantee that later versions of the format will be deserializable across minor versions
    /// of adblock-rust; the format is provided only as a caching optimization.
    ///
    /// # Errors
    ///
    /// Returns a [`DeserializationError`] if `serialized` cannot be decoded or if the decoded
    /// flatbuffer fails verification. In that case the engine is left unmodified, since both
    /// fallible steps run before any state is replaced.
    pub fn deserialize(&mut self, serialized: &[u8]) -> Result<(), DeserializationError> {
        // Snapshot the enabled tags so they survive the blocker being rebuilt below.
        let current_tags = self.blocker.tags_enabled();

        let data = deserialize_dat_file(serialized)?;
        let memory = VerifiedFlatbufferMemory::from_raw(data)
            .map_err(DeserializationError::FlatBufferParsingError)?;

        // Nothing below can fail, so the engine is never left half-updated.
        self.filter_data_context = FilterDataContext::new(memory);
        self.blocker =
            Blocker::from_context(FilterDataContextRef::clone(&self.filter_data_context));

        let tag_refs: Vec<&str> = current_tags.iter().map(String::as_str).collect();
        self.blocker.use_tags(&tag_refs);

        self.cosmetic_cache = CosmeticFilterCache::from_context(FilterDataContextRef::clone(
            &self.filter_data_context,
        ));
        Ok(())
    }
}

/// Static assertions for `Engine: Send + Sync` traits.
#[cfg(not(feature = "single-thread"))]
fn _assertions() {
    fn _assert_send<T: Send>() {}
    fn _assert_sync<T: Sync>() {}

    _assert_send::<Engine>();
    _assert_sync::<Engine>();
}

/// Serializes parsed network and cosmetic filters into a single verified flatbuffer.
///
/// The network rules are written into the shared builder first, then the cosmetic rules;
/// `optimize` is forwarded to [`NetworkRulesBuilder::from_rules`].
fn make_flatbuffer(
    network_filters: Vec<NetworkFilter>,
    cosmetic_filters: Vec<CosmeticFilter>,
    optimize: bool,
) -> VerifiedFlatbufferMemory {
    let mut flat_builder = EngineFlatBuilder::default();

    // Network side: build the rule set, then write it into the shared builder.
    let serialized_network = FlatSerialize::serialize(
        NetworkRulesBuilder::from_rules(network_filters, optimize),
        &mut flat_builder,
    );

    // Cosmetic side: constructing the cache builder itself needs the shared builder, so this
    // stays a two-step process.
    let cosmetic_builder =
        CosmeticFilterCacheBuilder::from_rules(cosmetic_filters, &mut flat_builder);
    let serialized_cosmetic = FlatSerialize::serialize(cosmetic_builder, &mut flat_builder);

    flat_builder.finish(serialized_network, serialized_cosmetic)
}

#[cfg(test)]
#[path = "../tests/unit/engine.rs"]
mod unit_tests;