adblock/
engine.rs

1//! The adblock [`Engine`] is the primary interface for adblocking.
2
3use crate::blocker::{Blocker, BlockerResult};
4use crate::cosmetic_filter_cache::{CosmeticFilterCache, UrlSpecificResources};
5use crate::cosmetic_filter_cache_builder::CosmeticFilterCacheBuilder;
6use crate::data_format::{deserialize_dat_file, serialize_dat_file, DeserializationError};
7use crate::filters::cosmetic::CosmeticFilter;
8use crate::filters::fb_builder::EngineFlatBuilder;
9use crate::filters::fb_network_builder::NetworkRulesBuilder;
10use crate::filters::filter_data_context::{FilterDataContext, FilterDataContextRef};
11use crate::filters::network::NetworkFilter;
12use crate::flatbuffers::containers::flat_serialize::FlatSerialize;
13use crate::flatbuffers::unsafe_tools::VerifiedFlatbufferMemory;
14use crate::lists::{FilterSet, ParseOptions};
15use crate::regex_manager::RegexManagerDiscardPolicy;
16use crate::request::Request;
17use crate::resources::{Resource, ResourceStorage, ResourceStorageBackend};
18
19use std::collections::HashSet;
20
/// Drives high-level blocking logic and is responsible for loading filter lists into an optimized
/// format that can be queried efficiently.
///
/// For performance optimization reasons, the [`Engine`] is not designed to have rules added or
/// removed after its initial creation. Making changes to the rules loaded is accomplished by
/// creating a new engine to replace it.
///
/// ## Usage
///
/// ### Initialization
///
/// You'll first want to combine all of your filter lists in a [`FilterSet`], which will parse list
/// header metadata. Once all lists have been composed together, you can call
/// [`Engine::from_filter_set`] to start using them for blocking.
///
/// You may also want to supply certain assets for `$redirect` filters and `##+js(...)` scriptlet
/// injections. These are known as [`Resource`]s, and can be provided with
/// [`Engine::use_resources`]. See the [`crate::resources`] module for more information.
///
/// ### Network blocking
///
/// Use the [`Engine::check_network_request`] method to determine how to handle a network request.
///
/// If you _only_ need network blocking, consider using a [`Blocker`] directly.
///
/// ### Cosmetic filtering
///
/// Call [`Engine::url_cosmetic_resources`] to determine what actions should be taken to prepare a
/// particular page before it starts loading.
///
/// Once the page has been loaded, any new CSS classes or ids that appear on the page should be
/// passed to [`Engine::hidden_class_id_selectors`] on an ongoing basis to determine additional
/// elements that should be hidden dynamically.
pub struct Engine {
    /// Handles network request checks, CSP directives, tags and generic-hide checks.
    blocker: Blocker,
    /// Answers cosmetic filtering queries (per-hostname resources and class/id selectors).
    cosmetic_cache: CosmeticFilterCache,
    /// Resource storage handed to the blocker and the cosmetic cache when they are queried.
    resources: ResourceStorage,
    /// Shared filter data from which `blocker` and `cosmetic_cache` are built; its memory is
    /// also what [`Engine::serialize`] writes out.
    filter_data_context: FilterDataContextRef,
}
60
/// Diagnostic information about an [`Engine`], as returned by `Engine::get_debug_info`.
///
/// Only available when the `debug-info` feature is enabled.
#[cfg(feature = "debug-info")]
pub struct EngineDebugInfo {
    /// Regex debug information reported by the engine's blocker.
    pub regex_debug_info: crate::regex_manager::RegexDebugInfo,
    /// Length of the flatbuffer data backing the engine (presumably in bytes — TODO confirm).
    pub flatbuffer_size: usize,
}
66
67impl Default for Engine {
68    fn default() -> Self {
69        Self::from_filter_set(FilterSet::new(false), false)
70    }
71}
72
73impl Engine {
74    /// Loads rules in a single format, enabling optimizations and discarding debug information.
75    pub fn from_rules(
76        rules: impl IntoIterator<Item = impl AsRef<str>>,
77        opts: ParseOptions,
78    ) -> Self {
79        let mut filter_set = FilterSet::new(false);
80        filter_set.add_filters(rules, opts);
81        Self::from_filter_set(filter_set, true)
82    }
83
84    /// Loads rules, enabling optimizations and including debug information.
85    pub fn from_rules_debug(
86        rules: impl IntoIterator<Item = impl AsRef<str>>,
87        opts: ParseOptions,
88    ) -> Self {
89        Self::from_rules_parametrised(rules, opts, true, true)
90    }
91
92    pub fn from_rules_parametrised(
93        filter_rules: impl IntoIterator<Item = impl AsRef<str>>,
94        opts: ParseOptions,
95        debug: bool,
96        optimize: bool,
97    ) -> Self {
98        let mut filter_set = FilterSet::new(debug);
99        filter_set.add_filters(filter_rules, opts);
100        Self::from_filter_set(filter_set, optimize)
101    }
102
103    #[cfg(test)]
104    pub(crate) fn cosmetic_cache(self) -> CosmeticFilterCache {
105        self.cosmetic_cache
106    }
107
108    #[cfg(test)]
109    pub(crate) fn filter_data_context(self) -> FilterDataContextRef {
110        self.filter_data_context
111    }
112
113    /// Loads rules from the given `FilterSet`. It is recommended to use a `FilterSet` when adding
114    /// rules from multiple sources.
115    pub fn from_filter_set(set: FilterSet, optimize: bool) -> Self {
116        let FilterSet {
117            network_filters,
118            cosmetic_filters,
119            ..
120        } = set;
121
122        let memory = make_flatbuffer(network_filters, cosmetic_filters, optimize);
123
124        let filter_data_context = FilterDataContext::new(memory);
125
126        Self {
127            blocker: Blocker::from_context(FilterDataContextRef::clone(&filter_data_context)),
128            cosmetic_cache: CosmeticFilterCache::from_context(FilterDataContextRef::clone(
129                &filter_data_context,
130            )),
131            resources: ResourceStorage::default(),
132            filter_data_context,
133        }
134    }
135
136    /// Check if a request for a network resource from `url`, of type `request_type`, initiated by
137    /// `source_url`, should be blocked.
138    pub fn check_network_request(&self, request: &Request) -> BlockerResult {
139        self.blocker.check(request, &self.resources)
140    }
141
142    pub fn check_network_request_subset(
143        &self,
144        request: &Request,
145        previously_matched_rule: bool,
146        force_check_exceptions: bool,
147    ) -> BlockerResult {
148        self.blocker.check_parameterised(
149            request,
150            &self.resources,
151            previously_matched_rule,
152            force_check_exceptions,
153        )
154    }
155
156    /// Returns a string containing any additional CSP directives that should be added to this
157    /// request's response. Only applies to document and subdocument requests.
158    ///
159    /// If multiple policies are present from different rules, they will be joined by commas.
160    pub fn get_csp_directives(&self, request: &Request) -> Option<String> {
161        self.blocker.get_csp_directives(request)
162    }
163
164    /// Sets this engine's tags to be _only_ the ones provided in `tags`.
165    ///
166    /// Tags can be used to cheaply enable or disable network rules with a corresponding `$tag`
167    /// option.
168    pub fn use_tags(&mut self, tags: &[&str]) {
169        self.blocker.use_tags(tags);
170    }
171
172    /// Sets this engine's tags to additionally include the ones provided in `tags`.
173    ///
174    /// Tags can be used to cheaply enable or disable network rules with a corresponding `$tag`
175    /// option.
176    pub fn enable_tags(&mut self, tags: &[&str]) {
177        self.blocker.enable_tags(tags);
178    }
179
180    /// Sets this engine's tags to no longer include the ones provided in `tags`.
181    ///
182    /// Tags can be used to cheaply enable or disable network rules with a corresponding `$tag`
183    /// option.
184    pub fn disable_tags(&mut self, tags: &[&str]) {
185        self.blocker.disable_tags(tags);
186    }
187
188    /// Checks if a given tag exists in this engine.
189    ///
190    /// Tags can be used to cheaply enable or disable network rules with a corresponding `$tag`
191    /// option.
192    pub fn tag_exists(&self, tag: &str) -> bool {
193        self.blocker.tags_enabled().contains(&tag.to_owned())
194    }
195
196    /// Sets this engine's [Resource]s to be _only_ the ones provided in `resources`.
197    ///
198    /// The resources will be held in-memory. If you have special caching, management, or sharing
199    /// requirements, consider [Engine::use_resource_storage] instead.
200    pub fn use_resources(&mut self, resources: impl IntoIterator<Item = Resource>) {
201        let storage = crate::resources::InMemoryResourceStorage::from_resources(resources);
202        self.use_resource_storage(storage);
203    }
204
205    /// Sets this engine's backend for [Resource] storage to a custom implementation of
206    /// [ResourceStorageBackend].
207    ///
208    /// If you're okay with the [Engine] holding these resources in-memory, use
209    /// [Engine::use_resources] instead.
210    #[cfg(not(feature = "single-thread"))]
211    pub fn use_resource_storage<R: ResourceStorageBackend + 'static + Sync + Send>(
212        &mut self,
213        resources: R,
214    ) {
215        self.resources = ResourceStorage::from_backend(resources);
216    }
217
218    /// Sets this engine's backend for [Resource] storage to a custom implementation of
219    /// [ResourceStorageBackend].
220    ///
221    /// If you're okay with the [Engine] holding these resources in-memory, use
222    /// [Engine::use_resources] instead.
223    #[cfg(feature = "single-thread")]
224    pub fn use_resource_storage<R: ResourceStorageBackend + 'static>(&mut self, resources: R) {
225        self.resources = ResourceStorage::from_backend(resources);
226    }
227
228    // Cosmetic filter functionality
229
230    /// If any of the provided CSS classes or ids could cause a certain generic CSS hide rule
231    /// (i.e. `{ display: none !important; }`) to be required, this method will return a list of
232    /// CSS selectors corresponding to rules referencing those classes or ids, provided that the
233    /// corresponding rules are not excepted.
234    ///
235    /// `exceptions` should be passed directly from `UrlSpecificResources`.
236    pub fn hidden_class_id_selectors(
237        &self,
238        classes: impl IntoIterator<Item = impl AsRef<str>>,
239        ids: impl IntoIterator<Item = impl AsRef<str>>,
240        exceptions: &HashSet<String>,
241    ) -> Vec<String> {
242        self.cosmetic_cache
243            .hidden_class_id_selectors(classes, ids, exceptions)
244    }
245
246    /// Returns a set of cosmetic filter resources required for a particular url. Once this has
247    /// been called, all CSS ids and classes on a page should be passed to
248    /// `hidden_class_id_selectors` to obtain any stylesheets consisting of generic rules (if the
249    /// returned `generichide` value is false).
250    pub fn url_cosmetic_resources(&self, url: &str) -> UrlSpecificResources {
251        let request = if let Ok(request) = Request::new(url, url, "document") {
252            request
253        } else {
254            return UrlSpecificResources::empty();
255        };
256
257        let generichide = self.blocker.check_generic_hide(&request);
258        self.cosmetic_cache.hostname_cosmetic_resources(
259            &self.resources,
260            &request.hostname,
261            generichide,
262        )
263    }
264
265    pub fn set_regex_discard_policy(&mut self, new_discard_policy: RegexManagerDiscardPolicy) {
266        self.blocker.set_regex_discard_policy(new_discard_policy);
267    }
268
269    #[cfg(feature = "debug-info")]
270    pub fn discard_regex(&mut self, regex_id: u64) {
271        self.blocker.discard_regex(regex_id);
272    }
273
274    #[cfg(feature = "debug-info")]
275    pub fn get_debug_info(&self) -> EngineDebugInfo {
276        EngineDebugInfo {
277            regex_debug_info: self.blocker.get_regex_debug_info(),
278            flatbuffer_size: self.filter_data_context.memory.data().len(),
279        }
280    }
281
282    /// Serializes the `Engine` into a binary format so that it can be quickly reloaded later.
283    pub fn serialize(&self) -> Vec<u8> {
284        let data = self.filter_data_context.memory.data();
285        serialize_dat_file(data)
286    }
287
288    /// Deserialize the `Engine` from the binary format generated by `Engine::serialize`.
289    ///
290    /// Note that the binary format has a built-in version number that may be incremented. There is
291    /// no guarantee that later versions of the format will be deserializable across minor versions
292    /// of adblock-rust; the format is provided only as a caching optimization.
293    pub fn deserialize(&mut self, serialized: &[u8]) -> Result<(), DeserializationError> {
294        let current_tags = self.blocker.tags_enabled();
295
296        let data = deserialize_dat_file(serialized)?;
297        let memory = VerifiedFlatbufferMemory::from_raw(data)
298            .map_err(DeserializationError::FlatBufferParsingError)?;
299
300        let context = FilterDataContext::new(memory);
301        self.filter_data_context = context;
302        self.blocker =
303            Blocker::from_context(FilterDataContextRef::clone(&self.filter_data_context));
304        self.blocker
305            .use_tags(&current_tags.iter().map(|s| &**s).collect::<Vec<_>>());
306        self.cosmetic_cache = CosmeticFilterCache::from_context(FilterDataContextRef::clone(
307            &self.filter_data_context,
308        ));
309        Ok(())
310    }
311}
312
313/// Static assertions for `Engine: Send + Sync` traits.
314#[cfg(not(feature = "single-thread"))]
315fn _assertions() {
316    fn _assert_send<T: Send>() {}
317    fn _assert_sync<T: Sync>() {}
318
319    _assert_send::<Engine>();
320    _assert_sync::<Engine>();
321}
322
323fn make_flatbuffer(
324    network_filters: Vec<NetworkFilter>,
325    cosmetic_filters: Vec<CosmeticFilter>,
326    optimize: bool,
327) -> VerifiedFlatbufferMemory {
328    let mut builder = EngineFlatBuilder::default();
329    let network_rules_builder = NetworkRulesBuilder::from_rules(network_filters, optimize);
330    let network_rules = FlatSerialize::serialize(network_rules_builder, &mut builder);
331    let cosmetic_rules = CosmeticFilterCacheBuilder::from_rules(cosmetic_filters, &mut builder);
332    let cosmetic_rules = FlatSerialize::serialize(cosmetic_rules, &mut builder);
333    builder.finish(network_rules, cosmetic_rules)
334}
335
// Unit tests for this module live in a separate file and are only compiled for test builds.
#[cfg(test)]
#[path = "../tests/unit/engine.rs"]
mod unit_tests;