adblock/
engine.rs

1//! The adblock [`Engine`] is the primary interface for adblocking.
2
3use crate::blocker::{Blocker, BlockerResult};
4use crate::cosmetic_filter_cache::{CosmeticFilterCache, UrlSpecificResources};
5use crate::cosmetic_filter_cache_builder::CosmeticFilterCacheBuilder;
6use crate::data_format::{deserialize_dat_file, serialize_dat_file, DeserializationError};
7use crate::filters::cosmetic::CosmeticFilter;
8use crate::filters::fb_builder::EngineFlatBuilder;
9use crate::filters::fb_network_builder::NetworkRulesBuilder;
10use crate::filters::filter_data_context::{FilterDataContext, FilterDataContextRef};
11use crate::filters::network::NetworkFilter;
12use crate::flatbuffers::containers::flat_serialize::FlatSerialize;
13use crate::flatbuffers::unsafe_tools::VerifiedFlatbufferMemory;
14use crate::lists::{FilterSet, ParseOptions};
15use crate::regex_manager::RegexManagerDiscardPolicy;
16use crate::request::Request;
17use crate::resources::{Resource, ResourceStorage, ResourceStorageBackend};
18
19use std::collections::HashSet;
20
21/// Drives high-level blocking logic and is responsible for loading filter lists into an optimized
22/// format that can be queried efficiently.
23///
24/// For performance optimization reasons, the [`Engine`] is not designed to have rules added or
25/// removed after its initial creation. Making changes to the rules loaded is accomplished by
26/// creating a new engine to replace it.
27///
28/// ## Usage
29///
30/// ### Initialization
31///
32/// You'll first want to combine all of your filter lists in a [`FilterSet`], which will parse list
33/// header metadata. Once all lists have been composed together, you can call
34/// [`Engine::from_filter_set`] to start using them for blocking.
35///
36/// You may also want to supply certain assets for `$redirect` filters and `##+js(...)` scriptlet
37/// injections. These are known as [`Resource`]s, and can be provided with
38/// [`Engine::use_resources`]. See the [`crate::resources`] module for more information.
39///
40/// ### Network blocking
41///
42/// Use the [`Engine::check_network_request`] method to determine how to handle a network request.
43///
44/// If you _only_ need network blocking, consider using a [`Blocker`] directly.
45///
46/// ### Cosmetic filtering
47///
48/// Call [`Engine::url_cosmetic_resources`] to determine what actions should be taken to prepare a
49/// particular page before it starts loading.
50///
51/// Once the page has been loaded, any new CSS classes or ids that appear on the page should be passed to
52/// [`Engine::hidden_class_id_selectors`] on an ongoing basis to determine additional elements that
53/// should be hidden dynamically.
54pub struct Engine {
55    blocker: Blocker,
56    cosmetic_cache: CosmeticFilterCache,
57    resources: ResourceStorage,
58    filter_data_context: FilterDataContextRef,
59}
60
/// Diagnostic information about an [`Engine`], as returned by [`Engine::get_debug_info`].
///
/// Only compiled when the `debug-info` feature is enabled.
#[cfg(feature = "debug-info")]
pub struct EngineDebugInfo {
    /// Regex debug information reported by the engine's blocker.
    pub regex_debug_info: crate::regex_manager::RegexDebugInfo,
    /// Length of the flatbuffer data backing the engine.
    pub flatbuffer_size: usize,
}
66
67impl Default for Engine {
68    fn default() -> Self {
69        Self::from_filter_set(FilterSet::new(false), false)
70    }
71}
72
73impl Engine {
74    /// Loads rules in a single format, enabling optimizations and discarding debug information.
75    pub fn from_rules(
76        rules: impl IntoIterator<Item = impl AsRef<str>>,
77        opts: ParseOptions,
78    ) -> Self {
79        let mut filter_set = FilterSet::new(false);
80        filter_set.add_filters(rules, opts);
81        Self::from_filter_set(filter_set, true)
82    }
83
84    /// Loads rules, enabling optimizations and including debug information.
85    pub fn from_rules_debug(
86        rules: impl IntoIterator<Item = impl AsRef<str>>,
87        opts: ParseOptions,
88    ) -> Self {
89        Self::from_rules_parametrised(rules, opts, true, true)
90    }
91
92    pub fn from_rules_parametrised(
93        filter_rules: impl IntoIterator<Item = impl AsRef<str>>,
94        opts: ParseOptions,
95        debug: bool,
96        optimize: bool,
97    ) -> Self {
98        let mut filter_set = FilterSet::new(debug);
99        filter_set.add_filters(filter_rules, opts);
100        Self::from_filter_set(filter_set, optimize)
101    }
102
103    #[cfg(test)]
104    pub(crate) fn cosmetic_cache(self) -> CosmeticFilterCache {
105        self.cosmetic_cache
106    }
107
108    #[cfg(test)]
109    pub(crate) fn filter_data_context(self) -> FilterDataContextRef {
110        self.filter_data_context
111    }
112
113    /// Loads rules from the given `FilterSet`. It is recommended to use a `FilterSet` when adding
114    /// rules from multiple sources.
115    pub fn from_filter_set(set: FilterSet, optimize: bool) -> Self {
116        let FilterSet {
117            network_filters,
118            cosmetic_filters,
119            ..
120        } = set;
121
122        let memory = make_flatbuffer(network_filters, cosmetic_filters, optimize);
123
124        let filter_data_context = FilterDataContext::new(memory);
125
126        Self {
127            blocker: Blocker::from_context(FilterDataContextRef::clone(&filter_data_context)),
128            cosmetic_cache: CosmeticFilterCache::from_context(FilterDataContextRef::clone(
129                &filter_data_context,
130            )),
131            resources: ResourceStorage::default(),
132            filter_data_context,
133        }
134    }
135
136    /// Check if a request for a network resource from `url`, of type `request_type`, initiated by
137    /// `source_url`, should be blocked.
138    pub fn check_network_request(&self, request: &Request) -> BlockerResult {
139        self.blocker.check(request, &self.resources)
140    }
141
142    #[cfg(test)]
143    pub(crate) fn check_network_request_exceptions(&self, request: &Request) -> bool {
144        self.blocker.check_exceptions(request)
145    }
146
147    pub fn check_network_request_subset(
148        &self,
149        request: &Request,
150        previously_matched_rule: bool,
151        force_check_exceptions: bool,
152    ) -> BlockerResult {
153        self.blocker.check_parameterised(
154            request,
155            &self.resources,
156            previously_matched_rule,
157            force_check_exceptions,
158        )
159    }
160
161    /// Returns a string containing any additional CSP directives that should be added to this
162    /// request's response. Only applies to document and subdocument requests.
163    ///
164    /// If multiple policies are present from different rules, they will be joined by commas.
165    pub fn get_csp_directives(&self, request: &Request) -> Option<String> {
166        self.blocker.get_csp_directives(request)
167    }
168
169    /// Sets this engine's tags to be _only_ the ones provided in `tags`.
170    ///
171    /// Tags can be used to cheaply enable or disable network rules with a corresponding `$tag`
172    /// option.
173    pub fn use_tags(&mut self, tags: &[&str]) {
174        self.blocker.use_tags(tags);
175    }
176
177    /// Sets this engine's tags to additionally include the ones provided in `tags`.
178    ///
179    /// Tags can be used to cheaply enable or disable network rules with a corresponding `$tag`
180    /// option.
181    pub fn enable_tags(&mut self, tags: &[&str]) {
182        self.blocker.enable_tags(tags);
183    }
184
185    /// Sets this engine's tags to no longer include the ones provided in `tags`.
186    ///
187    /// Tags can be used to cheaply enable or disable network rules with a corresponding `$tag`
188    /// option.
189    pub fn disable_tags(&mut self, tags: &[&str]) {
190        self.blocker.disable_tags(tags);
191    }
192
193    /// Checks if a given tag exists in this engine.
194    ///
195    /// Tags can be used to cheaply enable or disable network rules with a corresponding `$tag`
196    /// option.
197    pub fn tag_exists(&self, tag: &str) -> bool {
198        self.blocker.tags_enabled().contains(&tag.to_owned())
199    }
200
201    /// Sets this engine's [Resource]s to be _only_ the ones provided in `resources`.
202    ///
203    /// The resources will be held in-memory. If you have special caching, management, or sharing
204    /// requirements, consider [Engine::use_resource_storage] instead.
205    pub fn use_resources(&mut self, resources: impl IntoIterator<Item = Resource>) {
206        let storage = crate::resources::InMemoryResourceStorage::from_resources(resources);
207        self.use_resource_storage(storage);
208    }
209
210    /// Sets this engine's backend for [Resource] storage to a custom implementation of
211    /// [ResourceStorageBackend].
212    ///
213    /// If you're okay with the [Engine] holding these resources in-memory, use
214    /// [Engine::use_resources] instead.
215    #[cfg(not(feature = "single-thread"))]
216    pub fn use_resource_storage<R: ResourceStorageBackend + 'static + Sync + Send>(
217        &mut self,
218        resources: R,
219    ) {
220        self.resources = ResourceStorage::from_backend(resources);
221    }
222
223    /// Sets this engine's backend for [Resource] storage to a custom implementation of
224    /// [ResourceStorageBackend].
225    ///
226    /// If you're okay with the [Engine] holding these resources in-memory, use
227    /// [Engine::use_resources] instead.
228    #[cfg(feature = "single-thread")]
229    pub fn use_resource_storage<R: ResourceStorageBackend + 'static>(&mut self, resources: R) {
230        self.resources = ResourceStorage::from_backend(resources);
231    }
232
233    // Cosmetic filter functionality
234
235    /// If any of the provided CSS classes or ids could cause a certain generic CSS hide rule
236    /// (i.e. `{ display: none !important; }`) to be required, this method will return a list of
237    /// CSS selectors corresponding to rules referencing those classes or ids, provided that the
238    /// corresponding rules are not excepted.
239    ///
240    /// `exceptions` should be passed directly from `UrlSpecificResources`.
241    pub fn hidden_class_id_selectors(
242        &self,
243        classes: impl IntoIterator<Item = impl AsRef<str>>,
244        ids: impl IntoIterator<Item = impl AsRef<str>>,
245        exceptions: &HashSet<String>,
246    ) -> Vec<String> {
247        self.cosmetic_cache
248            .hidden_class_id_selectors(classes, ids, exceptions)
249    }
250
251    /// Returns a set of cosmetic filter resources required for a particular url. Once this has
252    /// been called, all CSS ids and classes on a page should be passed to
253    /// `hidden_class_id_selectors` to obtain any stylesheets consisting of generic rules (if the
254    /// returned `generichide` value is false).
255    pub fn url_cosmetic_resources(&self, url: &str) -> UrlSpecificResources {
256        let request = if let Ok(request) = Request::new(url, url, "document") {
257            request
258        } else {
259            return UrlSpecificResources::empty();
260        };
261
262        let generichide = self.blocker.check_generic_hide(&request);
263        self.cosmetic_cache.hostname_cosmetic_resources(
264            &self.resources,
265            &request.hostname,
266            generichide,
267        )
268    }
269
270    pub fn set_regex_discard_policy(&mut self, new_discard_policy: RegexManagerDiscardPolicy) {
271        self.blocker.set_regex_discard_policy(new_discard_policy);
272    }
273
274    #[cfg(test)]
275    pub fn borrow_regex_manager(&self) -> crate::blocker::RegexManagerRef<'_> {
276        self.blocker.borrow_regex_manager()
277    }
278
279    #[cfg(feature = "debug-info")]
280    pub fn discard_regex(&mut self, regex_id: u64) {
281        self.blocker.discard_regex(regex_id);
282    }
283
284    #[cfg(feature = "debug-info")]
285    pub fn get_debug_info(&self) -> EngineDebugInfo {
286        EngineDebugInfo {
287            regex_debug_info: self.blocker.get_regex_debug_info(),
288            flatbuffer_size: self.filter_data_context.memory.data().len(),
289        }
290    }
291
292    /// Serializes the `Engine` into a binary format so that it can be quickly reloaded later.
293    pub fn serialize(&self) -> Vec<u8> {
294        let data = self.filter_data_context.memory.data();
295        serialize_dat_file(data)
296    }
297
298    /// Deserialize the `Engine` from the binary format generated by `Engine::serialize`.
299    ///
300    /// Note that the binary format has a built-in version number that may be incremented. There is
301    /// no guarantee that later versions of the format will be deserializable across minor versions
302    /// of adblock-rust; the format is provided only as a caching optimization.
303    pub fn deserialize(&mut self, serialized: &[u8]) -> Result<(), DeserializationError> {
304        let current_tags = self.blocker.tags_enabled();
305
306        let data = deserialize_dat_file(serialized)?;
307        let memory = VerifiedFlatbufferMemory::from_raw(data)
308            .map_err(DeserializationError::FlatBufferParsingError)?;
309
310        let context = FilterDataContext::new(memory);
311        self.filter_data_context = context;
312        self.blocker =
313            Blocker::from_context(FilterDataContextRef::clone(&self.filter_data_context));
314        self.blocker
315            .use_tags(&current_tags.iter().map(|s| &**s).collect::<Vec<_>>());
316        self.cosmetic_cache = CosmeticFilterCache::from_context(FilterDataContextRef::clone(
317            &self.filter_data_context,
318        ));
319        Ok(())
320    }
321}
322
323/// Static assertions for `Engine: Send + Sync` traits.
324#[cfg(not(feature = "single-thread"))]
325fn _assertions() {
326    fn _assert_send<T: Send>() {}
327    fn _assert_sync<T: Sync>() {}
328
329    _assert_send::<Engine>();
330    _assert_sync::<Engine>();
331}
332
333fn make_flatbuffer(
334    network_filters: Vec<NetworkFilter>,
335    cosmetic_filters: Vec<CosmeticFilter>,
336    optimize: bool,
337) -> VerifiedFlatbufferMemory {
338    let mut builder = EngineFlatBuilder::default();
339    let network_rules_builder = NetworkRulesBuilder::from_rules(network_filters, optimize);
340    let network_rules = FlatSerialize::serialize(network_rules_builder, &mut builder);
341    let cosmetic_rules = CosmeticFilterCacheBuilder::from_rules(cosmetic_filters, &mut builder);
342    let cosmetic_rules = FlatSerialize::serialize(cosmetic_rules, &mut builder);
343    builder.finish(network_rules, cosmetic_rules)
344}
345
346#[cfg(test)]
347#[path = "../tests/unit/engine.rs"]
348mod unit_tests;