chromiumoxide/handler/
network.rs

1#[cfg(any(feature = "adblock", feature = "firewall"))]
2use super::blockers::block_websites::block_ads;
3use super::blockers::{
4    block_websites::block_xhr, ignore_script_embedded, ignore_script_xhr, ignore_script_xhr_media,
5    xhr::IGNORE_XHR_ASSETS,
6};
7use crate::auth::Credentials;
8#[cfg(feature = "_cache")]
9use crate::cache::BasicCachePolicy;
10use crate::cmd::CommandChain;
11use crate::handler::http::HttpRequest;
12use crate::handler::network_utils::{base_domain_from_host, host_and_rest};
13use aho_corasick::AhoCorasick;
14use case_insensitive_string::CaseInsensitiveString;
15use chromiumoxide_cdp::cdp::browser_protocol::fetch::{RequestPattern, RequestStage};
16use chromiumoxide_cdp::cdp::browser_protocol::network::{
17    EmulateNetworkConditionsByRuleParams, EventLoadingFailed, EventLoadingFinished,
18    EventRequestServedFromCache, EventRequestWillBeSent, EventResponseReceived, Headers,
19    InterceptionId, NetworkConditions, RequestId, ResourceType, Response, SetCacheDisabledParams,
20    SetExtraHttpHeadersParams,
21};
22use chromiumoxide_cdp::cdp::browser_protocol::{
23    fetch::{
24        self, AuthChallengeResponse, AuthChallengeResponseResponse, ContinueRequestParams,
25        ContinueWithAuthParams, DisableParams, EventAuthRequired, EventRequestPaused,
26    },
27    network::SetBypassServiceWorkerParams,
28};
29use chromiumoxide_cdp::cdp::browser_protocol::{
30    network::EnableParams, security::SetIgnoreCertificateErrorsParams,
31};
32use chromiumoxide_types::{Command, Method, MethodId};
33use hashbrown::{HashMap, HashSet};
34use lazy_static::lazy_static;
35use reqwest::header::PROXY_AUTHORIZATION;
36use spider_network_blocker::intercept_manager::NetworkInterceptManager;
37pub use spider_network_blocker::scripts::{
38    URL_IGNORE_SCRIPT_BASE_PATHS, URL_IGNORE_SCRIPT_STYLES_PATHS, URL_IGNORE_TRIE_PATHS,
39};
40use std::borrow::Cow;
41use std::collections::VecDeque;
42use std::time::{Duration, Instant};
43
44lazy_static! {
45    /// General patterns for popular libraries and resources
46    static ref JS_FRAMEWORK_ALLOW: Vec<&'static str> = vec![
47        "jquery",           // Covers jquery.min.js, jquery.js, etc.
48        "angular",
49        "react",            // Covers all React-related patterns
50        "vue",              // Covers all Vue-related patterns
51        "bootstrap",
52        "d3",
53        "lodash",
54        "ajax",
55        "application",
56        "app",              // Covers general app scripts like app.js
57        "main",
58        "index",
59        "bundle",
60        "vendor",
61        "runtime",
62        "polyfill",
63        "scripts",
64        "es2015.",
65        "es2020.",
66        "webpack",
67        "captcha",
68        "client",
69        "/cdn-cgi/challenge-platform/",
70        "/wp-content/js/",  // Covers Wordpress content
71        // Verified 3rd parties for request
72        "https://m.stripe.network/",
73        "https://challenges.cloudflare.com/",
74        "https://www.google.com/recaptcha/",
75        "https://google.com/recaptcha/api.js",
76        "https://www.gstatic.com/recaptcha/",
77        "https://captcha.px-cloud.net/",
78        "https://geo.captcha-delivery.com/",
79        "https://api.leminnow.com/captcha/",
80        "https://cdn.auth0.com/js/lock/",
81        "https://captcha.gtimg.com",
82        "https://client-api.arkoselabs.com/",
83        "https://www.capy.me/puzzle/",
84        "https://newassets.hcaptcha.com/",
85        "https://cdn.auth0.com/client",
86        "https://js.stripe.com/",
87        "https://cdn.prod.website-files.com/", // webflow cdn scripts
88        "https://cdnjs.cloudflare.com/",        // cloudflare cdn scripts
89        "https://code.jquery.com/jquery-"
90    ];
91
92    /// Determine if a script should be rendered in the browser by name.
93    ///
94    /// NOTE: with "allow all scripts unless blocklisted", this is not used as a gate anymore,
95    /// but we keep it for compatibility and other call sites.
96    pub static ref ALLOWED_MATCHER: AhoCorasick = AhoCorasick::new(JS_FRAMEWORK_ALLOW.iter()).expect("matcher to build");
97
98    /// General patterns for popular libraries and resources
99    static ref JS_FRAMEWORK_ALLOW_3RD_PARTY: Vec<&'static str> = vec![
100        // Verified 3rd parties for request
101        "https://m.stripe.network/",
102        "https://challenges.cloudflare.com/",
103        "https://js.stripe.com/",
104        "https://cdn.prod.website-files.com/", // webflow cdn scripts
105        "https://cdnjs.cloudflare.com/",        // cloudflare cdn scripts
106        "https://code.jquery.com/jquery-",
107        "https://ct.captcha-delivery.com/",
108        "https://geo.captcha-delivery.com/",
109        "https://img1.wsimg.com/parking-lander/static/js/main.d9ebbb8c.js", // parking landing page iframe
110        "https://cdn.auth0.com/client",
111        "https://captcha.px-cloud.net/",
112        "https://www.capy.me/puzzle/",
113        "https://www.gstatic.com/recaptcha/",
114        "https://google.com/recaptcha/",
115        "https://www.google.com/recaptcha/",
116        "https://www.recaptcha.net/recaptcha/",
117        "https://js.hcaptcha.com/1/api.js",
118        "https://hcaptcha.com/1/api.js",
119        "https://js.datadome.co/tags.js",
120        "https://api-js.datadome.co/",
121        "https://client.perimeterx.net/",
122        "https://captcha.px-cdn.net/",
123        "https://newassets.hcaptcha.com/",
124        "https://captcha.px-cloud.net/",
125        "https://s.perimeterx.net/",
126        "https://api.leminnow.com/captcha/",
127        "https://client-api.arkoselabs.com/",
128        "https://static.geetest.com/v4/gt4.js",
129        "https://static.geetest.com/",
130        "https://cdn.jsdelivr.net/npm/@friendlycaptcha/",
131        "https://cdn.perfdrive.com/aperture/",
132        "https://assets.queue-it.net/",
133        "discourse-cdn.com/",
134        "hcaptcha.com",
135        "/cdn-cgi/challenge-platform/",
136        "/_Incapsula_Resource"
137    ];
138
139    /// Determine if a script should be rendered in the browser by name.
140    pub static ref ALLOWED_MATCHER_3RD_PARTY: AhoCorasick = AhoCorasick::new(JS_FRAMEWORK_ALLOW_3RD_PARTY.iter()).expect("matcher to build");
141
142    /// path of a js framework
143    pub static ref JS_FRAMEWORK_PATH: phf::Set<&'static str> = {
144        phf::phf_set! {
145            // Add allowed assets from JS_FRAMEWORK_ASSETS except the excluded ones
146            "_astro/", "_app/immutable"
147        }
148    };
149
150    /// Ignore the content types.
151    pub static ref IGNORE_CONTENT_TYPES: phf::Set<&'static str> = phf::phf_set! {
152        "application/pdf",
153        "application/zip",
154        "application/x-rar-compressed",
155        "application/x-tar",
156        "image/png",
157        "image/jpeg",
158        "image/gif",
159        "image/bmp",
160        "image/webp",
161        "image/svg+xml",
162        "video/mp4",
163        "video/x-msvideo",
164        "video/x-matroska",
165        "video/webm",
166        "audio/mpeg",
167        "audio/ogg",
168        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
169        "application/vnd.ms-excel",
170        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
171        "application/vnd.ms-powerpoint",
172        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
173        "application/x-7z-compressed",
174        "application/x-rpm",
175        "application/x-shockwave-flash",
176        "application/rtf",
177    };
178
179    /// Ignore the resources for visual content types.
180    pub static ref IGNORE_VISUAL_RESOURCE_MAP: phf::Set<&'static str> = phf::phf_set! {
181        "Image",
182        "Media",
183        "Font"
184    };
185
186    /// Ignore the resources for visual content types.
187    pub static ref IGNORE_NETWORKING_RESOURCE_MAP: phf::Set<&'static str> = phf::phf_set! {
188        "CspViolationReport",
189        "Ping",
190    };
191
192    /// Case insenstive css matching
193    pub static ref CSS_EXTENSION: CaseInsensitiveString = CaseInsensitiveString::from("css");
194
195    /// The command chain.
196    pub static ref INIT_CHAIN: Vec<(std::borrow::Cow<'static, str>, serde_json::Value)>  = {
197        let enable = EnableParams::default();
198
199        if let Ok(c) = serde_json::to_value(&enable) {
200            vec![(enable.identifier(), c)]
201        } else {
202            vec![]
203        }
204    };
205
206    /// The command chain with https ignore.
207    pub static ref INIT_CHAIN_IGNORE_HTTP_ERRORS: Vec<(std::borrow::Cow<'static, str>, serde_json::Value)>  = {
208        let enable = EnableParams::default();
209        let mut v = vec![];
210        if let Ok(c) = serde_json::to_value(&enable) {
211            v.push((enable.identifier(), c));
212        }
213        let ignore = SetIgnoreCertificateErrorsParams::new(true);
214        if let Ok(ignored) = serde_json::to_value(&ignore) {
215            v.push((ignore.identifier(), ignored));
216        }
217
218        v
219    };
220
221    /// Enable the fetch intercept command
222    pub static ref ENABLE_FETCH: chromiumoxide_cdp::cdp::browser_protocol::fetch::EnableParams = {
223        fetch::EnableParams::builder()
224        .handle_auth_requests(true)
225        .pattern(RequestPattern::builder().url_pattern("*").request_stage(RequestStage::Request).build())
226        .build()
227    };
228}
229
/// Returns `true` when `status` is one of the HTTP redirect codes
/// 301, 302, 303, 307 or 308.
pub(crate) fn is_redirect_status(status: i64) -> bool {
    match status {
        301 | 302 | 303 | 307 | 308 => true,
        _ => false,
    }
}
234
/// Maximum age, in seconds, of a buffered `requests_will_be_sent` or
/// `request_id_to_interception_id` entry before it is evicted.
///
/// 30 seconds is generous: the CDP round-trip that reconciles the two racing
/// events normally completes in milliseconds.
const STALE_BUFFER_SECS: u64 = 30;
240
/// Newtype over a shared `adblock::Engine` that supplies a `Debug` impl.
#[cfg(feature = "adblock")]
pub struct AdblockEngine(std::sync::Arc<adblock::Engine>);

#[cfg(feature = "adblock")]
impl std::fmt::Debug for AdblockEngine {
    /// Prints only the type name; the wrapped engine is not formatted.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("AdblockEngine")
    }
}

#[cfg(feature = "adblock")]
impl std::ops::Deref for AdblockEngine {
    type Target = adblock::Engine;

    /// Borrows the wrapped engine.
    fn deref(&self) -> &adblock::Engine {
        &*self.0
    }
}
259
260#[derive(Debug)]
261/// The base network manager.
262pub struct NetworkManager {
263    /// FIFO queue of internal `NetworkEvent`s emitted by the manager.
264    ///
265    /// The manager pushes events here as CDP commands are scheduled (e.g. `SendCdpRequest`)
266    /// and as request lifecycle transitions occur (`RequestFinished`, `RequestFailed`, etc.).
267    /// Consumers pull from this queue via `poll()`.
268    queued_events: VecDeque<NetworkEvent>,
269    /// If `true`, the init command chain includes `Security.setIgnoreCertificateErrors(true)`.
270    ///
271    /// This is used to allow navigation / resource loading to proceed on sites with invalid TLS
272    /// certificates (self-signed, expired, MITM proxies, etc.).
273    ignore_httpserrors: bool,
274    /// Active in-flight requests keyed by CDP `RequestId`.
275    ///
276    /// Each entry tracks request/response metadata, redirect chain, optional interception id,
277    /// and final state used to emit `RequestFinished` / `RequestFailed`.
278    requests: HashMap<RequestId, HttpRequest>,
279    /// Temporary storage for `Network.requestWillBeSent` events when the corresponding
280    /// `Fetch.requestPaused` arrives later (or vice versa).
281    ///
282    /// When Fetch interception is enabled, `requestPaused` and `requestWillBeSent` can race.
283    /// We buffer `requestWillBeSent` here until we can attach the `InterceptionId`.
284    /// Entries older than `STALE_BUFFER_SECS` are evicted to prevent unbounded growth.
285    requests_will_be_sent: HashMap<RequestId, (EventRequestWillBeSent, Instant)>,
286    /// Extra HTTP headers to apply to subsequent network requests via CDP.
287    ///
288    /// This map is mirrored from user-supplied headers but stripped of proxy auth headers
289    /// (`Proxy-Authorization`) to avoid accidental leakage / incorrect forwarding.
290    extra_headers: std::collections::HashMap<String, String>,
291    /// Mapping from Network `RequestId` to Fetch `InterceptionId`.
292    ///
293    /// When `Fetch.requestPaused` fires before `Network.requestWillBeSent`, we temporarily
294    /// store the interception id here so it can be attached to the `HttpRequest` once the
295    /// network request is observed.
296    /// Entries older than `STALE_BUFFER_SECS` are evicted to prevent unbounded growth.
297    request_id_to_interception_id: HashMap<RequestId, (InterceptionId, Instant)>,
298    /// Whether the user has disabled the browser cache.
299    ///
300    /// This is surfaced via `Network.setCacheDisabled(true/false)` and toggled through
301    /// `set_cache_enabled()`. Internally the field is stored as “disabled” to match the CDP API.
302    user_cache_disabled: bool,
303    /// Tracks which requests have already attempted authentication.
304    ///
305    /// Used to prevent infinite auth retry loops when the origin repeatedly issues
306    /// authentication challenges (407/401). Once a request id is present here, subsequent
307    /// challenges for the same request are canceled.
308    attempted_authentications: HashSet<RequestId>,
309    /// Optional credentials used to respond to `Fetch.authRequired` challenges.
310    ///
311    /// When set, the manager will answer challenges with `ProvideCredentials` once per request
312    /// (guarded by `attempted_authentications`), otherwise it falls back to default handling.
313    credentials: Option<Credentials>,
314    /// User-facing toggle indicating whether request interception is desired.
315    ///
316    /// This is the “intent” flag controlled by `set_request_interception()`. On its own it does
317    /// not guarantee interception is active; interception is actually enabled/disabled by
318    /// `update_protocol_request_interception()` which reconciles this flag with `credentials`.
319    ///
320    /// In other words: if this is `false` but `credentials.is_some()`, interception may still be
321    /// enabled to satisfy auth challenges.
322    pub(crate) user_request_interception_enabled: bool,
323    /// Hard kill-switch to block all network traffic.
324    ///
325    /// When `true`, the manager immediately blocks requests (typically via
326    /// `FailRequest(BlockedByClient)` or fulfillment with an empty response depending on path),
327    /// and short-circuits most decision logic. This is used for safety conditions such as
328    /// exceeding `max_bytes_allowed` or other runtime protections.
329    block_all: bool,
330    /// Tracks whether the Fetch interception protocol is currently enabled in CDP.
331    ///
332    /// This is the “actual state” flag that reflects whether we have sent `Fetch.enable` or
333    /// `Fetch.disable` to the browser. It is updated by `update_protocol_request_interception()`
334    /// when `user_request_interception_enabled` or `credentials` change.
335    pub(crate) protocol_request_interception_enabled: bool,
336    /// The network is offline.
337    offline: bool,
338    /// The page request timeout.
339    pub request_timeout: Duration,
340    // made_request: bool,
341    /// Ignore visuals (no pings, prefetching, and etc).
342    pub ignore_visuals: bool,
343    /// Block CSS stylesheets.
344    pub block_stylesheets: bool,
345    /// Block javascript that is not critical to rendering.
346    ///
347    /// NOTE: With "allow all scripts unless blocklisted", this no longer blocks scripts
348    /// by itself (it remains for config compatibility).
349    pub block_javascript: bool,
350    /// Block analytics from rendering
351    pub block_analytics: bool,
352    /// Block pre-fetch request
353    pub block_prefetch: bool,
354    /// Only html from loading.
355    pub only_html: bool,
356    /// Is xml document?
357    pub xml_document: bool,
358    /// The custom intercept handle logic to run on the website.
359    pub intercept_manager: NetworkInterceptManager,
360    /// Track the amount of times the document reloaded.
361    pub document_reload_tracker: u8,
362    /// The initial target url. We want to use a new page on every navigation to prevent re-using the old domain.
363    pub document_target_url: String,
364    /// The initial target domain. We want to use a new page on every navigation to prevent re-using the old domain.
365    pub document_target_domain: String,
366    /// The max bytes to receive.
367    pub max_bytes_allowed: Option<u64>,
368    #[cfg(feature = "_cache")]
369    /// The cache site_key to use.
370    pub cache_site_key: Option<String>,
371    /// The cache policy to use.
372    #[cfg(feature = "_cache")]
373    pub cache_policy: Option<BasicCachePolicy>,
374    /// Optional per-run/per-site whitelist of URL substrings (scripts/resources).
375    whitelist_patterns: Vec<String>,
376    /// Compiled matcher for whitelist_patterns (rebuilt when patterns change).
377    whitelist_matcher: Option<AhoCorasick>,
378    /// Optional per-run/per-site blacklist of URL substrings (scripts/resources).
379    blacklist_patterns: Vec<String>,
380    /// Compiled matcher for blacklist_patterns (rebuilt when patterns change).
381    blacklist_matcher: Option<AhoCorasick>,
382    /// If true, blacklist always wins (cannot be unblocked by whitelist/3p allow).
383    blacklist_strict: bool,
384    /// Custom adblock engine built from user-supplied filter rules.
385    /// When `Some`, takes precedence over the global default engine.
386    #[cfg(feature = "adblock")]
387    adblock_engine: Option<AdblockEngine>,
388}
389
390impl NetworkManager {
391    /// A new network manager.
392    pub fn new(ignore_httpserrors: bool, request_timeout: Duration) -> Self {
393        Self {
394            queued_events: Default::default(),
395            ignore_httpserrors,
396            requests: Default::default(),
397            requests_will_be_sent: Default::default(),
398            extra_headers: Default::default(),
399            request_id_to_interception_id: Default::default(),
400            user_cache_disabled: false,
401            attempted_authentications: Default::default(),
402            credentials: None,
403            block_all: false,
404            user_request_interception_enabled: false,
405            protocol_request_interception_enabled: false,
406            offline: false,
407            request_timeout,
408            ignore_visuals: false,
409            block_javascript: false,
410            block_stylesheets: false,
411            block_prefetch: true,
412            block_analytics: true,
413            only_html: false,
414            xml_document: false,
415            intercept_manager: NetworkInterceptManager::Unknown,
416            document_reload_tracker: 0,
417            document_target_url: String::new(),
418            document_target_domain: String::new(),
419            whitelist_patterns: Vec::new(),
420            whitelist_matcher: None,
421            blacklist_patterns: Vec::new(),
422            blacklist_matcher: None,
423            blacklist_strict: true,
424            max_bytes_allowed: None,
425            #[cfg(feature = "_cache")]
426            cache_site_key: None,
427            #[cfg(feature = "_cache")]
428            cache_policy: None,
429            #[cfg(feature = "adblock")]
430            adblock_engine: None,
431        }
432    }
433
434    /// Set a custom adblock engine built from user-supplied filter rules.
435    #[cfg(feature = "adblock")]
436    pub fn set_adblock_engine(&mut self, engine: std::sync::Arc<adblock::Engine>) {
437        self.adblock_engine = Some(AdblockEngine(engine));
438    }
439
440    /// Replace the whitelist patterns (compiled once).
441    pub fn set_whitelist_patterns<I, S>(&mut self, patterns: I)
442    where
443        I: IntoIterator<Item = S>,
444        S: Into<String>,
445    {
446        self.whitelist_patterns = patterns.into_iter().map(Into::into).collect();
447        self.rebuild_whitelist_matcher();
448    }
449
450    /// Replace the blacklist patterns (compiled once).
451    pub fn set_blacklist_patterns<I, S>(&mut self, patterns: I)
452    where
453        I: IntoIterator<Item = S>,
454        S: Into<String>,
455    {
456        self.blacklist_patterns = patterns.into_iter().map(Into::into).collect();
457        self.rebuild_blacklist_matcher();
458    }
459
460    /// Add one pattern (cheap) and rebuild (call this sparingly).
461    pub fn add_blacklist_pattern<S: Into<String>>(&mut self, pattern: S) {
462        self.blacklist_patterns.push(pattern.into());
463        self.rebuild_blacklist_matcher();
464    }
465
466    /// Add many patterns and rebuild once.
467    pub fn add_blacklist_patterns<I, S>(&mut self, patterns: I)
468    where
469        I: IntoIterator<Item = S>,
470        S: Into<String>,
471    {
472        self.blacklist_patterns
473            .extend(patterns.into_iter().map(Into::into));
474        self.rebuild_blacklist_matcher();
475    }
476
477    /// Clear blacklist entirely.
478    pub fn clear_blacklist(&mut self) {
479        self.blacklist_patterns.clear();
480        self.blacklist_matcher = None;
481    }
482
483    /// Control precedence: when true, blacklist always wins.
484    pub fn set_blacklist_strict(&mut self, strict: bool) {
485        self.blacklist_strict = strict;
486    }
487
488    #[inline]
489    fn rebuild_blacklist_matcher(&mut self) {
490        if self.blacklist_patterns.is_empty() {
491            self.blacklist_matcher = None;
492            return;
493        }
494
495        self.blacklist_matcher =
496            AhoCorasick::new(self.blacklist_patterns.iter().map(|s| s.as_str())).ok();
497    }
498
499    #[inline]
500    fn is_blacklisted(&self, url: &str) -> bool {
501        self.blacklist_matcher
502            .as_ref()
503            .map(|m| m.is_match(url))
504            .unwrap_or(false)
505    }
506
507    /// Add one pattern (cheap) and rebuild (call this sparingly).
508    pub fn add_whitelist_pattern<S: Into<String>>(&mut self, pattern: S) {
509        self.whitelist_patterns.push(pattern.into());
510        self.rebuild_whitelist_matcher();
511    }
512
513    /// Add many patterns and rebuild once.
514    pub fn add_whitelist_patterns<I, S>(&mut self, patterns: I)
515    where
516        I: IntoIterator<Item = S>,
517        S: Into<String>,
518    {
519        self.whitelist_patterns
520            .extend(patterns.into_iter().map(Into::into));
521        self.rebuild_whitelist_matcher();
522    }
523
524    #[inline]
525    fn rebuild_whitelist_matcher(&mut self) {
526        if self.whitelist_patterns.is_empty() {
527            self.whitelist_matcher = None;
528            return;
529        }
530
531        // If building fails (shouldn’t for simple patterns), just disable matcher.
532        self.whitelist_matcher =
533            AhoCorasick::new(self.whitelist_patterns.iter().map(|s| s.as_str())).ok();
534    }
535
536    #[inline]
537    fn is_whitelisted(&self, url: &str) -> bool {
538        self.whitelist_matcher
539            .as_ref()
540            .map(|m| m.is_match(url))
541            .unwrap_or(false)
542    }
543
544    /// Commands to init the chain with.
545    pub fn init_commands(&self) -> CommandChain {
546        let cmds = if self.ignore_httpserrors {
547            INIT_CHAIN_IGNORE_HTTP_ERRORS.clone()
548        } else {
549            INIT_CHAIN.clone()
550        };
551        CommandChain::new(cmds, self.request_timeout)
552    }
553
554    /// Push the CDP request.
555    pub(crate) fn push_cdp_request<T: Command>(&mut self, cmd: T) {
556        let method = cmd.identifier();
557        if let Ok(params) = serde_json::to_value(cmd) {
558            self.queued_events
559                .push_back(NetworkEvent::SendCdpRequest((method, params)));
560        }
561    }
562
563    /// The next event to handle.
564    pub fn poll(&mut self) -> Option<NetworkEvent> {
565        self.queued_events.pop_front()
566    }
567
568    /// Evict stale entries from the race-condition buffers and from
569    /// `attempted_authentications`. Call this periodically (e.g. from the
570    /// handler's eviction tick) so that lost CDP events cannot cause unbounded
571    /// map growth.
572    pub fn evict_stale_entries(&mut self) {
573        let cutoff = Instant::now() - Duration::from_secs(STALE_BUFFER_SECS);
574
575        self.requests_will_be_sent.retain(|_, (_, ts)| *ts > cutoff);
576        self.request_id_to_interception_id
577            .retain(|_, (_, ts)| *ts > cutoff);
578
579        // `attempted_authentications` entries reference interception IDs that
580        // are cleaned up on loading-finished / loading-failed. If those events
581        // are lost, the set grows forever. Cross-reference with `requests`:
582        // any interception ID that no longer appears in a live request is stale.
583        if !self.attempted_authentications.is_empty() {
584            let live: HashSet<&str> = self
585                .requests
586                .values()
587                .filter_map(|r| r.interception_id.as_ref().map(|id| id.as_ref()))
588                .collect();
589            self.attempted_authentications
590                .retain(|id| live.contains(id.as_ref()));
591        }
592    }
593
594    /// Get the extra headers.
595    pub fn extra_headers(&self) -> &std::collections::HashMap<String, String> {
596        &self.extra_headers
597    }
598
599    /// Set extra HTTP headers.
600    pub fn set_extra_headers(&mut self, headers: std::collections::HashMap<String, String>) {
601        self.extra_headers = headers;
602        self.extra_headers.remove(PROXY_AUTHORIZATION.as_str());
603        self.extra_headers.remove("Proxy-Authorization");
604        if !self.extra_headers.is_empty() {
605            if let Ok(headers) = serde_json::to_value(&self.extra_headers) {
606                self.push_cdp_request(SetExtraHttpHeadersParams::new(Headers::new(headers)));
607            }
608        }
609    }
610
611    pub fn set_service_worker_enabled(&mut self, bypass: bool) {
612        self.push_cdp_request(SetBypassServiceWorkerParams::new(bypass));
613    }
614
615    pub fn set_block_all(&mut self, block_all: bool) {
616        self.block_all = block_all;
617    }
618
619    pub fn set_request_interception(&mut self, enabled: bool) {
620        self.user_request_interception_enabled = enabled;
621        self.update_protocol_request_interception();
622    }
623
624    pub fn set_cache_enabled(&mut self, enabled: bool) {
625        let run = self.user_cache_disabled == enabled;
626        self.user_cache_disabled = !enabled;
627        if run {
628            self.update_protocol_cache_disabled();
629        }
630    }
631
632    /// Enable fetch interception.
633    pub fn enable_request_intercept(&mut self) {
634        self.protocol_request_interception_enabled = true;
635    }
636
637    /// Disable fetch interception.
638    pub fn disable_request_intercept(&mut self) {
639        self.protocol_request_interception_enabled = false;
640    }
641
642    /// Set the cache site key.
643    #[cfg(feature = "_cache")]
644    pub fn set_cache_site_key(&mut self, cache_site_key: Option<String>) {
645        self.cache_site_key = cache_site_key;
646    }
647
648    /// Set the cache policy.
649    #[cfg(feature = "_cache")]
650    pub fn set_cache_policy(&mut self, cache_policy: Option<BasicCachePolicy>) {
651        self.cache_policy = cache_policy;
652    }
653
654    pub fn update_protocol_cache_disabled(&mut self) {
655        self.push_cdp_request(SetCacheDisabledParams::new(self.user_cache_disabled));
656    }
657
658    pub fn authenticate(&mut self, credentials: Credentials) {
659        self.credentials = Some(credentials);
660        self.update_protocol_request_interception();
661        self.protocol_request_interception_enabled = true;
662    }
663
664    fn update_protocol_request_interception(&mut self) {
665        let enabled = self.user_request_interception_enabled || self.credentials.is_some();
666
667        if enabled == self.protocol_request_interception_enabled {
668            return;
669        }
670
671        if enabled {
672            self.push_cdp_request(ENABLE_FETCH.clone())
673        } else {
674            self.push_cdp_request(DisableParams::default())
675        }
676    }
677
678    /// Blocklist-only script blocking.
679    /// Returns true only when the URL matches an explicit blocklist condition.
680    #[inline]
681    fn should_block_script_blocklist_only(&self, url: &str) -> bool {
682        // If analytics blocking is off, skip all analytics tries.
683        let block_analytics = self.block_analytics;
684
685        // 1) Explicit full-URL prefix trie (some rules are full URL prefixes).
686        if block_analytics && spider_network_blocker::scripts::URL_IGNORE_TRIE.contains_prefix(url)
687        {
688            return true;
689        }
690
691        // 2) Custom website block list (explicit).
692        if crate::handler::blockers::block_websites::block_website(url) {
693            return true;
694        }
695
696        // 3) Path-based explicit tries / fallbacks.
697        //
698        // We run these on:
699        // - path with leading slash ("/js/app.js")
700        // - path without leading slash ("js/app.js")
701        // - basename ("app.js") for filename-only rules (this is the fast "analytics.js" fallback)
702        if let Some(path_with_slash) = Self::url_path_with_leading_slash(url) {
703            // Remove query/fragment so matching stays stable.
704            let p_slash = Self::strip_query_fragment(path_with_slash);
705            let p_noslash = p_slash.strip_prefix('/').unwrap_or(p_slash);
706
707            // Basename for filename-only lists.
708            let base = match p_slash.rsplit('/').next() {
709                Some(b) => b,
710                None => p_slash,
711            };
712
713            // ---- Trie checks ----
714            // Some tries store prefixes like "/cdn-cgi/..." (leading slash) OR "cdn-cgi/..." (no slash).
715            if block_analytics && URL_IGNORE_TRIE_PATHS.contains_prefix(p_slash) {
716                return true;
717            }
718            if block_analytics && URL_IGNORE_TRIE_PATHS.contains_prefix(p_noslash) {
719                return true;
720            }
721            if block_analytics && URL_IGNORE_TRIE_PATHS.contains_prefix(base) {
722                return true;
723            }
724
725            // Base-path ignore tries (framework noise / known ignorable script paths).
726            // Note: these are explicit tries, so they are valid “blocklist-only” checks.
727            if URL_IGNORE_SCRIPT_BASE_PATHS.contains_prefix(p_noslash) {
728                return true;
729            }
730
731            // Style path ignores only when visuals are ignored.
732            if self.ignore_visuals && URL_IGNORE_SCRIPT_STYLES_PATHS.contains_prefix(p_noslash) {
733                return true;
734            }
735        }
736
737        false
738    }
739
740    /// Extract the absolute URL path portion WITH the leading slash.
741    ///
742    /// Example:
743    /// - "https://cdn.example.net/js/app.js?x=y" -> Some("/js/app.js?x=y")
744    #[inline]
745    fn url_path_with_leading_slash(url: &str) -> Option<&str> {
746        // find scheme separator
747        let idx = url.find("//")?;
748        let after_slashes = idx + 2;
749
750        // find first slash after host
751        let slash_rel = url[after_slashes..].find('/')?;
752        let slash_idx = after_slashes + slash_rel;
753
754        if slash_idx < url.len() {
755            Some(&url[slash_idx..])
756        } else {
757            None
758        }
759    }
760
761    /// Strip query string and fragment from a path-ish string.
762    ///
763    /// Example:
764    /// - "/a/b.js?x=1#y" -> "/a/b.js"
765    #[inline]
766    fn strip_query_fragment(s: &str) -> &str {
767        let q = s.find('?');
768        let h = s.find('#');
769
770        match (q, h) {
771            (None, None) => s,
772            (Some(i), None) => &s[..i],
773            (None, Some(i)) => &s[..i],
774            (Some(i), Some(j)) => &s[..i.min(j)],
775        }
776    }
777
778    /// Determine if the request should be skipped.
779    #[inline]
780    fn skip_xhr(
781        &self,
782        skip_networking: bool,
783        event: &EventRequestPaused,
784        network_event: bool,
785    ) -> bool {
786        // XHR check
787        if !skip_networking && network_event {
788            let request_url = event.request.url.as_str();
789
790            // check if part of ignore scripts.
791            let skip_analytics =
792                self.block_analytics && (ignore_script_xhr(request_url) || block_xhr(request_url));
793
794            if skip_analytics {
795                true
796            } else if self.block_stylesheets || self.ignore_visuals {
797                let block_css = self.block_stylesheets;
798                let block_media = self.ignore_visuals;
799
800                let mut block_request = false;
801
802                if let Some(position) = request_url.rfind('.') {
803                    let hlen = request_url.len();
804                    let has_asset = hlen - position;
805
806                    if has_asset >= 3 {
807                        let next_position = position + 1;
808
809                        if block_media
810                            && IGNORE_XHR_ASSETS.contains::<CaseInsensitiveString>(
811                                &request_url[next_position..].into(),
812                            )
813                        {
814                            block_request = true;
815                        } else if block_css {
816                            block_request = CaseInsensitiveString::from(
817                                &request_url.as_bytes()[next_position..],
818                            )
819                            .contains(&**CSS_EXTENSION)
820                        }
821                    }
822                }
823
824                if !block_request {
825                    block_request = ignore_script_xhr_media(request_url);
826                }
827
828                block_request
829            } else {
830                skip_networking
831            }
832        } else {
833            skip_networking
834        }
835    }
836
837    #[cfg(feature = "adblock")]
838    #[inline]
839    /// Detect if ad enabled.
840    fn detect_ad_if_enabled(&mut self, event: &EventRequestPaused, skip_networking: bool) -> bool {
841        if skip_networking {
842            true
843        } else {
844            block_ads(&event.request.url) || self.detect_ad(event)
845        }
846    }
847
848    /// When adblock feature is disabled, this is a no-op.
849    #[cfg(not(feature = "adblock"))]
850    #[inline]
851    fn detect_ad_if_enabled(&mut self, event: &EventRequestPaused, skip_networking: bool) -> bool {
852        use crate::handler::blockers::block_websites::block_ads;
853        if skip_networking {
854            true
855        } else {
856            block_ads(&event.request.url)
857        }
858    }
859
860    #[inline]
861    /// Fail request
862    fn fail_request_blocked(
863        &mut self,
864        request_id: &chromiumoxide_cdp::cdp::browser_protocol::fetch::RequestId,
865    ) {
866        let params = chromiumoxide_cdp::cdp::browser_protocol::fetch::FailRequestParams::new(
867            request_id.clone(),
868            chromiumoxide_cdp::cdp::browser_protocol::network::ErrorReason::BlockedByClient,
869        );
870        self.push_cdp_request(params);
871    }
872
873    #[inline]
874    /// Fulfill request
875    fn fulfill_request_empty_200(
876        &mut self,
877        request_id: &chromiumoxide_cdp::cdp::browser_protocol::fetch::RequestId,
878    ) {
879        let params = chromiumoxide_cdp::cdp::browser_protocol::fetch::FulfillRequestParams::new(
880            request_id.clone(),
881            200,
882        );
883        self.push_cdp_request(params);
884    }
885
886    #[cfg(feature = "_cache")]
887    #[inline]
888    /// Fulfill a paused Fetch request from cached bytes + header map.
889    ///
890    /// `headers` should be response headers (e.g. Content-Type, Cache-Control, etc).
891    fn fulfill_request_from_cache(
892        &mut self,
893        request_id: &chromiumoxide_cdp::cdp::browser_protocol::fetch::RequestId,
894        body: &[u8],
895        headers: &std::collections::HashMap<String, String>,
896        status: i64,
897    ) {
898        use crate::cdp::browser_protocol::fetch::HeaderEntry;
899        use crate::handler::network::fetch::FulfillRequestParams;
900        use base64::Engine;
901
902        let mut resp_headers = Vec::<HeaderEntry>::with_capacity(headers.len());
903
904        for (k, v) in headers.iter() {
905            resp_headers.push(HeaderEntry {
906                name: k.clone().into(),
907                value: v.clone().into(),
908            });
909        }
910
911        let mut params = FulfillRequestParams::new(request_id.clone(), status);
912
913        // TODO: have this already encoded prior.
914        params.body = Some(
915            base64::engine::general_purpose::STANDARD
916                .encode(body)
917                .into(),
918        );
919
920        params.response_headers = Some(resp_headers);
921
922        self.push_cdp_request(params);
923    }
924
925    #[inline]
926    /// Continue the request url.
927    fn continue_request_with_url(
928        &mut self,
929        request_id: &chromiumoxide_cdp::cdp::browser_protocol::fetch::RequestId,
930        url: Option<&str>,
931        intercept_response: bool,
932    ) {
933        let mut params = ContinueRequestParams::new(request_id.clone());
934        if let Some(url) = url {
935            params.url = Some(url.to_string());
936            params.intercept_response = Some(intercept_response);
937        }
938        self.push_cdp_request(params);
939    }
940
941    /// On fetch request paused interception.
942    #[inline]
943    pub fn on_fetch_request_paused(&mut self, event: &EventRequestPaused) {
944        if self.user_request_interception_enabled && self.protocol_request_interception_enabled {
945            return;
946        }
947
948        if self.block_all {
949            tracing::debug!(
950                "Blocked (block_all): {:?} - {}",
951                event.resource_type,
952                event.request.url
953            );
954            return self.fail_request_blocked(&event.request_id);
955        }
956
957        if let Some(network_id) = event.network_id.as_ref() {
958            if let Some((request_will_be_sent, _)) =
959                self.requests_will_be_sent.remove(network_id.as_ref())
960            {
961                self.on_request(&request_will_be_sent, Some(event.request_id.clone().into()));
962            } else {
963                self.request_id_to_interception_id.insert(
964                    network_id.clone(),
965                    (event.request_id.clone().into(), Instant::now()),
966                );
967            }
968        }
969
970        // From here on, we handle the full decision tree.
971        let javascript_resource = event.resource_type == ResourceType::Script;
972        let document_resource = event.resource_type == ResourceType::Document;
973        let network_resource =
974            !document_resource && crate::utils::is_data_resource(&event.resource_type);
975
976        // Start with static / cheap skip checks.
977        let mut skip_networking =
978            self.block_all || IGNORE_NETWORKING_RESOURCE_MAP.contains(event.resource_type.as_ref());
979
980        if event.resource_type == ResourceType::Prefetch && !self.block_prefetch {
981            skip_networking = true;
982        }
983
984        // Also short-circuit if we've reloaded this document too many times.
985        if !skip_networking {
986            skip_networking = self.document_reload_tracker >= 3;
987        }
988
989        // Handle document redirect / masking and track xml documents.
990        let (current_url_cow, had_replacer) =
991            self.handle_document_replacement_and_tracking(event, document_resource);
992
993        let current_url: &str = current_url_cow.as_ref();
994
995        let blacklisted = self.is_blacklisted(current_url);
996
997        if !self.blacklist_strict && blacklisted {
998            skip_networking = true;
999        }
1000
1001        if !skip_networking {
1002            // Allow XSL for sitemap XML.
1003            if self.xml_document && current_url.ends_with(".xsl") {
1004                skip_networking = false;
1005            } else {
1006                skip_networking = self.should_skip_for_visuals_and_basic(&event.resource_type);
1007            }
1008        }
1009
1010        skip_networking = self.detect_ad_if_enabled(event, skip_networking);
1011
1012        // Ignore embedded scripts, tracker stylesheets, and tracker images when only_html or ignore_visuals is set.
1013        if !skip_networking
1014            && self.block_javascript
1015            && (self.only_html || self.ignore_visuals)
1016            && (javascript_resource
1017                || document_resource
1018                || event.resource_type == ResourceType::Stylesheet
1019                || event.resource_type == ResourceType::Image)
1020        {
1021            skip_networking = ignore_script_embedded(current_url);
1022        }
1023
1024        // Script policy: allow-by-default.
1025        // Block only if explicit block list patterns match.
1026        if !skip_networking && javascript_resource {
1027            skip_networking = self.should_block_script_blocklist_only(current_url);
1028        }
1029
1030        // XHR / data resources.
1031        skip_networking = self.skip_xhr(skip_networking, event, network_resource);
1032
1033        // Custom interception layer.
1034        if !skip_networking && (javascript_resource || network_resource || document_resource) {
1035            skip_networking = self.intercept_manager.intercept_detection(
1036                current_url,
1037                self.ignore_visuals,
1038                network_resource,
1039            );
1040        }
1041
1042        // Custom website block list.
1043        if !skip_networking && (javascript_resource || network_resource) {
1044            skip_networking = crate::handler::blockers::block_websites::block_website(current_url);
1045        }
1046
1047        // whitelist 3rd party
1048        // not required unless explicit blocking.
1049        if skip_networking && javascript_resource && ALLOWED_MATCHER_3RD_PARTY.is_match(current_url)
1050        {
1051            skip_networking = false;
1052        }
1053
1054        // check if the url is in the whitelist.
1055        if skip_networking && self.is_whitelisted(current_url) {
1056            skip_networking = false;
1057        }
1058
1059        if self.blacklist_strict && blacklisted {
1060            skip_networking = true;
1061        }
1062
1063        if skip_networking {
1064            tracing::debug!("Blocked: {:?} - {}", event.resource_type, current_url);
1065            self.fulfill_request_empty_200(&event.request_id);
1066        } else {
1067            #[cfg(feature = "_cache")]
1068            {
1069                if let (Some(policy), Some(cache_site_key)) =
1070                    (self.cache_policy.as_ref(), self.cache_site_key.as_deref())
1071                {
1072                    let current_url = format!("{}:{}", event.request.method, &current_url);
1073
1074                    if let Some((res, cache_policy)) =
1075                        crate::cache::remote::get_session_cache_item(cache_site_key, &current_url)
1076                    {
1077                        if policy.allows_cached(&cache_policy) {
1078                            tracing::debug!(
1079                                "Remote Cached: {:?} - {}",
1080                                &event.resource_type,
1081                                &current_url
1082                            );
1083                            let flat_headers = crate::http::headers_from_multi(&res.headers);
1084                            return self.fulfill_request_from_cache(
1085                                &event.request_id,
1086                                &res.body,
1087                                &flat_headers,
1088                                res.status as i64,
1089                            );
1090                        }
1091                    }
1092                }
1093            }
1094
1095            // check our frame cache for the run.
1096            tracing::debug!("Allowed: {:?} - {}", event.resource_type, current_url);
1097            self.continue_request_with_url(
1098                &event.request_id,
1099                if had_replacer {
1100                    Some(current_url)
1101                } else {
1102                    None
1103                },
1104                !had_replacer,
1105            );
1106        }
1107    }
1108
1109    /// Shared "visuals + basic blocking" logic.
1110    ///
1111    /// IMPORTANT: Scripts are NOT blocked here anymore.
1112    /// Scripts are allowed by default and only blocked via explicit blocklists
1113    /// (should_block_script_blocklist_only / adblock / block_websites / intercept_manager).
1114    #[inline]
1115    fn should_skip_for_visuals_and_basic(&self, resource_type: &ResourceType) -> bool {
1116        (self.ignore_visuals && IGNORE_VISUAL_RESOURCE_MAP.contains(resource_type.as_ref()))
1117            || (self.block_stylesheets && *resource_type == ResourceType::Stylesheet)
1118    }
1119
1120    /// Does the network manager have a target domain?
1121    pub fn has_target_domain(&self) -> bool {
1122        !self.document_target_url.is_empty()
1123    }
1124
1125    /// Set the target page url for tracking.
1126    pub fn set_page_url(&mut self, page_target_url: String) {
1127        let host_base = host_and_rest(&page_target_url)
1128            .map(|(h, _)| base_domain_from_host(h))
1129            .unwrap_or("");
1130
1131        self.document_target_domain = host_base.to_string();
1132        self.document_target_url = page_target_url;
1133    }
1134
1135    /// Clear the initial target domain on every navigation.
1136    pub fn clear_target_domain(&mut self) {
1137        self.document_reload_tracker = 0;
1138        self.document_target_url = Default::default();
1139        self.document_target_domain = Default::default();
1140    }
1141
1142    /// Handles:
1143    /// - document reload tracking (`document_reload_tracker`)
1144    /// - redirect masking / replacement
1145    /// - xml document detection (`xml_document`)
1146    /// - `document_target_url` updates
1147    ///
1148    /// Returns (current_url, had_replacer).
1149    #[inline]
1150    fn handle_document_replacement_and_tracking<'a>(
1151        &mut self,
1152        event: &'a EventRequestPaused,
1153        document_resource: bool,
1154    ) -> (Cow<'a, str>, bool) {
1155        let mut replacer: Option<String> = None;
1156        let current_url = event.request.url.as_str();
1157
1158        if document_resource {
1159            if self.document_target_url == current_url {
1160                self.document_reload_tracker += 1;
1161            } else if !self.document_target_url.is_empty() && event.redirected_request_id.is_some()
1162            {
1163                let (http_document_replacement, mut https_document_replacement) =
1164                    if self.document_target_url.starts_with("http://") {
1165                        (
1166                            self.document_target_url.replacen("http://", "http//", 1),
1167                            self.document_target_url.replacen("http://", "https://", 1),
1168                        )
1169                    } else {
1170                        (
1171                            self.document_target_url.replacen("https://", "https//", 1),
1172                            self.document_target_url.replacen("https://", "http://", 1),
1173                        )
1174                    };
1175
1176                // Track trailing slash to restore later.
1177                let trailing = https_document_replacement.ends_with('/');
1178                if trailing {
1179                    https_document_replacement.pop();
1180                }
1181                if https_document_replacement.ends_with('/') {
1182                    https_document_replacement.pop();
1183                }
1184
1185                let redirect_mask = format!(
1186                    "{}{}",
1187                    https_document_replacement, http_document_replacement
1188                );
1189
1190                if current_url == redirect_mask {
1191                    replacer = Some(if trailing {
1192                        format!("{}/", https_document_replacement)
1193                    } else {
1194                        https_document_replacement
1195                    });
1196                }
1197            }
1198
1199            if self.document_target_url.is_empty() && current_url.ends_with(".xml") {
1200                self.xml_document = true;
1201            }
1202
1203            // Track last seen document URL.
1204            self.document_target_url = event.request.url.clone();
1205            self.document_target_domain = host_and_rest(&self.document_target_url)
1206                .map(|(h, _)| base_domain_from_host(h).to_string())
1207                .unwrap_or_default();
1208        }
1209
1210        let current_url_cow = match replacer {
1211            Some(r) => Cow::Owned(r),
1212            None => Cow::Borrowed(event.request.url.as_str()),
1213        };
1214
1215        let had_replacer = matches!(current_url_cow, Cow::Owned(_));
1216        (current_url_cow, had_replacer)
1217    }
1218
1219    /// Perform a page intercept for chrome using the adblock engine.
1220    /// Uses the custom engine when user-supplied filter rules are configured,
1221    /// otherwise falls back to the global default engine with built-in patterns.
1222    #[cfg(feature = "adblock")]
1223    pub fn detect_ad(&self, event: &EventRequestPaused) -> bool {
1224        use adblock::{
1225            lists::{FilterSet, ParseOptions, RuleTypes},
1226            Engine,
1227        };
1228
1229        lazy_static::lazy_static! {
1230            static ref AD_ENGINE: Engine = {
1231                let mut filter_set = FilterSet::new(false);
1232                let mut rules = ParseOptions::default();
1233                rules.rule_types = RuleTypes::All;
1234
1235                filter_set.add_filters(
1236                    &*spider_network_blocker::adblock::ADBLOCK_PATTERNS,
1237                    rules.clone(),
1238                );
1239
1240                // When adblock_easylist is enabled, EasyList + EasyPrivacy are
1241                // embedded at build time for zero-cost runtime loading.
1242                #[cfg(feature = "adblock_easylist")]
1243                {
1244                    static EASYLIST: &str = include_str!(concat!(env!("OUT_DIR"), "/easylist.txt"));
1245                    static EASYPRIVACY: &str = include_str!(concat!(env!("OUT_DIR"), "/easyprivacy.txt"));
1246
1247                    if !EASYLIST.is_empty() {
1248                        filter_set.add_filter_list(EASYLIST, rules.clone());
1249                    }
1250                    if !EASYPRIVACY.is_empty() {
1251                        filter_set.add_filter_list(EASYPRIVACY, rules);
1252                    }
1253                }
1254
1255                Engine::from_filter_set(filter_set, true)
1256            };
1257        }
1258
1259        let blockable = event.resource_type == ResourceType::Script
1260            || event.resource_type == ResourceType::Image
1261            || event.resource_type == ResourceType::Media
1262            || event.resource_type == ResourceType::Stylesheet
1263            || event.resource_type == ResourceType::Document
1264            || event.resource_type == ResourceType::Fetch
1265            || event.resource_type == ResourceType::Xhr;
1266
1267        if !blockable {
1268            return false;
1269        }
1270
1271        let u = &event.request.url;
1272
1273        let source_domain = if self.document_target_domain.is_empty() {
1274            "example.com"
1275        } else {
1276            &self.document_target_domain
1277        };
1278
1279        // Fast hostname extraction without full URL parsing.
1280        // preparsed(url, request_hostname, source_hostname, type, third_party)
1281        let hostname = u
1282            .strip_prefix("https://")
1283            .or_else(|| u.strip_prefix("http://"))
1284            .and_then(|rest| rest.split('/').next())
1285            // Strip userinfo (user:pass@) if present.
1286            .map(|authority| match authority.rfind('@') {
1287                Some(i) => &authority[i + 1..],
1288                None => authority,
1289            })
1290            // Strip port (:8080) if present.
1291            .and_then(|host_port| host_port.split(':').next())
1292            .unwrap_or(source_domain);
1293
1294        let resource_type_str = match event.resource_type {
1295            ResourceType::Script => "script",
1296            ResourceType::Image => "image",
1297            ResourceType::Media => "media",
1298            ResourceType::Stylesheet => "stylesheet",
1299            ResourceType::Document => "document",
1300            ResourceType::Fetch => "fetch",
1301            ResourceType::Xhr => "xhr",
1302            _ => "other",
1303        };
1304
1305        let request = adblock::request::Request::preparsed(
1306            u,
1307            hostname,
1308            source_domain,
1309            resource_type_str,
1310            !event.request.is_same_site.unwrap_or_default(),
1311        );
1312
1313        let engine: &Engine = match self.adblock_engine.as_ref() {
1314            Some(custom) => custom,
1315            None => &AD_ENGINE,
1316        };
1317
1318        engine.check_network_request(&request).matched
1319    }
1320
1321    pub fn on_fetch_auth_required(&mut self, event: &EventAuthRequired) {
1322        let response = if self
1323            .attempted_authentications
1324            .contains(event.request_id.as_ref())
1325        {
1326            AuthChallengeResponseResponse::CancelAuth
1327        } else if self.credentials.is_some() {
1328            self.attempted_authentications
1329                .insert(event.request_id.clone().into());
1330            AuthChallengeResponseResponse::ProvideCredentials
1331        } else {
1332            AuthChallengeResponseResponse::Default
1333        };
1334
1335        let mut auth = AuthChallengeResponse::new(response);
1336        if let Some(creds) = self.credentials.clone() {
1337            auth.username = Some(creds.username);
1338            auth.password = Some(creds.password);
1339        }
1340        self.push_cdp_request(ContinueWithAuthParams::new(event.request_id.clone(), auth));
1341    }
1342
1343    /// Set the page offline network emulation condition.
1344    pub fn set_offline_mode(&mut self, value: bool) {
1345        if self.offline == value {
1346            return;
1347        }
1348        self.offline = value;
1349        if let Ok(network) = EmulateNetworkConditionsByRuleParams::builder()
1350            .offline(self.offline)
1351            .matched_network_condition(
1352                NetworkConditions::builder()
1353                    .url_pattern("")
1354                    .latency(0)
1355                    .download_throughput(-1.)
1356                    .upload_throughput(-1.)
1357                    .build()
1358                    .unwrap(),
1359            )
1360            .build()
1361        {
1362            self.push_cdp_request(network);
1363        }
1364    }
1365
1366    /// Request interception doesn't happen for data URLs with Network Service.
1367    pub fn on_request_will_be_sent(&mut self, event: &EventRequestWillBeSent) {
1368        if self.protocol_request_interception_enabled && !event.request.url.starts_with("data:") {
1369            if let Some((interception_id, _)) = self
1370                .request_id_to_interception_id
1371                .remove(event.request_id.as_ref())
1372            {
1373                self.on_request(event, Some(interception_id));
1374            } else {
1375                self.requests_will_be_sent
1376                    .insert(event.request_id.clone(), (event.clone(), Instant::now()));
1377            }
1378        } else {
1379            self.on_request(event, None);
1380        }
1381    }
1382
1383    /// The request was served from the cache.
1384    pub fn on_request_served_from_cache(&mut self, event: &EventRequestServedFromCache) {
1385        if let Some(request) = self.requests.get_mut(event.request_id.as_ref()) {
1386            request.from_memory_cache = true;
1387        }
1388    }
1389
1390    /// On network response received.
1391    pub fn on_response_received(&mut self, event: &EventResponseReceived) {
1392        let mut request_failed = false;
1393
1394        // Track how many bytes we actually deducted from this target.
1395        let mut deducted: u64 = 0;
1396
1397        if let Some(max_bytes) = self.max_bytes_allowed.as_mut() {
1398            let before = *max_bytes;
1399
1400            // encoded_data_length -> saturating cast to u64
1401            let received_bytes: u64 = event.response.encoded_data_length as u64;
1402
1403            // Safe parse of Content-Length
1404            let content_length: Option<u64> = event
1405                .response
1406                .headers
1407                .inner()
1408                .get("content-length")
1409                .and_then(|v| v.as_str())
1410                .and_then(|s| s.trim().parse::<u64>().ok());
1411
1412            // Deduct what we actually received
1413            *max_bytes = max_bytes.saturating_sub(received_bytes);
1414
1415            // If the declared size can't fit, zero out now
1416            if let Some(cl) = content_length {
1417                if cl > *max_bytes {
1418                    *max_bytes = 0;
1419                }
1420            }
1421
1422            request_failed = *max_bytes == 0;
1423
1424            // Compute exact delta deducted on this event
1425            deducted = before.saturating_sub(*max_bytes);
1426        }
1427
1428        // Bubble up the deduction (even if request continues)
1429        if deducted > 0 {
1430            self.queued_events
1431                .push_back(NetworkEvent::BytesConsumed(deducted));
1432        }
1433
1434        // block all network request moving forward.
1435        if request_failed && self.max_bytes_allowed.is_some() {
1436            self.set_block_all(true);
1437        }
1438
1439        if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
1440            request.set_response(event.response.clone());
1441            self.queued_events.push_back(if request_failed {
1442                NetworkEvent::RequestFailed(request)
1443            } else {
1444                NetworkEvent::RequestFinished(request)
1445            });
1446        }
1447    }
1448
1449    /// On network loading finished.
1450    pub fn on_network_loading_finished(&mut self, event: &EventLoadingFinished) {
1451        if let Some(request) = self.requests.remove(event.request_id.as_ref()) {
1452            if let Some(interception_id) = request.interception_id.as_ref() {
1453                self.attempted_authentications
1454                    .remove(interception_id.as_ref());
1455            }
1456            self.queued_events
1457                .push_back(NetworkEvent::RequestFinished(request));
1458        }
1459    }
1460
1461    /// On network loading failed.
1462    pub fn on_network_loading_failed(&mut self, event: &EventLoadingFailed) {
1463        if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
1464            request.failure_text = Some(event.error_text.clone());
1465            if let Some(interception_id) = request.interception_id.as_ref() {
1466                self.attempted_authentications
1467                    .remove(interception_id.as_ref());
1468            }
1469            self.queued_events
1470                .push_back(NetworkEvent::RequestFailed(request));
1471        }
1472    }
1473
1474    /// On request will be sent.
1475    fn on_request(
1476        &mut self,
1477        event: &EventRequestWillBeSent,
1478        interception_id: Option<InterceptionId>,
1479    ) {
1480        let mut redirect_chain = Vec::new();
1481        let mut redirect_location = None;
1482
1483        if let Some(redirect_resp) = &event.redirect_response {
1484            if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
1485                if is_redirect_status(redirect_resp.status) {
1486                    if let Some(location) = redirect_resp.headers.inner()["Location"].as_str() {
1487                        if redirect_resp.url != location {
1488                            let fixed_location = location.replace(&redirect_resp.url, "");
1489
1490                            if !fixed_location.is_empty() {
1491                                if let Some(resp) = request.response.as_mut() {
1492                                    resp.headers.0["Location"] =
1493                                        serde_json::Value::String(fixed_location.clone());
1494                                }
1495                            }
1496
1497                            redirect_location = Some(fixed_location);
1498                        }
1499                    }
1500                }
1501
1502                {
1503                    let mut redirect_resp = redirect_resp.clone();
1504
1505                    if let Some(redirect_location) = redirect_location {
1506                        if !redirect_location.is_empty() {
1507                            redirect_resp.headers.0["Location"] =
1508                                serde_json::Value::String(redirect_location);
1509                        }
1510                    }
1511
1512                    self.handle_request_redirect(&mut request, redirect_resp);
1513                }
1514
1515                redirect_chain = std::mem::take(&mut request.redirect_chain);
1516                redirect_chain.push(request);
1517            }
1518        }
1519
1520        let request = HttpRequest::new(
1521            event.request_id.clone(),
1522            event.frame_id.clone(),
1523            interception_id,
1524            self.user_request_interception_enabled,
1525            redirect_chain,
1526        );
1527
1528        self.requests.insert(event.request_id.clone(), request);
1529        self.queued_events
1530            .push_back(NetworkEvent::Request(event.request_id.clone()));
1531    }
1532
1533    /// Handle request redirect.
1534    fn handle_request_redirect(&mut self, request: &mut HttpRequest, response: Response) {
1535        request.set_response(response);
1536        if let Some(interception_id) = request.interception_id.as_ref() {
1537            self.attempted_authentications
1538                .remove(interception_id.as_ref());
1539        }
1540    }
1541}
1542
/// Events the network manager queues for its owner to process.
///
/// In this file they are pushed onto the manager's `queued_events` queue and
/// the unit tests read them back through `poll()`.
1543#[derive(Debug)]
1544pub enum NetworkEvent {
1545    /// Send a CDP request: the method identifier paired with a JSON value.
    /// NOTE(review): the JSON value is presumably the serialized command
    /// parameters — confirm against the code that builds these events.
1546    SendCdpRequest((MethodId, serde_json::Value)),
1547    /// A request was registered; carries the id it is stored under in `requests`.
1548    Request(RequestId),
1549    /// Response.
1550    Response(RequestId),
1551    /// Request failed; carries the affected request.
1552    RequestFailed(HttpRequest),
1553    /// Request finished; carries the affected request.
1554    RequestFinished(HttpRequest),
1555    /// Bytes consumed.
1556    BytesConsumed(u64),
1557}
1558
#[cfg(test)]
mod tests {
    use super::ALLOWED_MATCHER_3RD_PARTY;
    use crate::handler::network::NetworkManager;
    #[cfg(feature = "adblock")]
    use chromiumoxide_cdp::cdp::browser_protocol::{
        fetch::EventRequestPaused, network::ResourceType,
    };
    use std::time::Duration;

    /// Page URL shared by the script-blocklist tests.
    const FORUM_PAGE: &str = "https://forum.cursor.com/t/is-2000-fast-requests-the-maximum/51085";

    /// Beacon URL used to probe the dynamic black/white lists.
    const INSIGHTS_BEACON: &str = "https://static.cloudflareinsights.com/beacon.min.js";

    // ── Shared helpers ──────────────────────────────────────────────────

    /// Build a `NetworkManager` with the settings every test here uses and
    /// point it at `page_url`.
    fn manager_for(page_url: &str) -> NetworkManager {
        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
        nm.set_page_url(page_url.to_string());
        nm
    }

    /// Build a minimal `Fetch.requestPaused` event: a `GET` for `url` with
    /// empty headers; only `resource_type` and `is_same_site` vary.
    #[cfg(feature = "adblock")]
    fn make_request_paused(
        url: &str,
        resource_type: ResourceType,
        is_same_site: bool,
    ) -> EventRequestPaused {
        use chromiumoxide_cdp::cdp::browser_protocol::network::{
            Headers, Request, RequestId, RequestReferrerPolicy, ResourcePriority,
        };
        use chromiumoxide_cdp::cdp::browser_protocol::page::FrameId;

        EventRequestPaused {
            request_id: RequestId::from("test-req".to_string()).into(),
            request: Request {
                url: url.to_string(),
                method: "GET".to_string(),
                headers: Headers::new(serde_json::Value::Object(Default::default())),
                initial_priority: ResourcePriority::Medium,
                referrer_policy: RequestReferrerPolicy::NoReferrer,
                url_fragment: None,
                has_post_data: None,
                post_data_entries: None,
                mixed_content_type: None,
                is_link_preload: None,
                trust_token_params: None,
                is_same_site: Some(is_same_site),
                is_ad_related: None,
            },
            frame_id: FrameId::from("frame1".to_string()),
            resource_type,
            response_error_reason: None,
            response_status_code: None,
            response_status_text: None,
            response_headers: None,
            network_id: None,
            redirected_request_id: None,
        }
    }

    /// Run `detect_ad` on a synthetic paused request for `url`.
    #[cfg(feature = "adblock")]
    fn detects_ad(
        nm: &mut NetworkManager,
        url: &str,
        resource_type: ResourceType,
        is_same_site: bool,
    ) -> bool {
        nm.detect_ad(&make_request_paused(url, resource_type, is_same_site))
    }

    /// Build an adblock engine that contains exactly one filter `rule`.
    #[cfg(feature = "adblock")]
    fn engine_with_rule(rule: &str) -> std::sync::Arc<adblock::Engine> {
        let mut filter_set = adblock::lists::FilterSet::new(false);
        let mut opts = adblock::lists::ParseOptions::default();
        opts.rule_types = adblock::lists::RuleTypes::All;
        filter_set.add_filters([rule], opts);
        std::sync::Arc::new(adblock::Engine::from_filter_set(filter_set, true))
    }

    /// Run a URL through the full `on_fetch_request_paused` pipeline and
    /// return whether it was blocked (`true`) or allowed (`false`).
    ///
    /// A request counts as blocked when the manager emits either
    /// `Fetch.fulfillRequest` or `Fetch.failRequest` for it.
    #[cfg(feature = "adblock")]
    fn run_full_interception(
        nm: &mut NetworkManager,
        url: &str,
        resource_type: ResourceType,
        is_same_site: bool,
    ) -> bool {
        use super::NetworkEvent;

        // Drain events left over from earlier calls so they cannot leak into
        // this verdict.
        while nm.poll().is_some() {}

        let event = make_request_paused(url, resource_type, is_same_site);
        nm.on_fetch_request_paused(&event);

        // Keep polling to the end even after a match so the queue is empty
        // for the next call.
        let mut blocked = false;
        while let Some(ev) = nm.poll() {
            if let NetworkEvent::SendCdpRequest((method, _)) = &ev {
                let m: &str = method.as_ref();
                blocked |= matches!(m, "Fetch.fulfillRequest" | "Fetch.failRequest");
            }
        }
        blocked
    }

    // ── Allow-list / block-list matcher tests ───────────────────────────

    #[test]
    fn test_allowed_matcher_3rd_party() {
        // Should be allowed (matches "/cdn-cgi/challenge-platform/")
        let cf_challenge = "https://www.something.com.ba/cdn-cgi/challenge-platform/h/g/orchestrate/chl_page/v1?ray=9abf7b523d90987e";
        assert!(
            ALLOWED_MATCHER_3RD_PARTY.is_match(cf_challenge),
            "expected Cloudflare challenge script to be allowed"
        );

        // Should NOT be allowed (not in allow-list)
        let cf_insights = "https://static.cloudflareinsights.com/beacon.min.js/vcd15cbe7772f49c399c6a5babf22c1241717689176015";
        assert!(
            !ALLOWED_MATCHER_3RD_PARTY.is_match(cf_insights),
            "expected Cloudflare Insights beacon to remain blocked (not in allow-list)"
        );

        // A couple sanity checks for existing allow patterns
        assert!(ALLOWED_MATCHER_3RD_PARTY.is_match("https://js.stripe.com/v3/"));
        assert!(ALLOWED_MATCHER_3RD_PARTY
            .is_match("https://www.google.com/recaptcha/api.js?render=explicit"));
        assert!(ALLOWED_MATCHER_3RD_PARTY.is_match("https://code.jquery.com/jquery-3.7.1.min.js"));
    }

    #[test]
    fn test_script_allowed_by_default_when_not_blocklisted() {
        // A random script that should not match the block tries.
        let ok = "https://cdn.example.net/assets/some-app-bundle-12345.js";
        assert!(
            !manager_for(FORUM_PAGE).should_block_script_blocklist_only(ok),
            "expected non-blocklisted script to be allowed"
        );
    }

    #[test]
    fn test_script_blocked_when_matches_ignore_trie_or_blocklist() {
        // This should match URL_IGNORE_TRIE_PATHS fallback ("analytics.js") logic.
        let bad = "https://cdn.example.net/js/analytics.js";
        assert!(
            manager_for(FORUM_PAGE).should_block_script_blocklist_only(bad),
            "expected analytics.js to be blocklisted"
        );
    }

    #[test]
    fn test_dynamic_blacklist_blocks_url() {
        let mut nm = manager_for("https://example.com/");

        nm.set_blacklist_patterns(["static.cloudflareinsights.com", "googletagmanager.com"]);
        assert!(nm.is_blacklisted("https://static.cloudflareinsights.com/beacon.min.js"));
        assert!(nm.is_blacklisted("https://www.googletagmanager.com/gtm.js?id=GTM-XXXX"));

        assert!(!nm.is_blacklisted("https://cdn.example.net/assets/app.js"));
    }

    #[test]
    fn test_blacklist_strict_wins_over_whitelist() {
        let mut nm = manager_for("https://example.com/");

        // The same pattern is registered in both lists.
        nm.set_blacklist_patterns(["beacon.min.js"]);
        nm.set_whitelist_patterns(["beacon.min.js"]);
        nm.set_blacklist_strict(true);

        assert!(nm.is_whitelisted(INSIGHTS_BEACON));
        assert!(nm.is_blacklisted(INSIGHTS_BEACON));

        // Only the matchers and the flag are asserted here; the blocking
        // decision itself is exercised by
        // `test_e2e_blacklist_strict_overrides_whitelist` in adblock builds.
        assert!(nm.blacklist_strict);
    }

    #[test]
    fn test_blacklist_non_strict_allows_whitelist_override() {
        let mut nm = manager_for("https://example.com/");

        nm.set_blacklist_patterns(["beacon.min.js"]);
        nm.set_whitelist_patterns(["beacon.min.js"]);
        nm.set_blacklist_strict(false);

        assert!(nm.is_blacklisted(INSIGHTS_BEACON));
        assert!(nm.is_whitelisted(INSIGHTS_BEACON));
        assert!(!nm.blacklist_strict);
    }

    // ── `detect_ad` tests ───────────────────────────────────────────────

    #[cfg(feature = "adblock")]
    #[test]
    fn test_detect_ad_blocks_known_tracker_scripts() {
        let mut nm = manager_for("https://www.wine-searcher.com/");

        assert!(
            detects_ad(
                &mut nm,
                "https://www.googletagmanager.com/gtm.js?id=GTM-XXXX",
                ResourceType::Script,
                false,
            ),
            "googletagmanager.com script should be detected as ad"
        );
    }

    #[cfg(feature = "adblock")]
    #[test]
    fn test_detect_ad_allows_legitimate_scripts() {
        let mut nm = manager_for("https://www.mylegitsite-test.com/");

        assert!(
            !detects_ad(
                &mut nm,
                "https://www.mylegitsite-test.com/static/js/app-bundle.js",
                ResourceType::Script,
                true,
            ),
            "legitimate first-party app bundle should not be blocked"
        );
    }

    #[cfg(feature = "adblock")]
    #[test]
    fn test_detect_ad_uses_source_domain() {
        let mut nm = manager_for("https://www.wine-searcher.com/some-page");

        assert!(
            !nm.document_target_domain.is_empty(),
            "document_target_domain should be set after set_page_url"
        );

        assert!(
            detects_ad(
                &mut nm,
                "https://www.google-analytics.com/analytics.js",
                ResourceType::Script,
                false,
            ),
            "google-analytics.com should be blocked as tracker"
        );
    }

    #[cfg(feature = "adblock")]
    #[test]
    fn test_custom_adblock_engine_takes_precedence() {
        let mut nm = manager_for("https://example.com/");

        // Install a custom engine with a specific rule.
        nm.set_adblock_engine(engine_with_rule("||custom-tracker.example.net^"));

        assert!(
            detects_ad(
                &mut nm,
                "https://custom-tracker.example.net/pixel.js",
                ResourceType::Script,
                false,
            ),
            "custom engine rule should block custom-tracker.example.net"
        );
    }

    // ── End-to-end interception tests ───────────────────────────────────

    #[cfg(feature = "adblock")]
    #[test]
    fn test_e2e_tracker_script_blocked() {
        let mut nm = manager_for("https://www.wine-searcher.com/");

        assert!(
            run_full_interception(
                &mut nm,
                "https://www.googletagmanager.com/gtm.js?id=GTM-XXXX",
                ResourceType::Script,
                false,
            ),
            "GTM script should be blocked through full pipeline"
        );
    }

    #[cfg(feature = "adblock")]
    #[test]
    fn test_e2e_legitimate_script_allowed() {
        let mut nm = manager_for("https://www.mylegitsite-test.com/");

        assert!(
            !run_full_interception(
                &mut nm,
                "https://www.mylegitsite-test.com/static/js/app-bundle.js",
                ResourceType::Script,
                true,
            ),
            "legitimate first-party script should be allowed through full pipeline"
        );
    }

    #[cfg(feature = "adblock")]
    #[test]
    fn test_e2e_analytics_xhr_blocked() {
        let mut nm = manager_for("https://example.org/");

        assert!(
            run_full_interception(
                &mut nm,
                "https://www.google-analytics.com/g/collect?v=2&tid=UA-123",
                ResourceType::Xhr,
                false,
            ),
            "Google Analytics XHR should be blocked through full pipeline"
        );
    }

    #[cfg(feature = "adblock")]
    #[test]
    fn test_e2e_whitelisted_overrides_adblock() {
        let mut nm = manager_for("https://example.org/");
        nm.set_whitelist_patterns(["googletagmanager.com"]);

        // GTM would normally be blocked by adblock, but whitelist overrides.
        assert!(
            !run_full_interception(
                &mut nm,
                "https://www.googletagmanager.com/gtm.js?id=GTM-TEST",
                ResourceType::Script,
                false,
            ),
            "whitelisted tracker should be allowed even when adblock would block it"
        );
    }

    #[cfg(feature = "adblock")]
    #[test]
    fn test_e2e_blacklist_strict_overrides_whitelist() {
        let mut nm = manager_for("https://example.org/");
        nm.set_blacklist_patterns(["cdn.example.net/evil.js"]);
        nm.set_whitelist_patterns(["cdn.example.net/evil.js"]);
        nm.set_blacklist_strict(true);

        assert!(
            run_full_interception(
                &mut nm,
                "https://cdn.example.net/evil.js",
                ResourceType::Script,
                false,
            ),
            "strict blacklist should win over whitelist"
        );
    }

    #[cfg(feature = "adblock")]
    #[test]
    fn test_e2e_first_party_document_not_blocked() {
        let mut nm = manager_for("https://www.nytimes.com/");

        assert!(
            !run_full_interception(
                &mut nm,
                "https://www.nytimes.com/2024/article.html",
                ResourceType::Document,
                true,
            ),
            "first-party document navigation should never be blocked"
        );
    }

    #[cfg(feature = "adblock")]
    #[test]
    fn test_e2e_custom_engine_blocks_through_pipeline() {
        let mut nm = manager_for("https://mysite.com/");
        nm.set_adblock_engine(engine_with_rule("||evil-cdn.example.net^$script"));

        assert!(
            run_full_interception(
                &mut nm,
                "https://evil-cdn.example.net/tracker.js",
                ResourceType::Script,
                false,
            ),
            "custom engine rule should block through full pipeline"
        );

        // Legitimate script on the same site should still pass.
        assert!(
            !run_full_interception(
                &mut nm,
                "https://mysite.com/app.js",
                ResourceType::Script,
                true,
            ),
            "first-party script should still be allowed with custom engine"
        );
    }

    #[cfg(feature = "adblock")]
    #[test]
    fn test_e2e_ad_image_blocked() {
        let mut nm = manager_for("https://www.mylegitsite-test.com/");

        // Ad tracking pixel should be blocked via adblock pattern or trie.
        assert!(
            run_full_interception(
                &mut nm,
                "https://googleads.g.doubleclick.net/pagead/viewthroughconversion/123/?random=456",
                ResourceType::Image,
                false,
            ),
            "doubleclick ad image/tracking pixel should be blocked"
        );

        // Legitimate first-party image should pass.
        assert!(
            !run_full_interception(
                &mut nm,
                "https://www.mylegitsite-test.com/images/logo.png",
                ResourceType::Image,
                true,
            ),
            "legitimate first-party image should not be blocked"
        );
    }

    #[cfg(feature = "adblock")]
    #[test]
    fn test_e2e_hostname_with_userinfo() {
        let mut nm = manager_for("https://example.org/");

        // URL with userinfo should still correctly identify googletagmanager.com.
        assert!(
            run_full_interception(
                &mut nm,
                "https://user:pass@www.googletagmanager.com/gtm.js?id=GTM-XXXX",
                ResourceType::Script,
                false,
            ),
            "tracker URL with userinfo should still be blocked"
        );
    }
}
chromiumoxide/handler/network.rs

chromiumoxide/handler/
network.rs