chromiumoxide/handler/
network.rs

1#[cfg(any(feature = "adblock", feature = "firewall"))]
2use super::blockers::block_websites::block_ads;
3use super::blockers::{
4    block_websites::block_xhr, ignore_script_embedded, ignore_script_xhr, ignore_script_xhr_media,
5    xhr::IGNORE_XHR_ASSETS,
6};
7use crate::auth::Credentials;
8#[cfg(feature = "_cache")]
9use crate::cache::BasicCachePolicy;
10use crate::cmd::CommandChain;
11use crate::handler::http::HttpRequest;
12use crate::handler::network_utils::{base_domain_from_host, host_and_rest};
13use aho_corasick::AhoCorasick;
14use case_insensitive_string::CaseInsensitiveString;
15use chromiumoxide_cdp::cdp::browser_protocol::fetch::{RequestPattern, RequestStage};
16use chromiumoxide_cdp::cdp::browser_protocol::network::{
17    EmulateNetworkConditionsByRuleParams, EventLoadingFailed, EventLoadingFinished,
18    EventRequestServedFromCache, EventRequestWillBeSent, EventResponseReceived, Headers,
19    InitiatorType, InterceptionId, NetworkConditions, RequestId, ResourceType, Response,
20    SetCacheDisabledParams, SetExtraHttpHeadersParams,
21};
22use chromiumoxide_cdp::cdp::browser_protocol::{
23    fetch::{
24        self, AuthChallengeResponse, AuthChallengeResponseResponse, ContinueRequestParams,
25        ContinueWithAuthParams, DisableParams, EventAuthRequired, EventRequestPaused,
26    },
27    network::SetBypassServiceWorkerParams,
28};
29use chromiumoxide_cdp::cdp::browser_protocol::{
30    network::EnableParams, security::SetIgnoreCertificateErrorsParams,
31};
32use chromiumoxide_types::{Command, Method, MethodId};
33use hashbrown::{HashMap, HashSet};
34use lazy_static::lazy_static;
35use reqwest::header::PROXY_AUTHORIZATION;
36use spider_network_blocker::intercept_manager::NetworkInterceptManager;
37pub use spider_network_blocker::scripts::{
38    URL_IGNORE_SCRIPT_BASE_PATHS, URL_IGNORE_SCRIPT_STYLES_PATHS, URL_IGNORE_TRIE_PATHS,
39};
40use std::borrow::Cow;
41use std::collections::VecDeque;
42use std::time::{Duration, Instant};
43
44lazy_static! {
45    /// General patterns for popular libraries and resources
46    static ref JS_FRAMEWORK_ALLOW: Vec<&'static str> = vec![
47        "jquery",           // Covers jquery.min.js, jquery.js, etc.
48        "angular",
49        "react",            // Covers all React-related patterns
50        "vue",              // Covers all Vue-related patterns
51        "bootstrap",
52        "d3",
53        "lodash",
54        "ajax",
55        "application",
56        "app",              // Covers general app scripts like app.js
57        "main",
58        "index",
59        "bundle",
60        "vendor",
61        "runtime",
62        "polyfill",
63        "scripts",
64        "es2015.",
65        "es2020.",
66        "webpack",
67        "captcha",
68        "client",
69        "/cdn-cgi/challenge-platform/",
70        "/wp-content/js/",  // Covers Wordpress content
71        // Verified 3rd parties for request
72        "https://m.stripe.network/",
73        "https://challenges.cloudflare.com/",
74        "https://www.google.com/recaptcha/",
75        "https://google.com/recaptcha/api.js",
76        "https://www.gstatic.com/recaptcha/",
77        "https://captcha.px-cloud.net/",
78        "https://geo.captcha-delivery.com/",
79        "https://api.leminnow.com/captcha/",
80        "https://cdn.auth0.com/js/lock/",
81        "https://captcha.gtimg.com",
82        "https://client-api.arkoselabs.com/",
83        "https://www.capy.me/puzzle/",
84        "https://newassets.hcaptcha.com/",
85        "https://cdn.auth0.com/client",
86        "https://js.stripe.com/",
87        "https://cdn.prod.website-files.com/", // webflow cdn scripts
88        "https://cdnjs.cloudflare.com/",        // cloudflare cdn scripts
89        "https://code.jquery.com/jquery-"
90    ];
91
92    /// Determine if a script should be rendered in the browser by name.
93    ///
94    /// NOTE: with "allow all scripts unless blocklisted", this is not used as a gate anymore,
95    /// but we keep it for compatibility and other call sites.
96    pub static ref ALLOWED_MATCHER: AhoCorasick = AhoCorasick::new(JS_FRAMEWORK_ALLOW.iter()).expect("matcher to build");
97
98    /// General patterns for popular libraries and resources
99    static ref JS_FRAMEWORK_ALLOW_3RD_PARTY: Vec<&'static str> = vec![
100        // Verified 3rd parties for request
101        "https://m.stripe.network/",
102        "https://challenges.cloudflare.com/",
103        "https://js.stripe.com/",
104        "https://cdn.prod.website-files.com/", // webflow cdn scripts
105        "https://cdnjs.cloudflare.com/",        // cloudflare cdn scripts
106        "https://code.jquery.com/jquery-",
107        "https://ct.captcha-delivery.com/",
108        "https://geo.captcha-delivery.com/",
109        "https://img1.wsimg.com/parking-lander/static/js/main.d9ebbb8c.js", // parking landing page iframe
110        "https://cdn.auth0.com/client",
111        "https://captcha.px-cloud.net/",
112        "https://www.capy.me/puzzle/",
113        "https://www.gstatic.com/recaptcha/",
114        "https://google.com/recaptcha/",
115        "https://www.google.com/recaptcha/",
116        "https://www.recaptcha.net/recaptcha/",
117        "https://js.hcaptcha.com/1/api.js",
118        "https://hcaptcha.com/1/api.js",
119        "https://js.datadome.co/tags.js",
120        "https://api-js.datadome.co/",
121        "https://client.perimeterx.net/",
122        "https://captcha.px-cdn.net/",
123        "https://newassets.hcaptcha.com/",
124        "https://captcha.px-cloud.net/",
125        "https://s.perimeterx.net/",
126        "https://api.leminnow.com/captcha/",
127        "https://client-api.arkoselabs.com/",
128        "https://static.geetest.com/v4/gt4.js",
129        "https://static.geetest.com/",
130        "https://cdn.jsdelivr.net/npm/@friendlycaptcha/",
131        "https://cdn.perfdrive.com/aperture/",
132        "https://assets.queue-it.net/",
133        "discourse-cdn.com/",
134        "hcaptcha.com",
135        "/cdn-cgi/challenge-platform/",
136        "/_Incapsula_Resource"
137    ];
138
139    /// Determine if a script should be rendered in the browser by name.
140    pub static ref ALLOWED_MATCHER_3RD_PARTY: AhoCorasick = AhoCorasick::new(JS_FRAMEWORK_ALLOW_3RD_PARTY.iter()).expect("matcher to build");
141
142    /// path of a js framework
143    pub static ref JS_FRAMEWORK_PATH: phf::Set<&'static str> = {
144        phf::phf_set! {
145            // Add allowed assets from JS_FRAMEWORK_ASSETS except the excluded ones
146            "_astro/", "_app/immutable"
147        }
148    };
149
150    /// Ignore the content types.
151    pub static ref IGNORE_CONTENT_TYPES: phf::Set<&'static str> = phf::phf_set! {
152        "application/pdf",
153        "application/zip",
154        "application/x-rar-compressed",
155        "application/x-tar",
156        "image/png",
157        "image/jpeg",
158        "image/gif",
159        "image/bmp",
160        "image/webp",
161        "image/svg+xml",
162        "video/mp4",
163        "video/x-msvideo",
164        "video/x-matroska",
165        "video/webm",
166        "audio/mpeg",
167        "audio/ogg",
168        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
169        "application/vnd.ms-excel",
170        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
171        "application/vnd.ms-powerpoint",
172        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
173        "application/x-7z-compressed",
174        "application/x-rpm",
175        "application/x-shockwave-flash",
176        "application/rtf",
177    };
178
179    /// Ignore the resources for visual content types.
180    pub static ref IGNORE_VISUAL_RESOURCE_MAP: phf::Set<&'static str> = phf::phf_set! {
181        "Image",
182        "Media",
183        "Font"
184    };
185
186    /// Ignore the resources for visual content types.
187    pub static ref IGNORE_NETWORKING_RESOURCE_MAP: phf::Set<&'static str> = phf::phf_set! {
188        "CspViolationReport",
189        "Ping",
190    };
191
192    /// Case insenstive css matching
193    pub static ref CSS_EXTENSION: CaseInsensitiveString = CaseInsensitiveString::from("css");
194
195    /// The command chain.
196    pub static ref INIT_CHAIN: Vec<(std::borrow::Cow<'static, str>, serde_json::Value)>  = {
197        let enable = EnableParams::default();
198
199        if let Ok(c) = serde_json::to_value(&enable) {
200            vec![(enable.identifier(), c)]
201        } else {
202            vec![]
203        }
204    };
205
206    /// The command chain with https ignore.
207    pub static ref INIT_CHAIN_IGNORE_HTTP_ERRORS: Vec<(std::borrow::Cow<'static, str>, serde_json::Value)>  = {
208        let enable = EnableParams::default();
209        let mut v = vec![];
210        if let Ok(c) = serde_json::to_value(&enable) {
211            v.push((enable.identifier(), c));
212        }
213        let ignore = SetIgnoreCertificateErrorsParams::new(true);
214        if let Ok(ignored) = serde_json::to_value(&ignore) {
215            v.push((ignore.identifier(), ignored));
216        }
217
218        v
219    };
220
221    /// Enable the fetch intercept command
222    pub static ref ENABLE_FETCH: chromiumoxide_cdp::cdp::browser_protocol::fetch::EnableParams = {
223        fetch::EnableParams::builder()
224        .handle_auth_requests(true)
225        .pattern(RequestPattern::builder().url_pattern("*").request_stage(RequestStage::Request).build())
226        .build()
227    };
228}
229
/// Returns `true` when `status` is one of the HTTP redirect codes
/// 301, 302, 303, 307 or 308.
pub(crate) fn is_redirect_status(status: i64) -> bool {
    match status {
        301..=303 | 307 | 308 => true,
        _ => false,
    }
}
234
/// Maximum age, in seconds, of a buffered `requests_will_be_sent` /
/// `request_id_to_interception_id` entry before it is evicted. 30 seconds is
/// generous — the CDP round-trip that reconciles the two racing events
/// normally completes in milliseconds.
const STALE_BUFFER_SECS: u64 = 30;

/// Maximum age, in seconds, of an in-flight request entry (`requests` map)
/// that has not been resolved by a `loadingFinished` / `loadingFailed` /
/// `loadingCanceled` event before it is considered orphaned and evicted.
/// Longer than the race-condition buffer timeout because real requests can
/// legitimately take tens of seconds (streaming, slow origins, etc.).
const STALE_REQUEST_SECS: u64 = 120;
247
/// Shared handle to an `adblock::Engine`, wrapped so it can provide a `Debug` impl.
#[cfg(feature = "adblock")]
pub struct AdblockEngine(std::sync::Arc<adblock::Engine>);

/// Prints only the type name; the engine's contents are not shown.
#[cfg(feature = "adblock")]
impl std::fmt::Debug for AdblockEngine {
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        formatter.write_str("AdblockEngine")
    }
}

/// Gives transparent access to the wrapped engine.
#[cfg(feature = "adblock")]
impl std::ops::Deref for AdblockEngine {
    type Target = adblock::Engine;

    fn deref(&self) -> &adblock::Engine {
        &*self.0
    }
}
266
/// The base network manager.
///
/// Holds request-tracking state, interception and blocking configuration, and
/// a queue of outgoing `NetworkEvent`s that consumers drain via `poll()`.
#[derive(Debug)]
pub struct NetworkManager {
    /// FIFO queue of internal `NetworkEvent`s emitted by the manager.
    ///
    /// The manager pushes events here as CDP commands are scheduled (e.g. `SendCdpRequest`)
    /// and as request lifecycle transitions occur (`RequestFinished`, `RequestFailed`, etc.).
    /// Consumers pull from this queue via `poll()`.
    queued_events: VecDeque<NetworkEvent>,
    /// If `true`, the init command chain includes `Security.setIgnoreCertificateErrors(true)`.
    ///
    /// This is used to allow navigation / resource loading to proceed on sites with invalid TLS
    /// certificates (self-signed, expired, MITM proxies, etc.).
    ignore_httpserrors: bool,
    /// Active in-flight requests keyed by CDP `RequestId`.
    ///
    /// Each entry tracks request/response metadata, redirect chain, optional interception id,
    /// and final state used to emit `RequestFinished` / `RequestFailed`.
    /// Entries older than `STALE_REQUEST_SECS` are dropped by `evict_stale_entries()`.
    requests: HashMap<RequestId, HttpRequest>,
    /// Temporary storage for `Network.requestWillBeSent` events when the corresponding
    /// `Fetch.requestPaused` arrives later (or vice versa).
    ///
    /// When Fetch interception is enabled, `requestPaused` and `requestWillBeSent` can race.
    /// We buffer `requestWillBeSent` here until we can attach the `InterceptionId`.
    /// Entries older than `STALE_BUFFER_SECS` are evicted to prevent unbounded growth.
    requests_will_be_sent: HashMap<RequestId, (EventRequestWillBeSent, Instant)>,
    /// Extra HTTP headers to apply to subsequent network requests via CDP.
    ///
    /// This map is mirrored from user-supplied headers but stripped of proxy auth headers
    /// (`Proxy-Authorization`) to avoid accidental leakage / incorrect forwarding.
    extra_headers: std::collections::HashMap<String, String>,
    /// Mapping from Network `RequestId` to Fetch `InterceptionId`.
    ///
    /// When `Fetch.requestPaused` fires before `Network.requestWillBeSent`, we temporarily
    /// store the interception id here so it can be attached to the `HttpRequest` once the
    /// network request is observed.
    /// Entries older than `STALE_BUFFER_SECS` are evicted to prevent unbounded growth.
    request_id_to_interception_id: HashMap<RequestId, (InterceptionId, Instant)>,
    /// Whether the user has disabled the browser cache.
    ///
    /// This is surfaced via `Network.setCacheDisabled(true/false)` and toggled through
    /// `set_cache_enabled()`. Internally the field is stored as "disabled" to match the CDP API.
    user_cache_disabled: bool,
    /// Tracks which requests have already attempted authentication.
    ///
    /// Used to prevent infinite auth retry loops when the origin repeatedly issues
    /// authentication challenges (407/401). Once a request id is present here, subsequent
    /// challenges for the same request are canceled.
    attempted_authentications: HashSet<RequestId>,
    /// Optional credentials used to respond to `Fetch.authRequired` challenges.
    ///
    /// When set, the manager will answer challenges with `ProvideCredentials` once per request
    /// (guarded by `attempted_authentications`), otherwise it falls back to default handling.
    credentials: Option<Credentials>,
    /// User-facing toggle indicating whether request interception is desired.
    ///
    /// This is the "intent" flag controlled by `set_request_interception()`. On its own it does
    /// not guarantee interception is active; interception is actually enabled/disabled by
    /// `update_protocol_request_interception()` which reconciles this flag with `credentials`.
    ///
    /// In other words: if this is `false` but `credentials.is_some()`, interception may still be
    /// enabled to satisfy auth challenges.
    pub(crate) user_request_interception_enabled: bool,
    /// Hard kill-switch to block all network traffic.
    ///
    /// When `true`, the manager immediately blocks requests (typically via
    /// `FailRequest(BlockedByClient)` or fulfillment with an empty response depending on path),
    /// and short-circuits most decision logic. This is used for safety conditions such as
    /// exceeding `max_bytes_allowed` or other runtime protections.
    block_all: bool,
    /// Tracks whether the Fetch interception protocol is considered enabled in CDP.
    ///
    /// Written by `enable_request_intercept()`, `disable_request_intercept()` and
    /// `authenticate()`. `update_protocol_request_interception()` compares the desired state
    /// against this flag to decide whether a `Fetch.enable` / `Fetch.disable` command is needed.
    pub(crate) protocol_request_interception_enabled: bool,
    /// The network is offline.
    offline: bool,
    /// The page request timeout.
    pub request_timeout: Duration,
    /// Ignore visuals (no pings, prefetching, etc.).
    pub ignore_visuals: bool,
    /// Block CSS stylesheets.
    pub block_stylesheets: bool,
    /// Block javascript that is not critical to rendering.
    ///
    /// NOTE: With "allow all scripts unless blocklisted", this no longer blocks scripts
    /// by itself (it remains for config compatibility).
    pub block_javascript: bool,
    /// When `block_stylesheets` would skip a stylesheet, allow it through if
    /// the request URL is first-party (registrable domain matches the page's
    /// primary frame). Default `true` so SPAs that load their own CSS via
    /// dynamic imports still hydrate when callers pass `block_stylesheets`
    /// for bandwidth. Set `false` to strictly block ALL stylesheets.
    pub allow_first_party_stylesheets: bool,
    /// When a downstream blocker (intercept_manager / adblock / blocklists)
    /// would skip a script, allow it through if the request URL is
    /// first-party. Default `true` so SPA bootloaders are not collateral
    /// damage from third-party tracker rules.
    pub allow_first_party_javascript: bool,
    /// When `ignore_visuals` would skip an image/media/font, allow it through
    /// if the request URL is first-party. Default `true` so first-party
    /// image-driven SPA renderers (gallery code-splits, font-blocking
    /// hydration) still complete when callers pass `ignore_visuals`.
    pub allow_first_party_visuals: bool,
    /// Block analytics from rendering.
    pub block_analytics: bool,
    /// Block pre-fetch requests.
    pub block_prefetch: bool,
    /// Only load HTML.
    pub only_html: bool,
    /// Whether the document is XML.
    pub xml_document: bool,
    /// The custom intercept handle logic to run on the website.
    pub intercept_manager: NetworkInterceptManager,
    /// Tracks the number of times the document reloaded.
    pub document_reload_tracker: u8,
    /// The initial target url. We want to use a new page on every navigation to prevent re-using the old domain.
    pub document_target_url: String,
    /// The initial target domain. We want to use a new page on every navigation to prevent re-using the old domain.
    pub document_target_domain: String,
    /// The max bytes to receive.
    pub max_bytes_allowed: Option<u64>,
    /// Cap on main-frame Document redirect hops before the navigation is aborted.
    ///
    /// `None` disables enforcement (default, preserves prior behavior). When `Some(n)`,
    /// the (n+1)th Document redirect short-circuits: a synthetic `RequestFailed` event
    /// is emitted with `failure_text = "net::ERR_TOO_MANY_REDIRECTS"` and
    /// `Page.stopLoading` is dispatched to abort in-flight navigation. The accumulated
    /// `redirect_chain` is preserved on the failed request so consumers can inspect it.
    pub max_redirects: Option<usize>,
    /// The cache site_key to use.
    #[cfg(feature = "_cache")]
    pub cache_site_key: Option<String>,
    /// The cache policy to use.
    #[cfg(feature = "_cache")]
    pub cache_policy: Option<BasicCachePolicy>,
    /// Optional per-run/per-site whitelist of URL substrings (scripts/resources).
    whitelist_patterns: Vec<String>,
    /// Compiled matcher for whitelist_patterns (rebuilt when patterns change).
    whitelist_matcher: Option<AhoCorasick>,
    /// Optional per-run/per-site blacklist of URL substrings (scripts/resources).
    blacklist_patterns: Vec<String>,
    /// Compiled matcher for blacklist_patterns (rebuilt when patterns change).
    blacklist_matcher: Option<AhoCorasick>,
    /// If true, blacklist always wins (cannot be unblocked by whitelist/3p allow).
    blacklist_strict: bool,
    /// Custom adblock engine built from user-supplied filter rules.
    /// When `Some`, takes precedence over the global default engine.
    #[cfg(feature = "adblock")]
    adblock_engine: Option<AdblockEngine>,
}
420
421impl NetworkManager {
422    /// A new network manager.
423    pub fn new(ignore_httpserrors: bool, request_timeout: Duration) -> Self {
424        Self {
425            queued_events: Default::default(),
426            ignore_httpserrors,
427            requests: Default::default(),
428            requests_will_be_sent: Default::default(),
429            extra_headers: Default::default(),
430            request_id_to_interception_id: Default::default(),
431            user_cache_disabled: false,
432            attempted_authentications: Default::default(),
433            credentials: None,
434            block_all: false,
435            user_request_interception_enabled: false,
436            protocol_request_interception_enabled: false,
437            offline: false,
438            request_timeout,
439            ignore_visuals: false,
440            block_javascript: false,
441            block_stylesheets: false,
442            allow_first_party_stylesheets: true,
443            allow_first_party_javascript: true,
444            allow_first_party_visuals: true,
445            block_prefetch: true,
446            block_analytics: true,
447            only_html: false,
448            xml_document: false,
449            intercept_manager: NetworkInterceptManager::Unknown,
450            document_reload_tracker: 0,
451            document_target_url: String::new(),
452            document_target_domain: String::new(),
453            whitelist_patterns: Vec::new(),
454            whitelist_matcher: None,
455            blacklist_patterns: Vec::new(),
456            blacklist_matcher: None,
457            blacklist_strict: true,
458            max_bytes_allowed: None,
459            max_redirects: None,
460            #[cfg(feature = "_cache")]
461            cache_site_key: None,
462            #[cfg(feature = "_cache")]
463            cache_policy: None,
464            #[cfg(feature = "adblock")]
465            adblock_engine: None,
466        }
467    }
468
469    /// Set a custom adblock engine built from user-supplied filter rules.
470    #[cfg(feature = "adblock")]
471    pub fn set_adblock_engine(&mut self, engine: std::sync::Arc<adblock::Engine>) {
472        self.adblock_engine = Some(AdblockEngine(engine));
473    }
474
475    /// Replace the whitelist patterns (compiled once).
476    pub fn set_whitelist_patterns<I, S>(&mut self, patterns: I)
477    where
478        I: IntoIterator<Item = S>,
479        S: Into<String>,
480    {
481        self.whitelist_patterns = patterns.into_iter().map(Into::into).collect();
482        self.rebuild_whitelist_matcher();
483    }
484
485    /// Replace the blacklist patterns (compiled once).
486    pub fn set_blacklist_patterns<I, S>(&mut self, patterns: I)
487    where
488        I: IntoIterator<Item = S>,
489        S: Into<String>,
490    {
491        self.blacklist_patterns = patterns.into_iter().map(Into::into).collect();
492        self.rebuild_blacklist_matcher();
493    }
494
495    /// Add one pattern (cheap) and rebuild (call this sparingly).
496    pub fn add_blacklist_pattern<S: Into<String>>(&mut self, pattern: S) {
497        self.blacklist_patterns.push(pattern.into());
498        self.rebuild_blacklist_matcher();
499    }
500
501    /// Add many patterns and rebuild once.
502    pub fn add_blacklist_patterns<I, S>(&mut self, patterns: I)
503    where
504        I: IntoIterator<Item = S>,
505        S: Into<String>,
506    {
507        self.blacklist_patterns
508            .extend(patterns.into_iter().map(Into::into));
509        self.rebuild_blacklist_matcher();
510    }
511
512    /// Clear blacklist entirely.
513    pub fn clear_blacklist(&mut self) {
514        self.blacklist_patterns.clear();
515        self.blacklist_matcher = None;
516    }
517
    /// Controls precedence between the lists: when `strict` is `true`, the
    /// blacklist always wins.
    pub fn set_blacklist_strict(&mut self, strict: bool) {
        self.blacklist_strict = strict;
    }
522
523    #[inline]
524    fn rebuild_blacklist_matcher(&mut self) {
525        if self.blacklist_patterns.is_empty() {
526            self.blacklist_matcher = None;
527            return;
528        }
529
530        self.blacklist_matcher =
531            AhoCorasick::new(self.blacklist_patterns.iter().map(|s| s.as_str())).ok();
532    }
533
534    #[inline]
535    fn is_blacklisted(&self, url: &str) -> bool {
536        self.blacklist_matcher
537            .as_ref()
538            .map(|m| m.is_match(url))
539            .unwrap_or(false)
540    }
541
542    /// Add one pattern (cheap) and rebuild (call this sparingly).
543    pub fn add_whitelist_pattern<S: Into<String>>(&mut self, pattern: S) {
544        self.whitelist_patterns.push(pattern.into());
545        self.rebuild_whitelist_matcher();
546    }
547
548    /// Add many patterns and rebuild once.
549    pub fn add_whitelist_patterns<I, S>(&mut self, patterns: I)
550    where
551        I: IntoIterator<Item = S>,
552        S: Into<String>,
553    {
554        self.whitelist_patterns
555            .extend(patterns.into_iter().map(Into::into));
556        self.rebuild_whitelist_matcher();
557    }
558
559    #[inline]
560    fn rebuild_whitelist_matcher(&mut self) {
561        if self.whitelist_patterns.is_empty() {
562            self.whitelist_matcher = None;
563            return;
564        }
565
566        // If building fails (shouldn’t for simple patterns), just disable matcher.
567        self.whitelist_matcher =
568            AhoCorasick::new(self.whitelist_patterns.iter().map(|s| s.as_str())).ok();
569    }
570
571    #[inline]
572    fn is_whitelisted(&self, url: &str) -> bool {
573        self.whitelist_matcher
574            .as_ref()
575            .map(|m| m.is_match(url))
576            .unwrap_or(false)
577    }
578
579    /// Commands to init the chain with.
580    pub fn init_commands(&self) -> CommandChain {
581        let cmds = if self.ignore_httpserrors {
582            INIT_CHAIN_IGNORE_HTTP_ERRORS.clone()
583        } else {
584            INIT_CHAIN.clone()
585        };
586        CommandChain::new(cmds, self.request_timeout)
587    }
588
589    /// Push the CDP request.
590    pub(crate) fn push_cdp_request<T: Command>(&mut self, cmd: T) {
591        let method = cmd.identifier();
592        if let Ok(params) = serde_json::to_value(cmd) {
593            self.queued_events
594                .push_back(NetworkEvent::SendCdpRequest((method, params)));
595        }
596    }
597
    /// Pops the oldest queued event, or returns `None` when the queue is empty.
    pub fn poll(&mut self) -> Option<NetworkEvent> {
        self.queued_events.pop_front()
    }
602
603    /// Evict stale entries from the race-condition buffers and from
604    /// `attempted_authentications`. Call this periodically (e.g. from the
605    /// handler's eviction tick) so that lost CDP events cannot cause unbounded
606    /// map growth.
607    pub fn evict_stale_entries(&mut self, now: Instant) {
608        let cutoff = now - Duration::from_secs(STALE_BUFFER_SECS);
609
610        self.requests_will_be_sent.retain(|_, (_, ts)| *ts > cutoff);
611        self.request_id_to_interception_id
612            .retain(|_, (_, ts)| *ts > cutoff);
613
614        // Evict orphaned in-flight requests whose completion events
615        // (`loadingFinished` / `loadingFailed` / `loadingCanceled`) were
616        // never received.  Uses a longer timeout than the race-condition
617        // buffers since real requests can legitimately be long-lived.
618        let request_cutoff = now - Duration::from_secs(STALE_REQUEST_SECS);
619        self.requests
620            .retain(|_, req| req.created_at > request_cutoff);
621
622        // `attempted_authentications` entries reference interception IDs that
623        // are cleaned up on loading-finished / loading-failed. If those events
624        // are lost, the set grows forever. Cross-reference with `requests`:
625        // any interception ID that no longer appears in a live request is stale.
626        if !self.attempted_authentications.is_empty() {
627            let live: HashSet<&str> = self
628                .requests
629                .values()
630                .filter_map(|r| r.interception_id.as_ref().map(|id| id.as_ref()))
631                .collect();
632            self.attempted_authentications
633                .retain(|id| live.contains(id.as_ref()));
634        }
635    }
636
    /// Returns the extra HTTP headers currently stored on the manager.
    pub fn extra_headers(&self) -> &std::collections::HashMap<String, String> {
        &self.extra_headers
    }
641
642    /// Set extra HTTP headers.
643    pub fn set_extra_headers(&mut self, headers: std::collections::HashMap<String, String>) {
644        self.extra_headers = headers;
645        self.extra_headers.remove(PROXY_AUTHORIZATION.as_str());
646        self.extra_headers.remove("Proxy-Authorization");
647        if !self.extra_headers.is_empty() {
648            if let Ok(headers) = serde_json::to_value(&self.extra_headers) {
649                self.push_cdp_request(SetExtraHttpHeadersParams::new(Headers::new(headers)));
650            }
651        }
652    }
653
    /// Queues a `SetBypassServiceWorkerParams` command carrying `bypass`.
    ///
    /// NOTE(review): despite the method name, the argument is forwarded as the
    /// *bypass* flag, so `true` presumably disables service workers — confirm
    /// against callers.
    pub fn set_service_worker_enabled(&mut self, bypass: bool) {
        self.push_cdp_request(SetBypassServiceWorkerParams::new(bypass));
    }
657
    /// Sets the `block_all` kill-switch flag.
    pub fn set_block_all(&mut self, block_all: bool) {
        self.block_all = block_all;
    }
661
    /// Records whether the user wants request interception, then reconciles
    /// the protocol state via `update_protocol_request_interception()`.
    pub fn set_request_interception(&mut self, enabled: bool) {
        self.user_request_interception_enabled = enabled;
        self.update_protocol_request_interception();
    }
666
667    pub fn set_cache_enabled(&mut self, enabled: bool) {
668        let run = self.user_cache_disabled == enabled;
669        self.user_cache_disabled = !enabled;
670        if run {
671            self.update_protocol_cache_disabled();
672        }
673    }
674
    /// Marks Fetch interception as enabled.
    ///
    /// Only the `protocol_request_interception_enabled` flag is set; no CDP
    /// command is queued here.
    pub fn enable_request_intercept(&mut self) {
        self.protocol_request_interception_enabled = true;
    }
679
    /// Marks Fetch interception as disabled.
    ///
    /// Only the `protocol_request_interception_enabled` flag is cleared; no CDP
    /// command is queued here.
    pub fn disable_request_intercept(&mut self) {
        self.protocol_request_interception_enabled = false;
    }
684
    /// Sets the cache site key; `None` clears it.
    #[cfg(feature = "_cache")]
    pub fn set_cache_site_key(&mut self, cache_site_key: Option<String>) {
        self.cache_site_key = cache_site_key;
    }
690
    /// Sets the cache policy; `None` clears it.
    #[cfg(feature = "_cache")]
    pub fn set_cache_policy(&mut self, cache_policy: Option<BasicCachePolicy>) {
        self.cache_policy = cache_policy;
    }
696
697    pub fn update_protocol_cache_disabled(&mut self) {
698        self.push_cdp_request(SetCacheDisabledParams::new(self.user_cache_disabled));
699    }
700
    /// Stores `credentials` for answering auth challenges and turns on Fetch
    /// interception.
    ///
    /// `update_protocol_request_interception()` queues `Fetch.enable` if the
    /// protocol flag was not already set; the flag is then forced to `true`.
    pub fn authenticate(&mut self, credentials: Credentials) {
        self.credentials = Some(credentials);
        self.update_protocol_request_interception();
        self.protocol_request_interception_enabled = true;
    }
706
707    fn update_protocol_request_interception(&mut self) {
708        let enabled = self.user_request_interception_enabled || self.credentials.is_some();
709
710        if enabled == self.protocol_request_interception_enabled {
711            return;
712        }
713
714        if enabled {
715            self.push_cdp_request(ENABLE_FETCH.clone())
716        } else {
717            self.push_cdp_request(DisableParams::default())
718        }
719    }
720
721    /// Blocklist-only script blocking.
722    /// Returns true only when the URL matches an explicit blocklist condition.
723    #[inline]
724    fn should_block_script_blocklist_only(&self, url: &str) -> bool {
725        // If analytics blocking is off, skip all analytics tries.
726        let block_analytics = self.block_analytics;
727
728        // 1) Explicit full-URL prefix trie (some rules are full URL prefixes).
729        if block_analytics && spider_network_blocker::scripts::URL_IGNORE_TRIE.contains_prefix(url)
730        {
731            return true;
732        }
733
734        // 2) Custom website block list (explicit).
735        if crate::handler::blockers::block_websites::block_website(url) {
736            return true;
737        }
738
739        // 3) Path-based explicit tries / fallbacks.
740        //
741        // We run these on:
742        // - path with leading slash ("/js/app.js")
743        // - path without leading slash ("js/app.js")
744        // - basename ("app.js") for filename-only rules (this is the fast "analytics.js" fallback)
745        if let Some(path_with_slash) = Self::url_path_with_leading_slash(url) {
746            // Remove query/fragment so matching stays stable.
747            let p_slash = Self::strip_query_fragment(path_with_slash);
748            let p_noslash = p_slash.strip_prefix('/').unwrap_or(p_slash);
749
750            // Basename for filename-only lists.
751            let base = match p_slash.rsplit('/').next() {
752                Some(b) => b,
753                None => p_slash,
754            };
755
756            // ---- Trie checks ----
757            // Some tries store prefixes like "/cdn-cgi/..." (leading slash) OR "cdn-cgi/..." (no slash).
758            if block_analytics && URL_IGNORE_TRIE_PATHS.contains_prefix(p_slash) {
759                return true;
760            }
761            if block_analytics && URL_IGNORE_TRIE_PATHS.contains_prefix(p_noslash) {
762                return true;
763            }
764            if block_analytics && URL_IGNORE_TRIE_PATHS.contains_prefix(base) {
765                return true;
766            }
767
768            // Base-path ignore tries (framework noise / known ignorable script paths).
769            // Note: these are explicit tries, so they are valid “blocklist-only” checks.
770            if URL_IGNORE_SCRIPT_BASE_PATHS.contains_prefix(p_noslash) {
771                return true;
772            }
773
774            // Style path ignores only when visuals are ignored.
775            if self.ignore_visuals && URL_IGNORE_SCRIPT_STYLES_PATHS.contains_prefix(p_noslash) {
776                return true;
777            }
778        }
779
780        false
781    }
782
783    /// Extract the absolute URL path portion WITH the leading slash.
784    ///
785    /// Example:
786    /// - "https://cdn.example.net/js/app.js?x=y" -> Some("/js/app.js?x=y")
787    #[inline]
788    fn url_path_with_leading_slash(url: &str) -> Option<&str> {
789        // find scheme separator
790        let bytes = url.as_bytes();
791        let idx = memchr::memmem::find(bytes, b"//")?;
792        let after_slashes = idx + 2;
793
794        // find first slash after host
795        let slash_rel = memchr::memchr(b'/', &bytes[after_slashes..])?;
796        let slash_idx = after_slashes + slash_rel;
797
798        if slash_idx < url.len() {
799            Some(&url[slash_idx..])
800        } else {
801            None
802        }
803    }
804
805    /// Strip query string and fragment from a path-ish string.
806    ///
807    /// Example:
808    /// - "/a/b.js?x=1#y" -> "/a/b.js"
809    #[inline]
810    fn strip_query_fragment(s: &str) -> &str {
811        match memchr::memchr2(b'?', b'#', s.as_bytes()) {
812            Some(i) => &s[..i],
813            None => s,
814        }
815    }
816
817    /// Determine if the request should be skipped.
818    #[inline]
819    fn skip_xhr(
820        &self,
821        skip_networking: bool,
822        event: &EventRequestPaused,
823        network_event: bool,
824    ) -> bool {
825        // XHR check
826        if !skip_networking && network_event {
827            let request_url = event.request.url.as_str();
828
829            // check if part of ignore scripts.
830            let skip_analytics =
831                self.block_analytics && (ignore_script_xhr(request_url) || block_xhr(request_url));
832
833            if skip_analytics {
834                true
835            } else if self.block_stylesheets || self.ignore_visuals {
836                let block_css = self.block_stylesheets;
837                let block_media = self.ignore_visuals;
838
839                let mut block_request = false;
840
841                if let Some(position) = memchr::memrchr(b'.', request_url.as_bytes()) {
842                    let hlen = request_url.len();
843                    let has_asset = hlen - position;
844
845                    if has_asset >= 3 {
846                        let next_position = position + 1;
847
848                        if block_media
849                            && IGNORE_XHR_ASSETS.contains::<CaseInsensitiveString>(
850                                &request_url[next_position..].into(),
851                            )
852                        {
853                            block_request = true;
854                        } else if block_css {
855                            block_request = CaseInsensitiveString::from(
856                                &request_url.as_bytes()[next_position..],
857                            )
858                            .contains(&**CSS_EXTENSION)
859                        }
860                    }
861                }
862
863                if !block_request {
864                    block_request = ignore_script_xhr_media(request_url);
865                }
866
867                block_request
868            } else {
869                skip_networking
870            }
871        } else {
872            skip_networking
873        }
874    }
875
876    #[cfg(feature = "adblock")]
877    #[inline]
878    /// Detect if ad enabled.
879    fn detect_ad_if_enabled(&mut self, event: &EventRequestPaused, skip_networking: bool) -> bool {
880        if skip_networking {
881            true
882        } else {
883            block_ads(&event.request.url) || self.detect_ad(event)
884        }
885    }
886
887    /// When adblock feature is disabled, this is a no-op.
888    #[cfg(not(feature = "adblock"))]
889    #[inline]
890    fn detect_ad_if_enabled(&mut self, event: &EventRequestPaused, skip_networking: bool) -> bool {
891        use crate::handler::blockers::block_websites::block_ads;
892        if skip_networking {
893            true
894        } else {
895            block_ads(&event.request.url)
896        }
897    }
898
899    #[inline]
900    /// Fail request
901    fn fail_request_blocked(
902        &mut self,
903        request_id: &chromiumoxide_cdp::cdp::browser_protocol::fetch::RequestId,
904    ) {
905        let params = chromiumoxide_cdp::cdp::browser_protocol::fetch::FailRequestParams::new(
906            request_id.clone(),
907            chromiumoxide_cdp::cdp::browser_protocol::network::ErrorReason::BlockedByClient,
908        );
909        self.push_cdp_request(params);
910    }
911
912    #[inline]
913    /// Fulfill request
914    fn fulfill_request_empty_200(
915        &mut self,
916        request_id: &chromiumoxide_cdp::cdp::browser_protocol::fetch::RequestId,
917    ) {
918        let params = chromiumoxide_cdp::cdp::browser_protocol::fetch::FulfillRequestParams::new(
919            request_id.clone(),
920            200,
921        );
922        self.push_cdp_request(params);
923    }
924
925    #[cfg(feature = "_cache")]
926    #[inline]
927    /// Fulfill a paused Fetch request from cached bytes + header map.
928    ///
929    /// `headers` should be response headers (e.g. Content-Type, Cache-Control, etc).
930    fn fulfill_request_from_cache(
931        &mut self,
932        request_id: &chromiumoxide_cdp::cdp::browser_protocol::fetch::RequestId,
933        body: &[u8],
934        headers: &std::collections::HashMap<String, String>,
935        status: i64,
936    ) {
937        use crate::cdp::browser_protocol::fetch::HeaderEntry;
938        use crate::handler::network::fetch::FulfillRequestParams;
939        use base64::Engine;
940
941        let mut resp_headers = Vec::<HeaderEntry>::with_capacity(headers.len());
942
943        for (k, v) in headers.iter() {
944            resp_headers.push(HeaderEntry {
945                name: k.clone(),
946                value: v.clone(),
947            });
948        }
949
950        let mut params = FulfillRequestParams::new(request_id.clone(), status);
951
952        // TODO: have this already encoded prior.
953        params.body = Some(
954            base64::engine::general_purpose::STANDARD
955                .encode(body)
956                .into(),
957        );
958
959        params.response_headers = Some(resp_headers);
960
961        self.push_cdp_request(params);
962    }
963
964    #[inline]
965    /// Continue the request url.
966    fn continue_request_with_url(
967        &mut self,
968        request_id: &chromiumoxide_cdp::cdp::browser_protocol::fetch::RequestId,
969        url: Option<&str>,
970        intercept_response: bool,
971    ) {
972        let mut params = ContinueRequestParams::new(request_id.clone());
973        if let Some(url) = url {
974            params.url = Some(url.to_string());
975            params.intercept_response = Some(intercept_response);
976        }
977        self.push_cdp_request(params);
978    }
979
    /// On fetch request paused interception.
    ///
    /// Decides what to do with a request paused by the Fetch domain:
    ///
    /// - returns without responding when both
    ///   `user_request_interception_enabled` and
    ///   `protocol_request_interception_enabled` are set (presumably the user
    ///   resolves paused requests themselves — confirm against callers);
    /// - fails the request with `BlockedByClient` when `block_all` is set;
    /// - otherwise runs the blocking decision tree below. A blocked request
    ///   is fulfilled with an empty `200`; an allowed one is served from the
    ///   session cache (feature `_cache`) when the policy permits, or
    ///   continued (with a repaired URL when a masked redirect was detected).
    ///
    /// The order of the checks matters: later allow-list steps can undo an
    /// earlier block, and the strict blacklist check runs last so it wins.
    #[inline]
    pub fn on_fetch_request_paused(&mut self, event: &EventRequestPaused) {
        if self.user_request_interception_enabled && self.protocol_request_interception_enabled {
            return;
        }

        if self.block_all {
            tracing::debug!(
                "Blocked (block_all): {:?} - {}",
                event.resource_type,
                event.request.url
            );
            return self.fail_request_blocked(&event.request_id);
        }

        // Capture the CDP initiator type (set by Chrome on
        // `Network.requestWillBeSent`) before consuming the cached event.
        // Used by the legacy stylesheet heuristic below as an additive
        // fallback alongside the new `allow_first_party_*` flags — keeping
        // both keeps 2.48.2's third-party-with-unknown-initiator stylesheet
        // pass-through bug-compatible (strict superset of allowed traffic).
        // Cheap clone of an `Option<InitiatorType>` enum (no allocation).
        let initiator_type: Option<InitiatorType> = event
            .network_id
            .as_ref()
            .and_then(|nid| self.requests_will_be_sent.get(nid.as_ref()))
            .map(|(rwbs, _)| rwbs.initiator.r#type.clone());

        // Pair this paused request with its `requestWillBeSent` event when
        // that one already arrived; otherwise remember the interception id
        // (with a timestamp) under the network id.
        if let Some(network_id) = event.network_id.as_ref() {
            if let Some((request_will_be_sent, _)) =
                self.requests_will_be_sent.remove(network_id.as_ref())
            {
                self.on_request(&request_will_be_sent, Some(event.request_id.clone().into()));
            } else {
                self.request_id_to_interception_id.insert(
                    network_id.clone(),
                    (event.request_id.clone().into(), Instant::now()),
                );
            }
        }

        // From here on, we handle the full decision tree.
        let javascript_resource = event.resource_type == ResourceType::Script;
        let document_resource = event.resource_type == ResourceType::Document;
        let network_resource =
            !document_resource && crate::utils::is_data_resource(&event.resource_type);

        // Start with static / cheap skip checks.
        // NOTE(review): `self.block_all` is always false here because the
        // `block_all` case returned above; the term is redundant.
        let mut skip_networking =
            self.block_all || IGNORE_NETWORKING_RESOURCE_MAP.contains(event.resource_type.as_ref());

        // NOTE(review): this skips Prefetch requests when `block_prefetch` is
        // *false*, which reads inverted relative to the flag name — confirm
        // the intended meaning of `block_prefetch`.
        if event.resource_type == ResourceType::Prefetch && !self.block_prefetch {
            skip_networking = true;
        }

        // Also short-circuit if we've reloaded this document too many times.
        if !skip_networking {
            skip_networking = self.document_reload_tracker >= 3;
        }

        // Handle document redirect / masking and track xml documents.
        let (current_url_cow, had_replacer) =
            self.handle_document_replacement_and_tracking(event, document_resource);

        let current_url: &str = current_url_cow.as_ref();

        let blacklisted = self.is_blacklisted(current_url);

        // Non-strict blacklist: block now, but later allow-list steps may
        // still lift the block. The strict variant is enforced at the end.
        if !self.blacklist_strict && blacklisted {
            skip_networking = true;
        }

        if !skip_networking {
            // Allow XSL for sitemap XML.
            if self.xml_document && current_url.ends_with(".xsl") {
                // `skip_networking` is already false in this branch; the
                // assignment only documents the intent.
                skip_networking = false;
            } else {
                skip_networking = self.should_skip_for_visuals_and_basic(&event.resource_type);
            }
        }

        // Skip ad detection for the user-requested top-level Document and
        // every step of its redirect chain. The crawler explicitly targets
        // this URL — fulfilling-empty-200 a page just because its host
        // matches an ad classifier breaks the user's intent (you can
        // legitimately want to scrape an ad page). Reproduced on
        // https://logrocket.com/careers, where the firewall ad list
        // flagged the host and chromey emitted a 17-byte stub for the
        // document; downstream sub-resources (script/img/iframe/etc.)
        // remain subject to ad blocking through the rest of the tree.
        //
        // Signals in short-circuit order (cheap → expensive):
        //   1. `redirected_request_id.is_some()` — explicit redirect hop
        //   2. `had_replacer` — chromey's masked-URL repair path
        //   3. `document_target_url.is_empty()` — very first nav, tracker
        //      not yet populated
        //   4. URL equality against the target — last because string
        //      compare is the only non-O(1) op (`handle_document_
        //      replacement_and_tracking` above just set the target to
        //      the current url, so this is the always-true fallback)
        //
        // Sub-resources (Script/Image/Font/Stylesheet/XHR/iframe content)
        // remain subject to ad blocking through the rest of the tree.
        let is_main_document_request = document_resource
            && (event.redirected_request_id.is_some()
                || had_replacer
                || self.document_target_url.is_empty()
                || event.request.url == self.document_target_url);
        if !is_main_document_request {
            skip_networking = self.detect_ad_if_enabled(event, skip_networking);
        }

        // Ignore embedded scripts, tracker stylesheets, and tracker images when only_html or ignore_visuals is set.
        if !skip_networking
            && self.block_javascript
            && (self.only_html || self.ignore_visuals)
            && (javascript_resource
                || document_resource
                || event.resource_type == ResourceType::Stylesheet
                || event.resource_type == ResourceType::Image)
        {
            skip_networking = ignore_script_embedded(current_url);
        }

        // Script policy: allow-by-default.
        // Block only if explicit block list patterns match.
        if !skip_networking && javascript_resource {
            skip_networking = self.should_block_script_blocklist_only(current_url);
        }

        // XHR / data resources.
        skip_networking = self.skip_xhr(skip_networking, event, network_resource);

        // Custom interception layer.
        if !skip_networking && (javascript_resource || network_resource || document_resource) {
            skip_networking = self.intercept_manager.intercept_detection(
                current_url,
                self.ignore_visuals,
                network_resource,
            );
        }

        // Custom website block list.
        if !skip_networking && (javascript_resource || network_resource) {
            skip_networking = crate::handler::blockers::block_websites::block_website(current_url);
        }

        // whitelist 3rd party
        // not required unless explicit blocking.
        if skip_networking && javascript_resource && ALLOWED_MATCHER_3RD_PARTY.is_match(current_url)
        {
            skip_networking = false;
        }

        // check if the url is in the whitelist.
        if skip_networking && self.is_whitelisted(current_url) {
            skip_networking = false;
        }

        // First-party allow (default ON for stylesheets/javascript/visuals).
        //
        // `block_stylesheets` and `ignore_visuals` were originally coarse
        // "drop all" bandwidth optimizations, but modern SPAs (React/Next.js
        // with dynamic `import()`, AppFabric, requirejs-style loaders, etc.)
        // gate hydration on the `load` event of resources they themselves
        // load — blocking those leaves outer_html_bytes capturing only the
        // pre-hydration shell. To stay flexible without regressing the
        // bandwidth case, we use registrable-domain (eTLD+1) matching:
        // when a request is first-party to the page's primary frame, the
        // corresponding `allow_first_party_*` flag (default `true`) lets it
        // through; third-party requests still hit the original block path.
        //
        // Set the matching `allow_first_party_*` flag to `false` to restore
        // the strict "block ALL of this resource type" semantics.
        if skip_networking && !self.document_target_domain.is_empty() {
            let allow = match event.resource_type {
                ResourceType::Stylesheet => self.allow_first_party_stylesheets,
                ResourceType::Script => self.allow_first_party_javascript,
                _ if IGNORE_VISUAL_RESOURCE_MAP.contains(event.resource_type.as_ref()) => {
                    self.allow_first_party_visuals
                }
                _ => false,
            };
            if allow && self.is_first_party_url(current_url) {
                skip_networking = false;
            }
        }

        // Legacy stylesheet allow (kept as additive fallback for strict
        // bug-compat with chromey 2.48.2). For parser-dispatched
        // <link rel="stylesheet"> Chrome routinely fires
        // `Fetch.requestPaused` *before* the companion
        // `Network.requestWillBeSent`, so `initiator_type` is `None`; this
        // rescues those even on cross-origin CDNs where the eTLD+1 differs
        // from the page (e.g. intuit.com → intuitcdn.net). Tracker CSS
        // injected by JS that runs after parser yield carries
        // `Some(InitiatorType::Script)` and stays blocked here.
        //
        // Gated on `allow_first_party_stylesheets=true` so callers who opt
        // out of first-party allow get a strict "block ALL stylesheets"
        // semantics with no surprises from the heuristic side-channel.
        if skip_networking
            && self.allow_first_party_stylesheets
            && self.block_stylesheets
            && event.resource_type == ResourceType::Stylesheet
            && !matches!(initiator_type, Some(InitiatorType::Script))
        {
            skip_networking = false;
        }

        // Strict blacklist is applied last so none of the allow-list steps
        // above can override it.
        if self.blacklist_strict && blacklisted {
            skip_networking = true;
        }

        if skip_networking {
            tracing::debug!("Blocked: {:?} - {}", event.resource_type, current_url);
            self.fulfill_request_empty_200(&event.request_id);
        } else {
            #[cfg(feature = "_cache")]
            {
                if let (Some(policy), Some(cache_site_key)) =
                    (self.cache_policy.as_ref(), self.cache_site_key.as_deref())
                {
                    // Cache entries are keyed by "<METHOD>:<url>"; this
                    // shadows `current_url` inside the cache block only.
                    let current_url = format!("{}:{}", event.request.method, &current_url);

                    if let Some((res, cache_policy)) =
                        crate::cache::remote::get_session_cache_item(cache_site_key, &current_url)
                    {
                        if policy.allows_cached(&cache_policy) {
                            tracing::debug!(
                                "Remote Cached: {:?} - {}",
                                &event.resource_type,
                                &current_url
                            );
                            let flat_headers = crate::http::headers_from_multi(&res.headers);
                            return self.fulfill_request_from_cache(
                                &event.request_id,
                                &res.body,
                                &flat_headers,
                                res.status as i64,
                            );
                        }
                    }
                }
            }

            // check our frame cache for the run.
            tracing::debug!("Allowed: {:?} - {}", event.resource_type, current_url);
            // The URL is only rewritten when a masked redirect was repaired.
            self.continue_request_with_url(
                &event.request_id,
                if had_replacer {
                    Some(current_url)
                } else {
                    None
                },
                !had_replacer,
            );
        }
    }
1240
1241    /// Shared "visuals + basic blocking" logic.
1242    ///
1243    /// IMPORTANT: Scripts are NOT blocked here anymore.
1244    /// Scripts are allowed by default and only blocked via explicit blocklists
1245    /// (should_block_script_blocklist_only / adblock / block_websites / intercept_manager).
1246    #[inline]
1247    fn should_skip_for_visuals_and_basic(&self, resource_type: &ResourceType) -> bool {
1248        (self.ignore_visuals && IGNORE_VISUAL_RESOURCE_MAP.contains(resource_type.as_ref()))
1249            || (self.block_stylesheets && *resource_type == ResourceType::Stylesheet)
1250    }
1251
1252    /// Does the network manager have a target domain?
1253    pub fn has_target_domain(&self) -> bool {
1254        !self.document_target_url.is_empty()
1255    }
1256
1257    /// True when `url`'s registrable domain matches the page's primary
1258    /// frame. Empty `document_target_domain` (no nav yet, or a redirect
1259    /// reset) returns `false` so we don't accidentally treat every URL
1260    /// as first-party.
1261    #[inline]
1262    fn is_first_party_url(&self, url: &str) -> bool {
1263        if self.document_target_domain.is_empty() {
1264            return false;
1265        }
1266        match host_and_rest(url) {
1267            Some((host, _)) => base_domain_from_host(host) == self.document_target_domain,
1268            None => false,
1269        }
1270    }
1271
1272    /// Set the target page url for tracking.
1273    pub fn set_page_url(&mut self, page_target_url: String) {
1274        let host_base = host_and_rest(&page_target_url)
1275            .map(|(h, _)| base_domain_from_host(h))
1276            .unwrap_or("");
1277
1278        self.document_target_domain = host_base.to_string();
1279        self.document_target_url = page_target_url;
1280    }
1281
1282    /// Clear the initial target domain on every navigation.
1283    pub fn clear_target_domain(&mut self) {
1284        self.document_reload_tracker = 0;
1285        self.document_target_url = Default::default();
1286        self.document_target_domain = Default::default();
1287    }
1288
1289    /// Handles:
1290    /// - document reload tracking (`document_reload_tracker`)
1291    /// - redirect masking / replacement
1292    /// - xml document detection (`xml_document`)
1293    /// - `document_target_url` updates
1294    ///
1295    /// Returns (current_url, had_replacer).
1296    #[inline]
1297    fn handle_document_replacement_and_tracking<'a>(
1298        &mut self,
1299        event: &'a EventRequestPaused,
1300        document_resource: bool,
1301    ) -> (Cow<'a, str>, bool) {
1302        let mut replacer: Option<String> = None;
1303        let current_url = event.request.url.as_str();
1304
1305        if document_resource {
1306            if self.document_target_url == current_url {
1307                self.document_reload_tracker += 1;
1308            } else if !self.document_target_url.is_empty() && event.redirected_request_id.is_some()
1309            {
1310                let (http_document_replacement, mut https_document_replacement) =
1311                    if self.document_target_url.starts_with("http://") {
1312                        (
1313                            self.document_target_url.replacen("http://", "http//", 1),
1314                            self.document_target_url.replacen("http://", "https://", 1),
1315                        )
1316                    } else {
1317                        (
1318                            self.document_target_url.replacen("https://", "https//", 1),
1319                            self.document_target_url.replacen("https://", "http://", 1),
1320                        )
1321                    };
1322
1323                // Track trailing slash to restore later.
1324                let trailing = https_document_replacement.ends_with('/');
1325                if trailing {
1326                    https_document_replacement.pop();
1327                }
1328                if https_document_replacement.ends_with('/') {
1329                    https_document_replacement.pop();
1330                }
1331
1332                let redirect_mask = format!(
1333                    "{}{}",
1334                    https_document_replacement, http_document_replacement
1335                );
1336
1337                if current_url == redirect_mask {
1338                    replacer = Some(if trailing {
1339                        format!("{}/", https_document_replacement)
1340                    } else {
1341                        https_document_replacement
1342                    });
1343                }
1344            }
1345
1346            if self.document_target_url.is_empty() && current_url.ends_with(".xml") {
1347                self.xml_document = true;
1348            }
1349
1350            // Track last seen document URL.
1351            self.document_target_url = event.request.url.clone();
1352            self.document_target_domain = host_and_rest(&self.document_target_url)
1353                .map(|(h, _)| base_domain_from_host(h).to_string())
1354                .unwrap_or_default();
1355        }
1356
1357        let current_url_cow = match replacer {
1358            Some(r) => Cow::Owned(r),
1359            None => Cow::Borrowed(event.request.url.as_str()),
1360        };
1361
1362        let had_replacer = matches!(current_url_cow, Cow::Owned(_));
1363        (current_url_cow, had_replacer)
1364    }
1365
1366    /// Perform a page intercept for chrome using the adblock engine.
1367    /// Uses the custom engine when user-supplied filter rules are configured,
1368    /// otherwise falls back to the global default engine with built-in patterns.
1369    #[cfg(feature = "adblock")]
1370    pub fn detect_ad(&self, event: &EventRequestPaused) -> bool {
1371        use adblock::{
1372            lists::{FilterSet, ParseOptions, RuleTypes},
1373            Engine,
1374        };
1375
1376        lazy_static::lazy_static! {
1377            static ref AD_ENGINE: Engine = {
1378                let mut filter_set = FilterSet::new(false);
1379                let mut rules = ParseOptions::default();
1380                rules.rule_types = RuleTypes::All;
1381
1382                filter_set.add_filters(
1383                    &*spider_network_blocker::adblock::ADBLOCK_PATTERNS,
1384                    rules,
1385                );
1386
1387                // When adblock_easylist is enabled, EasyList + EasyPrivacy are
1388                // embedded at build time for zero-cost runtime loading.
1389                #[cfg(feature = "adblock_easylist")]
1390                {
1391                    static EASYLIST: &str = include_str!(concat!(env!("OUT_DIR"), "/easylist.txt"));
1392                    static EASYPRIVACY: &str = include_str!(concat!(env!("OUT_DIR"), "/easyprivacy.txt"));
1393
1394                    if !EASYLIST.is_empty() {
1395                        filter_set.add_filter_list(EASYLIST, rules);
1396                    }
1397                    if !EASYPRIVACY.is_empty() {
1398                        filter_set.add_filter_list(EASYPRIVACY, rules);
1399                    }
1400                }
1401
1402                Engine::from_filter_set(filter_set, true)
1403            };
1404        }
1405
1406        let blockable = event.resource_type == ResourceType::Script
1407            || event.resource_type == ResourceType::Image
1408            || event.resource_type == ResourceType::Media
1409            || event.resource_type == ResourceType::Stylesheet
1410            || event.resource_type == ResourceType::Document
1411            || event.resource_type == ResourceType::Fetch
1412            || event.resource_type == ResourceType::Xhr;
1413
1414        if !blockable {
1415            return false;
1416        }
1417
1418        let u = &event.request.url;
1419
1420        let source_domain = if self.document_target_domain.is_empty() {
1421            "example.com"
1422        } else {
1423            &self.document_target_domain
1424        };
1425
1426        // Fast hostname extraction without full URL parsing.
1427        // preparsed(url, request_hostname, source_hostname, type, third_party)
1428        let hostname = u
1429            .strip_prefix("https://")
1430            .or_else(|| u.strip_prefix("http://"))
1431            .and_then(|rest| rest.split('/').next())
1432            // Strip userinfo (user:pass@) if present.
1433            .map(
1434                |authority| match memchr::memrchr(b'@', authority.as_bytes()) {
1435                    Some(i) => &authority[i + 1..],
1436                    None => authority,
1437                },
1438            )
1439            // Strip port (:8080) if present.
1440            .and_then(|host_port| host_port.split(':').next())
1441            .unwrap_or(source_domain);
1442
1443        let resource_type_str = match event.resource_type {
1444            ResourceType::Script => "script",
1445            ResourceType::Image => "image",
1446            ResourceType::Media => "media",
1447            ResourceType::Stylesheet => "stylesheet",
1448            ResourceType::Document => "document",
1449            ResourceType::Fetch => "fetch",
1450            ResourceType::Xhr => "xhr",
1451            _ => "other",
1452        };
1453
1454        let request = adblock::request::Request::preparsed(
1455            u,
1456            hostname,
1457            source_domain,
1458            resource_type_str,
1459            !event.request.is_same_site.unwrap_or_default(),
1460        );
1461
1462        let engine: &Engine = match self.adblock_engine.as_ref() {
1463            Some(custom) => custom,
1464            None => &AD_ENGINE,
1465        };
1466
1467        engine.check_network_request(&request).matched
1468    }
1469
1470    pub fn on_fetch_auth_required(&mut self, event: &EventAuthRequired) {
1471        let response = if self
1472            .attempted_authentications
1473            .contains(event.request_id.as_ref())
1474        {
1475            AuthChallengeResponseResponse::CancelAuth
1476        } else if self.credentials.is_some() {
1477            self.attempted_authentications
1478                .insert(event.request_id.clone().into());
1479            AuthChallengeResponseResponse::ProvideCredentials
1480        } else {
1481            AuthChallengeResponseResponse::Default
1482        };
1483
1484        let mut auth = AuthChallengeResponse::new(response);
1485        if let Some(creds) = self.credentials.clone() {
1486            auth.username = Some(creds.username);
1487            auth.password = Some(creds.password);
1488        }
1489        self.push_cdp_request(ContinueWithAuthParams::new(event.request_id.clone(), auth));
1490    }
1491
1492    /// Set the page offline network emulation condition.
1493    pub fn set_offline_mode(&mut self, value: bool) {
1494        if self.offline == value {
1495            return;
1496        }
1497        self.offline = value;
1498        if let Ok(condition) = NetworkConditions::builder()
1499            .url_pattern("")
1500            .latency(0)
1501            .download_throughput(-1.)
1502            .upload_throughput(-1.)
1503            .build()
1504        {
1505            if let Ok(network) = EmulateNetworkConditionsByRuleParams::builder()
1506                .offline(self.offline)
1507                .matched_network_condition(condition)
1508                .build()
1509            {
1510                self.push_cdp_request(network);
1511            }
1512        }
1513    }
1514
1515    /// Request interception doesn't happen for data URLs with Network Service.
1516    pub fn on_request_will_be_sent(&mut self, event: &EventRequestWillBeSent) {
1517        if self.protocol_request_interception_enabled && !event.request.url.starts_with("data:") {
1518            if let Some((interception_id, _)) = self
1519                .request_id_to_interception_id
1520                .remove(event.request_id.as_ref())
1521            {
1522                self.on_request(event, Some(interception_id));
1523            } else {
1524                self.requests_will_be_sent
1525                    .insert(event.request_id.clone(), (event.clone(), Instant::now()));
1526            }
1527        } else {
1528            self.on_request(event, None);
1529        }
1530    }
1531
1532    /// The request was served from the cache.
1533    pub fn on_request_served_from_cache(&mut self, event: &EventRequestServedFromCache) {
1534        if let Some(request) = self.requests.get_mut(event.request_id.as_ref()) {
1535            request.from_memory_cache = true;
1536        }
1537    }
1538
1539    /// On network response received.
1540    pub fn on_response_received(&mut self, event: &EventResponseReceived) {
1541        let mut request_failed = false;
1542
1543        // Track how many bytes we actually deducted from this target.
1544        let mut deducted: u64 = 0;
1545
1546        if let Some(max_bytes) = self.max_bytes_allowed.as_mut() {
1547            let before = *max_bytes;
1548
1549            // encoded_data_length -> saturating cast to u64
1550            let received_bytes: u64 = event.response.encoded_data_length as u64;
1551
1552            // Safe parse of Content-Length
1553            let content_length: Option<u64> = event
1554                .response
1555                .headers
1556                .inner()
1557                .get("content-length")
1558                .and_then(|v| v.as_str())
1559                .and_then(|s| s.trim().parse::<u64>().ok());
1560
1561            // Deduct what we actually received
1562            *max_bytes = max_bytes.saturating_sub(received_bytes);
1563
1564            // If the declared size can't fit, zero out now
1565            if let Some(cl) = content_length {
1566                if cl > *max_bytes {
1567                    *max_bytes = 0;
1568                }
1569            }
1570
1571            request_failed = *max_bytes == 0;
1572
1573            // Compute exact delta deducted on this event
1574            deducted = before.saturating_sub(*max_bytes);
1575        }
1576
1577        // Bubble up the deduction (even if request continues)
1578        if deducted > 0 {
1579            self.queued_events
1580                .push_back(NetworkEvent::BytesConsumed(deducted));
1581        }
1582
1583        // block all network request moving forward.
1584        if request_failed && self.max_bytes_allowed.is_some() {
1585            self.set_block_all(true);
1586        }
1587
1588        if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
1589            request.set_response(event.response.clone());
1590            self.queued_events.push_back(if request_failed {
1591                NetworkEvent::RequestFailed(request)
1592            } else {
1593                NetworkEvent::RequestFinished(request)
1594            });
1595        }
1596    }
1597
1598    /// On network loading finished.
1599    pub fn on_network_loading_finished(&mut self, event: &EventLoadingFinished) {
1600        if let Some(request) = self.requests.remove(event.request_id.as_ref()) {
1601            if let Some(interception_id) = request.interception_id.as_ref() {
1602                self.attempted_authentications
1603                    .remove(interception_id.as_ref());
1604            }
1605            self.queued_events
1606                .push_back(NetworkEvent::RequestFinished(request));
1607        }
1608    }
1609
1610    /// On network loading failed.
1611    pub fn on_network_loading_failed(&mut self, event: &EventLoadingFailed) {
1612        if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
1613            request.failure_text = Some(event.error_text.clone());
1614            if let Some(interception_id) = request.interception_id.as_ref() {
1615                self.attempted_authentications
1616                    .remove(interception_id.as_ref());
1617            }
1618            self.queued_events
1619                .push_back(NetworkEvent::RequestFailed(request));
1620        }
1621    }
1622
1623    /// On request will be sent.
1624    fn on_request(
1625        &mut self,
1626        event: &EventRequestWillBeSent,
1627        interception_id: Option<InterceptionId>,
1628    ) {
1629        let mut redirect_chain = Vec::new();
1630        let mut redirect_location = None;
1631
1632        if let Some(redirect_resp) = &event.redirect_response {
1633            if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
1634                if is_redirect_status(redirect_resp.status) {
1635                    if let Some(location) = redirect_resp.headers.inner()["Location"].as_str() {
1636                        if redirect_resp.url != location {
1637                            let fixed_location = location.replace(&redirect_resp.url, "");
1638
1639                            if !fixed_location.is_empty() {
1640                                if let Some(resp) = request.response.as_mut() {
1641                                    resp.headers.0["Location"] =
1642                                        serde_json::Value::String(fixed_location.clone());
1643                                }
1644                            }
1645
1646                            redirect_location = Some(fixed_location);
1647                        }
1648                    }
1649                }
1650
1651                {
1652                    let mut redirect_resp = redirect_resp.clone();
1653
1654                    if let Some(redirect_location) = redirect_location {
1655                        if !redirect_location.is_empty() {
1656                            redirect_resp.headers.0["Location"] =
1657                                serde_json::Value::String(redirect_location);
1658                        }
1659                    }
1660
1661                    self.handle_request_redirect(&mut request, redirect_resp);
1662                }
1663
1664                redirect_chain = std::mem::take(&mut request.redirect_chain);
1665                redirect_chain.push(request);
1666            }
1667        }
1668
1669        // Redirect cap: applies only to Document-type hops and only when
1670        // `max_redirects` is set. Sub-resource chains are untouched.
1671        if let Some(cap) = self.max_redirects {
1672            let is_document = matches!(event.r#type, Some(ResourceType::Document));
1673            if is_document && redirect_chain.len() > cap {
1674                let mut failed = HttpRequest::new(
1675                    event.request_id.clone(),
1676                    event.frame_id.clone(),
1677                    interception_id,
1678                    self.user_request_interception_enabled,
1679                    redirect_chain,
1680                );
1681                failed.url = Some(event.request.url.clone());
1682                failed.method = Some(event.request.method.clone());
1683                failed.failure_text = Some("net::ERR_TOO_MANY_REDIRECTS".into());
1684                self.push_cdp_request(
1685                    chromiumoxide_cdp::cdp::browser_protocol::page::StopLoadingParams::default(),
1686                );
1687                self.queued_events
1688                    .push_back(NetworkEvent::RequestFailed(failed));
1689                return;
1690            }
1691        }
1692
1693        let request = HttpRequest::new(
1694            event.request_id.clone(),
1695            event.frame_id.clone(),
1696            interception_id,
1697            self.user_request_interception_enabled,
1698            redirect_chain,
1699        );
1700
1701        let rid = event.request_id.clone();
1702        self.queued_events
1703            .push_back(NetworkEvent::Request(rid.clone()));
1704        self.requests.insert(rid, request);
1705    }
1706
1707    /// Handle request redirect.
1708    fn handle_request_redirect(&mut self, request: &mut HttpRequest, response: Response) {
1709        request.set_response(response);
1710        if let Some(interception_id) = request.interception_id.as_ref() {
1711            self.attempted_authentications
1712                .remove(interception_id.as_ref());
1713        }
1714    }
1715}
1716
/// Events queued by the network manager for its owner to drain.
#[derive(Debug)]
pub enum NetworkEvent {
    /// A CDP command to send: the method id and its JSON-encoded parameters.
    SendCdpRequest((MethodId, serde_json::Value)),
    /// A request with this id has been registered and is now tracked.
    Request(RequestId),
    /// A response for the request with this id.
    // NOTE(review): not emitted by the handlers in this part of the file — confirm usage.
    Response(RequestId),
    /// The request failed: loading failed, the byte budget was exhausted, or
    /// the document redirect cap was exceeded.
    RequestFailed(HttpRequest),
    /// The request finished: its response was received or loading finished.
    RequestFinished(HttpRequest),
    /// Number of bytes deducted from the byte budget by a received response.
    BytesConsumed(u64),
}
1732
1733#[cfg(test)]
1734mod tests {
1735    use super::ALLOWED_MATCHER_3RD_PARTY;
1736    use crate::handler::network::NetworkManager;
1737    use std::time::Duration;
1738
1739    #[test]
1740    fn test_allowed_matcher_3rd_party() {
1741        // Should be allowed (matches "/cdn-cgi/challenge-platform/")
1742        let cf_challenge = "https://www.something.com.ba/cdn-cgi/challenge-platform/h/g/orchestrate/chl_page/v1?ray=9abf7b523d90987e";
1743        assert!(
1744            ALLOWED_MATCHER_3RD_PARTY.is_match(cf_challenge),
1745            "expected Cloudflare challenge script to be allowed"
1746        );
1747
1748        // Should NOT be allowed (not in allow-list)
1749        let cf_insights = "https://static.cloudflareinsights.com/beacon.min.js/vcd15cbe7772f49c399c6a5babf22c1241717689176015";
1750        assert!(
1751            !ALLOWED_MATCHER_3RD_PARTY.is_match(cf_insights),
1752            "expected Cloudflare Insights beacon to remain blocked (not in allow-list)"
1753        );
1754
1755        // A couple sanity checks for existing allow patterns
1756        assert!(ALLOWED_MATCHER_3RD_PARTY.is_match("https://js.stripe.com/v3/"));
1757        assert!(ALLOWED_MATCHER_3RD_PARTY
1758            .is_match("https://www.google.com/recaptcha/api.js?render=explicit"));
1759        assert!(ALLOWED_MATCHER_3RD_PARTY.is_match("https://code.jquery.com/jquery-3.7.1.min.js"));
1760    }
1761
1762    #[test]
1763    fn test_script_allowed_by_default_when_not_blocklisted() {
1764        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
1765        nm.set_page_url(
1766            "https://forum.cursor.com/t/is-2000-fast-requests-the-maximum/51085".to_string(),
1767        );
1768
1769        // A random script that should not match your block tries.
1770        let ok = "https://cdn.example.net/assets/some-app-bundle-12345.js";
1771        assert!(
1772            !nm.should_block_script_blocklist_only(ok),
1773            "expected non-blocklisted script to be allowed"
1774        );
1775    }
1776
1777    #[test]
1778    fn test_script_blocked_when_matches_ignore_trie_or_blocklist() {
1779        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
1780        nm.set_page_url(
1781            "https://forum.cursor.com/t/is-2000-fast-requests-the-maximum/51085".to_string(),
1782        );
1783
1784        // This should match URL_IGNORE_TRIE_PATHS fallback ("analytics.js") logic.
1785        let bad = "https://cdn.example.net/js/analytics.js";
1786        assert!(
1787            nm.should_block_script_blocklist_only(bad),
1788            "expected analytics.js to be blocklisted"
1789        );
1790    }
1791
1792    #[test]
1793    fn test_allowed_matcher_3rd_party_sanity() {
1794        // Should be allowed (matches "/cdn-cgi/challenge-platform/")
1795        let cf_challenge = "https://www.something.com.ba/cdn-cgi/challenge-platform/h/g/orchestrate/chl_page/v1?ray=9abf7b523d90987e";
1796        assert!(
1797            ALLOWED_MATCHER_3RD_PARTY.is_match(cf_challenge),
1798            "expected Cloudflare challenge script to be allowed"
1799        );
1800
1801        // Should NOT be allowed (not in allow-list)
1802        let cf_insights = "https://static.cloudflareinsights.com/beacon.min.js/vcd15cbe7772f49c399c6a5babf22c1241717689176015";
1803        assert!(
1804            !ALLOWED_MATCHER_3RD_PARTY.is_match(cf_insights),
1805            "expected Cloudflare Insights beacon to remain blocked (not in allow-list)"
1806        );
1807
1808        assert!(ALLOWED_MATCHER_3RD_PARTY.is_match("https://js.stripe.com/v3/"));
1809        assert!(ALLOWED_MATCHER_3RD_PARTY
1810            .is_match("https://www.google.com/recaptcha/api.js?render=explicit"));
1811        assert!(ALLOWED_MATCHER_3RD_PARTY.is_match("https://code.jquery.com/jquery-3.7.1.min.js"));
1812    }
1813    #[test]
1814    fn test_dynamic_blacklist_blocks_url() {
1815        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
1816        nm.set_page_url("https://example.com/".to_string());
1817
1818        nm.set_blacklist_patterns(["static.cloudflareinsights.com", "googletagmanager.com"]);
1819        assert!(nm.is_blacklisted("https://static.cloudflareinsights.com/beacon.min.js"));
1820        assert!(nm.is_blacklisted("https://www.googletagmanager.com/gtm.js?id=GTM-XXXX"));
1821
1822        assert!(!nm.is_blacklisted("https://cdn.example.net/assets/app.js"));
1823    }
1824
1825    #[test]
1826    fn test_blacklist_strict_wins_over_whitelist() {
1827        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
1828        nm.set_page_url("https://example.com/".to_string());
1829
1830        // Same URL in both lists.
1831        nm.set_blacklist_patterns(["beacon.min.js"]);
1832        nm.set_whitelist_patterns(["beacon.min.js"]);
1833
1834        nm.set_blacklist_strict(true);
1835
1836        let u = "https://static.cloudflareinsights.com/beacon.min.js";
1837        assert!(nm.is_whitelisted(u));
1838        assert!(nm.is_blacklisted(u));
1839
1840        // In strict mode, it should still be considered blocked at decision time.
1841        // (We can only directly assert the matchers here; the decision logic is exercised in integration.)
1842        assert!(nm.blacklist_strict);
1843    }
1844
1845    #[cfg(feature = "adblock")]
1846    fn make_request_paused(
1847        url: &str,
1848        resource_type: chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType,
1849        is_same_site: bool,
1850    ) -> chromiumoxide_cdp::cdp::browser_protocol::fetch::EventRequestPaused {
1851        use chromiumoxide_cdp::cdp::browser_protocol::fetch::EventRequestPaused;
1852        use chromiumoxide_cdp::cdp::browser_protocol::network::{
1853            Headers, Request, RequestReferrerPolicy, ResourcePriority,
1854        };
1855
1856        EventRequestPaused {
1857            request_id: chromiumoxide_cdp::cdp::browser_protocol::network::RequestId::from(
1858                "test-req".to_string(),
1859            )
1860            .into(),
1861            request: Request {
1862                url: url.to_string(),
1863                method: "GET".to_string(),
1864                headers: Headers::new(serde_json::Value::Object(Default::default())),
1865                initial_priority: ResourcePriority::Medium,
1866                referrer_policy: RequestReferrerPolicy::NoReferrer,
1867                url_fragment: None,
1868                has_post_data: None,
1869                post_data_entries: None,
1870                mixed_content_type: None,
1871                is_link_preload: None,
1872                trust_token_params: None,
1873                is_same_site: Some(is_same_site),
1874                is_ad_related: None,
1875            },
1876            frame_id: chromiumoxide_cdp::cdp::browser_protocol::page::FrameId::from(
1877                "frame1".to_string(),
1878            ),
1879            resource_type,
1880            response_error_reason: None,
1881            response_status_code: None,
1882            response_status_text: None,
1883            response_headers: None,
1884            network_id: None,
1885            redirected_request_id: None,
1886        }
1887    }
1888
1889    #[cfg(feature = "adblock")]
1890    #[test]
1891    fn test_detect_ad_blocks_known_tracker_scripts() {
1892        use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
1893
1894        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
1895        nm.set_page_url("https://www.wine-searcher.com/".to_string());
1896
1897        let event = make_request_paused(
1898            "https://www.googletagmanager.com/gtm.js?id=GTM-XXXX",
1899            ResourceType::Script,
1900            false,
1901        );
1902
1903        assert!(
1904            nm.detect_ad(&event),
1905            "googletagmanager.com script should be detected as ad"
1906        );
1907    }
1908
1909    #[cfg(feature = "adblock")]
1910    #[test]
1911    fn test_detect_ad_allows_legitimate_scripts() {
1912        use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
1913
1914        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
1915        nm.set_page_url("https://www.mylegitsite-test.com/".to_string());
1916
1917        let event = make_request_paused(
1918            "https://www.mylegitsite-test.com/static/js/app-bundle.js",
1919            ResourceType::Script,
1920            true,
1921        );
1922
1923        assert!(
1924            !nm.detect_ad(&event),
1925            "legitimate first-party app bundle should not be blocked"
1926        );
1927    }
1928
1929    #[cfg(feature = "adblock")]
1930    #[test]
1931    fn test_detect_ad_uses_source_domain() {
1932        use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
1933
1934        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
1935        nm.set_page_url("https://www.wine-searcher.com/some-page".to_string());
1936
1937        assert!(
1938            !nm.document_target_domain.is_empty(),
1939            "document_target_domain should be set after set_page_url"
1940        );
1941
1942        let event = make_request_paused(
1943            "https://www.google-analytics.com/analytics.js",
1944            ResourceType::Script,
1945            false,
1946        );
1947
1948        assert!(
1949            nm.detect_ad(&event),
1950            "google-analytics.com should be blocked as tracker"
1951        );
1952    }
1953
1954    #[cfg(feature = "adblock")]
1955    #[test]
1956    fn test_custom_adblock_engine_takes_precedence() {
1957        use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
1958
1959        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
1960        nm.set_page_url("https://example.com/".to_string());
1961
1962        // Build a custom engine with a specific rule.
1963        let mut filter_set = adblock::lists::FilterSet::new(false);
1964        let mut opts = adblock::lists::ParseOptions::default();
1965        opts.rule_types = adblock::lists::RuleTypes::All;
1966        filter_set.add_filters(["||custom-tracker.example.net^"], opts);
1967        let engine = adblock::Engine::from_filter_set(filter_set, true);
1968        nm.set_adblock_engine(std::sync::Arc::new(engine));
1969
1970        let event = make_request_paused(
1971            "https://custom-tracker.example.net/pixel.js",
1972            ResourceType::Script,
1973            false,
1974        );
1975
1976        assert!(
1977            nm.detect_ad(&event),
1978            "custom engine rule should block custom-tracker.example.net"
1979        );
1980    }
1981
1982    /// Helper: run a URL through the full `on_fetch_request_paused` pipeline
1983    /// and return whether it was blocked (true) or allowed (false).
1984    #[cfg(feature = "adblock")]
1985    fn run_full_interception(
1986        nm: &mut NetworkManager,
1987        url: &str,
1988        resource_type: chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType,
1989        is_same_site: bool,
1990    ) -> bool {
1991        use super::NetworkEvent;
1992
1993        // Drain any prior events.
1994        while nm.poll().is_some() {}
1995
1996        let event = make_request_paused(url, resource_type, is_same_site);
1997        nm.on_fetch_request_paused(&event);
1998
1999        // Check what was emitted: Fetch.fulfillRequest = blocked, Fetch.continueRequest = allowed.
2000        let mut blocked = false;
2001        while let Some(ev) = nm.poll() {
2002            if let NetworkEvent::SendCdpRequest((method, _)) = &ev {
2003                let m: &str = method.as_ref();
2004                if m == "Fetch.fulfillRequest" || m == "Fetch.failRequest" {
2005                    blocked = true;
2006                }
2007            }
2008        }
2009        blocked
2010    }
2011
2012    // ── End-to-end interception tests ───────────────────────────────────
2013
2014    #[cfg(feature = "adblock")]
2015    #[test]
2016    fn test_e2e_tracker_script_blocked() {
2017        use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2018
2019        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2020        nm.set_page_url("https://www.wine-searcher.com/".to_string());
2021
2022        assert!(
2023            run_full_interception(
2024                &mut nm,
2025                "https://www.googletagmanager.com/gtm.js?id=GTM-XXXX",
2026                ResourceType::Script,
2027                false,
2028            ),
2029            "GTM script should be blocked through full pipeline"
2030        );
2031    }
2032
2033    #[cfg(feature = "adblock")]
2034    #[test]
2035    fn test_e2e_legitimate_script_allowed() {
2036        use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2037
2038        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2039        nm.set_page_url("https://www.mylegitsite-test.com/".to_string());
2040
2041        assert!(
2042            !run_full_interception(
2043                &mut nm,
2044                "https://www.mylegitsite-test.com/static/js/app-bundle.js",
2045                ResourceType::Script,
2046                true,
2047            ),
2048            "legitimate first-party script should be allowed through full pipeline"
2049        );
2050    }
2051
2052    #[cfg(feature = "adblock")]
2053    #[test]
2054    fn test_e2e_analytics_xhr_blocked() {
2055        use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2056
2057        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2058        nm.set_page_url("https://example.org/".to_string());
2059
2060        assert!(
2061            run_full_interception(
2062                &mut nm,
2063                "https://www.google-analytics.com/g/collect?v=2&tid=UA-123",
2064                ResourceType::Xhr,
2065                false,
2066            ),
2067            "Google Analytics XHR should be blocked through full pipeline"
2068        );
2069    }
2070
2071    #[cfg(feature = "adblock")]
2072    #[test]
2073    fn test_e2e_whitelisted_overrides_adblock() {
2074        use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2075
2076        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2077        nm.set_page_url("https://example.org/".to_string());
2078        nm.set_whitelist_patterns(["googletagmanager.com"]);
2079
2080        // GTM would normally be blocked by adblock, but whitelist overrides.
2081        assert!(
2082            !run_full_interception(
2083                &mut nm,
2084                "https://www.googletagmanager.com/gtm.js?id=GTM-TEST",
2085                ResourceType::Script,
2086                false,
2087            ),
2088            "whitelisted tracker should be allowed even when adblock would block it"
2089        );
2090    }
2091
2092    #[cfg(feature = "adblock")]
2093    #[test]
2094    fn test_e2e_blacklist_strict_overrides_whitelist() {
2095        use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2096
2097        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2098        nm.set_page_url("https://example.org/".to_string());
2099        nm.set_blacklist_patterns(["cdn.example.net/evil.js"]);
2100        nm.set_whitelist_patterns(["cdn.example.net/evil.js"]);
2101        nm.set_blacklist_strict(true);
2102
2103        assert!(
2104            run_full_interception(
2105                &mut nm,
2106                "https://cdn.example.net/evil.js",
2107                ResourceType::Script,
2108                false,
2109            ),
2110            "strict blacklist should win over whitelist"
2111        );
2112    }
2113
2114    #[cfg(feature = "adblock")]
2115    #[test]
2116    fn test_e2e_first_party_stylesheet_passes_when_block_stylesheets_on() {
2117        use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2118
2119        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2120        nm.set_page_url("https://developer.intuit.com/".to_string());
2121        nm.block_stylesheets = true;
2122
2123        assert!(
2124            !run_full_interception(
2125                &mut nm,
2126                "https://developer.intuit.com/static/app.css",
2127                ResourceType::Stylesheet,
2128                true,
2129            ),
2130            "first-party CSS must pass when allow_first_party_stylesheets default-true"
2131        );
2132    }
2133
2134    #[cfg(feature = "adblock")]
2135    #[test]
2136    fn test_e2e_first_party_stylesheet_blocked_when_allow_disabled() {
2137        use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2138
2139        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2140        nm.set_page_url("https://developer.intuit.com/".to_string());
2141        nm.block_stylesheets = true;
2142        nm.allow_first_party_stylesheets = false;
2143
2144        assert!(
2145            run_full_interception(
2146                &mut nm,
2147                "https://developer.intuit.com/static/app.css",
2148                ResourceType::Stylesheet,
2149                true,
2150            ),
2151            "first-party CSS must be blocked when allow_first_party_stylesheets=false"
2152        );
2153    }
2154
2155    #[cfg(feature = "adblock")]
2156    #[test]
2157    fn test_e2e_third_party_stylesheet_still_blocked_with_default_allow() {
2158        use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2159
2160        // requestWillBeSent fired with `initiator.type = "script"` —
2161        // disqualifies the legacy heuristic fallback. Default first-party
2162        // allow is on but this URL is third-party, so it should still block.
2163        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2164        nm.set_page_url("https://developer.intuit.com/".to_string());
2165        nm.block_stylesheets = true;
2166        // Required for `on_request_will_be_sent` to actually cache the
2167        // RWBS event into `requests_will_be_sent` (otherwise it dispatches
2168        // straight to `on_request` and the initiator lookup misses).
2169        nm.protocol_request_interception_enabled = true;
2170
2171        let rwbs_url = "https://tracker.evil.example/track.css";
2172        let rwbs_json = serde_json::json!({
2173            "requestId": "tp-css-1",
2174            "loaderId": "test-loader",
2175            "documentURL": "https://developer.intuit.com/",
2176            "request": {
2177                "url": rwbs_url,
2178                "method": "GET",
2179                "headers": {},
2180                "initialPriority": "Medium",
2181                "referrerPolicy": "no-referrer"
2182            },
2183            "timestamp": 0.0,
2184            "wallTime": 0.0,
2185            "initiator": { "type": "script" },
2186            "redirectHasExtraInfo": false,
2187            "type": "Stylesheet",
2188            "frameId": "frame1"
2189        });
2190        let rwbs_event: chromiumoxide_cdp::cdp::browser_protocol::network::EventRequestWillBeSent =
2191            serde_json::from_value(rwbs_json).unwrap();
2192        nm.on_request_will_be_sent(&rwbs_event);
2193
2194        // Use the same requestId in the requestPaused event so the
2195        // initiator capture finds the cached RWBS entry.
2196        use super::NetworkEvent;
2197        while nm.poll().is_some() {}
2198        let mut paused_event = make_request_paused(rwbs_url, ResourceType::Stylesheet, false);
2199        paused_event.network_id = Some(
2200            chromiumoxide_cdp::cdp::browser_protocol::network::RequestId::from(
2201                "tp-css-1".to_string(),
2202            ),
2203        );
2204        nm.on_fetch_request_paused(&paused_event);
2205
2206        let mut blocked = false;
2207        while let Some(ev) = nm.poll() {
2208            if let NetworkEvent::SendCdpRequest((method, _)) = &ev {
2209                let m: &str = method.as_ref();
2210                if m == "Fetch.fulfillRequest" || m == "Fetch.failRequest" {
2211                    blocked = true;
2212                }
2213            }
2214        }
2215        assert!(blocked, "third-party Script-initiated CSS must remain blocked");
2216    }
2217
2218    #[cfg(feature = "adblock")]
2219    #[test]
2220    fn test_e2e_first_party_image_passes_when_ignore_visuals_on() {
2221        use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2222
2223        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2224        nm.set_page_url("https://shop.example/".to_string());
2225        nm.ignore_visuals = true;
2226
2227        assert!(
2228            !run_full_interception(
2229                &mut nm,
2230                "https://shop.example/img/hero.png",
2231                ResourceType::Image,
2232                true,
2233            ),
2234            "first-party image must pass when allow_first_party_visuals default-true"
2235        );
2236    }
2237
2238    #[cfg(feature = "adblock")]
2239    #[test]
2240    fn test_e2e_third_party_image_blocked_when_ignore_visuals_on() {
2241        use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2242
2243        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2244        nm.set_page_url("https://shop.example/".to_string());
2245        nm.ignore_visuals = true;
2246
2247        assert!(
2248            run_full_interception(
2249                &mut nm,
2250                "https://cdn.thirdparty.io/banner.png",
2251                ResourceType::Image,
2252                false,
2253            ),
2254            "third-party image must remain blocked when ignore_visuals=true"
2255        );
2256    }
2257
2258    #[cfg(feature = "adblock")]
2259    #[test]
2260    fn test_e2e_first_party_document_not_blocked() {
2261        use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2262
2263        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2264        nm.set_page_url("https://www.nytimes.com/".to_string());
2265
2266        assert!(
2267            !run_full_interception(
2268                &mut nm,
2269                "https://www.nytimes.com/2024/article.html",
2270                ResourceType::Document,
2271                true,
2272            ),
2273            "first-party document navigation should never be blocked"
2274        );
2275    }
2276
2277    #[cfg(feature = "adblock")]
2278    #[test]
2279    fn test_e2e_custom_engine_blocks_through_pipeline() {
2280        use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2281
2282        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2283        nm.set_page_url("https://mysite.com/".to_string());
2284
2285        let mut filter_set = adblock::lists::FilterSet::new(false);
2286        let mut opts = adblock::lists::ParseOptions::default();
2287        opts.rule_types = adblock::lists::RuleTypes::All;
2288        filter_set.add_filters(["||evil-cdn.example.net^$script"], opts);
2289        let engine = adblock::Engine::from_filter_set(filter_set, true);
2290        nm.set_adblock_engine(std::sync::Arc::new(engine));
2291
2292        assert!(
2293            run_full_interception(
2294                &mut nm,
2295                "https://evil-cdn.example.net/tracker.js",
2296                ResourceType::Script,
2297                false,
2298            ),
2299            "custom engine rule should block through full pipeline"
2300        );
2301
2302        // Legitimate script on the same site should still pass.
2303        assert!(
2304            !run_full_interception(
2305                &mut nm,
2306                "https://mysite.com/app.js",
2307                ResourceType::Script,
2308                true,
2309            ),
2310            "first-party script should still be allowed with custom engine"
2311        );
2312    }
2313
2314    #[cfg(feature = "adblock")]
2315    #[test]
2316    fn test_e2e_ad_image_blocked() {
2317        use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2318
2319        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2320        nm.set_page_url("https://www.mylegitsite-test.com/".to_string());
2321
2322        // Ad tracking pixel should be blocked via adblock pattern or trie.
2323        assert!(
2324            run_full_interception(
2325                &mut nm,
2326                "https://googleads.g.doubleclick.net/pagead/viewthroughconversion/123/?random=456",
2327                ResourceType::Image,
2328                false,
2329            ),
2330            "doubleclick ad image/tracking pixel should be blocked"
2331        );
2332
2333        // Legitimate first-party image should pass.
2334        assert!(
2335            !run_full_interception(
2336                &mut nm,
2337                "https://www.mylegitsite-test.com/images/logo.png",
2338                ResourceType::Image,
2339                true,
2340            ),
2341            "legitimate first-party image should not be blocked"
2342        );
2343    }
2344
2345    #[cfg(feature = "adblock")]
2346    #[test]
2347    fn test_e2e_hostname_with_userinfo() {
2348        use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2349
2350        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2351        nm.set_page_url("https://example.org/".to_string());
2352
2353        // URL with userinfo should still correctly identify googletagmanager.com.
2354        assert!(
2355            run_full_interception(
2356                &mut nm,
2357                "https://user:pass@www.googletagmanager.com/gtm.js?id=GTM-XXXX",
2358                ResourceType::Script,
2359                false,
2360            ),
2361            "tracker URL with userinfo should still be blocked"
2362        );
2363    }
2364
2365    #[test]
2366    fn test_blacklist_non_strict_allows_whitelist_override() {
2367        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2368        nm.set_page_url("https://example.com/".to_string());
2369
2370        nm.set_blacklist_patterns(["beacon.min.js"]);
2371        nm.set_whitelist_patterns(["beacon.min.js"]);
2372
2373        nm.set_blacklist_strict(false);
2374
2375        let u = "https://static.cloudflareinsights.com/beacon.min.js";
2376        assert!(nm.is_blacklisted(u));
2377        assert!(nm.is_whitelisted(u));
2378        assert!(!nm.blacklist_strict);
2379    }
2380
2381    // ── max_redirects enforcement ───────────────────────────────────────
2382    //
2383    // The redirect cap short-circuits in NetworkManager::on_request when a
2384    // Document-type chain exceeds the configured limit. We drive it via the
2385    // public on_request_will_be_sent entry point by deserializing synthetic
2386    // events — builder APIs exist but require every non-optional field, and
2387    // JSON is less fragile to cdp schema additions.
2388
2389    fn make_request_will_be_sent(
2390        request_id: &str,
2391        url: &str,
2392        resource_type: &str,
2393        redirect_from_url: Option<&str>,
2394    ) -> chromiumoxide_cdp::cdp::browser_protocol::network::EventRequestWillBeSent {
2395        let mut v = serde_json::json!({
2396            "requestId": request_id,
2397            "loaderId": "test-loader",
2398            "documentURL": url,
2399            "request": {
2400                "url": url,
2401                "method": "GET",
2402                "headers": {},
2403                "initialPriority": "Medium",
2404                "referrerPolicy": "no-referrer"
2405            },
2406            "timestamp": 0.0,
2407            "wallTime": 0.0,
2408            "initiator": { "type": "other" },
2409            "redirectHasExtraInfo": false,
2410            "type": resource_type,
2411            "frameId": "frame1"
2412        });
2413        if let Some(from) = redirect_from_url {
2414            v["redirectResponse"] = serde_json::json!({
2415                "url": from,
2416                "status": 302,
2417                "statusText": "Found",
2418                "headers": { "Location": url },
2419                "mimeType": "text/html",
2420                "charset": "",
2421                "connectionReused": false,
2422                "connectionId": 0.0,
2423                "encodedDataLength": 0.0,
2424                "securityState": "unknown"
2425            });
2426        }
2427        serde_json::from_value(v).expect("EventRequestWillBeSent should deserialize")
2428    }
2429
2430    fn drain_too_many_redirects(nm: &mut NetworkManager) -> Option<super::HttpRequest> {
2431        while let Some(ev) = nm.poll() {
2432            if let super::NetworkEvent::RequestFailed(req) = ev {
2433                if req.failure_text.as_deref() == Some("net::ERR_TOO_MANY_REDIRECTS") {
2434                    return Some(req);
2435                }
2436            }
2437        }
2438        None
2439    }
2440
2441    fn drain_stop_loading(nm: &mut NetworkManager) -> bool {
2442        while let Some(ev) = nm.poll() {
2443            if let super::NetworkEvent::SendCdpRequest((method, _)) = ev {
2444                let m: &str = method.as_ref();
2445                if m == "Page.stopLoading" {
2446                    return true;
2447                }
2448            }
2449        }
2450        false
2451    }
2452
2453    #[test]
2454    fn test_max_redirects_none_allows_unlimited_chain() {
2455        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2456        // max_redirects left at its default (None).
2457
2458        // 10 sequential Document hops sharing the same request_id.
2459        nm.on_request_will_be_sent(&make_request_will_be_sent(
2460            "r1",
2461            "https://example.com/0",
2462            "Document",
2463            None,
2464        ));
2465        for i in 1..10 {
2466            nm.on_request_will_be_sent(&make_request_will_be_sent(
2467                "r1",
2468                &format!("https://example.com/{i}"),
2469                "Document",
2470                Some(&format!("https://example.com/{}", i - 1)),
2471            ));
2472        }
2473
2474        assert!(
2475            drain_too_many_redirects(&mut nm).is_none(),
2476            "no cap set: chain of 10 hops must not emit ERR_TOO_MANY_REDIRECTS"
2477        );
2478    }
2479
2480    #[test]
2481    fn test_max_redirects_caps_document_chain() {
2482        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2483        nm.max_redirects = Some(3);
2484
2485        // Initial request + 4 redirect hops. The 4th redirect (chain length 4 > 3)
2486        // must trip the cap.
2487        nm.on_request_will_be_sent(&make_request_will_be_sent(
2488            "r1",
2489            "https://example.com/0",
2490            "Document",
2491            None,
2492        ));
2493        for i in 1..=4 {
2494            nm.on_request_will_be_sent(&make_request_will_be_sent(
2495                "r1",
2496                &format!("https://example.com/{i}"),
2497                "Document",
2498                Some(&format!("https://example.com/{}", i - 1)),
2499            ));
2500        }
2501
2502        let failed = drain_too_many_redirects(&mut nm)
2503            .expect("cap of 3 on a 4-hop chain must emit ERR_TOO_MANY_REDIRECTS");
2504        assert_eq!(
2505            failed.redirect_chain.len(),
2506            4,
2507            "failed request should preserve the full accumulated chain"
2508        );
2509        assert_eq!(
2510            failed.url.as_deref(),
2511            Some("https://example.com/4"),
2512            "failed request url should be the hop that tripped the cap"
2513        );
2514
2515        // Second navigation after the cap is tripped must also schedule
2516        // Page.stopLoading to actually abort the tab.
2517        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2518        nm.max_redirects = Some(3);
2519        nm.on_request_will_be_sent(&make_request_will_be_sent(
2520            "r2",
2521            "https://example.com/0",
2522            "Document",
2523            None,
2524        ));
2525        for i in 1..=4 {
2526            nm.on_request_will_be_sent(&make_request_will_be_sent(
2527                "r2",
2528                &format!("https://example.com/{i}"),
2529                "Document",
2530                Some(&format!("https://example.com/{}", i - 1)),
2531            ));
2532        }
2533        assert!(
2534            drain_stop_loading(&mut nm),
2535            "cap hit must dispatch Page.stopLoading to abort navigation"
2536        );
2537    }
2538
2539    #[test]
2540    fn test_max_redirects_ignores_subresources() {
2541        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2542        nm.max_redirects = Some(2);
2543
2544        // A 5-hop script redirect chain — sub-resources are exempt by design.
2545        nm.on_request_will_be_sent(&make_request_will_be_sent(
2546            "s1",
2547            "https://cdn.example.com/0.js",
2548            "Script",
2549            None,
2550        ));
2551        for i in 1..=5 {
2552            nm.on_request_will_be_sent(&make_request_will_be_sent(
2553                "s1",
2554                &format!("https://cdn.example.com/{i}.js"),
2555                "Script",
2556                Some(&format!("https://cdn.example.com/{}.js", i - 1)),
2557            ));
2558        }
2559
2560        assert!(
2561            drain_too_many_redirects(&mut nm).is_none(),
2562            "sub-resource redirect chains must never be capped"
2563        );
2564    }
2565}
chromiumoxide/handler/network.rs

chromiumoxide/handler/
network.rs