1#[cfg(any(feature = "adblock", feature = "firewall"))]
2use super::blockers::block_websites::block_ads;
3use super::blockers::{
4 block_websites::block_xhr, ignore_script_embedded, ignore_script_xhr, ignore_script_xhr_media,
5 xhr::IGNORE_XHR_ASSETS,
6};
7use crate::auth::Credentials;
8#[cfg(feature = "_cache")]
9use crate::cache::BasicCachePolicy;
10use crate::cmd::CommandChain;
11use crate::handler::http::HttpRequest;
12use crate::handler::network_utils::{base_domain_from_host, host_and_rest};
13use aho_corasick::AhoCorasick;
14use case_insensitive_string::CaseInsensitiveString;
15use chromiumoxide_cdp::cdp::browser_protocol::fetch::{RequestPattern, RequestStage};
16use chromiumoxide_cdp::cdp::browser_protocol::network::{
17 EmulateNetworkConditionsByRuleParams, EventLoadingFailed, EventLoadingFinished,
18 EventRequestServedFromCache, EventRequestWillBeSent, EventResponseReceived, Headers,
19 InitiatorType, InterceptionId, NetworkConditions, RequestId, ResourceType, Response,
20 SetCacheDisabledParams, SetExtraHttpHeadersParams,
21};
22use chromiumoxide_cdp::cdp::browser_protocol::{
23 fetch::{
24 self, AuthChallengeResponse, AuthChallengeResponseResponse, ContinueRequestParams,
25 ContinueWithAuthParams, DisableParams, EventAuthRequired, EventRequestPaused,
26 },
27 network::SetBypassServiceWorkerParams,
28};
29use chromiumoxide_cdp::cdp::browser_protocol::{
30 network::EnableParams, security::SetIgnoreCertificateErrorsParams,
31};
32use chromiumoxide_types::{Command, Method, MethodId};
33use hashbrown::{HashMap, HashSet};
34use lazy_static::lazy_static;
35use reqwest::header::PROXY_AUTHORIZATION;
36use spider_network_blocker::intercept_manager::NetworkInterceptManager;
37pub use spider_network_blocker::scripts::{
38 URL_IGNORE_SCRIPT_BASE_PATHS, URL_IGNORE_SCRIPT_STYLES_PATHS, URL_IGNORE_TRIE_PATHS,
39};
40use std::borrow::Cow;
41use std::collections::VecDeque;
42use std::time::{Duration, Instant};
43
44lazy_static! {
45 static ref JS_FRAMEWORK_ALLOW: Vec<&'static str> = vec![
47 "jquery", "angular",
49 "react", "vue", "bootstrap",
52 "d3",
53 "lodash",
54 "ajax",
55 "application",
56 "app", "main",
58 "index",
59 "bundle",
60 "vendor",
61 "runtime",
62 "polyfill",
63 "scripts",
64 "es2015.",
65 "es2020.",
66 "webpack",
67 "captcha",
68 "client",
69 "/cdn-cgi/challenge-platform/",
70 "/wp-content/js/", "https://m.stripe.network/",
73 "https://challenges.cloudflare.com/",
74 "https://www.google.com/recaptcha/",
75 "https://google.com/recaptcha/api.js",
76 "https://www.gstatic.com/recaptcha/",
77 "https://captcha.px-cloud.net/",
78 "https://geo.captcha-delivery.com/",
79 "https://api.leminnow.com/captcha/",
80 "https://cdn.auth0.com/js/lock/",
81 "https://captcha.gtimg.com",
82 "https://client-api.arkoselabs.com/",
83 "https://www.capy.me/puzzle/",
84 "https://newassets.hcaptcha.com/",
85 "https://cdn.auth0.com/client",
86 "https://js.stripe.com/",
87 "https://cdn.prod.website-files.com/", "https://cdnjs.cloudflare.com/", "https://code.jquery.com/jquery-"
90 ];
91
92 pub static ref ALLOWED_MATCHER: AhoCorasick = AhoCorasick::new(JS_FRAMEWORK_ALLOW.iter()).expect("matcher to build");
97
98 static ref JS_FRAMEWORK_ALLOW_3RD_PARTY: Vec<&'static str> = vec![
100 "https://m.stripe.network/",
102 "https://challenges.cloudflare.com/",
103 "https://js.stripe.com/",
104 "https://cdn.prod.website-files.com/", "https://cdnjs.cloudflare.com/", "https://code.jquery.com/jquery-",
107 "https://ct.captcha-delivery.com/",
108 "https://geo.captcha-delivery.com/",
109 "https://img1.wsimg.com/parking-lander/static/js/main.d9ebbb8c.js", "https://cdn.auth0.com/client",
111 "https://captcha.px-cloud.net/",
112 "https://www.capy.me/puzzle/",
113 "https://www.gstatic.com/recaptcha/",
114 "https://google.com/recaptcha/",
115 "https://www.google.com/recaptcha/",
116 "https://www.recaptcha.net/recaptcha/",
117 "https://js.hcaptcha.com/1/api.js",
118 "https://hcaptcha.com/1/api.js",
119 "https://js.datadome.co/tags.js",
120 "https://api-js.datadome.co/",
121 "https://client.perimeterx.net/",
122 "https://captcha.px-cdn.net/",
123 "https://newassets.hcaptcha.com/",
124 "https://captcha.px-cloud.net/",
125 "https://s.perimeterx.net/",
126 "https://api.leminnow.com/captcha/",
127 "https://client-api.arkoselabs.com/",
128 "https://static.geetest.com/v4/gt4.js",
129 "https://static.geetest.com/",
130 "https://cdn.jsdelivr.net/npm/@friendlycaptcha/",
131 "https://cdn.perfdrive.com/aperture/",
132 "https://assets.queue-it.net/",
133 "discourse-cdn.com/",
134 "hcaptcha.com",
135 "/cdn-cgi/challenge-platform/",
136 "/_Incapsula_Resource"
137 ];
138
139 pub static ref ALLOWED_MATCHER_3RD_PARTY: AhoCorasick = AhoCorasick::new(JS_FRAMEWORK_ALLOW_3RD_PARTY.iter()).expect("matcher to build");
141
142 pub static ref JS_FRAMEWORK_PATH: phf::Set<&'static str> = {
144 phf::phf_set! {
145 "_astro/", "_app/immutable"
147 }
148 };
149
150 pub static ref IGNORE_CONTENT_TYPES: phf::Set<&'static str> = phf::phf_set! {
152 "application/pdf",
153 "application/zip",
154 "application/x-rar-compressed",
155 "application/x-tar",
156 "image/png",
157 "image/jpeg",
158 "image/gif",
159 "image/bmp",
160 "image/webp",
161 "image/svg+xml",
162 "video/mp4",
163 "video/x-msvideo",
164 "video/x-matroska",
165 "video/webm",
166 "audio/mpeg",
167 "audio/ogg",
168 "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
169 "application/vnd.ms-excel",
170 "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
171 "application/vnd.ms-powerpoint",
172 "application/vnd.openxmlformats-officedocument.presentationml.presentation",
173 "application/x-7z-compressed",
174 "application/x-rpm",
175 "application/x-shockwave-flash",
176 "application/rtf",
177 };
178
179 pub static ref IGNORE_VISUAL_RESOURCE_MAP: phf::Set<&'static str> = phf::phf_set! {
181 "Image",
182 "Media",
183 "Font"
184 };
185
186 pub static ref IGNORE_NETWORKING_RESOURCE_MAP: phf::Set<&'static str> = phf::phf_set! {
188 "CspViolationReport",
189 "Ping",
190 };
191
192 pub static ref CSS_EXTENSION: CaseInsensitiveString = CaseInsensitiveString::from("css");
194
195 pub static ref INIT_CHAIN: Vec<(std::borrow::Cow<'static, str>, serde_json::Value)> = {
197 let enable = EnableParams::default();
198
199 if let Ok(c) = serde_json::to_value(&enable) {
200 vec![(enable.identifier(), c)]
201 } else {
202 vec![]
203 }
204 };
205
206 pub static ref INIT_CHAIN_IGNORE_HTTP_ERRORS: Vec<(std::borrow::Cow<'static, str>, serde_json::Value)> = {
208 let enable = EnableParams::default();
209 let mut v = vec![];
210 if let Ok(c) = serde_json::to_value(&enable) {
211 v.push((enable.identifier(), c));
212 }
213 let ignore = SetIgnoreCertificateErrorsParams::new(true);
214 if let Ok(ignored) = serde_json::to_value(&ignore) {
215 v.push((ignore.identifier(), ignored));
216 }
217
218 v
219 };
220
221 pub static ref ENABLE_FETCH: chromiumoxide_cdp::cdp::browser_protocol::fetch::EnableParams = {
223 fetch::EnableParams::builder()
224 .handle_auth_requests(true)
225 .pattern(RequestPattern::builder().url_pattern("*").request_stage(RequestStage::Request).build())
226 .build()
227 };
228}
229
/// Returns `true` when `status` is one of the HTTP redirect codes
/// 301, 302, 303, 307 or 308.
pub(crate) fn is_redirect_status(status: i64) -> bool {
    const REDIRECT_CODES: [i64; 5] = [301, 302, 303, 307, 308];
    REDIRECT_CODES.contains(&status)
}
234
/// Age, in seconds, after which `evict_stale_entries` drops buffered
/// `requests_will_be_sent` and `request_id_to_interception_id` entries.
const STALE_BUFFER_SECS: u64 = 30;

/// Age, in seconds (measured from `HttpRequest::created_at`), after which
/// `evict_stale_entries` drops entries from `requests`.
const STALE_REQUEST_SECS: u64 = 120;
247
/// Newtype around a shared `adblock::Engine`. The wrapper has its own
/// `Debug` impl that prints only the type name, which lets `NetworkManager`
/// derive `Debug`.
#[cfg(feature = "adblock")]
pub struct AdblockEngine(std::sync::Arc<adblock::Engine>);
251
/// Prints only the type name; the wrapped engine's contents are not shown.
#[cfg(feature = "adblock")]
impl std::fmt::Debug for AdblockEngine {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // A field-less `debug_struct` emits just its name, so write it directly.
        f.write_str("AdblockEngine")
    }
}
258
/// Gives transparent read access to the wrapped `adblock::Engine`.
#[cfg(feature = "adblock")]
impl std::ops::Deref for AdblockEngine {
    type Target = adblock::Engine;

    fn deref(&self) -> &adblock::Engine {
        let AdblockEngine(shared) = self;
        &**shared
    }
}
266
/// State for CDP network handling: buffered protocol events, interception
/// flags, and the block/allow policy applied to paused fetch requests.
#[derive(Debug)]
pub struct NetworkManager {
    /// Outgoing events; filled by `push_cdp_request` / `emit_request_policy`
    /// and drained front-to-back by `poll`.
    queued_events: VecDeque<NetworkEvent>,
    /// When true, `init_commands` uses the chain that also sends
    /// `SetIgnoreCertificateErrorsParams(true)`.
    ignore_httpserrors: bool,
    /// Tracked requests by network request id; evicted after
    /// `STALE_REQUEST_SECS` based on `created_at`.
    requests: HashMap<RequestId, HttpRequest>,
    /// Buffered `requestWillBeSent` events with a timestamp used for
    /// eviction; consumed in `on_fetch_request_paused`.
    requests_will_be_sent: HashMap<RequestId, (EventRequestWillBeSent, Instant)>,
    /// Extra headers last set via `set_extra_headers` (proxy auth removed).
    extra_headers: std::collections::HashMap<String, String>,
    /// Network id -> (interception id, insertion time), recorded when a paused
    /// request arrives with no buffered `requestWillBeSent` for it.
    request_id_to_interception_id: HashMap<RequestId, (InterceptionId, Instant)>,
    /// Inverse of the user's "cache enabled" choice; sent to the browser by
    /// `update_protocol_cache_disabled`.
    user_cache_disabled: bool,
    /// Request ids for which authentication was attempted; pruned in
    /// `evict_stale_entries`. NOTE(review): insertion happens outside this section.
    attempted_authentications: HashSet<RequestId>,
    /// Credentials set by `authenticate`.
    credentials: Option<Credentials>,
    /// Whether the user asked for request interception.
    pub(crate) user_request_interception_enabled: bool,
    /// When true every paused request is failed with `BlockedByClient`.
    block_all: bool,
    /// Whether interception is considered enabled at the protocol level.
    pub(crate) protocol_request_interception_enabled: bool,
    /// NOTE(review): not read in the visible section; presumably offline emulation state.
    offline: bool,
    /// Timeout handed to the `CommandChain` built by `init_commands`.
    pub request_timeout: Duration,
    /// Block resource types listed in `IGNORE_VISUAL_RESOURCE_MAP`.
    pub ignore_visuals: bool,
    /// Block `Stylesheet` resources.
    pub block_stylesheets: bool,
    /// Enables the embedded-script filter (together with `only_html` or `ignore_visuals`).
    pub block_javascript: bool,
    /// Let first-party stylesheets through even if another rule blocked them.
    pub allow_first_party_stylesheets: bool,
    /// Let first-party scripts through even if another rule blocked them.
    pub allow_first_party_javascript: bool,
    /// Let first-party visual resources through even if another rule blocked them.
    pub allow_first_party_visuals: bool,
    /// Enables the analytics/tracker URL filters.
    pub block_analytics: bool,
    /// Prefetch blocking flag (also exported as `blockPrefetch` in the policy).
    pub block_prefetch: bool,
    /// Exported as `onlyHtml`; also gates the embedded-script filter.
    pub only_html: bool,
    /// Set when the first document URL ends in `.xml`; exempts `.xsl` URLs
    /// from the visual/stylesheet filter.
    pub xml_document: bool,
    /// Site-specific interception rules.
    pub intercept_manager: NetworkInterceptManager,
    /// Counts document requests that repeat the target URL; at 3 or more,
    /// further requests are blocked.
    pub document_reload_tracker: u8,
    /// URL of the current main document.
    pub document_target_url: String,
    /// Base domain derived from `document_target_url`.
    pub document_target_domain: String,
    /// NOTE(review): not read in the visible section; presumably a response size cap.
    pub max_bytes_allowed: Option<u64>,
    /// NOTE(review): not read in the visible section; presumably a redirect-hop cap.
    pub max_redirects: Option<usize>,
    /// Key passed to the session cache lookup for allowed requests.
    #[cfg(feature = "_cache")]
    pub cache_site_key: Option<String>,
    /// Policy deciding whether a cached response may be served.
    #[cfg(feature = "_cache")]
    pub cache_policy: Option<BasicCachePolicy>,
    /// Raw whitelist substrings; source for `whitelist_matcher`.
    whitelist_patterns: Vec<String>,
    /// Compiled whitelist; `None` when empty or when the build failed.
    whitelist_matcher: Option<AhoCorasick>,
    /// Raw blacklist substrings; source for `blacklist_matcher`.
    blacklist_patterns: Vec<String>,
    /// Compiled blacklist; `None` when empty or when the build failed.
    blacklist_matcher: Option<AhoCorasick>,
    /// When true a blacklist hit is applied last and cannot be overridden;
    /// when false it is applied early and later allow rules may undo it.
    blacklist_strict: bool,
    /// Gate for `emit_request_policy`.
    remote_local_policy: bool,
    /// Optional adblock engine installed via `set_adblock_engine`.
    #[cfg(feature = "adblock")]
    adblock_engine: Option<AdblockEngine>,
}
431
432impl NetworkManager {
433 pub fn new(ignore_httpserrors: bool, request_timeout: Duration) -> Self {
435 Self {
436 queued_events: Default::default(),
437 ignore_httpserrors,
438 requests: Default::default(),
439 requests_will_be_sent: Default::default(),
440 extra_headers: Default::default(),
441 request_id_to_interception_id: Default::default(),
442 user_cache_disabled: false,
443 attempted_authentications: Default::default(),
444 credentials: None,
445 block_all: false,
446 user_request_interception_enabled: false,
447 protocol_request_interception_enabled: false,
448 offline: false,
449 request_timeout,
450 ignore_visuals: false,
451 block_javascript: false,
452 block_stylesheets: false,
453 allow_first_party_stylesheets: true,
454 allow_first_party_javascript: true,
455 allow_first_party_visuals: true,
456 block_prefetch: true,
457 block_analytics: true,
458 only_html: false,
459 xml_document: false,
460 intercept_manager: NetworkInterceptManager::Unknown,
461 document_reload_tracker: 0,
462 document_target_url: String::new(),
463 document_target_domain: String::new(),
464 whitelist_patterns: Vec::new(),
465 whitelist_matcher: None,
466 blacklist_patterns: Vec::new(),
467 blacklist_matcher: None,
468 blacklist_strict: true,
469 remote_local_policy: false,
470 max_bytes_allowed: None,
471 max_redirects: None,
472 #[cfg(feature = "_cache")]
473 cache_site_key: None,
474 #[cfg(feature = "_cache")]
475 cache_policy: None,
476 #[cfg(feature = "adblock")]
477 adblock_engine: None,
478 }
479 }
480
/// Installs a shared adblock engine, replacing any previous one.
#[cfg(feature = "adblock")]
pub fn set_adblock_engine(&mut self, engine: std::sync::Arc<adblock::Engine>) {
    self.adblock_engine = Some(AdblockEngine(engine));
}
486
487 pub fn set_whitelist_patterns<I, S>(&mut self, patterns: I)
489 where
490 I: IntoIterator<Item = S>,
491 S: Into<String>,
492 {
493 self.whitelist_patterns = patterns.into_iter().map(Into::into).collect();
494 self.rebuild_whitelist_matcher();
495 }
496
497 pub fn set_blacklist_patterns<I, S>(&mut self, patterns: I)
499 where
500 I: IntoIterator<Item = S>,
501 S: Into<String>,
502 {
503 self.blacklist_patterns = patterns.into_iter().map(Into::into).collect();
504 self.rebuild_blacklist_matcher();
505 }
506
/// Appends one blacklist pattern and recompiles the blacklist matcher.
pub fn add_blacklist_pattern<S: Into<String>>(&mut self, pattern: S) {
    self.blacklist_patterns.push(pattern.into());
    self.rebuild_blacklist_matcher();
}
512
513 pub fn add_blacklist_patterns<I, S>(&mut self, patterns: I)
515 where
516 I: IntoIterator<Item = S>,
517 S: Into<String>,
518 {
519 self.blacklist_patterns
520 .extend(patterns.into_iter().map(Into::into));
521 self.rebuild_blacklist_matcher();
522 }
523
/// Removes every blacklist pattern and drops the compiled matcher.
pub fn clear_blacklist(&mut self) {
    self.blacklist_patterns.clear();
    self.blacklist_matcher = None;
}
529
/// Sets whether a blacklist hit is final (`true`) or may be overridden by
/// later allow rules in `on_fetch_request_paused` (`false`).
pub fn set_blacklist_strict(&mut self, strict: bool) {
    self.blacklist_strict = strict;
}
534
535 #[inline]
536 fn rebuild_blacklist_matcher(&mut self) {
537 if self.blacklist_patterns.is_empty() {
538 self.blacklist_matcher = None;
539 return;
540 }
541
542 self.blacklist_matcher =
543 AhoCorasick::new(self.blacklist_patterns.iter().map(|s| s.as_str())).ok();
544 }
545
546 #[inline]
547 fn is_blacklisted(&self, url: &str) -> bool {
548 self.blacklist_matcher
549 .as_ref()
550 .map(|m| m.is_match(url))
551 .unwrap_or(false)
552 }
553
/// Appends one whitelist pattern and recompiles the whitelist matcher.
pub fn add_whitelist_pattern<S: Into<String>>(&mut self, pattern: S) {
    self.whitelist_patterns.push(pattern.into());
    self.rebuild_whitelist_matcher();
}
559
560 pub fn add_whitelist_patterns<I, S>(&mut self, patterns: I)
562 where
563 I: IntoIterator<Item = S>,
564 S: Into<String>,
565 {
566 self.whitelist_patterns
567 .extend(patterns.into_iter().map(Into::into));
568 self.rebuild_whitelist_matcher();
569 }
570
571 #[inline]
572 fn rebuild_whitelist_matcher(&mut self) {
573 if self.whitelist_patterns.is_empty() {
574 self.whitelist_matcher = None;
575 return;
576 }
577
578 self.whitelist_matcher =
580 AhoCorasick::new(self.whitelist_patterns.iter().map(|s| s.as_str())).ok();
581 }
582
583 #[inline]
584 fn is_whitelisted(&self, url: &str) -> bool {
585 self.whitelist_matcher
586 .as_ref()
587 .map(|m| m.is_match(url))
588 .unwrap_or(false)
589 }
590
591 pub fn init_commands(&self) -> CommandChain {
593 let cmds = if self.ignore_httpserrors {
594 INIT_CHAIN_IGNORE_HTTP_ERRORS.clone()
595 } else {
596 INIT_CHAIN.clone()
597 };
598 CommandChain::new(cmds, self.request_timeout)
599 }
600
601 pub(crate) fn push_cdp_request<T: Command>(&mut self, cmd: T) {
603 let method = cmd.identifier();
604 if let Ok(params) = serde_json::to_value(cmd) {
605 self.queued_events
606 .push_back(NetworkEvent::SendCdpRequest((method, params)));
607 }
608 }
609
/// Pops the oldest queued event, or returns `None` when the queue is empty.
pub fn poll(&mut self) -> Option<NetworkEvent> {
    self.queued_events.pop_front()
}
614
615 pub fn evict_stale_entries(&mut self, now: Instant) {
620 let cutoff = now - Duration::from_secs(STALE_BUFFER_SECS);
621
622 self.requests_will_be_sent.retain(|_, (_, ts)| *ts > cutoff);
623 self.request_id_to_interception_id
624 .retain(|_, (_, ts)| *ts > cutoff);
625
626 let request_cutoff = now - Duration::from_secs(STALE_REQUEST_SECS);
631 self.requests
632 .retain(|_, req| req.created_at > request_cutoff);
633
634 if !self.attempted_authentications.is_empty() {
639 let live: HashSet<&str> = self
640 .requests
641 .values()
642 .filter_map(|r| r.interception_id.as_ref().map(|id| id.as_ref()))
643 .collect();
644 self.attempted_authentications
645 .retain(|id| live.contains(id.as_ref()));
646 }
647 }
648
/// Returns the extra headers stored by the last `set_extra_headers` call.
pub fn extra_headers(&self) -> &std::collections::HashMap<String, String> {
    &self.extra_headers
}
653
654 pub fn set_extra_headers(&mut self, headers: std::collections::HashMap<String, String>) {
656 self.extra_headers = headers;
657 self.extra_headers.remove(PROXY_AUTHORIZATION.as_str());
658 self.extra_headers.remove("Proxy-Authorization");
659 if !self.extra_headers.is_empty() {
660 if let Ok(headers) = serde_json::to_value(&self.extra_headers) {
661 self.push_cdp_request(SetExtraHttpHeadersParams::new(Headers::new(headers)));
662 }
663 }
664 }
665
/// Queues `SetBypassServiceWorkerParams` with the given flag.
///
/// NOTE(review): despite the method name, `bypass` is forwarded unchanged as
/// the *bypass* flag, so `true` presumably bypasses service workers rather
/// than enabling them. Confirm against callers.
pub fn set_service_worker_enabled(&mut self, bypass: bool) {
    self.push_cdp_request(SetBypassServiceWorkerParams::new(bypass));
}
669
/// Sets the block-all flag; while set, `on_fetch_request_paused` fails
/// every paused request with `BlockedByClient`.
pub fn set_block_all(&mut self, block_all: bool) {
    self.block_all = block_all;
}
673
/// Enables or disables `emit_request_policy`. This only stores the flag;
/// no policy is emitted here.
pub fn set_remote_local_policy(&mut self, enabled: bool) {
    self.remote_local_policy = enabled;
}
679
680 fn request_policy_params(&self) -> serde_json::Value {
685 serde_json::json!({
686 "version": 1,
687 "enabled": true,
688 "flags": {
689 "blockAll": self.block_all,
690 "blockVisuals": self.ignore_visuals,
691 "blockStylesheets": self.block_stylesheets,
692 "blockJavascript": self.block_javascript,
693 "blockAnalytics": self.block_analytics,
694 "blockAds": self.block_ads_enabled(),
695 "blockPrefetch": self.block_prefetch,
696 "onlyHtml": self.only_html,
697 "blacklistStrict": self.blacklist_strict,
698 "allowFirstPartyStylesheets": self.allow_first_party_stylesheets,
699 "allowFirstPartyJavascript": self.allow_first_party_javascript,
700 "allowFirstPartyVisuals": self.allow_first_party_visuals,
701 },
702 "blacklist": self.blacklist_patterns,
703 "whitelist": self.whitelist_patterns,
704 "interceptManager": format!("{:?}", self.intercept_manager),
707 "pageUrl": self.document_target_url,
708 })
709 }
710
/// Reports whether the crate was compiled with the `firewall` feature;
/// exported as `blockAds` in the request policy.
#[inline]
fn block_ads_enabled(&self) -> bool {
    cfg!(feature = "firewall")
}
718
719 pub fn emit_request_policy(&mut self) {
723 if !self.remote_local_policy || !self.protocol_request_interception_enabled {
724 return;
725 }
726 let params = self.request_policy_params();
727 self.queued_events.push_back(NetworkEvent::SendCdpRequest((
728 Cow::Borrowed("Interception.setPolicy"),
729 params,
730 )));
731 }
732
/// Records the user's interception preference and re-evaluates whether
/// fetch interception needs to be enabled or disabled in the browser.
pub fn set_request_interception(&mut self, enabled: bool) {
    self.user_request_interception_enabled = enabled;
    self.update_protocol_request_interception();
}
737
738 pub fn set_cache_enabled(&mut self, enabled: bool) {
739 let run = self.user_cache_disabled == enabled;
740 self.user_cache_disabled = !enabled;
741 if run {
742 self.update_protocol_cache_disabled();
743 }
744 }
745
/// Marks protocol-level interception as enabled. Only the local flag is
/// changed; no command is queued.
pub fn enable_request_intercept(&mut self) {
    self.protocol_request_interception_enabled = true;
}
750
/// Marks protocol-level interception as disabled. Only the local flag is
/// changed; no command is queued.
pub fn disable_request_intercept(&mut self) {
    self.protocol_request_interception_enabled = false;
}
755
/// Sets (or clears with `None`) the site key used for session cache lookups.
#[cfg(feature = "_cache")]
pub fn set_cache_site_key(&mut self, cache_site_key: Option<String>) {
    self.cache_site_key = cache_site_key;
}
761
/// Sets (or clears with `None`) the policy that decides whether a cached
/// response may be served.
#[cfg(feature = "_cache")]
pub fn set_cache_policy(&mut self, cache_policy: Option<BasicCachePolicy>) {
    self.cache_policy = cache_policy;
}
767
/// Queues `SetCacheDisabledParams` with the current `user_cache_disabled` value.
pub fn update_protocol_cache_disabled(&mut self) {
    self.push_cdp_request(SetCacheDisabledParams::new(self.user_cache_disabled));
}
771
/// Stores `credentials` and makes sure fetch interception is on.
pub fn authenticate(&mut self, credentials: Credentials) {
    self.credentials = Some(credentials);
    // Must run before the flag is set below: the helper queues the enable
    // command only when the flag still says "disabled".
    self.update_protocol_request_interception();
    self.protocol_request_interception_enabled = true;
}
777
778 fn update_protocol_request_interception(&mut self) {
779 let enabled = self.user_request_interception_enabled || self.credentials.is_some();
780
781 if enabled == self.protocol_request_interception_enabled {
782 return;
783 }
784
785 if enabled {
786 self.push_cdp_request(ENABLE_FETCH.clone())
787 } else {
788 self.push_cdp_request(DisableParams::default())
789 }
790 }
791
792 #[inline]
795 fn should_block_script_blocklist_only(&self, url: &str) -> bool {
796 let block_analytics = self.block_analytics;
798
799 if block_analytics && spider_network_blocker::scripts::URL_IGNORE_TRIE.contains_prefix(url)
801 {
802 return true;
803 }
804
805 if crate::handler::blockers::block_websites::block_website(url) {
807 return true;
808 }
809
810 if let Some(path_with_slash) = Self::url_path_with_leading_slash(url) {
817 let p_slash = Self::strip_query_fragment(path_with_slash);
819 let p_noslash = p_slash.strip_prefix('/').unwrap_or(p_slash);
820
821 let base = match p_slash.rsplit('/').next() {
823 Some(b) => b,
824 None => p_slash,
825 };
826
827 if block_analytics && URL_IGNORE_TRIE_PATHS.contains_prefix(p_slash) {
830 return true;
831 }
832 if block_analytics && URL_IGNORE_TRIE_PATHS.contains_prefix(p_noslash) {
833 return true;
834 }
835 if block_analytics && URL_IGNORE_TRIE_PATHS.contains_prefix(base) {
836 return true;
837 }
838
839 if URL_IGNORE_SCRIPT_BASE_PATHS.contains_prefix(p_noslash) {
842 return true;
843 }
844
845 if self.ignore_visuals && URL_IGNORE_SCRIPT_STYLES_PATHS.contains_prefix(p_noslash) {
847 return true;
848 }
849 }
850
851 false
852 }
853
854 #[inline]
859 fn url_path_with_leading_slash(url: &str) -> Option<&str> {
860 let bytes = url.as_bytes();
862 let idx = memchr::memmem::find(bytes, b"//")?;
863 let after_slashes = idx + 2;
864
865 let slash_rel = memchr::memchr(b'/', &bytes[after_slashes..])?;
867 let slash_idx = after_slashes + slash_rel;
868
869 if slash_idx < url.len() {
870 Some(&url[slash_idx..])
871 } else {
872 None
873 }
874 }
875
876 #[inline]
881 fn strip_query_fragment(s: &str) -> &str {
882 match memchr::memchr2(b'?', b'#', s.as_bytes()) {
883 Some(i) => &s[..i],
884 None => s,
885 }
886 }
887
888 #[inline]
890 fn skip_xhr(
891 &self,
892 skip_networking: bool,
893 event: &EventRequestPaused,
894 network_event: bool,
895 ) -> bool {
896 if !skip_networking && network_event {
898 let request_url = event.request.url.as_str();
899
900 let skip_analytics =
902 self.block_analytics && (ignore_script_xhr(request_url) || block_xhr(request_url));
903
904 if skip_analytics {
905 true
906 } else if self.block_stylesheets || self.ignore_visuals {
907 let block_css = self.block_stylesheets;
908 let block_media = self.ignore_visuals;
909
910 let mut block_request = false;
911
912 if let Some(position) = memchr::memrchr(b'.', request_url.as_bytes()) {
913 let hlen = request_url.len();
914 let has_asset = hlen - position;
915
916 if has_asset >= 3 {
917 let next_position = position + 1;
918
919 if block_media
920 && IGNORE_XHR_ASSETS.contains::<CaseInsensitiveString>(
921 &request_url[next_position..].into(),
922 )
923 {
924 block_request = true;
925 } else if block_css {
926 block_request = CaseInsensitiveString::from(
927 &request_url.as_bytes()[next_position..],
928 )
929 .contains(&**CSS_EXTENSION)
930 }
931 }
932 }
933
934 if !block_request {
935 block_request = ignore_script_xhr_media(request_url);
936 }
937
938 block_request
939 } else {
940 skip_networking
941 }
942 } else {
943 skip_networking
944 }
945 }
946
/// Returns `true` if the request is already skipped, its URL is on the
/// static ad list, or `detect_ad` flags it.
#[cfg(feature = "adblock")]
#[inline]
fn detect_ad_if_enabled(&mut self, event: &EventRequestPaused, skip_networking: bool) -> bool {
    skip_networking || block_ads(&event.request.url) || self.detect_ad(event)
}
957
958 #[cfg(not(feature = "adblock"))]
960 #[inline]
961 fn detect_ad_if_enabled(&mut self, event: &EventRequestPaused, skip_networking: bool) -> bool {
962 use crate::handler::blockers::block_websites::block_ads;
963 if skip_networking {
964 true
965 } else {
966 block_ads(&event.request.url)
967 }
968 }
969
970 #[inline]
971 fn fail_request_blocked(
973 &mut self,
974 request_id: &chromiumoxide_cdp::cdp::browser_protocol::fetch::RequestId,
975 ) {
976 let params = chromiumoxide_cdp::cdp::browser_protocol::fetch::FailRequestParams::new(
977 request_id.clone(),
978 chromiumoxide_cdp::cdp::browser_protocol::network::ErrorReason::BlockedByClient,
979 );
980 self.push_cdp_request(params);
981 }
982
983 #[inline]
984 fn fulfill_request_empty_200(
986 &mut self,
987 request_id: &chromiumoxide_cdp::cdp::browser_protocol::fetch::RequestId,
988 ) {
989 let params = chromiumoxide_cdp::cdp::browser_protocol::fetch::FulfillRequestParams::new(
990 request_id.clone(),
991 200,
992 );
993 self.push_cdp_request(params);
994 }
995
/// Queues a `Fetch.fulfillRequest` that answers `request_id` with a cached
/// response: `status`, `headers`, and `body` (base64-encoded for the protocol).
#[cfg(feature = "_cache")]
#[inline]
fn fulfill_request_from_cache(
    &mut self,
    request_id: &chromiumoxide_cdp::cdp::browser_protocol::fetch::RequestId,
    body: &[u8],
    headers: &std::collections::HashMap<String, String>,
    status: i64,
) {
    use crate::cdp::browser_protocol::fetch::HeaderEntry;
    use crate::handler::network::fetch::FulfillRequestParams;
    use base64::Engine;

    let response_headers: Vec<HeaderEntry> = headers
        .iter()
        .map(|(name, value)| HeaderEntry {
            name: name.clone(),
            value: value.clone(),
        })
        .collect();

    let encoded_body = base64::engine::general_purpose::STANDARD.encode(body);

    let mut params = FulfillRequestParams::new(request_id.clone(), status);
    params.body = Some(encoded_body.into());
    params.response_headers = Some(response_headers);

    self.push_cdp_request(params);
}
1034
1035 #[inline]
1036 fn continue_request_with_url(
1038 &mut self,
1039 request_id: &chromiumoxide_cdp::cdp::browser_protocol::fetch::RequestId,
1040 url: Option<&str>,
1041 intercept_response: bool,
1042 ) {
1043 let mut params = ContinueRequestParams::new(request_id.clone());
1044 if let Some(url) = url {
1045 params.url = Some(url.to_string());
1046 params.intercept_response = Some(intercept_response);
1047 }
1048 self.push_cdp_request(params);
1049 }
1050
1051 #[inline]
1053 pub fn on_fetch_request_paused(&mut self, event: &EventRequestPaused) {
1054 if self.user_request_interception_enabled && self.protocol_request_interception_enabled {
1055 return;
1056 }
1057
1058 if self.block_all {
1059 tracing::debug!(
1060 "Blocked (block_all): {:?} - {}",
1061 event.resource_type,
1062 event.request.url
1063 );
1064 return self.fail_request_blocked(&event.request_id);
1065 }
1066
1067 let initiator_type: Option<InitiatorType> = event
1075 .network_id
1076 .as_ref()
1077 .and_then(|nid| self.requests_will_be_sent.get(nid.as_ref()))
1078 .map(|(rwbs, _)| rwbs.initiator.r#type.clone());
1079
1080 if let Some(network_id) = event.network_id.as_ref() {
1081 if let Some((request_will_be_sent, _)) =
1082 self.requests_will_be_sent.remove(network_id.as_ref())
1083 {
1084 self.on_request(&request_will_be_sent, Some(event.request_id.clone().into()));
1085 } else {
1086 self.request_id_to_interception_id.insert(
1087 network_id.clone(),
1088 (event.request_id.clone().into(), Instant::now()),
1089 );
1090 }
1091 }
1092
1093 let javascript_resource = event.resource_type == ResourceType::Script;
1095 let document_resource = event.resource_type == ResourceType::Document;
1096 let network_resource =
1097 !document_resource && crate::utils::is_data_resource(&event.resource_type);
1098
1099 let mut skip_networking =
1101 self.block_all || IGNORE_NETWORKING_RESOURCE_MAP.contains(event.resource_type.as_ref());
1102
1103 if event.resource_type == ResourceType::Prefetch && !self.block_prefetch {
1104 skip_networking = true;
1105 }
1106
1107 if !skip_networking {
1109 skip_networking = self.document_reload_tracker >= 3;
1110 }
1111
1112 let (current_url_cow, had_replacer) =
1114 self.handle_document_replacement_and_tracking(event, document_resource);
1115
1116 let current_url: &str = current_url_cow.as_ref();
1117
1118 let blacklisted = self.is_blacklisted(current_url);
1119
1120 if !self.blacklist_strict && blacklisted {
1121 skip_networking = true;
1122 }
1123
1124 if !skip_networking {
1125 if self.xml_document && current_url.ends_with(".xsl") {
1127 skip_networking = false;
1128 } else {
1129 skip_networking = self.should_skip_for_visuals_and_basic(&event.resource_type);
1130 }
1131 }
1132
1133 let is_main_document_request = document_resource
1156 && (event.redirected_request_id.is_some()
1157 || had_replacer
1158 || self.document_target_url.is_empty()
1159 || event.request.url == self.document_target_url);
1160 if !is_main_document_request {
1161 skip_networking = self.detect_ad_if_enabled(event, skip_networking);
1162 }
1163
1164 if !skip_networking
1166 && self.block_javascript
1167 && (self.only_html || self.ignore_visuals)
1168 && (javascript_resource
1169 || document_resource
1170 || event.resource_type == ResourceType::Stylesheet
1171 || event.resource_type == ResourceType::Image)
1172 {
1173 skip_networking = ignore_script_embedded(current_url);
1174 }
1175
1176 if !skip_networking && javascript_resource {
1179 skip_networking = self.should_block_script_blocklist_only(current_url);
1180 }
1181
1182 skip_networking = self.skip_xhr(skip_networking, event, network_resource);
1184
1185 if !skip_networking && (javascript_resource || network_resource || document_resource) {
1187 skip_networking = self.intercept_manager.intercept_detection(
1188 current_url,
1189 self.ignore_visuals,
1190 network_resource,
1191 );
1192 }
1193
1194 if !skip_networking && (javascript_resource || network_resource) {
1196 skip_networking = crate::handler::blockers::block_websites::block_website(current_url);
1197 }
1198
1199 if skip_networking && javascript_resource && ALLOWED_MATCHER_3RD_PARTY.is_match(current_url)
1202 {
1203 skip_networking = false;
1204 }
1205
1206 if skip_networking && self.is_whitelisted(current_url) {
1208 skip_networking = false;
1209 }
1210
1211 if skip_networking && !self.document_target_domain.is_empty() {
1227 let allow = match event.resource_type {
1228 ResourceType::Stylesheet => self.allow_first_party_stylesheets,
1229 ResourceType::Script => self.allow_first_party_javascript,
1230 _ if IGNORE_VISUAL_RESOURCE_MAP.contains(event.resource_type.as_ref()) => {
1231 self.allow_first_party_visuals
1232 }
1233 _ => false,
1234 };
1235 if allow && self.is_first_party_url(current_url) {
1236 skip_networking = false;
1237 }
1238 }
1239
1240 if skip_networking
1254 && self.allow_first_party_stylesheets
1255 && self.block_stylesheets
1256 && event.resource_type == ResourceType::Stylesheet
1257 && !matches!(initiator_type, Some(InitiatorType::Script))
1258 {
1259 skip_networking = false;
1260 }
1261
1262 if self.blacklist_strict && blacklisted {
1263 skip_networking = true;
1264 }
1265
1266 if skip_networking {
1267 tracing::debug!("Blocked: {:?} - {}", event.resource_type, current_url);
1268 self.fulfill_request_empty_200(&event.request_id);
1269 } else {
1270 #[cfg(feature = "_cache")]
1271 {
1272 if let (Some(policy), Some(cache_site_key)) =
1273 (self.cache_policy.as_ref(), self.cache_site_key.as_deref())
1274 {
1275 let current_url = format!("{}:{}", event.request.method, ¤t_url);
1276
1277 if let Some((res, cache_policy)) =
1278 crate::cache::remote::get_session_cache_item(cache_site_key, ¤t_url)
1279 {
1280 if policy.allows_cached(&cache_policy) {
1281 tracing::debug!(
1282 "Remote Cached: {:?} - {}",
1283 &event.resource_type,
1284 ¤t_url
1285 );
1286 let flat_headers = crate::http::headers_from_multi(&res.headers);
1287 return self.fulfill_request_from_cache(
1288 &event.request_id,
1289 &res.body,
1290 &flat_headers,
1291 res.status as i64,
1292 );
1293 }
1294 }
1295 }
1296 }
1297
1298 tracing::debug!("Allowed: {:?} - {}", event.resource_type, current_url);
1300 self.continue_request_with_url(
1301 &event.request_id,
1302 if had_replacer {
1303 Some(current_url)
1304 } else {
1305 None
1306 },
1307 !had_replacer,
1308 );
1309 }
1310 }
1311
1312 #[inline]
1318 fn should_skip_for_visuals_and_basic(&self, resource_type: &ResourceType) -> bool {
1319 (self.ignore_visuals && IGNORE_VISUAL_RESOURCE_MAP.contains(resource_type.as_ref()))
1320 || (self.block_stylesheets && *resource_type == ResourceType::Stylesheet)
1321 }
1322
/// Returns `true` when a target document URL has been recorded.
///
/// Note: despite the name, this checks `document_target_url`, not
/// `document_target_domain`.
pub fn has_target_domain(&self) -> bool {
    !self.document_target_url.is_empty()
}
1327
1328 #[inline]
1333 fn is_first_party_url(&self, url: &str) -> bool {
1334 if self.document_target_domain.is_empty() {
1335 return false;
1336 }
1337 match host_and_rest(url) {
1338 Some((host, _)) => base_domain_from_host(host) == self.document_target_domain,
1339 None => false,
1340 }
1341 }
1342
1343 pub fn set_page_url(&mut self, page_target_url: String) {
1345 let host_base = host_and_rest(&page_target_url)
1346 .map(|(h, _)| base_domain_from_host(h))
1347 .unwrap_or("");
1348
1349 self.document_target_domain = host_base.to_string();
1350 self.document_target_url = page_target_url;
1351
1352 self.emit_request_policy();
1356 }
1357
1358 pub fn clear_target_domain(&mut self) {
1360 self.document_reload_tracker = 0;
1361 self.document_target_url = Default::default();
1362 self.document_target_domain = Default::default();
1363 }
1364
    /// Updates document tracking state for a paused request and resolves the URL
    /// the request should continue with.
    ///
    /// Returns `(url, had_replacer)`: `url` borrows the event's URL unless a
    /// malformed redirect URL was detected and rewritten, in which case it is an
    /// owned replacement and `had_replacer` is `true`.
    ///
    /// Nothing is tracked or rewritten unless `document_resource` is `true`.
    #[inline]
    fn handle_document_replacement_and_tracking<'a>(
        &mut self,
        event: &'a EventRequestPaused,
        document_resource: bool,
    ) -> (Cow<'a, str>, bool) {
        let mut replacer: Option<String> = None;
        let current_url = event.request.url.as_str();

        if document_resource {
            if self.document_target_url == current_url {
                // Same document requested again: count it as a reload.
                self.document_reload_tracker += 1;
            } else if !self.document_target_url.is_empty() && event.redirected_request_id.is_some()
            {
                // A redirect away from the tracked document. Build two variants of
                // the tracked URL:
                //  - first: the tracked URL with the colon dropped from its scheme
                //    separator ("http//..." / "https//...");
                //  - second: the tracked URL with its scheme flipped
                //    (http -> https, or https -> http). Despite its name this is
                //    not always the https form.
                let (http_document_replacement, mut https_document_replacement) =
                    if self.document_target_url.starts_with("http://") {
                        (
                            self.document_target_url.replacen("http://", "http//", 1),
                            self.document_target_url.replacen("http://", "https://", 1),
                        )
                    } else {
                        (
                            self.document_target_url.replacen("https://", "https//", 1),
                            self.document_target_url.replacen("https://", "http://", 1),
                        )
                    };

                // Strip up to two trailing slashes from the flipped-scheme URL and
                // remember whether at least one was present.
                let trailing = https_document_replacement.ends_with('/');
                if trailing {
                    https_document_replacement.pop();
                }
                if https_document_replacement.ends_with('/') {
                    https_document_replacement.pop();
                }

                // The malformed URL being looked for is the flipped-scheme URL
                // immediately followed by the colon-less original.
                let redirect_mask = format!(
                    "{}{}",
                    https_document_replacement, http_document_replacement
                );

                if current_url == redirect_mask {
                    // Rewrite to the flipped-scheme URL, restoring a single
                    // trailing slash if one was stripped above.
                    replacer = Some(if trailing {
                        format!("{}/", https_document_replacement)
                    } else {
                        https_document_replacement
                    });
                }
            }

            // Only the first tracked document can mark the page as XML.
            if self.document_target_url.is_empty() && current_url.ends_with(".xml") {
                self.xml_document = true;
            }

            // Tracking always follows the URL from the event, not the replacement.
            self.document_target_url = event.request.url.clone();
            self.document_target_domain = host_and_rest(&self.document_target_url)
                .map(|(h, _)| base_domain_from_host(h).to_string())
                .unwrap_or_default();
        }

        let current_url_cow = match replacer {
            Some(r) => Cow::Owned(r),
            None => Cow::Borrowed(event.request.url.as_str()),
        };

        let had_replacer = matches!(current_url_cow, Cow::Owned(_));
        (current_url_cow, had_replacer)
    }
1441
1442 #[cfg(feature = "adblock")]
1446 pub fn detect_ad(&self, event: &EventRequestPaused) -> bool {
1447 use adblock::{
1448 lists::{FilterSet, ParseOptions, RuleTypes},
1449 Engine,
1450 };
1451
1452 lazy_static::lazy_static! {
1453 static ref AD_ENGINE: Engine = {
1454 let mut filter_set = FilterSet::new(false);
1455 let mut rules = ParseOptions::default();
1456 rules.rule_types = RuleTypes::All;
1457
1458 filter_set.add_filters(
1459 &*spider_network_blocker::adblock::ADBLOCK_PATTERNS,
1460 rules,
1461 );
1462
1463 #[cfg(feature = "adblock_easylist")]
1466 {
1467 static EASYLIST: &str = include_str!(concat!(env!("OUT_DIR"), "/easylist.txt"));
1468 static EASYPRIVACY: &str = include_str!(concat!(env!("OUT_DIR"), "/easyprivacy.txt"));
1469
1470 if !EASYLIST.is_empty() {
1471 filter_set.add_filter_list(EASYLIST, rules);
1472 }
1473 if !EASYPRIVACY.is_empty() {
1474 filter_set.add_filter_list(EASYPRIVACY, rules);
1475 }
1476 }
1477
1478 Engine::from_filter_set(filter_set, true)
1479 };
1480 }
1481
1482 let blockable = event.resource_type == ResourceType::Script
1483 || event.resource_type == ResourceType::Image
1484 || event.resource_type == ResourceType::Media
1485 || event.resource_type == ResourceType::Stylesheet
1486 || event.resource_type == ResourceType::Document
1487 || event.resource_type == ResourceType::Fetch
1488 || event.resource_type == ResourceType::Xhr;
1489
1490 if !blockable {
1491 return false;
1492 }
1493
1494 let u = &event.request.url;
1495
1496 let source_domain = if self.document_target_domain.is_empty() {
1497 "example.com"
1498 } else {
1499 &self.document_target_domain
1500 };
1501
1502 let hostname = u
1505 .strip_prefix("https://")
1506 .or_else(|| u.strip_prefix("http://"))
1507 .and_then(|rest| rest.split('/').next())
1508 .map(
1510 |authority| match memchr::memrchr(b'@', authority.as_bytes()) {
1511 Some(i) => &authority[i + 1..],
1512 None => authority,
1513 },
1514 )
1515 .and_then(|host_port| host_port.split(':').next())
1517 .unwrap_or(source_domain);
1518
1519 let resource_type_str = match event.resource_type {
1520 ResourceType::Script => "script",
1521 ResourceType::Image => "image",
1522 ResourceType::Media => "media",
1523 ResourceType::Stylesheet => "stylesheet",
1524 ResourceType::Document => "document",
1525 ResourceType::Fetch => "fetch",
1526 ResourceType::Xhr => "xhr",
1527 _ => "other",
1528 };
1529
1530 let request = adblock::request::Request::preparsed(
1531 u,
1532 hostname,
1533 source_domain,
1534 resource_type_str,
1535 !event.request.is_same_site.unwrap_or_default(),
1536 );
1537
1538 let engine: &Engine = match self.adblock_engine.as_ref() {
1539 Some(custom) => custom,
1540 None => &AD_ENGINE,
1541 };
1542
1543 engine.check_network_request(&request).matched
1544 }
1545
1546 pub fn on_fetch_auth_required(&mut self, event: &EventAuthRequired) {
1547 let response = if self
1548 .attempted_authentications
1549 .contains(event.request_id.as_ref())
1550 {
1551 AuthChallengeResponseResponse::CancelAuth
1552 } else if self.credentials.is_some() {
1553 self.attempted_authentications
1554 .insert(event.request_id.clone().into());
1555 AuthChallengeResponseResponse::ProvideCredentials
1556 } else {
1557 AuthChallengeResponseResponse::Default
1558 };
1559
1560 let mut auth = AuthChallengeResponse::new(response);
1561 if let Some(creds) = self.credentials.clone() {
1562 auth.username = Some(creds.username);
1563 auth.password = Some(creds.password);
1564 }
1565 self.push_cdp_request(ContinueWithAuthParams::new(event.request_id.clone(), auth));
1566 }
1567
1568 pub fn set_offline_mode(&mut self, value: bool) {
1570 if self.offline == value {
1571 return;
1572 }
1573 self.offline = value;
1574 if let Ok(condition) = NetworkConditions::builder()
1575 .url_pattern("")
1576 .latency(0)
1577 .download_throughput(-1.)
1578 .upload_throughput(-1.)
1579 .build()
1580 {
1581 if let Ok(network) = EmulateNetworkConditionsByRuleParams::builder()
1582 .offline(self.offline)
1583 .matched_network_condition(condition)
1584 .build()
1585 {
1586 self.push_cdp_request(network);
1587 }
1588 }
1589 }
1590
1591 pub fn on_request_will_be_sent(&mut self, event: &EventRequestWillBeSent) {
1593 if self.protocol_request_interception_enabled && !event.request.url.starts_with("data:") {
1594 if let Some((interception_id, _)) = self
1595 .request_id_to_interception_id
1596 .remove(event.request_id.as_ref())
1597 {
1598 self.on_request(event, Some(interception_id));
1599 } else {
1600 self.requests_will_be_sent
1601 .insert(event.request_id.clone(), (event.clone(), Instant::now()));
1602 }
1603 } else {
1604 self.on_request(event, None);
1605 }
1606 }
1607
1608 pub fn on_request_served_from_cache(&mut self, event: &EventRequestServedFromCache) {
1610 if let Some(request) = self.requests.get_mut(event.request_id.as_ref()) {
1611 request.from_memory_cache = true;
1612 }
1613 }
1614
1615 pub fn on_response_received(&mut self, event: &EventResponseReceived) {
1617 let mut request_failed = false;
1618
1619 let mut deducted: u64 = 0;
1621
1622 if let Some(max_bytes) = self.max_bytes_allowed.as_mut() {
1623 let before = *max_bytes;
1624
1625 let received_bytes: u64 = event.response.encoded_data_length as u64;
1627
1628 let content_length: Option<u64> = event
1630 .response
1631 .headers
1632 .inner()
1633 .get("content-length")
1634 .and_then(|v| v.as_str())
1635 .and_then(|s| s.trim().parse::<u64>().ok());
1636
1637 *max_bytes = max_bytes.saturating_sub(received_bytes);
1639
1640 if let Some(cl) = content_length {
1642 if cl > *max_bytes {
1643 *max_bytes = 0;
1644 }
1645 }
1646
1647 request_failed = *max_bytes == 0;
1648
1649 deducted = before.saturating_sub(*max_bytes);
1651 }
1652
1653 if deducted > 0 {
1655 self.queued_events
1656 .push_back(NetworkEvent::BytesConsumed(deducted));
1657 }
1658
1659 if request_failed && self.max_bytes_allowed.is_some() {
1661 self.set_block_all(true);
1662 }
1663
1664 if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
1665 request.set_response(event.response.clone());
1666 self.queued_events.push_back(if request_failed {
1667 NetworkEvent::RequestFailed(request)
1668 } else {
1669 NetworkEvent::RequestFinished(request)
1670 });
1671 }
1672 }
1673
1674 pub fn on_network_loading_finished(&mut self, event: &EventLoadingFinished) {
1676 if let Some(request) = self.requests.remove(event.request_id.as_ref()) {
1677 if let Some(interception_id) = request.interception_id.as_ref() {
1678 self.attempted_authentications
1679 .remove(interception_id.as_ref());
1680 }
1681 self.queued_events
1682 .push_back(NetworkEvent::RequestFinished(request));
1683 }
1684 }
1685
1686 pub fn on_network_loading_failed(&mut self, event: &EventLoadingFailed) {
1688 if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
1689 request.failure_text = Some(event.error_text.clone());
1690 if let Some(interception_id) = request.interception_id.as_ref() {
1691 self.attempted_authentications
1692 .remove(interception_id.as_ref());
1693 }
1694 self.queued_events
1695 .push_back(NetworkEvent::RequestFailed(request));
1696 }
1697 }
1698
    /// Registers a new request (or the next hop of a redirect) and queues a
    /// `NetworkEvent::Request` for it.
    ///
    /// When the event carries a redirect response for a request that is still
    /// tracked, the previous hop is finalized with that response and appended to
    /// the redirect chain of the new request. If `max_redirects` is set and a
    /// document request exceeds it, the request is reported as failed with
    /// `net::ERR_TOO_MANY_REDIRECTS`, a stop-loading command is queued, and the
    /// request is not tracked.
    fn on_request(
        &mut self,
        event: &EventRequestWillBeSent,
        interception_id: Option<InterceptionId>,
    ) {
        let mut redirect_chain = Vec::new();
        let mut redirect_location = None;

        if let Some(redirect_resp) = &event.redirect_response {
            if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
                if is_redirect_status(redirect_resp.status) {
                    // Indexing a JSON value with a missing key yields null, so a
                    // response without a `Location` header simply skips this branch.
                    if let Some(location) = redirect_resp.headers.inner()["Location"].as_str() {
                        if redirect_resp.url != location {
                            // Remove every occurrence of the redirecting URL from
                            // the `Location` value.
                            let fixed_location = location.replace(&redirect_resp.url, "");

                            if !fixed_location.is_empty() {
                                // Patch the response already stored on the previous hop.
                                if let Some(resp) = request.response.as_mut() {
                                    resp.headers.0["Location"] =
                                        serde_json::Value::String(fixed_location.clone());
                                }
                            }

                            redirect_location = Some(fixed_location);
                        }
                    }
                }

                {
                    // Work on a copy so the event's redirect response is left untouched.
                    let mut redirect_resp = redirect_resp.clone();

                    if let Some(redirect_location) = redirect_location {
                        if !redirect_location.is_empty() {
                            redirect_resp.headers.0["Location"] =
                                serde_json::Value::String(redirect_location);
                        }
                    }

                    self.handle_request_redirect(&mut request, redirect_resp);
                }

                // Move the accumulated chain onto the new hop, with the previous
                // hop appended as its latest entry.
                redirect_chain = std::mem::take(&mut request.redirect_chain);
                redirect_chain.push(request);
            }
        }

        if let Some(cap) = self.max_redirects {
            // The redirect cap is only enforced for document requests.
            let is_document = matches!(event.r#type, Some(ResourceType::Document));
            if is_document && redirect_chain.len() > cap {
                let mut failed = HttpRequest::new(
                    event.request_id.clone(),
                    event.frame_id.clone(),
                    interception_id,
                    self.user_request_interception_enabled,
                    redirect_chain,
                );
                failed.url = Some(event.request.url.clone());
                failed.method = Some(event.request.method.clone());
                failed.failure_text = Some("net::ERR_TOO_MANY_REDIRECTS".into());
                self.push_cdp_request(
                    chromiumoxide_cdp::cdp::browser_protocol::page::StopLoadingParams::default(),
                );
                self.queued_events
                    .push_back(NetworkEvent::RequestFailed(failed));
                // The failed request is intentionally not inserted into `self.requests`.
                return;
            }
        }

        let request = HttpRequest::new(
            event.request_id.clone(),
            event.frame_id.clone(),
            interception_id,
            self.user_request_interception_enabled,
            redirect_chain,
        );

        let rid = event.request_id.clone();
        self.queued_events
            .push_back(NetworkEvent::Request(rid.clone()));
        self.requests.insert(rid, request);
    }
1782
1783 fn handle_request_redirect(&mut self, request: &mut HttpRequest, response: Response) {
1785 request.set_response(response);
1786 if let Some(interception_id) = request.interception_id.as_ref() {
1787 self.attempted_authentications
1788 .remove(interception_id.as_ref());
1789 }
1790 }
1791}
1792
/// Events produced by the network manager and drained through its event queue.
#[derive(Debug)]
pub enum NetworkEvent {
    /// A CDP command to send, as a method id plus its JSON-encoded parameters.
    SendCdpRequest((MethodId, serde_json::Value)),
    /// A request with this id has started being tracked.
    Request(RequestId),
    /// A response for the request with this id.
    // NOTE(review): no producer of this variant is visible in this part of the file.
    Response(RequestId),
    /// The request failed; failure details, when known, are in its `failure_text`.
    RequestFailed(HttpRequest),
    /// The request completed.
    RequestFinished(HttpRequest),
    /// Number of bytes deducted from the configured byte budget by a response.
    BytesConsumed(u64),
}
1808
1809#[cfg(test)]
1810mod tests {
1811 use super::ALLOWED_MATCHER_3RD_PARTY;
1812 use crate::handler::network::NetworkManager;
1813 use std::time::Duration;
1814
1815 #[test]
1816 fn test_allowed_matcher_3rd_party() {
1817 let cf_challenge = "https://www.something.com.ba/cdn-cgi/challenge-platform/h/g/orchestrate/chl_page/v1?ray=9abf7b523d90987e";
1819 assert!(
1820 ALLOWED_MATCHER_3RD_PARTY.is_match(cf_challenge),
1821 "expected Cloudflare challenge script to be allowed"
1822 );
1823
1824 let cf_insights = "https://static.cloudflareinsights.com/beacon.min.js/vcd15cbe7772f49c399c6a5babf22c1241717689176015";
1826 assert!(
1827 !ALLOWED_MATCHER_3RD_PARTY.is_match(cf_insights),
1828 "expected Cloudflare Insights beacon to remain blocked (not in allow-list)"
1829 );
1830
1831 assert!(ALLOWED_MATCHER_3RD_PARTY.is_match("https://js.stripe.com/v3/"));
1833 assert!(ALLOWED_MATCHER_3RD_PARTY
1834 .is_match("https://www.google.com/recaptcha/api.js?render=explicit"));
1835 assert!(ALLOWED_MATCHER_3RD_PARTY.is_match("https://code.jquery.com/jquery-3.7.1.min.js"));
1836 }
1837
1838 #[test]
1839 fn test_script_allowed_by_default_when_not_blocklisted() {
1840 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
1841 nm.set_page_url(
1842 "https://forum.cursor.com/t/is-2000-fast-requests-the-maximum/51085".to_string(),
1843 );
1844
1845 let ok = "https://cdn.example.net/assets/some-app-bundle-12345.js";
1847 assert!(
1848 !nm.should_block_script_blocklist_only(ok),
1849 "expected non-blocklisted script to be allowed"
1850 );
1851 }
1852
1853 #[test]
1854 fn test_script_blocked_when_matches_ignore_trie_or_blocklist() {
1855 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
1856 nm.set_page_url(
1857 "https://forum.cursor.com/t/is-2000-fast-requests-the-maximum/51085".to_string(),
1858 );
1859
1860 let bad = "https://cdn.example.net/js/analytics.js";
1862 assert!(
1863 nm.should_block_script_blocklist_only(bad),
1864 "expected analytics.js to be blocklisted"
1865 );
1866 }
1867
1868 #[test]
1869 fn test_allowed_matcher_3rd_party_sanity() {
1870 let cf_challenge = "https://www.something.com.ba/cdn-cgi/challenge-platform/h/g/orchestrate/chl_page/v1?ray=9abf7b523d90987e";
1872 assert!(
1873 ALLOWED_MATCHER_3RD_PARTY.is_match(cf_challenge),
1874 "expected Cloudflare challenge script to be allowed"
1875 );
1876
1877 let cf_insights = "https://static.cloudflareinsights.com/beacon.min.js/vcd15cbe7772f49c399c6a5babf22c1241717689176015";
1879 assert!(
1880 !ALLOWED_MATCHER_3RD_PARTY.is_match(cf_insights),
1881 "expected Cloudflare Insights beacon to remain blocked (not in allow-list)"
1882 );
1883
1884 assert!(ALLOWED_MATCHER_3RD_PARTY.is_match("https://js.stripe.com/v3/"));
1885 assert!(ALLOWED_MATCHER_3RD_PARTY
1886 .is_match("https://www.google.com/recaptcha/api.js?render=explicit"));
1887 assert!(ALLOWED_MATCHER_3RD_PARTY.is_match("https://code.jquery.com/jquery-3.7.1.min.js"));
1888 }
1889 #[test]
1890 fn test_dynamic_blacklist_blocks_url() {
1891 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
1892 nm.set_page_url("https://example.com/".to_string());
1893
1894 nm.set_blacklist_patterns(["static.cloudflareinsights.com", "googletagmanager.com"]);
1895 assert!(nm.is_blacklisted("https://static.cloudflareinsights.com/beacon.min.js"));
1896 assert!(nm.is_blacklisted("https://www.googletagmanager.com/gtm.js?id=GTM-XXXX"));
1897
1898 assert!(!nm.is_blacklisted("https://cdn.example.net/assets/app.js"));
1899 }
1900
1901 #[test]
1902 fn test_blacklist_strict_wins_over_whitelist() {
1903 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
1904 nm.set_page_url("https://example.com/".to_string());
1905
1906 nm.set_blacklist_patterns(["beacon.min.js"]);
1908 nm.set_whitelist_patterns(["beacon.min.js"]);
1909
1910 nm.set_blacklist_strict(true);
1911
1912 let u = "https://static.cloudflareinsights.com/beacon.min.js";
1913 assert!(nm.is_whitelisted(u));
1914 assert!(nm.is_blacklisted(u));
1915
1916 assert!(nm.blacklist_strict);
1919 }
1920
1921 #[cfg(feature = "adblock")]
1922 fn make_request_paused(
1923 url: &str,
1924 resource_type: chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType,
1925 is_same_site: bool,
1926 ) -> chromiumoxide_cdp::cdp::browser_protocol::fetch::EventRequestPaused {
1927 use chromiumoxide_cdp::cdp::browser_protocol::fetch::EventRequestPaused;
1928 use chromiumoxide_cdp::cdp::browser_protocol::network::{
1929 Headers, Request, RequestReferrerPolicy, ResourcePriority,
1930 };
1931
1932 EventRequestPaused {
1933 request_id: chromiumoxide_cdp::cdp::browser_protocol::network::RequestId::from(
1934 "test-req".to_string(),
1935 )
1936 .into(),
1937 request: Request {
1938 url: url.to_string(),
1939 method: "GET".to_string(),
1940 headers: Headers::new(serde_json::Value::Object(Default::default())),
1941 initial_priority: ResourcePriority::Medium,
1942 referrer_policy: RequestReferrerPolicy::NoReferrer,
1943 url_fragment: None,
1944 has_post_data: None,
1945 post_data_entries: None,
1946 mixed_content_type: None,
1947 is_link_preload: None,
1948 trust_token_params: None,
1949 is_same_site: Some(is_same_site),
1950 is_ad_related: None,
1951 },
1952 frame_id: chromiumoxide_cdp::cdp::browser_protocol::page::FrameId::from(
1953 "frame1".to_string(),
1954 ),
1955 resource_type,
1956 response_error_reason: None,
1957 response_status_code: None,
1958 response_status_text: None,
1959 response_headers: None,
1960 network_id: None,
1961 redirected_request_id: None,
1962 }
1963 }
1964
1965 #[cfg(feature = "adblock")]
1966 #[test]
1967 fn test_detect_ad_blocks_known_tracker_scripts() {
1968 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
1969
1970 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
1971 nm.set_page_url("https://www.wine-searcher.com/".to_string());
1972
1973 let event = make_request_paused(
1974 "https://www.googletagmanager.com/gtm.js?id=GTM-XXXX",
1975 ResourceType::Script,
1976 false,
1977 );
1978
1979 assert!(
1980 nm.detect_ad(&event),
1981 "googletagmanager.com script should be detected as ad"
1982 );
1983 }
1984
1985 #[cfg(feature = "adblock")]
1986 #[test]
1987 fn test_detect_ad_allows_legitimate_scripts() {
1988 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
1989
1990 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
1991 nm.set_page_url("https://www.mylegitsite-test.com/".to_string());
1992
1993 let event = make_request_paused(
1994 "https://www.mylegitsite-test.com/static/js/app-bundle.js",
1995 ResourceType::Script,
1996 true,
1997 );
1998
1999 assert!(
2000 !nm.detect_ad(&event),
2001 "legitimate first-party app bundle should not be blocked"
2002 );
2003 }
2004
2005 #[cfg(feature = "adblock")]
2006 #[test]
2007 fn test_detect_ad_uses_source_domain() {
2008 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2009
2010 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2011 nm.set_page_url("https://www.wine-searcher.com/some-page".to_string());
2012
2013 assert!(
2014 !nm.document_target_domain.is_empty(),
2015 "document_target_domain should be set after set_page_url"
2016 );
2017
2018 let event = make_request_paused(
2019 "https://www.google-analytics.com/analytics.js",
2020 ResourceType::Script,
2021 false,
2022 );
2023
2024 assert!(
2025 nm.detect_ad(&event),
2026 "google-analytics.com should be blocked as tracker"
2027 );
2028 }
2029
2030 #[cfg(feature = "adblock")]
2031 #[test]
2032 fn test_custom_adblock_engine_takes_precedence() {
2033 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2034
2035 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2036 nm.set_page_url("https://example.com/".to_string());
2037
2038 let mut filter_set = adblock::lists::FilterSet::new(false);
2040 let mut opts = adblock::lists::ParseOptions::default();
2041 opts.rule_types = adblock::lists::RuleTypes::All;
2042 filter_set.add_filters(["||custom-tracker.example.net^"], opts);
2043 let engine = adblock::Engine::from_filter_set(filter_set, true);
2044 nm.set_adblock_engine(std::sync::Arc::new(engine));
2045
2046 let event = make_request_paused(
2047 "https://custom-tracker.example.net/pixel.js",
2048 ResourceType::Script,
2049 false,
2050 );
2051
2052 assert!(
2053 nm.detect_ad(&event),
2054 "custom engine rule should block custom-tracker.example.net"
2055 );
2056 }
2057
2058 #[cfg(feature = "adblock")]
2061 fn run_full_interception(
2062 nm: &mut NetworkManager,
2063 url: &str,
2064 resource_type: chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType,
2065 is_same_site: bool,
2066 ) -> bool {
2067 use super::NetworkEvent;
2068
2069 while nm.poll().is_some() {}
2071
2072 let event = make_request_paused(url, resource_type, is_same_site);
2073 nm.on_fetch_request_paused(&event);
2074
2075 let mut blocked = false;
2077 while let Some(ev) = nm.poll() {
2078 if let NetworkEvent::SendCdpRequest((method, _)) = &ev {
2079 let m: &str = method.as_ref();
2080 if m == "Fetch.fulfillRequest" || m == "Fetch.failRequest" {
2081 blocked = true;
2082 }
2083 }
2084 }
2085 blocked
2086 }
2087
2088 #[cfg(feature = "adblock")]
2091 #[test]
2092 fn test_e2e_tracker_script_blocked() {
2093 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2094
2095 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2096 nm.set_page_url("https://www.wine-searcher.com/".to_string());
2097
2098 assert!(
2099 run_full_interception(
2100 &mut nm,
2101 "https://www.googletagmanager.com/gtm.js?id=GTM-XXXX",
2102 ResourceType::Script,
2103 false,
2104 ),
2105 "GTM script should be blocked through full pipeline"
2106 );
2107 }
2108
2109 #[cfg(feature = "adblock")]
2110 #[test]
2111 fn test_e2e_legitimate_script_allowed() {
2112 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2113
2114 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2115 nm.set_page_url("https://www.mylegitsite-test.com/".to_string());
2116
2117 assert!(
2118 !run_full_interception(
2119 &mut nm,
2120 "https://www.mylegitsite-test.com/static/js/app-bundle.js",
2121 ResourceType::Script,
2122 true,
2123 ),
2124 "legitimate first-party script should be allowed through full pipeline"
2125 );
2126 }
2127
2128 #[cfg(feature = "adblock")]
2129 #[test]
2130 fn test_e2e_analytics_xhr_blocked() {
2131 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2132
2133 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2134 nm.set_page_url("https://example.org/".to_string());
2135
2136 assert!(
2137 run_full_interception(
2138 &mut nm,
2139 "https://www.google-analytics.com/g/collect?v=2&tid=UA-123",
2140 ResourceType::Xhr,
2141 false,
2142 ),
2143 "Google Analytics XHR should be blocked through full pipeline"
2144 );
2145 }
2146
2147 #[cfg(feature = "adblock")]
2148 #[test]
2149 fn test_e2e_whitelisted_overrides_adblock() {
2150 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2151
2152 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2153 nm.set_page_url("https://example.org/".to_string());
2154 nm.set_whitelist_patterns(["googletagmanager.com"]);
2155
2156 assert!(
2158 !run_full_interception(
2159 &mut nm,
2160 "https://www.googletagmanager.com/gtm.js?id=GTM-TEST",
2161 ResourceType::Script,
2162 false,
2163 ),
2164 "whitelisted tracker should be allowed even when adblock would block it"
2165 );
2166 }
2167
2168 #[cfg(feature = "adblock")]
2169 #[test]
2170 fn test_e2e_blacklist_strict_overrides_whitelist() {
2171 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2172
2173 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2174 nm.set_page_url("https://example.org/".to_string());
2175 nm.set_blacklist_patterns(["cdn.example.net/evil.js"]);
2176 nm.set_whitelist_patterns(["cdn.example.net/evil.js"]);
2177 nm.set_blacklist_strict(true);
2178
2179 assert!(
2180 run_full_interception(
2181 &mut nm,
2182 "https://cdn.example.net/evil.js",
2183 ResourceType::Script,
2184 false,
2185 ),
2186 "strict blacklist should win over whitelist"
2187 );
2188 }
2189
2190 #[cfg(feature = "adblock")]
2191 #[test]
2192 fn test_e2e_first_party_stylesheet_passes_when_block_stylesheets_on() {
2193 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2194
2195 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2196 nm.set_page_url("https://developer.intuit.com/".to_string());
2197 nm.block_stylesheets = true;
2198
2199 assert!(
2200 !run_full_interception(
2201 &mut nm,
2202 "https://developer.intuit.com/static/app.css",
2203 ResourceType::Stylesheet,
2204 true,
2205 ),
2206 "first-party CSS must pass when allow_first_party_stylesheets default-true"
2207 );
2208 }
2209
2210 #[cfg(feature = "adblock")]
2211 #[test]
2212 fn test_e2e_first_party_stylesheet_blocked_when_allow_disabled() {
2213 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2214
2215 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2216 nm.set_page_url("https://developer.intuit.com/".to_string());
2217 nm.block_stylesheets = true;
2218 nm.allow_first_party_stylesheets = false;
2219
2220 assert!(
2221 run_full_interception(
2222 &mut nm,
2223 "https://developer.intuit.com/static/app.css",
2224 ResourceType::Stylesheet,
2225 true,
2226 ),
2227 "first-party CSS must be blocked when allow_first_party_stylesheets=false"
2228 );
2229 }
2230
2231 #[cfg(feature = "adblock")]
2232 #[test]
2233 fn test_e2e_third_party_stylesheet_still_blocked_with_default_allow() {
2234 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2235
2236 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2240 nm.set_page_url("https://developer.intuit.com/".to_string());
2241 nm.block_stylesheets = true;
2242 nm.protocol_request_interception_enabled = true;
2246
2247 let rwbs_url = "https://tracker.evil.example/track.css";
2248 let rwbs_json = serde_json::json!({
2249 "requestId": "tp-css-1",
2250 "loaderId": "test-loader",
2251 "documentURL": "https://developer.intuit.com/",
2252 "request": {
2253 "url": rwbs_url,
2254 "method": "GET",
2255 "headers": {},
2256 "initialPriority": "Medium",
2257 "referrerPolicy": "no-referrer"
2258 },
2259 "timestamp": 0.0,
2260 "wallTime": 0.0,
2261 "initiator": { "type": "script" },
2262 "redirectHasExtraInfo": false,
2263 "type": "Stylesheet",
2264 "frameId": "frame1"
2265 });
2266 let rwbs_event: chromiumoxide_cdp::cdp::browser_protocol::network::EventRequestWillBeSent =
2267 serde_json::from_value(rwbs_json).unwrap();
2268 nm.on_request_will_be_sent(&rwbs_event);
2269
2270 use super::NetworkEvent;
2273 while nm.poll().is_some() {}
2274 let mut paused_event = make_request_paused(rwbs_url, ResourceType::Stylesheet, false);
2275 paused_event.network_id = Some(
2276 chromiumoxide_cdp::cdp::browser_protocol::network::RequestId::from(
2277 "tp-css-1".to_string(),
2278 ),
2279 );
2280 nm.on_fetch_request_paused(&paused_event);
2281
2282 let mut blocked = false;
2283 while let Some(ev) = nm.poll() {
2284 if let NetworkEvent::SendCdpRequest((method, _)) = &ev {
2285 let m: &str = method.as_ref();
2286 if m == "Fetch.fulfillRequest" || m == "Fetch.failRequest" {
2287 blocked = true;
2288 }
2289 }
2290 }
2291 assert!(
2292 blocked,
2293 "third-party Script-initiated CSS must remain blocked"
2294 );
2295 }
2296
2297 #[cfg(feature = "adblock")]
2298 #[test]
2299 fn test_e2e_first_party_image_passes_when_ignore_visuals_on() {
2300 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2301
2302 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2303 nm.set_page_url("https://shop.example/".to_string());
2304 nm.ignore_visuals = true;
2305
2306 assert!(
2307 !run_full_interception(
2308 &mut nm,
2309 "https://shop.example/img/hero.png",
2310 ResourceType::Image,
2311 true,
2312 ),
2313 "first-party image must pass when allow_first_party_visuals default-true"
2314 );
2315 }
2316
2317 #[cfg(feature = "adblock")]
2318 #[test]
2319 fn test_e2e_third_party_image_blocked_when_ignore_visuals_on() {
2320 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2321
2322 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2323 nm.set_page_url("https://shop.example/".to_string());
2324 nm.ignore_visuals = true;
2325
2326 assert!(
2327 run_full_interception(
2328 &mut nm,
2329 "https://cdn.thirdparty.io/banner.png",
2330 ResourceType::Image,
2331 false,
2332 ),
2333 "third-party image must remain blocked when ignore_visuals=true"
2334 );
2335 }
2336
2337 #[cfg(feature = "adblock")]
2338 #[test]
2339 fn test_e2e_first_party_document_not_blocked() {
2340 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2341
2342 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2343 nm.set_page_url("https://www.nytimes.com/".to_string());
2344
2345 assert!(
2346 !run_full_interception(
2347 &mut nm,
2348 "https://www.nytimes.com/2024/article.html",
2349 ResourceType::Document,
2350 true,
2351 ),
2352 "first-party document navigation should never be blocked"
2353 );
2354 }
2355
2356 #[cfg(feature = "adblock")]
2357 #[test]
2358 fn test_e2e_custom_engine_blocks_through_pipeline() {
2359 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2360
2361 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2362 nm.set_page_url("https://mysite.com/".to_string());
2363
2364 let mut filter_set = adblock::lists::FilterSet::new(false);
2365 let mut opts = adblock::lists::ParseOptions::default();
2366 opts.rule_types = adblock::lists::RuleTypes::All;
2367 filter_set.add_filters(["||evil-cdn.example.net^$script"], opts);
2368 let engine = adblock::Engine::from_filter_set(filter_set, true);
2369 nm.set_adblock_engine(std::sync::Arc::new(engine));
2370
2371 assert!(
2372 run_full_interception(
2373 &mut nm,
2374 "https://evil-cdn.example.net/tracker.js",
2375 ResourceType::Script,
2376 false,
2377 ),
2378 "custom engine rule should block through full pipeline"
2379 );
2380
2381 assert!(
2383 !run_full_interception(
2384 &mut nm,
2385 "https://mysite.com/app.js",
2386 ResourceType::Script,
2387 true,
2388 ),
2389 "first-party script should still be allowed with custom engine"
2390 );
2391 }
2392
2393 #[cfg(feature = "adblock")]
2394 #[test]
2395 fn test_e2e_ad_image_blocked() {
2396 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2397
2398 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2399 nm.set_page_url("https://www.mylegitsite-test.com/".to_string());
2400
2401 assert!(
2403 run_full_interception(
2404 &mut nm,
2405 "https://googleads.g.doubleclick.net/pagead/viewthroughconversion/123/?random=456",
2406 ResourceType::Image,
2407 false,
2408 ),
2409 "doubleclick ad image/tracking pixel should be blocked"
2410 );
2411
2412 assert!(
2414 !run_full_interception(
2415 &mut nm,
2416 "https://www.mylegitsite-test.com/images/logo.png",
2417 ResourceType::Image,
2418 true,
2419 ),
2420 "legitimate first-party image should not be blocked"
2421 );
2422 }
2423
2424 #[cfg(feature = "adblock")]
2425 #[test]
2426 fn test_e2e_hostname_with_userinfo() {
2427 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2428
2429 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2430 nm.set_page_url("https://example.org/".to_string());
2431
2432 assert!(
2434 run_full_interception(
2435 &mut nm,
2436 "https://user:pass@www.googletagmanager.com/gtm.js?id=GTM-XXXX",
2437 ResourceType::Script,
2438 false,
2439 ),
2440 "tracker URL with userinfo should still be blocked"
2441 );
2442 }
2443
/// With the same pattern registered on both lists and strict mode turned off,
/// a matching URL is reported by both `is_blacklisted` and `is_whitelisted`,
/// and the `blacklist_strict` flag reads back as `false`.
#[test]
fn test_blacklist_non_strict_allows_whitelist_override() {
    const PATTERN: &str = "beacon.min.js";
    const BEACON_URL: &str = "https://static.cloudflareinsights.com/beacon.min.js";

    let mut manager = NetworkManager::new(false, Duration::from_secs(30));
    manager.set_page_url("https://example.com/".to_string());

    // Register the identical pattern on both lists, then relax strictness.
    manager.set_blacklist_patterns([PATTERN]);
    manager.set_whitelist_patterns([PATTERN]);
    manager.set_blacklist_strict(false);

    assert!(manager.is_blacklisted(BEACON_URL));
    assert!(manager.is_whitelisted(BEACON_URL));
    assert!(!manager.blacklist_strict);
}
2459
2460 fn make_request_will_be_sent(
2469 request_id: &str,
2470 url: &str,
2471 resource_type: &str,
2472 redirect_from_url: Option<&str>,
2473 ) -> chromiumoxide_cdp::cdp::browser_protocol::network::EventRequestWillBeSent {
2474 let mut v = serde_json::json!({
2475 "requestId": request_id,
2476 "loaderId": "test-loader",
2477 "documentURL": url,
2478 "request": {
2479 "url": url,
2480 "method": "GET",
2481 "headers": {},
2482 "initialPriority": "Medium",
2483 "referrerPolicy": "no-referrer"
2484 },
2485 "timestamp": 0.0,
2486 "wallTime": 0.0,
2487 "initiator": { "type": "other" },
2488 "redirectHasExtraInfo": false,
2489 "type": resource_type,
2490 "frameId": "frame1"
2491 });
2492 if let Some(from) = redirect_from_url {
2493 v["redirectResponse"] = serde_json::json!({
2494 "url": from,
2495 "status": 302,
2496 "statusText": "Found",
2497 "headers": { "Location": url },
2498 "mimeType": "text/html",
2499 "charset": "",
2500 "connectionReused": false,
2501 "connectionId": 0.0,
2502 "encodedDataLength": 0.0,
2503 "securityState": "unknown"
2504 });
2505 }
2506 serde_json::from_value(v).expect("EventRequestWillBeSent should deserialize")
2507 }
2508
2509 fn drain_too_many_redirects(nm: &mut NetworkManager) -> Option<super::HttpRequest> {
2510 while let Some(ev) = nm.poll() {
2511 if let super::NetworkEvent::RequestFailed(req) = ev {
2512 if req.failure_text.as_deref() == Some("net::ERR_TOO_MANY_REDIRECTS") {
2513 return Some(req);
2514 }
2515 }
2516 }
2517 None
2518 }
2519
2520 fn drain_stop_loading(nm: &mut NetworkManager) -> bool {
2521 while let Some(ev) = nm.poll() {
2522 if let super::NetworkEvent::SendCdpRequest((method, _)) = ev {
2523 let m: &str = method.as_ref();
2524 if m == "Page.stopLoading" {
2525 return true;
2526 }
2527 }
2528 }
2529 false
2530 }
2531
/// Without a redirect cap configured, a long Document redirect chain must not
/// produce an `ERR_TOO_MANY_REDIRECTS` failure event.
///
/// NOTE(review): relies on a freshly constructed `NetworkManager` having no
/// `max_redirects` set — confirm against the constructor.
#[test]
fn test_max_redirects_none_allows_unlimited_chain() {
    let mut nm = NetworkManager::new(false, Duration::from_secs(30));

    // Initial navigation request (no redirect response attached).
    nm.on_request_will_be_sent(&make_request_will_be_sent(
        "r1",
        "https://example.com/0",
        "Document",
        None,
    ));

    // Drive exactly ten redirect hops on the same request id. The previous
    // bound (`1..10`) produced only nine hops, which did not match the
    // "chain of 10 hops" claim in the assertion below; the sibling cap test
    // counts hops as redirect events (`1..=4` == 4 hops).
    for i in 1..=10 {
        nm.on_request_will_be_sent(&make_request_will_be_sent(
            "r1",
            &format!("https://example.com/{i}"),
            "Document",
            Some(&format!("https://example.com/{}", i - 1)),
        ));
    }

    assert!(
        drain_too_many_redirects(&mut nm).is_none(),
        "no cap set: chain of 10 hops must not emit ERR_TOO_MANY_REDIRECTS"
    );
}
2558
/// With `max_redirects = Some(3)`, a Document chain of four redirect hops must
/// (a) emit a failed request with `ERR_TOO_MANY_REDIRECTS` that keeps the
/// accumulated chain and the URL of the tripping hop, and (b) dispatch
/// `Page.stopLoading`.
///
/// Two independent managers are used because each drain helper consumes the
/// events it polls past while searching.
#[test]
fn test_max_redirects_caps_document_chain() {
    /// Creates a manager capped at three redirects and feeds it `/0` followed
    /// by redirect hops `/1` through `/4`, all under `request_id`.
    fn run_capped_chain(request_id: &str) -> NetworkManager {
        let mut manager = NetworkManager::new(false, Duration::from_secs(30));
        manager.max_redirects = Some(3);

        manager.on_request_will_be_sent(&make_request_will_be_sent(
            request_id,
            "https://example.com/0",
            "Document",
            None,
        ));
        for hop in 1..=4 {
            let target = format!("https://example.com/{hop}");
            let previous = format!("https://example.com/{}", hop - 1);
            manager.on_request_will_be_sent(&make_request_will_be_sent(
                request_id,
                &target,
                "Document",
                Some(previous.as_str()),
            ));
        }
        manager
    }

    // First run: inspect the failure event itself.
    let mut failure_run = run_capped_chain("r1");
    let failed = drain_too_many_redirects(&mut failure_run)
        .expect("cap of 3 on a 4-hop chain must emit ERR_TOO_MANY_REDIRECTS");
    assert_eq!(
        failed.redirect_chain.len(),
        4,
        "failed request should preserve the full accumulated chain"
    );
    assert_eq!(
        failed.url.as_deref(),
        Some("https://example.com/4"),
        "failed request url should be the hop that tripped the cap"
    );

    // Second run on a fresh manager: look for the navigation abort command.
    let mut abort_run = run_capped_chain("r2");
    assert!(
        drain_stop_loading(&mut abort_run),
        "cap hit must dispatch Page.stopLoading to abort navigation"
    );
}
2617
/// A `Script` request that redirects five times under `max_redirects = Some(2)`
/// must not produce an `ERR_TOO_MANY_REDIRECTS` failure event.
#[test]
fn test_max_redirects_ignores_subresources() {
    // URL of the n-th script in the redirect chain.
    let script_url = |index: u32| format!("https://cdn.example.com/{index}.js");

    let mut manager = NetworkManager::new(false, Duration::from_secs(30));
    manager.max_redirects = Some(2);

    // Initial script request, then five redirect hops on the same request id.
    manager.on_request_will_be_sent(&make_request_will_be_sent(
        "s1",
        &script_url(0),
        "Script",
        None,
    ));
    for index in 1..=5 {
        let current = script_url(index);
        let previous = script_url(index - 1);
        manager.on_request_will_be_sent(&make_request_will_be_sent(
            "s1",
            &current,
            "Script",
            Some(previous.as_str()),
        ));
    }

    let capped = drain_too_many_redirects(&mut manager);
    assert!(
        capped.is_none(),
        "sub-resource redirect chains must never be capped"
    );
}
2644}