1#[cfg(any(feature = "adblock", feature = "firewall"))]
2use super::blockers::block_websites::block_ads;
3use super::blockers::{
4 block_websites::block_xhr, ignore_script_embedded, ignore_script_xhr, ignore_script_xhr_media,
5 xhr::IGNORE_XHR_ASSETS,
6};
7use crate::auth::Credentials;
8#[cfg(feature = "_cache")]
9use crate::cache::BasicCachePolicy;
10use crate::cmd::CommandChain;
11use crate::handler::http::HttpRequest;
12use crate::handler::network_utils::{base_domain_from_host, host_and_rest};
13use aho_corasick::AhoCorasick;
14use case_insensitive_string::CaseInsensitiveString;
15use chromiumoxide_cdp::cdp::browser_protocol::fetch::{RequestPattern, RequestStage};
16use chromiumoxide_cdp::cdp::browser_protocol::network::{
17 EmulateNetworkConditionsByRuleParams, EventLoadingFailed, EventLoadingFinished,
18 EventRequestServedFromCache, EventRequestWillBeSent, EventResponseReceived, Headers,
19 InitiatorType, InterceptionId, NetworkConditions, RequestId, ResourceType, Response,
20 SetCacheDisabledParams, SetExtraHttpHeadersParams,
21};
22use chromiumoxide_cdp::cdp::browser_protocol::{
23 fetch::{
24 self, AuthChallengeResponse, AuthChallengeResponseResponse, ContinueRequestParams,
25 ContinueWithAuthParams, DisableParams, EventAuthRequired, EventRequestPaused,
26 },
27 network::SetBypassServiceWorkerParams,
28};
29use chromiumoxide_cdp::cdp::browser_protocol::{
30 network::EnableParams, security::SetIgnoreCertificateErrorsParams,
31};
32use chromiumoxide_types::{Command, Method, MethodId};
33use hashbrown::{HashMap, HashSet};
34use lazy_static::lazy_static;
35use reqwest::header::PROXY_AUTHORIZATION;
36use spider_network_blocker::intercept_manager::NetworkInterceptManager;
37pub use spider_network_blocker::scripts::{
38 URL_IGNORE_SCRIPT_BASE_PATHS, URL_IGNORE_SCRIPT_STYLES_PATHS, URL_IGNORE_TRIE_PATHS,
39};
40use std::borrow::Cow;
41use std::collections::VecDeque;
42use std::time::{Duration, Instant};
43
lazy_static! {
    /// Substring patterns compiled into [`ALLOWED_MATCHER`]. The list mixes
    /// bare library/bundle names, URL path fragments, and absolute URL prefixes.
    static ref JS_FRAMEWORK_ALLOW: Vec<&'static str> = vec![
        "jquery", "angular",
        "react", "vue", "bootstrap",
        "d3",
        "lodash",
        "ajax",
        "application",
        "app", "main",
        "index",
        "bundle",
        "vendor",
        "runtime",
        "polyfill",
        "scripts",
        "es2015.",
        "es2020.",
        "webpack",
        "captcha",
        "client",
        "/cdn-cgi/challenge-platform/",
        "/wp-content/js/", "https://m.stripe.network/",
        "https://challenges.cloudflare.com/",
        "https://www.google.com/recaptcha/",
        "https://google.com/recaptcha/api.js",
        "https://www.gstatic.com/recaptcha/",
        "https://captcha.px-cloud.net/",
        "https://geo.captcha-delivery.com/",
        "https://api.leminnow.com/captcha/",
        "https://cdn.auth0.com/js/lock/",
        "https://captcha.gtimg.com",
        "https://client-api.arkoselabs.com/",
        "https://www.capy.me/puzzle/",
        "https://newassets.hcaptcha.com/",
        "https://cdn.auth0.com/client",
        "https://js.stripe.com/",
        "https://cdn.prod.website-files.com/", "https://cdnjs.cloudflare.com/", "https://code.jquery.com/jquery-"
    ];

    /// Aho-Corasick matcher over [`JS_FRAMEWORK_ALLOW`].
    ///
    /// Building the matcher panics (via `expect`) on first access if the
    /// pattern set cannot be compiled.
    // NOTE(review): not referenced in the portion of the file visible here;
    // presumably used by callers to allow-list script URLs — confirm.
    pub static ref ALLOWED_MATCHER: AhoCorasick = AhoCorasick::new(JS_FRAMEWORK_ALLOW.iter()).expect("matcher to build");

    /// Substring patterns compiled into [`ALLOWED_MATCHER_3RD_PARTY`].
    static ref JS_FRAMEWORK_ALLOW_3RD_PARTY: Vec<&'static str> = vec![
        "https://m.stripe.network/",
        "https://challenges.cloudflare.com/",
        "https://js.stripe.com/",
        "https://cdn.prod.website-files.com/", "https://cdnjs.cloudflare.com/", "https://code.jquery.com/jquery-",
        "https://ct.captcha-delivery.com/",
        "https://geo.captcha-delivery.com/",
        "https://img1.wsimg.com/parking-lander/static/js/main.d9ebbb8c.js", "https://cdn.auth0.com/client",
        "https://captcha.px-cloud.net/",
        "https://www.capy.me/puzzle/",
        "https://www.gstatic.com/recaptcha/",
        "https://google.com/recaptcha/",
        "https://www.google.com/recaptcha/",
        "https://www.recaptcha.net/recaptcha/",
        "https://js.hcaptcha.com/1/api.js",
        "https://hcaptcha.com/1/api.js",
        "https://js.datadome.co/tags.js",
        "https://api-js.datadome.co/",
        "https://client.perimeterx.net/",
        "https://captcha.px-cdn.net/",
        "https://newassets.hcaptcha.com/",
        // NOTE(review): duplicate of the "https://captcha.px-cloud.net/" entry
        // above. Harmless for `is_match`, but removing it would shift the
        // pattern ids of the entries that follow.
        "https://captcha.px-cloud.net/",
        "https://s.perimeterx.net/",
        "https://api.leminnow.com/captcha/",
        "https://client-api.arkoselabs.com/",
        "https://static.geetest.com/v4/gt4.js",
        "https://static.geetest.com/",
        "https://cdn.jsdelivr.net/npm/@friendlycaptcha/",
        "https://cdn.perfdrive.com/aperture/",
        "https://assets.queue-it.net/",
        "discourse-cdn.com/",
        "hcaptcha.com",
        "/cdn-cgi/challenge-platform/",
        "/_Incapsula_Resource"
    ];

    /// Aho-Corasick matcher over [`JS_FRAMEWORK_ALLOW_3RD_PARTY`].
    ///
    /// `NetworkManager::on_fetch_request_paused` consults it to lift a block
    /// decision for `Script` requests whose URL contains one of the patterns.
    /// Building the matcher panics (via `expect`) on first access if the
    /// pattern set cannot be compiled.
    pub static ref ALLOWED_MATCHER_3RD_PARTY: AhoCorasick = AhoCorasick::new(JS_FRAMEWORK_ALLOW_3RD_PARTY.iter()).expect("matcher to build");

    /// Set of framework asset path fragments.
    // NOTE(review): not referenced in the portion of the file visible here;
    // how callers match against it should be confirmed at the use site.
    pub static ref JS_FRAMEWORK_PATH: phf::Set<&'static str> = {
        phf::phf_set! {
            "_astro/", "_app/immutable"
        }
    };

    /// Set of MIME types covering documents, archives, images, video and audio.
    // NOTE(review): not referenced in the portion of the file visible here;
    // presumably responses with these content types are ignored — confirm.
    pub static ref IGNORE_CONTENT_TYPES: phf::Set<&'static str> = phf::phf_set! {
        "application/pdf",
        "application/zip",
        "application/x-rar-compressed",
        "application/x-tar",
        "image/png",
        "image/jpeg",
        "image/gif",
        "image/bmp",
        "image/webp",
        "image/svg+xml",
        "video/mp4",
        "video/x-msvideo",
        "video/x-matroska",
        "video/webm",
        "audio/mpeg",
        "audio/ogg",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/vnd.ms-excel",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/vnd.ms-powerpoint",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "application/x-7z-compressed",
        "application/x-rpm",
        "application/x-shockwave-flash",
        "application/rtf",
    };

    /// Resource type names treated as visual assets. Requests of these types
    /// are skipped when `NetworkManager::ignore_visuals` is set, and they are
    /// the types governed by `allow_first_party_visuals`.
    pub static ref IGNORE_VISUAL_RESOURCE_MAP: phf::Set<&'static str> = phf::phf_set! {
        "Image",
        "Media",
        "Font"
    };

    /// Resource type names whose requests start out marked as skipped in
    /// `NetworkManager::on_fetch_request_paused`, independent of the
    /// configurable blocking flags.
    pub static ref IGNORE_NETWORKING_RESOURCE_MAP: phf::Set<&'static str> = phf::phf_set! {
        "CspViolationReport",
        "Ping",
    };

    /// The `css` extension as a case-insensitive string, used by `skip_xhr`
    /// when checking the text after the last `.` of a request URL.
    pub static ref CSS_EXTENSION: CaseInsensitiveString = CaseInsensitiveString::from("css");

    /// Initial command chain: the `Network.enable` command only. Empty if the
    /// parameters fail to serialize.
    pub static ref INIT_CHAIN: Vec<(std::borrow::Cow<'static, str>, serde_json::Value)> = {
        let enable = EnableParams::default();

        if let Ok(c) = serde_json::to_value(&enable) {
            vec![(enable.identifier(), c)]
        } else {
            vec![]
        }
    };

    /// Initial command chain used when `ignore_httpserrors` is set: the
    /// network enable command followed by a command that ignores certificate
    /// errors. A command whose parameters fail to serialize is left out.
    pub static ref INIT_CHAIN_IGNORE_HTTP_ERRORS: Vec<(std::borrow::Cow<'static, str>, serde_json::Value)> = {
        let enable = EnableParams::default();
        let mut v = vec![];
        if let Ok(c) = serde_json::to_value(&enable) {
            v.push((enable.identifier(), c));
        }
        let ignore = SetIgnoreCertificateErrorsParams::new(true);
        if let Ok(ignored) = serde_json::to_value(&ignore) {
            v.push((ignore.identifier(), ignored));
        }

        v
    };

    /// `Fetch.enable` parameters: auth challenges are handled, and every URL
    /// (`*`) is intercepted at the request stage.
    pub static ref ENABLE_FETCH: chromiumoxide_cdp::cdp::browser_protocol::fetch::EnableParams = {
        fetch::EnableParams::builder()
            .handle_auth_requests(true)
            .pattern(RequestPattern::builder().url_pattern("*").request_stage(RequestStage::Request).build())
            .build()
    };
}
229
/// Reports whether `status` is one of the HTTP redirect codes
/// 301, 302, 303, 307 or 308.
///
/// Other 3xx codes (300, 304, 305, 306) are not treated as redirects.
pub(crate) fn is_redirect_status(status: i64) -> bool {
    match status {
        301..=303 | 307 | 308 => true,
        _ => false,
    }
}
234
/// Age, in seconds, after which `NetworkManager::evict_stale_entries` drops
/// entries from `requests_will_be_sent` and `request_id_to_interception_id`.
const STALE_BUFFER_SECS: u64 = 30;

/// Age, in seconds (measured from `HttpRequest::created_at`), after which
/// `NetworkManager::evict_stale_entries` drops entries from `requests`.
const STALE_REQUEST_SECS: u64 = 120;
247
/// Newtype around a shared `adblock::Engine`.
///
/// The wrapper lets `NetworkManager` derive `Debug` (see the manual `Debug`
/// impl for this type) while holding the engine behind an `Arc`.
#[cfg(feature = "adblock")]
pub struct AdblockEngine(std::sync::Arc<adblock::Engine>);
251
/// Opaque `Debug` output: only the type name is printed, never the engine
/// contents.
#[cfg(feature = "adblock")]
impl std::fmt::Debug for AdblockEngine {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // A field-less `debug_struct(..).finish()` emits exactly the name.
        f.write_str("AdblockEngine")
    }
}
258
/// Gives transparent read access to the wrapped `adblock::Engine`.
#[cfg(feature = "adblock")]
impl std::ops::Deref for AdblockEngine {
    type Target = adblock::Engine;

    fn deref(&self) -> &adblock::Engine {
        // Explicitly reborrow through the `Arc`.
        &*self.0
    }
}
266
/// Per-page network state: tracks requests, queues outgoing CDP commands and
/// decides, for each request paused by the Fetch domain, whether it is
/// continued, fulfilled with an empty response, served from cache, or failed.
#[derive(Debug)]
pub struct NetworkManager {
    /// Outgoing events; appended by `push_cdp_request`, drained by `poll`.
    queued_events: VecDeque<NetworkEvent>,
    /// When set, `init_commands` uses the chain that also tells the browser
    /// to ignore certificate errors.
    ignore_httpserrors: bool,
    /// Tracked requests keyed by request id; entries older than
    /// `STALE_REQUEST_SECS` are dropped by `evict_stale_entries`.
    requests: HashMap<RequestId, HttpRequest>,
    /// Buffered `requestWillBeSent` events with a timestamp used for stale
    /// eviction. `on_fetch_request_paused` removes the entry matching the
    /// paused request's network id.
    requests_will_be_sent: HashMap<RequestId, (EventRequestWillBeSent, Instant)>,
    /// Extra HTTP headers last supplied through `set_extra_headers`
    /// (without any proxy-authorization header).
    extra_headers: std::collections::HashMap<String, String>,
    /// Interception ids recorded by `on_fetch_request_paused` when no
    /// buffered `requestWillBeSent` event exists yet for the network id,
    /// together with the time of recording.
    request_id_to_interception_id: HashMap<RequestId, (InterceptionId, Instant)>,
    /// Value sent to the browser in `SetCacheDisabledParams`.
    user_cache_disabled: bool,
    /// Ids for which authentication has been attempted. Pruned in
    /// `evict_stale_entries` to ids still referenced by a tracked request.
    // NOTE(review): insertion happens outside the visible portion of the file.
    attempted_authentications: HashSet<RequestId>,
    /// Credentials stored by `authenticate`; their presence requests
    /// protocol-level interception.
    credentials: Option<Credentials>,
    /// Whether the user asked for request interception
    /// (`set_request_interception`).
    pub(crate) user_request_interception_enabled: bool,
    /// When set, every paused request is failed with `BlockedByClient`.
    block_all: bool,
    /// Whether protocol-level (Fetch) interception is considered enabled.
    /// Set by `authenticate`, `enable_request_intercept` and
    /// `disable_request_intercept`.
    pub(crate) protocol_request_interception_enabled: bool,
    /// Offline flag.
    // NOTE(review): not read or written in the visible portion of the file
    // apart from initialisation to `false`.
    offline: bool,
    /// Timeout handed to the `CommandChain` built by `init_commands`.
    pub request_timeout: Duration,
    /// Skip requests whose resource type is in `IGNORE_VISUAL_RESOURCE_MAP`.
    pub ignore_visuals: bool,
    /// Skip `Stylesheet` requests.
    pub block_stylesheets: bool,
    /// Together with `only_html` or `ignore_visuals`, enables the embedded
    /// script filter (`ignore_script_embedded`).
    pub block_javascript: bool,
    /// Allow first-party stylesheets even when they were marked to be skipped.
    pub allow_first_party_stylesheets: bool,
    /// Allow first-party scripts even when they were marked to be skipped.
    pub allow_first_party_javascript: bool,
    /// Allow first-party visual resources even when they were marked to be
    /// skipped.
    pub allow_first_party_visuals: bool,
    /// Enables the analytics-related URL filters for scripts and XHR.
    pub block_analytics: bool,
    /// Controls handling of `Prefetch` requests in `on_fetch_request_paused`.
    pub block_prefetch: bool,
    /// Together with `block_javascript`, enables the embedded script filter.
    pub only_html: bool,
    /// Set when the first document requested ends with `.xml`; `.xsl`
    /// requests are then exempt from the visual/stylesheet skip.
    pub xml_document: bool,
    /// Site-specific interception rules consulted for script, data and
    /// document requests.
    pub intercept_manager: NetworkInterceptManager,
    /// Counts document requests whose URL equals the current target URL;
    /// once it reaches 3, further requests are skipped.
    pub document_reload_tracker: u8,
    /// URL of the most recent document request (or the URL set through
    /// `set_page_url`).
    pub document_target_url: String,
    /// Base domain derived from `document_target_url`; used for the
    /// first-party checks.
    pub document_target_domain: String,
    /// Optional byte limit.
    // NOTE(review): enforcement is outside the visible portion of the file.
    pub max_bytes_allowed: Option<u64>,
    /// Optional redirect limit.
    // NOTE(review): enforcement is outside the visible portion of the file.
    pub max_redirects: Option<usize>,
    /// Site key used to look up session cache items.
    #[cfg(feature = "_cache")]
    pub cache_site_key: Option<String>,
    /// Policy deciding whether a cached item may be served.
    #[cfg(feature = "_cache")]
    pub cache_policy: Option<BasicCachePolicy>,
    /// Raw whitelist substrings; source for `whitelist_matcher`.
    whitelist_patterns: Vec<String>,
    /// Compiled whitelist; `None` when there are no patterns or compilation
    /// failed.
    whitelist_matcher: Option<AhoCorasick>,
    /// Raw blacklist substrings; source for `blacklist_matcher`.
    blacklist_patterns: Vec<String>,
    /// Compiled blacklist; `None` when there are no patterns or compilation
    /// failed.
    blacklist_matcher: Option<AhoCorasick>,
    /// When `true`, a blacklisted URL is blocked after all allow rules have
    /// run; when `false`, the blacklist only sets the initial decision and
    /// later allow rules may lift it.
    blacklist_strict: bool,
    /// Engine installed through `set_adblock_engine`.
    #[cfg(feature = "adblock")]
    adblock_engine: Option<AdblockEngine>,
}
420
421impl NetworkManager {
422 pub fn new(ignore_httpserrors: bool, request_timeout: Duration) -> Self {
424 Self {
425 queued_events: Default::default(),
426 ignore_httpserrors,
427 requests: Default::default(),
428 requests_will_be_sent: Default::default(),
429 extra_headers: Default::default(),
430 request_id_to_interception_id: Default::default(),
431 user_cache_disabled: false,
432 attempted_authentications: Default::default(),
433 credentials: None,
434 block_all: false,
435 user_request_interception_enabled: false,
436 protocol_request_interception_enabled: false,
437 offline: false,
438 request_timeout,
439 ignore_visuals: false,
440 block_javascript: false,
441 block_stylesheets: false,
442 allow_first_party_stylesheets: true,
443 allow_first_party_javascript: true,
444 allow_first_party_visuals: true,
445 block_prefetch: true,
446 block_analytics: true,
447 only_html: false,
448 xml_document: false,
449 intercept_manager: NetworkInterceptManager::Unknown,
450 document_reload_tracker: 0,
451 document_target_url: String::new(),
452 document_target_domain: String::new(),
453 whitelist_patterns: Vec::new(),
454 whitelist_matcher: None,
455 blacklist_patterns: Vec::new(),
456 blacklist_matcher: None,
457 blacklist_strict: true,
458 max_bytes_allowed: None,
459 max_redirects: None,
460 #[cfg(feature = "_cache")]
461 cache_site_key: None,
462 #[cfg(feature = "_cache")]
463 cache_policy: None,
464 #[cfg(feature = "adblock")]
465 adblock_engine: None,
466 }
467 }
468
469 #[cfg(feature = "adblock")]
471 pub fn set_adblock_engine(&mut self, engine: std::sync::Arc<adblock::Engine>) {
472 self.adblock_engine = Some(AdblockEngine(engine));
473 }
474
475 pub fn set_whitelist_patterns<I, S>(&mut self, patterns: I)
477 where
478 I: IntoIterator<Item = S>,
479 S: Into<String>,
480 {
481 self.whitelist_patterns = patterns.into_iter().map(Into::into).collect();
482 self.rebuild_whitelist_matcher();
483 }
484
485 pub fn set_blacklist_patterns<I, S>(&mut self, patterns: I)
487 where
488 I: IntoIterator<Item = S>,
489 S: Into<String>,
490 {
491 self.blacklist_patterns = patterns.into_iter().map(Into::into).collect();
492 self.rebuild_blacklist_matcher();
493 }
494
495 pub fn add_blacklist_pattern<S: Into<String>>(&mut self, pattern: S) {
497 self.blacklist_patterns.push(pattern.into());
498 self.rebuild_blacklist_matcher();
499 }
500
501 pub fn add_blacklist_patterns<I, S>(&mut self, patterns: I)
503 where
504 I: IntoIterator<Item = S>,
505 S: Into<String>,
506 {
507 self.blacklist_patterns
508 .extend(patterns.into_iter().map(Into::into));
509 self.rebuild_blacklist_matcher();
510 }
511
512 pub fn clear_blacklist(&mut self) {
514 self.blacklist_patterns.clear();
515 self.blacklist_matcher = None;
516 }
517
    /// Sets whether the blacklist is strict.
    ///
    /// In strict mode `on_fetch_request_paused` blocks a blacklisted URL
    /// after all allow rules have been evaluated; otherwise the blacklist only
    /// sets the initial decision and later allow rules may lift it.
    pub fn set_blacklist_strict(&mut self, strict: bool) {
        self.blacklist_strict = strict;
    }
522
523 #[inline]
524 fn rebuild_blacklist_matcher(&mut self) {
525 if self.blacklist_patterns.is_empty() {
526 self.blacklist_matcher = None;
527 return;
528 }
529
530 self.blacklist_matcher =
531 AhoCorasick::new(self.blacklist_patterns.iter().map(|s| s.as_str())).ok();
532 }
533
534 #[inline]
535 fn is_blacklisted(&self, url: &str) -> bool {
536 self.blacklist_matcher
537 .as_ref()
538 .map(|m| m.is_match(url))
539 .unwrap_or(false)
540 }
541
542 pub fn add_whitelist_pattern<S: Into<String>>(&mut self, pattern: S) {
544 self.whitelist_patterns.push(pattern.into());
545 self.rebuild_whitelist_matcher();
546 }
547
548 pub fn add_whitelist_patterns<I, S>(&mut self, patterns: I)
550 where
551 I: IntoIterator<Item = S>,
552 S: Into<String>,
553 {
554 self.whitelist_patterns
555 .extend(patterns.into_iter().map(Into::into));
556 self.rebuild_whitelist_matcher();
557 }
558
559 #[inline]
560 fn rebuild_whitelist_matcher(&mut self) {
561 if self.whitelist_patterns.is_empty() {
562 self.whitelist_matcher = None;
563 return;
564 }
565
566 self.whitelist_matcher =
568 AhoCorasick::new(self.whitelist_patterns.iter().map(|s| s.as_str())).ok();
569 }
570
571 #[inline]
572 fn is_whitelisted(&self, url: &str) -> bool {
573 self.whitelist_matcher
574 .as_ref()
575 .map(|m| m.is_match(url))
576 .unwrap_or(false)
577 }
578
579 pub fn init_commands(&self) -> CommandChain {
581 let cmds = if self.ignore_httpserrors {
582 INIT_CHAIN_IGNORE_HTTP_ERRORS.clone()
583 } else {
584 INIT_CHAIN.clone()
585 };
586 CommandChain::new(cmds, self.request_timeout)
587 }
588
589 pub(crate) fn push_cdp_request<T: Command>(&mut self, cmd: T) {
591 let method = cmd.identifier();
592 if let Ok(params) = serde_json::to_value(cmd) {
593 self.queued_events
594 .push_back(NetworkEvent::SendCdpRequest((method, params)));
595 }
596 }
597
    /// Pops the oldest queued event, or returns `None` when the queue is
    /// empty.
    pub fn poll(&mut self) -> Option<NetworkEvent> {
        self.queued_events.pop_front()
    }
602
603 pub fn evict_stale_entries(&mut self, now: Instant) {
608 let cutoff = now - Duration::from_secs(STALE_BUFFER_SECS);
609
610 self.requests_will_be_sent.retain(|_, (_, ts)| *ts > cutoff);
611 self.request_id_to_interception_id
612 .retain(|_, (_, ts)| *ts > cutoff);
613
614 let request_cutoff = now - Duration::from_secs(STALE_REQUEST_SECS);
619 self.requests
620 .retain(|_, req| req.created_at > request_cutoff);
621
622 if !self.attempted_authentications.is_empty() {
627 let live: HashSet<&str> = self
628 .requests
629 .values()
630 .filter_map(|r| r.interception_id.as_ref().map(|id| id.as_ref()))
631 .collect();
632 self.attempted_authentications
633 .retain(|id| live.contains(id.as_ref()));
634 }
635 }
636
    /// Returns the extra HTTP headers currently stored on the manager.
    pub fn extra_headers(&self) -> &std::collections::HashMap<String, String> {
        &self.extra_headers
    }
641
642 pub fn set_extra_headers(&mut self, headers: std::collections::HashMap<String, String>) {
644 self.extra_headers = headers;
645 self.extra_headers.remove(PROXY_AUTHORIZATION.as_str());
646 self.extra_headers.remove("Proxy-Authorization");
647 if !self.extra_headers.is_empty() {
648 if let Ok(headers) = serde_json::to_value(&self.extra_headers) {
649 self.push_cdp_request(SetExtraHttpHeadersParams::new(Headers::new(headers)));
650 }
651 }
652 }
653
654 pub fn set_service_worker_enabled(&mut self, bypass: bool) {
655 self.push_cdp_request(SetBypassServiceWorkerParams::new(bypass));
656 }
657
    /// Sets the block-all flag. While it is on, `on_fetch_request_paused`
    /// fails every paused request with `BlockedByClient`.
    pub fn set_block_all(&mut self, block_all: bool) {
        self.block_all = block_all;
    }
661
    /// Records whether the user wants request interception and then
    /// re-evaluates protocol-level interception.
    pub fn set_request_interception(&mut self, enabled: bool) {
        // The flag must be stored first: the update below reads it.
        self.user_request_interception_enabled = enabled;
        self.update_protocol_request_interception();
    }
666
667 pub fn set_cache_enabled(&mut self, enabled: bool) {
668 let run = self.user_cache_disabled == enabled;
669 self.user_cache_disabled = !enabled;
670 if run {
671 self.update_protocol_cache_disabled();
672 }
673 }
674
    /// Marks protocol-level request interception as enabled. Only the local
    /// flag changes; no command is queued.
    pub fn enable_request_intercept(&mut self) {
        self.protocol_request_interception_enabled = true;
    }
679
    /// Marks protocol-level request interception as disabled. Only the local
    /// flag changes; no command is queued.
    pub fn disable_request_intercept(&mut self) {
        self.protocol_request_interception_enabled = false;
    }
684
    /// Sets (or clears with `None`) the site key used when looking up
    /// session cache items in `on_fetch_request_paused`.
    #[cfg(feature = "_cache")]
    pub fn set_cache_site_key(&mut self, cache_site_key: Option<String>) {
        self.cache_site_key = cache_site_key;
    }
690
    /// Sets (or clears with `None`) the policy that decides whether a cached
    /// item may be served in `on_fetch_request_paused`.
    #[cfg(feature = "_cache")]
    pub fn set_cache_policy(&mut self, cache_policy: Option<BasicCachePolicy>) {
        self.cache_policy = cache_policy;
    }
696
697 pub fn update_protocol_cache_disabled(&mut self) {
698 self.push_cdp_request(SetCacheDisabledParams::new(self.user_cache_disabled));
699 }
700
    /// Stores `credentials` and makes sure protocol-level interception is on.
    pub fn authenticate(&mut self, credentials: Credentials) {
        self.credentials = Some(credentials);
        // Must run while the flag still holds its previous value: the update
        // queues `ENABLE_FETCH` only if interception was not already enabled.
        self.update_protocol_request_interception();
        self.protocol_request_interception_enabled = true;
    }
706
707 fn update_protocol_request_interception(&mut self) {
708 let enabled = self.user_request_interception_enabled || self.credentials.is_some();
709
710 if enabled == self.protocol_request_interception_enabled {
711 return;
712 }
713
714 if enabled {
715 self.push_cdp_request(ENABLE_FETCH.clone())
716 } else {
717 self.push_cdp_request(DisableParams::default())
718 }
719 }
720
721 #[inline]
724 fn should_block_script_blocklist_only(&self, url: &str) -> bool {
725 let block_analytics = self.block_analytics;
727
728 if block_analytics && spider_network_blocker::scripts::URL_IGNORE_TRIE.contains_prefix(url)
730 {
731 return true;
732 }
733
734 if crate::handler::blockers::block_websites::block_website(url) {
736 return true;
737 }
738
739 if let Some(path_with_slash) = Self::url_path_with_leading_slash(url) {
746 let p_slash = Self::strip_query_fragment(path_with_slash);
748 let p_noslash = p_slash.strip_prefix('/').unwrap_or(p_slash);
749
750 let base = match p_slash.rsplit('/').next() {
752 Some(b) => b,
753 None => p_slash,
754 };
755
756 if block_analytics && URL_IGNORE_TRIE_PATHS.contains_prefix(p_slash) {
759 return true;
760 }
761 if block_analytics && URL_IGNORE_TRIE_PATHS.contains_prefix(p_noslash) {
762 return true;
763 }
764 if block_analytics && URL_IGNORE_TRIE_PATHS.contains_prefix(base) {
765 return true;
766 }
767
768 if URL_IGNORE_SCRIPT_BASE_PATHS.contains_prefix(p_noslash) {
771 return true;
772 }
773
774 if self.ignore_visuals && URL_IGNORE_SCRIPT_STYLES_PATHS.contains_prefix(p_noslash) {
776 return true;
777 }
778 }
779
780 false
781 }
782
783 #[inline]
788 fn url_path_with_leading_slash(url: &str) -> Option<&str> {
789 let bytes = url.as_bytes();
791 let idx = memchr::memmem::find(bytes, b"//")?;
792 let after_slashes = idx + 2;
793
794 let slash_rel = memchr::memchr(b'/', &bytes[after_slashes..])?;
796 let slash_idx = after_slashes + slash_rel;
797
798 if slash_idx < url.len() {
799 Some(&url[slash_idx..])
800 } else {
801 None
802 }
803 }
804
805 #[inline]
810 fn strip_query_fragment(s: &str) -> &str {
811 match memchr::memchr2(b'?', b'#', s.as_bytes()) {
812 Some(i) => &s[..i],
813 None => s,
814 }
815 }
816
817 #[inline]
819 fn skip_xhr(
820 &self,
821 skip_networking: bool,
822 event: &EventRequestPaused,
823 network_event: bool,
824 ) -> bool {
825 if !skip_networking && network_event {
827 let request_url = event.request.url.as_str();
828
829 let skip_analytics =
831 self.block_analytics && (ignore_script_xhr(request_url) || block_xhr(request_url));
832
833 if skip_analytics {
834 true
835 } else if self.block_stylesheets || self.ignore_visuals {
836 let block_css = self.block_stylesheets;
837 let block_media = self.ignore_visuals;
838
839 let mut block_request = false;
840
841 if let Some(position) = memchr::memrchr(b'.', request_url.as_bytes()) {
842 let hlen = request_url.len();
843 let has_asset = hlen - position;
844
845 if has_asset >= 3 {
846 let next_position = position + 1;
847
848 if block_media
849 && IGNORE_XHR_ASSETS.contains::<CaseInsensitiveString>(
850 &request_url[next_position..].into(),
851 )
852 {
853 block_request = true;
854 } else if block_css {
855 block_request = CaseInsensitiveString::from(
856 &request_url.as_bytes()[next_position..],
857 )
858 .contains(&**CSS_EXTENSION)
859 }
860 }
861 }
862
863 if !block_request {
864 block_request = ignore_script_xhr_media(request_url);
865 }
866
867 block_request
868 } else {
869 skip_networking
870 }
871 } else {
872 skip_networking
873 }
874 }
875
876 #[cfg(feature = "adblock")]
877 #[inline]
878 fn detect_ad_if_enabled(&mut self, event: &EventRequestPaused, skip_networking: bool) -> bool {
880 if skip_networking {
881 true
882 } else {
883 block_ads(&event.request.url) || self.detect_ad(event)
884 }
885 }
886
887 #[cfg(not(feature = "adblock"))]
889 #[inline]
890 fn detect_ad_if_enabled(&mut self, event: &EventRequestPaused, skip_networking: bool) -> bool {
891 use crate::handler::blockers::block_websites::block_ads;
892 if skip_networking {
893 true
894 } else {
895 block_ads(&event.request.url)
896 }
897 }
898
899 #[inline]
900 fn fail_request_blocked(
902 &mut self,
903 request_id: &chromiumoxide_cdp::cdp::browser_protocol::fetch::RequestId,
904 ) {
905 let params = chromiumoxide_cdp::cdp::browser_protocol::fetch::FailRequestParams::new(
906 request_id.clone(),
907 chromiumoxide_cdp::cdp::browser_protocol::network::ErrorReason::BlockedByClient,
908 );
909 self.push_cdp_request(params);
910 }
911
912 #[inline]
913 fn fulfill_request_empty_200(
915 &mut self,
916 request_id: &chromiumoxide_cdp::cdp::browser_protocol::fetch::RequestId,
917 ) {
918 let params = chromiumoxide_cdp::cdp::browser_protocol::fetch::FulfillRequestParams::new(
919 request_id.clone(),
920 200,
921 );
922 self.push_cdp_request(params);
923 }
924
925 #[cfg(feature = "_cache")]
926 #[inline]
927 fn fulfill_request_from_cache(
931 &mut self,
932 request_id: &chromiumoxide_cdp::cdp::browser_protocol::fetch::RequestId,
933 body: &[u8],
934 headers: &std::collections::HashMap<String, String>,
935 status: i64,
936 ) {
937 use crate::cdp::browser_protocol::fetch::HeaderEntry;
938 use crate::handler::network::fetch::FulfillRequestParams;
939 use base64::Engine;
940
941 let mut resp_headers = Vec::<HeaderEntry>::with_capacity(headers.len());
942
943 for (k, v) in headers.iter() {
944 resp_headers.push(HeaderEntry {
945 name: k.clone(),
946 value: v.clone(),
947 });
948 }
949
950 let mut params = FulfillRequestParams::new(request_id.clone(), status);
951
952 params.body = Some(
954 base64::engine::general_purpose::STANDARD
955 .encode(body)
956 .into(),
957 );
958
959 params.response_headers = Some(resp_headers);
960
961 self.push_cdp_request(params);
962 }
963
964 #[inline]
965 fn continue_request_with_url(
967 &mut self,
968 request_id: &chromiumoxide_cdp::cdp::browser_protocol::fetch::RequestId,
969 url: Option<&str>,
970 intercept_response: bool,
971 ) {
972 let mut params = ContinueRequestParams::new(request_id.clone());
973 if let Some(url) = url {
974 params.url = Some(url.to_string());
975 params.intercept_response = Some(intercept_response);
976 }
977 self.push_cdp_request(params);
978 }
979
980 #[inline]
982 pub fn on_fetch_request_paused(&mut self, event: &EventRequestPaused) {
983 if self.user_request_interception_enabled && self.protocol_request_interception_enabled {
984 return;
985 }
986
987 if self.block_all {
988 tracing::debug!(
989 "Blocked (block_all): {:?} - {}",
990 event.resource_type,
991 event.request.url
992 );
993 return self.fail_request_blocked(&event.request_id);
994 }
995
996 let initiator_type: Option<InitiatorType> = event
1004 .network_id
1005 .as_ref()
1006 .and_then(|nid| self.requests_will_be_sent.get(nid.as_ref()))
1007 .map(|(rwbs, _)| rwbs.initiator.r#type.clone());
1008
1009 if let Some(network_id) = event.network_id.as_ref() {
1010 if let Some((request_will_be_sent, _)) =
1011 self.requests_will_be_sent.remove(network_id.as_ref())
1012 {
1013 self.on_request(&request_will_be_sent, Some(event.request_id.clone().into()));
1014 } else {
1015 self.request_id_to_interception_id.insert(
1016 network_id.clone(),
1017 (event.request_id.clone().into(), Instant::now()),
1018 );
1019 }
1020 }
1021
1022 let javascript_resource = event.resource_type == ResourceType::Script;
1024 let document_resource = event.resource_type == ResourceType::Document;
1025 let network_resource =
1026 !document_resource && crate::utils::is_data_resource(&event.resource_type);
1027
1028 let mut skip_networking =
1030 self.block_all || IGNORE_NETWORKING_RESOURCE_MAP.contains(event.resource_type.as_ref());
1031
1032 if event.resource_type == ResourceType::Prefetch && !self.block_prefetch {
1033 skip_networking = true;
1034 }
1035
1036 if !skip_networking {
1038 skip_networking = self.document_reload_tracker >= 3;
1039 }
1040
1041 let (current_url_cow, had_replacer) =
1043 self.handle_document_replacement_and_tracking(event, document_resource);
1044
1045 let current_url: &str = current_url_cow.as_ref();
1046
1047 let blacklisted = self.is_blacklisted(current_url);
1048
1049 if !self.blacklist_strict && blacklisted {
1050 skip_networking = true;
1051 }
1052
1053 if !skip_networking {
1054 if self.xml_document && current_url.ends_with(".xsl") {
1056 skip_networking = false;
1057 } else {
1058 skip_networking = self.should_skip_for_visuals_and_basic(&event.resource_type);
1059 }
1060 }
1061
1062 let is_main_document_request = document_resource
1085 && (event.redirected_request_id.is_some()
1086 || had_replacer
1087 || self.document_target_url.is_empty()
1088 || event.request.url == self.document_target_url);
1089 if !is_main_document_request {
1090 skip_networking = self.detect_ad_if_enabled(event, skip_networking);
1091 }
1092
1093 if !skip_networking
1095 && self.block_javascript
1096 && (self.only_html || self.ignore_visuals)
1097 && (javascript_resource
1098 || document_resource
1099 || event.resource_type == ResourceType::Stylesheet
1100 || event.resource_type == ResourceType::Image)
1101 {
1102 skip_networking = ignore_script_embedded(current_url);
1103 }
1104
1105 if !skip_networking && javascript_resource {
1108 skip_networking = self.should_block_script_blocklist_only(current_url);
1109 }
1110
1111 skip_networking = self.skip_xhr(skip_networking, event, network_resource);
1113
1114 if !skip_networking && (javascript_resource || network_resource || document_resource) {
1116 skip_networking = self.intercept_manager.intercept_detection(
1117 current_url,
1118 self.ignore_visuals,
1119 network_resource,
1120 );
1121 }
1122
1123 if !skip_networking && (javascript_resource || network_resource) {
1125 skip_networking = crate::handler::blockers::block_websites::block_website(current_url);
1126 }
1127
1128 if skip_networking && javascript_resource && ALLOWED_MATCHER_3RD_PARTY.is_match(current_url)
1131 {
1132 skip_networking = false;
1133 }
1134
1135 if skip_networking && self.is_whitelisted(current_url) {
1137 skip_networking = false;
1138 }
1139
1140 if skip_networking && !self.document_target_domain.is_empty() {
1156 let allow = match event.resource_type {
1157 ResourceType::Stylesheet => self.allow_first_party_stylesheets,
1158 ResourceType::Script => self.allow_first_party_javascript,
1159 _ if IGNORE_VISUAL_RESOURCE_MAP.contains(event.resource_type.as_ref()) => {
1160 self.allow_first_party_visuals
1161 }
1162 _ => false,
1163 };
1164 if allow && self.is_first_party_url(current_url) {
1165 skip_networking = false;
1166 }
1167 }
1168
1169 if skip_networking
1183 && self.allow_first_party_stylesheets
1184 && self.block_stylesheets
1185 && event.resource_type == ResourceType::Stylesheet
1186 && !matches!(initiator_type, Some(InitiatorType::Script))
1187 {
1188 skip_networking = false;
1189 }
1190
1191 if self.blacklist_strict && blacklisted {
1192 skip_networking = true;
1193 }
1194
1195 if skip_networking {
1196 tracing::debug!("Blocked: {:?} - {}", event.resource_type, current_url);
1197 self.fulfill_request_empty_200(&event.request_id);
1198 } else {
1199 #[cfg(feature = "_cache")]
1200 {
1201 if let (Some(policy), Some(cache_site_key)) =
1202 (self.cache_policy.as_ref(), self.cache_site_key.as_deref())
1203 {
1204 let current_url = format!("{}:{}", event.request.method, ¤t_url);
1205
1206 if let Some((res, cache_policy)) =
1207 crate::cache::remote::get_session_cache_item(cache_site_key, ¤t_url)
1208 {
1209 if policy.allows_cached(&cache_policy) {
1210 tracing::debug!(
1211 "Remote Cached: {:?} - {}",
1212 &event.resource_type,
1213 ¤t_url
1214 );
1215 let flat_headers = crate::http::headers_from_multi(&res.headers);
1216 return self.fulfill_request_from_cache(
1217 &event.request_id,
1218 &res.body,
1219 &flat_headers,
1220 res.status as i64,
1221 );
1222 }
1223 }
1224 }
1225 }
1226
1227 tracing::debug!("Allowed: {:?} - {}", event.resource_type, current_url);
1229 self.continue_request_with_url(
1230 &event.request_id,
1231 if had_replacer {
1232 Some(current_url)
1233 } else {
1234 None
1235 },
1236 !had_replacer,
1237 );
1238 }
1239 }
1240
1241 #[inline]
1247 fn should_skip_for_visuals_and_basic(&self, resource_type: &ResourceType) -> bool {
1248 (self.ignore_visuals && IGNORE_VISUAL_RESOURCE_MAP.contains(resource_type.as_ref()))
1249 || (self.block_stylesheets && *resource_type == ResourceType::Stylesheet)
1250 }
1251
1252 pub fn has_target_domain(&self) -> bool {
1254 !self.document_target_url.is_empty()
1255 }
1256
1257 #[inline]
1262 fn is_first_party_url(&self, url: &str) -> bool {
1263 if self.document_target_domain.is_empty() {
1264 return false;
1265 }
1266 match host_and_rest(url) {
1267 Some((host, _)) => base_domain_from_host(host) == self.document_target_domain,
1268 None => false,
1269 }
1270 }
1271
1272 pub fn set_page_url(&mut self, page_target_url: String) {
1274 let host_base = host_and_rest(&page_target_url)
1275 .map(|(h, _)| base_domain_from_host(h))
1276 .unwrap_or("");
1277
1278 self.document_target_domain = host_base.to_string();
1279 self.document_target_url = page_target_url;
1280 }
1281
1282 pub fn clear_target_domain(&mut self) {
1284 self.document_reload_tracker = 0;
1285 self.document_target_url = Default::default();
1286 self.document_target_domain = Default::default();
1287 }
1288
1289 #[inline]
1297 fn handle_document_replacement_and_tracking<'a>(
1298 &mut self,
1299 event: &'a EventRequestPaused,
1300 document_resource: bool,
1301 ) -> (Cow<'a, str>, bool) {
1302 let mut replacer: Option<String> = None;
1303 let current_url = event.request.url.as_str();
1304
1305 if document_resource {
1306 if self.document_target_url == current_url {
1307 self.document_reload_tracker += 1;
1308 } else if !self.document_target_url.is_empty() && event.redirected_request_id.is_some()
1309 {
1310 let (http_document_replacement, mut https_document_replacement) =
1311 if self.document_target_url.starts_with("http://") {
1312 (
1313 self.document_target_url.replacen("http://", "http//", 1),
1314 self.document_target_url.replacen("http://", "https://", 1),
1315 )
1316 } else {
1317 (
1318 self.document_target_url.replacen("https://", "https//", 1),
1319 self.document_target_url.replacen("https://", "http://", 1),
1320 )
1321 };
1322
1323 let trailing = https_document_replacement.ends_with('/');
1325 if trailing {
1326 https_document_replacement.pop();
1327 }
1328 if https_document_replacement.ends_with('/') {
1329 https_document_replacement.pop();
1330 }
1331
1332 let redirect_mask = format!(
1333 "{}{}",
1334 https_document_replacement, http_document_replacement
1335 );
1336
1337 if current_url == redirect_mask {
1338 replacer = Some(if trailing {
1339 format!("{}/", https_document_replacement)
1340 } else {
1341 https_document_replacement
1342 });
1343 }
1344 }
1345
1346 if self.document_target_url.is_empty() && current_url.ends_with(".xml") {
1347 self.xml_document = true;
1348 }
1349
1350 self.document_target_url = event.request.url.clone();
1352 self.document_target_domain = host_and_rest(&self.document_target_url)
1353 .map(|(h, _)| base_domain_from_host(h).to_string())
1354 .unwrap_or_default();
1355 }
1356
1357 let current_url_cow = match replacer {
1358 Some(r) => Cow::Owned(r),
1359 None => Cow::Borrowed(event.request.url.as_str()),
1360 };
1361
1362 let had_replacer = matches!(current_url_cow, Cow::Owned(_));
1363 (current_url_cow, had_replacer)
1364 }
1365
1366 #[cfg(feature = "adblock")]
1370 pub fn detect_ad(&self, event: &EventRequestPaused) -> bool {
1371 use adblock::{
1372 lists::{FilterSet, ParseOptions, RuleTypes},
1373 Engine,
1374 };
1375
1376 lazy_static::lazy_static! {
1377 static ref AD_ENGINE: Engine = {
1378 let mut filter_set = FilterSet::new(false);
1379 let mut rules = ParseOptions::default();
1380 rules.rule_types = RuleTypes::All;
1381
1382 filter_set.add_filters(
1383 &*spider_network_blocker::adblock::ADBLOCK_PATTERNS,
1384 rules,
1385 );
1386
1387 #[cfg(feature = "adblock_easylist")]
1390 {
1391 static EASYLIST: &str = include_str!(concat!(env!("OUT_DIR"), "/easylist.txt"));
1392 static EASYPRIVACY: &str = include_str!(concat!(env!("OUT_DIR"), "/easyprivacy.txt"));
1393
1394 if !EASYLIST.is_empty() {
1395 filter_set.add_filter_list(EASYLIST, rules);
1396 }
1397 if !EASYPRIVACY.is_empty() {
1398 filter_set.add_filter_list(EASYPRIVACY, rules);
1399 }
1400 }
1401
1402 Engine::from_filter_set(filter_set, true)
1403 };
1404 }
1405
1406 let blockable = event.resource_type == ResourceType::Script
1407 || event.resource_type == ResourceType::Image
1408 || event.resource_type == ResourceType::Media
1409 || event.resource_type == ResourceType::Stylesheet
1410 || event.resource_type == ResourceType::Document
1411 || event.resource_type == ResourceType::Fetch
1412 || event.resource_type == ResourceType::Xhr;
1413
1414 if !blockable {
1415 return false;
1416 }
1417
1418 let u = &event.request.url;
1419
1420 let source_domain = if self.document_target_domain.is_empty() {
1421 "example.com"
1422 } else {
1423 &self.document_target_domain
1424 };
1425
1426 let hostname = u
1429 .strip_prefix("https://")
1430 .or_else(|| u.strip_prefix("http://"))
1431 .and_then(|rest| rest.split('/').next())
1432 .map(
1434 |authority| match memchr::memrchr(b'@', authority.as_bytes()) {
1435 Some(i) => &authority[i + 1..],
1436 None => authority,
1437 },
1438 )
1439 .and_then(|host_port| host_port.split(':').next())
1441 .unwrap_or(source_domain);
1442
1443 let resource_type_str = match event.resource_type {
1444 ResourceType::Script => "script",
1445 ResourceType::Image => "image",
1446 ResourceType::Media => "media",
1447 ResourceType::Stylesheet => "stylesheet",
1448 ResourceType::Document => "document",
1449 ResourceType::Fetch => "fetch",
1450 ResourceType::Xhr => "xhr",
1451 _ => "other",
1452 };
1453
1454 let request = adblock::request::Request::preparsed(
1455 u,
1456 hostname,
1457 source_domain,
1458 resource_type_str,
1459 !event.request.is_same_site.unwrap_or_default(),
1460 );
1461
1462 let engine: &Engine = match self.adblock_engine.as_ref() {
1463 Some(custom) => custom,
1464 None => &AD_ENGINE,
1465 };
1466
1467 engine.check_network_request(&request).matched
1468 }
1469
1470 pub fn on_fetch_auth_required(&mut self, event: &EventAuthRequired) {
1471 let response = if self
1472 .attempted_authentications
1473 .contains(event.request_id.as_ref())
1474 {
1475 AuthChallengeResponseResponse::CancelAuth
1476 } else if self.credentials.is_some() {
1477 self.attempted_authentications
1478 .insert(event.request_id.clone().into());
1479 AuthChallengeResponseResponse::ProvideCredentials
1480 } else {
1481 AuthChallengeResponseResponse::Default
1482 };
1483
1484 let mut auth = AuthChallengeResponse::new(response);
1485 if let Some(creds) = self.credentials.clone() {
1486 auth.username = Some(creds.username);
1487 auth.password = Some(creds.password);
1488 }
1489 self.push_cdp_request(ContinueWithAuthParams::new(event.request_id.clone(), auth));
1490 }
1491
1492 pub fn set_offline_mode(&mut self, value: bool) {
1494 if self.offline == value {
1495 return;
1496 }
1497 self.offline = value;
1498 if let Ok(condition) = NetworkConditions::builder()
1499 .url_pattern("")
1500 .latency(0)
1501 .download_throughput(-1.)
1502 .upload_throughput(-1.)
1503 .build()
1504 {
1505 if let Ok(network) = EmulateNetworkConditionsByRuleParams::builder()
1506 .offline(self.offline)
1507 .matched_network_condition(condition)
1508 .build()
1509 {
1510 self.push_cdp_request(network);
1511 }
1512 }
1513 }
1514
1515 pub fn on_request_will_be_sent(&mut self, event: &EventRequestWillBeSent) {
1517 if self.protocol_request_interception_enabled && !event.request.url.starts_with("data:") {
1518 if let Some((interception_id, _)) = self
1519 .request_id_to_interception_id
1520 .remove(event.request_id.as_ref())
1521 {
1522 self.on_request(event, Some(interception_id));
1523 } else {
1524 self.requests_will_be_sent
1525 .insert(event.request_id.clone(), (event.clone(), Instant::now()));
1526 }
1527 } else {
1528 self.on_request(event, None);
1529 }
1530 }
1531
1532 pub fn on_request_served_from_cache(&mut self, event: &EventRequestServedFromCache) {
1534 if let Some(request) = self.requests.get_mut(event.request_id.as_ref()) {
1535 request.from_memory_cache = true;
1536 }
1537 }
1538
1539 pub fn on_response_received(&mut self, event: &EventResponseReceived) {
1541 let mut request_failed = false;
1542
1543 let mut deducted: u64 = 0;
1545
1546 if let Some(max_bytes) = self.max_bytes_allowed.as_mut() {
1547 let before = *max_bytes;
1548
1549 let received_bytes: u64 = event.response.encoded_data_length as u64;
1551
1552 let content_length: Option<u64> = event
1554 .response
1555 .headers
1556 .inner()
1557 .get("content-length")
1558 .and_then(|v| v.as_str())
1559 .and_then(|s| s.trim().parse::<u64>().ok());
1560
1561 *max_bytes = max_bytes.saturating_sub(received_bytes);
1563
1564 if let Some(cl) = content_length {
1566 if cl > *max_bytes {
1567 *max_bytes = 0;
1568 }
1569 }
1570
1571 request_failed = *max_bytes == 0;
1572
1573 deducted = before.saturating_sub(*max_bytes);
1575 }
1576
1577 if deducted > 0 {
1579 self.queued_events
1580 .push_back(NetworkEvent::BytesConsumed(deducted));
1581 }
1582
1583 if request_failed && self.max_bytes_allowed.is_some() {
1585 self.set_block_all(true);
1586 }
1587
1588 if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
1589 request.set_response(event.response.clone());
1590 self.queued_events.push_back(if request_failed {
1591 NetworkEvent::RequestFailed(request)
1592 } else {
1593 NetworkEvent::RequestFinished(request)
1594 });
1595 }
1596 }
1597
1598 pub fn on_network_loading_finished(&mut self, event: &EventLoadingFinished) {
1600 if let Some(request) = self.requests.remove(event.request_id.as_ref()) {
1601 if let Some(interception_id) = request.interception_id.as_ref() {
1602 self.attempted_authentications
1603 .remove(interception_id.as_ref());
1604 }
1605 self.queued_events
1606 .push_back(NetworkEvent::RequestFinished(request));
1607 }
1608 }
1609
1610 pub fn on_network_loading_failed(&mut self, event: &EventLoadingFailed) {
1612 if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
1613 request.failure_text = Some(event.error_text.clone());
1614 if let Some(interception_id) = request.interception_id.as_ref() {
1615 self.attempted_authentications
1616 .remove(interception_id.as_ref());
1617 }
1618 self.queued_events
1619 .push_back(NetworkEvent::RequestFailed(request));
1620 }
1621 }
1622
    /// Start tracking a request described by a `Network.requestWillBeSent` event.
    ///
    /// If the event carries a redirect response and a request with the same id is
    /// already tracked, that earlier request is finished off as a redirect hop and
    /// appended to the redirect chain of the new request. When `max_redirects` is set
    /// and a document request's chain is longer than the cap, the request is reported
    /// as failed with `net::ERR_TOO_MANY_REDIRECTS`, a `Page.stopLoading` command is
    /// queued, and nothing is tracked. Otherwise a `NetworkEvent::Request` is queued and
    /// the new request is stored under its id.
    fn on_request(
        &mut self,
        event: &EventRequestWillBeSent,
        interception_id: Option<InterceptionId>,
    ) {
        let mut redirect_chain = Vec::new();
        let mut redirect_location = None;

        if let Some(redirect_resp) = &event.redirect_response {
            // Only a request we are already tracking can become a redirect hop.
            if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
                if is_redirect_status(redirect_resp.status) {
                    // Indexing a JSON value with a missing key yields `Null`, so an
                    // absent header simply skips this branch.
                    // NOTE(review): the lookup is case-sensitive; a header delivered as
                    // `location` would not be seen here — confirm against real traffic.
                    if let Some(location) = redirect_resp.headers.inner()["Location"].as_str() {
                        if redirect_resp.url != location {
                            // Remove every occurrence of the redirecting URL from the
                            // Location value.
                            let fixed_location = location.replace(&redirect_resp.url, "");

                            if !fixed_location.is_empty() {
                                if let Some(resp) = request.response.as_mut() {
                                    resp.headers.0["Location"] =
                                        serde_json::Value::String(fixed_location.clone());
                                }
                            }

                            redirect_location = Some(fixed_location);
                        }
                    }
                }

                {
                    // Work on a copy so the event itself stays untouched.
                    let mut redirect_resp = redirect_resp.clone();

                    if let Some(redirect_location) = redirect_location {
                        if !redirect_location.is_empty() {
                            redirect_resp.headers.0["Location"] =
                                serde_json::Value::String(redirect_location);
                        }
                    }

                    self.handle_request_redirect(&mut request, redirect_resp);
                }

                // The previous hop hands its accumulated chain to the new request and
                // then becomes the chain's last element.
                redirect_chain = std::mem::take(&mut request.redirect_chain);
                redirect_chain.push(request);
            }
        }

        if let Some(cap) = self.max_redirects {
            // The cap applies to document requests only.
            let is_document = matches!(event.r#type, Some(ResourceType::Document));
            if is_document && redirect_chain.len() > cap {
                let mut failed = HttpRequest::new(
                    event.request_id.clone(),
                    event.frame_id.clone(),
                    interception_id,
                    self.user_request_interception_enabled,
                    redirect_chain,
                );
                failed.url = Some(event.request.url.clone());
                failed.method = Some(event.request.method.clone());
                failed.failure_text = Some("net::ERR_TOO_MANY_REDIRECTS".into());
                self.push_cdp_request(
                    chromiumoxide_cdp::cdp::browser_protocol::page::StopLoadingParams::default(),
                );
                self.queued_events
                    .push_back(NetworkEvent::RequestFailed(failed));
                // The failed request is deliberately not inserted into `self.requests`.
                return;
            }
        }

        let request = HttpRequest::new(
            event.request_id.clone(),
            event.frame_id.clone(),
            interception_id,
            self.user_request_interception_enabled,
            redirect_chain,
        );

        let rid = event.request_id.clone();
        self.queued_events
            .push_back(NetworkEvent::Request(rid.clone()));
        self.requests.insert(rid, request);
    }
1706
1707 fn handle_request_redirect(&mut self, request: &mut HttpRequest, response: Response) {
1709 request.set_response(response);
1710 if let Some(interception_id) = request.interception_id.as_ref() {
1711 self.attempted_authentications
1712 .remove(interception_id.as_ref());
1713 }
1714 }
1715}
1716
/// Events queued by the network manager and handed out to its poller.
#[derive(Debug)]
pub enum NetworkEvent {
    /// A CDP command to send: the method id together with a JSON value
    /// (presumably the serialized command parameters — confirm at the producer).
    SendCdpRequest((MethodId, serde_json::Value)),
    /// A new request with this id has started being tracked.
    Request(RequestId),
    /// A response notification for the request with this id.
    Response(RequestId),
    /// The request ended in failure (loading failed, byte budget exhausted, or too
    /// many redirects).
    RequestFailed(HttpRequest),
    /// The request completed.
    RequestFinished(HttpRequest),
    /// Number of bytes a response deducted from the configured byte budget.
    BytesConsumed(u64),
}
1732
1733#[cfg(test)]
1734mod tests {
1735 use super::ALLOWED_MATCHER_3RD_PARTY;
1736 use crate::handler::network::NetworkManager;
1737 use std::time::Duration;
1738
1739 #[test]
1740 fn test_allowed_matcher_3rd_party() {
1741 let cf_challenge = "https://www.something.com.ba/cdn-cgi/challenge-platform/h/g/orchestrate/chl_page/v1?ray=9abf7b523d90987e";
1743 assert!(
1744 ALLOWED_MATCHER_3RD_PARTY.is_match(cf_challenge),
1745 "expected Cloudflare challenge script to be allowed"
1746 );
1747
1748 let cf_insights = "https://static.cloudflareinsights.com/beacon.min.js/vcd15cbe7772f49c399c6a5babf22c1241717689176015";
1750 assert!(
1751 !ALLOWED_MATCHER_3RD_PARTY.is_match(cf_insights),
1752 "expected Cloudflare Insights beacon to remain blocked (not in allow-list)"
1753 );
1754
1755 assert!(ALLOWED_MATCHER_3RD_PARTY.is_match("https://js.stripe.com/v3/"));
1757 assert!(ALLOWED_MATCHER_3RD_PARTY
1758 .is_match("https://www.google.com/recaptcha/api.js?render=explicit"));
1759 assert!(ALLOWED_MATCHER_3RD_PARTY.is_match("https://code.jquery.com/jquery-3.7.1.min.js"));
1760 }
1761
1762 #[test]
1763 fn test_script_allowed_by_default_when_not_blocklisted() {
1764 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
1765 nm.set_page_url(
1766 "https://forum.cursor.com/t/is-2000-fast-requests-the-maximum/51085".to_string(),
1767 );
1768
1769 let ok = "https://cdn.example.net/assets/some-app-bundle-12345.js";
1771 assert!(
1772 !nm.should_block_script_blocklist_only(ok),
1773 "expected non-blocklisted script to be allowed"
1774 );
1775 }
1776
1777 #[test]
1778 fn test_script_blocked_when_matches_ignore_trie_or_blocklist() {
1779 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
1780 nm.set_page_url(
1781 "https://forum.cursor.com/t/is-2000-fast-requests-the-maximum/51085".to_string(),
1782 );
1783
1784 let bad = "https://cdn.example.net/js/analytics.js";
1786 assert!(
1787 nm.should_block_script_blocklist_only(bad),
1788 "expected analytics.js to be blocklisted"
1789 );
1790 }
1791
1792 #[test]
1793 fn test_allowed_matcher_3rd_party_sanity() {
1794 let cf_challenge = "https://www.something.com.ba/cdn-cgi/challenge-platform/h/g/orchestrate/chl_page/v1?ray=9abf7b523d90987e";
1796 assert!(
1797 ALLOWED_MATCHER_3RD_PARTY.is_match(cf_challenge),
1798 "expected Cloudflare challenge script to be allowed"
1799 );
1800
1801 let cf_insights = "https://static.cloudflareinsights.com/beacon.min.js/vcd15cbe7772f49c399c6a5babf22c1241717689176015";
1803 assert!(
1804 !ALLOWED_MATCHER_3RD_PARTY.is_match(cf_insights),
1805 "expected Cloudflare Insights beacon to remain blocked (not in allow-list)"
1806 );
1807
1808 assert!(ALLOWED_MATCHER_3RD_PARTY.is_match("https://js.stripe.com/v3/"));
1809 assert!(ALLOWED_MATCHER_3RD_PARTY
1810 .is_match("https://www.google.com/recaptcha/api.js?render=explicit"));
1811 assert!(ALLOWED_MATCHER_3RD_PARTY.is_match("https://code.jquery.com/jquery-3.7.1.min.js"));
1812 }
1813 #[test]
1814 fn test_dynamic_blacklist_blocks_url() {
1815 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
1816 nm.set_page_url("https://example.com/".to_string());
1817
1818 nm.set_blacklist_patterns(["static.cloudflareinsights.com", "googletagmanager.com"]);
1819 assert!(nm.is_blacklisted("https://static.cloudflareinsights.com/beacon.min.js"));
1820 assert!(nm.is_blacklisted("https://www.googletagmanager.com/gtm.js?id=GTM-XXXX"));
1821
1822 assert!(!nm.is_blacklisted("https://cdn.example.net/assets/app.js"));
1823 }
1824
1825 #[test]
1826 fn test_blacklist_strict_wins_over_whitelist() {
1827 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
1828 nm.set_page_url("https://example.com/".to_string());
1829
1830 nm.set_blacklist_patterns(["beacon.min.js"]);
1832 nm.set_whitelist_patterns(["beacon.min.js"]);
1833
1834 nm.set_blacklist_strict(true);
1835
1836 let u = "https://static.cloudflareinsights.com/beacon.min.js";
1837 assert!(nm.is_whitelisted(u));
1838 assert!(nm.is_blacklisted(u));
1839
1840 assert!(nm.blacklist_strict);
1843 }
1844
1845 #[cfg(feature = "adblock")]
1846 fn make_request_paused(
1847 url: &str,
1848 resource_type: chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType,
1849 is_same_site: bool,
1850 ) -> chromiumoxide_cdp::cdp::browser_protocol::fetch::EventRequestPaused {
1851 use chromiumoxide_cdp::cdp::browser_protocol::fetch::EventRequestPaused;
1852 use chromiumoxide_cdp::cdp::browser_protocol::network::{
1853 Headers, Request, RequestReferrerPolicy, ResourcePriority,
1854 };
1855
1856 EventRequestPaused {
1857 request_id: chromiumoxide_cdp::cdp::browser_protocol::network::RequestId::from(
1858 "test-req".to_string(),
1859 )
1860 .into(),
1861 request: Request {
1862 url: url.to_string(),
1863 method: "GET".to_string(),
1864 headers: Headers::new(serde_json::Value::Object(Default::default())),
1865 initial_priority: ResourcePriority::Medium,
1866 referrer_policy: RequestReferrerPolicy::NoReferrer,
1867 url_fragment: None,
1868 has_post_data: None,
1869 post_data_entries: None,
1870 mixed_content_type: None,
1871 is_link_preload: None,
1872 trust_token_params: None,
1873 is_same_site: Some(is_same_site),
1874 is_ad_related: None,
1875 },
1876 frame_id: chromiumoxide_cdp::cdp::browser_protocol::page::FrameId::from(
1877 "frame1".to_string(),
1878 ),
1879 resource_type,
1880 response_error_reason: None,
1881 response_status_code: None,
1882 response_status_text: None,
1883 response_headers: None,
1884 network_id: None,
1885 redirected_request_id: None,
1886 }
1887 }
1888
1889 #[cfg(feature = "adblock")]
1890 #[test]
1891 fn test_detect_ad_blocks_known_tracker_scripts() {
1892 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
1893
1894 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
1895 nm.set_page_url("https://www.wine-searcher.com/".to_string());
1896
1897 let event = make_request_paused(
1898 "https://www.googletagmanager.com/gtm.js?id=GTM-XXXX",
1899 ResourceType::Script,
1900 false,
1901 );
1902
1903 assert!(
1904 nm.detect_ad(&event),
1905 "googletagmanager.com script should be detected as ad"
1906 );
1907 }
1908
1909 #[cfg(feature = "adblock")]
1910 #[test]
1911 fn test_detect_ad_allows_legitimate_scripts() {
1912 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
1913
1914 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
1915 nm.set_page_url("https://www.mylegitsite-test.com/".to_string());
1916
1917 let event = make_request_paused(
1918 "https://www.mylegitsite-test.com/static/js/app-bundle.js",
1919 ResourceType::Script,
1920 true,
1921 );
1922
1923 assert!(
1924 !nm.detect_ad(&event),
1925 "legitimate first-party app bundle should not be blocked"
1926 );
1927 }
1928
1929 #[cfg(feature = "adblock")]
1930 #[test]
1931 fn test_detect_ad_uses_source_domain() {
1932 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
1933
1934 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
1935 nm.set_page_url("https://www.wine-searcher.com/some-page".to_string());
1936
1937 assert!(
1938 !nm.document_target_domain.is_empty(),
1939 "document_target_domain should be set after set_page_url"
1940 );
1941
1942 let event = make_request_paused(
1943 "https://www.google-analytics.com/analytics.js",
1944 ResourceType::Script,
1945 false,
1946 );
1947
1948 assert!(
1949 nm.detect_ad(&event),
1950 "google-analytics.com should be blocked as tracker"
1951 );
1952 }
1953
1954 #[cfg(feature = "adblock")]
1955 #[test]
1956 fn test_custom_adblock_engine_takes_precedence() {
1957 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
1958
1959 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
1960 nm.set_page_url("https://example.com/".to_string());
1961
1962 let mut filter_set = adblock::lists::FilterSet::new(false);
1964 let mut opts = adblock::lists::ParseOptions::default();
1965 opts.rule_types = adblock::lists::RuleTypes::All;
1966 filter_set.add_filters(["||custom-tracker.example.net^"], opts);
1967 let engine = adblock::Engine::from_filter_set(filter_set, true);
1968 nm.set_adblock_engine(std::sync::Arc::new(engine));
1969
1970 let event = make_request_paused(
1971 "https://custom-tracker.example.net/pixel.js",
1972 ResourceType::Script,
1973 false,
1974 );
1975
1976 assert!(
1977 nm.detect_ad(&event),
1978 "custom engine rule should block custom-tracker.example.net"
1979 );
1980 }
1981
1982 #[cfg(feature = "adblock")]
1985 fn run_full_interception(
1986 nm: &mut NetworkManager,
1987 url: &str,
1988 resource_type: chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType,
1989 is_same_site: bool,
1990 ) -> bool {
1991 use super::NetworkEvent;
1992
1993 while nm.poll().is_some() {}
1995
1996 let event = make_request_paused(url, resource_type, is_same_site);
1997 nm.on_fetch_request_paused(&event);
1998
1999 let mut blocked = false;
2001 while let Some(ev) = nm.poll() {
2002 if let NetworkEvent::SendCdpRequest((method, _)) = &ev {
2003 let m: &str = method.as_ref();
2004 if m == "Fetch.fulfillRequest" || m == "Fetch.failRequest" {
2005 blocked = true;
2006 }
2007 }
2008 }
2009 blocked
2010 }
2011
2012 #[cfg(feature = "adblock")]
2015 #[test]
2016 fn test_e2e_tracker_script_blocked() {
2017 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2018
2019 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2020 nm.set_page_url("https://www.wine-searcher.com/".to_string());
2021
2022 assert!(
2023 run_full_interception(
2024 &mut nm,
2025 "https://www.googletagmanager.com/gtm.js?id=GTM-XXXX",
2026 ResourceType::Script,
2027 false,
2028 ),
2029 "GTM script should be blocked through full pipeline"
2030 );
2031 }
2032
2033 #[cfg(feature = "adblock")]
2034 #[test]
2035 fn test_e2e_legitimate_script_allowed() {
2036 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2037
2038 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2039 nm.set_page_url("https://www.mylegitsite-test.com/".to_string());
2040
2041 assert!(
2042 !run_full_interception(
2043 &mut nm,
2044 "https://www.mylegitsite-test.com/static/js/app-bundle.js",
2045 ResourceType::Script,
2046 true,
2047 ),
2048 "legitimate first-party script should be allowed through full pipeline"
2049 );
2050 }
2051
2052 #[cfg(feature = "adblock")]
2053 #[test]
2054 fn test_e2e_analytics_xhr_blocked() {
2055 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2056
2057 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2058 nm.set_page_url("https://example.org/".to_string());
2059
2060 assert!(
2061 run_full_interception(
2062 &mut nm,
2063 "https://www.google-analytics.com/g/collect?v=2&tid=UA-123",
2064 ResourceType::Xhr,
2065 false,
2066 ),
2067 "Google Analytics XHR should be blocked through full pipeline"
2068 );
2069 }
2070
2071 #[cfg(feature = "adblock")]
2072 #[test]
2073 fn test_e2e_whitelisted_overrides_adblock() {
2074 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2075
2076 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2077 nm.set_page_url("https://example.org/".to_string());
2078 nm.set_whitelist_patterns(["googletagmanager.com"]);
2079
2080 assert!(
2082 !run_full_interception(
2083 &mut nm,
2084 "https://www.googletagmanager.com/gtm.js?id=GTM-TEST",
2085 ResourceType::Script,
2086 false,
2087 ),
2088 "whitelisted tracker should be allowed even when adblock would block it"
2089 );
2090 }
2091
2092 #[cfg(feature = "adblock")]
2093 #[test]
2094 fn test_e2e_blacklist_strict_overrides_whitelist() {
2095 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2096
2097 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2098 nm.set_page_url("https://example.org/".to_string());
2099 nm.set_blacklist_patterns(["cdn.example.net/evil.js"]);
2100 nm.set_whitelist_patterns(["cdn.example.net/evil.js"]);
2101 nm.set_blacklist_strict(true);
2102
2103 assert!(
2104 run_full_interception(
2105 &mut nm,
2106 "https://cdn.example.net/evil.js",
2107 ResourceType::Script,
2108 false,
2109 ),
2110 "strict blacklist should win over whitelist"
2111 );
2112 }
2113
2114 #[cfg(feature = "adblock")]
2115 #[test]
2116 fn test_e2e_first_party_stylesheet_passes_when_block_stylesheets_on() {
2117 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2118
2119 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2120 nm.set_page_url("https://developer.intuit.com/".to_string());
2121 nm.block_stylesheets = true;
2122
2123 assert!(
2124 !run_full_interception(
2125 &mut nm,
2126 "https://developer.intuit.com/static/app.css",
2127 ResourceType::Stylesheet,
2128 true,
2129 ),
2130 "first-party CSS must pass when allow_first_party_stylesheets default-true"
2131 );
2132 }
2133
2134 #[cfg(feature = "adblock")]
2135 #[test]
2136 fn test_e2e_first_party_stylesheet_blocked_when_allow_disabled() {
2137 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2138
2139 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2140 nm.set_page_url("https://developer.intuit.com/".to_string());
2141 nm.block_stylesheets = true;
2142 nm.allow_first_party_stylesheets = false;
2143
2144 assert!(
2145 run_full_interception(
2146 &mut nm,
2147 "https://developer.intuit.com/static/app.css",
2148 ResourceType::Stylesheet,
2149 true,
2150 ),
2151 "first-party CSS must be blocked when allow_first_party_stylesheets=false"
2152 );
2153 }
2154
2155 #[cfg(feature = "adblock")]
2156 #[test]
2157 fn test_e2e_third_party_stylesheet_still_blocked_with_default_allow() {
2158 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2159
2160 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2164 nm.set_page_url("https://developer.intuit.com/".to_string());
2165 nm.block_stylesheets = true;
2166 nm.protocol_request_interception_enabled = true;
2170
2171 let rwbs_url = "https://tracker.evil.example/track.css";
2172 let rwbs_json = serde_json::json!({
2173 "requestId": "tp-css-1",
2174 "loaderId": "test-loader",
2175 "documentURL": "https://developer.intuit.com/",
2176 "request": {
2177 "url": rwbs_url,
2178 "method": "GET",
2179 "headers": {},
2180 "initialPriority": "Medium",
2181 "referrerPolicy": "no-referrer"
2182 },
2183 "timestamp": 0.0,
2184 "wallTime": 0.0,
2185 "initiator": { "type": "script" },
2186 "redirectHasExtraInfo": false,
2187 "type": "Stylesheet",
2188 "frameId": "frame1"
2189 });
2190 let rwbs_event: chromiumoxide_cdp::cdp::browser_protocol::network::EventRequestWillBeSent =
2191 serde_json::from_value(rwbs_json).unwrap();
2192 nm.on_request_will_be_sent(&rwbs_event);
2193
2194 use super::NetworkEvent;
2197 while nm.poll().is_some() {}
2198 let mut paused_event = make_request_paused(rwbs_url, ResourceType::Stylesheet, false);
2199 paused_event.network_id = Some(
2200 chromiumoxide_cdp::cdp::browser_protocol::network::RequestId::from(
2201 "tp-css-1".to_string(),
2202 ),
2203 );
2204 nm.on_fetch_request_paused(&paused_event);
2205
2206 let mut blocked = false;
2207 while let Some(ev) = nm.poll() {
2208 if let NetworkEvent::SendCdpRequest((method, _)) = &ev {
2209 let m: &str = method.as_ref();
2210 if m == "Fetch.fulfillRequest" || m == "Fetch.failRequest" {
2211 blocked = true;
2212 }
2213 }
2214 }
2215 assert!(blocked, "third-party Script-initiated CSS must remain blocked");
2216 }
2217
2218 #[cfg(feature = "adblock")]
2219 #[test]
2220 fn test_e2e_first_party_image_passes_when_ignore_visuals_on() {
2221 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2222
2223 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2224 nm.set_page_url("https://shop.example/".to_string());
2225 nm.ignore_visuals = true;
2226
2227 assert!(
2228 !run_full_interception(
2229 &mut nm,
2230 "https://shop.example/img/hero.png",
2231 ResourceType::Image,
2232 true,
2233 ),
2234 "first-party image must pass when allow_first_party_visuals default-true"
2235 );
2236 }
2237
2238 #[cfg(feature = "adblock")]
2239 #[test]
2240 fn test_e2e_third_party_image_blocked_when_ignore_visuals_on() {
2241 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2242
2243 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2244 nm.set_page_url("https://shop.example/".to_string());
2245 nm.ignore_visuals = true;
2246
2247 assert!(
2248 run_full_interception(
2249 &mut nm,
2250 "https://cdn.thirdparty.io/banner.png",
2251 ResourceType::Image,
2252 false,
2253 ),
2254 "third-party image must remain blocked when ignore_visuals=true"
2255 );
2256 }
2257
2258 #[cfg(feature = "adblock")]
2259 #[test]
2260 fn test_e2e_first_party_document_not_blocked() {
2261 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2262
2263 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2264 nm.set_page_url("https://www.nytimes.com/".to_string());
2265
2266 assert!(
2267 !run_full_interception(
2268 &mut nm,
2269 "https://www.nytimes.com/2024/article.html",
2270 ResourceType::Document,
2271 true,
2272 ),
2273 "first-party document navigation should never be blocked"
2274 );
2275 }
2276
2277 #[cfg(feature = "adblock")]
2278 #[test]
2279 fn test_e2e_custom_engine_blocks_through_pipeline() {
2280 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2281
2282 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2283 nm.set_page_url("https://mysite.com/".to_string());
2284
2285 let mut filter_set = adblock::lists::FilterSet::new(false);
2286 let mut opts = adblock::lists::ParseOptions::default();
2287 opts.rule_types = adblock::lists::RuleTypes::All;
2288 filter_set.add_filters(["||evil-cdn.example.net^$script"], opts);
2289 let engine = adblock::Engine::from_filter_set(filter_set, true);
2290 nm.set_adblock_engine(std::sync::Arc::new(engine));
2291
2292 assert!(
2293 run_full_interception(
2294 &mut nm,
2295 "https://evil-cdn.example.net/tracker.js",
2296 ResourceType::Script,
2297 false,
2298 ),
2299 "custom engine rule should block through full pipeline"
2300 );
2301
2302 assert!(
2304 !run_full_interception(
2305 &mut nm,
2306 "https://mysite.com/app.js",
2307 ResourceType::Script,
2308 true,
2309 ),
2310 "first-party script should still be allowed with custom engine"
2311 );
2312 }
2313
2314 #[cfg(feature = "adblock")]
2315 #[test]
2316 fn test_e2e_ad_image_blocked() {
2317 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2318
2319 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2320 nm.set_page_url("https://www.mylegitsite-test.com/".to_string());
2321
2322 assert!(
2324 run_full_interception(
2325 &mut nm,
2326 "https://googleads.g.doubleclick.net/pagead/viewthroughconversion/123/?random=456",
2327 ResourceType::Image,
2328 false,
2329 ),
2330 "doubleclick ad image/tracking pixel should be blocked"
2331 );
2332
2333 assert!(
2335 !run_full_interception(
2336 &mut nm,
2337 "https://www.mylegitsite-test.com/images/logo.png",
2338 ResourceType::Image,
2339 true,
2340 ),
2341 "legitimate first-party image should not be blocked"
2342 );
2343 }
2344
2345 #[cfg(feature = "adblock")]
2346 #[test]
2347 fn test_e2e_hostname_with_userinfo() {
2348 use chromiumoxide_cdp::cdp::browser_protocol::network::ResourceType;
2349
2350 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2351 nm.set_page_url("https://example.org/".to_string());
2352
2353 assert!(
2355 run_full_interception(
2356 &mut nm,
2357 "https://user:pass@www.googletagmanager.com/gtm.js?id=GTM-XXXX",
2358 ResourceType::Script,
2359 false,
2360 ),
2361 "tracker URL with userinfo should still be blocked"
2362 );
2363 }
2364
2365 #[test]
2366 fn test_blacklist_non_strict_allows_whitelist_override() {
2367 let mut nm = NetworkManager::new(false, Duration::from_secs(30));
2368 nm.set_page_url("https://example.com/".to_string());
2369
2370 nm.set_blacklist_patterns(["beacon.min.js"]);
2371 nm.set_whitelist_patterns(["beacon.min.js"]);
2372
2373 nm.set_blacklist_strict(false);
2374
2375 let u = "https://static.cloudflareinsights.com/beacon.min.js";
2376 assert!(nm.is_blacklisted(u));
2377 assert!(nm.is_whitelisted(u));
2378 assert!(!nm.blacklist_strict);
2379 }
2380
    /// Build a `Network.requestWillBeSent` event by deserializing a JSON fixture.
    ///
    /// `resource_type` is the CDP resource type name placed in the `type` field (for
    /// example `"Document"`). When `redirect_from_url` is given, a 302 `redirectResponse`
    /// is attached whose `url` is that value and whose `Location` header is `url`.
    ///
    /// Panics if the fixture does not deserialize.
    fn make_request_will_be_sent(
        request_id: &str,
        url: &str,
        resource_type: &str,
        redirect_from_url: Option<&str>,
    ) -> chromiumoxide_cdp::cdp::browser_protocol::network::EventRequestWillBeSent {
        let mut v = serde_json::json!({
            "requestId": request_id,
            "loaderId": "test-loader",
            "documentURL": url,
            "request": {
                "url": url,
                "method": "GET",
                "headers": {},
                "initialPriority": "Medium",
                "referrerPolicy": "no-referrer"
            },
            "timestamp": 0.0,
            "wallTime": 0.0,
            "initiator": { "type": "other" },
            "redirectHasExtraInfo": false,
            "type": resource_type,
            "frameId": "frame1"
        });
        // A redirect hop is expressed by attaching the response that caused it.
        if let Some(from) = redirect_from_url {
            v["redirectResponse"] = serde_json::json!({
                "url": from,
                "status": 302,
                "statusText": "Found",
                "headers": { "Location": url },
                "mimeType": "text/html",
                "charset": "",
                "connectionReused": false,
                "connectionId": 0.0,
                "encodedDataLength": 0.0,
                "securityState": "unknown"
            });
        }
        serde_json::from_value(v).expect("EventRequestWillBeSent should deserialize")
    }
2429
2430 fn drain_too_many_redirects(nm: &mut NetworkManager) -> Option<super::HttpRequest> {
2431 while let Some(ev) = nm.poll() {
2432 if let super::NetworkEvent::RequestFailed(req) = ev {
2433 if req.failure_text.as_deref() == Some("net::ERR_TOO_MANY_REDIRECTS") {
2434 return Some(req);
2435 }
2436 }
2437 }
2438 None
2439 }
2440
2441 fn drain_stop_loading(nm: &mut NetworkManager) -> bool {
2442 while let Some(ev) = nm.poll() {
2443 if let super::NetworkEvent::SendCdpRequest((method, _)) = ev {
2444 let m: &str = method.as_ref();
2445 if m == "Page.stopLoading" {
2446 return true;
2447 }
2448 }
2449 }
2450 false
2451 }
2452
/// A manager whose `max_redirects` is left untouched after construction must
/// not fail a long document redirect chain with `net::ERR_TOO_MANY_REDIRECTS`.
#[test]
fn test_max_redirects_none_allows_unlimited_chain() {
    // Number of redirect hops fed after the initial request. The bound is
    // inclusive so the chain really has the 10 hops the assertion message
    // promises (an exclusive `1..10` would only produce 9).
    const HOPS: usize = 10;

    let mut nm = NetworkManager::new(false, Duration::from_secs(30));

    // Initial navigation request, not a redirect.
    nm.on_request_will_be_sent(&make_request_will_be_sent(
        "r1",
        "https://example.com/0",
        "Document",
        None,
    ));

    // Each hop reuses the request id and redirects from the previous URL.
    for i in 1..=HOPS {
        nm.on_request_will_be_sent(&make_request_will_be_sent(
            "r1",
            &format!("https://example.com/{i}"),
            "Document",
            Some(&format!("https://example.com/{}", i - 1)),
        ));
    }

    assert!(
        drain_too_many_redirects(&mut nm).is_none(),
        "no cap set: chain of 10 hops must not emit ERR_TOO_MANY_REDIRECTS"
    );
}
2479
/// With `max_redirects = Some(3)`, a 4-hop document redirect chain must produce
/// a `net::ERR_TOO_MANY_REDIRECTS` failure carrying the accumulated chain, and
/// must also dispatch `Page.stopLoading`.
#[test]
fn test_max_redirects_caps_document_chain() {
    /// Creates a manager capped at 3 redirects and feeds it an initial
    /// document request followed by 4 redirect hops under `request_id`.
    fn capped_manager_after_chain(request_id: &str) -> NetworkManager {
        let mut nm = NetworkManager::new(false, Duration::from_secs(30));
        nm.max_redirects = Some(3);

        nm.on_request_will_be_sent(&make_request_will_be_sent(
            request_id,
            "https://example.com/0",
            "Document",
            None,
        ));
        // Pairs (0, 1), (1, 2), (2, 3), (3, 4): redirect source and target.
        for (prev, next) in (0..4).zip(1..) {
            let target = format!("https://example.com/{next}");
            let source = format!("https://example.com/{prev}");
            nm.on_request_will_be_sent(&make_request_will_be_sent(
                request_id,
                &target,
                "Document",
                Some(source.as_str()),
            ));
        }
        nm
    }

    // First run: inspect the failed request emitted when the cap trips.
    let mut nm = capped_manager_after_chain("r1");
    let failed = drain_too_many_redirects(&mut nm)
        .expect("cap of 3 on a 4-hop chain must emit ERR_TOO_MANY_REDIRECTS");
    assert_eq!(
        failed.redirect_chain.len(),
        4,
        "failed request should preserve the full accumulated chain"
    );
    assert_eq!(
        failed.url.as_deref(),
        Some("https://example.com/4"),
        "failed request url should be the hop that tripped the cap"
    );

    // Second run on a fresh manager: draining above consumes events from the
    // queue, so the stop-loading check replays the chain from scratch.
    let mut nm = capped_manager_after_chain("r2");
    assert!(
        drain_stop_loading(&mut nm),
        "cap hit must dispatch Page.stopLoading to abort navigation"
    );
}
2538
/// With `max_redirects = Some(2)`, a 5-hop redirect chain on a `Script`
/// request must not yield a `net::ERR_TOO_MANY_REDIRECTS` failure.
#[test]
fn test_max_redirects_ignores_subresources() {
    let mut nm = NetworkManager::new(false, Duration::from_secs(30));
    nm.max_redirects = Some(2);

    // Initial sub-resource request, not a redirect.
    let first = make_request_will_be_sent("s1", "https://cdn.example.com/0.js", "Script", None);
    nm.on_request_will_be_sent(&first);

    // Pairs (0, 1) through (4, 5): redirect source and target indices.
    for (prev, next) in (0..5).zip(1..) {
        let target = format!("https://cdn.example.com/{next}.js");
        let source = format!("https://cdn.example.com/{prev}.js");
        let hop = make_request_will_be_sent("s1", &target, "Script", Some(source.as_str()));
        nm.on_request_will_be_sent(&hop);
    }

    let capped = drain_too_many_redirects(&mut nm);
    assert!(
        capped.is_none(),
        "sub-resource redirect chains must never be capped"
    );
}
2565}