1use super::blockers::{
2 block_websites::block_xhr, ignore_script_embedded, ignore_script_xhr, ignore_script_xhr_media,
3 xhr::IGNORE_XHR_ASSETS,
4};
5use crate::auth::Credentials;
6#[cfg(feature = "_cache")]
7use crate::cache::BasicCachePolicy;
8use crate::cmd::CommandChain;
9use crate::handler::http::HttpRequest;
10use aho_corasick::AhoCorasick;
11use case_insensitive_string::CaseInsensitiveString;
12use chromiumoxide_cdp::cdp::browser_protocol::network::{
13 EmulateNetworkConditionsParams, EventLoadingFailed, EventLoadingFinished,
14 EventRequestServedFromCache, EventRequestWillBeSent, EventResponseReceived, Headers,
15 InterceptionId, RequestId, ResourceType, Response, SetCacheDisabledParams,
16 SetExtraHttpHeadersParams,
17};
18use chromiumoxide_cdp::cdp::browser_protocol::{
19 fetch::{
20 self, AuthChallengeResponse, AuthChallengeResponseResponse, ContinueRequestParams,
21 ContinueWithAuthParams, DisableParams, EventAuthRequired, EventRequestPaused,
22 RequestPattern,
23 },
24 network::SetBypassServiceWorkerParams,
25};
26use chromiumoxide_cdp::cdp::browser_protocol::{
27 network::EnableParams, security::SetIgnoreCertificateErrorsParams,
28};
29use chromiumoxide_types::{Command, Method, MethodId};
30use hashbrown::{HashMap, HashSet};
31use lazy_static::lazy_static;
32use reqwest::header::PROXY_AUTHORIZATION;
33use spider_network_blocker::intercept_manager::NetworkInterceptManager;
34pub use spider_network_blocker::scripts::{
35 URL_IGNORE_SCRIPT_BASE_PATHS, URL_IGNORE_SCRIPT_STYLES_PATHS, URL_IGNORE_TRIE_PATHS,
36};
37use std::borrow::Cow;
38use std::collections::VecDeque;
39use std::time::Duration;
40
lazy_static! {
    /// Substrings that exempt a script URL from JavaScript blocking: framework
    /// names, common bundle file names and a handful of trusted hosts.
    /// Compiled into `ALLOWED_MATCHER`.
    static ref JS_FRAMEWORK_ALLOW: Vec<&'static str> = vec![
        "jquery", "angular",
        "react", "vue", "bootstrap",
        "d3",
        "lodash",
        "ajax",
        "application",
        "app", "main",
        "index",
        "bundle",
        "vendor",
        "runtime",
        "polyfill",
        "scripts",
        "es2015.",
        "es2020.",
        "webpack",
        "/wp-content/js/", "https://m.stripe.network/",
        "https://challenges.cloudflare.com/",
        "https://www.google.com/recaptcha/api.js",
        "https://google.com/recaptcha/api.js",
        "https://js.stripe.com/",
        "https://cdn.prod.website-files.com/", "https://cdnjs.cloudflare.com/", "https://code.jquery.com/jquery-"
    ];

    /// Aho-Corasick matcher built once from `JS_FRAMEWORK_ALLOW`; a script URL
    /// that matches is not blocked by the `block_javascript` option.
    pub static ref ALLOWED_MATCHER: AhoCorasick = AhoCorasick::new(JS_FRAMEWORK_ALLOW.iter()).expect("matcher to build");

    /// Path prefixes (relative to the host) that `ignore_script` checks,
    /// together with a `.js` suffix, to decide that a script can be ignored.
    pub static ref JS_FRAMEWORK_PATH: phf::Set<&'static str> = {
        phf::phf_set! {
            "_astro/", "_app/immutable"
        }
    };

    /// Content types of binary documents, archives, images, video and audio.
    /// Not referenced elsewhere in this part of the file.
    pub static ref IGNORE_CONTENT_TYPES: phf::Set<&'static str> = phf::phf_set! {
        "application/pdf",
        "application/zip",
        "application/x-rar-compressed",
        "application/x-tar",
        "image/png",
        "image/jpeg",
        "image/gif",
        "image/bmp",
        "image/svg+xml",
        "video/mp4",
        "video/x-msvideo",
        "video/x-matroska",
        "video/webm",
        "audio/mpeg",
        "audio/ogg",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/vnd.ms-excel",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/vnd.ms-powerpoint",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "application/x-7z-compressed",
        "application/x-rpm",
        "application/x-shockwave-flash",
        "application/rtf",
    };

    /// Resource type names that are skipped when `ignore_visuals` is set.
    pub static ref IGNORE_VISUAL_RESOURCE_MAP: phf::Set<&'static str> = phf::phf_set! {
        "Image",
        "Media",
        "Font"
    };

    /// Resource type names that are always skipped by the paused-request handler.
    pub static ref IGNORE_NETWORKING_RESOURCE_MAP: phf::Set<&'static str> = phf::phf_set! {
        "CspViolationReport",
        "Manifest",
        "Other",
        "Prefetch",
        "Ping",
    };

    /// Case-insensitive `css` extension used when blocking stylesheet XHR requests.
    pub static ref CSS_EXTENSION: CaseInsensitiveString = CaseInsensitiveString::from("css");

    /// Initial command chain: the serialized network `EnableParams` command.
    /// Empty if serialization fails.
    pub static ref INIT_CHAIN: Vec<(std::borrow::Cow<'static, str>, serde_json::Value)> = {
        let enable = EnableParams::default();

        if let Ok(c) = serde_json::to_value(&enable) {
            vec![(enable.identifier(), c)]
        } else {
            vec![]
        }
    };

    /// Initial command chain that additionally sends
    /// `SetIgnoreCertificateErrorsParams::new(true)` after the network enable
    /// command. Commands that fail to serialize are left out.
    pub static ref INIT_CHAIN_IGNORE_HTTP_ERRORS: Vec<(std::borrow::Cow<'static, str>, serde_json::Value)> = {
        let enable = EnableParams::default();
        let mut v = vec![];
        if let Ok(c) = serde_json::to_value(&enable) {
            v.push((enable.identifier(), c));
        }
        let ignore = SetIgnoreCertificateErrorsParams::new(true);
        if let Ok(ignored) = serde_json::to_value(&ignore) {
            v.push((ignore.identifier(), ignored));
        }

        v
    };

    /// Fetch-domain enable command: handles auth requests and pauses every URL
    /// (pattern `*`).
    pub static ref ENABLE_FETCH: chromiumoxide_cdp::cdp::browser_protocol::fetch::EnableParams = {
        fetch::EnableParams::builder()
            .handle_auth_requests(true)
            .pattern(RequestPattern::builder().url_pattern("*").build())
            .build()
    };
}
168
/// Returns `true` when `status` is one of the redirect codes this module
/// rewrites `Location` headers for: 301, 302, 303, 307 or 308.
pub(crate) fn is_redirect_status(status: i64) -> bool {
    const REDIRECT_CODES: [i64; 5] = [301, 302, 303, 307, 308];
    REDIRECT_CODES.contains(&status)
}
173
/// Tracks in-flight requests for a target and decides which paused requests
/// are allowed, blocked, rewritten or served from cache.
#[derive(Debug)]
pub struct NetworkManager {
    /// Events waiting to be drained through `poll`.
    queued_events: VecDeque<NetworkEvent>,
    /// Selects the init chain that also ignores certificate errors.
    ignore_httpserrors: bool,
    /// Requests that were announced and have not received a response yet.
    requests: HashMap<RequestId, HttpRequest>,
    /// `requestWillBeSent` events parked until the matching paused event arrives.
    requests_will_be_sent: HashMap<RequestId, EventRequestWillBeSent>,
    /// Extra HTTP headers sent with every request.
    extra_headers: std::collections::HashMap<String, String>,
    /// Interception ids keyed by network request id.
    request_id_to_interception_id: HashMap<RequestId, InterceptionId>,
    /// Whether the user asked for the browser cache to be disabled.
    user_cache_disabled: bool,
    /// Requests for which credentials were already supplied once.
    attempted_authentications: HashSet<RequestId>,
    /// Credentials offered on auth challenges, if any.
    credentials: Option<Credentials>,
    /// Whether the user enabled request interception.
    pub(crate) user_request_interception_enabled: bool,
    /// When set, every paused request is failed as blocked.
    block_all: bool,
    /// Whether request interception is considered active at protocol level.
    pub(crate) protocol_request_interception_enabled: bool,
    /// Last offline-emulation state that was applied.
    offline: bool,
    /// Timeout handed to the initial command chain.
    pub request_timeout: Duration,
    /// Skip visual resources (see `IGNORE_VISUAL_RESOURCE_MAP`).
    pub ignore_visuals: bool,
    /// Skip stylesheet resources.
    pub block_stylesheets: bool,
    /// Skip scripts that do not match `ALLOWED_MATCHER`.
    pub block_javascript: bool,
    /// Skip known analytics scripts and XHR requests.
    pub block_analytics: bool,
    /// Together with `ignore_visuals`, enables the embedded-script filter.
    pub only_html: bool,
    /// Set when the first document URL ends with `.xml`; lets `.xsl` requests through.
    pub xml_document: bool,
    /// Site-specific interception rules.
    pub intercept_manager: NetworkInterceptManager,
    /// Number of document requests seen for the current document URL.
    pub document_reload_tracker: u8,
    /// URL of the last document request that was seen.
    pub document_target_domain: String,
    /// Remaining byte budget; `None` disables the limit.
    pub max_bytes_allowed: Option<u64>,
    /// Key used to look up entries in the session cache.
    #[cfg(feature = "_cache")]
    pub cache_site_key: Option<String>,
    /// Policy that decides whether a cached entry may be served.
    #[cfg(feature = "_cache")]
    pub cache_policy: Option<BasicCachePolicy>,
}
222
223impl NetworkManager {
224 pub fn new(ignore_httpserrors: bool, request_timeout: Duration) -> Self {
225 Self {
226 queued_events: Default::default(),
227 ignore_httpserrors,
228 requests: Default::default(),
229 requests_will_be_sent: Default::default(),
230 extra_headers: Default::default(),
231 request_id_to_interception_id: Default::default(),
232 user_cache_disabled: false,
233 attempted_authentications: Default::default(),
234 credentials: None,
235 block_all: false,
236 user_request_interception_enabled: false,
237 protocol_request_interception_enabled: false,
238 offline: false,
239 request_timeout,
240 ignore_visuals: false,
241 block_javascript: false,
242 block_stylesheets: false,
243 block_analytics: true,
244 only_html: false,
245 xml_document: false,
246 intercept_manager: NetworkInterceptManager::Unknown,
247 document_reload_tracker: 0,
248 document_target_domain: String::new(),
249 max_bytes_allowed: None,
250 #[cfg(feature = "_cache")]
251 cache_site_key: None,
252 #[cfg(feature = "_cache")]
253 cache_policy: None,
254 }
255 }
256
257 pub fn init_commands(&self) -> CommandChain {
258 let cmds = if self.ignore_httpserrors {
259 INIT_CHAIN_IGNORE_HTTP_ERRORS.clone()
260 } else {
261 INIT_CHAIN.clone()
262 };
263 CommandChain::new(cmds, self.request_timeout)
264 }
265
266 pub(crate) fn push_cdp_request<T: Command>(&mut self, cmd: T) {
267 let method = cmd.identifier();
268 if let Ok(params) = serde_json::to_value(cmd) {
269 self.queued_events
270 .push_back(NetworkEvent::SendCdpRequest((method, params)));
271 }
272 }
273
/// Removes and returns the oldest queued event, or `None` when the queue is empty.
pub fn poll(&mut self) -> Option<NetworkEvent> {
    self.queued_events.pop_front()
}
278
/// Returns the extra HTTP headers currently stored on the manager.
pub fn extra_headers(&self) -> &std::collections::HashMap<String, String> {
    &self.extra_headers
}
282
283 pub fn set_extra_headers(&mut self, headers: std::collections::HashMap<String, String>) {
284 self.extra_headers = headers;
285 self.extra_headers.remove(PROXY_AUTHORIZATION.as_str());
286 self.extra_headers.remove("Proxy-Authorization");
287 if let Ok(headers) = serde_json::to_value(&self.extra_headers) {
288 self.push_cdp_request(SetExtraHttpHeadersParams::new(Headers::new(headers)));
289 }
290 }
291
/// Queues a `SetBypassServiceWorkerParams` command carrying `bypass`.
///
/// NOTE(review): the method name speaks of "enabled" while the argument is
/// forwarded unchanged as the *bypass* flag — confirm callers pass the
/// polarity they intend.
pub fn set_service_worker_enabled(&mut self, bypass: bool) {
    self.push_cdp_request(SetBypassServiceWorkerParams::new(bypass));
}
295
/// Sets the `block_all` flag; while it is set, paused requests are failed as blocked.
pub fn set_block_all(&mut self, block_all: bool) {
    self.block_all = block_all;
}
299
/// Records whether the user wants request interception and then re-evaluates
/// whether the fetch domain has to be enabled or disabled.
pub fn set_request_interception(&mut self, enabled: bool) {
    self.user_request_interception_enabled = enabled;
    self.update_protocol_request_interception();
}
304
305 pub fn set_cache_enabled(&mut self, enabled: bool) {
306 let run = self.user_cache_disabled != !enabled;
307 self.user_cache_disabled = !enabled;
308 if run {
309 self.update_protocol_cache_disabled();
310 }
311 }
312
313 pub fn disable_request_intercept(&mut self) {
314 self.protocol_request_interception_enabled = true;
315 }
316
/// Sets (or clears) the key used to look up entries in the session cache.
#[cfg(feature = "_cache")]
pub fn set_cache_site_key(&mut self, cache_site_key: Option<String>) {
    self.cache_site_key = cache_site_key;
}
322
/// Sets (or clears) the policy that decides whether a cached entry may be served.
#[cfg(feature = "_cache")]
pub fn set_cache_policy(&mut self, cache_policy: Option<BasicCachePolicy>) {
    self.cache_policy = cache_policy;
}
328
/// Queues a `SetCacheDisabledParams` command reflecting the current
/// `user_cache_disabled` setting.
pub fn update_protocol_cache_disabled(&mut self) {
    self.push_cdp_request(SetCacheDisabledParams::new(self.user_cache_disabled));
}
332
/// Stores credentials to offer on auth challenges and turns interception on.
///
/// The interception flag is set only after
/// `update_protocol_request_interception` has compared the wanted state with
/// the previous flag, so the enable command is queued when the flag was
/// still `false`.
pub fn authenticate(&mut self, credentials: Credentials) {
    self.credentials = Some(credentials);
    self.update_protocol_request_interception();
    self.protocol_request_interception_enabled = true;
}
338
339 fn update_protocol_request_interception(&mut self) {
340 let enabled = self.user_request_interception_enabled || self.credentials.is_some();
341
342 if enabled == self.protocol_request_interception_enabled {
343 return;
344 }
345
346 if enabled {
347 self.push_cdp_request(ENABLE_FETCH.clone())
348 } else {
349 self.push_cdp_request(DisableParams::default())
350 }
351 }
352
353 pub(crate) fn ignore_script(
355 &self,
356 url: &str,
357 block_analytics: bool,
358 intercept_manager: NetworkInterceptManager,
359 ) -> bool {
360 let mut ignore_script = block_analytics
361 && spider_network_blocker::scripts::URL_IGNORE_TRIE.contains_prefix(url);
362
363 if !ignore_script {
364 if let Some(index) = url.find("//") {
365 let pos = index + 2;
366
367 if pos < url.len() {
369 if let Some(slash_index) = url[pos..].find('/') {
371 let base_path_index = pos + slash_index + 1;
372
373 if url.len() > base_path_index {
374 let new_url: &str = &url[base_path_index..];
375
376 if !ignore_script
378 && intercept_manager == NetworkInterceptManager::Unknown
379 {
380 let hydration_file =
381 JS_FRAMEWORK_PATH.iter().any(|p| new_url.starts_with(p));
382
383 if hydration_file && new_url.ends_with(".js") {
385 ignore_script = true;
386 }
387 }
388
389 if !ignore_script
390 && URL_IGNORE_SCRIPT_BASE_PATHS.contains_prefix(new_url)
391 {
392 ignore_script = true;
393 }
394
395 if !ignore_script
396 && self.ignore_visuals
397 && URL_IGNORE_SCRIPT_STYLES_PATHS.contains_prefix(new_url)
398 {
399 ignore_script = true;
400 }
401 }
402 }
403 }
404 }
405 }
406
407 if !ignore_script && block_analytics {
409 ignore_script = URL_IGNORE_TRIE_PATHS.contains_prefix(url);
410 }
411
412 ignore_script
413 }
414
415 fn skip_xhr(
417 &self,
418 skip_networking: bool,
419 event: &EventRequestPaused,
420 network_event: bool,
421 ) -> bool {
422 if !skip_networking && network_event {
424 let request_url = event.request.url.as_str();
425
426 let skip_analytics =
428 self.block_analytics && (ignore_script_xhr(request_url) || block_xhr(request_url));
429
430 if skip_analytics {
431 true
432 } else if self.block_stylesheets || self.ignore_visuals {
433 let block_css = self.block_stylesheets;
434 let block_media = self.ignore_visuals;
435
436 let mut block_request = false;
437
438 if let Some(position) = request_url.rfind('.') {
439 let hlen = request_url.len();
440 let has_asset = hlen - position;
441
442 if has_asset >= 3 {
443 let next_position = position + 1;
444
445 if block_media
446 && IGNORE_XHR_ASSETS.contains::<CaseInsensitiveString>(
447 &request_url[next_position..].into(),
448 )
449 {
450 block_request = true;
451 } else if block_css {
452 block_request =
453 CaseInsensitiveString::from(request_url[next_position..].as_bytes())
454 .contains(&**CSS_EXTENSION)
455 }
456 }
457 }
458
459 if !block_request {
460 block_request = ignore_script_xhr_media(request_url);
461 }
462
463 block_request
464 } else {
465 skip_networking
466 }
467 } else {
468 skip_networking
469 }
470 }
471
/// Keeps an existing skip decision; otherwise asks the ad-block engine.
#[cfg(feature = "adblock")]
#[inline]
fn detect_ad_if_enabled(&mut self, event: &EventRequestPaused, skip_networking: bool) -> bool {
    // Short-circuits so the engine is only consulted for requests not yet skipped.
    skip_networking || self.detect_ad(event)
}
482
/// Without the `adblock` feature no ad detection happens: the incoming skip
/// decision is returned unchanged.
#[cfg(not(feature = "adblock"))]
#[inline]
fn detect_ad_if_enabled(&mut self, _event: &EventRequestPaused, skip_networking: bool) -> bool {
    skip_networking
}
489
490 #[inline]
491 fn fail_request_blocked(
493 &mut self,
494 request_id: &chromiumoxide_cdp::cdp::browser_protocol::fetch::RequestId,
495 ) {
496 let params = chromiumoxide_cdp::cdp::browser_protocol::fetch::FailRequestParams::new(
497 request_id.clone(),
498 chromiumoxide_cdp::cdp::browser_protocol::network::ErrorReason::BlockedByClient,
499 );
500 self.push_cdp_request(params);
501 }
502
503 #[inline]
504 fn fulfill_request_empty_200(
506 &mut self,
507 request_id: &chromiumoxide_cdp::cdp::browser_protocol::fetch::RequestId,
508 ) {
509 let params = chromiumoxide_cdp::cdp::browser_protocol::fetch::FulfillRequestParams::new(
510 request_id.clone(),
511 200,
512 );
513 self.push_cdp_request(params);
514 }
515
/// Queues a command that answers the paused request with a cached response:
/// the given status, the headers as response headers, and the body encoded
/// as standard base64.
#[cfg(feature = "_cache")]
#[inline]
fn fulfill_request_from_cache(
    &mut self,
    request_id: &chromiumoxide_cdp::cdp::browser_protocol::fetch::RequestId,
    body: &[u8],
    headers: &std::collections::HashMap<String, String>,
    status: i64,
) {
    use crate::cdp::browser_protocol::fetch::HeaderEntry;
    use crate::handler::network::fetch::FulfillRequestParams;
    use base64::Engine;

    let response_headers: Vec<HeaderEntry> = headers
        .iter()
        .map(|(name, value)| HeaderEntry {
            name: name.clone().into(),
            value: value.clone().into(),
        })
        .collect();

    let encoded_body = base64::engine::general_purpose::STANDARD.encode(body);

    let mut params = FulfillRequestParams::new(request_id.clone(), status);
    params.response_headers = Some(response_headers);
    params.body = Some(encoded_body.into());

    self.push_cdp_request(params);
}
554
555 #[inline]
556 fn continue_request_with_url(
558 &mut self,
559 request_id: &chromiumoxide_cdp::cdp::browser_protocol::fetch::RequestId,
560 url: Option<&str>,
561 intercept_response: bool,
562 ) {
563 let mut params = ContinueRequestParams::new(request_id.clone());
564 if let Some(url) = url {
565 params.url = Some(url.to_string());
566 params.intercept_response = Some(intercept_response);
567 }
568 self.push_cdp_request(params);
569 }
570
571 #[inline]
573 pub fn on_fetch_request_paused(&mut self, event: &EventRequestPaused) {
574 if self.user_request_interception_enabled && self.protocol_request_interception_enabled {
576 return;
577 }
578
579 if self.block_all {
580 tracing::debug!(
581 "Blocked (block_all): {:?} - {}",
582 event.resource_type,
583 event.request.url
584 );
585 return self.fail_request_blocked(&event.request_id);
586 }
587
588 if let Some(network_id) = event.network_id.as_ref() {
590 if let Some(request_will_be_sent) =
591 self.requests_will_be_sent.remove(network_id.as_ref())
592 {
593 return self
594 .on_request(&request_will_be_sent, Some(event.request_id.clone().into()));
595 }
596 } else {
597 return self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()));
599 }
600
601 let resource_type = &event.resource_type;
603 let javascript_resource = *resource_type == ResourceType::Script;
604 let document_resource = *resource_type == ResourceType::Document;
605 let network_resource = !document_resource && crate::utils::is_data_resource(resource_type);
606
607 let mut skip_networking =
609 self.block_all || IGNORE_NETWORKING_RESOURCE_MAP.contains(resource_type.as_ref());
610
611 if !skip_networking {
613 skip_networking = self.document_reload_tracker >= 3;
614 }
615
616 let (current_url_cow, had_replacer) =
618 self.handle_document_replacement_and_tracking(event, document_resource);
619
620 let current_url: &str = current_url_cow.as_ref();
621
622 if !skip_networking {
624 if self.xml_document && current_url.ends_with(".xsl") {
626 skip_networking = false;
627 } else {
628 skip_networking = self.should_skip_for_visuals_and_basic_js(
629 resource_type,
630 javascript_resource,
631 current_url,
632 );
633 }
634 }
635
636 skip_networking = self.detect_ad_if_enabled(event, skip_networking);
638
639 if !skip_networking
641 && (self.only_html || self.ignore_visuals)
642 && (javascript_resource || document_resource)
643 {
644 skip_networking = ignore_script_embedded(current_url);
645 }
646
647 if !skip_networking && javascript_resource {
649 skip_networking =
650 self.ignore_script(current_url, self.block_analytics, self.intercept_manager);
651 }
652
653 skip_networking = self.skip_xhr(skip_networking, event, network_resource);
655
656 if !skip_networking && (javascript_resource || network_resource || document_resource) {
658 skip_networking = self.intercept_manager.intercept_detection(
659 current_url,
660 self.ignore_visuals,
661 network_resource,
662 );
663 }
664
665 if !skip_networking && (javascript_resource || network_resource) {
667 skip_networking = crate::handler::blockers::block_websites::block_website(current_url);
668 }
669
670 if skip_networking {
671 tracing::debug!("Blocked: {:?} - {}", resource_type, current_url);
672 self.fulfill_request_empty_200(&event.request_id);
673 } else {
674 #[cfg(feature = "_cache")]
675 {
676 if let (Some(policy), Some(cache_site_key)) =
677 (self.cache_policy.as_ref(), self.cache_site_key.as_deref())
678 {
679 let current_url = format!("{}:{}", event.request.method, ¤t_url);
680
681 if let Some((res, cache_policy)) =
682 crate::cache::remote::get_session_cache_item(cache_site_key, ¤t_url)
683 {
684 if policy.allows_cached(&cache_policy) {
685 tracing::debug!(
686 "Remote Cached: {:?} - {}",
687 resource_type,
688 ¤t_url
689 );
690 return self.fulfill_request_from_cache(
691 &event.request_id,
692 &res.body,
693 &res.headers,
694 res.status as i64,
695 );
696 }
697 }
698 }
699 }
700
701 tracing::debug!("Allowed: {:?} - {}", resource_type, current_url);
703 self.continue_request_with_url(
704 &event.request_id,
705 if had_replacer {
706 Some(current_url)
707 } else {
708 None
709 },
710 !had_replacer,
711 );
712 }
713 }
714
/// Returns `true` when a document URL has been recorded.
pub fn has_target_domain(&self) -> bool {
    !self.document_target_domain.is_empty()
}
719
/// Records `page_target_url` as the current document URL.
pub fn set_page_url(&mut self, page_target_url: String) {
    self.document_target_domain = page_target_url;
}
724
/// Resets the reload counter and forgets the recorded document URL.
pub fn clear_target_domain(&mut self) {
    self.document_reload_tracker = 0;
    self.document_target_domain = Default::default();
}
730
731 #[inline]
739 fn handle_document_replacement_and_tracking<'a>(
740 &mut self,
741 event: &'a EventRequestPaused,
742 document_resource: bool,
743 ) -> (Cow<'a, str>, bool) {
744 let mut replacer: Option<String> = None;
745 let current_url = event.request.url.as_str();
746
747 if document_resource {
748 if self.document_target_domain == current_url {
749 self.document_reload_tracker += 1;
751 } else if !self.document_target_domain.is_empty()
752 && event.redirected_request_id.is_some()
753 {
754 let (http_document_replacement, mut https_document_replacement) =
756 if self.document_target_domain.starts_with("http://") {
757 (
758 self.document_target_domain.replace("http://", "http//"),
759 self.document_target_domain.replace("http://", "https://"),
760 )
761 } else {
762 (
763 self.document_target_domain.replace("https://", "https//"),
764 self.document_target_domain.replace("https://", "http://"),
765 )
766 };
767
768 let trailing = https_document_replacement.ends_with('/');
770 if trailing {
771 https_document_replacement.pop();
772 }
773 if https_document_replacement.ends_with('/') {
774 https_document_replacement.pop();
775 }
776
777 let redirect_mask = format!(
778 "{}{}",
779 https_document_replacement, http_document_replacement
780 );
781
782 if current_url == redirect_mask {
783 replacer = Some(if trailing {
784 format!("{}/", https_document_replacement)
785 } else {
786 https_document_replacement
787 });
788 }
789 }
790
791 if self.document_target_domain.is_empty() && current_url.ends_with(".xml") {
792 self.xml_document = true;
793 }
794
795 self.document_target_domain = event.request.url.clone();
797 }
798
799 let current_url_cow = match replacer {
800 Some(r) => Cow::Owned(r),
801 None => Cow::Borrowed(event.request.url.as_str()),
802 };
803
804 let had_replacer = matches!(current_url_cow, Cow::Owned(_));
805 (current_url_cow, had_replacer)
806 }
807
808 #[inline]
810 fn should_skip_for_visuals_and_basic_js(
811 &self,
812 resource_type: &ResourceType,
813 javascript_resource: bool,
814 current_url: &str,
815 ) -> bool {
816 (self.ignore_visuals && IGNORE_VISUAL_RESOURCE_MAP.contains(resource_type.as_ref()))
817 || (self.block_stylesheets && *resource_type == ResourceType::Stylesheet)
818 || (self.block_javascript
819 && javascript_resource
820 && self.intercept_manager == NetworkInterceptManager::Unknown
821 && !ALLOWED_MATCHER.is_match(current_url))
822 }
823
/// Checks the request against the bundled ad-block patterns.
///
/// Only image, media, stylesheet, document, fetch and XHR requests are
/// checked; every other resource type returns `false` without consulting
/// the engine.
#[cfg(feature = "adblock")]
pub fn detect_ad(&self, event: &EventRequestPaused) -> bool {
    use adblock::{
        lists::{FilterSet, ParseOptions, RuleTypes},
        Engine,
    };

    lazy_static::lazy_static! {
        static ref AD_ENGINE: Engine = {
            let mut filter_set = FilterSet::new(false);
            let mut rules = ParseOptions::default();
            rules.rule_types = RuleTypes::All;

            filter_set.add_filters(
                &*spider_network_blocker::adblock::ADBLOCK_PATTERNS,
                rules,
            );

            Engine::from_filter_set(filter_set, true)
        };
    };

    let blockable = matches!(
        event.resource_type,
        ResourceType::Image
            | ResourceType::Media
            | ResourceType::Stylesheet
            | ResourceType::Document
            | ResourceType::Fetch
            | ResourceType::Xhr
    );

    if !blockable {
        return false;
    }

    let request_type = event.resource_type.as_ref().to_lowercase();
    let third_party = !event.request.is_same_site.unwrap_or_default();

    let request = adblock::request::Request::preparsed(
        event.request.url.as_str(),
        "example.com",
        "example.com",
        &request_type,
        third_party,
    );

    AD_ENGINE.check_network_request(&request).matched
}
871
872 pub fn on_fetch_auth_required(&mut self, event: &EventAuthRequired) {
873 let response = if self
874 .attempted_authentications
875 .contains(event.request_id.as_ref())
876 {
877 AuthChallengeResponseResponse::CancelAuth
878 } else if self.credentials.is_some() {
879 self.attempted_authentications
880 .insert(event.request_id.clone().into());
881 AuthChallengeResponseResponse::ProvideCredentials
882 } else {
883 AuthChallengeResponseResponse::Default
884 };
885
886 let mut auth = AuthChallengeResponse::new(response);
887 if let Some(creds) = self.credentials.clone() {
888 auth.username = Some(creds.username);
889 auth.password = Some(creds.password);
890 }
891 self.push_cdp_request(ContinueWithAuthParams::new(event.request_id.clone(), auth));
892 }
893
894 pub fn set_offline_mode(&mut self, value: bool) {
895 if self.offline == value {
896 return;
897 }
898 self.offline = value;
899 if let Ok(network) = EmulateNetworkConditionsParams::builder()
900 .offline(self.offline)
901 .latency(0)
902 .download_throughput(-1.)
903 .upload_throughput(-1.)
904 .build()
905 {
906 self.push_cdp_request(network);
907 }
908 }
909
910 pub fn on_request_will_be_sent(&mut self, event: &EventRequestWillBeSent) {
912 if self.protocol_request_interception_enabled && !event.request.url.starts_with("data:") {
913 if let Some(interception_id) = self
914 .request_id_to_interception_id
915 .remove(event.request_id.as_ref())
916 {
917 self.on_request(event, Some(interception_id));
918 } else {
919 self.requests_will_be_sent
921 .insert(event.request_id.clone(), event.clone());
922 }
923 } else {
924 self.on_request(event, None);
925 }
926 }
927
/// Marks the tracked request, if any, as served from the memory cache.
pub fn on_request_served_from_cache(&mut self, event: &EventRequestServedFromCache) {
    if let Some(request) = self.requests.get_mut(event.request_id.as_ref()) {
        request.from_memory_cache = true;
    }
}
934
935 pub fn on_response_received(&mut self, event: &EventResponseReceived) {
937 let mut request_failed = false;
938
939 let mut deducted: u64 = 0;
941
942 if let Some(max_bytes) = self.max_bytes_allowed.as_mut() {
943 let before = *max_bytes;
944
945 let received_bytes: u64 = event.response.encoded_data_length as u64;
947
948 let content_length: Option<u64> = event
950 .response
951 .headers
952 .inner()
953 .get("content-length")
954 .and_then(|v| v.as_str())
955 .and_then(|s| s.trim().parse::<u64>().ok());
956
957 *max_bytes = max_bytes.saturating_sub(received_bytes);
959
960 if let Some(cl) = content_length {
962 if cl > *max_bytes {
963 *max_bytes = 0;
964 }
965 }
966
967 request_failed = *max_bytes == 0;
968
969 deducted = before.saturating_sub(*max_bytes);
971 }
972
973 if deducted > 0 {
975 self.queued_events
976 .push_back(NetworkEvent::BytesConsumed(deducted));
977 }
978
979 if request_failed && self.max_bytes_allowed.is_some() {
981 self.set_block_all(true);
982 }
983
984 if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
985 request.set_response(event.response.clone());
986 self.queued_events.push_back(if request_failed {
987 NetworkEvent::RequestFailed(request)
988 } else {
989 NetworkEvent::RequestFinished(request)
990 });
991 }
992 }
993
994 pub fn on_network_loading_finished(&mut self, event: &EventLoadingFinished) {
995 if let Some(request) = self.requests.remove(event.request_id.as_ref()) {
996 if let Some(interception_id) = request.interception_id.as_ref() {
997 self.attempted_authentications
998 .remove(interception_id.as_ref());
999 }
1000 self.queued_events
1001 .push_back(NetworkEvent::RequestFinished(request));
1002 }
1003 }
1004
1005 pub fn on_network_loading_failed(&mut self, event: &EventLoadingFailed) {
1006 if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
1007 request.failure_text = Some(event.error_text.clone());
1008 if let Some(interception_id) = request.interception_id.as_ref() {
1009 self.attempted_authentications
1010 .remove(interception_id.as_ref());
1011 }
1012 self.queued_events
1013 .push_back(NetworkEvent::RequestFailed(request));
1014 }
1015 }
1016
1017 fn on_request(
1018 &mut self,
1019 event: &EventRequestWillBeSent,
1020 interception_id: Option<InterceptionId>,
1021 ) {
1022 let mut redirect_chain = Vec::new();
1023 let mut redirect_location = None;
1024
1025 if let Some(redirect_resp) = &event.redirect_response {
1026 if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
1027 if is_redirect_status(redirect_resp.status) {
1028 if let Some(location) = redirect_resp.headers.inner()["Location"].as_str() {
1029 if redirect_resp.url != location {
1030 let fixed_location = location.replace(&redirect_resp.url, "");
1031
1032 request.response.as_mut().map(|resp| {
1033 resp.headers.0["Location"] =
1034 serde_json::Value::String(fixed_location.clone());
1035 });
1036
1037 redirect_location = Some(fixed_location);
1038 }
1039 }
1040 }
1041
1042 self.handle_request_redirect(
1043 &mut request,
1044 if let Some(redirect_location) = redirect_location {
1045 let mut redirect_resp = redirect_resp.clone();
1046
1047 redirect_resp.headers.0["Location"] =
1048 serde_json::Value::String(redirect_location);
1049
1050 redirect_resp
1051 } else {
1052 redirect_resp.clone()
1053 },
1054 );
1055
1056 redirect_chain = std::mem::take(&mut request.redirect_chain);
1057 redirect_chain.push(request);
1058 }
1059 }
1060
1061 let request = HttpRequest::new(
1062 event.request_id.clone(),
1063 event.frame_id.clone(),
1064 interception_id,
1065 self.user_request_interception_enabled,
1066 redirect_chain,
1067 );
1068
1069 self.requests.insert(event.request_id.clone(), request);
1070 self.queued_events
1071 .push_back(NetworkEvent::Request(event.request_id.clone()));
1072 }
1073
/// Attaches the redirect `response` to `request` and forgets any auth
/// attempt recorded for the request's interception id.
fn handle_request_redirect(&mut self, request: &mut HttpRequest, response: Response) {
    request.set_response(response);
    if let Some(interception_id) = request.interception_id.as_ref() {
        self.attempted_authentications
            .remove(interception_id.as_ref());
    }
}
1081}
1082
/// Events produced by `NetworkManager` and drained through `NetworkManager::poll`.
#[derive(Debug)]
pub enum NetworkEvent {
    /// A CDP command (method id and serialized parameters) that should be sent.
    SendCdpRequest((MethodId, serde_json::Value)),
    /// A request with this id was registered.
    Request(RequestId),
    /// Carries a request id; not constructed in this part of the file.
    Response(RequestId),
    /// The request failed (loading error or exhausted byte budget).
    RequestFailed(HttpRequest),
    /// The request finished.
    RequestFinished(HttpRequest),
    /// Number of bytes deducted from the byte budget by a response.
    BytesConsumed(u64),
}