1use super::blockers::{
2 block_websites::block_xhr, ignore_script_embedded, ignore_script_xhr, ignore_script_xhr_media,
3 xhr::IGNORE_XHR_ASSETS,
4};
5use crate::auth::Credentials;
6use crate::cmd::CommandChain;
7use crate::handler::http::HttpRequest;
8use aho_corasick::AhoCorasick;
9use case_insensitive_string::CaseInsensitiveString;
10use chromiumoxide_cdp::cdp::browser_protocol::network::{
11 EmulateNetworkConditionsParams, EventLoadingFailed, EventLoadingFinished,
12 EventRequestServedFromCache, EventRequestWillBeSent, EventResponseReceived, Headers,
13 InterceptionId, RequestId, ResourceType, Response, SetCacheDisabledParams,
14 SetExtraHttpHeadersParams,
15};
16use chromiumoxide_cdp::cdp::browser_protocol::{
17 fetch::{
18 self, AuthChallengeResponse, AuthChallengeResponseResponse, ContinueRequestParams,
19 ContinueWithAuthParams, DisableParams, EventAuthRequired, EventRequestPaused,
20 RequestPattern,
21 },
22 network::SetBypassServiceWorkerParams,
23};
24use chromiumoxide_cdp::cdp::browser_protocol::{
25 network::EnableParams, security::SetIgnoreCertificateErrorsParams,
26};
27use chromiumoxide_types::{Command, Method, MethodId};
28use hashbrown::{HashMap, HashSet};
29use lazy_static::lazy_static;
30use reqwest::header::PROXY_AUTHORIZATION;
31use spider_network_blocker::intercept_manager::NetworkInterceptManager;
32pub use spider_network_blocker::scripts::{
33 URL_IGNORE_SCRIPT_BASE_PATHS, URL_IGNORE_SCRIPT_STYLES_PATHS, URL_IGNORE_TRIE_PATHS,
34};
35use std::collections::VecDeque;
36use std::time::Duration;
37
lazy_static! {
    /// URL fragments that identify scripts which stay allowed even when
    /// JavaScript blocking is enabled. Compiled into `ALLOWED_MATCHER`.
    static ref JS_FRAMEWORK_ALLOW: Vec<&'static str> = vec![
        "jquery", "angular",
        "react", "vue", "bootstrap",
        "d3",
        "lodash",
        "ajax",
        "application",
        "app", "main",
        "index",
        "bundle",
        "vendor",
        "runtime",
        "polyfill",
        "scripts",
        "es2015.",
        "es2020.",
        "webpack",
        "/wp-content/js/", "https://m.stripe.network/",
        "https://challenges.cloudflare.com/",
        "https://www.google.com/recaptcha/api.js",
        "https://google.com/recaptcha/api.js",
        "https://js.stripe.com/",
        "https://cdn.prod.website-files.com/", "https://cdnjs.cloudflare.com/", "https://code.jquery.com/jquery-"
    ];

    /// Multi-pattern matcher over `JS_FRAMEWORK_ALLOW`; a script URL that matches
    /// is exempt from the `block_javascript` rule in `on_fetch_request_paused`.
    /// Building the automaton panics on failure ("matcher to build").
    pub static ref ALLOWED_MATCHER: AhoCorasick = AhoCorasick::new(JS_FRAMEWORK_ALLOW.iter()).expect("matcher to build");

    /// Path prefixes checked by `ignore_script`: a script whose path (after the
    /// host) starts with one of these and ends with `.js` is ignored when the
    /// intercept manager is `Unknown`.
    pub static ref JS_FRAMEWORK_PATH: phf::Set<&'static str> = {
        phf::phf_set! {
            "_astro/", "_app/immutable"
        }
    };

    /// MIME types of binary/media/office payloads.
    // NOTE(review): not referenced in this part of the file; presumably used by
    // callers to skip non-HTML responses — confirm against the usage site.
    pub static ref IGNORE_CONTENT_TYPES: phf::Set<&'static str> = phf::phf_set! {
        "application/pdf",
        "application/zip",
        "application/x-rar-compressed",
        "application/x-tar",
        "image/png",
        "image/jpeg",
        "image/gif",
        "image/bmp",
        "image/svg+xml",
        "video/mp4",
        "video/x-msvideo",
        "video/x-matroska",
        "video/webm",
        "audio/mpeg",
        "audio/ogg",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/vnd.ms-excel",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/vnd.ms-powerpoint",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "application/x-7z-compressed",
        "application/x-rpm",
        "application/x-shockwave-flash",
        "application/rtf",
    };

    /// CDP resource-type names that are blocked when `ignore_visuals` is set.
    pub static ref IGNORE_VISUAL_RESOURCE_MAP: phf::Set<&'static str> = phf::phf_set! {
        "Image",
        "Media",
        "Font"
    };

    /// CDP resource-type names that `on_fetch_request_paused` always skips.
    pub static ref IGNORE_NETWORKING_RESOURCE_MAP: phf::Set<&'static str> = phf::phf_set! {
        "CspViolationReport",
        "Manifest",
        "Other",
        "Prefetch",
        "Ping",
    };

    /// The `css` extension, compared case-insensitively in `skip_xhr`.
    pub static ref CSS_EXTENSION: CaseInsensitiveString = CaseInsensitiveString::from("css");

    /// Default start-up command chain: a single network `EnableParams` command.
    /// Empty if the parameters fail to serialize.
    pub static ref INIT_CHAIN: Vec<(std::borrow::Cow<'static, str>, serde_json::Value)> = {
        let enable = EnableParams::default();

        if let Ok(c) = serde_json::to_value(&enable) {
            vec![(enable.identifier(), c)]
        } else {
            vec![]
        }
    };

    /// Start-up command chain used when `ignore_httpserrors` is set: the network
    /// enable command followed by `SetIgnoreCertificateErrorsParams::new(true)`.
    /// Commands that fail to serialize are left out.
    pub static ref INIT_CHAIN_IGNORE_HTTP_ERRORS: Vec<(std::borrow::Cow<'static, str>, serde_json::Value)> = {
        let enable = EnableParams::default();
        let mut v = vec![];
        if let Ok(c) = serde_json::to_value(&enable) {
            v.push((enable.identifier(), c));
        }
        let ignore = SetIgnoreCertificateErrorsParams::new(true);
        if let Ok(ignored) = serde_json::to_value(&ignore) {
            v.push((ignore.identifier(), ignored));
        }

        v
    };

    /// Fetch-domain enable command: handles auth requests and intercepts every
    /// URL (pattern `*`).
    pub static ref ENABLE_FETCH: chromiumoxide_cdp::cdp::browser_protocol::fetch::EnableParams = {
        fetch::EnableParams::builder()
            .handle_auth_requests(true)
            .pattern(RequestPattern::builder().url_pattern("*").build())
            .build()
    };
}
165
/// Returns `true` when `status` is one of the HTTP redirect codes this module
/// treats as a redirect: 301, 302, 303, 307 or 308.
pub(crate) fn is_redirect_status(status: i64) -> bool {
    const REDIRECT_CODES: [i64; 5] = [301, 302, 303, 307, 308];
    REDIRECT_CODES.contains(&status)
}
170
/// Tracks network state for a target: in-flight requests, interception and
/// blocking settings, and the queue of events/commands produced while handling
/// CDP network and fetch events.
#[derive(Debug)]
pub struct NetworkManager {
    /// Events and outgoing CDP commands waiting to be drained through `poll`.
    queued_events: VecDeque<NetworkEvent>,
    /// Selects the init chain that also ignores certificate errors.
    ignore_httpserrors: bool,
    /// Requests currently in flight, keyed by network request id.
    requests: HashMap<RequestId, HttpRequest>,
    /// `requestWillBeSent` events parked until the matching paused fetch event arrives.
    requests_will_be_sent: HashMap<RequestId, EventRequestWillBeSent>,
    /// Extra HTTP headers last set through `set_extra_headers`.
    extra_headers: std::collections::HashMap<String, String>,
    /// Interception ids waiting to be paired with a `requestWillBeSent` event.
    request_id_to_interception_id: HashMap<RequestId, InterceptionId>,
    /// Whether the cache has been disabled through `set_cache_enabled(false)`.
    user_cache_disabled: bool,
    /// Requests for which credentials were already supplied once.
    attempted_authentications: HashSet<RequestId>,
    /// Credentials used to answer auth challenges, if any.
    credentials: Option<Credentials>,
    /// Whether request interception was requested through `set_request_interception`.
    pub(crate) user_request_interception_enabled: bool,
    /// When set, every paused request is failed with `BlockedByClient`.
    block_all: bool,
    /// Whether fetch interception is considered active at the protocol level.
    pub(crate) protocol_request_interception_enabled: bool,
    /// Last value applied through `set_offline_mode`.
    offline: bool,
    /// Timeout handed to the init `CommandChain`.
    pub request_timeout: Duration,
    /// Block visual resources (see `IGNORE_VISUAL_RESOURCE_MAP`) and related assets.
    pub ignore_visuals: bool,
    /// Block stylesheet resources.
    pub block_stylesheets: bool,
    /// Block scripts that do not match `ALLOWED_MATCHER`.
    pub block_javascript: bool,
    /// Block analytics scripts and XHR calls. Defaults to `true` in `new`.
    pub block_analytics: bool,
    /// Together with `ignore_visuals`, enables the embedded-script check.
    pub only_html: bool,
    /// Set when the first document URL ends with `.xml`; lets `.xsl` requests through.
    pub xml_document: bool,
    /// Site-specific intercept rules; `Unknown` applies the generic rules.
    pub intercept_manager: NetworkInterceptManager,
    /// Counts document requests that repeat the current target URL; once it
    /// reaches 3, further requests are skipped.
    pub document_reload_tracker: u8,
    /// URL of the most recent document request (despite the name, the full URL).
    pub document_target_domain: String,
    /// Remaining byte budget; when it reaches zero all further requests are blocked.
    pub max_bytes_allowed: Option<u64>,
}
213
214impl NetworkManager {
215 pub fn new(ignore_httpserrors: bool, request_timeout: Duration) -> Self {
216 Self {
217 queued_events: Default::default(),
218 ignore_httpserrors,
219 requests: Default::default(),
220 requests_will_be_sent: Default::default(),
221 extra_headers: Default::default(),
222 request_id_to_interception_id: Default::default(),
223 user_cache_disabled: false,
224 attempted_authentications: Default::default(),
225 credentials: None,
226 block_all: false,
227 user_request_interception_enabled: false,
228 protocol_request_interception_enabled: false,
229 offline: false,
230 request_timeout,
231 ignore_visuals: false,
232 block_javascript: false,
233 block_stylesheets: false,
234 block_analytics: true,
235 only_html: false,
236 xml_document: false,
237 intercept_manager: NetworkInterceptManager::Unknown,
238 document_reload_tracker: 0,
239 document_target_domain: String::new(),
240 max_bytes_allowed: None,
241 }
242 }
243
244 pub fn init_commands(&self) -> CommandChain {
245 let cmds = if self.ignore_httpserrors {
246 INIT_CHAIN_IGNORE_HTTP_ERRORS.clone()
247 } else {
248 INIT_CHAIN.clone()
249 };
250 CommandChain::new(cmds, self.request_timeout)
251 }
252
253 pub(crate) fn push_cdp_request<T: Command>(&mut self, cmd: T) {
254 let method = cmd.identifier();
255 if let Ok(params) = serde_json::to_value(cmd) {
256 self.queued_events
257 .push_back(NetworkEvent::SendCdpRequest((method, params)));
258 }
259 }
260
    /// Pops the oldest queued event, or returns `None` when the queue is empty.
    pub fn poll(&mut self) -> Option<NetworkEvent> {
        self.queued_events.pop_front()
    }
265
    /// Returns the extra HTTP headers currently stored on the manager.
    pub fn extra_headers(&self) -> &std::collections::HashMap<String, String> {
        &self.extra_headers
    }
269
270 pub fn set_extra_headers(&mut self, headers: std::collections::HashMap<String, String>) {
271 self.extra_headers = headers;
272 self.extra_headers.remove(PROXY_AUTHORIZATION.as_str());
273 self.extra_headers.remove("Proxy-Authorization");
274 if let Ok(headers) = serde_json::to_value(&self.extra_headers) {
275 self.push_cdp_request(SetExtraHttpHeadersParams::new(Headers::new(headers)));
276 }
277 }
278
    /// Queues a `SetBypassServiceWorkerParams` command carrying `bypass`.
    ///
    /// Note that the argument is the *bypass* flag: `true` asks the browser to
    /// bypass service workers.
    pub fn set_service_worker_enabled(&mut self, bypass: bool) {
        self.push_cdp_request(SetBypassServiceWorkerParams::new(bypass));
    }
282
    /// Sets the flag that makes `on_fetch_request_paused` fail every paused request.
    pub fn set_block_all(&mut self, block_all: bool) {
        self.block_all = block_all;
    }
286
    /// Records whether request interception is wanted, then re-evaluates whether
    /// the fetch domain has to be enabled or disabled.
    pub fn set_request_interception(&mut self, enabled: bool) {
        self.user_request_interception_enabled = enabled;
        self.update_protocol_request_interception();
    }
291
292 pub fn set_cache_enabled(&mut self, enabled: bool) {
293 let run = self.user_cache_disabled != !enabled;
294 self.user_cache_disabled = !enabled;
295 if run {
296 self.update_protocol_cache_disabled();
297 }
298 }
299
    /// Marks protocol-level request interception as enabled without sending any
    /// command.
    // NOTE(review): the name says "disable" but the flag is set to `true`. With
    // the flag set, `update_protocol_request_interception` will not queue
    // another fetch enable command — confirm whether that is the intent or
    // whether this should assign `false`.
    pub fn disable_request_intercept(&mut self) {
        self.protocol_request_interception_enabled = true;
    }
303
    /// Queues a `SetCacheDisabledParams` command reflecting `user_cache_disabled`.
    pub fn update_protocol_cache_disabled(&mut self) {
        self.push_cdp_request(SetCacheDisabledParams::new(self.user_cache_disabled));
    }
307
    /// Stores `credentials` for answering auth challenges and makes sure fetch
    /// interception is on.
    pub fn authenticate(&mut self, credentials: Credentials) {
        self.credentials = Some(credentials);
        // With credentials present this queues the fetch enable command unless
        // interception is already flagged as active.
        self.update_protocol_request_interception();
        // Record interception as active so the enable command is not queued again.
        self.protocol_request_interception_enabled = true;
    }
313
314 fn update_protocol_request_interception(&mut self) {
315 let enabled = self.user_request_interception_enabled || self.credentials.is_some();
316
317 if enabled == self.protocol_request_interception_enabled {
318 return;
319 }
320
321 if enabled {
322 self.push_cdp_request(ENABLE_FETCH.clone())
323 } else {
324 self.push_cdp_request(DisableParams::default())
325 }
326 }
327
328 pub(crate) fn ignore_script(
330 &self,
331 url: &str,
332 block_analytics: bool,
333 intercept_manager: NetworkInterceptManager,
334 ) -> bool {
335 let mut ignore_script = block_analytics
336 && spider_network_blocker::scripts::URL_IGNORE_TRIE.contains_prefix(url);
337
338 if !ignore_script {
339 if let Some(index) = url.find("//") {
340 let pos = index + 2;
341
342 if pos < url.len() {
344 if let Some(slash_index) = url[pos..].find('/') {
346 let base_path_index = pos + slash_index + 1;
347
348 if url.len() > base_path_index {
349 let new_url: &str = &url[base_path_index..];
350
351 if !ignore_script
353 && intercept_manager == NetworkInterceptManager::Unknown
354 {
355 let hydration_file =
356 JS_FRAMEWORK_PATH.iter().any(|p| new_url.starts_with(p));
357
358 if hydration_file && new_url.ends_with(".js") {
360 ignore_script = true;
361 }
362 }
363
364 if !ignore_script
365 && URL_IGNORE_SCRIPT_BASE_PATHS.contains_prefix(new_url)
366 {
367 ignore_script = true;
368 }
369
370 if !ignore_script
371 && self.ignore_visuals
372 && URL_IGNORE_SCRIPT_STYLES_PATHS.contains_prefix(new_url)
373 {
374 ignore_script = true;
375 }
376 }
377 }
378 }
379 }
380 }
381
382 if !ignore_script && block_analytics {
384 ignore_script = URL_IGNORE_TRIE_PATHS.contains_prefix(url);
385 }
386
387 ignore_script
388 }
389
390 fn skip_xhr(
392 &self,
393 skip_networking: bool,
394 event: &EventRequestPaused,
395 network_event: bool,
396 ) -> bool {
397 if !skip_networking && network_event {
399 let request_url = event.request.url.as_str();
400
401 let skip_analytics =
403 self.block_analytics && (ignore_script_xhr(request_url) || block_xhr(request_url));
404
405 if skip_analytics {
406 true
407 } else if self.block_stylesheets || self.ignore_visuals {
408 let block_css = self.block_stylesheets;
409 let block_media = self.ignore_visuals;
410
411 let mut block_request = false;
412
413 if let Some(position) = request_url.rfind('.') {
414 let hlen = request_url.len();
415 let has_asset = hlen - position;
416
417 if has_asset >= 3 {
418 let next_position = position + 1;
419
420 if block_media
421 && IGNORE_XHR_ASSETS.contains::<CaseInsensitiveString>(
422 &request_url[next_position..].into(),
423 )
424 {
425 block_request = true;
426 } else if block_css {
427 block_request =
428 CaseInsensitiveString::from(request_url[next_position..].as_bytes())
429 .contains(&**CSS_EXTENSION)
430 }
431 }
432 }
433
434 if !block_request {
435 block_request = ignore_script_xhr_media(request_url);
436 }
437
438 block_request
439 } else {
440 skip_networking
441 }
442 } else {
443 skip_networking
444 }
445 }
446
447 #[cfg(not(feature = "adblock"))]
448 pub fn on_fetch_request_paused(&mut self, event: &EventRequestPaused) {
449 use super::blockers::block_websites::block_website;
450
451 if self.user_request_interception_enabled && self.protocol_request_interception_enabled {
452 return;
453 }
454
455 if self.block_all {
456 use chromiumoxide_cdp::cdp::browser_protocol::network::ErrorReason;
457 tracing::debug!("Blocked: {:?} - {}", event.resource_type, event.request.url);
458 let fullfill_params = crate::handler::network::fetch::FailRequestParams::new(
459 event.request_id.clone(),
460 ErrorReason::BlockedByClient,
461 );
462 self.push_cdp_request(fullfill_params);
463 } else {
464 if let Some(network_id) = event.network_id.as_ref() {
465 if let Some(request_will_be_sent) =
466 self.requests_will_be_sent.remove(network_id.as_ref())
467 {
468 self.on_request(&request_will_be_sent, Some(event.request_id.clone().into()));
469 } else {
470 let current_url = event.request.url.as_str();
471 let javascript_resource = event.resource_type == ResourceType::Script;
472 let document_resource = event.resource_type == ResourceType::Document;
473 let network_resource =
474 !document_resource && crate::utils::is_data_resource(&event.resource_type);
475
476 let skip_networking = self.block_all
477 || IGNORE_NETWORKING_RESOURCE_MAP.contains(event.resource_type.as_ref());
478
479 let skip_networking = skip_networking || self.document_reload_tracker >= 3;
480 let mut replacer = None;
481
482 if document_resource {
483 if self.document_target_domain == current_url {
484 self.document_reload_tracker += 1;
486 } else if !self.document_target_domain.is_empty()
487 && event.redirected_request_id.is_some()
488 {
489 let (http_document_replacement, mut https_document_replacement) =
490 if self.document_target_domain.starts_with("http://") {
491 (
492 self.document_target_domain.replace("http://", "http//"),
493 self.document_target_domain.replace("http://", "https://"),
494 )
495 } else {
496 (
497 self.document_target_domain.replace("https://", "https//"),
498 self.document_target_domain.replace("https://", "http://"),
499 )
500 };
501
502 let trailing = https_document_replacement.ends_with('/');
503
504 if trailing {
505 https_document_replacement.pop();
506 }
507
508 if https_document_replacement.ends_with('/') {
509 https_document_replacement.pop();
510 }
511
512 let redirect_mask = format!(
513 "{}{}",
514 https_document_replacement, http_document_replacement
515 );
516
517 if current_url == redirect_mask {
519 replacer = Some(if trailing {
520 format!("{}/", https_document_replacement)
521 } else {
522 https_document_replacement
523 });
524 }
525 }
526
527 if self.document_target_domain.is_empty() && current_url.ends_with(".xml") {
528 self.xml_document = true;
529 }
530
531 self.document_target_domain = event.request.url.clone();
532 }
533
534 let current_url = match &replacer {
535 Some(r) => r,
536 _ => &event.request.url,
537 }
538 .as_str();
539
540 let skip_networking = if !skip_networking {
542 if self.xml_document && current_url.ends_with(".xsl") {
544 false
545 } else {
546 self.ignore_visuals
547 && (IGNORE_VISUAL_RESOURCE_MAP
548 .contains(event.resource_type.as_ref()))
549 || self.block_stylesheets
550 && ResourceType::Stylesheet == event.resource_type
551 || self.block_javascript
552 && javascript_resource
553 && self.intercept_manager == NetworkInterceptManager::Unknown
554 && !ALLOWED_MATCHER.is_match(current_url)
555 }
556 } else {
557 skip_networking
558 };
559
560 let skip_networking = if !skip_networking
561 && (self.only_html || self.ignore_visuals)
562 && (javascript_resource || document_resource)
563 {
564 ignore_script_embedded(current_url)
565 } else {
566 skip_networking
567 };
568
569 let skip_networking = if !skip_networking && javascript_resource {
571 self.ignore_script(
572 current_url,
573 self.block_analytics,
574 self.intercept_manager,
575 )
576 } else {
577 skip_networking
578 };
579
580 let skip_networking = self.skip_xhr(skip_networking, &event, network_resource);
582
583 let skip_networking = if !skip_networking
585 && (javascript_resource || network_resource || document_resource)
586 {
587 self.intercept_manager.intercept_detection(
588 ¤t_url,
589 self.ignore_visuals,
590 network_resource,
591 )
592 } else {
593 skip_networking
594 };
595
596 let skip_networking =
597 if !skip_networking && (javascript_resource || network_resource) {
598 block_website(¤t_url)
599 } else {
600 skip_networking
601 };
602
603 if skip_networking {
604 tracing::debug!("Blocked: {:?} - {}", event.resource_type, current_url);
605 let fullfill_params =
606 crate::handler::network::fetch::FulfillRequestParams::new(
607 event.request_id.clone(),
608 200,
609 );
610 self.push_cdp_request(fullfill_params);
611 } else {
612 tracing::debug!("Allowed: {:?} - {}", event.resource_type, current_url);
613 let mut continue_params =
614 ContinueRequestParams::new(event.request_id.clone());
615
616 if replacer.is_some() {
617 continue_params.url = Some(current_url.into());
618 continue_params.intercept_response = Some(false);
619 }
620
621 self.push_cdp_request(continue_params)
622 }
623 }
624 } else {
625 self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
626 }
627 }
628 }
629
630 #[cfg(feature = "adblock")]
631 pub fn on_fetch_request_paused(&mut self, event: &EventRequestPaused) {
632 if self.user_request_interception_enabled && self.protocol_request_interception_enabled {
633 return;
634 }
635
636 if self.block_all {
637 use chromiumoxide_cdp::cdp::browser_protocol::network::ErrorReason;
638 tracing::debug!("Blocked: {:?} - {}", event.resource_type, event.request.url);
639 let fullfill_params = crate::handler::network::fetch::FailRequestParams::new(
640 event.request_id.clone(),
641 ErrorReason::BlockedByClient,
642 );
643 self.push_cdp_request(fullfill_params);
644 } else {
645 if let Some(network_id) = event.network_id.as_ref() {
646 if let Some(request_will_be_sent) =
647 self.requests_will_be_sent.remove(network_id.as_ref())
648 {
649 self.on_request(&request_will_be_sent, Some(event.request_id.clone().into()));
650 } else {
651 let current_url = event.request.url.as_str();
652 let javascript_resource = event.resource_type == ResourceType::Script;
653 let document_resource = event.resource_type == ResourceType::Document;
654 let network_resource =
655 !document_resource && crate::utils::is_data_resource(&event.resource_type);
656 let mut replacer = None;
657
658 let skip_networking = self.block_all
660 || IGNORE_NETWORKING_RESOURCE_MAP.contains(event.resource_type.as_ref());
661
662 let skip_networking = skip_networking || self.document_reload_tracker >= 3;
663
664 if document_resource {
665 if self.document_target_domain == current_url {
666 self.document_reload_tracker += 1;
668 } else if !self.document_target_domain.is_empty()
669 && event.redirected_request_id.is_some()
670 {
671 let (http_document_replacement, mut https_document_replacement) =
672 if self.document_target_domain.starts_with("http://") {
673 (
674 self.document_target_domain.replace("http://", "http//"),
675 self.document_target_domain.replace("http://", "https://"),
676 )
677 } else {
678 (
679 self.document_target_domain.replace("https://", "https//"),
680 self.document_target_domain.replace("https://", "http://"),
681 )
682 };
683
684 let trailing = https_document_replacement.ends_with('/');
685
686 if trailing {
687 https_document_replacement.pop();
688 }
689
690 if https_document_replacement.ends_with('/') {
691 https_document_replacement.pop();
692 }
693
694 let redirect_mask = format!(
695 "{}{}",
696 https_document_replacement, http_document_replacement
697 );
698
699 if current_url == redirect_mask {
701 replacer = Some(if trailing {
702 format!("{}/", https_document_replacement)
703 } else {
704 https_document_replacement
705 });
706 }
707 }
708
709 if self.document_target_domain.is_empty() && current_url.ends_with(".xml") {
710 self.xml_document = true;
711 }
712
713 self.document_target_domain = event.request.url.clone();
714 }
715
716 let current_url = match &replacer {
717 Some(r) => r,
718 _ => &event.request.url,
719 }
720 .as_str();
721
722 let skip_networking = if !skip_networking {
724 if self.xml_document && current_url.ends_with(".xsl") {
726 false
727 } else {
728 self.ignore_visuals
729 && (IGNORE_VISUAL_RESOURCE_MAP
730 .contains(event.resource_type.as_ref()))
731 || self.block_stylesheets
732 && ResourceType::Stylesheet == event.resource_type
733 || self.block_javascript
734 && javascript_resource
735 && self.intercept_manager == NetworkInterceptManager::Unknown
736 && !ALLOWED_MATCHER.is_match(current_url)
737 }
738 } else {
739 skip_networking
740 };
741
742 let skip_networking = if !skip_networking {
743 self.detect_ad(event)
744 } else {
745 skip_networking
746 };
747
748 let skip_networking = if !skip_networking
749 && (self.only_html || self.ignore_visuals)
750 && (javascript_resource || document_resource)
751 {
752 ignore_script_embedded(current_url)
753 } else {
754 skip_networking
755 };
756
757 let skip_networking = if !skip_networking && javascript_resource {
759 self.ignore_script(
760 current_url,
761 self.block_analytics,
762 self.intercept_manager,
763 )
764 } else {
765 skip_networking
766 };
767
768 let skip_networking = self.skip_xhr(skip_networking, &event, network_resource);
770
771 let skip_networking = if !skip_networking
773 && (javascript_resource || network_resource || document_resource)
774 {
775 self.intercept_manager.intercept_detection(
776 &event.request.url,
777 self.ignore_visuals,
778 network_resource,
779 )
780 } else {
781 skip_networking
782 };
783
784 let skip_networking = if !skip_networking
785 && (javascript_resource || network_resource)
786 {
787 crate::handler::blockers::block_websites::block_website(&event.request.url)
788 } else {
789 skip_networking
790 };
791
792 if skip_networking {
793 tracing::debug!("Blocked: {:?} - {}", event.resource_type, current_url);
794
795 let fullfill_params =
796 crate::handler::network::fetch::FulfillRequestParams::new(
797 event.request_id.clone(),
798 200,
799 );
800 self.push_cdp_request(fullfill_params);
801 } else {
802 tracing::debug!("Allowed: {:?} - {}", event.resource_type, current_url);
803
804 let mut continue_params =
805 ContinueRequestParams::new(event.request_id.clone());
806
807 if replacer.is_some() {
808 continue_params.url = Some(current_url.into());
809 continue_params.intercept_response = Some(false);
810 }
811 }
812 }
813 } else {
814 self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
815 }
816 }
817
818 }
822
823 #[cfg(feature = "adblock")]
825 pub fn detect_ad(&self, event: &EventRequestPaused) -> bool {
826 use adblock::{
827 lists::{FilterSet, ParseOptions, RuleTypes},
828 Engine,
829 };
830
831 lazy_static::lazy_static! {
832 static ref AD_ENGINE: Engine = {
833 let mut filter_set = FilterSet::new(false);
834 let mut rules = ParseOptions::default();
835 rules.rule_types = RuleTypes::All;
836
837 filter_set.add_filters(
838 &*spider_network_blocker::adblock::ADBLOCK_PATTERNS,
839 rules,
840 );
841
842 Engine::from_filter_set(filter_set, true)
843 };
844 };
845
846 let blockable = ResourceType::Image == event.resource_type
847 || event.resource_type == ResourceType::Media
848 || event.resource_type == ResourceType::Stylesheet
849 || event.resource_type == ResourceType::Document
850 || event.resource_type == ResourceType::Fetch
851 || event.resource_type == ResourceType::Xhr;
852
853 let u = &event.request.url;
854
855 let block_request = blockable
856 && {
858 let request = adblock::request::Request::preparsed(
859 &u,
860 "example.com",
861 "example.com",
862 &event.resource_type.as_ref().to_lowercase(),
863 !event.request.is_same_site.unwrap_or_default());
864
865 AD_ENGINE.check_network_request(&request).matched
866 };
867
868 block_request
869 }
870
871 pub fn on_fetch_auth_required(&mut self, event: &EventAuthRequired) {
872 let response = if self
873 .attempted_authentications
874 .contains(event.request_id.as_ref())
875 {
876 AuthChallengeResponseResponse::CancelAuth
877 } else if self.credentials.is_some() {
878 self.attempted_authentications
879 .insert(event.request_id.clone().into());
880 AuthChallengeResponseResponse::ProvideCredentials
881 } else {
882 AuthChallengeResponseResponse::Default
883 };
884
885 let mut auth = AuthChallengeResponse::new(response);
886 if let Some(creds) = self.credentials.clone() {
887 auth.username = Some(creds.username);
888 auth.password = Some(creds.password);
889 }
890 self.push_cdp_request(ContinueWithAuthParams::new(event.request_id.clone(), auth));
891 }
892
893 pub fn set_offline_mode(&mut self, value: bool) {
894 if self.offline == value {
895 return;
896 }
897 self.offline = value;
898 if let Ok(network) = EmulateNetworkConditionsParams::builder()
899 .offline(self.offline)
900 .latency(0)
901 .download_throughput(-1.)
902 .upload_throughput(-1.)
903 .build()
904 {
905 self.push_cdp_request(network);
906 }
907 }
908
909 pub fn on_request_will_be_sent(&mut self, event: &EventRequestWillBeSent) {
911 if self.protocol_request_interception_enabled && !event.request.url.starts_with("data:") {
912 if let Some(interception_id) = self
913 .request_id_to_interception_id
914 .remove(event.request_id.as_ref())
915 {
916 self.on_request(event, Some(interception_id));
917 } else {
918 self.requests_will_be_sent
920 .insert(event.request_id.clone(), event.clone());
921 }
922 } else {
923 self.on_request(event, None);
924 }
925 }
926
    /// Marks the tracked request, if any, as served from the memory cache.
    pub fn on_request_served_from_cache(&mut self, event: &EventRequestServedFromCache) {
        if let Some(request) = self.requests.get_mut(event.request_id.as_ref()) {
            request.from_memory_cache = true;
        }
    }
932
933 pub fn on_response_received(&mut self, event: &EventResponseReceived) {
935 let mut request_failed = false;
936
937 let mut deducted: u64 = 0;
939
940 if let Some(max_bytes) = self.max_bytes_allowed.as_mut() {
941 let before = *max_bytes;
942
943 let received_bytes: u64 = event.response.encoded_data_length as u64;
945
946 let content_length: Option<u64> = event
948 .response
949 .headers
950 .inner()
951 .get("content-length")
952 .and_then(|v| v.as_str())
953 .and_then(|s| s.trim().parse::<u64>().ok());
954
955 *max_bytes = max_bytes.saturating_sub(received_bytes);
957
958 if let Some(cl) = content_length {
960 if cl > *max_bytes {
961 *max_bytes = 0;
962 }
963 }
964
965 request_failed = *max_bytes == 0;
966
967 deducted = before.saturating_sub(*max_bytes);
969 }
970
971 if deducted > 0 {
973 self.queued_events
974 .push_back(NetworkEvent::BytesConsumed(deducted));
975 }
976
977 if request_failed && self.max_bytes_allowed.is_some() {
979 self.set_block_all(true);
980 }
981
982 if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
983 request.set_response(event.response.clone());
984 self.queued_events.push_back(if request_failed {
985 NetworkEvent::RequestFailed(request)
986 } else {
987 NetworkEvent::RequestFinished(request)
988 });
989 }
990 }
991
992 pub fn on_network_loading_finished(&mut self, event: &EventLoadingFinished) {
993 if let Some(request) = self.requests.remove(event.request_id.as_ref()) {
994 if let Some(interception_id) = request.interception_id.as_ref() {
995 self.attempted_authentications
996 .remove(interception_id.as_ref());
997 }
998 self.queued_events
999 .push_back(NetworkEvent::RequestFinished(request));
1000 }
1001 }
1002
1003 pub fn on_network_loading_failed(&mut self, event: &EventLoadingFailed) {
1004 if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
1005 request.failure_text = Some(event.error_text.clone());
1006 if let Some(interception_id) = request.interception_id.as_ref() {
1007 self.attempted_authentications
1008 .remove(interception_id.as_ref());
1009 }
1010 self.queued_events
1011 .push_back(NetworkEvent::RequestFailed(request));
1012 }
1013 }
1014
1015 fn on_request(
1016 &mut self,
1017 event: &EventRequestWillBeSent,
1018 interception_id: Option<InterceptionId>,
1019 ) {
1020 let mut redirect_chain = Vec::new();
1021 let mut redirect_location = None;
1022
1023 if let Some(redirect_resp) = &event.redirect_response {
1024 if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
1025 if is_redirect_status(redirect_resp.status) {
1026 if let Some(location) = redirect_resp.headers.inner()["Location"].as_str() {
1027 if redirect_resp.url != location {
1028 let fixed_location = location.replace(&redirect_resp.url, "");
1029
1030 request.response.as_mut().map(|resp| {
1031 resp.headers.0["Location"] =
1032 serde_json::Value::String(fixed_location.clone());
1033 });
1034
1035 redirect_location = Some(fixed_location);
1036 }
1037 }
1038 }
1039
1040 self.handle_request_redirect(
1041 &mut request,
1042 if let Some(redirect_location) = redirect_location {
1043 let mut redirect_resp = redirect_resp.clone();
1044
1045 redirect_resp.headers.0["Location"] =
1046 serde_json::Value::String(redirect_location);
1047
1048 redirect_resp
1049 } else {
1050 redirect_resp.clone()
1051 },
1052 );
1053
1054 redirect_chain = std::mem::take(&mut request.redirect_chain);
1055 redirect_chain.push(request);
1056 }
1057 }
1058
1059 let request = HttpRequest::new(
1060 event.request_id.clone(),
1061 event.frame_id.clone(),
1062 interception_id,
1063 self.user_request_interception_enabled,
1064 redirect_chain,
1065 );
1066
1067 self.requests.insert(event.request_id.clone(), request);
1068 self.queued_events
1069 .push_back(NetworkEvent::Request(event.request_id.clone()));
1070 }
1071
    /// Stores the redirect `response` on `request` and forgets any auth attempt
    /// recorded for the request's interception id.
    fn handle_request_redirect(&mut self, request: &mut HttpRequest, response: Response) {
        request.set_response(response);
        if let Some(interception_id) = request.interception_id.as_ref() {
            self.attempted_authentications
                .remove(interception_id.as_ref());
        }
    }
1079}
1080
/// Events produced by `NetworkManager` and drained through `NetworkManager::poll`.
#[derive(Debug)]
pub enum NetworkEvent {
    /// A CDP command (method id and serialized parameters) to send to the browser.
    SendCdpRequest((MethodId, serde_json::Value)),
    /// A new request was registered under this id.
    Request(RequestId),
    /// A response for the request with this id.
    // NOTE(review): not emitted anywhere in this part of the file.
    Response(RequestId),
    /// The request failed to load or exhausted the byte budget.
    RequestFailed(HttpRequest),
    /// The request finished (response received or loading finished).
    RequestFinished(HttpRequest),
    /// Number of bytes deducted from the byte budget by a response.
    BytesConsumed(u64),
}