1use super::blockers::{
2 ignore_script_embedded, ignore_script_xhr, ignore_script_xhr_media,
3 intercept_manager::NetworkInterceptManager,
4 scripts::{
5 URL_IGNORE_SCRIPT_BASE_PATHS, URL_IGNORE_SCRIPT_STYLES_PATHS, URL_IGNORE_TRIE,
6 URL_IGNORE_TRIE_PATHS,
7 },
8 xhr::IGNORE_XHR_ASSETS,
9};
10use crate::auth::Credentials;
11use crate::cmd::CommandChain;
12use crate::handler::http::HttpRequest;
13use aho_corasick::AhoCorasick;
14use case_insensitive_string::CaseInsensitiveString;
15use chromiumoxide_cdp::cdp::browser_protocol::fetch::{
16 self, AuthChallengeResponse, AuthChallengeResponseResponse, ContinueRequestParams,
17 ContinueWithAuthParams, DisableParams, EventAuthRequired, EventRequestPaused, RequestPattern,
18};
19use chromiumoxide_cdp::cdp::browser_protocol::network::{
20 EmulateNetworkConditionsParams, EventLoadingFailed, EventLoadingFinished,
21 EventRequestServedFromCache, EventRequestWillBeSent, EventResponseReceived, Headers,
22 InterceptionId, RequestId, ResourceType, Response, SetCacheDisabledParams,
23 SetExtraHttpHeadersParams,
24};
25use chromiumoxide_cdp::cdp::browser_protocol::{
26 network::EnableParams, security::SetIgnoreCertificateErrorsParams,
27};
28use chromiumoxide_types::{Command, Method, MethodId};
29use hashbrown::{HashMap, HashSet};
30use lazy_static::lazy_static;
31use reqwest::header::PROXY_AUTHORIZATION;
32use std::collections::VecDeque;
33use std::time::Duration;
34
lazy_static! {
    /// Patterns compiled into [`ALLOWED_MATCHER`]. A script URL containing any of these
    /// is exempt from the `block_javascript` rule in `on_fetch_request_paused`.
    static ref JS_FRAMEWORK_ALLOW: Vec<&'static str> = vec![
        "jquery", "angular",
        "react", "vue", "bootstrap",
        "d3",
        "lodash",
        "ajax",
        "application",
        "app", "main",
        "index",
        "bundle",
        "vendor",
        "runtime",
        "polyfill",
        "scripts",
        "/wp-content/js/", "https://m.stripe.network/",
        "https://challenges.cloudflare.com/",
        "https://js.stripe.com/",
        "https://cdn.prod.website-files.com/", "https://cdnjs.cloudflare.com/", "https://code.jquery.com/jquery-"
    ];

    /// Multi-pattern matcher built from `JS_FRAMEWORK_ALLOW`.
    ///
    /// Construction is unwrapped, so the first access panics if the automaton cannot be built.
    pub static ref ALLOWED_MATCHER: AhoCorasick = AhoCorasick::new(JS_FRAMEWORK_ALLOW.iter()).unwrap();

    /// URL path prefixes checked by `NetworkManager::ignore_script`: when the intercept manager
    /// is `UNKNOWN`, a path that starts with one of these and ends with `.js` is ignored.
    pub static ref JS_FRAMEWORK_PATH: phf::Set<&'static str> = {
        phf::phf_set! {
            "_next/static/", "_astro/",
        }
    };

    /// MIME types of binary, media and document payloads.
    ///
    /// Not referenced elsewhere in this file; presumably consumed by other modules.
    pub static ref IGNORE_CONTENT_TYPES: phf::Set<&'static str> = phf::phf_set! {
        "application/pdf",
        "application/zip",
        "application/x-rar-compressed",
        "application/x-tar",
        "image/png",
        "image/jpeg",
        "image/gif",
        "image/bmp",
        "image/svg+xml",
        "video/mp4",
        "video/x-msvideo",
        "video/x-matroska",
        "video/webm",
        "audio/mpeg",
        "audio/ogg",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/vnd.ms-excel",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/vnd.ms-powerpoint",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "application/x-7z-compressed",
        "application/x-rpm",
        "application/x-shockwave-flash",
    };

    /// Resource type names that `on_fetch_request_paused` blocks when `ignore_visuals` is set.
    pub static ref IGNORE_VISUAL_RESOURCE_MAP: phf::Set<&'static str> = phf::phf_set! {
        "Image",
        "Media",
        "Font"
    };

    /// Resource type names that `on_fetch_request_paused` blocks regardless of the
    /// manager's blocking flags.
    pub static ref IGNORE_NETWORKING_RESOURCE_MAP: phf::Set<&'static str> = phf::phf_set! {
        "Prefetch",
        "Ping",
    };

    /// The `css` extension, used by `NetworkManager::skip_xhr` when `block_stylesheets` is set.
    pub static ref CSS_EXTENSION: CaseInsensitiveString = CaseInsensitiveString::from("css");

}
120
/// Tracks in-flight requests for a page and queues the CDP commands / events that result
/// from network and fetch domain notifications.
#[derive(Debug)]
pub struct NetworkManager {
    /// Pending events, handed out front-to-back by `poll`.
    queued_events: VecDeque<NetworkEvent>,
    /// When `true`, `init_commands` also issues `SetIgnoreCertificateErrorsParams::new(true)`.
    ignore_httpserrors: bool,
    /// Requests currently being tracked, keyed by their network request id.
    requests: HashMap<RequestId, HttpRequest>,
    /// `requestWillBeSent` events parked until the matching paused-request event arrives.
    requests_will_be_sent: HashMap<RequestId, EventRequestWillBeSent>,
    /// Extra headers last supplied through `set_extra_headers`.
    extra_headers: std::collections::HashMap<String, String>,
    /// Interception ids waiting for their `requestWillBeSent` event.
    /// NOTE(review): nothing visible in this file inserts into this map — confirm.
    request_id_to_interception_id: HashMap<RequestId, InterceptionId>,
    /// Set to the inverse of the value passed to `set_cache_enabled`.
    user_cache_disabled: bool,
    /// Request ids for which credentials were already provided once.
    attempted_authentications: HashSet<RequestId>,
    /// Credentials supplied through `authenticate`, if any.
    credentials: Option<Credentials>,
    /// Value last passed to `set_request_interception`.
    user_request_interception_enabled: bool,
    /// Whether interception is enabled at the protocol level.
    /// NOTE(review): initialised to `false`; no later assignment is visible in this file.
    protocol_request_interception_enabled: bool,
    /// Last value applied through `set_offline_mode`.
    offline: bool,
    /// Timeout handed to the `CommandChain` built by `init_commands`.
    request_timeout: Duration,
    /// Block resource types listed in `IGNORE_VISUAL_RESOURCE_MAP` (and related asset checks).
    pub ignore_visuals: bool,
    /// Block `Stylesheet` resources.
    pub block_stylesheets: bool,
    /// Block `Script` resources whose URL does not match `ALLOWED_MATCHER`
    /// (only while `intercept_manager` is `UNKNOWN`).
    pub block_javascript: bool,
    /// Block analytics-related scripts and XHR requests; `new` sets this to `true`.
    pub block_analytics: bool,
    /// Together with `ignore_visuals`, enables the embedded-script check for scripts and documents.
    pub only_html: bool,
    /// Intercept manager consulted by `on_fetch_request_paused` and `ignore_script`.
    pub intercept_manager: NetworkInterceptManager,
}
151
152impl NetworkManager {
153 pub fn new(ignore_httpserrors: bool, request_timeout: Duration) -> Self {
154 Self {
155 queued_events: Default::default(),
156 ignore_httpserrors,
157 requests: Default::default(),
158 requests_will_be_sent: Default::default(),
159 extra_headers: Default::default(),
160 request_id_to_interception_id: Default::default(),
161 user_cache_disabled: false,
162 attempted_authentications: Default::default(),
163 credentials: None,
164 user_request_interception_enabled: false,
165 protocol_request_interception_enabled: false,
166 offline: false,
167 request_timeout,
168 ignore_visuals: false,
169 block_javascript: false,
170 block_stylesheets: false,
171 block_analytics: true,
172 only_html: false,
173 intercept_manager: NetworkInterceptManager::UNKNOWN,
174 }
175 }
176
177 pub fn init_commands(&self) -> CommandChain {
178 let enable = EnableParams::default();
179 let mut v = vec![];
180
181 if let Ok(c) = serde_json::to_value(&enable) {
182 v.push((enable.identifier(), c));
183 }
184
185 let cmds = if self.ignore_httpserrors {
186 let ignore = SetIgnoreCertificateErrorsParams::new(true);
187
188 if let Ok(ignored) = serde_json::to_value(&ignore) {
189 v.push((ignore.identifier(), ignored));
190 }
191
192 v
193 } else {
194 v
195 };
196
197 CommandChain::new(cmds, self.request_timeout)
198 }
199
200 fn push_cdp_request<T: Command>(&mut self, cmd: T) {
201 let method = cmd.identifier();
202 if let Ok(params) = serde_json::to_value(cmd) {
203 self.queued_events
204 .push_back(NetworkEvent::SendCdpRequest((method, params)));
205 }
206 }
207
208 pub fn poll(&mut self) -> Option<NetworkEvent> {
210 self.queued_events.pop_front()
211 }
212
213 pub fn extra_headers(&self) -> &std::collections::HashMap<String, String> {
214 &self.extra_headers
215 }
216
217 pub fn set_extra_headers(&mut self, headers: std::collections::HashMap<String, String>) {
218 self.extra_headers = headers;
219 self.extra_headers.remove(PROXY_AUTHORIZATION.as_str());
220 if let Ok(headers) = serde_json::to_value(&self.extra_headers) {
221 self.push_cdp_request(SetExtraHttpHeadersParams::new(Headers::new(headers)));
222 }
223 }
224
225 pub fn set_request_interception(&mut self, enabled: bool) {
226 self.user_request_interception_enabled = enabled;
227 self.update_protocol_request_interception();
228 }
229
230 pub fn set_cache_enabled(&mut self, enabled: bool) {
231 self.user_cache_disabled = !enabled;
232 self.update_protocol_cache_disabled();
233 }
234
235 pub fn update_protocol_cache_disabled(&mut self) {
236 self.push_cdp_request(SetCacheDisabledParams::new(
237 self.user_cache_disabled || self.protocol_request_interception_enabled,
238 ));
239 }
240
241 pub fn authenticate(&mut self, credentials: Credentials) {
242 self.credentials = Some(credentials);
243 self.update_protocol_request_interception()
244 }
245
246 fn update_protocol_request_interception(&mut self) {
247 let enabled = self.user_request_interception_enabled || self.credentials.is_some();
248
249 if enabled == self.protocol_request_interception_enabled {
250 return;
251 }
252 self.update_protocol_cache_disabled();
253
254 if enabled {
255 self.push_cdp_request(
256 fetch::EnableParams::builder()
257 .handle_auth_requests(true)
258 .pattern(RequestPattern::builder().url_pattern("*").build())
259 .build(),
260 )
261 } else {
262 self.push_cdp_request(DisableParams::default())
263 }
264 }
265
266 pub(crate) fn ignore_script(
268 &self,
269 url: &str,
270 block_analytics: bool,
271 intercept_manager: NetworkInterceptManager,
272 ) -> bool {
273 let mut ignore_script = block_analytics && URL_IGNORE_TRIE.contains_prefix(url);
274
275 if !ignore_script {
276 if let Some(index) = url.find("//") {
277 let pos = index + 2;
278
279 if pos < url.len() {
281 if let Some(slash_index) = url[pos..].find('/') {
283 let base_path_index = pos + slash_index + 1;
284
285 if url.len() > base_path_index {
286 let new_url: &str = &url[base_path_index..];
287
288 ignore_script = URL_IGNORE_TRIE_PATHS.contains_prefix(new_url);
289
290 if !ignore_script
292 && intercept_manager == NetworkInterceptManager::UNKNOWN
293 {
294 let hydration_file =
295 JS_FRAMEWORK_PATH.iter().any(|p| new_url.starts_with(p));
296
297 if hydration_file && new_url.ends_with(".js") {
299 ignore_script = true;
300 }
301 }
302
303 if !ignore_script
304 && URL_IGNORE_SCRIPT_BASE_PATHS.contains_prefix(new_url)
305 {
306 ignore_script = true;
307 }
308
309 if !ignore_script
310 && self.ignore_visuals
311 && URL_IGNORE_SCRIPT_STYLES_PATHS.contains_prefix(new_url)
312 {
313 ignore_script = true;
314 }
315 }
316 }
317 }
318 }
319 }
320
321 if !ignore_script {
323 ignore_script = url.ends_with("analytics.js")
324 || url.ends_with("ads.js")
325 || url.ends_with("tracking.js")
326 || url.ends_with("track.js");
327 }
328
329 ignore_script
330 }
331
332 fn skip_xhr(&self, skip_networking: bool, event: &EventRequestPaused) -> bool {
334 if !skip_networking
336 && (event.resource_type == ResourceType::Xhr
337 || event.resource_type == ResourceType::WebSocket
338 || event.resource_type == ResourceType::Fetch)
339 {
340 let request_url = event.request.url.as_str();
341
342 let skip_analytics = self.block_analytics && ignore_script_xhr(request_url);
344
345 if skip_analytics {
346 true
347 } else if self.block_stylesheets || self.ignore_visuals {
348 let block_css = self.block_stylesheets;
349 let block_media = self.ignore_visuals;
350
351 let mut block_request = false;
352
353 if let Some(position) = request_url.rfind('.') {
354 let hlen = request_url.len();
355 let has_asset = hlen - position;
356
357 if has_asset >= 3 {
358 let next_position = position + 1;
359
360 if block_media
361 && IGNORE_XHR_ASSETS.contains::<CaseInsensitiveString>(
362 &request_url[next_position..].into(),
363 )
364 {
365 block_request = true;
366 } else if block_css {
367 block_request =
368 CaseInsensitiveString::from(request_url[next_position..].as_bytes())
369 .contains(&**CSS_EXTENSION)
370 }
371 }
372 }
373
374 if !block_request {
375 block_request = ignore_script_xhr_media(request_url);
376 }
377
378 block_request
379 } else {
380 skip_networking
381 }
382 } else {
383 skip_networking
384 }
385 }
386
387 #[cfg(not(feature = "adblock"))]
388 pub fn on_fetch_request_paused(&mut self, event: &EventRequestPaused) {
389 if !self.user_request_interception_enabled && self.protocol_request_interception_enabled {
390 self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
391 } else {
392 if let Some(network_id) = event.network_id.as_ref() {
393 if let Some(request_will_be_sent) =
394 self.requests_will_be_sent.remove(network_id.as_ref())
395 {
396 self.on_request(&request_will_be_sent, Some(event.request_id.clone().into()));
397 } else {
398 let current_url = event.request.url.as_str();
399 let javascript_resource = event.resource_type == ResourceType::Script;
400 let skip_networking = event.resource_type == ResourceType::Other
401 || event.resource_type == ResourceType::Manifest
402 || event.resource_type == ResourceType::CspViolationReport
403 || event.resource_type == ResourceType::Ping
404 || event.resource_type == ResourceType::Prefetch;
405 let network_resource = event.resource_type == ResourceType::Xhr
406 || event.resource_type == ResourceType::Fetch
407 || event.resource_type == ResourceType::WebSocket;
408
409 let skip_networking = if !skip_networking {
411 IGNORE_NETWORKING_RESOURCE_MAP.contains(event.resource_type.as_ref())
412 || self.ignore_visuals
413 && (IGNORE_VISUAL_RESOURCE_MAP
414 .contains(event.resource_type.as_ref()))
415 || self.block_stylesheets
416 && ResourceType::Stylesheet == event.resource_type
417 || self.block_javascript
418 && javascript_resource
419 && self.intercept_manager == NetworkInterceptManager::UNKNOWN
420 && !ALLOWED_MATCHER.is_match(current_url)
421 } else {
422 skip_networking
423 };
424
425 let skip_networking = if !skip_networking
426 && (self.only_html || self.ignore_visuals)
427 && (javascript_resource || event.resource_type == ResourceType::Document)
428 {
429 ignore_script_embedded(current_url)
430 } else {
431 skip_networking
432 };
433
434 let skip_networking = if !skip_networking && javascript_resource {
436 self.ignore_script(
437 current_url,
438 self.block_analytics,
439 self.intercept_manager,
440 )
441 } else {
442 skip_networking
443 };
444
445 let skip_networking = self.skip_xhr(skip_networking, &event);
447
448 let skip_networking = if !skip_networking
450 && (javascript_resource
451 || network_resource
452 || event.resource_type == ResourceType::Document)
453 {
454 self.intercept_manager.intercept_detection(
455 &event.request.url,
456 self.ignore_visuals,
457 network_resource,
458 )
459 } else {
460 skip_networking
461 };
462
463 if skip_networking {
464 tracing::debug!(
465 "Blocked: {:?} - {}",
466 event.resource_type,
467 event.request.url
468 );
469 let fullfill_params =
470 crate::handler::network::fetch::FulfillRequestParams::new(
471 event.request_id.clone(),
472 200,
473 );
474 self.push_cdp_request(fullfill_params);
475 } else {
476 tracing::debug!(
477 "Allowed: {:?} - {}",
478 event.resource_type,
479 event.request.url
480 );
481 self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
482 }
483 }
484 } else {
485 self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
486 }
487 }
488 }
489
490 #[cfg(feature = "adblock")]
491 pub fn on_fetch_request_paused(&mut self, event: &EventRequestPaused) {
492 if !self.user_request_interception_enabled && self.protocol_request_interception_enabled {
493 self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
494 } else {
495 if let Some(network_id) = event.network_id.as_ref() {
496 if let Some(request_will_be_sent) =
497 self.requests_will_be_sent.remove(network_id.as_ref())
498 {
499 self.on_request(&request_will_be_sent, Some(event.request_id.clone().into()));
500 } else {
501 let current_url = event.request.url.as_str();
502 let javascript_resource = event.resource_type == ResourceType::Script;
503 let skip_networking = event.resource_type == ResourceType::Other
504 || event.resource_type == ResourceType::Manifest
505 || event.resource_type == ResourceType::CspViolationReport
506 || event.resource_type == ResourceType::Ping
507 || event.resource_type == ResourceType::Prefetch;
508 let network_resource = event.resource_type == ResourceType::Xhr
509 || event.resource_type == ResourceType::Fetch
510 || event.resource_type == ResourceType::WebSocket;
511
512 let skip_networking = if !skip_networking {
514 IGNORE_NETWORKING_RESOURCE_MAP.contains(event.resource_type.as_ref())
515 || self.ignore_visuals
516 && (IGNORE_VISUAL_RESOURCE_MAP
517 .contains(event.resource_type.as_ref()))
518 || self.block_stylesheets
519 && ResourceType::Stylesheet == event.resource_type
520 || self.block_javascript
521 && javascript_resource
522 && self.intercept_manager == NetworkInterceptManager::UNKNOWN
523 && !ALLOWED_MATCHER.is_match(current_url)
524 } else {
525 skip_networking
526 };
527
528 let skip_networking = if !skip_networking {
529 self.detect_ad(event)
530 } else {
531 skip_networking
532 };
533
534 let skip_networking = if !skip_networking
535 && (self.only_html || self.ignore_visuals)
536 && (javascript_resource || event.resource_type == ResourceType::Document)
537 {
538 ignore_script_embedded(current_url)
539 } else {
540 skip_networking
541 };
542
543 let skip_networking = if !skip_networking && javascript_resource {
545 self.ignore_script(
546 current_url,
547 self.block_analytics,
548 self.intercept_manager,
549 )
550 } else {
551 skip_networking
552 };
553
554 let skip_networking = self.skip_xhr(skip_networking, &event);
556
557 let skip_networking = if !skip_networking
559 && (javascript_resource
560 || network_resource
561 || event.resource_type == ResourceType::Document)
562 {
563 self.intercept_manager.intercept_detection(
564 &event.request.url,
565 self.ignore_visuals,
566 network_resource,
567 )
568 } else {
569 skip_networking
570 };
571
572 if skip_networking {
573 let fullfill_params =
574 crate::handler::network::fetch::FulfillRequestParams::new(
575 event.request_id.clone(),
576 200,
577 );
578 self.push_cdp_request(fullfill_params);
579 } else {
580 self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
581 }
582 }
583 }
584 }
585
586 }
590
591 #[cfg(feature = "adblock")]
593 pub fn detect_ad(&self, event: &EventRequestPaused) -> bool {
594 use adblock::{
595 lists::{FilterSet, ParseOptions, RuleTypes},
596 Engine,
597 };
598
599 lazy_static::lazy_static! {
600 static ref AD_ENGINE: Engine = {
601 let mut filter_set = FilterSet::new(false);
602 let mut rules = ParseOptions::default();
603 rules.rule_types = RuleTypes::All;
604
605 filter_set.add_filters(
606 &*crate::handler::blockers::adblock_patterns::ADBLOCK_PATTERNS,
607 rules,
608 );
609
610 Engine::from_filter_set(filter_set, true)
611 };
612 };
613
614 let blockable = ResourceType::Image == event.resource_type
615 || event.resource_type == ResourceType::Media
616 || event.resource_type == ResourceType::Stylesheet
617 || event.resource_type == ResourceType::Document
618 || event.resource_type == ResourceType::Fetch
619 || event.resource_type == ResourceType::Xhr;
620
621 let u = &event.request.url;
622
623 let block_request = blockable
624 && {
626 let request = adblock::request::Request::preparsed(
627 &u,
628 "example.com",
629 "example.com",
630 &event.resource_type.as_ref().to_lowercase(),
631 !event.request.is_same_site.unwrap_or_default());
632
633 AD_ENGINE.check_network_request(&request).matched
634 };
635
636 block_request
637 }
638
639 pub fn on_fetch_auth_required(&mut self, event: &EventAuthRequired) {
640 let response = if self
641 .attempted_authentications
642 .contains(event.request_id.as_ref())
643 {
644 AuthChallengeResponseResponse::CancelAuth
645 } else if self.credentials.is_some() {
646 self.attempted_authentications
647 .insert(event.request_id.clone().into());
648 AuthChallengeResponseResponse::ProvideCredentials
649 } else {
650 AuthChallengeResponseResponse::Default
651 };
652
653 let mut auth = AuthChallengeResponse::new(response);
654 if let Some(creds) = self.credentials.clone() {
655 auth.username = Some(creds.username);
656 auth.password = Some(creds.password);
657 }
658 self.push_cdp_request(ContinueWithAuthParams::new(event.request_id.clone(), auth));
659 }
660
661 pub fn set_offline_mode(&mut self, value: bool) {
662 if self.offline == value {
663 return;
664 }
665 self.offline = value;
666 if let Ok(network) = EmulateNetworkConditionsParams::builder()
667 .offline(self.offline)
668 .latency(0)
669 .download_throughput(-1.)
670 .upload_throughput(-1.)
671 .build()
672 {
673 self.push_cdp_request(network);
674 }
675 }
676
677 pub fn on_request_will_be_sent(&mut self, event: &EventRequestWillBeSent) {
679 if self.protocol_request_interception_enabled && !event.request.url.starts_with("data:") {
680 if let Some(interception_id) = self
681 .request_id_to_interception_id
682 .remove(event.request_id.as_ref())
683 {
684 self.on_request(event, Some(interception_id));
685 } else {
686 self.requests_will_be_sent
688 .insert(event.request_id.clone(), event.clone());
689 }
690 } else {
691 self.on_request(event, None);
692 }
693 }
694
695 pub fn on_request_served_from_cache(&mut self, event: &EventRequestServedFromCache) {
696 if let Some(request) = self.requests.get_mut(event.request_id.as_ref()) {
697 request.from_memory_cache = true;
698 }
699 }
700
701 pub fn on_response_received(&mut self, event: &EventResponseReceived) {
702 if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
703 request.set_response(event.response.clone());
704 self.queued_events
705 .push_back(NetworkEvent::RequestFinished(request))
706 }
707 }
708
709 pub fn on_network_loading_finished(&mut self, event: &EventLoadingFinished) {
710 if let Some(request) = self.requests.remove(event.request_id.as_ref()) {
711 if let Some(interception_id) = request.interception_id.as_ref() {
712 self.attempted_authentications
713 .remove(interception_id.as_ref());
714 }
715 self.queued_events
716 .push_back(NetworkEvent::RequestFinished(request));
717 }
718 }
719
720 pub fn on_network_loading_failed(&mut self, event: &EventLoadingFailed) {
721 if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
722 request.failure_text = Some(event.error_text.clone());
723 if let Some(interception_id) = request.interception_id.as_ref() {
724 self.attempted_authentications
725 .remove(interception_id.as_ref());
726 }
727 self.queued_events
728 .push_back(NetworkEvent::RequestFailed(request));
729 }
730 }
731
732 fn on_request(
733 &mut self,
734 event: &EventRequestWillBeSent,
735 interception_id: Option<InterceptionId>,
736 ) {
737 let mut redirect_chain = Vec::new();
738 if let Some(redirect_resp) = event.redirect_response.as_ref() {
739 if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
740 self.handle_request_redirect(&mut request, redirect_resp.clone());
741 redirect_chain = std::mem::take(&mut request.redirect_chain);
742 redirect_chain.push(request);
743 }
744 }
745 let request = HttpRequest::new(
746 event.request_id.clone(),
747 event.frame_id.clone(),
748 interception_id,
749 self.user_request_interception_enabled,
750 redirect_chain,
751 );
752
753 self.requests.insert(event.request_id.clone(), request);
754 self.queued_events
755 .push_back(NetworkEvent::Request(event.request_id.clone()));
756 }
757
758 fn handle_request_redirect(&mut self, request: &mut HttpRequest, response: Response) {
759 request.set_response(response);
760 if let Some(interception_id) = request.interception_id.as_ref() {
761 self.attempted_authentications
762 .remove(interception_id.as_ref());
763 }
764 }
765}
766
/// Events produced by `NetworkManager` and drained through `NetworkManager::poll`.
#[derive(Debug)]
pub enum NetworkEvent {
    /// A CDP command (method id plus serialized parameters) that should be sent.
    SendCdpRequest((MethodId, serde_json::Value)),
    /// A request with this id has been registered by the manager.
    Request(RequestId),
    /// A response for the request with this id.
    /// NOTE(review): not constructed anywhere in this file — confirm it is still used.
    Response(RequestId),
    /// A tracked request failed to load; carries the request with its failure text set.
    RequestFailed(HttpRequest),
    /// A tracked request received its response or finished loading.
    RequestFinished(HttpRequest),
}
774}