chromiumoxide/handler/
network.rs

1use super::blockers::{
2    block_websites::block_xhr, ignore_script_embedded, ignore_script_xhr, ignore_script_xhr_media,
3    xhr::IGNORE_XHR_ASSETS,
4};
5use crate::auth::Credentials;
6use crate::cmd::CommandChain;
7use crate::handler::http::HttpRequest;
8use aho_corasick::AhoCorasick;
9use case_insensitive_string::CaseInsensitiveString;
10use chromiumoxide_cdp::cdp::browser_protocol::network::{
11    EmulateNetworkConditionsParams, EventLoadingFailed, EventLoadingFinished,
12    EventRequestServedFromCache, EventRequestWillBeSent, EventResponseReceived, Headers,
13    InterceptionId, RequestId, ResourceType, Response, SetCacheDisabledParams,
14    SetExtraHttpHeadersParams,
15};
16use chromiumoxide_cdp::cdp::browser_protocol::{
17    fetch::{
18        self, AuthChallengeResponse, AuthChallengeResponseResponse, ContinueRequestParams,
19        ContinueWithAuthParams, DisableParams, EventAuthRequired, EventRequestPaused,
20        RequestPattern,
21    },
22    network::SetBypassServiceWorkerParams,
23};
24use chromiumoxide_cdp::cdp::browser_protocol::{
25    network::EnableParams, security::SetIgnoreCertificateErrorsParams,
26};
27use chromiumoxide_types::{Command, Method, MethodId};
28use hashbrown::{HashMap, HashSet};
29use lazy_static::lazy_static;
30use reqwest::header::PROXY_AUTHORIZATION;
31use spider_network_blocker::intercept_manager::NetworkInterceptManager;
32pub use spider_network_blocker::scripts::{
33    URL_IGNORE_SCRIPT_BASE_PATHS, URL_IGNORE_SCRIPT_STYLES_PATHS, URL_IGNORE_TRIE_PATHS,
34};
35use std::collections::VecDeque;
36use std::time::Duration;
37
38lazy_static! {
39    /// General patterns for popular libraries and resources
40    static ref JS_FRAMEWORK_ALLOW: Vec<&'static str> = vec![
41        "jquery",           // Covers jquery.min.js, jquery.js, etc.
42        "angular",
43        "react",            // Covers all React-related patterns
44        "vue",              // Covers all Vue-related patterns
45        "bootstrap",
46        "d3",
47        "lodash",
48        "ajax",
49        "application",
50        "app",              // Covers general app scripts like app.js
51        "main",
52        "index",
53        "bundle",
54        "vendor",
55        "runtime",
56        "polyfill",
57        "scripts",
58        "es2015.",
59        "es2020.",
60        "webpack",
61        "/wp-content/js/",  // Covers Wordpress content
62        // Verified 3rd parties for request
63        "https://m.stripe.network/",
64        "https://challenges.cloudflare.com/",
65        "https://www.google.com/recaptcha/api.js",
66        "https://google.com/recaptcha/api.js",
67        "https://js.stripe.com/",
68        "https://cdn.prod.website-files.com/", // webflow cdn scripts
69        "https://cdnjs.cloudflare.com/",        // cloudflare cdn scripts
70        "https://code.jquery.com/jquery-"
71    ];
72
73    /// Determine if a script should be rendered in the browser by name.
74    pub static ref ALLOWED_MATCHER: AhoCorasick = AhoCorasick::new(JS_FRAMEWORK_ALLOW.iter()).expect("matcher to build");
75
76    /// path of a js framework
77    pub static ref JS_FRAMEWORK_PATH: phf::Set<&'static str> = {
78        phf::phf_set! {
79            // Add allowed assets from JS_FRAMEWORK_ASSETS except the excluded ones
80            "_astro/", "_app/immutable"
81        }
82    };
83
84    /// Ignore the content types.
85    pub static ref IGNORE_CONTENT_TYPES: phf::Set<&'static str> = phf::phf_set! {
86        "application/pdf",
87        "application/zip",
88        "application/x-rar-compressed",
89        "application/x-tar",
90        "image/png",
91        "image/jpeg",
92        "image/gif",
93        "image/bmp",
94        "image/svg+xml",
95        "video/mp4",
96        "video/x-msvideo",
97        "video/x-matroska",
98        "video/webm",
99        "audio/mpeg",
100        "audio/ogg",
101        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
102        "application/vnd.ms-excel",
103        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
104        "application/vnd.ms-powerpoint",
105        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
106        "application/x-7z-compressed",
107        "application/x-rpm",
108        "application/x-shockwave-flash",
109        "application/rtf",
110    };
111
112    /// Ignore the resources for visual content types.
113    pub static ref IGNORE_VISUAL_RESOURCE_MAP: phf::Set<&'static str> = phf::phf_set! {
114        "Image",
115        "Media",
116        "Font"
117    };
118
119    /// Ignore the resources for visual content types.
120    pub static ref IGNORE_NETWORKING_RESOURCE_MAP: phf::Set<&'static str> = phf::phf_set! {
121        "CspViolationReport",
122        "Manifest",
123        "Other",
124        "Prefetch",
125        "Ping",
126    };
127
128    /// Case insenstive css matching
129    pub static ref CSS_EXTENSION: CaseInsensitiveString = CaseInsensitiveString::from("css");
130
131    /// The command chain.
132    pub static ref INIT_CHAIN: Vec<(std::borrow::Cow<'static, str>, serde_json::Value)>  = {
133        let enable = EnableParams::default();
134
135        if let Ok(c) = serde_json::to_value(&enable) {
136            vec![(enable.identifier(), c)]
137        } else {
138            vec![]
139        }
140    };
141
142    /// The command chain with https ignore.
143    pub static ref INIT_CHAIN_IGNORE_HTTP_ERRORS: Vec<(std::borrow::Cow<'static, str>, serde_json::Value)>  = {
144        let enable = EnableParams::default();
145        let mut v = vec![];
146        if let Ok(c) = serde_json::to_value(&enable) {
147            v.push((enable.identifier(), c));
148        }
149        let ignore = SetIgnoreCertificateErrorsParams::new(true);
150        if let Ok(ignored) = serde_json::to_value(&ignore) {
151            v.push((ignore.identifier(), ignored));
152        }
153
154        v
155    };
156
157    /// Enable the fetch intercept command
158    pub static ref ENABLE_FETCH: chromiumoxide_cdp::cdp::browser_protocol::fetch::EnableParams = {
159        fetch::EnableParams::builder()
160        .handle_auth_requests(true)
161        .pattern(RequestPattern::builder().url_pattern("*").build())
162        .build()
163    };
164}
165
/// Returns `true` when `status` is one of the redirect codes 301, 302, 303,
/// 307 or 308; every other value yields `false`.
pub(crate) fn is_redirect_status(status: i64) -> bool {
    match status {
        301..=303 | 307 | 308 => true,
        _ => false,
    }
}
170
171#[derive(Debug)]
172/// The base network manager.
173pub struct NetworkManager {
174    queued_events: VecDeque<NetworkEvent>,
175    ignore_httpserrors: bool,
176    requests: HashMap<RequestId, HttpRequest>,
177    // TODO put event in an Arc?
178    requests_will_be_sent: HashMap<RequestId, EventRequestWillBeSent>,
179    extra_headers: std::collections::HashMap<String, String>,
180    request_id_to_interception_id: HashMap<RequestId, InterceptionId>,
181    user_cache_disabled: bool,
182    attempted_authentications: HashSet<RequestId>,
183    credentials: Option<Credentials>,
184    // unused atm for remote connections, needs to be used for self launches.
185    user_request_interception_enabled: bool,
186    protocol_request_interception_enabled: bool,
187    /// The network is offline.
188    offline: bool,
189    /// The page request timeout.
190    pub request_timeout: Duration,
191    // made_request: bool,
192    /// Ignore visuals (no pings, prefetching, and etc).
193    pub ignore_visuals: bool,
194    /// Block CSS stylesheets.
195    pub block_stylesheets: bool,
196    /// Block javascript that is not critical to rendering.
197    pub block_javascript: bool,
198    /// Block analytics from rendering
199    pub block_analytics: bool,
200    /// Only html from loading.
201    pub only_html: bool,
202    /// Is xml document?
203    pub xml_document: bool,
204    /// The custom intercept handle logic to run on the website.
205    pub intercept_manager: NetworkInterceptManager,
206    /// Track the amount of times the document reloaded.
207    pub document_reload_tracker: u8,
208    /// The initial target domain.
209    pub document_target_domain: String,
210}
211
212impl NetworkManager {
213    pub fn new(ignore_httpserrors: bool, request_timeout: Duration) -> Self {
214        Self {
215            queued_events: Default::default(),
216            ignore_httpserrors,
217            requests: Default::default(),
218            requests_will_be_sent: Default::default(),
219            extra_headers: Default::default(),
220            request_id_to_interception_id: Default::default(),
221            user_cache_disabled: false,
222            attempted_authentications: Default::default(),
223            credentials: None,
224            user_request_interception_enabled: false,
225            protocol_request_interception_enabled: false,
226            offline: false,
227            request_timeout,
228            ignore_visuals: false,
229            block_javascript: false,
230            block_stylesheets: false,
231            block_analytics: true,
232            only_html: false,
233            xml_document: false,
234            intercept_manager: NetworkInterceptManager::Unknown,
235            document_reload_tracker: 0,
236            document_target_domain: String::new(),
237        }
238    }
239
240    pub fn init_commands(&self) -> CommandChain {
241        let cmds = if self.ignore_httpserrors {
242            INIT_CHAIN_IGNORE_HTTP_ERRORS.clone()
243        } else {
244            INIT_CHAIN.clone()
245        };
246
247        CommandChain::new(cmds, self.request_timeout)
248    }
249
250    pub(crate) fn push_cdp_request<T: Command>(&mut self, cmd: T) {
251        let method = cmd.identifier();
252        if let Ok(params) = serde_json::to_value(cmd) {
253            self.queued_events
254                .push_back(NetworkEvent::SendCdpRequest((method, params)));
255        }
256    }
257
258    /// The next event to handle
259    pub fn poll(&mut self) -> Option<NetworkEvent> {
260        self.queued_events.pop_front()
261    }
262
263    pub fn extra_headers(&self) -> &std::collections::HashMap<String, String> {
264        &self.extra_headers
265    }
266
267    pub fn set_extra_headers(&mut self, headers: std::collections::HashMap<String, String>) {
268        self.extra_headers = headers;
269        self.extra_headers.remove(PROXY_AUTHORIZATION.as_str());
270        self.extra_headers.remove("Proxy-Authorization");
271        if let Ok(headers) = serde_json::to_value(&self.extra_headers) {
272            self.push_cdp_request(SetExtraHttpHeadersParams::new(Headers::new(headers)));
273        }
274    }
275
276    pub fn set_service_worker_enabled(&mut self, bypass: bool) {
277        self.push_cdp_request(SetBypassServiceWorkerParams::new(bypass));
278    }
279
280    pub fn set_request_interception(&mut self, enabled: bool) {
281        self.user_request_interception_enabled = enabled;
282        self.update_protocol_request_interception();
283    }
284
285    pub fn set_cache_enabled(&mut self, enabled: bool) {
286        let run = self.user_cache_disabled != !enabled;
287        self.user_cache_disabled = !enabled;
288        if run {
289            self.update_protocol_cache_disabled();
290        }
291    }
292
293    pub fn disable_request_intercept(&mut self) {
294        self.protocol_request_interception_enabled = true;
295    }
296
297    pub fn update_protocol_cache_disabled(&mut self) {
298        self.push_cdp_request(SetCacheDisabledParams::new(self.user_cache_disabled));
299    }
300
301    pub fn authenticate(&mut self, credentials: Credentials) {
302        self.credentials = Some(credentials);
303        self.update_protocol_request_interception();
304        self.protocol_request_interception_enabled = true;
305    }
306
307    fn update_protocol_request_interception(&mut self) {
308        let enabled = self.user_request_interception_enabled || self.credentials.is_some();
309
310        if enabled == self.protocol_request_interception_enabled {
311            return;
312        }
313
314        if enabled {
315            self.push_cdp_request(ENABLE_FETCH.clone())
316        } else {
317            self.push_cdp_request(DisableParams::default())
318        }
319    }
320
321    /// Url matches analytics that we want to ignore or trackers.
322    pub(crate) fn ignore_script(
323        &self,
324        url: &str,
325        block_analytics: bool,
326        intercept_manager: NetworkInterceptManager,
327    ) -> bool {
328        let mut ignore_script = block_analytics
329            && spider_network_blocker::scripts::URL_IGNORE_TRIE.contains_prefix(url);
330
331        if !ignore_script {
332            if let Some(index) = url.find("//") {
333                let pos = index + 2;
334
335                // Ensure there is something after `//`
336                if pos < url.len() {
337                    // Find the first slash after the `//`
338                    if let Some(slash_index) = url[pos..].find('/') {
339                        let base_path_index = pos + slash_index + 1;
340
341                        if url.len() > base_path_index {
342                            let new_url: &str = &url[base_path_index..];
343
344                            // ignore assets we do not need for frameworks
345                            if !ignore_script
346                                && intercept_manager == NetworkInterceptManager::Unknown
347                            {
348                                let hydration_file =
349                                    JS_FRAMEWORK_PATH.iter().any(|p| new_url.starts_with(p));
350
351                                // ignore astro paths
352                                if hydration_file && new_url.ends_with(".js") {
353                                    ignore_script = true;
354                                }
355                            }
356
357                            if !ignore_script
358                                && URL_IGNORE_SCRIPT_BASE_PATHS.contains_prefix(new_url)
359                            {
360                                ignore_script = true;
361                            }
362
363                            if !ignore_script
364                                && self.ignore_visuals
365                                && URL_IGNORE_SCRIPT_STYLES_PATHS.contains_prefix(new_url)
366                            {
367                                ignore_script = true;
368                            }
369                        }
370                    }
371                }
372            }
373        }
374
375        // fallback for file ending in analytics.js
376        if !ignore_script && block_analytics {
377            ignore_script = URL_IGNORE_TRIE_PATHS.contains_prefix(url);
378        }
379
380        ignore_script
381    }
382
383    /// Determine if the request should be skipped.
384    fn skip_xhr(
385        &self,
386        skip_networking: bool,
387        event: &EventRequestPaused,
388        network_event: bool,
389    ) -> bool {
390        // XHR check
391        if !skip_networking && network_event {
392            let request_url = event.request.url.as_str();
393
394            // check if part of ignore scripts.
395            let skip_analytics =
396                self.block_analytics && (ignore_script_xhr(request_url) || block_xhr(request_url));
397
398            if skip_analytics {
399                true
400            } else if self.block_stylesheets || self.ignore_visuals {
401                let block_css = self.block_stylesheets;
402                let block_media = self.ignore_visuals;
403
404                let mut block_request = false;
405
406                if let Some(position) = request_url.rfind('.') {
407                    let hlen = request_url.len();
408                    let has_asset = hlen - position;
409
410                    if has_asset >= 3 {
411                        let next_position = position + 1;
412
413                        if block_media
414                            && IGNORE_XHR_ASSETS.contains::<CaseInsensitiveString>(
415                                &request_url[next_position..].into(),
416                            )
417                        {
418                            block_request = true;
419                        } else if block_css {
420                            block_request =
421                                CaseInsensitiveString::from(request_url[next_position..].as_bytes())
422                                    .contains(&**CSS_EXTENSION)
423                        }
424                    }
425                }
426
427                if !block_request {
428                    block_request = ignore_script_xhr_media(request_url);
429                }
430
431                block_request
432            } else {
433                skip_networking
434            }
435        } else {
436            skip_networking
437        }
438    }
439
440    #[cfg(not(feature = "adblock"))]
441    pub fn on_fetch_request_paused(&mut self, event: &EventRequestPaused) {
442        use super::blockers::block_websites::block_website;
443
444        if self.user_request_interception_enabled && self.protocol_request_interception_enabled {
445            return;
446        }
447
448        if !self.user_request_interception_enabled && self.protocol_request_interception_enabled {
449            self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
450        } else {
451            if let Some(network_id) = event.network_id.as_ref() {
452                if let Some(request_will_be_sent) =
453                    self.requests_will_be_sent.remove(network_id.as_ref())
454                {
455                    self.on_request(&request_will_be_sent, Some(event.request_id.clone().into()));
456                } else {
457                    let current_url = event.request.url.as_str();
458                    let javascript_resource = event.resource_type == ResourceType::Script;
459                    let document_resource = event.resource_type == ResourceType::Document;
460                    let network_resource = !document_resource
461                        && (event.resource_type == ResourceType::Xhr
462                            || event.resource_type == ResourceType::Fetch
463                            || event.resource_type == ResourceType::WebSocket);
464
465                    let skip_networking =
466                        IGNORE_NETWORKING_RESOURCE_MAP.contains(event.resource_type.as_ref());
467
468                    let skip_networking = skip_networking || self.document_reload_tracker >= 3;
469                    let mut replacer = None;
470
471                    if document_resource {
472                        if self.document_target_domain == current_url {
473                            // this will prevent the domain from looping (3 times is enough).
474                            self.document_reload_tracker += 1;
475                        } else if !self.document_target_domain.is_empty()
476                            && event.redirected_request_id.is_some()
477                        {
478                            let (http_document_replacement, mut https_document_replacement) =
479                                if self.document_target_domain.starts_with("http://") {
480                                    (
481                                        self.document_target_domain.replace("http://", "http//"),
482                                        self.document_target_domain.replace("http://", "https://"),
483                                    )
484                                } else {
485                                    (
486                                        self.document_target_domain.replace("https://", "https//"),
487                                        self.document_target_domain.replace("https://", "http://"),
488                                    )
489                                };
490
491                            let trailing = https_document_replacement.ends_with('/');
492
493                            if trailing {
494                                https_document_replacement.pop();
495                            }
496
497                            if https_document_replacement.ends_with('/') {
498                                https_document_replacement.pop();
499                            }
500
501                            let redirect_mask = format!(
502                                "{}{}",
503                                https_document_replacement, http_document_replacement
504                            );
505
506                            // handle redirect masking
507                            if current_url == redirect_mask {
508                                replacer = Some(if trailing {
509                                    format!("{}/", https_document_replacement)
510                                } else {
511                                    https_document_replacement
512                                });
513                            }
514                        }
515
516                        if self.document_target_domain.is_empty() && current_url.ends_with(".xml") {
517                            self.xml_document = true;
518                        }
519
520                        self.document_target_domain = event.request.url.clone();
521                    }
522
523                    let current_url = match &replacer {
524                        Some(r) => r,
525                        _ => &event.request.url,
526                    }
527                    .as_str();
528
529                    // main initial check
530                    let skip_networking = if !skip_networking {
531                        // allow sitemap xml building xsl
532                        if self.xml_document && current_url.ends_with(".xsl") {
533                            false
534                        } else {
535                            self.ignore_visuals
536                                && (IGNORE_VISUAL_RESOURCE_MAP
537                                    .contains(event.resource_type.as_ref()))
538                                || self.block_stylesheets
539                                    && ResourceType::Stylesheet == event.resource_type
540                                || self.block_javascript
541                                    && javascript_resource
542                                    && self.intercept_manager == NetworkInterceptManager::Unknown
543                                    && !ALLOWED_MATCHER.is_match(current_url)
544                        }
545                    } else {
546                        skip_networking
547                    };
548
549                    let skip_networking = if !skip_networking
550                        && (self.only_html || self.ignore_visuals)
551                        && (javascript_resource || document_resource)
552                    {
553                        ignore_script_embedded(current_url)
554                    } else {
555                        skip_networking
556                    };
557
558                    // analytics check
559                    let skip_networking = if !skip_networking && javascript_resource {
560                        self.ignore_script(
561                            current_url,
562                            self.block_analytics,
563                            self.intercept_manager,
564                        )
565                    } else {
566                        skip_networking
567                    };
568
569                    // XHR check
570                    let skip_networking = self.skip_xhr(skip_networking, &event, network_resource);
571
572                    // custom interception layer.
573                    let skip_networking = if !skip_networking
574                        && (javascript_resource || network_resource || document_resource)
575                    {
576                        self.intercept_manager.intercept_detection(
577                            &current_url,
578                            self.ignore_visuals,
579                            network_resource,
580                        )
581                    } else {
582                        skip_networking
583                    };
584
585                    let skip_networking =
586                        if !skip_networking && (javascript_resource || network_resource) {
587                            block_website(&current_url)
588                        } else {
589                            skip_networking
590                        };
591
592                    if skip_networking {
593                        tracing::debug!("Blocked: {:?} - {}", event.resource_type, current_url);
594                        let fullfill_params =
595                            crate::handler::network::fetch::FulfillRequestParams::new(
596                                event.request_id.clone(),
597                                200,
598                            );
599                        self.push_cdp_request(fullfill_params);
600                    } else {
601                        tracing::debug!("Allowed: {:?} - {}", event.resource_type, current_url);
602                        let mut continue_params =
603                            ContinueRequestParams::new(event.request_id.clone());
604
605                        if replacer.is_some() {
606                            continue_params.url = Some(current_url.into());
607                            continue_params.intercept_response = Some(false);
608                        }
609
610                        self.push_cdp_request(continue_params)
611                    }
612                }
613            } else {
614                self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
615            }
616        }
617    }
618
619    #[cfg(feature = "adblock")]
620    pub fn on_fetch_request_paused(&mut self, event: &EventRequestPaused) {
621        if self.user_request_interception_enabled && self.protocol_request_interception_enabled {
622            return;
623        }
624
625        if !self.user_request_interception_enabled && self.protocol_request_interception_enabled {
626            self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
627        } else {
628            if let Some(network_id) = event.network_id.as_ref() {
629                if let Some(request_will_be_sent) =
630                    self.requests_will_be_sent.remove(network_id.as_ref())
631                {
632                    self.on_request(&request_will_be_sent, Some(event.request_id.clone().into()));
633                } else {
634                    let current_url = event.request.url.as_str();
635                    let javascript_resource = event.resource_type == ResourceType::Script;
636                    let document_resource = event.resource_type == ResourceType::Document;
637                    let network_resource = !document_resource
638                        && (event.resource_type == ResourceType::Xhr
639                            || event.resource_type == ResourceType::Fetch
640                            || event.resource_type == ResourceType::WebSocket);
641                    let mut replacer = None;
642
643                    // block all of these events.
644                    let skip_networking =
645                        IGNORE_NETWORKING_RESOURCE_MAP.contains(event.resource_type.as_ref());
646
647                    let skip_networking = skip_networking || self.document_reload_tracker >= 3;
648
649                    if document_resource {
650                        if self.document_target_domain == current_url {
651                            // this will prevent the domain from looping (3 times is enough).
652                            self.document_reload_tracker += 1;
653                        } else if !self.document_target_domain.is_empty()
654                            && event.redirected_request_id.is_some()
655                        {
656                            let (http_document_replacement, mut https_document_replacement) =
657                                if self.document_target_domain.starts_with("http://") {
658                                    (
659                                        self.document_target_domain.replace("http://", "http//"),
660                                        self.document_target_domain.replace("http://", "https://"),
661                                    )
662                                } else {
663                                    (
664                                        self.document_target_domain.replace("https://", "https//"),
665                                        self.document_target_domain.replace("https://", "http://"),
666                                    )
667                                };
668
669                            let trailing = https_document_replacement.ends_with('/');
670
671                            if trailing {
672                                https_document_replacement.pop();
673                            }
674
675                            if https_document_replacement.ends_with('/') {
676                                https_document_replacement.pop();
677                            }
678
679                            let redirect_mask = format!(
680                                "{}{}",
681                                https_document_replacement, http_document_replacement
682                            );
683
684                            // handle redirect masking
685                            if current_url == redirect_mask {
686                                replacer = Some(if trailing {
687                                    format!("{}/", https_document_replacement)
688                                } else {
689                                    https_document_replacement
690                                });
691                            }
692                        }
693
694                        if self.document_target_domain.is_empty() && current_url.ends_with(".xml") {
695                            self.xml_document = true;
696                        }
697
698                        self.document_target_domain = event.request.url.clone();
699                    }
700
701                    let current_url = match &replacer {
702                        Some(r) => r,
703                        _ => &event.request.url,
704                    }
705                    .as_str();
706
707                    // main initial check
708                    let skip_networking = if !skip_networking {
709                        // allow sitemap xml building xsl
710                        if self.xml_document && current_url.ends_with(".xsl") {
711                            false
712                        } else {
713                            self.ignore_visuals
714                                && (IGNORE_VISUAL_RESOURCE_MAP
715                                    .contains(event.resource_type.as_ref()))
716                                || self.block_stylesheets
717                                    && ResourceType::Stylesheet == event.resource_type
718                                || self.block_javascript
719                                    && javascript_resource
720                                    && self.intercept_manager == NetworkInterceptManager::Unknown
721                                    && !ALLOWED_MATCHER.is_match(current_url)
722                        }
723                    } else {
724                        skip_networking
725                    };
726
727                    let skip_networking = if !skip_networking {
728                        self.detect_ad(event)
729                    } else {
730                        skip_networking
731                    };
732
733                    let skip_networking = if !skip_networking
734                        && (self.only_html || self.ignore_visuals)
735                        && (javascript_resource || document_resource)
736                    {
737                        ignore_script_embedded(current_url)
738                    } else {
739                        skip_networking
740                    };
741
742                    // analytics check
743                    let skip_networking = if !skip_networking && javascript_resource {
744                        self.ignore_script(
745                            current_url,
746                            self.block_analytics,
747                            self.intercept_manager,
748                        )
749                    } else {
750                        skip_networking
751                    };
752
753                    // XHR check
754                    let skip_networking = self.skip_xhr(skip_networking, &event, network_resource);
755
756                    // custom interception layer.
757                    let skip_networking = if !skip_networking
758                        && (javascript_resource || network_resource || document_resource)
759                    {
760                        self.intercept_manager.intercept_detection(
761                            &event.request.url,
762                            self.ignore_visuals,
763                            network_resource,
764                        )
765                    } else {
766                        skip_networking
767                    };
768
769                    let skip_networking = if !skip_networking
770                        && (javascript_resource || network_resource)
771                    {
772                        crate::handler::blockers::block_websites::block_website(&event.request.url)
773                    } else {
774                        skip_networking
775                    };
776
777                    if skip_networking {
778                        tracing::debug!("Blocked: {:?} - {}", event.resource_type, current_url);
779
780                        let fullfill_params =
781                            crate::handler::network::fetch::FulfillRequestParams::new(
782                                event.request_id.clone(),
783                                200,
784                            );
785                        self.push_cdp_request(fullfill_params);
786                    } else {
787                        tracing::debug!("Allowed: {:?} - {}", event.resource_type, current_url);
788
789                        let mut continue_params =
790                            ContinueRequestParams::new(event.request_id.clone());
791
792                        if replacer.is_some() {
793                            continue_params.url = Some(current_url.into());
794                            continue_params.intercept_response = Some(false);
795                        }
796                    }
797                }
798            } else {
799                self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
800            }
801        }
802
803        // if self.only_html {
804        //     self.made_request = true;
805        // }
806    }
807
808    /// Perform a page intercept for chrome
809    #[cfg(feature = "adblock")]
810    pub fn detect_ad(&self, event: &EventRequestPaused) -> bool {
811        use adblock::{
812            lists::{FilterSet, ParseOptions, RuleTypes},
813            Engine,
814        };
815
816        lazy_static::lazy_static! {
817            static ref AD_ENGINE: Engine = {
818                let mut filter_set = FilterSet::new(false);
819                let mut rules = ParseOptions::default();
820                rules.rule_types = RuleTypes::All;
821
822                filter_set.add_filters(
823                    &*spider_network_blocker::adblock::ADBLOCK_PATTERNS,
824                    rules,
825                );
826
827                Engine::from_filter_set(filter_set, true)
828            };
829        };
830
831        let blockable = ResourceType::Image == event.resource_type
832            || event.resource_type == ResourceType::Media
833            || event.resource_type == ResourceType::Stylesheet
834            || event.resource_type == ResourceType::Document
835            || event.resource_type == ResourceType::Fetch
836            || event.resource_type == ResourceType::Xhr;
837
838        let u = &event.request.url;
839
840        let block_request = blockable
841            // set it to example.com for 3rd party handling is_same_site
842        && {
843            let request = adblock::request::Request::preparsed(
844                 &u,
845                 "example.com",
846                 "example.com",
847                 &event.resource_type.as_ref().to_lowercase(),
848                 !event.request.is_same_site.unwrap_or_default());
849
850            AD_ENGINE.check_network_request(&request).matched
851        };
852
853        block_request
854    }
855
856    pub fn on_fetch_auth_required(&mut self, event: &EventAuthRequired) {
857        let response = if self
858            .attempted_authentications
859            .contains(event.request_id.as_ref())
860        {
861            AuthChallengeResponseResponse::CancelAuth
862        } else if self.credentials.is_some() {
863            self.attempted_authentications
864                .insert(event.request_id.clone().into());
865            AuthChallengeResponseResponse::ProvideCredentials
866        } else {
867            AuthChallengeResponseResponse::Default
868        };
869
870        let mut auth = AuthChallengeResponse::new(response);
871        if let Some(creds) = self.credentials.clone() {
872            auth.username = Some(creds.username);
873            auth.password = Some(creds.password);
874        }
875        self.push_cdp_request(ContinueWithAuthParams::new(event.request_id.clone(), auth));
876    }
877
878    pub fn set_offline_mode(&mut self, value: bool) {
879        if self.offline == value {
880            return;
881        }
882        self.offline = value;
883        if let Ok(network) = EmulateNetworkConditionsParams::builder()
884            .offline(self.offline)
885            .latency(0)
886            .download_throughput(-1.)
887            .upload_throughput(-1.)
888            .build()
889        {
890            self.push_cdp_request(network);
891        }
892    }
893
894    /// Request interception doesn't happen for data URLs with Network Service.
895    pub fn on_request_will_be_sent(&mut self, event: &EventRequestWillBeSent) {
896        if self.protocol_request_interception_enabled && !event.request.url.starts_with("data:") {
897            if let Some(interception_id) = self
898                .request_id_to_interception_id
899                .remove(event.request_id.as_ref())
900            {
901                self.on_request(event, Some(interception_id));
902            } else {
903                // TODO remove the clone for event
904                self.requests_will_be_sent
905                    .insert(event.request_id.clone(), event.clone());
906            }
907        } else {
908            self.on_request(event, None);
909        }
910    }
911
912    pub fn on_request_served_from_cache(&mut self, event: &EventRequestServedFromCache) {
913        if let Some(request) = self.requests.get_mut(event.request_id.as_ref()) {
914            request.from_memory_cache = true;
915        }
916    }
917
918    pub fn on_response_received(&mut self, event: &EventResponseReceived) {
919        if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
920            request.set_response(event.response.clone());
921            self.queued_events
922                .push_back(NetworkEvent::RequestFinished(request))
923        }
924    }
925
926    pub fn on_network_loading_finished(&mut self, event: &EventLoadingFinished) {
927        if let Some(request) = self.requests.remove(event.request_id.as_ref()) {
928            if let Some(interception_id) = request.interception_id.as_ref() {
929                self.attempted_authentications
930                    .remove(interception_id.as_ref());
931            }
932            self.queued_events
933                .push_back(NetworkEvent::RequestFinished(request));
934        }
935    }
936
937    pub fn on_network_loading_failed(&mut self, event: &EventLoadingFailed) {
938        if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
939            request.failure_text = Some(event.error_text.clone());
940            if let Some(interception_id) = request.interception_id.as_ref() {
941                self.attempted_authentications
942                    .remove(interception_id.as_ref());
943            }
944            self.queued_events
945                .push_back(NetworkEvent::RequestFailed(request));
946        }
947    }
948
949    fn on_request(
950        &mut self,
951        event: &EventRequestWillBeSent,
952        interception_id: Option<InterceptionId>,
953    ) {
954        let mut redirect_chain = Vec::new();
955        let mut redirect_location = None;
956
957        if let Some(redirect_resp) = &event.redirect_response {
958            if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
959                if is_redirect_status(redirect_resp.status) {
960                    if let Some(location) = redirect_resp.headers.inner()["Location"].as_str() {
961                        if redirect_resp.url != location {
962                            let fixed_location = location.replace(&redirect_resp.url, "");
963
964                            request.response.as_mut().map(|resp| {
965                                resp.headers.0["Location"] =
966                                    serde_json::Value::String(fixed_location.clone());
967                            });
968
969                            redirect_location = Some(fixed_location);
970                        }
971                    }
972                }
973
974                self.handle_request_redirect(
975                    &mut request,
976                    if let Some(redirect_location) = redirect_location {
977                        let mut redirect_resp = redirect_resp.clone();
978
979                        redirect_resp.headers.0["Location"] =
980                            serde_json::Value::String(redirect_location);
981
982                        redirect_resp
983                    } else {
984                        redirect_resp.clone()
985                    },
986                );
987
988                redirect_chain = std::mem::take(&mut request.redirect_chain);
989                redirect_chain.push(request);
990            }
991        }
992
993        let request = HttpRequest::new(
994            event.request_id.clone(),
995            event.frame_id.clone(),
996            interception_id,
997            self.user_request_interception_enabled,
998            redirect_chain,
999        );
1000
1001        self.requests.insert(event.request_id.clone(), request);
1002        self.queued_events
1003            .push_back(NetworkEvent::Request(event.request_id.clone()));
1004    }
1005
1006    fn handle_request_redirect(&mut self, request: &mut HttpRequest, response: Response) {
1007        request.set_response(response);
1008        if let Some(interception_id) = request.interception_id.as_ref() {
1009            self.attempted_authentications
1010                .remove(interception_id.as_ref());
1011        }
1012    }
1013}
1014
/// Events produced by the network manager and placed on its `queued_events`
/// queue.
#[derive(Debug)]
pub enum NetworkEvent {
    /// A CDP command as a method id plus JSON value.
    /// NOTE(review): presumably the serialized params produced by
    /// `push_cdp_request` — confirm against that helper.
    SendCdpRequest((MethodId, serde_json::Value)),
    /// A request with this id has started being tracked (queued by `on_request`).
    Request(RequestId),
    /// Carries a request id for a response.
    /// NOTE(review): not emitted by the handlers visible here — confirm usage.
    Response(RequestId),
    /// The request failed to load; queued by `on_network_loading_failed` after
    /// `failure_text` has been set on it.
    RequestFailed(HttpRequest),
    /// The request is complete; queued by `on_response_received` and
    /// `on_network_loading_finished`.
    RequestFinished(HttpRequest),
}