chromiumoxide/handler/
network.rs

1use super::blockers::{
2    block_websites::block_xhr, ignore_script_embedded, ignore_script_xhr, ignore_script_xhr_media,
3    xhr::IGNORE_XHR_ASSETS,
4};
5use crate::auth::Credentials;
6use crate::cmd::CommandChain;
7use crate::handler::http::HttpRequest;
8use aho_corasick::AhoCorasick;
9use case_insensitive_string::CaseInsensitiveString;
10use chromiumoxide_cdp::cdp::browser_protocol::network::{
11    EmulateNetworkConditionsParams, EventLoadingFailed, EventLoadingFinished,
12    EventRequestServedFromCache, EventRequestWillBeSent, EventResponseReceived, Headers,
13    InterceptionId, RequestId, ResourceType, Response, SetCacheDisabledParams,
14    SetExtraHttpHeadersParams,
15};
16use chromiumoxide_cdp::cdp::browser_protocol::{
17    fetch::{
18        self, AuthChallengeResponse, AuthChallengeResponseResponse, ContinueRequestParams,
19        ContinueWithAuthParams, DisableParams, EventAuthRequired, EventRequestPaused,
20        RequestPattern,
21    },
22    network::SetBypassServiceWorkerParams,
23};
24use chromiumoxide_cdp::cdp::browser_protocol::{
25    network::EnableParams, security::SetIgnoreCertificateErrorsParams,
26};
27use chromiumoxide_types::{Command, Method, MethodId};
28use hashbrown::{HashMap, HashSet};
29use lazy_static::lazy_static;
30use reqwest::header::PROXY_AUTHORIZATION;
31use spider_network_blocker::intercept_manager::NetworkInterceptManager;
32pub use spider_network_blocker::scripts::{
33    URL_IGNORE_SCRIPT_BASE_PATHS, URL_IGNORE_SCRIPT_STYLES_PATHS, URL_IGNORE_TRIE_PATHS,
34};
35use std::collections::VecDeque;
36use std::time::Duration;
37
lazy_static! {
    /// General patterns for popular libraries and resources.
    ///
    /// Each entry is a plain substring (or URL prefix) that is compiled into
    /// [`ALLOWED_MATCHER`].
    static ref JS_FRAMEWORK_ALLOW: Vec<&'static str> = vec![
        "jquery",           // Covers jquery.min.js, jquery.js, etc.
        "angular",
        "react",            // Covers all React-related patterns
        "vue",              // Covers all Vue-related patterns
        "bootstrap",
        "d3",
        "lodash",
        "ajax",
        "application",
        "app",              // Covers general app scripts like app.js
        "main",
        "index",
        "bundle",
        "vendor",
        "runtime",
        "polyfill",
        "scripts",
        "es2015.",
        "es2020.",
        "webpack",
        "/wp-content/js/",  // Covers Wordpress content
        // Verified 3rd parties for request
        "https://m.stripe.network/",
        "https://challenges.cloudflare.com/",
        "https://www.google.com/recaptcha/api.js",
        "https://google.com/recaptcha/api.js",
        "https://js.stripe.com/",
        "https://cdn.prod.website-files.com/", // webflow cdn scripts
        "https://cdnjs.cloudflare.com/",        // cloudflare cdn scripts
        "https://code.jquery.com/jquery-"
    ];

    /// Determine if a script should be rendered in the browser by name.
    ///
    /// Multi-pattern matcher built from `JS_FRAMEWORK_ALLOW`. Construction is unwrapped,
    /// so a build failure panics on first access.
    pub static ref ALLOWED_MATCHER: AhoCorasick = AhoCorasick::new(JS_FRAMEWORK_ALLOW.iter()).unwrap();

    /// Path prefixes of JS framework output directories.
    pub static ref JS_FRAMEWORK_PATH: phf::Set<&'static str> = {
        phf::phf_set! {
            // Add allowed assets from JS_FRAMEWORK_ASSETS except the excluded ones
            "_astro/", "_app/immutable"
        }
    };

    /// Content (MIME) types to ignore: documents, archives, images, video and audio.
    pub static ref IGNORE_CONTENT_TYPES: phf::Set<&'static str> = phf::phf_set! {
        "application/pdf",
        "application/zip",
        "application/x-rar-compressed",
        "application/x-tar",
        "image/png",
        "image/jpeg",
        "image/gif",
        "image/bmp",
        "image/svg+xml",
        "video/mp4",
        "video/x-msvideo",
        "video/x-matroska",
        "video/webm",
        "audio/mpeg",
        "audio/ogg",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/vnd.ms-excel",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/vnd.ms-powerpoint",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "application/x-7z-compressed",
        "application/x-rpm",
        "application/x-shockwave-flash",
        "application/rtf",
    };

    /// Resource type names that count as visual content (images, media, fonts).
    pub static ref IGNORE_VISUAL_RESOURCE_MAP: phf::Set<&'static str> = phf::phf_set! {
        "Image",
        "Media",
        "Font"
    };

    /// Resource type names for auxiliary networking (CSP reports, manifests, prefetches,
    /// pings and "other") that are ignored.
    pub static ref IGNORE_NETWORKING_RESOURCE_MAP: phf::Set<&'static str> = phf::phf_set! {
        "CspViolationReport",
        "Manifest",
        "Other",
        "Prefetch",
        "Ping",
    };

    /// Case-insensitive `css` extension used for stylesheet matching.
    pub static ref CSS_EXTENSION: CaseInsensitiveString = CaseInsensitiveString::from("css");

    /// The default command chain: the serialized network `EnableParams`.
    ///
    /// The chain is empty if serializing the parameters fails.
    pub static ref INIT_CHAIN: Vec<(std::borrow::Cow<'static, str>, serde_json::Value)>  = {
        let enable = EnableParams::default();

        if let Ok(c) = serde_json::to_value(&enable) {
            vec![(enable.identifier(), c)]
        } else {
            vec![]
        }
    };

    /// The command chain that additionally ignores certificate errors.
    ///
    /// Holds the network `EnableParams` followed by `SetIgnoreCertificateErrorsParams(true)`;
    /// a command that fails to serialize is left out.
    pub static ref INIT_CHAIN_IGNORE_HTTP_ERRORS: Vec<(std::borrow::Cow<'static, str>, serde_json::Value)>  = {
        let enable = EnableParams::default();
        let mut v = vec![];
        if let Ok(c) = serde_json::to_value(&enable) {
            v.push((enable.identifier(), c));
        }
        let ignore = SetIgnoreCertificateErrorsParams::new(true);
        if let Ok(ignored) = serde_json::to_value(&ignore) {
            v.push((ignore.identifier(), ignored));
        }

        v
    };

    /// Enable the fetch intercept command.
    ///
    /// Matches every URL (`"*"`) and asks for auth requests to be handled as well.
    pub static ref ENABLE_FETCH: chromiumoxide_cdp::cdp::browser_protocol::fetch::EnableParams = {
        fetch::EnableParams::builder()
        .handle_auth_requests(true)
        .pattern(RequestPattern::builder().url_pattern("*").build())
        .build()
    };
}
165
/// Report whether `status` is one of the redirect status codes 301, 302, 303, 307 or 308.
pub(crate) fn is_redirect_status(status: i64) -> bool {
    const REDIRECT_CODES: [i64; 5] = [301, 302, 303, 307, 308];

    REDIRECT_CODES.contains(&status)
}
170
#[derive(Debug)]
/// The base network manager.
///
/// Queues CDP commands/events for the handler to drain via `poll` and holds the
/// blocking options consulted when a paused fetch request is inspected.
pub struct NetworkManager {
    // Pending events, drained front-to-back by `poll`.
    queued_events: VecDeque<NetworkEvent>,
    // Selects the init chain that also ignores certificate errors.
    ignore_httpserrors: bool,
    // Requests keyed by request id (populated outside this section — TODO confirm).
    requests: HashMap<RequestId, HttpRequest>,
    // TODO put event in an Arc?
    // `requestWillBeSent` events kept until the matching paused fetch event arrives.
    requests_will_be_sent: HashMap<RequestId, EventRequestWillBeSent>,
    // Extra headers last passed to `set_extra_headers`.
    extra_headers: std::collections::HashMap<String, String>,
    // Presumably maps a network request id to its fetch interception id — TODO confirm.
    request_id_to_interception_id: HashMap<RequestId, InterceptionId>,
    // Value sent with `SetCacheDisabledParams`.
    user_cache_disabled: bool,
    // Presumably the requests for which credentials were already tried — TODO confirm.
    attempted_authentications: HashSet<RequestId>,
    // Credentials stored by `authenticate`.
    credentials: Option<Credentials>,
    // unused atm for remote connections, needs to be used for self launches.
    user_request_interception_enabled: bool,
    // When set while the user flag is unset, paused requests are continued untouched.
    protocol_request_interception_enabled: bool,
    // Initialised to `false`; not read in this section.
    offline: bool,
    // Timeout handed to the init `CommandChain`.
    request_timeout: Duration,
    // made_request: bool,
    /// Ignore visuals (no pings, prefetching, and etc).
    pub ignore_visuals: bool,
    /// Block CSS stylesheets.
    pub block_stylesheets: bool,
    /// Block javascript that is not critical to rendering.
    pub block_javascript: bool,
    /// Block analytics from rendering
    pub block_analytics: bool,
    /// Only html from loading.
    pub only_html: bool,
    /// Is xml document?
    pub xml_document: bool,
    /// The custom intercept handle logic to run on the website.
    pub intercept_manager: NetworkInterceptManager,
    /// Track the amount of times the document reloaded.
    pub document_reload_tracker: u8,
    /// The initial target domain.
    pub document_target_domain: String,
}
209
210impl NetworkManager {
211    pub fn new(ignore_httpserrors: bool, request_timeout: Duration) -> Self {
212        Self {
213            queued_events: Default::default(),
214            ignore_httpserrors,
215            requests: Default::default(),
216            requests_will_be_sent: Default::default(),
217            extra_headers: Default::default(),
218            request_id_to_interception_id: Default::default(),
219            user_cache_disabled: false,
220            attempted_authentications: Default::default(),
221            credentials: None,
222            user_request_interception_enabled: false,
223            protocol_request_interception_enabled: false,
224            offline: false,
225            request_timeout,
226            ignore_visuals: false,
227            block_javascript: false,
228            block_stylesheets: false,
229            block_analytics: true,
230            only_html: false,
231            xml_document: false,
232            intercept_manager: NetworkInterceptManager::Unknown,
233            document_reload_tracker: 0,
234            document_target_domain: String::new(),
235        }
236    }
237
238    pub fn init_commands(&self) -> CommandChain {
239        let cmds = if self.ignore_httpserrors {
240            INIT_CHAIN_IGNORE_HTTP_ERRORS.clone()
241        } else {
242            INIT_CHAIN.clone()
243        };
244
245        CommandChain::new(cmds, self.request_timeout)
246    }
247
248    pub(crate) fn push_cdp_request<T: Command>(&mut self, cmd: T) {
249        let method = cmd.identifier();
250        if let Ok(params) = serde_json::to_value(cmd) {
251            self.queued_events
252                .push_back(NetworkEvent::SendCdpRequest((method, params)));
253        }
254    }
255
    /// The next event to handle.
    ///
    /// Removes and returns the oldest queued event, or `None` when the queue is empty.
    pub fn poll(&mut self) -> Option<NetworkEvent> {
        self.queued_events.pop_front()
    }
260
    /// The extra HTTP headers currently stored on the manager.
    pub fn extra_headers(&self) -> &std::collections::HashMap<String, String> {
        &self.extra_headers
    }
264
265    pub fn set_extra_headers(&mut self, headers: std::collections::HashMap<String, String>) {
266        self.extra_headers = headers;
267        self.extra_headers.remove(PROXY_AUTHORIZATION.as_str());
268        self.extra_headers.remove("Proxy-Authorization");
269        if let Ok(headers) = serde_json::to_value(&self.extra_headers) {
270            self.push_cdp_request(SetExtraHttpHeadersParams::new(Headers::new(headers)));
271        }
272    }
273
274    pub fn set_service_worker_enabled(&mut self, bypass: bool) {
275        self.push_cdp_request(SetBypassServiceWorkerParams::new(bypass));
276    }
277
    /// Record whether the user wants request interception and sync the protocol state.
    pub fn set_request_interception(&mut self, enabled: bool) {
        self.user_request_interception_enabled = enabled;
        // Must run after the flag is stored: the update reads it to decide what to queue.
        self.update_protocol_request_interception();
    }
282
283    pub fn set_cache_enabled(&mut self, enabled: bool) {
284        let run = self.user_cache_disabled != !enabled;
285        self.user_cache_disabled = !enabled;
286        if run {
287            self.update_protocol_cache_disabled();
288        }
289    }
290
    /// Stop applying the blocking rules to paused requests.
    ///
    /// This *sets* the protocol interception flag. With that flag set,
    /// `on_fetch_request_paused` either continues the request untouched (user
    /// interception off) or returns without acting (user interception on), so the
    /// blocking rules are never evaluated.
    pub fn disable_request_intercept(&mut self) {
        self.protocol_request_interception_enabled = true;
    }
294
295    pub fn update_protocol_cache_disabled(&mut self) {
296        self.push_cdp_request(SetCacheDisabledParams::new(self.user_cache_disabled));
297    }
298
    /// Store `credentials` and switch on protocol-level request interception.
    pub fn authenticate(&mut self, credentials: Credentials) {
        self.credentials = Some(credentials);
        // Run the update while the protocol flag still holds its old value, so the
        // enable command is queued if interception was not already on.
        self.update_protocol_request_interception();
        self.protocol_request_interception_enabled = true;
    }
304
305    fn update_protocol_request_interception(&mut self) {
306        let enabled = self.user_request_interception_enabled || self.credentials.is_some();
307
308        if enabled == self.protocol_request_interception_enabled {
309            return;
310        }
311
312        if enabled {
313            self.push_cdp_request(ENABLE_FETCH.clone())
314        } else {
315            self.push_cdp_request(DisableParams::default())
316        }
317    }
318
319    /// Url matches analytics that we want to ignore or trackers.
320    pub(crate) fn ignore_script(
321        &self,
322        url: &str,
323        block_analytics: bool,
324        intercept_manager: NetworkInterceptManager,
325    ) -> bool {
326        let mut ignore_script = block_analytics
327            && spider_network_blocker::scripts::URL_IGNORE_TRIE.contains_prefix(url);
328
329        if !ignore_script {
330            if let Some(index) = url.find("//") {
331                let pos = index + 2;
332
333                // Ensure there is something after `//`
334                if pos < url.len() {
335                    // Find the first slash after the `//`
336                    if let Some(slash_index) = url[pos..].find('/') {
337                        let base_path_index = pos + slash_index + 1;
338
339                        if url.len() > base_path_index {
340                            let new_url: &str = &url[base_path_index..];
341
342                            // ignore assets we do not need for frameworks
343                            if !ignore_script
344                                && intercept_manager == NetworkInterceptManager::Unknown
345                            {
346                                let hydration_file =
347                                    JS_FRAMEWORK_PATH.iter().any(|p| new_url.starts_with(p));
348
349                                // ignore astro paths
350                                if hydration_file && new_url.ends_with(".js") {
351                                    ignore_script = true;
352                                }
353                            }
354
355                            if !ignore_script
356                                && URL_IGNORE_SCRIPT_BASE_PATHS.contains_prefix(new_url)
357                            {
358                                ignore_script = true;
359                            }
360
361                            if !ignore_script
362                                && self.ignore_visuals
363                                && URL_IGNORE_SCRIPT_STYLES_PATHS.contains_prefix(new_url)
364                            {
365                                ignore_script = true;
366                            }
367                        }
368                    }
369                }
370            }
371        }
372
373        // fallback for file ending in analytics.js
374        if !ignore_script && block_analytics {
375            ignore_script = URL_IGNORE_TRIE_PATHS.contains_prefix(url);
376        }
377
378        ignore_script
379    }
380
381    /// Determine if the request should be skipped.
382    fn skip_xhr(
383        &self,
384        skip_networking: bool,
385        event: &EventRequestPaused,
386        network_event: bool,
387    ) -> bool {
388        // XHR check
389        if !skip_networking && network_event {
390            let request_url = event.request.url.as_str();
391
392            // check if part of ignore scripts.
393            let skip_analytics =
394                self.block_analytics && (ignore_script_xhr(request_url) || block_xhr(request_url));
395
396            if skip_analytics {
397                true
398            } else if self.block_stylesheets || self.ignore_visuals {
399                let block_css = self.block_stylesheets;
400                let block_media = self.ignore_visuals;
401
402                let mut block_request = false;
403
404                if let Some(position) = request_url.rfind('.') {
405                    let hlen = request_url.len();
406                    let has_asset = hlen - position;
407
408                    if has_asset >= 3 {
409                        let next_position = position + 1;
410
411                        if block_media
412                            && IGNORE_XHR_ASSETS.contains::<CaseInsensitiveString>(
413                                &request_url[next_position..].into(),
414                            )
415                        {
416                            block_request = true;
417                        } else if block_css {
418                            block_request =
419                                CaseInsensitiveString::from(request_url[next_position..].as_bytes())
420                                    .contains(&**CSS_EXTENSION)
421                        }
422                    }
423                }
424
425                if !block_request {
426                    block_request = ignore_script_xhr_media(request_url);
427                }
428
429                block_request
430            } else {
431                skip_networking
432            }
433        } else {
434            skip_networking
435        }
436    }
437
438    #[cfg(not(feature = "adblock"))]
439    pub fn on_fetch_request_paused(&mut self, event: &EventRequestPaused) {
440        use super::blockers::block_websites::block_website;
441
442        if self.user_request_interception_enabled && self.protocol_request_interception_enabled {
443            return;
444        }
445
446        if !self.user_request_interception_enabled && self.protocol_request_interception_enabled {
447            self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
448        } else {
449            if let Some(network_id) = event.network_id.as_ref() {
450                if let Some(request_will_be_sent) =
451                    self.requests_will_be_sent.remove(network_id.as_ref())
452                {
453                    self.on_request(&request_will_be_sent, Some(event.request_id.clone().into()));
454                } else {
455                    let current_url = event.request.url.as_str();
456                    let javascript_resource = event.resource_type == ResourceType::Script;
457                    let document_resource = event.resource_type == ResourceType::Document;
458                    let network_resource = !document_resource
459                        && (event.resource_type == ResourceType::Xhr
460                            || event.resource_type == ResourceType::Fetch
461                            || event.resource_type == ResourceType::WebSocket);
462
463                    let skip_networking =
464                        IGNORE_NETWORKING_RESOURCE_MAP.contains(event.resource_type.as_ref());
465
466                    let skip_networking = skip_networking || self.document_reload_tracker >= 3;
467                    let mut replacer = None;
468
469                    if document_resource {
470                        if self.document_target_domain == current_url {
471                            // this will prevent the domain from looping (3 times is enough).
472                            self.document_reload_tracker += 1;
473                        } else if !self.document_target_domain.is_empty()
474                            && event.redirected_request_id.is_some()
475                        {
476                            let (http_document_replacement, mut https_document_replacement) =
477                                if self.document_target_domain.starts_with("http://") {
478                                    (
479                                        self.document_target_domain.replace("http://", "http//"),
480                                        self.document_target_domain.replace("http://", "https://"),
481                                    )
482                                } else {
483                                    (
484                                        self.document_target_domain.replace("https://", "https//"),
485                                        self.document_target_domain.replace("https://", "http://"),
486                                    )
487                                };
488
489                            let trailing = https_document_replacement.ends_with('/');
490
491                            if trailing {
492                                https_document_replacement.pop();
493                            }
494
495                            if https_document_replacement.ends_with('/') {
496                                https_document_replacement.pop();
497                            }
498
499                            let redirect_mask = format!(
500                                "{}{}",
501                                https_document_replacement, http_document_replacement
502                            );
503
504                            // handle redirect masking
505                            if current_url == redirect_mask {
506                                replacer = Some(if trailing {
507                                    format!("{}/", https_document_replacement)
508                                } else {
509                                    https_document_replacement
510                                });
511                            }
512                        }
513
514                        if self.document_target_domain.is_empty() && current_url.ends_with(".xml") {
515                            self.xml_document = true;
516                        }
517
518                        self.document_target_domain = event.request.url.clone();
519                    }
520
521                    let current_url = match &replacer {
522                        Some(r) => r,
523                        _ => &event.request.url,
524                    }
525                    .as_str();
526
527                    // main initial check
528                    let skip_networking = if !skip_networking {
529                        // allow sitemap xml building xsl
530                        if self.xml_document && current_url.ends_with(".xsl") {
531                            false
532                        } else {
533                            self.ignore_visuals
534                                && (IGNORE_VISUAL_RESOURCE_MAP
535                                    .contains(event.resource_type.as_ref()))
536                                || self.block_stylesheets
537                                    && ResourceType::Stylesheet == event.resource_type
538                                || self.block_javascript
539                                    && javascript_resource
540                                    && self.intercept_manager == NetworkInterceptManager::Unknown
541                                    && !ALLOWED_MATCHER.is_match(current_url)
542                        }
543                    } else {
544                        skip_networking
545                    };
546
547                    let skip_networking = if !skip_networking
548                        && (self.only_html || self.ignore_visuals)
549                        && (javascript_resource || document_resource)
550                    {
551                        ignore_script_embedded(current_url)
552                    } else {
553                        skip_networking
554                    };
555
556                    // analytics check
557                    let skip_networking = if !skip_networking && javascript_resource {
558                        self.ignore_script(
559                            current_url,
560                            self.block_analytics,
561                            self.intercept_manager,
562                        )
563                    } else {
564                        skip_networking
565                    };
566
567                    // XHR check
568                    let skip_networking = self.skip_xhr(skip_networking, &event, network_resource);
569
570                    // custom interception layer.
571                    let skip_networking = if !skip_networking
572                        && (javascript_resource || network_resource || document_resource)
573                    {
574                        self.intercept_manager.intercept_detection(
575                            &current_url,
576                            self.ignore_visuals,
577                            network_resource,
578                        )
579                    } else {
580                        skip_networking
581                    };
582
583                    let skip_networking =
584                        if !skip_networking && (javascript_resource || network_resource) {
585                            block_website(&current_url)
586                        } else {
587                            skip_networking
588                        };
589
590                    if skip_networking {
591                        tracing::debug!("Blocked: {:?} - {}", event.resource_type, current_url);
592                        let fullfill_params =
593                            crate::handler::network::fetch::FulfillRequestParams::new(
594                                event.request_id.clone(),
595                                200,
596                            );
597                        self.push_cdp_request(fullfill_params);
598                    } else {
599                        tracing::debug!("Allowed: {:?} - {}", event.resource_type, current_url);
600                        let mut continue_params =
601                            ContinueRequestParams::new(event.request_id.clone());
602
603                        if replacer.is_some() {
604                            continue_params.url = Some(current_url.into());
605                            continue_params.intercept_response = Some(false);
606                        }
607
608                        self.push_cdp_request(continue_params)
609                    }
610                }
611            } else {
612                self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
613            }
614        }
615    }
616
617    #[cfg(feature = "adblock")]
618    pub fn on_fetch_request_paused(&mut self, event: &EventRequestPaused) {
619        if self.user_request_interception_enabled && self.protocol_request_interception_enabled {
620            return;
621        }
622
623        if !self.user_request_interception_enabled && self.protocol_request_interception_enabled {
624            self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
625        } else {
626            if let Some(network_id) = event.network_id.as_ref() {
627                if let Some(request_will_be_sent) =
628                    self.requests_will_be_sent.remove(network_id.as_ref())
629                {
630                    self.on_request(&request_will_be_sent, Some(event.request_id.clone().into()));
631                } else {
632                    let current_url = event.request.url.as_str();
633                    let javascript_resource = event.resource_type == ResourceType::Script;
634                    let document_resource = event.resource_type == ResourceType::Document;
635                    let network_resource = !document_resource
636                        && (event.resource_type == ResourceType::Xhr
637                            || event.resource_type == ResourceType::Fetch
638                            || event.resource_type == ResourceType::WebSocket);
639                    let mut replacer = None;
640
641                    // block all of these events.
642                    let skip_networking =
643                        IGNORE_NETWORKING_RESOURCE_MAP.contains(event.resource_type.as_ref());
644
645                    let skip_networking = skip_networking || self.document_reload_tracker >= 3;
646
647                    if document_resource {
648                        if self.document_target_domain == current_url {
649                            // this will prevent the domain from looping (3 times is enough).
650                            self.document_reload_tracker += 1;
651                        } else if !self.document_target_domain.is_empty()
652                            && event.redirected_request_id.is_some()
653                        {
654                            let (http_document_replacement, mut https_document_replacement) =
655                                if self.document_target_domain.starts_with("http://") {
656                                    (
657                                        self.document_target_domain.replace("http://", "http//"),
658                                        self.document_target_domain.replace("http://", "https://"),
659                                    )
660                                } else {
661                                    (
662                                        self.document_target_domain.replace("https://", "https//"),
663                                        self.document_target_domain.replace("https://", "http://"),
664                                    )
665                                };
666
667                            let trailing = https_document_replacement.ends_with('/');
668
669                            if trailing {
670                                https_document_replacement.pop();
671                            }
672
673                            if https_document_replacement.ends_with('/') {
674                                https_document_replacement.pop();
675                            }
676
677                            let redirect_mask = format!(
678                                "{}{}",
679                                https_document_replacement, http_document_replacement
680                            );
681
682                            // handle redirect masking
683                            if current_url == redirect_mask {
684                                replacer = Some(if trailing {
685                                    format!("{}/", https_document_replacement)
686                                } else {
687                                    https_document_replacement
688                                });
689                            }
690                        }
691
692                        if self.document_target_domain.is_empty() && current_url.ends_with(".xml") {
693                            self.xml_document = true;
694                        }
695
696                        self.document_target_domain = event.request.url.clone();
697                    }
698
699                    let current_url = match &replacer {
700                        Some(r) => r,
701                        _ => &event.request.url,
702                    }
703                    .as_str();
704
705                    // main initial check
706                    let skip_networking = if !skip_networking {
707                        // allow sitemap xml building xsl
708                        if self.xml_document && current_url.ends_with(".xsl") {
709                            false
710                        } else {
711                            self.ignore_visuals
712                                && (IGNORE_VISUAL_RESOURCE_MAP
713                                    .contains(event.resource_type.as_ref()))
714                                || self.block_stylesheets
715                                    && ResourceType::Stylesheet == event.resource_type
716                                || self.block_javascript
717                                    && javascript_resource
718                                    && self.intercept_manager == NetworkInterceptManager::Unknown
719                                    && !ALLOWED_MATCHER.is_match(current_url)
720                        }
721                    } else {
722                        skip_networking
723                    };
724
725                    let skip_networking = if !skip_networking {
726                        self.detect_ad(event)
727                    } else {
728                        skip_networking
729                    };
730
731                    let skip_networking = if !skip_networking
732                        && (self.only_html || self.ignore_visuals)
733                        && (javascript_resource || document_resource)
734                    {
735                        ignore_script_embedded(current_url)
736                    } else {
737                        skip_networking
738                    };
739
740                    // analytics check
741                    let skip_networking = if !skip_networking && javascript_resource {
742                        self.ignore_script(
743                            current_url,
744                            self.block_analytics,
745                            self.intercept_manager,
746                        )
747                    } else {
748                        skip_networking
749                    };
750
751                    // XHR check
752                    let skip_networking = self.skip_xhr(skip_networking, &event, network_resource);
753
754                    // custom interception layer.
755                    let skip_networking = if !skip_networking
756                        && (javascript_resource || network_resource || document_resource)
757                    {
758                        self.intercept_manager.intercept_detection(
759                            &event.request.url,
760                            self.ignore_visuals,
761                            network_resource,
762                        )
763                    } else {
764                        skip_networking
765                    };
766
767                    let skip_networking = if !skip_networking
768                        && (javascript_resource || network_resource)
769                    {
770                        crate::handler::blockers::block_websites::block_website(&event.request.url)
771                    } else {
772                        skip_networking
773                    };
774
775                    if skip_networking {
776                        tracing::debug!("Blocked: {:?} - {}", event.resource_type, current_url);
777
778                        let fullfill_params =
779                            crate::handler::network::fetch::FulfillRequestParams::new(
780                                event.request_id.clone(),
781                                200,
782                            );
783                        self.push_cdp_request(fullfill_params);
784                    } else {
785                        tracing::debug!("Allowed: {:?} - {}", event.resource_type, current_url);
786
787                        let mut continue_params =
788                            ContinueRequestParams::new(event.request_id.clone());
789
790                        if replacer.is_some() {
791                            continue_params.url = Some(current_url.into());
792                            continue_params.intercept_response = Some(false);
793                        }
794                    }
795                }
796            } else {
797                self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
798            }
799        }
800
801        // if self.only_html {
802        //     self.made_request = true;
803        // }
804    }
805
806    /// Perform a page intercept for chrome
807    #[cfg(feature = "adblock")]
808    pub fn detect_ad(&self, event: &EventRequestPaused) -> bool {
809        use adblock::{
810            lists::{FilterSet, ParseOptions, RuleTypes},
811            Engine,
812        };
813
814        lazy_static::lazy_static! {
815            static ref AD_ENGINE: Engine = {
816                let mut filter_set = FilterSet::new(false);
817                let mut rules = ParseOptions::default();
818                rules.rule_types = RuleTypes::All;
819
820                filter_set.add_filters(
821                    &*spider_network_blocker::adblock::ADBLOCK_PATTERNS,
822                    rules,
823                );
824
825                Engine::from_filter_set(filter_set, true)
826            };
827        };
828
829        let blockable = ResourceType::Image == event.resource_type
830            || event.resource_type == ResourceType::Media
831            || event.resource_type == ResourceType::Stylesheet
832            || event.resource_type == ResourceType::Document
833            || event.resource_type == ResourceType::Fetch
834            || event.resource_type == ResourceType::Xhr;
835
836        let u = &event.request.url;
837
838        let block_request = blockable
839            // set it to example.com for 3rd party handling is_same_site
840        && {
841            let request = adblock::request::Request::preparsed(
842                 &u,
843                 "example.com",
844                 "example.com",
845                 &event.resource_type.as_ref().to_lowercase(),
846                 !event.request.is_same_site.unwrap_or_default());
847
848            AD_ENGINE.check_network_request(&request).matched
849        };
850
851        block_request
852    }
853
854    pub fn on_fetch_auth_required(&mut self, event: &EventAuthRequired) {
855        let response = if self
856            .attempted_authentications
857            .contains(event.request_id.as_ref())
858        {
859            AuthChallengeResponseResponse::CancelAuth
860        } else if self.credentials.is_some() {
861            self.attempted_authentications
862                .insert(event.request_id.clone().into());
863            AuthChallengeResponseResponse::ProvideCredentials
864        } else {
865            AuthChallengeResponseResponse::Default
866        };
867
868        let mut auth = AuthChallengeResponse::new(response);
869        if let Some(creds) = self.credentials.clone() {
870            auth.username = Some(creds.username);
871            auth.password = Some(creds.password);
872        }
873        self.push_cdp_request(ContinueWithAuthParams::new(event.request_id.clone(), auth));
874    }
875
876    pub fn set_offline_mode(&mut self, value: bool) {
877        if self.offline == value {
878            return;
879        }
880        self.offline = value;
881        if let Ok(network) = EmulateNetworkConditionsParams::builder()
882            .offline(self.offline)
883            .latency(0)
884            .download_throughput(-1.)
885            .upload_throughput(-1.)
886            .build()
887        {
888            self.push_cdp_request(network);
889        }
890    }
891
892    /// Request interception doesn't happen for data URLs with Network Service.
893    pub fn on_request_will_be_sent(&mut self, event: &EventRequestWillBeSent) {
894        if self.protocol_request_interception_enabled && !event.request.url.starts_with("data:") {
895            if let Some(interception_id) = self
896                .request_id_to_interception_id
897                .remove(event.request_id.as_ref())
898            {
899                self.on_request(event, Some(interception_id));
900            } else {
901                // TODO remove the clone for event
902                self.requests_will_be_sent
903                    .insert(event.request_id.clone(), event.clone());
904            }
905        } else {
906            self.on_request(event, None);
907        }
908    }
909
910    pub fn on_request_served_from_cache(&mut self, event: &EventRequestServedFromCache) {
911        if let Some(request) = self.requests.get_mut(event.request_id.as_ref()) {
912            request.from_memory_cache = true;
913        }
914    }
915
916    pub fn on_response_received(&mut self, event: &EventResponseReceived) {
917        if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
918            request.set_response(event.response.clone());
919            self.queued_events
920                .push_back(NetworkEvent::RequestFinished(request))
921        }
922    }
923
924    pub fn on_network_loading_finished(&mut self, event: &EventLoadingFinished) {
925        if let Some(request) = self.requests.remove(event.request_id.as_ref()) {
926            if let Some(interception_id) = request.interception_id.as_ref() {
927                self.attempted_authentications
928                    .remove(interception_id.as_ref());
929            }
930            self.queued_events
931                .push_back(NetworkEvent::RequestFinished(request));
932        }
933    }
934
935    pub fn on_network_loading_failed(&mut self, event: &EventLoadingFailed) {
936        if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
937            request.failure_text = Some(event.error_text.clone());
938            if let Some(interception_id) = request.interception_id.as_ref() {
939                self.attempted_authentications
940                    .remove(interception_id.as_ref());
941            }
942            self.queued_events
943                .push_back(NetworkEvent::RequestFailed(request));
944        }
945    }
946
947    fn on_request(
948        &mut self,
949        event: &EventRequestWillBeSent,
950        interception_id: Option<InterceptionId>,
951    ) {
952        let mut redirect_chain = Vec::new();
953        let mut redirect_location = None;
954
955        if let Some(redirect_resp) = &event.redirect_response {
956            if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
957                if is_redirect_status(redirect_resp.status) {
958                    if let Some(location) = redirect_resp.headers.inner()["Location"].as_str() {
959                        if redirect_resp.url != location {
960                            let fixed_location = location.replace(&redirect_resp.url, "");
961
962                            request.response.as_mut().map(|resp| {
963                                resp.headers.0["Location"] =
964                                    serde_json::Value::String(fixed_location.clone());
965                            });
966
967                            redirect_location = Some(fixed_location);
968                        }
969                    }
970                }
971
972                self.handle_request_redirect(
973                    &mut request,
974                    if let Some(redirect_location) = redirect_location {
975                        let mut redirect_resp = redirect_resp.clone();
976
977                        redirect_resp.headers.0["Location"] =
978                            serde_json::Value::String(redirect_location);
979
980                        redirect_resp
981                    } else {
982                        redirect_resp.clone()
983                    },
984                );
985
986                redirect_chain = std::mem::take(&mut request.redirect_chain);
987                redirect_chain.push(request);
988            }
989        }
990
991        let request = HttpRequest::new(
992            event.request_id.clone(),
993            event.frame_id.clone(),
994            interception_id,
995            self.user_request_interception_enabled,
996            redirect_chain,
997        );
998
999        self.requests.insert(event.request_id.clone(), request);
1000        self.queued_events
1001            .push_back(NetworkEvent::Request(event.request_id.clone()));
1002    }
1003
1004    fn handle_request_redirect(&mut self, request: &mut HttpRequest, response: Response) {
1005        request.set_response(response);
1006        if let Some(interception_id) = request.interception_id.as_ref() {
1007            self.attempted_authentications
1008                .remove(interception_id.as_ref());
1009        }
1010    }
1011}
1012
1013#[derive(Debug)]
1014pub enum NetworkEvent {
1015    SendCdpRequest((MethodId, serde_json::Value)),
1016    Request(RequestId),
1017    Response(RequestId),
1018    RequestFailed(HttpRequest),
1019    RequestFinished(HttpRequest),
1020}