chromiumoxide/handler/
network.rs

1use super::blockers::{
2    block_websites::block_xhr,
3    ignore_script_embedded, ignore_script_xhr, ignore_script_xhr_media,
4    intercept_manager::NetworkInterceptManager,
5    scripts::{
6        URL_IGNORE_SCRIPT_BASE_PATHS, URL_IGNORE_SCRIPT_STYLES_PATHS, URL_IGNORE_TRIE,
7        URL_IGNORE_TRIE_PATHS,
8    },
9    xhr::IGNORE_XHR_ASSETS,
10};
11use crate::auth::Credentials;
12use crate::cmd::CommandChain;
13use crate::handler::http::HttpRequest;
14use aho_corasick::AhoCorasick;
15use case_insensitive_string::CaseInsensitiveString;
16use chromiumoxide_cdp::cdp::browser_protocol::network::{
17    EmulateNetworkConditionsParams, EventLoadingFailed, EventLoadingFinished,
18    EventRequestServedFromCache, EventRequestWillBeSent, EventResponseReceived, Headers,
19    InterceptionId, RequestId, ResourceType, Response, SetCacheDisabledParams,
20    SetExtraHttpHeadersParams,
21};
22use chromiumoxide_cdp::cdp::browser_protocol::{
23    fetch::{
24        self, AuthChallengeResponse, AuthChallengeResponseResponse, ContinueRequestParams,
25        ContinueWithAuthParams, DisableParams, EventAuthRequired, EventRequestPaused,
26        RequestPattern,
27    },
28    network::SetBypassServiceWorkerParams,
29};
30use chromiumoxide_cdp::cdp::browser_protocol::{
31    network::EnableParams, security::SetIgnoreCertificateErrorsParams,
32};
33use chromiumoxide_types::{Command, Method, MethodId};
34use hashbrown::{HashMap, HashSet};
35use lazy_static::lazy_static;
36use reqwest::header::PROXY_AUTHORIZATION;
37use std::collections::VecDeque;
38use std::time::Duration;
39
40lazy_static! {
41    /// General patterns for popular libraries and resources
42    static ref JS_FRAMEWORK_ALLOW: Vec<&'static str> = vec![
43        "jquery",           // Covers jquery.min.js, jquery.js, etc.
44        "angular",
45        "react",            // Covers all React-related patterns
46        "vue",              // Covers all Vue-related patterns
47        "bootstrap",
48        "d3",
49        "lodash",
50        "ajax",
51        "application",
52        "app",              // Covers general app scripts like app.js
53        "main",
54        "index",
55        "bundle",
56        "vendor",
57        "runtime",
58        "polyfill",
59        "scripts",
60        "es2015.",
61        "es2020.",
62        "webpack",
63        "/wp-content/js/",  // Covers Wordpress content
64        // Verified 3rd parties for request
65        "https://m.stripe.network/",
66        "https://challenges.cloudflare.com/",
67        "https://www.google.com/recaptcha/api.js",
68        "https://google.com/recaptcha/api.js",
69        "https://js.stripe.com/",
70        "https://cdn.prod.website-files.com/", // webflow cdn scripts
71        "https://cdnjs.cloudflare.com/",        // cloudflare cdn scripts
72        "https://code.jquery.com/jquery-"
73    ];
74
75    /// Determine if a script should be rendered in the browser by name.
76    pub static ref ALLOWED_MATCHER: AhoCorasick = AhoCorasick::new(JS_FRAMEWORK_ALLOW.iter()).unwrap();
77
78    /// path of a js framework
79    pub static ref JS_FRAMEWORK_PATH: phf::Set<&'static str> = {
80        phf::phf_set! {
81            // Add allowed assets from JS_FRAMEWORK_ASSETS except the excluded ones
82            "_astro/", "_app/immutable"
83        }
84    };
85
86    /// Ignore the content types.
87    pub static ref IGNORE_CONTENT_TYPES: phf::Set<&'static str> = phf::phf_set! {
88        "application/pdf",
89        "application/zip",
90        "application/x-rar-compressed",
91        "application/x-tar",
92        "image/png",
93        "image/jpeg",
94        "image/gif",
95        "image/bmp",
96        "image/svg+xml",
97        "video/mp4",
98        "video/x-msvideo",
99        "video/x-matroska",
100        "video/webm",
101        "audio/mpeg",
102        "audio/ogg",
103        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
104        "application/vnd.ms-excel",
105        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
106        "application/vnd.ms-powerpoint",
107        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
108        "application/x-7z-compressed",
109        "application/x-rpm",
110        "application/x-shockwave-flash",
111        "application/rtf",
112    };
113
114    /// Ignore the resources for visual content types.
115    pub static ref IGNORE_VISUAL_RESOURCE_MAP: phf::Set<&'static str> = phf::phf_set! {
116        "Image",
117        "Media",
118        "Font"
119    };
120
121    /// Ignore the resources for visual content types.
122    pub static ref IGNORE_NETWORKING_RESOURCE_MAP: phf::Set<&'static str> = phf::phf_set! {
123        "CspViolationReport",
124        "Manifest",
125        "Other",
126        "Prefetch",
127        "Ping",
128    };
129
130    /// Case insenstive css matching
131    pub static ref CSS_EXTENSION: CaseInsensitiveString = CaseInsensitiveString::from("css");
132
133    /// The command chain.
134    pub static ref INIT_CHAIN: Vec<(std::borrow::Cow<'static, str>, serde_json::Value)>  = {
135        let enable = EnableParams::default();
136
137        if let Ok(c) = serde_json::to_value(&enable) {
138            vec![(enable.identifier(), c)]
139        } else {
140            vec![]
141        }
142    };
143
144    /// The command chain with https ignore.
145    pub static ref INIT_CHAIN_IGNORE_HTTP_ERRORS: Vec<(std::borrow::Cow<'static, str>, serde_json::Value)>  = {
146        let enable = EnableParams::default();
147        let mut v = vec![];
148        if let Ok(c) = serde_json::to_value(&enable) {
149            v.push((enable.identifier(), c));
150        }
151        let ignore = SetIgnoreCertificateErrorsParams::new(true);
152        if let Ok(ignored) = serde_json::to_value(&ignore) {
153            v.push((ignore.identifier(), ignored));
154        }
155
156        v
157    };
158
159    /// Enable the fetch intercept command
160    pub static ref ENABLE_FETCH: chromiumoxide_cdp::cdp::browser_protocol::fetch::EnableParams = {
161        fetch::EnableParams::builder()
162        .handle_auth_requests(true)
163        .pattern(RequestPattern::builder().url_pattern("*").build())
164        .build()
165    };
166}
167
/// Returns `true` when `status` is one of the HTTP redirect codes
/// 301, 302, 303, 307 or 308.
pub(crate) fn is_redirect_status(status: i64) -> bool {
    const REDIRECT_CODES: [i64; 5] = [301, 302, 303, 307, 308];
    REDIRECT_CODES.contains(&status)
}
172
173#[derive(Debug)]
174/// The base network manager.
175pub struct NetworkManager {
176    queued_events: VecDeque<NetworkEvent>,
177    ignore_httpserrors: bool,
178    requests: HashMap<RequestId, HttpRequest>,
179    // TODO put event in an Arc?
180    requests_will_be_sent: HashMap<RequestId, EventRequestWillBeSent>,
181    extra_headers: std::collections::HashMap<String, String>,
182    request_id_to_interception_id: HashMap<RequestId, InterceptionId>,
183    user_cache_disabled: bool,
184    attempted_authentications: HashSet<RequestId>,
185    credentials: Option<Credentials>,
186    // unused atm for remote connections, needs to be used for self launches.
187    user_request_interception_enabled: bool,
188    protocol_request_interception_enabled: bool,
189    offline: bool,
190    request_timeout: Duration,
191    // made_request: bool,
192    /// Ignore visuals (no pings, prefetching, and etc).
193    pub ignore_visuals: bool,
194    /// Block CSS stylesheets.
195    pub block_stylesheets: bool,
196    /// Block javascript that is not critical to rendering.
197    pub block_javascript: bool,
198    /// Block analytics from rendering
199    pub block_analytics: bool,
200    /// Only html from loading.
201    pub only_html: bool,
202    /// Is xml document?
203    pub xml_document: bool,
204    /// The custom intercept handle logic to run on the website.
205    pub intercept_manager: NetworkInterceptManager,
206    /// Track the amount of times the document reloaded.
207    pub document_reload_tracker: u8,
208    /// The initial target domain.
209    pub document_target_domain: String,
210}
211
212impl NetworkManager {
213    pub fn new(ignore_httpserrors: bool, request_timeout: Duration) -> Self {
214        Self {
215            queued_events: Default::default(),
216            ignore_httpserrors,
217            requests: Default::default(),
218            requests_will_be_sent: Default::default(),
219            extra_headers: Default::default(),
220            request_id_to_interception_id: Default::default(),
221            user_cache_disabled: false,
222            attempted_authentications: Default::default(),
223            credentials: None,
224            user_request_interception_enabled: false,
225            protocol_request_interception_enabled: false,
226            offline: false,
227            request_timeout,
228            ignore_visuals: false,
229            block_javascript: false,
230            block_stylesheets: false,
231            block_analytics: true,
232            only_html: false,
233            xml_document: false,
234            intercept_manager: NetworkInterceptManager::Unknown,
235            document_reload_tracker: 0,
236            document_target_domain: String::new(),
237        }
238    }
239
240    pub fn init_commands(&self) -> CommandChain {
241        let cmds = if self.ignore_httpserrors {
242            INIT_CHAIN_IGNORE_HTTP_ERRORS.clone()
243        } else {
244            INIT_CHAIN.clone()
245        };
246
247        CommandChain::new(cmds, self.request_timeout)
248    }
249
250    pub(crate) fn push_cdp_request<T: Command>(&mut self, cmd: T) {
251        let method = cmd.identifier();
252        if let Ok(params) = serde_json::to_value(cmd) {
253            self.queued_events
254                .push_back(NetworkEvent::SendCdpRequest((method, params)));
255        }
256    }
257
258    /// The next event to handle
259    pub fn poll(&mut self) -> Option<NetworkEvent> {
260        self.queued_events.pop_front()
261    }
262
263    pub fn extra_headers(&self) -> &std::collections::HashMap<String, String> {
264        &self.extra_headers
265    }
266
267    pub fn set_extra_headers(&mut self, headers: std::collections::HashMap<String, String>) {
268        self.extra_headers = headers;
269        self.extra_headers.remove(PROXY_AUTHORIZATION.as_str());
270        if let Ok(headers) = serde_json::to_value(&self.extra_headers) {
271            self.push_cdp_request(SetExtraHttpHeadersParams::new(Headers::new(headers)));
272        }
273    }
274
275    pub fn set_service_worker_enabled(&mut self, bypass: bool) {
276        self.push_cdp_request(SetBypassServiceWorkerParams::new(bypass));
277    }
278
279    pub fn set_request_interception(&mut self, enabled: bool) {
280        self.user_request_interception_enabled = enabled;
281        self.update_protocol_request_interception();
282    }
283
284    pub fn set_cache_enabled(&mut self, enabled: bool) {
285        let run = self.user_cache_disabled != !enabled;
286        self.user_cache_disabled = !enabled;
287        if run {
288            self.update_protocol_cache_disabled();
289        }
290    }
291
292    pub fn disable_request_intercept(&mut self) {
293        self.protocol_request_interception_enabled = true;
294    }
295
296    pub fn update_protocol_cache_disabled(&mut self) {
297        self.push_cdp_request(SetCacheDisabledParams::new(self.user_cache_disabled));
298    }
299
300    pub fn authenticate(&mut self, credentials: Credentials) {
301        self.credentials = Some(credentials);
302        self.update_protocol_request_interception();
303        self.protocol_request_interception_enabled = true;
304    }
305
306    fn update_protocol_request_interception(&mut self) {
307        let enabled = self.user_request_interception_enabled || self.credentials.is_some();
308
309        if enabled == self.protocol_request_interception_enabled {
310            return;
311        }
312
313        if enabled {
314            self.push_cdp_request(ENABLE_FETCH.clone())
315        } else {
316            self.push_cdp_request(DisableParams::default())
317        }
318    }
319
320    /// Url matches analytics that we want to ignore or trackers.
321    pub(crate) fn ignore_script(
322        &self,
323        url: &str,
324        block_analytics: bool,
325        intercept_manager: NetworkInterceptManager,
326    ) -> bool {
327        let mut ignore_script = block_analytics && URL_IGNORE_TRIE.contains_prefix(url);
328
329        if !ignore_script {
330            if let Some(index) = url.find("//") {
331                let pos = index + 2;
332
333                // Ensure there is something after `//`
334                if pos < url.len() {
335                    // Find the first slash after the `//`
336                    if let Some(slash_index) = url[pos..].find('/') {
337                        let base_path_index = pos + slash_index + 1;
338
339                        if url.len() > base_path_index {
340                            let new_url: &str = &url[base_path_index..];
341
342                            // ignore assets we do not need for frameworks
343                            if !ignore_script
344                                && intercept_manager == NetworkInterceptManager::Unknown
345                            {
346                                let hydration_file =
347                                    JS_FRAMEWORK_PATH.iter().any(|p| new_url.starts_with(p));
348
349                                // ignore astro paths
350                                if hydration_file && new_url.ends_with(".js") {
351                                    ignore_script = true;
352                                }
353                            }
354
355                            if !ignore_script
356                                && URL_IGNORE_SCRIPT_BASE_PATHS.contains_prefix(new_url)
357                            {
358                                ignore_script = true;
359                            }
360
361                            if !ignore_script
362                                && self.ignore_visuals
363                                && URL_IGNORE_SCRIPT_STYLES_PATHS.contains_prefix(new_url)
364                            {
365                                ignore_script = true;
366                            }
367                        }
368                    }
369                }
370            }
371        }
372
373        // fallback for file ending in analytics.js
374        if !ignore_script && block_analytics {
375            ignore_script = URL_IGNORE_TRIE_PATHS.contains_prefix(url);
376        }
377
378        ignore_script
379    }
380
381    /// Determine if the request should be skipped.
382    fn skip_xhr(
383        &self,
384        skip_networking: bool,
385        event: &EventRequestPaused,
386        network_event: bool,
387    ) -> bool {
388        // XHR check
389        if !skip_networking && network_event {
390            let request_url = event.request.url.as_str();
391
392            // check if part of ignore scripts.
393            let skip_analytics =
394                self.block_analytics && (ignore_script_xhr(request_url) || block_xhr(request_url));
395
396            if skip_analytics {
397                true
398            } else if self.block_stylesheets || self.ignore_visuals {
399                let block_css = self.block_stylesheets;
400                let block_media = self.ignore_visuals;
401
402                let mut block_request = false;
403
404                if let Some(position) = request_url.rfind('.') {
405                    let hlen = request_url.len();
406                    let has_asset = hlen - position;
407
408                    if has_asset >= 3 {
409                        let next_position = position + 1;
410
411                        if block_media
412                            && IGNORE_XHR_ASSETS.contains::<CaseInsensitiveString>(
413                                &request_url[next_position..].into(),
414                            )
415                        {
416                            block_request = true;
417                        } else if block_css {
418                            block_request =
419                                CaseInsensitiveString::from(request_url[next_position..].as_bytes())
420                                    .contains(&**CSS_EXTENSION)
421                        }
422                    }
423                }
424
425                if !block_request {
426                    block_request = ignore_script_xhr_media(request_url);
427                }
428
429                block_request
430            } else {
431                skip_networking
432            }
433        } else {
434            skip_networking
435        }
436    }
437
438    #[cfg(not(feature = "adblock"))]
439    pub fn on_fetch_request_paused(&mut self, event: &EventRequestPaused) {
440        use super::blockers::block_websites::block_website;
441
442        if self.user_request_interception_enabled && self.protocol_request_interception_enabled {
443            return;
444        }
445
446        if !self.user_request_interception_enabled && self.protocol_request_interception_enabled {
447            self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
448        } else {
449            if let Some(network_id) = event.network_id.as_ref() {
450                if let Some(request_will_be_sent) =
451                    self.requests_will_be_sent.remove(network_id.as_ref())
452                {
453                    self.on_request(&request_will_be_sent, Some(event.request_id.clone().into()));
454                } else {
455                    let current_url = event.request.url.as_str();
456                    let javascript_resource = event.resource_type == ResourceType::Script;
457                    let document_resource = event.resource_type == ResourceType::Document;
458                    let network_resource = !document_resource
459                        && (event.resource_type == ResourceType::Xhr
460                            || event.resource_type == ResourceType::Fetch
461                            || event.resource_type == ResourceType::WebSocket);
462
463                    let skip_networking =
464                        IGNORE_NETWORKING_RESOURCE_MAP.contains(event.resource_type.as_ref());
465
466                    let skip_networking = skip_networking || self.document_reload_tracker >= 3;
467                    let mut replacer = None;
468
469                    if document_resource {
470                        if self.document_target_domain == current_url {
471                            // this will prevent the domain from looping (3 times is enough).
472                            self.document_reload_tracker += 1;
473                        } else if !self.document_target_domain.is_empty()
474                            && event.redirected_request_id.is_some()
475                        {
476                            let (http_document_replacement, mut https_document_replacement) =
477                                if self.document_target_domain.starts_with("http://") {
478                                    (
479                                        self.document_target_domain.replace("http://", "http//"),
480                                        self.document_target_domain.replace("http://", "https://"),
481                                    )
482                                } else {
483                                    (
484                                        self.document_target_domain.replace("https://", "https//"),
485                                        self.document_target_domain.replace("https://", "http://"),
486                                    )
487                                };
488
489                            let trailing = https_document_replacement.ends_with('/');
490
491                            if trailing {
492                                https_document_replacement.pop();
493                            }
494
495                            if https_document_replacement.ends_with('/') {
496                                https_document_replacement.pop();
497                            }
498
499                            let redirect_mask = format!(
500                                "{}{}",
501                                https_document_replacement, http_document_replacement
502                            );
503
504                            // handle redirect masking
505                            if current_url == redirect_mask {
506                                replacer = Some(if trailing {
507                                    format!("{}/", https_document_replacement)
508                                } else {
509                                    https_document_replacement
510                                });
511                            }
512                        }
513
514                        if self.document_target_domain.is_empty() && current_url.ends_with(".xml") {
515                            self.xml_document = true;
516                        }
517
518                        self.document_target_domain = event.request.url.clone();
519                    }
520
521                    let current_url = match &replacer {
522                        Some(r) => r,
523                        _ => &event.request.url,
524                    }
525                    .as_str();
526
527                    // main initial check
528                    let skip_networking = if !skip_networking {
529                        if self.xml_document && current_url.ends_with("sitemap-style.xsl") {
530                            false
531                        } else {
532                            self.ignore_visuals
533                                && (IGNORE_VISUAL_RESOURCE_MAP
534                                    .contains(event.resource_type.as_ref()))
535                                || self.block_stylesheets
536                                    && ResourceType::Stylesheet == event.resource_type
537                                || self.block_javascript
538                                    && javascript_resource
539                                    && self.intercept_manager == NetworkInterceptManager::Unknown
540                                    && !ALLOWED_MATCHER.is_match(current_url)
541                        }
542                    } else {
543                        skip_networking
544                    };
545
546                    let skip_networking = if !skip_networking
547                        && (self.only_html || self.ignore_visuals)
548                        && (javascript_resource || document_resource)
549                    {
550                        ignore_script_embedded(current_url)
551                    } else {
552                        skip_networking
553                    };
554
555                    // analytics check
556                    let skip_networking = if !skip_networking && javascript_resource {
557                        self.ignore_script(
558                            current_url,
559                            self.block_analytics,
560                            self.intercept_manager,
561                        )
562                    } else {
563                        skip_networking
564                    };
565
566                    // XHR check
567                    let skip_networking = self.skip_xhr(skip_networking, &event, network_resource);
568
569                    // custom interception layer.
570                    let skip_networking = if !skip_networking
571                        && (javascript_resource || network_resource || document_resource)
572                    {
573                        self.intercept_manager.intercept_detection(
574                            &current_url,
575                            self.ignore_visuals,
576                            network_resource,
577                        )
578                    } else {
579                        skip_networking
580                    };
581
582                    let skip_networking =
583                        if !skip_networking && (javascript_resource || network_resource) {
584                            block_website(&current_url)
585                        } else {
586                            skip_networking
587                        };
588
589                    if skip_networking {
590                        tracing::debug!("Blocked: {:?} - {}", event.resource_type, current_url);
591                        let fullfill_params =
592                            crate::handler::network::fetch::FulfillRequestParams::new(
593                                event.request_id.clone(),
594                                200,
595                            );
596                        self.push_cdp_request(fullfill_params);
597                    } else {
598                        tracing::debug!("Allowed: {:?} - {}", event.resource_type, current_url);
599                        let mut continue_params =
600                            ContinueRequestParams::new(event.request_id.clone());
601
602                        if replacer.is_some() {
603                            continue_params.url = Some(current_url.into());
604                            continue_params.intercept_response = Some(false);
605                        }
606
607                        self.push_cdp_request(continue_params)
608                    }
609                }
610            } else {
611                self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
612            }
613        }
614    }
615
616    #[cfg(feature = "adblock")]
617    pub fn on_fetch_request_paused(&mut self, event: &EventRequestPaused) {
618        if self.user_request_interception_enabled && self.protocol_request_interception_enabled {
619            return;
620        }
621
622        if !self.user_request_interception_enabled && self.protocol_request_interception_enabled {
623            self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
624        } else {
625            if let Some(network_id) = event.network_id.as_ref() {
626                if let Some(request_will_be_sent) =
627                    self.requests_will_be_sent.remove(network_id.as_ref())
628                {
629                    self.on_request(&request_will_be_sent, Some(event.request_id.clone().into()));
630                } else {
631                    let current_url = event.request.url.as_str();
632                    let javascript_resource = event.resource_type == ResourceType::Script;
633                    let document_resource = event.resource_type == ResourceType::Document;
634                    let network_resource = !document_resource
635                        && (event.resource_type == ResourceType::Xhr
636                            || event.resource_type == ResourceType::Fetch
637                            || event.resource_type == ResourceType::WebSocket);
638                    let mut replacer = None;
639
640                    // block all of these events.
641                    let skip_networking =
642                        IGNORE_NETWORKING_RESOURCE_MAP.contains(event.resource_type.as_ref());
643
644                    let skip_networking = skip_networking || self.document_reload_tracker >= 3;
645
646                    if document_resource {
647                        if self.document_target_domain == current_url {
648                            // this will prevent the domain from looping (3 times is enough).
649                            self.document_reload_tracker += 1;
650                        } else if !self.document_target_domain.is_empty()
651                            && event.redirected_request_id.is_some()
652                        {
653                            let (http_document_replacement, mut https_document_replacement) =
654                                if self.document_target_domain.starts_with("http://") {
655                                    (
656                                        self.document_target_domain.replace("http://", "http//"),
657                                        self.document_target_domain.replace("http://", "https://"),
658                                    )
659                                } else {
660                                    (
661                                        self.document_target_domain.replace("https://", "https//"),
662                                        self.document_target_domain.replace("https://", "http://"),
663                                    )
664                                };
665
666                            let trailing = https_document_replacement.ends_with('/');
667
668                            if trailing {
669                                https_document_replacement.pop();
670                            }
671
672                            if https_document_replacement.ends_with('/') {
673                                https_document_replacement.pop();
674                            }
675
676                            let redirect_mask = format!(
677                                "{}{}",
678                                https_document_replacement, http_document_replacement
679                            );
680
681                            // handle redirect masking
682                            if current_url == redirect_mask {
683                                replacer = Some(if trailing {
684                                    format!("{}/", https_document_replacement)
685                                } else {
686                                    https_document_replacement
687                                });
688                            }
689                        }
690
691                        if self.document_target_domain.is_empty() && current_url.ends_with(".xml") {
692                            self.xml_document = true;
693                        }
694
695                        self.document_target_domain = event.request.url.clone();
696                    }
697
698                    let current_url = match &replacer {
699                        Some(r) => r,
700                        _ => &event.request.url,
701                    }
702                    .as_str();
703
704                    // main initial check
705                    let skip_networking = if !skip_networking {
706                        if self.xml_document && current_url.ends_with("sitemap-style.xsl") {
707                            false
708                        } else {
709                            self.ignore_visuals
710                                && (IGNORE_VISUAL_RESOURCE_MAP
711                                    .contains(event.resource_type.as_ref()))
712                                || self.block_stylesheets
713                                    && ResourceType::Stylesheet == event.resource_type
714                                || self.block_javascript
715                                    && javascript_resource
716                                    && self.intercept_manager == NetworkInterceptManager::Unknown
717                                    && !ALLOWED_MATCHER.is_match(current_url)
718                        }
719                    } else {
720                        skip_networking
721                    };
722
723                    let skip_networking = if !skip_networking {
724                        self.detect_ad(event)
725                    } else {
726                        skip_networking
727                    };
728
729                    let skip_networking = if !skip_networking
730                        && (self.only_html || self.ignore_visuals)
731                        && (javascript_resource || document_resource)
732                    {
733                        ignore_script_embedded(current_url)
734                    } else {
735                        skip_networking
736                    };
737
738                    // analytics check
739                    let skip_networking = if !skip_networking && javascript_resource {
740                        self.ignore_script(
741                            current_url,
742                            self.block_analytics,
743                            self.intercept_manager,
744                        )
745                    } else {
746                        skip_networking
747                    };
748
749                    // XHR check
750                    let skip_networking = self.skip_xhr(skip_networking, &event, network_resource);
751
752                    // custom interception layer.
753                    let skip_networking = if !skip_networking
754                        && (javascript_resource || network_resource || document_resource)
755                    {
756                        self.intercept_manager.intercept_detection(
757                            &event.request.url,
758                            self.ignore_visuals,
759                            network_resource,
760                        )
761                    } else {
762                        skip_networking
763                    };
764
765                    let skip_networking = if !skip_networking
766                        && (javascript_resource || network_resource)
767                    {
768                        crate::handler::blockers::block_websites::block_website(&event.request.url)
769                    } else {
770                        skip_networking
771                    };
772
773                    if skip_networking {
774                        tracing::debug!("Blocked: {:?} - {}", event.resource_type, current_url);
775
776                        let fullfill_params =
777                            crate::handler::network::fetch::FulfillRequestParams::new(
778                                event.request_id.clone(),
779                                200,
780                            );
781                        self.push_cdp_request(fullfill_params);
782                    } else {
783                        tracing::debug!("Allowed: {:?} - {}", event.resource_type, current_url);
784
785                        let mut continue_params =
786                            ContinueRequestParams::new(event.request_id.clone());
787
788                        if replacer.is_some() {
789                            continue_params.url = Some(current_url.into());
790                            continue_params.intercept_response = Some(false);
791                        }
792                    }
793                }
794            } else {
795                self.push_cdp_request(ContinueRequestParams::new(event.request_id.clone()))
796            }
797        }
798
799        // if self.only_html {
800        //     self.made_request = true;
801        // }
802    }
803
804    /// Perform a page intercept for chrome
805    #[cfg(feature = "adblock")]
806    pub fn detect_ad(&self, event: &EventRequestPaused) -> bool {
807        use adblock::{
808            lists::{FilterSet, ParseOptions, RuleTypes},
809            Engine,
810        };
811
812        lazy_static::lazy_static! {
813            static ref AD_ENGINE: Engine = {
814                let mut filter_set = FilterSet::new(false);
815                let mut rules = ParseOptions::default();
816                rules.rule_types = RuleTypes::All;
817
818                filter_set.add_filters(
819                    &*crate::handler::blockers::adblock_patterns::ADBLOCK_PATTERNS,
820                    rules,
821                );
822
823                Engine::from_filter_set(filter_set, true)
824            };
825        };
826
827        let blockable = ResourceType::Image == event.resource_type
828            || event.resource_type == ResourceType::Media
829            || event.resource_type == ResourceType::Stylesheet
830            || event.resource_type == ResourceType::Document
831            || event.resource_type == ResourceType::Fetch
832            || event.resource_type == ResourceType::Xhr;
833
834        let u = &event.request.url;
835
836        let block_request = blockable
837            // set it to example.com for 3rd party handling is_same_site
838        && {
839            let request = adblock::request::Request::preparsed(
840                 &u,
841                 "example.com",
842                 "example.com",
843                 &event.resource_type.as_ref().to_lowercase(),
844                 !event.request.is_same_site.unwrap_or_default());
845
846            AD_ENGINE.check_network_request(&request).matched
847        };
848
849        block_request
850    }
851
852    pub fn on_fetch_auth_required(&mut self, event: &EventAuthRequired) {
853        let response = if self
854            .attempted_authentications
855            .contains(event.request_id.as_ref())
856        {
857            AuthChallengeResponseResponse::CancelAuth
858        } else if self.credentials.is_some() {
859            self.attempted_authentications
860                .insert(event.request_id.clone().into());
861            AuthChallengeResponseResponse::ProvideCredentials
862        } else {
863            AuthChallengeResponseResponse::Default
864        };
865
866        let mut auth = AuthChallengeResponse::new(response);
867        if let Some(creds) = self.credentials.clone() {
868            auth.username = Some(creds.username);
869            auth.password = Some(creds.password);
870        }
871        self.push_cdp_request(ContinueWithAuthParams::new(event.request_id.clone(), auth));
872    }
873
874    pub fn set_offline_mode(&mut self, value: bool) {
875        if self.offline == value {
876            return;
877        }
878        self.offline = value;
879        if let Ok(network) = EmulateNetworkConditionsParams::builder()
880            .offline(self.offline)
881            .latency(0)
882            .download_throughput(-1.)
883            .upload_throughput(-1.)
884            .build()
885        {
886            self.push_cdp_request(network);
887        }
888    }
889
890    /// Request interception doesn't happen for data URLs with Network Service.
891    pub fn on_request_will_be_sent(&mut self, event: &EventRequestWillBeSent) {
892        if self.protocol_request_interception_enabled && !event.request.url.starts_with("data:") {
893            if let Some(interception_id) = self
894                .request_id_to_interception_id
895                .remove(event.request_id.as_ref())
896            {
897                self.on_request(event, Some(interception_id));
898            } else {
899                // TODO remove the clone for event
900                self.requests_will_be_sent
901                    .insert(event.request_id.clone(), event.clone());
902            }
903        } else {
904            self.on_request(event, None);
905        }
906    }
907
908    pub fn on_request_served_from_cache(&mut self, event: &EventRequestServedFromCache) {
909        if let Some(request) = self.requests.get_mut(event.request_id.as_ref()) {
910            request.from_memory_cache = true;
911        }
912    }
913
914    pub fn on_response_received(&mut self, event: &EventResponseReceived) {
915        if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
916            request.set_response(event.response.clone());
917            self.queued_events
918                .push_back(NetworkEvent::RequestFinished(request))
919        }
920    }
921
922    pub fn on_network_loading_finished(&mut self, event: &EventLoadingFinished) {
923        if let Some(request) = self.requests.remove(event.request_id.as_ref()) {
924            if let Some(interception_id) = request.interception_id.as_ref() {
925                self.attempted_authentications
926                    .remove(interception_id.as_ref());
927            }
928            self.queued_events
929                .push_back(NetworkEvent::RequestFinished(request));
930        }
931    }
932
933    pub fn on_network_loading_failed(&mut self, event: &EventLoadingFailed) {
934        if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
935            request.failure_text = Some(event.error_text.clone());
936            if let Some(interception_id) = request.interception_id.as_ref() {
937                self.attempted_authentications
938                    .remove(interception_id.as_ref());
939            }
940            self.queued_events
941                .push_back(NetworkEvent::RequestFailed(request));
942        }
943    }
944
945    fn on_request(
946        &mut self,
947        event: &EventRequestWillBeSent,
948        interception_id: Option<InterceptionId>,
949    ) {
950        let mut redirect_chain = Vec::new();
951        let mut redirect_location = None;
952
953        if let Some(redirect_resp) = &event.redirect_response {
954            if let Some(mut request) = self.requests.remove(event.request_id.as_ref()) {
955                if is_redirect_status(redirect_resp.status) {
956                    if let Some(location) = redirect_resp.headers.inner()["Location"].as_str() {
957                        if redirect_resp.url != location {
958                            let fixed_location = location.replace(&redirect_resp.url, "");
959                            request.response.as_mut().map(|resp| {
960                                resp.headers.0["Location"] =
961                                    serde_json::Value::String(fixed_location.clone());
962                            });
963                            redirect_location = Some(fixed_location);
964                        }
965                    }
966                }
967
968                self.handle_request_redirect(
969                    &mut request,
970                    if let Some(redirect_location) = redirect_location {
971                        let mut redirect_resp = redirect_resp.clone();
972                        redirect_resp.headers.0["Location"] =
973                            serde_json::Value::String(redirect_location);
974                        redirect_resp
975                    } else {
976                        redirect_resp.clone()
977                    },
978                );
979
980                redirect_chain = std::mem::take(&mut request.redirect_chain);
981                redirect_chain.push(request);
982            }
983        }
984
985        let request = HttpRequest::new(
986            event.request_id.clone(),
987            event.frame_id.clone(),
988            interception_id,
989            self.user_request_interception_enabled,
990            redirect_chain,
991        );
992
993        self.requests.insert(event.request_id.clone(), request);
994        self.queued_events
995            .push_back(NetworkEvent::Request(event.request_id.clone()));
996    }
997
998    fn handle_request_redirect(&mut self, request: &mut HttpRequest, response: Response) {
999        request.set_response(response);
1000        if let Some(interception_id) = request.interception_id.as_ref() {
1001            self.attempted_authentications
1002                .remove(interception_id.as_ref());
1003        }
1004    }
1005}
1006
1007#[derive(Debug)]
1008pub enum NetworkEvent {
1009    SendCdpRequest((MethodId, serde_json::Value)),
1010    Request(RequestId),
1011    Response(RequestId),
1012    RequestFailed(HttpRequest),
1013    RequestFinished(HttpRequest),
1014}